barebrowse 0.12.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +47 -1
- package/README.md +19 -13
- package/barebrowse.context.md +28 -4
- package/cli.js +3 -0
- package/mcp-server.js +35 -7
- package/package.json +5 -1
- package/src/bareagent.js +10 -0
- package/src/cdp.js +11 -2
- package/src/daemon.js +29 -2
- package/src/index.js +8 -0
- package/src/readable.js +116 -0
- package/types/cdp.d.ts +0 -10
- package/types/readable.d.ts +18 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,6 +1,52 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
-
## [
|
|
3
|
+
## [0.14.0] - 2026-06-15
|
|
4
|
+
|
|
5
|
+
### Documentation
|
|
6
|
+
|
|
7
|
+
- **README — "The bare ecosystem" section recast from a 4-column table to a Core / Optional-reach list.** Now covers all six modules — core `bareagent` · `bareguard` · `litectx`, optional reach `barebrowse` · `baremobile` · `beeperbox` — in a scannable row form that also renders cleanly on npm. README only; no package change.
|
|
8
|
+
|
|
9
|
+
## [0.13.0] - 2026-06-12
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
- **`readable()` — clean article extraction.** New read mode that returns the
|
|
13
|
+
main article of a page as clean text (title + body prose, nav/ads/sidebars
|
|
14
|
+
stripped) via Mozilla Readability injected in-page over CDP. Companion to
|
|
15
|
+
`snapshot()`, not a replacement: `snapshot()` is the *actionable* ARIA tree
|
|
16
|
+
for clicking/typing; `readable()` is for *reading/summarising* article-like
|
|
17
|
+
pages (news, blogs, docs, wiki), where `snapshot()` is noisy and silently
|
|
18
|
+
lossy on long prose. Article detection is unreliable, so `readable()` never
|
|
19
|
+
hard-gates — it always returns the text plus an advisory `confidence`
|
|
20
|
+
(`high`/`low`) and a hint to fall back to `snapshot()` on non-article pages.
|
|
21
|
+
Exposed everywhere: `page.readable()`, MCP `readable` tool, bareagent
|
|
22
|
+
`readable` tool, and `barebrowse readable` CLI (→ `.barebrowse/article-*.txt`).
|
|
23
|
+
|
|
24
|
+
### Fixed
|
|
25
|
+
- **Large pages no longer kill the CDP connection.** Node's built-in WebSocket
|
|
26
|
+
(undici) silently caps decompressed messages at ~3 MB and *permanently* tears
|
|
27
|
+
down the socket when a single `Accessibility.getFullAXTree` response exceeds
|
|
28
|
+
it — which broke `snapshot()` (and consent dismissal during `goto()`) on big
|
|
29
|
+
pages (e.g. long Wikipedia articles). `cdp.js` now uses the `ws` package with
|
|
30
|
+
a 256 MB `maxPayload`; the built-in exposes no way to raise the limit.
|
|
31
|
+
Regression test: `connect.test.js` snapshots a 12k-node page that tripped the
|
|
32
|
+
old cap.
|
|
33
|
+
|
|
34
|
+
### Security
|
|
35
|
+
- **MCP output files are now owner-only (`0600`).** `saveSnapshot()` and the
|
|
36
|
+
screenshot tool previously wrote snapshots / articles / screenshots with
|
|
37
|
+
default perms (`0644` in a `0755` dir under the standard umask) —
|
|
38
|
+
authenticated page content readable by other local users on a shared host.
|
|
39
|
+
They now write `0600` files in a `0700` dir, umask-independent, matching the
|
|
40
|
+
daemon's existing invariant. Regression-guarded by a test that fails on a
|
|
41
|
+
`0644` write.
|
|
42
|
+
- **Daemon hardening:** `GET /status` (the only pre-auth endpoint) no longer
|
|
43
|
+
returns the pid; `/command` now caps the request body at 16 MB (→ `413`).
|
|
44
|
+
|
|
45
|
+
### Changed
|
|
46
|
+
- **Two runtime dependencies (previously zero):** `ws` (CDP transport, above)
|
|
47
|
+
and `@mozilla/readability` (`readable()`). Both are lightweight, widely
|
|
48
|
+
adopted, and actively maintained, per the project's dependency rule (external
|
|
49
|
+
only when the stdlib genuinely can't do the job).
|
|
4
50
|
|
|
5
51
|
## [0.12.0] - 2026-05-29
|
|
6
52
|
|
package/README.md
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
```
|
|
11
11
|
|
|
12
12
|
<p align="center">
|
|
13
|
+
<a href="https://github.com/hamr0/barebrowse/actions/workflows/ci.yml"><img src="https://img.shields.io/github/actions/workflow/status/hamr0/barebrowse/ci.yml?label=CI" alt="CI"></a>
|
|
13
14
|
<img src="https://img.shields.io/github/package-json/v/hamr0/barebrowse?label=version&color=2a4f8c" alt="version (auto from package.json)">
|
|
14
15
|
<img src="https://img.shields.io/badge/license-Apache%202.0-2a4f8c" alt="license: Apache 2.0">
|
|
15
16
|
</p>
|
|
@@ -25,7 +26,7 @@ barebrowse gives your AI agent a real browser. Navigate, read, interact, move on
|
|
|
25
26
|
|
|
26
27
|
It uses the browser you already have -- your sessions, your cookies. Pages come back stripped to what matters -- 40-90% fewer tokens than raw output.
|
|
27
28
|
|
|
28
|
-
No Playwright.
|
|
29
|
+
No Playwright. No bundled browser. No 200MB download. Two tiny dependencies (`ws` + Mozilla Readability).
|
|
29
30
|
|
|
30
31
|
## Install
|
|
31
32
|
|
|
@@ -44,6 +45,7 @@ Ships with TypeScript types (generated from JSDoc) — autocomplete and type-che
|
|
|
44
45
|
```bash
|
|
45
46
|
barebrowse open https://example.com # Start session + navigate
|
|
46
47
|
barebrowse snapshot # ARIA snapshot → .barebrowse/page-*.yml
|
|
48
|
+
barebrowse readable # Clean article text → .barebrowse/article-*.txt
|
|
47
49
|
barebrowse click 8 # Click element
|
|
48
50
|
barebrowse close # End session
|
|
49
51
|
```
|
|
@@ -94,7 +96,7 @@ Or manually add to your config (`claude_desktop_config.json`, `.cursor/mcp.json`
|
|
|
94
96
|
}
|
|
95
97
|
```
|
|
96
98
|
|
|
97
|
-
|
|
99
|
+
19 tools: `browse`, `goto`, `snapshot`, `readable`, `click`, `type`, `press`, `scroll`, `hover`, `select`, `back`, `forward`, `reload`, `drag`, `upload`, `pdf`, `screenshot`, `wait_for`, `tabs`. Plus `assess` (privacy scan) if [wearehere](https://github.com/hamr0/wearehere) is installed. Plus opt-in `eval` (`BAREBROWSE_MCP_EVAL=1`) — runs JS in the authenticated session, off by default because it can read cookies/localStorage. Session runs in hybrid mode with automatic cookie injection. Per-tool timeouts (goto/reload/wait_for 60s, back/forward 30s, interactive ops 15s, pdf/screenshot/upload 45s) with auto-retry on transient failures (idempotent only — mutating tools fail loudly to avoid double-submits).
|
|
98
100
|
|
|
99
101
|
`browse` and `snapshot` accept `pruneMode: 'act'|'read'` (v0.9.1). `act` (default) keeps interactive elements — best for clicking/filling. `read` keeps paragraphs, headings, and long text — best for articles, docs, and content extraction. If act-mode collapses a content-heavy page near-totally, the snapshot includes a `hint: …` line suggesting `pruneMode='read'` so the agent doesn't bail to a separate HTTP fetch.
|
|
100
102
|
|
|
@@ -102,7 +104,7 @@ Troubleshooting MCP setup: `npx barebrowse doctor` scans every known config loca
|
|
|
102
104
|
|
|
103
105
|
### 3. Library -- for agentic automation
|
|
104
106
|
|
|
105
|
-
Import barebrowse in your agent code. One-shot reads, interactive sessions, full observe-think-act loops. Works with any LLM orchestration library. Ships with a ready-made adapter for [bareagent](https://www.npmjs.com/package/bare-agent) (
|
|
107
|
+
Import barebrowse in your agent code. One-shot reads, interactive sessions, full observe-think-act loops. Works with any LLM orchestration library. Ships with a ready-made adapter for [bareagent](https://www.npmjs.com/package/bare-agent) (18 tools, auto-snapshot after every action).
|
|
106
108
|
|
|
107
109
|
For code examples, API reference, and wiring instructions, see **[barebrowse.context.md](barebrowse.context.md)** -- the full integration guide.
|
|
108
110
|
|
|
@@ -169,6 +171,7 @@ Everything the agent can do through barebrowse:
|
|
|
169
171
|
| **Navigate** | Load a URL, wait for page load, auto-dismiss consent |
|
|
170
172
|
| **Back / Forward** | Browser history navigation |
|
|
171
173
|
| **Snapshot** | Pruned ARIA tree with `[ref=N]` markers. Two modes: `act` (buttons, links, inputs) and `read` (full text). 40-90% token reduction. |
|
|
174
|
+
| **Readable** | Clean article text (title + body, chrome stripped — Reader-View engine). For *reading* article-like pages, not interacting. Advisory `confidence`; falls back to snapshot on non-articles. |
|
|
172
175
|
| **Click** | Scroll into view + mouse click at element center, JS fallback for hidden elements |
|
|
173
176
|
| **Type** | Focus + insert text, with option to clear existing content first |
|
|
174
177
|
| **Press** | Special keys: Enter, Tab, Escape, Backspace, Delete, arrows, Space |
|
|
@@ -215,7 +218,7 @@ URL -> find/launch browser (chromium.js)
|
|
|
215
218
|
-> agent-ready snapshot with [ref=N] markers
|
|
216
219
|
```
|
|
217
220
|
|
|
218
|
-
|
|
221
|
+
14 modules, ~3,000 lines, two small dependencies (`ws`, `@mozilla/readability`).
|
|
219
222
|
|
|
220
223
|
## Requirements
|
|
221
224
|
|
|
@@ -225,17 +228,20 @@ URL -> find/launch browser (chromium.js)
|
|
|
225
228
|
|
|
226
229
|
## The bare ecosystem
|
|
227
230
|
|
|
228
|
-
|
|
231
|
+
Local-first, composable agent infrastructure. Same API patterns throughout —
|
|
232
|
+
mix and match, each module works standalone.
|
|
229
233
|
|
|
230
|
-
|
|
231
|
-
|---|---|---|---|---|
|
|
232
|
-
| **Does** | Gives agents a think→act loop | Gives agents a real browser | Gives agents an Android device | Gates everything an agent does |
|
|
233
|
-
| **How** | Goal in → coordinated actions out | URL in → pruned snapshot out | Screen in → pruned snapshot out | Action in → allow / deny / human-asked out |
|
|
234
|
-
| **Replaces** | LangChain, CrewAI, AutoGen | Playwright, Selenium, Puppeteer | Appium, Espresso, UIAutomator2 | Hand-rolled allowlists, scattered policy code |
|
|
235
|
-
| **Interfaces** | Library · CLI · subprocess | Library · CLI · MCP | Library · CLI · MCP | Library |
|
|
236
|
-
| **Solo or together** | Orchestrates the others as tools | Works standalone | Works standalone | Embedded in bareagent's loop; usable by any runner |
|
|
234
|
+
**Core** — the brain, the gate, the memory.
|
|
237
235
|
|
|
238
|
-
|
|
236
|
+
- **[bareagent](https://npmjs.com/package/bare-agent)** — the think→act→observe loop. *Goal in → coordinated actions out.* Replaces LangChain, CrewAI, AutoGen.
|
|
237
|
+
- **[bareguard](https://npmjs.com/package/bareguard)** — the single gate every action passes through. *Action in → allow / deny / ask-a-human out.* Replaces hand-rolled allowlists and scattered policy code.
|
|
238
|
+
- **[litectx](https://npmjs.com/package/litectx)** — tree-sitter code + memory graph with activation decay, plus lightweight context engineering (write · select · compress · isolate). *Query in → ranked context out.*
|
|
239
|
+
|
|
240
|
+
**Optional reach** — give the agent hands.
|
|
241
|
+
|
|
242
|
+
- **[barebrowse](https://npmjs.com/package/barebrowse)** — a real browser for agents. *URL in → pruned snapshot out.* Replaces Playwright, Selenium, Puppeteer.
|
|
243
|
+
- **[baremobile](https://npmjs.com/package/baremobile)** — Android + iOS device control. *Screen in → pruned snapshot out.* Replaces Appium, Espresso, XCUITest.
|
|
244
|
+
- **[beeperbox](https://github.com/hamr0/beeperbox)** — 50+ messaging networks via one MCP server (headless Beeper Desktop in Docker). *Chat in → unified message stream out.* Replaces Twilio, per-platform bot APIs.
|
|
239
245
|
|
|
240
246
|
**What you can build:**
|
|
241
247
|
|
package/barebrowse.context.md
CHANGED
|
@@ -67,6 +67,7 @@ const snapshot = await browse('https://example.com', {
|
|
|
67
67
|
| `goForward()` | -- | void | Navigate forward in browser history |
|
|
68
68
|
| `reload(opts?)` | { ignoreCache?: boolean, timeout?: number } | void | Reload the current page. Clears refMap (refs from pre-reload reject). |
|
|
69
69
|
| `snapshot(pruneOpts?)` | false or { mode: 'act'\|'read' } | string | ARIA tree with `[ref=N]` markers. Pass `false` for raw. |
|
|
70
|
+
| `readable()` | -- | object | Clean article text (Reader-View engine). `{ ok, title, byline, text, length, confidence: 'high'\|'low', readerable, hint? }` or `{ ok: false, hint }`. For *reading*, not interacting — see note below. |
|
|
70
71
|
| `click(ref)` | ref: string | void | Scroll into view + mouse press+release at center |
|
|
71
72
|
| `type(ref, text, opts?)` | ref: string, text: string, opts: { clear?, keyEvents? } | void | Focus + insert text. `clear: true` replaces existing. |
|
|
72
73
|
| `press(key)` | key: string | void | Special key: Enter, Tab, Escape, Backspace, Delete, arrows, Home, End, PageUp, PageDown, Space |
|
|
@@ -122,6 +123,28 @@ Key rules:
|
|
|
122
123
|
- `click(ref)` / `type(ref, text)` / `hover(ref)` / `select(ref, value)` use these ref strings
|
|
123
124
|
- Pruning removes noise (~47-95% token reduction) while keeping all interactive elements
|
|
124
125
|
|
|
126
|
+
## readable() vs snapshot() — which to use
|
|
127
|
+
|
|
128
|
+
Two different jobs:
|
|
129
|
+
|
|
130
|
+
- **`snapshot()`** → the *actionable* ARIA tree (`[ref=N]` markers). Use it to **interact** (click/type/fill) or on **any** page type (home pages, search results, app UIs).
|
|
131
|
+
- **`readable()`** → the *article* as clean reading text (title + body prose; nav/ads/sidebars stripped). Use it **only** to **read or summarise** article-like pages (news, blogs, docs, wiki), where `snapshot()` is both noisy and *silently lossy on long prose* (`read` pruning can drop body text).
|
|
132
|
+
|
|
133
|
+
`readable()` is **not** a token-savings feature — vs a read-mode snapshot it can be smaller *or* larger depending on the page; its value is **complete, clean prose**. It returns no refs, so you cannot interact with its output.
|
|
134
|
+
|
|
135
|
+
**Article detection is unreliable**, so `readable()` never hard-gates. It always returns whatever it extracted plus an advisory `confidence`:
|
|
136
|
+
- `confidence: 'high'` → safe to treat as an article.
|
|
137
|
+
- `confidence: 'low'` (or `ok: false`) → probably not an article; the `hint` tells the agent to fall back to `snapshot()`.
|
|
138
|
+
|
|
139
|
+
```js
|
|
140
|
+
const r = await page.readable();
|
|
141
|
+
if (r.ok && r.confidence === 'high') {
|
|
142
|
+
use(r.text); // clean article prose
|
|
143
|
+
} else {
|
|
144
|
+
const snap = await page.snapshot({ mode: 'read' }); // fall back
|
|
145
|
+
}
|
|
146
|
+
```
|
|
147
|
+
|
|
125
148
|
## Interaction loop: observe, think, act
|
|
126
149
|
|
|
127
150
|
```javascript
|
|
@@ -207,10 +230,10 @@ try {
|
|
|
207
230
|
```
|
|
208
231
|
|
|
209
232
|
`createBrowseTools(opts)` returns:
|
|
210
|
-
- `tools` -- array of bareagent-compatible tool objects: `browse`, `goto`, `snapshot`, `click`, `type`, `press`, `scroll`, `select`, `hover`, `back`, `forward`, `reload` (v0.9.0), `drag`, `upload`, `tabs`, `switchTab`, `pdf`, `screenshot`, `wait_for` (v0.9.0), `downloads` (v0.9.0), plus `assess` if wearehere installed
|
|
233
|
+
- `tools` -- array of bareagent-compatible tool objects: `browse`, `goto`, `snapshot`, `readable`, `click`, `type`, `press`, `scroll`, `select`, `hover`, `back`, `forward`, `reload` (v0.9.0), `drag`, `upload`, `tabs`, `switchTab`, `pdf`, `screenshot`, `wait_for` (v0.9.0), `downloads` (v0.9.0), plus `assess` if wearehere installed
|
|
211
234
|
- `close()` -- cleanup function, call when done
|
|
212
235
|
|
|
213
|
-
Action tools (click, type, press, scroll, hover, goto, back, forward, reload, drag, upload, select, switchTab, wait_for) auto-return a fresh snapshot so the LLM always sees the result. 300ms settle delay after actions for DOM updates.
|
|
236
|
+
Action tools (click, type, press, scroll, hover, goto, back, forward, reload, drag, upload, select, switchTab, wait_for) auto-return a fresh snapshot so the LLM always sees the result. 300ms settle delay after actions for DOM updates. `readable` is a read tool (like `snapshot`): it returns the article text directly, not a follow-up snapshot.
|
|
214
237
|
|
|
215
238
|
`onDialog` is intentionally not exposed as a tool — it's a callback shape that doesn't fit a request/response tool loop. If your bareagent flow needs to override a confirm/prompt, drop to `import { connect }` directly and pass the page through.
|
|
216
239
|
|
|
@@ -221,6 +244,7 @@ For coding agents (Claude Code, Copilot, Cursor) and quick interactive testing.
|
|
|
221
244
|
```bash
|
|
222
245
|
barebrowse open https://example.com # Start daemon + navigate
|
|
223
246
|
barebrowse snapshot # → .barebrowse/page-<timestamp>.yml
|
|
247
|
+
barebrowse readable # → .barebrowse/article-<timestamp>.txt (clean article text)
|
|
224
248
|
barebrowse click 8 # Click element ref=8
|
|
225
249
|
barebrowse type 12 hello world # Type into element ref=12
|
|
226
250
|
barebrowse back # Go back in history
|
|
@@ -262,11 +286,11 @@ barebrowse ships an MCP server for direct use with Claude Desktop, Cursor, or an
|
|
|
262
286
|
}
|
|
263
287
|
```
|
|
264
288
|
|
|
265
|
-
|
|
289
|
+
19 core tools: `browse` (one-shot), `goto`, `snapshot`, `readable`, `click`, `type`, `press`, `scroll`, `hover`, `select`, `back`, `forward`, `reload`, `drag`, `upload`, `pdf`, `screenshot`, `wait_for`, `tabs`. Plus `assess` (privacy scan) if `wearehere` is installed (`npm install wearehere`). Plus the **opt-in `eval` tool** gated by `BAREBROWSE_MCP_EVAL=1` (default OFF) — `Runtime.evaluate` in the user's authenticated session can read cookies/localStorage and hit any same-origin endpoint, so opt-in only.
|
|
266
290
|
|
|
267
291
|
Action tools return `'ok'` -- the agent calls `snapshot` explicitly to observe. This avoids double-token output since MCP tool calls are cheap to chain.
|
|
268
292
|
|
|
269
|
-
`browse` and `
|
|
293
|
+
`browse`, `snapshot`, and `readable` accept a `maxChars` param (default 30000). If the output exceeds the limit it's saved to `.barebrowse/` and a short message with the file path is returned instead (`page-<timestamp>.yml` for snapshots, `article-<timestamp>.txt` for `readable`). `screenshot` always saves to `.barebrowse/screenshot-<timestamp>.{png,jpeg,webp}` and returns the file path (raw base64 in a JSON-RPC response would blow `maxChars`). `tabs` returns the JSON array, or with `switchTo: N` it switches and returns `'ok'`. All files MCP writes are owner-only (`0600` in a `0700` dir) — they can hold authenticated page content, so they're not world-readable on a shared host.
|
|
270
294
|
|
|
271
295
|
`browse` and `snapshot` also accept `pruneMode: 'act'|'read'`. `act` (the default) keeps interactive elements and short labels — best for clicking/filling. `read` keeps paragraphs, headings, and long text — best for articles, docs, and content extraction. Same surface on the bareagent adapter. If act mode collapses a content-heavy page (raw > 5 KB → pruned < 500 chars AND < 5% of raw), the result includes a `hint: act mode dropped most of the page — retry with pruneMode='read' …` line between the stats and the tree so the caller knows to re-snapshot in read mode instead of bailing to a separate HTTP fetch.
|
|
272
296
|
|
package/cli.js
CHANGED
|
@@ -38,6 +38,8 @@ if (args.includes('--daemon-internal')) {
|
|
|
38
38
|
await cmdProxy('goto', { url: args[1], timeout: parseFlag('--timeout') });
|
|
39
39
|
} else if (cmd === 'snapshot') {
|
|
40
40
|
await cmdProxy('snapshot', { mode: parseFlag('--mode') });
|
|
41
|
+
} else if (cmd === 'readable') {
|
|
42
|
+
await cmdProxy('readable');
|
|
41
43
|
} else if (cmd === 'screenshot') {
|
|
42
44
|
await cmdProxy('screenshot', { format: parseFlag('--format') });
|
|
43
45
|
} else if (cmd === 'click' && args[1]) {
|
|
@@ -504,6 +506,7 @@ Navigation:
|
|
|
504
506
|
barebrowse forward Go forward in history
|
|
505
507
|
barebrowse reload [--no-cache] Reload current page
|
|
506
508
|
barebrowse snapshot [--mode=M] ARIA snapshot -> .barebrowse/page-*.yml
|
|
509
|
+
barebrowse readable Clean article text -> .barebrowse/article-*.txt
|
|
507
510
|
barebrowse screenshot [--format] Screenshot -> .barebrowse/screenshot-*.png
|
|
508
511
|
barebrowse pdf [--landscape] PDF export -> .barebrowse/page-*.pdf
|
|
509
512
|
|
package/mcp-server.js
CHANGED
|
@@ -3,13 +3,15 @@
|
|
|
3
3
|
* mcp-server.js — MCP server for barebrowse.
|
|
4
4
|
*
|
|
5
5
|
* Raw JSON-RPC 2.0 over stdio. No SDK dependency.
|
|
6
|
-
*
|
|
6
|
+
* Tools: browse, goto, snapshot, readable, click, type, press, scroll, back,
|
|
7
|
+
* forward, drag, upload, pdf, reload, screenshot, wait_for, tabs, select, hover.
|
|
7
8
|
*
|
|
8
9
|
* Session tools share a singleton page, lazy-created on first use.
|
|
9
10
|
* Action tools return 'ok' — agent calls snapshot explicitly to observe.
|
|
10
11
|
*/
|
|
11
12
|
|
|
12
13
|
import { browse, connect } from './src/index.js';
|
|
14
|
+
import { formatReadable } from './src/readable.js';
|
|
13
15
|
import { mkdirSync, writeFileSync, readFileSync } from 'node:fs';
|
|
14
16
|
import { join, dirname } from 'node:path';
|
|
15
17
|
import { pathToFileURL, fileURLToPath } from 'node:url';
|
|
@@ -102,11 +104,14 @@ async function withRetry(fn, timeoutMs, { retry = true } = {}) {
|
|
|
102
104
|
const MAX_CHARS_DEFAULT = 30000;
|
|
103
105
|
const OUTPUT_DIR = join(process.cwd(), '.barebrowse');
|
|
104
106
|
|
|
105
|
-
function saveSnapshot(text) {
|
|
106
|
-
|
|
107
|
+
export function saveSnapshot(text, { prefix = 'page', ext = 'yml' } = {}) {
|
|
108
|
+
// Owner-only: snapshots/articles can hold authenticated page content
|
|
109
|
+
// (logged-in text, reflected session data). Matches the daemon's 0600/0700
|
|
110
|
+
// invariant — and is umask-independent because the modes are explicit.
|
|
111
|
+
mkdirSync(OUTPUT_DIR, { recursive: true, mode: 0o700 });
|
|
107
112
|
const ts = new Date().toISOString().replace(/[:.]/g, '-');
|
|
108
|
-
const file = join(OUTPUT_DIR, `page-${ts}.yml`);
|
|
109
|
-
writeFileSync(file, text);
|
|
113
|
+
const file = join(OUTPUT_DIR, `${prefix}-${ts}.${ext}`);
|
|
114
|
+
writeFileSync(file, text, { mode: 0o600 });
|
|
110
115
|
return file;
|
|
111
116
|
}
|
|
112
117
|
|
|
@@ -178,6 +183,16 @@ export const TOOLS = [
|
|
|
178
183
|
},
|
|
179
184
|
},
|
|
180
185
|
},
|
|
186
|
+
{
|
|
187
|
+
name: 'readable',
|
|
188
|
+
description: 'Extract the main article of the current page as clean reading text (title + body prose, nav/ads/sidebars stripped — the Firefox Reader View engine). Use ONLY when your goal is to READ or SUMMARISE article-like content (news, blog posts, docs, wiki). For clicking/typing/forms, or for non-article pages (home pages, search results, app UIs), use snapshot instead. On a non-article page this returns a low-confidence result with a hint to use snapshot.',
|
|
189
|
+
inputSchema: {
|
|
190
|
+
type: 'object',
|
|
191
|
+
properties: {
|
|
192
|
+
maxChars: { type: 'number', description: 'Max chars to return inline. Longer articles are saved to .barebrowse/ and a file path is returned instead. Default: 30000.' },
|
|
193
|
+
},
|
|
194
|
+
},
|
|
195
|
+
},
|
|
181
196
|
{
|
|
182
197
|
name: 'click',
|
|
183
198
|
description: 'Click an element by its ref from the snapshot. Returns ok — call snapshot to observe.',
|
|
@@ -403,6 +418,18 @@ async function handleToolCall(name, args) {
|
|
|
403
418
|
}
|
|
404
419
|
return text;
|
|
405
420
|
}, TIMEOUTS.snapshot);
|
|
421
|
+
case 'readable': return withRetry(async () => {
|
|
422
|
+
const page = await getPage();
|
|
423
|
+
const r = await page.readable();
|
|
424
|
+
if (!r.ok) return r.hint;
|
|
425
|
+
const body = formatReadable(r);
|
|
426
|
+
const limit = args.maxChars ?? MAX_CHARS_DEFAULT;
|
|
427
|
+
if (body.length > limit) {
|
|
428
|
+
const file = saveSnapshot(body, { prefix: 'article', ext: 'txt' });
|
|
429
|
+
return `Article "${r.title}" (${r.text.length} chars, confidence: ${r.confidence}) saved to ${file}`;
|
|
430
|
+
}
|
|
431
|
+
return body;
|
|
432
|
+
}, TIMEOUTS.snapshot);
|
|
406
433
|
case 'click': return withRetry(async () => {
|
|
407
434
|
const page = await getPage();
|
|
408
435
|
await page.click(args.ref);
|
|
@@ -463,10 +490,11 @@ async function handleToolCall(name, args) {
|
|
|
463
490
|
const page = await getPage();
|
|
464
491
|
const format = args.format || 'png';
|
|
465
492
|
const b64 = await page.screenshot({ format, quality: args.quality });
|
|
466
|
-
mkdirSync(OUTPUT_DIR, { recursive: true });
|
|
493
|
+
mkdirSync(OUTPUT_DIR, { recursive: true, mode: 0o700 });
|
|
467
494
|
const ts = new Date().toISOString().replace(/[:.]/g, '-');
|
|
468
495
|
const file = join(OUTPUT_DIR, `screenshot-${ts}.${format}`);
|
|
469
|
-
|
|
496
|
+
// Owner-only: a screenshot of an authenticated page is sensitive too.
|
|
497
|
+
writeFileSync(file, Buffer.from(b64, 'base64'), { mode: 0o600 });
|
|
470
498
|
return file;
|
|
471
499
|
}, TIMEOUTS.screenshot);
|
|
472
500
|
case 'wait_for': return withRetry(async () => {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "barebrowse",
|
|
3
|
-
"version": "0.12.0",
|
|
3
|
+
"version": "0.14.0",
|
|
4
4
|
"description": "Authenticated web browsing for autonomous agents via CDP. URL in, pruned ARIA snapshot out.",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -63,5 +63,9 @@
|
|
|
63
63
|
"devDependencies": {
|
|
64
64
|
"@types/node": "^25.9.1",
|
|
65
65
|
"typescript": "^6.0.3"
|
|
66
|
+
},
|
|
67
|
+
"dependencies": {
|
|
68
|
+
"@mozilla/readability": "^0.6.0",
|
|
69
|
+
"ws": "^8.21.0"
|
|
66
70
|
}
|
|
67
71
|
}
|
package/src/bareagent.js
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
/// <reference path="./wearehere.d.ts" />
|
|
15
15
|
|
|
16
16
|
import { browse, connect } from './index.js';
|
|
17
|
+
import { formatReadable } from './readable.js';
|
|
17
18
|
|
|
18
19
|
// Optional: privacy assessment via wearehere
|
|
19
20
|
let assessFn = null;
|
|
@@ -93,6 +94,15 @@ export function createBrowseTools(opts = {}) {
|
|
|
93
94
|
return await page.snapshot(pruneMode ? { mode: pruneMode } : undefined);
|
|
94
95
|
},
|
|
95
96
|
},
|
|
97
|
+
{
|
|
98
|
+
name: 'readable',
|
|
99
|
+
description: 'Extract the main article as clean reading text (title + body prose, chrome stripped — Firefox Reader View engine). Use ONLY to READ/SUMMARISE article-like pages (news, blogs, docs, wiki). For interacting, or for non-article pages, use snapshot. Returns a low-confidence hint to use snapshot when the page is not an article.',
|
|
100
|
+
parameters: { type: 'object', properties: {} },
|
|
101
|
+
execute: async () => {
|
|
102
|
+
const page = await getPage();
|
|
103
|
+
return formatReadable(await page.readable());
|
|
104
|
+
},
|
|
105
|
+
},
|
|
96
106
|
{
|
|
97
107
|
name: 'click',
|
|
98
108
|
description: 'Click an element by its ref from the snapshot. Returns the updated snapshot.',
|
package/src/cdp.js
CHANGED
|
@@ -2,20 +2,29 @@
|
|
|
2
2
|
* cdp.js — Minimal Chrome DevTools Protocol client over WebSocket.
|
|
3
3
|
*
|
|
4
4
|
* Sends JSON-RPC commands, receives responses and events.
|
|
5
|
-
* Uses Node
|
|
5
|
+
* Uses the `ws` package (not Node's built-in WebSocket): the built-in
|
|
6
|
+
* silently caps decompressed messages at ~3 MB and permanently kills the
|
|
7
|
+
* socket when a single CDP response (e.g. Accessibility.getFullAXTree on a
|
|
8
|
+
* large page) exceeds it — with no way to raise the limit. `ws` exposes
|
|
9
|
+
* maxPayload, so we lift the ceiling and disable compression.
|
|
6
10
|
*
|
|
7
11
|
* Supports flattened sessions: when a sessionId is provided,
|
|
8
12
|
* it's sent at the top level of the message (not inside params).
|
|
9
13
|
* Events from sessions are also dispatched by sessionId.
|
|
10
14
|
*/
|
|
11
15
|
|
|
16
|
+
import WebSocket from 'ws';
|
|
17
|
+
|
|
18
|
+
/** Lift the message ceiling well past any realistic AX/DOM payload. */
|
|
19
|
+
const MAX_PAYLOAD = 256 * 1024 * 1024; // 256 MB
|
|
20
|
+
|
|
12
21
|
/**
|
|
13
22
|
* Create a CDP client connected to the given WebSocket URL.
|
|
14
23
|
* @param {string} wsUrl - WebSocket URL (ws://127.0.0.1:PORT/devtools/...)
|
|
15
24
|
* @returns {Promise<object>} CDP client ({ send, on, once, session, close })
|
|
16
25
|
*/
|
|
17
26
|
export async function createCDP(wsUrl) {
|
|
18
|
-
const ws = new WebSocket(wsUrl);
|
|
27
|
+
const ws = new WebSocket(wsUrl, { maxPayload: MAX_PAYLOAD, perMessageDeflate: false });
|
|
19
28
|
let nextId = 1;
|
|
20
29
|
const pending = new Map(); // id → { resolve, reject }
|
|
21
30
|
const listeners = new Map(); // "method" or "sessionId:method" → Set<callback>
|
package/src/daemon.js
CHANGED
|
@@ -11,6 +11,7 @@ import { writeFileSync, mkdirSync, existsSync, readFileSync, unlinkSync } from '
|
|
|
11
11
|
import { randomBytes, timingSafeEqual } from 'node:crypto';
|
|
12
12
|
import { join, resolve } from 'node:path';
|
|
13
13
|
import { connect } from './index.js';
|
|
14
|
+
import { formatReadable } from './readable.js';
|
|
14
15
|
|
|
15
16
|
/** Owner-only file write helper — daemon artifacts can hold authenticated content. */
|
|
16
17
|
function writeFilePrivate(path, data) {
|
|
@@ -191,6 +192,17 @@ export async function runDaemon(opts, outputDir, initialUrl) {
|
|
|
191
192
|
return { ok: true, file };
|
|
192
193
|
},
|
|
193
194
|
|
|
195
|
+
async readable() {
|
|
196
|
+
const r = await page.readable();
|
|
197
|
+
// A non-article page is not an error — surface the hint so the agent
|
|
198
|
+
// knows to fall back to snapshot, rather than failing the command.
|
|
199
|
+
if (!r.ok) return { ok: true, value: r.hint };
|
|
200
|
+
const ts = new Date().toISOString().replace(/[:.]/g, '-');
|
|
201
|
+
const file = join(absDir, `article-${ts}.txt`);
|
|
202
|
+
writeFilePrivate(file, formatReadable(r));
|
|
203
|
+
return { ok: true, file };
|
|
204
|
+
},
|
|
205
|
+
|
|
194
206
|
async screenshot({ format }) {
|
|
195
207
|
const data = await page.screenshot({ format: format || 'png' });
|
|
196
208
|
const ts = new Date().toISOString().replace(/[:.]/g, '-');
|
|
@@ -361,8 +373,11 @@ export async function runDaemon(opts, outputDir, initialUrl) {
|
|
|
361
373
|
// Start HTTP server on random port
|
|
362
374
|
const server = createServer(async (req, res) => {
|
|
363
375
|
if (req.method === 'GET' && req.url === '/status') {
|
|
376
|
+
// Liveness only — no pid. /status is the one pre-auth endpoint, and
|
|
377
|
+
// isAlive() just checks res.ok; the pid clients show comes from
|
|
378
|
+
// session.json (owner-only), so nothing consumes a pid here.
|
|
364
379
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
365
|
-
res.end(JSON.stringify({ ok: true, pid: process.pid }));
|
|
380
|
+
res.end(JSON.stringify({ ok: true }));
|
|
366
381
|
return;
|
|
367
382
|
}
|
|
368
383
|
|
|
@@ -380,8 +395,20 @@ export async function runDaemon(opts, outputDir, initialUrl) {
|
|
|
380
395
|
return;
|
|
381
396
|
}
|
|
382
397
|
|
|
398
|
+
// Cap the request body. Post-auth, single local user, but an unbounded
|
|
399
|
+
// `body +=` is a needless memory-DoS foot-gun. 16 MB covers any realistic
|
|
400
|
+
// eval expression / typed text.
|
|
401
|
+
const MAX_BODY = 16 * 1024 * 1024;
|
|
383
402
|
let body = '';
|
|
384
|
-
for await (const chunk of req) body += chunk;
|
|
403
|
+
for await (const chunk of req) {
|
|
404
|
+
body += chunk;
|
|
405
|
+
if (body.length > MAX_BODY) {
|
|
406
|
+
res.writeHead(413, { 'Content-Type': 'application/json' });
|
|
407
|
+
res.end(JSON.stringify({ ok: false, error: 'Request body too large' }));
|
|
408
|
+
req.destroy();
|
|
409
|
+
return;
|
|
410
|
+
}
|
|
411
|
+
}
|
|
385
412
|
|
|
386
413
|
let parsed;
|
|
387
414
|
try {
|
package/src/index.js
CHANGED
|
@@ -18,6 +18,7 @@ import { dismissConsent } from './consent.js';
|
|
|
18
18
|
import { applyStealth } from './stealth.js';
|
|
19
19
|
import { DEFAULT_BLOCKLIST } from './blocklist.js';
|
|
20
20
|
import { waitForNetworkIdle } from './network-idle.js';
|
|
21
|
+
import { readable as extractReadable } from './readable.js';
|
|
21
22
|
import { assertNavigable, assertUploadAllowed } from './url-guard.js';
|
|
22
23
|
import { join as pathJoin } from 'node:path';
|
|
23
24
|
import { chmodSync } from 'node:fs';
|
|
@@ -459,6 +460,13 @@ export async function connect(opts = {}) {
|
|
|
459
460
|
return stats + '\n' + hint + warn + out;
|
|
460
461
|
},
|
|
461
462
|
|
|
463
|
+
// Clean article text (Firefox Reader View engine), for reading/summarising
|
|
464
|
+
// — not for interacting. Returns { ok:false, hint } only when nothing could be extracted (or extraction threw); otherwise text plus an advisory confidence.
|
|
465
|
+
// See readable.js for why this never hard-gates on article detection.
|
|
466
|
+
async readable() {
|
|
467
|
+
return extractReadable(page.session);
|
|
468
|
+
},
|
|
469
|
+
|
|
462
470
|
async click(ref) {
|
|
463
471
|
const entry = refMap.get(ref);
|
|
464
472
|
if (!entry) throw new Error(`No element found for ref "${ref}"`);
|
package/src/readable.js
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* readable.js — extract the main article of a page as clean reading text.
|
|
3
|
+
*
|
|
4
|
+
* Companion to snapshot(): snapshot() yields an *actionable* ARIA tree for
|
|
5
|
+
* clicking/typing; readable() yields the *readable* article (title + body
|
|
6
|
+
* prose, nav/ads/sidebars stripped) for "read/summarise this" tasks, where
|
|
7
|
+
* snapshot() is both noisy and silently lossy on long prose.
|
|
8
|
+
*
|
|
9
|
+
* Runs Mozilla's Readability (the engine behind Firefox Reader View) inside
|
|
10
|
+
* the live page over CDP — so JS-rendered articles work, unlike a raw fetch.
|
|
11
|
+
* `isProbablyReaderable` gives an article-likelihood signal, but it is not
|
|
12
|
+
* reliable on its own (false negatives on minimally-marked-up essays, false
|
|
13
|
+
* positives on link-dense portals), so readable() never hard-gates: it always
|
|
14
|
+
* returns whatever Readability extracted plus an advisory `confidence`. A
|
|
15
|
+
* low-confidence result is the agent's cue to fall back to snapshot().
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { readFileSync } from 'node:fs';
|
|
19
|
+
import { createRequire } from 'node:module';
|
|
20
|
+
|
|
21
|
+
const require = createRequire(import.meta.url);
|
|
22
|
+
|
|
23
|
+
// Read the self-contained browser builds once at module load and inject their
|
|
24
|
+
// source into the page. Both define globals (Readability, isProbablyReaderable)
|
|
25
|
+
// when evaluated in a non-module context; the `if (typeof module ...)` tails are
|
|
26
|
+
// harmless no-ops in the page.
|
|
27
|
+
const READABILITY_SRC = readFileSync(require.resolve('@mozilla/readability/Readability.js'), 'utf8');
|
|
28
|
+
const READERABLE_SRC = readFileSync(require.resolve('@mozilla/readability/Readability-readerable.js'), 'utf8');
|
|
29
|
+
|
|
30
|
+
/** Below this many characters of extracted text, treat as low confidence. */
|
|
31
|
+
const MIN_ARTICLE_CHARS = 1500;
|
|
32
|
+
|
|
33
|
+
// Fully static — interpolates only the two module-level source constants — so
|
|
34
|
+
// it's built once at load, not rebuilt (~120 KB) on every readable() call.
|
|
35
|
+
const EXTRACT_EXPRESSION = `(() => {
|
|
36
|
+
${READERABLE_SRC}
|
|
37
|
+
${READABILITY_SRC}
|
|
38
|
+
try {
|
|
39
|
+
const readerable = isProbablyReaderable(document);
|
|
40
|
+
// Readability mutates the document it parses — clone so the live page
|
|
41
|
+
// (and any later snapshot()/interaction) is untouched.
|
|
42
|
+
const art = new Readability(document.cloneNode(true)).parse();
|
|
43
|
+
if (!art || !art.textContent || !art.textContent.trim()) {
|
|
44
|
+
return { ok: false, readerable };
|
|
45
|
+
}
|
|
46
|
+
return {
|
|
47
|
+
ok: true,
|
|
48
|
+
readerable,
|
|
49
|
+
title: art.title || '',
|
|
50
|
+
byline: art.byline || '',
|
|
51
|
+
text: art.textContent.trim(),
|
|
52
|
+
length: art.length || art.textContent.length,
|
|
53
|
+
};
|
|
54
|
+
} catch (e) {
|
|
55
|
+
return { ok: false, err: String(e && e.message || e) };
|
|
56
|
+
}
|
|
57
|
+
})()`;
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* Render a readable() result as a text block: a short header (title / byline /
|
|
61
|
+
* confidence, with the fall-back hint inline when present) then the body. On a
|
|
62
|
+
* failed extraction it returns the hint. Shared by the MCP, bareagent, and
|
|
63
|
+
* CLI/daemon surfaces so their output can't drift apart.
|
|
64
|
+
* @param {object} r - a readable() result.
|
|
65
|
+
* @returns {string}
|
|
66
|
+
*/
|
|
67
|
+
export function formatReadable(r) {
|
|
68
|
+
if (!r.ok) return r.hint;
|
|
69
|
+
const header = `title: ${r.title}${r.byline ? `\nbyline: ${r.byline}` : ''}\n`
|
|
70
|
+
+ `confidence: ${r.confidence}${r.hint ? ` (${r.hint})` : ''}\n\n`;
|
|
71
|
+
return header + r.text;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* Extract the main article from the current page.
|
|
76
|
+
* @param {object} session - CDP session-scoped handle (.send()).
|
|
77
|
+
* @returns {Promise<object>} One of:
|
|
78
|
+
* { ok: false, hint } — no article content found, or extraction threw
|
|
79
|
+
* { ok: true, title, byline, text, length,
|
|
80
|
+
* confidence: 'high'|'low', readerable, hint? } — extracted article
|
|
81
|
+
*/
|
|
82
|
+
export async function readable(session) {
|
|
83
|
+
const { result } = await session.send('Runtime.evaluate', {
|
|
84
|
+
expression: EXTRACT_EXPRESSION,
|
|
85
|
+
returnByValue: true,
|
|
86
|
+
awaitPromise: true,
|
|
87
|
+
});
|
|
88
|
+
const r = result.value || {};
|
|
89
|
+
|
|
90
|
+
if (!r.ok) {
|
|
91
|
+
return {
|
|
92
|
+
ok: false,
|
|
93
|
+
hint: r.err
|
|
94
|
+
? `readable extraction failed (${r.err}); use snapshot()`
|
|
95
|
+
: 'no article content found on this page; use snapshot() instead',
|
|
96
|
+
};
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
// Advisory confidence: high only when the reader-view heuristic agrees AND
|
|
100
|
+
// there is a substantial amount of text. Low is not an error — the text is
|
|
101
|
+
// still returned; it just means "verify, or prefer snapshot()".
|
|
102
|
+
const confidence = r.readerable && r.length >= MIN_ARTICLE_CHARS ? 'high' : 'low';
|
|
103
|
+
const out = {
|
|
104
|
+
ok: true,
|
|
105
|
+
title: r.title,
|
|
106
|
+
byline: r.byline,
|
|
107
|
+
text: r.text,
|
|
108
|
+
length: r.length,
|
|
109
|
+
readerable: r.readerable,
|
|
110
|
+
confidence,
|
|
111
|
+
};
|
|
112
|
+
if (confidence === 'low') {
|
|
113
|
+
out.hint = 'low article confidence — this may not be an article; consider snapshot()';
|
|
114
|
+
}
|
|
115
|
+
return out;
|
|
116
|
+
}
|
package/types/cdp.d.ts
CHANGED
|
@@ -1,13 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* cdp.js — Minimal Chrome DevTools Protocol client over WebSocket.
|
|
3
|
-
*
|
|
4
|
-
* Sends JSON-RPC commands, receives responses and events.
|
|
5
|
-
* Uses Node 22's built-in WebSocket (no external deps).
|
|
6
|
-
*
|
|
7
|
-
* Supports flattened sessions: when a sessionId is provided,
|
|
8
|
-
* it's sent at the top level of the message (not inside params).
|
|
9
|
-
* Events from sessions are also dispatched by sessionId.
|
|
10
|
-
*/
|
|
11
1
|
/**
|
|
12
2
|
* Create a CDP client connected to the given WebSocket URL.
|
|
13
3
|
* @param {string} wsUrl - WebSocket URL (ws://127.0.0.1:PORT/devtools/...)
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Render a readable() result as a text block: a short header (title / byline /
|
|
3
|
+
* confidence, with the fall-back hint inline when present) then the body. On a
|
|
4
|
+
* failed extraction it returns the hint. Shared by the MCP, bareagent, and
|
|
5
|
+
* CLI/daemon surfaces so their output can't drift apart.
|
|
6
|
+
* @param {object} r - a readable() result.
|
|
7
|
+
* @returns {string}
|
|
8
|
+
*/
|
|
9
|
+
export function formatReadable(r: object): string;
|
|
10
|
+
/**
|
|
11
|
+
* Extract the main article from the current page.
|
|
12
|
+
* @param {object} session - CDP session-scoped handle (.send()).
|
|
13
|
+
* @returns {Promise<object>} One of:
|
|
14
|
+
* { ok: false, hint } — no article content found, or extraction threw
|
|
15
|
+
* { ok: true, title, byline, text, length,
|
|
16
|
+
* confidence: 'high'|'low', readerable, hint? } — extracted article
|
|
17
|
+
*/
|
|
18
|
+
export function readable(session: object): Promise<object>;
|