barebrowse 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$">
5
+ <sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
6
+ </content>
7
+ <orderEntry type="jdk" jdkName="Python 3.14" jdkType="Python SDK" />
8
+ <orderEntry type="sourceFolder" forTests="false" />
9
+ </component>
10
+ </module>
@@ -0,0 +1,6 @@
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
package/.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.14" project-jdk-type="Python SDK" />
4
+ </project>
@@ -0,0 +1,8 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/barebrowse.iml" filepath="$PROJECT_DIR$/.idea/barebrowse.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
package/.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
package/CHANGELOG.md CHANGED
@@ -1,5 +1,39 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.4.2
4
+
5
+ Authenticated browsing improvements. MCP sessions now auto-inject cookies and fall back to headed mode when bot-detected.
6
+
7
+ ### MCP server
8
+ - Session uses `mode: 'hybrid'` — headless by default, automatic headed fallback on challenge pages
9
+ - `goto` tool now injects cookies from user's browsers before navigation (Chromium + Firefox merged)
10
+ - Tool descriptions updated with trigger words for better agent tool selection
11
+
12
+ ### Cookie extraction (`src/auth.js`)
13
+ - `extractCookies()` auto mode merges all browsers (Chromium + Firefox, last-write-wins by `name@domain`)
14
+ - `authenticate()` strips subdomains (`mail.google.com` → `google.com`) so parent-domain cookies are included
15
+
16
+ ### Challenge detection (`src/index.js`)
17
+ - `isChallengePage()` detects Reddit block pages ("prove your humanity", "file a ticket")
18
+ - `connect()` hybrid fallback triggers on `goto()` when challenge detected
19
+
20
+ ### Skill files
21
+ - New: `commands/barebrowse.md` — CLI command reference for non-Claude agents (same content as SKILL.md, without the frontmatter)
22
+ - Moved: `SKILL.md` from `.claude/skills/barebrowse/` to `commands/barebrowse/SKILL.md`
23
+ - `install --skill` reads from new `commands/` path
24
+
25
+ ### Docs
26
+ - README: MCP tool count 7→12, bareagent tools 9→13, skill install paths updated
27
+ - barebrowse.context.md: v0.4.2, hybrid for connect(), MCP cookie injection
28
+ - docs/00-context/system-state.md: bareagent 13 tools, CLI 27 commands, file map updated, published to npm
29
+ - docs/03-logs/validation-log.md: full MCP validation results (Gmail, YouTube, LinkedIn, Reddit, Amazon, GitHub)
30
+
31
+ ## 0.4.1
32
+
33
+ - Docs: testing guide updated with v0.4.0 manual validation table
34
+ - Docs: barebrowse.context.md — CLI examples expanded, open flags listed, MCP tool count 7→12
35
+ - Docs: validation-log.md — full manual test results for all 10 new features
36
+
3
37
  ## 0.4.0
4
38
 
5
39
  10 new features inspired by Playwright MCP. All validated manually against live sites.
package/README.md CHANGED
@@ -45,9 +45,9 @@ Outputs go to `.barebrowse/` as files -- agents read them with their file tools,
45
45
 
46
46
  **Teach your agent the commands** by installing the skill file (a markdown reference the agent reads as context). The CLI tool itself still needs `npm install barebrowse` -- the skill just teaches the agent how to use it.
47
47
 
48
- **Claude Code:** `.claude/skills/barebrowse/` (project) or `~/.claude/skills/barebrowse/` (global, via `barebrowse install --skill`).
48
+ **Claude Code:** Copy `commands/barebrowse/SKILL.md` to `.claude/skills/barebrowse/SKILL.md` (project) or run `barebrowse install --skill` (global).
49
49
 
50
- **Other agents:** `.barebrowse/commands/` (project) or `~/.config/barebrowse/commands/` (global). Copy [SKILL.md](.claude/skills/barebrowse/SKILL.md) there.
50
+ **Other agents:** Copy `commands/barebrowse.md` to your agent's command/skill directory.
51
51
 
52
52
  For writing your own skill files for other CLI tools: [docs/skill-template.md](docs/skill-template.md).
53
53
 
@@ -87,11 +87,11 @@ Or manually add to your config (`claude_desktop_config.json`, `.cursor/mcp.json`
87
87
  }
88
88
  ```
89
89
 
90
- 7 tools: `browse`, `goto`, `snapshot`, `click`, `type`, `press`, `scroll`.
90
+ 12 tools: `browse`, `goto`, `snapshot`, `click`, `type`, `press`, `scroll`, `back`, `forward`, `drag`, `upload`, `pdf`. Session runs in hybrid mode with automatic cookie injection.
91
91
 
92
92
  ### 3. Library -- for agentic automation
93
93
 
94
- Import barebrowse in your agent code. One-shot reads, interactive sessions, full observe-think-act loops. Works with any LLM orchestration library. Ships with a ready-made adapter for [bareagent](https://www.npmjs.com/package/bare-agent) (9 tools, auto-snapshot after every action).
94
+ Import barebrowse in your agent code. One-shot reads, interactive sessions, full observe-think-act loops. Works with any LLM orchestration library. Ships with a ready-made adapter for [bareagent](https://www.npmjs.com/package/bare-agent) (13 tools, auto-snapshot after every action).
95
95
 
96
96
  For code examples, API reference, and wiring instructions, see **[barebrowse.context.md](barebrowse.context.md)** -- the full integration guide.
97
97
 
@@ -115,13 +115,13 @@ This is the obstacle course your agent doesn't have to think about:
115
115
  | **Consent behind iframe overlay** | JS click via DOM.resolveNode bypasses z-index/overlay issues | Both |
116
116
  | **Permission prompts** (location, camera, mic) | Launch flags + CDP Browser.setPermission auto-deny | Both |
117
117
  | **Media autoplay blocked** | Autoplay policy flag on launch | Both |
118
- | **Login walls** | Cookie extraction from Firefox/Chromium, injected via CDP | Both |
118
+ | **Login walls** | Cookie extraction from all browsers (Firefox + Chromium merged), injected via CDP | Both |
119
119
  | **Pre-filled form inputs** | Select-all + delete before typing | Both |
120
120
  | **Off-screen elements** | Scrolled into view before every click | Both |
121
121
  | **Form submission** | Enter key triggers onsubmit | Both |
122
122
  | **Tab between fields** | Tab key moves focus correctly | Both |
123
123
  | **SPA navigation** (YouTube, GitHub) | SPA-aware wait: frameNavigated + loadEventFired | Both |
124
- | **Bot detection** (Google, Reddit) | Stealth patches (headless) + headed fallback with real cookies | Both |
124
+ | **Bot detection** (Google, Reddit) | Stealth patches (headless) + automatic headed fallback with real cookies | Hybrid |
125
125
  | **navigator.webdriver leak** | Patched before page scripts run: webdriver, plugins, languages, chrome object | Headless |
126
126
  | **JS dialogs** (alert/confirm/prompt) | Auto-dismiss via CDP, logged for inspection | Both |
127
127
  | **Profile locking** | Unique temp dir per headless instance | Headless |
@@ -148,7 +148,7 @@ Everything the agent can do through barebrowse:
148
148
  |--------|-------------|
149
149
  | **Navigate** | Load a URL, wait for page load, auto-dismiss consent |
150
150
  | **Back / Forward** | Browser history navigation |
151
- | **Snapshot** | Pruned ARIA tree with `[ref=N]` markers (40-90% token reduction) |
151
+ | **Snapshot** | Pruned ARIA tree with `[ref=N]` markers. Two modes: `act` (buttons, links, inputs) and `read` (full text). 40-90% token reduction. |
152
152
  | **Click** | Scroll into view + mouse click at element center |
153
153
  | **Type** | Focus + insert text, with option to clear existing content first |
154
154
  | **Press** | Special keys: Enter, Tab, Escape, Backspace, Delete, arrows, Space |
@@ -1,7 +1,7 @@
1
1
  # barebrowse -- Integration Guide
2
2
 
3
3
  > For AI assistants and developers wiring barebrowse into a project.
4
- > v0.3.0 | Node.js >= 22 | 0 required deps | MIT
4
+ > v0.4.2 | Node.js >= 22 | 0 required deps | MIT
5
5
 
6
6
  ## What this is
7
7
 
@@ -51,7 +51,7 @@ const snapshot = await browse('https://example.com', {
51
51
 
52
52
  ## connect() API
53
53
 
54
- `connect(opts)` returns a page handle for interactive sessions. Same opts as `browse()` for mode/port.
54
+ `connect(opts)` returns a page handle for interactive sessions. Same opts as `browse()` for mode/port. Supports `hybrid` mode — starts headless, falls back to headed on bot detection (same as `browse()`).
55
55
 
56
56
  | Method | Args | Returns | Notes |
57
57
  |---|---|---|---|
@@ -185,7 +185,7 @@ try {
185
185
  ```
186
186
 
187
187
  `createBrowseTools(opts)` returns:
188
- - `tools` -- array of bareagent-compatible tool objects (browse, goto, snapshot, click, type, press, scroll, select, screenshot)
188
+ - `tools` -- array of bareagent-compatible tool objects (browse, goto, snapshot, click, type, press, scroll, select, back, forward, drag, upload, screenshot)
189
189
  - `close()` -- cleanup function, call when done
190
190
 
191
191
  Action tools (click, type, press, scroll, goto) auto-return a fresh snapshot so the LLM always sees the result. 300ms settle delay after actions for DOM updates.
@@ -199,14 +199,20 @@ barebrowse open https://example.com # Start daemon + navigate
199
199
  barebrowse snapshot # → .barebrowse/page-<timestamp>.yml
200
200
  barebrowse click 8 # Click element ref=8
201
201
  barebrowse type 12 hello world # Type into element ref=12
202
- barebrowse screenshot # .barebrowse/screenshot-<timestamp>.png
203
- barebrowse console-logs # .barebrowse/console-<timestamp>.json
202
+ barebrowse back # Go back in history
203
+ barebrowse upload 7 /path/to/file.pdf # Upload file to file input
204
+ barebrowse pdf # → .barebrowse/page-<timestamp>.pdf
205
+ barebrowse wait-for --text="Success" # Wait for content to appear
206
+ barebrowse tabs # List open tabs
207
+ barebrowse save-state # → .barebrowse/state-<timestamp>.json
204
208
  barebrowse close # Kill daemon + browser
205
209
  ```
206
210
 
207
- Session lifecycle: `open` spawns a background daemon holding a `connect()` session. Subsequent commands POST to the daemon over HTTP (localhost). `close` shuts everything down.
211
+ **Open flags:** `--mode=headless|headed|hybrid`, `--proxy=URL`, `--viewport=WxH`, `--storage-state=FILE`, `--no-cookies`, `--browser=firefox|chromium`, `--timeout=N`
208
212
 
209
- Full command reference: `.claude/skills/barebrowse/SKILL.md`
213
+ Session lifecycle: `open` spawns a background daemon holding a `connect()` session. Subsequent commands POST to the daemon over HTTP (localhost). `close` shuts everything down. JS dialogs (alert/confirm/prompt) are auto-dismissed and logged.
214
+
215
+ Full command reference: `commands/barebrowse/SKILL.md` (Claude Code) or `commands/barebrowse.md` (other agents)
210
216
 
211
217
  ## MCP wrapper
212
218
 
@@ -228,11 +234,13 @@ barebrowse ships an MCP server for direct use with Claude Desktop, Cursor, or an
228
234
  }
229
235
  ```
230
236
 
231
- 7 tools exposed: `browse` (one-shot), `goto`, `snapshot`, `click`, `type`, `press`, `scroll`.
237
+ 12 tools exposed: `browse` (one-shot), `goto`, `snapshot`, `click`, `type`, `press`, `scroll`, `back`, `forward`, `drag`, `upload`, `pdf`.
232
238
 
233
239
  Action tools return `'ok'` -- the agent calls `snapshot` explicitly to observe. This avoids double-token output since MCP tool calls are cheap to chain.
234
240
 
235
- Session tools (goto, snapshot, click, type, press, scroll) share a singleton page, lazy-created on first use.
241
+ Session runs in hybrid mode (headless with automatic headed fallback on bot detection). `goto` injects cookies merged from the user's browsers (Chromium + Firefox) before navigation for authenticated access.
242
+
243
+ Session tools share a singleton page, lazy-created on first use.
236
244
 
237
245
  ## Architecture
238
246
 
package/cli.js CHANGED
@@ -286,7 +286,7 @@ function install() {
286
286
 
287
287
  function installSkill() {
288
288
  const thisDir = fileURLToPath(new URL('.', import.meta.url));
289
- const src = join(thisDir, '.claude', 'skills', 'barebrowse', 'SKILL.md');
289
+ const src = join(thisDir, 'commands', 'barebrowse', 'SKILL.md');
290
290
 
291
291
  if (!existsSync(src)) {
292
292
  console.error('SKILL.md not found in package. Reinstall barebrowse.');
@@ -408,6 +408,6 @@ MCP:
408
408
  As a library:
409
409
  import { browse, connect } from 'barebrowse';
410
410
 
411
- More: see README.md or barebrowse.context.md
411
+ More: see README.md or commands/barebrowse.md
412
412
  `);
413
413
  }
@@ -0,0 +1,134 @@
1
+ ---
2
+ name: barebrowse
3
+ description: Browser automation using the user's real browser with real cookies. Handles consent walls, login sessions, and bot detection automatically.
4
+ allowed-tools: Bash(barebrowse:*)
5
+ ---
6
+ # barebrowse CLI — Browser Automation for Agents
7
+
8
+ Browse any URL using the user's real browser with real cookies. Returns pruned ARIA snapshots (40-90% smaller than raw) with `[ref=N]` markers for interaction. Handles cookie consent, login sessions, JS dialogs, and bot detection automatically.
9
+
10
+ ## Quick Start
11
+
12
+ ```bash
13
+ barebrowse open https://example.com # Start session + navigate
14
+ barebrowse snapshot # Get ARIA snapshot → .barebrowse/page-*.yml
15
+ barebrowse click 8 # Click element with ref=8
16
+ barebrowse snapshot # See result
17
+ barebrowse close # End session
18
+ ```
19
+
20
+ All output files go to `.barebrowse/` in the current directory. Read them with the Read tool when needed.
21
+
22
+ ## Commands
23
+
24
+ ### Session Lifecycle
25
+
26
+ | Command | Description |
27
+ |---------|-------------|
28
+ | `barebrowse open [url] [flags]` | Start browser session. Optionally navigate to URL. |
29
+ | `barebrowse close` | Close session and kill browser. |
30
+ | `barebrowse status` | Check if session is running. |
31
+
32
+ **Open flags:**
33
+ - `--mode=headless|headed|hybrid` — Browser mode (default: headless)
34
+ - `--no-cookies` — Skip cookie injection
35
+ - `--browser=firefox|chromium` — Cookie source
36
+ - `--prune-mode=act|read` — Default pruning mode
37
+ - `--timeout=N` — Navigation timeout in ms
38
+ - `--proxy=URL` — HTTP/SOCKS proxy server
39
+ - `--viewport=WxH` — Viewport size (e.g. 1280x720)
40
+ - `--storage-state=FILE` — Load cookies/localStorage from JSON file
41
+
42
+ ### Navigation
43
+
44
+ | Command | Output |
45
+ |---------|--------|
46
+ | `barebrowse goto <url>` | Navigates, waits for load, dismisses consent. Prints "ok". |
47
+ | `barebrowse back` | Go back in browser history. |
48
+ | `barebrowse forward` | Go forward in browser history. |
49
+ | `barebrowse snapshot` | ARIA snapshot → `.barebrowse/page-<timestamp>.yml` |
50
+ | `barebrowse snapshot --mode=read` | Read mode: keeps all text (for content extraction) |
51
+ | `barebrowse screenshot` | Screenshot → `.barebrowse/screenshot-<timestamp>.png` |
52
+ | `barebrowse pdf [--landscape]` | PDF export → `.barebrowse/page-<timestamp>.pdf` |
53
+
54
+ ### Interaction
55
+
56
+ | Command | Description |
57
+ |---------|-------------|
58
+ | `barebrowse click <ref>` | Click element (scrolls into view first) |
59
+ | `barebrowse type <ref> <text>` | Type text into element |
60
+ | `barebrowse fill <ref> <text>` | Clear existing content + type new text |
61
+ | `barebrowse press <key>` | Press key: Enter, Tab, Escape, Backspace, Delete, arrows, Space |
62
+ | `barebrowse scroll <deltaY>` | Scroll page (positive=down, negative=up) |
63
+ | `barebrowse hover <ref>` | Hover over element (triggers tooltips) |
64
+ | `barebrowse select <ref> <value>` | Select dropdown option |
65
+ | `barebrowse drag <fromRef> <toRef>` | Drag element to another element |
66
+ | `barebrowse upload <ref> <files..>` | Upload file(s) to a file input element |
67
+
68
+ ### Tabs
69
+
70
+ | Command | Description |
71
+ |---------|-------------|
72
+ | `barebrowse tabs` | List open tabs (index, url, title) |
73
+ | `barebrowse tab <index>` | Switch to tab by index |
74
+
75
+ ### Debugging
76
+
77
+ | Command | Output |
78
+ |---------|--------|
79
+ | `barebrowse eval <expression>` | Evaluate JS in page, print result |
80
+ | `barebrowse wait-idle` | Wait for network idle (no requests for 500ms) |
81
+ | `barebrowse wait-for [opts]` | Wait for content to appear on page |
82
+ | `barebrowse console-logs` | Console logs → `.barebrowse/console-<timestamp>.json` |
83
+ | `barebrowse network-log` | Network log → `.barebrowse/network-<timestamp>.json` |
84
+ | `barebrowse network-log --failed` | Only failed/4xx/5xx requests |
85
+ | `barebrowse dialog-log` | JS dialog log → `.barebrowse/dialogs-<timestamp>.json` |
86
+ | `barebrowse save-state` | Cookies + localStorage → `.barebrowse/state-<timestamp>.json` |
87
+
88
+ **wait-for flags:**
89
+ - `--text=STRING` — Wait for text to appear in page body
90
+ - `--selector=CSS` — Wait for CSS selector to match
91
+ - `--timeout=N` — Max wait time in ms (default: 30000)
92
+
93
+ ## Snapshot Format
94
+
95
+ The snapshot is a YAML-like ARIA tree. Each line is one node:
96
+
97
+ ```
98
+ - WebArea "Example Domain" [ref=1]
99
+ - heading "Example Domain" [level=1] [ref=3]
100
+ - paragraph [ref=5]
101
+ - StaticText "This domain is for use in illustrative examples." [ref=6]
102
+ - link "More information..." [ref=8]
103
+ ```
104
+
105
+ - `[ref=N]` — Use this number with click, type, fill, hover, select, drag, upload
106
+ - Refs change on every snapshot — always take a fresh snapshot before interacting
107
+ - **act mode** (default): interactive elements + labels — for clicking, typing, navigating
108
+ - **read mode**: all text content — for reading articles, extracting data
109
+
110
+ ## Workflow Pattern
111
+
112
+ 1. `barebrowse open <url>` — start session
113
+ 2. `barebrowse snapshot` — observe page (read the .yml file)
114
+ 3. Decide action based on snapshot content
115
+ 4. `barebrowse click/type/fill/press/scroll/drag/upload <ref>` — act
116
+ 5. `barebrowse snapshot` — observe result (refs are now different!)
117
+ 6. Repeat 3-5 until goal achieved
118
+ 7. `barebrowse close` — clean up
119
+
120
+ ## Tips
121
+
122
+ - **Always snapshot before interacting** — refs are ephemeral and change every time
123
+ - **Use `fill` instead of `type`** when replacing existing text in input fields
124
+ - **Use `--mode=read`** for snapshot when you need to extract article content or data
125
+ - **Use `back`/`forward`** to navigate browser history instead of re-entering URLs
126
+ - **Use `upload`** for file inputs — pass absolute paths to the files
127
+ - **Use `wait-for`** when content loads asynchronously — more reliable than `wait-idle`
128
+ - **Check `dialog-log`** if JS alerts/confirms were auto-dismissed during your session
129
+ - **Use `save-state`** to persist cookies/localStorage for later sessions via `--storage-state`
130
+ - **Check `console-logs`** when page behavior seems wrong — JS errors show up there
131
+ - **Check `network-log --failed`** to debug missing content or broken API calls
132
+ - **Use `eval`** as an escape hatch when ARIA tree doesn't show what you need
133
+ - **One session per project** — `.barebrowse/` is project-scoped
134
+ - For bot-detected sites, use `--mode=headed` (requires browser with `--remote-debugging-port=9222`)
@@ -70,7 +70,7 @@ Every action returns a **pruned ARIA snapshot** -- the agent's view of the page
70
70
  | **Consent behind iframe overlay** | JS `.click()` via `DOM.resolveNode` bypasses z-index/overlay issues | Both |
71
71
  | **Permission prompts** (location, notifications, camera, mic) | Launch flags + CDP `Browser.setPermission` auto-deny | Both |
72
72
  | **Media autoplay blocked** | `--autoplay-policy=no-user-gesture-required` | Both |
73
- | **Login walls** | Firefox cookie extraction, CDP injection (user's real sessions) | Both |
73
+ | **Login walls** | All-browser cookie merge (Firefox + Chromium), CDP injection (user's real sessions) | Both |
74
74
  | **Pre-filled form inputs** | `type({ clear: true })` selects all + deletes before typing | Both |
75
75
  | **Off-screen elements** | `DOM.scrollIntoViewIfNeeded` before every click | Both |
76
76
  | **Form submission** | `press('Enter')` with proper `text: '\r'` triggers onsubmit | Both |
@@ -86,8 +86,6 @@ Every action returns a **pruned ARIA snapshot** -- the agent's view of the page
86
86
 
87
87
  | Obstacle | What's Needed | Difficulty |
88
88
  |----------|--------------|------------|
89
- | File upload | `Input.setFiles` via CDP | Low |
90
- | Drag and drop | `Input.dispatchDragEvent` sequence | Medium |
91
89
  | Infinite scroll | Scroll + wait for new content strategy | Medium |
92
90
  | CAPTCHAs | Cannot solve -- headed mode lets user solve manually | N/A |
93
91
  | Cross-origin iframes | Frame tree traversal via CDP | Medium |
@@ -251,9 +249,10 @@ Chrome permission prompts (location, notifications, camera, mic, etc.) are suppr
251
249
  - No user prompt ever appears -- agents browse without interruption
252
250
 
253
251
  ### Cross-browser cookie injection -- done
254
- Firefox cookies (user's default browser) extracted from SQLite -> injected into headless or headed Chromium via CDP `Network.setCookie`. No need to use Chromium as daily browser.
255
- - `browse()`: auto-injects cookies before navigation (opt-out with `{ cookies: false }`)
252
+ Auto mode merges cookies from all detected browsers (Chromium + Firefox, last-write-wins by name+domain). No need to use Chromium as daily browser.
253
+ - `browse()`: auto-injects merged cookies before navigation (opt-out with `{ cookies: false }`)
256
254
  - `connect()`: manual injection via `page.injectCookies(url, { browser: 'firefox' })`
255
+ - MCP `goto`: auto-injects cookies before every navigation
257
256
  - Proven: YouTube login session transferred from Firefox -> headed Chromium -> video playback
258
257
 
259
258
  ### Stealth patches -- done
@@ -292,7 +291,7 @@ const result = await loop.run(messages, tools);
292
291
  await close();
293
292
  ```
294
293
 
295
- 9 tools: browse, goto, snapshot, click, type, press, scroll, select, screenshot.
294
+ 13 tools: browse, goto, snapshot, click, type, press, scroll, select, back, forward, drag, upload, screenshot.
296
295
  Action tools auto-return snapshot (300ms settle delay). The LLM always sees the result.
297
296
 
298
297
  ### MCP server
@@ -310,8 +309,9 @@ Raw JSON-RPC 2.0 over stdio. Zero SDK dependencies. `npm install barebrowse` the
310
309
  }
311
310
  ```
312
311
 
313
- 7 tools: browse (one-shot), goto, snapshot, click, type, press, scroll.
312
+ 12 tools: browse (one-shot), goto, snapshot, click, type, press, scroll, back, forward, drag, upload, pdf.
314
313
  Action tools return `'ok'` -- agent calls `snapshot` explicitly (MCP tool calls are cheap to chain).
314
+ Session runs in hybrid mode (headless + automatic headed fallback on bot detection). `goto` injects cookies merged from the user's browsers (Chromium + Firefox) before navigation.
315
315
  Session tools share a singleton page, lazy-created on first use.
316
316
 
317
317
  ### CLI session -- for coding agents + human devs
@@ -328,11 +328,11 @@ barebrowse close # Kill daemon + browser
328
328
 
329
329
  Architecture: `open` spawns a detached child process running an HTTP server on a random localhost port. Session state stored in `.barebrowse/session.json`. Subsequent commands POST to the daemon. `close` sends shutdown, daemon calls `page.close()` + `process.exit(0)`.
330
330
 
331
- Full commands: open, close, status, goto, snapshot, screenshot, click, type, fill, press, scroll, hover, select, eval, wait-idle, console-logs, network-log.
331
+ Full commands: open, close, status, goto, back, forward, snapshot, screenshot, pdf, click, type, fill, press, scroll, hover, select, drag, upload, tabs, tab, eval, wait-idle, wait-for, console-logs, network-log, dialog-log, save-state.
332
332
 
333
333
  Self-sufficiency features (console/network capture, eval) let agents debug without guessing -- they see JS errors and failed requests directly.
334
334
 
335
- SKILL.md (`.claude/skills/barebrowse/SKILL.md`) teaches Claude Code the CLI commands. Install with `barebrowse install --skill`.
335
+ SKILL.md (`commands/barebrowse/SKILL.md`) teaches Claude Code the CLI commands. Install with `barebrowse install --skill`.
336
336
 
337
337
  ---
338
338
 
@@ -353,7 +353,7 @@ barebrowse = the eyes + hands (browse, read, interact with the web)
353
353
  - **Linux first.** Tested on Fedora/KDE. macOS/Windows cookie extraction paths exist in auth.js but untested.
354
354
  - **Node >= 22.** Built-in WebSocket, built-in SQLite.
355
355
  - **Not a server.** Library that agents import. Wrap as MCP (included) or HTTP if needed.
356
- - **Not cross-platform tested.** Local development only, not published to npm.
356
+ - **Not cross-platform tested.** Tested on Linux only. Published to npm as `barebrowse`.
357
357
 
358
358
  ---
359
359
 
@@ -381,14 +381,20 @@ barebrowse/
381
381
  │ ├── headed-demo.js # Interactive demo: Wikipedia → DuckDuckGo
382
382
  │ └── yt-demo.js # YouTube demo: Firefox cookies → search → play video
383
383
  ├── docs/
384
- │ ├── prd.md # Decisions + rationale (reference)
385
- │ ├── poc-plan.md # Original POC phases + DoD
386
- │ ├── blueprint.md # This file
387
- └── testing.md # Test guide: pyramid, all 54 tests, CI strategy
384
+ │ ├── README.md # Documentation navigation guide
385
+ │ ├── 00-context/ # vision, assumptions, system-state (this file)
386
+ │ ├── 01-product/ # prd.md
387
+ │ ├── 03-logs/ # decisions, implementation, bugs, validation, insights
388
+ │ ├── 04-process/ # dev-workflow, definition-of-done, testing (64 tests)
389
+ │ └── archive/ # poc-plan.md
388
390
  ├── mcp-server.js # MCP server (JSON-RPC 2.0 over stdio)
389
391
  ├── cli.js # CLI entry: session commands, MCP, browse, install
390
392
  ├── .mcp.json # MCP server config for Claude Desktop / Cursor
391
393
  ├── barebrowse.context.md # LLM-consumable integration guide
394
+ ├── commands/
395
+ │ ├── barebrowse.md # CLI command reference (any agent)
396
+ │ └── barebrowse/
397
+ │ └── SKILL.md # CLI command reference (Claude Code skill)
392
398
  ├── package.json
393
399
  ├── README.md
394
400
  └── CLAUDE.md
@@ -120,4 +120,150 @@ Full end-to-end validation of every CLI command against real websites.
120
120
 
121
121
  ---
122
122
 
123
+ ## New features manual validation (v0.4.0)
124
+
125
+ All tested against live sites via CLI session from `/tmp`.
126
+
127
+ ### Navigation: back/forward
128
+
129
+ | Command | Result |
130
+ |---------|--------|
131
+ | `open https://example.com` | Session started |
132
+ | `goto https://wikipedia.org` | "ok" |
133
+ | `back` | "ok" — returned to example.com |
134
+ | `forward` | "ok" — returned to wikipedia.org |
135
+
136
+ ### File upload
137
+
138
+ | Command | Result |
139
+ |---------|--------|
140
+ | `goto 'data:text/html,<input type="file" id="f"><script>...</script>'` | "ok" |
141
+ | `snapshot` | `button "Choose File" [ref=7]` |
142
+ | `upload 7 /tmp/test-upload.txt` | "ok" |
143
+ | `eval 'document.title'` | `"uploaded"` — onchange fired, confirmed working |
144
+
145
+ ### PDF export
146
+
147
+ | Command | Result |
148
+ |---------|--------|
149
+ | (on wikipedia.org) `pdf` | `.barebrowse/page-*.pdf` — 200,716 bytes |
150
+
151
+ ### Tabs
152
+
153
+ | Command | Result |
154
+ |---------|--------|
155
+ | `tabs` | `[{"index":0,"url":"https://www.wikipedia.org/","title":"Wikipedia",...}, {"index":1,"url":"about:blank",...}]` |
156
+
157
+ ### Wait-for
158
+
159
+ | Command | Result |
160
+ |---------|--------|
161
+ | `wait-for --text=Wikipedia` | "ok" — found text immediately |
162
+ | `wait-for --selector=body` | "ok" — found selector immediately |
163
+
164
+ ### JS dialog auto-dismiss
165
+
166
+ | Command | Result |
167
+ |---------|--------|
168
+ | `eval 'alert("hello from dialog"); "done"'` | `"done"` — alert auto-dismissed, eval continued |
169
+ | `dialog-log` | `.barebrowse/dialogs-*.json (1 entries)` — dialog logged with type, message, timestamp |
170
+
171
+ ### Save state
172
+
173
+ | Command | Result |
174
+ |---------|--------|
175
+ | `save-state` | `.barebrowse/state-*.json` — 2,836 bytes (cookies + localStorage) |
176
+
177
+ ### Viewport flag
178
+
179
+ | Command | Result |
180
+ |---------|--------|
181
+ | `open https://example.com --viewport=800x600` | Session started |
182
+ | `eval 'window.innerWidth + "x" + window.innerHeight'` | `"800x600"` — confirmed |
183
+
184
+ ### Drag (wired, needs drag-and-drop UI for visual test)
185
+
186
+ Wired through interact.js → index.js → daemon.js → cli.js. Mouse event sequence: mousePressed at source → mouseMoved to midpoint → mouseMoved to target → mouseReleased at target. Requires a drag-and-drop UI to validate visually.
187
+
188
+ ### Proxy flag
189
+
190
+ Wired through cli.js → daemon.js → chromium.js → `--proxy-server` Chromium launch arg. Requires a proxy server to validate.
191
+
192
+ ### Storage-state flag
193
+
194
+ Wired through cli.js → daemon.js → connect() → `Network.setCookies` on startup. Loads from JSON file produced by `save-state`.
195
+
196
+ ---
197
+
198
+ ## MCP server validation (v0.4.1)
199
+
200
+ All 12 MCP tools tested live via Claude Code MCP integration. Stats line (`# X chars → Y chars (N% pruned)`) confirmed on every snapshot.
201
+
202
+ ### Tools tested successfully (10/12)
203
+
204
+ | Tool | Test | Result |
205
+ |------|------|--------|
206
+ | `browse` | One-shot HN | `51,397 → 26,983 (48% pruned)` — stats line present |
207
+ | `goto` | DDG, Wikipedia, data: URLs | All navigated successfully |
208
+ | `snapshot` | Multiple pages | Stats line on every snapshot, pruning working |
209
+ | `click` | Wikipedia "About Wikipedia" link | Navigated to target page |
210
+ | `type` | DDG search box `barebrowse npm` | Text entered correctly |
211
+ | `press` | Enter to submit DDG search | Search submitted (CAPTCHA returned — expected headless) |
212
+ | `scroll` | 500px down on Wikipedia:About | Scrolled successfully |
213
+ | `back` | After Wikipedia:About → CDP page | Returned to previous page |
214
+ | `forward` | After back | Returned to Wikipedia:About |
215
+ | `pdf` | Wikipedia:About | 380K base64 PDF generated |
216
+
217
+ ### Tools tested with known limitations (2/12)
218
+
219
+ | Tool | Test | Result |
220
+ |------|------|--------|
221
+ | `upload` | data: page with file input | `ok` returned, file set via DOM.setFileInputFiles. onchange fires but result text pruned in act mode (non-interactive content). Works in integration tests. |
222
+ | `drag` | data: page with draggable divs | Mouse events dispatched but HTML5 drag/drop dataTransfer not populated via CDP synthetic events. Known CDP limitation (same as Playwright). |
223
+
224
+ ### Observations
225
+
226
+ - DDG returned CAPTCHA in headless ("Select all squares containing a duck") — expected, hybrid mode handles this
227
+ - Stats line format: `# 42,367 chars → 5,453 chars (87% pruned)` — present on all pruned snapshots
228
+ - Token reduction ranges observed: 37% (Wikipedia) to 88% (example.com)
229
+
230
+ ---
231
+
232
+ ## MCP cookies + hybrid fallback validation (v0.4.2)
233
+
234
+ Three changes tested: all-browser cookie merge in auth.js, hybrid mode for connect(), cookie injection + hybrid in MCP goto.
235
+
236
+ ### Cookie injection — login-walled sites via MCP goto
237
+
238
+ | Site | Logged In? | Details |
239
+ |------|-----------|---------|
240
+ | **Gmail** | Yes | Full inbox visible: Compose, labels, 4 emails. Required domain-stripping fix (`mail.google.com` → `google.com`) to capture parent-domain cookies (SID, HSID, etc.). 47 cookies merged from Firefox + Chromium. |
241
+ | **YouTube** | Yes | Personalized feed: tabs for Linux, AI, Electrical Engineering. Recommendations include Claude Code videos, KDE Plasma. Account buttons visible. |
242
+ | **LinkedIn** | Yes | Full feed as Amr Hassan: Home, My Network, Jobs, Messaging, Notifications. Posts visible. Stealth patches + cookies bypassed LinkedIn's aggressive bot detection. |
243
+ | **Amazon.nl** | No (expected) | Not logged in but consent dismissed, search + product pages worked. Cookie injection had no effect (no Amazon session in Firefox). |
244
+ | **GitHub** | No | Shows generic homepage with "Sign in". No GitHub session cookies in Firefox. |
245
+
246
+ ### Bot detection — hybrid fallback
247
+
248
+ | Site | Headless Result | Hybrid Fallback | Final Result |
249
+ |------|----------------|-----------------|--------------|
250
+ | **Google Search** | Full results, no CAPTCHA | Not triggered (stealth sufficient) | Pass — logged in as Amr Hassan |
251
+ | **Reddit** | "Prove your humanity" + reCAPTCHA | Triggered → connected to headed Chromium on 9222 | Pass — full feed with posts, logged in |
252
+ | **LinkedIn** | Loaded fine with stealth + cookies | Not triggered | Pass |
253
+
254
+ ### Bug fixes discovered during validation
255
+
256
+ 1. **Domain stripping in authenticate()**: `mail.google.com` extracted only 9 cookies (subdomain-specific). Fix: strip to registrable domain (`google.com`) → 47 cookies including all auth cookies (SID, HSID, SSID, APISID, SAPISID).
257
+ 2. **Reddit challenge detection**: Block page shows "Prove your humanity" and "File a ticket" — neither matched existing challenge phrases. Added both to `isChallengePage()`.
258
+
259
+ ### connect() hybrid mode
260
+
261
+ Tested `connect({ mode: 'hybrid' })` with Reddit: headless detected challenge → killed browser → connected to headed Chromium → Reddit loaded with full content. Same code path as MCP session.
262
+
263
+ ### All-browser cookie merge
264
+
265
+ `extractCookies({ domain: 'google.com' })` in auto mode: Chromium cookies merged first, then Firefox cookies (last-write-wins by `name@domain`). 47 cookies total for google.com. Previous behavior: stopped at first browser found (Chromium only, missed Firefox session).
266
+
267
+ ---
268
+
123
269
  *Add new validation entries when testing against new sites or features.*
@@ -152,6 +152,27 @@ Tests real interactions: clicking, typing, scrolling, form submission, and navig
152
152
 
153
153
  ---
154
154
 
155
+ ## Manual validation (v0.4.0 features)
156
+
157
+ Features added in v0.4.0 are manually validated but not yet in the automated test suite. See `docs/03-logs/validation-log.md` for full results.
158
+
159
+ | Feature | Validation method | Result |
160
+ |---------|-------------------|--------|
161
+ | `back` / `forward` | example.com → wikipedia → back → forward | ok |
162
+ | `upload <ref> <files..>` | data: URL with file input, verified onchange fired | ok |
163
+ | `pdf` | Wikipedia export, 200KB PDF | ok |
164
+ | `tabs` | Listed 2 tabs with urls/titles | ok |
165
+ | `wait-for --text` | Found "Wikipedia" text | ok |
166
+ | `wait-for --selector` | Found `body` selector | ok |
167
+ | `dialog-log` | alert() auto-dismissed, 1 entry logged | ok |
168
+ | `save-state` | 2.8KB cookies + localStorage JSON | ok |
169
+ | `--viewport=WxH` | 800x600, confirmed via innerWidth/innerHeight | ok |
170
+ | `drag` | Wired through all layers; needs a drag UI to test visually | not yet tested |
171
+ | `--proxy` | Wired to Chromium launch arg; needs a proxy to test | not yet tested |
172
+ | `--storage-state` | Wired to Network.setCookies, loads from save-state output | wired (no result recorded) |
173
+
174
+ ---
175
+
155
176
  ## Writing new tests
156
177
 
157
178
  Follow the existing pattern:
package/docs/README.md CHANGED
@@ -50,6 +50,7 @@
50
50
  |------|---------|
51
51
  | `README.md` | Public-facing project overview |
52
52
  | `barebrowse.context.md` | LLM-consumable integration guide (full API, gotchas, wiring) |
53
- | `.claude/skills/barebrowse/SKILL.md` | CLI command reference + Claude Code skill definition |
53
+ | `commands/barebrowse.md` | CLI command reference for any agent (same as SKILL.md without frontmatter) |
54
+ | `commands/barebrowse/SKILL.md` | CLI command reference for Claude Code (copy to `.claude/skills/`) |
54
55
  | `CHANGELOG.md` | Detailed version-by-version changelog |
55
56
  | `CLAUDE.md` | AI agent instructions for this project |
@@ -92,15 +92,15 @@ Both require the underlying tool to be installed. Choose based on your agent's c
92
92
 
93
93
  ## Example: barebrowse
94
94
 
95
- barebrowse ships its skill at `.claude/skills/barebrowse/SKILL.md`:
95
+ barebrowse ships its skill at `commands/barebrowse/SKILL.md`:
96
96
 
97
97
  ```bash
98
98
  # Claude Code — project
99
- cp node_modules/barebrowse/.claude/skills/barebrowse/SKILL.md .claude/skills/barebrowse/SKILL.md
99
+ cp node_modules/barebrowse/commands/barebrowse/SKILL.md .claude/skills/barebrowse/SKILL.md
100
100
 
101
101
  # Claude Code — global
102
102
  barebrowse install --skill
103
103
 
104
104
  # Other agents — project or global
105
- cp node_modules/barebrowse/.claude/skills/barebrowse/SKILL.md .barebrowse/commands/SKILL.md
105
+ cp node_modules/barebrowse/commands/barebrowse/SKILL.md .barebrowse/commands/SKILL.md
106
106
  ```
package/mcp-server.js CHANGED
@@ -14,14 +14,14 @@ import { browse, connect } from './src/index.js';
14
14
  let _page = null;
15
15
 
16
16
  async function getPage() {
17
- if (!_page) _page = await connect();
17
+ if (!_page) _page = await connect({ mode: 'hybrid' });
18
18
  return _page;
19
19
  }
20
20
 
21
21
  const TOOLS = [
22
22
  {
23
23
  name: 'browse',
24
- description: 'One-shot: navigate to a URL and return a pruned ARIA snapshot. Stateless — does not use the session page.',
24
+ description: 'Browse a URL in a real browser. Use instead of web fetch when the page needs JavaScript, login cookies, consent dismissal, or bot detection. Returns a pruned ARIA snapshot with [ref=N] markers for interaction. Stateless — does not use the session page.',
25
25
  inputSchema: {
26
26
  type: 'object',
27
27
  properties: {
@@ -33,7 +33,7 @@ const TOOLS = [
33
33
  },
34
34
  {
35
35
  name: 'goto',
36
- description: 'Navigate the session page to a URL. Returns ok — call snapshot to observe.',
36
+ description: 'Navigate the session page to a URL. Injects cookies from the user\'s browser for authenticated access. Returns ok — call snapshot to observe.',
37
37
  inputSchema: {
38
38
  type: 'object',
39
39
  properties: {
@@ -146,6 +146,7 @@ async function handleToolCall(name, args) {
146
146
 
147
147
  case 'goto': {
148
148
  const page = await getPage();
149
+ try { await page.injectCookies(args.url); } catch {}
149
150
  await page.goto(args.url);
150
151
  return 'ok';
151
152
  }
@@ -217,7 +218,7 @@ async function handleMessage(msg) {
217
218
  return jsonrpcResponse(id, {
218
219
  protocolVersion: '2024-11-05',
219
220
  capabilities: { tools: {} },
220
- serverInfo: { name: 'barebrowse', version: '0.2.2' },
221
+ serverInfo: { name: 'barebrowse', version: '0.4.2' },
221
222
  });
222
223
  }
223
224
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "barebrowse",
3
- "version": "0.4.0",
3
+ "version": "0.4.2",
4
4
  "description": "Authenticated web browsing for autonomous agents via CDP. URL in, pruned ARIA snapshot out.",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
package/src/auth.js CHANGED
@@ -232,14 +232,20 @@ export function extractCookies(opts = {}) {
232
232
  return extractChromiumCookies(path, domain);
233
233
  }
234
234
 
235
- // Auto-detect: try Chromium browsers first, then Firefox
235
+ // Auto: try all browsers, merge (last-write-wins by name+domain)
236
+ const all = new Map();
236
237
  const chromium = findChromiumCookieDb();
237
- if (chromium) return extractChromiumCookies(chromium.path, domain);
238
-
238
+ if (chromium) {
239
+ for (const c of extractChromiumCookies(chromium.path, domain))
240
+ all.set(`${c.name}@${c.domain}`, c);
241
+ }
239
242
  const firefox = findFirefoxCookieDb();
240
- if (firefox) return extractFirefoxCookies(firefox, domain);
241
-
242
- throw new Error('No browser cookie database found');
243
+ if (firefox) {
244
+ for (const c of extractFirefoxCookies(firefox, domain))
245
+ all.set(`${c.name}@${c.domain}`, c);
246
+ }
247
+ if (all.size === 0) throw new Error('No browser cookie database found');
248
+ return [...all.values()];
243
249
  }
244
250
 
245
251
  /**
@@ -270,7 +276,11 @@ export async function injectCookies(session, cookies) {
270
276
  * @param {object} [opts] - Options passed to extractCookies
271
277
  */
272
278
  export async function authenticate(session, url, opts = {}) {
273
- const domain = new URL(url).hostname.replace(/^www\./, '');
279
+ // Strip to registrable domain so mail.google.com → google.com
280
+ // This ensures parent-domain cookies (.google.com) are included
281
+ const hostname = new URL(url).hostname.replace(/^www\./, '');
282
+ const parts = hostname.split('.');
283
+ const domain = parts.length > 2 ? parts.slice(-2).join('.') : hostname;
274
284
  const cookies = extractCookies({ ...opts, domain });
275
285
  if (cookies.length > 0) {
276
286
  await injectCookies(session, cookies);
package/src/index.js CHANGED
@@ -95,13 +95,16 @@ export async function browse(url, opts = {}) {
95
95
  }
96
96
 
97
97
  // Step 6: Prune for agent consumption
98
+ const raw = formatTree(tree);
98
99
  let snapshot;
99
100
  if (opts.prune !== false) {
100
101
  const pruned = pruneTree(tree, { mode: opts.pruneMode || 'act' });
101
102
  snapshot = formatTree(pruned);
102
103
  } else {
103
- snapshot = formatTree(tree);
104
+ snapshot = raw;
104
105
  }
106
+ const stats = `# ${raw.length.toLocaleString()} chars → ${snapshot.length.toLocaleString()} chars (${Math.round((1 - snapshot.length / raw.length) * 100)}% pruned)`;
107
+ snapshot = stats + '\n' + snapshot;
105
108
 
106
109
  // Step 7: Clean up
107
110
  await cdp.send('Target.closeTarget', { targetId: page.targetId });
@@ -117,7 +120,7 @@ export async function browse(url, opts = {}) {
117
120
  * Connect to a browser for a long-lived interactive session.
118
121
  *
119
122
  * @param {object} [opts]
120
- * @param {'headless'|'headed'} [opts.mode='headless'] - Browser mode
123
+ * @param {'headless'|'headed'|'hybrid'} [opts.mode='headless'] - Browser mode
121
124
  * @param {number} [opts.port=9222] - CDP port for headed mode
122
125
  * @returns {Promise<object>} Page handle with goto, snapshot, close
123
126
  */
@@ -135,7 +138,7 @@ export async function connect(opts = {}) {
135
138
  cdp = await createCDP(browser.wsUrl);
136
139
  }
137
140
 
138
- const page = await createPage(cdp, mode !== 'headed', { viewport: opts.viewport });
141
+ let page = await createPage(cdp, mode !== 'headed', { viewport: opts.viewport });
139
142
  let refMap = new Map();
140
143
 
141
144
  // Suppress permission prompts for all modes
@@ -154,17 +157,20 @@ export async function connect(opts = {}) {
154
157
 
155
158
  // Auto-dismiss JS dialogs (alert, confirm, prompt)
156
159
  const dialogLog = [];
157
- page.session.on('Page.javascriptDialogOpening', async (params) => {
158
- dialogLog.push({
159
- type: params.type,
160
- message: params.message,
161
- timestamp: new Date().toISOString(),
162
- });
163
- await page.session.send('Page.handleJavaScriptDialog', {
164
- accept: params.type !== 'beforeunload',
165
- promptText: params.defaultPrompt || '',
160
+ function setupDialogHandler(session) {
161
+ session.on('Page.javascriptDialogOpening', async (params) => {
162
+ dialogLog.push({
163
+ type: params.type,
164
+ message: params.message,
165
+ timestamp: new Date().toISOString(),
166
+ });
167
+ await session.send('Page.handleJavaScriptDialog', {
168
+ accept: params.type !== 'beforeunload',
169
+ promptText: params.defaultPrompt || '',
170
+ });
166
171
  });
167
- });
172
+ }
173
+ setupDialogHandler(page.session);
168
174
 
169
175
  return {
170
176
  async goto(url, timeout = 30000) {
@@ -172,6 +178,25 @@ export async function connect(opts = {}) {
172
178
  if (opts.consent !== false) {
173
179
  await dismissConsent(page.session);
174
180
  }
181
+
182
+ // Hybrid fallback: if bot-blocked, retry with headed browser
183
+ if (mode === 'hybrid') {
184
+ const { tree } = await ariaTree(page);
185
+ if (isChallengePage(tree)) {
186
+ await cdp.send('Target.closeTarget', { targetId: page.targetId });
187
+ cdp.close();
188
+ if (browser) { browser.process.kill(); browser = null; }
189
+
190
+ const port = opts.port || 9222;
191
+ const wsUrl = await getDebugUrl(port);
192
+ cdp = await createCDP(wsUrl);
193
+ page = await createPage(cdp, false, { viewport: opts.viewport });
194
+ setupDialogHandler(page.session);
195
+ await suppressPermissions(cdp);
196
+ await navigate(page, url, timeout);
197
+ if (opts.consent !== false) await dismissConsent(page.session);
198
+ }
199
+ }
175
200
  },
176
201
 
177
202
  async goBack() {
@@ -195,9 +220,12 @@ export async function connect(opts = {}) {
195
220
  async snapshot(pruneOpts) {
196
221
  const result = await ariaTree(page);
197
222
  refMap = result.refMap;
198
- if (pruneOpts === false) return formatTree(result.tree);
223
+ const raw = formatTree(result.tree);
224
+ if (pruneOpts === false) return raw;
199
225
  const pruned = pruneTree(result.tree, { mode: pruneOpts?.mode || 'act' });
200
- return formatTree(pruned);
226
+ const out = formatTree(pruned);
227
+ const stats = `# ${raw.length.toLocaleString()} chars → ${out.length.toLocaleString()} chars (${Math.round((1 - out.length / raw.length) * 100)}% pruned)`;
228
+ return stats + '\n' + out;
201
229
  },
202
230
 
203
231
  async click(ref) {
@@ -541,7 +569,9 @@ function isChallengePage(tree) {
541
569
  'checking your browser',
542
570
  'please wait',
543
571
  'verify you are human',
572
+ 'prove your humanity',
544
573
  'attention required',
574
+ 'file a ticket',
545
575
  ];
546
576
  const lower = text.toLowerCase();
547
577
  return challengePhrases.some((p) => lower.includes(p));