barebrowse 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.idea/barebrowse.iml +10 -0
- package/.idea/inspectionProfiles/profiles_settings.xml +6 -0
- package/.idea/misc.xml +4 -0
- package/.idea/modules.xml +8 -0
- package/.idea/vcs.xml +6 -0
- package/CHANGELOG.md +34 -0
- package/README.md +7 -7
- package/barebrowse.context.md +17 -9
- package/cli.js +2 -2
- package/commands/barebrowse.md +134 -0
- package/docs/00-context/system-state.md +20 -14
- package/docs/03-logs/validation-log.md +146 -0
- package/docs/04-process/testing.md +21 -0
- package/docs/README.md +2 -1
- package/docs/skill-template.md +3 -3
- package/mcp-server.js +5 -4
- package/package.json +1 -1
- package/src/auth.js +17 -7
- package/src/index.js +45 -15
- /package/{.claude/skills → commands}/barebrowse/SKILL.md +0 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<module type="PYTHON_MODULE" version="4">
|
|
3
|
+
<component name="NewModuleRootManager">
|
|
4
|
+
<content url="file://$MODULE_DIR$">
|
|
5
|
+
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
|
|
6
|
+
</content>
|
|
7
|
+
<orderEntry type="jdk" jdkName="Python 3.14" jdkType="Python SDK" />
|
|
8
|
+
<orderEntry type="sourceFolder" forTests="false" />
|
|
9
|
+
</component>
|
|
10
|
+
</module>
|
package/.idea/misc.xml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<project version="4">
|
|
3
|
+
<component name="ProjectModuleManager">
|
|
4
|
+
<modules>
|
|
5
|
+
<module fileurl="file://$PROJECT_DIR$/.idea/barebrowse.iml" filepath="$PROJECT_DIR$/.idea/barebrowse.iml" />
|
|
6
|
+
</modules>
|
|
7
|
+
</component>
|
|
8
|
+
</project>
|
package/.idea/vcs.xml
ADDED
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,39 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.4.2
|
|
4
|
+
|
|
5
|
+
Authenticated browsing improvements. MCP sessions now auto-inject cookies and fall back to headed mode when bot-detected.
|
|
6
|
+
|
|
7
|
+
### MCP server
|
|
8
|
+
- Session uses `mode: 'hybrid'` — headless by default, automatic headed fallback on challenge pages
|
|
9
|
+
- `goto` tool now injects cookies from the user's browsers before navigation (Chromium + Firefox merged)
|
|
10
|
+
- Tool descriptions updated with trigger words for better agent tool selection
|
|
11
|
+
|
|
12
|
+
### Cookie extraction (`src/auth.js`)
|
|
13
|
+
- `extractCookies()` auto mode merges all browsers (Chromium + Firefox, last-write-wins by `name@domain`)
|
|
14
|
+
- `authenticate()` strips subdomains (`mail.google.com` → `google.com`) so parent-domain cookies are included
|
|
15
|
+
|
|
16
|
+
### Challenge detection (`src/index.js`)
|
|
17
|
+
- `isChallengePage()` detects Reddit block pages ("prove your humanity", "file a ticket")
|
|
18
|
+
- `connect()` hybrid fallback triggers on `goto()` when challenge detected
|
|
19
|
+
|
|
20
|
+
### Skill files
|
|
21
|
+
- New: `commands/barebrowse.md` — CLI command reference for non-Claude agents (same as SKILL.md)
|
|
22
|
+
- Moved: `SKILL.md` from `.claude/skills/barebrowse/` to `commands/barebrowse/SKILL.md`
|
|
23
|
+
- `install --skill` reads from new `commands/` path
|
|
24
|
+
|
|
25
|
+
### Docs
|
|
26
|
+
- README: MCP tool count 7→12, bareagent tools 9→13, skill install paths updated
|
|
27
|
+
- barebrowse.context.md: v0.4.2, hybrid for connect(), MCP cookie injection
|
|
28
|
+
- docs/00-context/system-state.md: bareagent 13 tools, CLI 27 commands, file map updated, published to npm
|
|
29
|
+
- docs/03-logs/validation-log.md: full MCP validation results (Gmail, YouTube, LinkedIn, Reddit, Amazon, GitHub)
|
|
30
|
+
|
|
31
|
+
## 0.4.1
|
|
32
|
+
|
|
33
|
+
- Docs: testing guide updated with v0.4.0 manual validation table
|
|
34
|
+
- Docs: barebrowse.context.md — CLI examples expanded, open flags listed, MCP tool count 7→12
|
|
35
|
+
- Docs: validation-log.md — full manual test results for all 10 new features
|
|
36
|
+
|
|
3
37
|
## 0.4.0
|
|
4
38
|
|
|
5
39
|
10 new features inspired by Playwright MCP. All validated manually against live sites.
|
package/README.md
CHANGED
|
@@ -45,9 +45,9 @@ Outputs go to `.barebrowse/` as files -- agents read them with their file tools,
|
|
|
45
45
|
|
|
46
46
|
**Teach your agent the commands** by installing the skill file (a markdown reference the agent reads as context). The CLI tool itself still needs `npm install barebrowse` -- the skill just teaches the agent how to use it.
|
|
47
47
|
|
|
48
|
-
**Claude Code:** `.claude/skills/barebrowse
|
|
48
|
+
**Claude Code:** Copy `commands/barebrowse/SKILL.md` to `.claude/skills/barebrowse/SKILL.md` (project) or run `barebrowse install --skill` (global).
|
|
49
49
|
|
|
50
|
-
**Other agents:**
|
|
50
|
+
**Other agents:** Copy `commands/barebrowse.md` to your agent's command/skill directory.
|
|
51
51
|
|
|
52
52
|
For writing your own skill files for other CLI tools: [docs/skill-template.md](docs/skill-template.md).
|
|
53
53
|
|
|
@@ -87,11 +87,11 @@ Or manually add to your config (`claude_desktop_config.json`, `.cursor/mcp.json`
|
|
|
87
87
|
}
|
|
88
88
|
```
|
|
89
89
|
|
|
90
|
-
|
|
90
|
+
12 tools: `browse`, `goto`, `snapshot`, `click`, `type`, `press`, `scroll`, `back`, `forward`, `drag`, `upload`, `pdf`. Session runs in hybrid mode with automatic cookie injection.
|
|
91
91
|
|
|
92
92
|
### 3. Library -- for agentic automation
|
|
93
93
|
|
|
94
|
-
Import barebrowse in your agent code. One-shot reads, interactive sessions, full observe-think-act loops. Works with any LLM orchestration library. Ships with a ready-made adapter for [bareagent](https://www.npmjs.com/package/bare-agent) (
|
|
94
|
+
Import barebrowse in your agent code. One-shot reads, interactive sessions, full observe-think-act loops. Works with any LLM orchestration library. Ships with a ready-made adapter for [bareagent](https://www.npmjs.com/package/bare-agent) (13 tools, auto-snapshot after every action).
|
|
95
95
|
|
|
96
96
|
For code examples, API reference, and wiring instructions, see **[barebrowse.context.md](barebrowse.context.md)** -- the full integration guide.
|
|
97
97
|
|
|
@@ -115,13 +115,13 @@ This is the obstacle course your agent doesn't have to think about:
|
|
|
115
115
|
| **Consent behind iframe overlay** | JS click via DOM.resolveNode bypasses z-index/overlay issues | Both |
|
|
116
116
|
| **Permission prompts** (location, camera, mic) | Launch flags + CDP Browser.setPermission auto-deny | Both |
|
|
117
117
|
| **Media autoplay blocked** | Autoplay policy flag on launch | Both |
|
|
118
|
-
| **Login walls** | Cookie extraction from Firefox
|
|
118
|
+
| **Login walls** | Cookie extraction from all browsers (Firefox + Chromium merged), injected via CDP | Both |
|
|
119
119
|
| **Pre-filled form inputs** | Select-all + delete before typing | Both |
|
|
120
120
|
| **Off-screen elements** | Scrolled into view before every click | Both |
|
|
121
121
|
| **Form submission** | Enter key triggers onsubmit | Both |
|
|
122
122
|
| **Tab between fields** | Tab key moves focus correctly | Both |
|
|
123
123
|
| **SPA navigation** (YouTube, GitHub) | SPA-aware wait: frameNavigated + loadEventFired | Both |
|
|
124
|
-
| **Bot detection** (Google, Reddit) | Stealth patches (headless) + headed fallback with real cookies |
|
|
124
|
+
| **Bot detection** (Google, Reddit) | Stealth patches (headless) + automatic headed fallback with real cookies | Hybrid |
|
|
125
125
|
| **navigator.webdriver leak** | Patched before page scripts run: webdriver, plugins, languages, chrome object | Headless |
|
|
126
126
|
| **JS dialogs** (alert/confirm/prompt) | Auto-dismiss via CDP, logged for inspection | Both |
|
|
127
127
|
| **Profile locking** | Unique temp dir per headless instance | Headless |
|
|
@@ -148,7 +148,7 @@ Everything the agent can do through barebrowse:
|
|
|
148
148
|
|--------|-------------|
|
|
149
149
|
| **Navigate** | Load a URL, wait for page load, auto-dismiss consent |
|
|
150
150
|
| **Back / Forward** | Browser history navigation |
|
|
151
|
-
| **Snapshot** | Pruned ARIA tree with `[ref=N]` markers (40-90% token reduction
|
|
151
|
+
| **Snapshot** | Pruned ARIA tree with `[ref=N]` markers. Two modes: `act` (buttons, links, inputs) and `read` (full text). 40-90% token reduction. |
|
|
152
152
|
| **Click** | Scroll into view + mouse click at element center |
|
|
153
153
|
| **Type** | Focus + insert text, with option to clear existing content first |
|
|
154
154
|
| **Press** | Special keys: Enter, Tab, Escape, Backspace, Delete, arrows, Space |
|
package/barebrowse.context.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# barebrowse -- Integration Guide
|
|
2
2
|
|
|
3
3
|
> For AI assistants and developers wiring barebrowse into a project.
|
|
4
|
-
> v0.
|
|
4
|
+
> v0.4.2 | Node.js >= 22 | 0 required deps | MIT
|
|
5
5
|
|
|
6
6
|
## What this is
|
|
7
7
|
|
|
@@ -51,7 +51,7 @@ const snapshot = await browse('https://example.com', {
|
|
|
51
51
|
|
|
52
52
|
## connect() API
|
|
53
53
|
|
|
54
|
-
`connect(opts)` returns a page handle for interactive sessions. Same opts as `browse()` for mode/port.
|
|
54
|
+
`connect(opts)` returns a page handle for interactive sessions. Same opts as `browse()` for mode/port. Supports `hybrid` mode — starts headless, falls back to headed on bot detection (same as `browse()`).
|
|
55
55
|
|
|
56
56
|
| Method | Args | Returns | Notes |
|
|
57
57
|
|---|---|---|---|
|
|
@@ -185,7 +185,7 @@ try {
|
|
|
185
185
|
```
|
|
186
186
|
|
|
187
187
|
`createBrowseTools(opts)` returns:
|
|
188
|
-
- `tools` -- array of bareagent-compatible tool objects (browse, goto, snapshot, click, type, press, scroll, select, screenshot)
|
|
188
|
+
- `tools` -- array of bareagent-compatible tool objects (browse, goto, snapshot, click, type, press, scroll, select, back, forward, drag, upload, screenshot)
|
|
189
189
|
- `close()` -- cleanup function, call when done
|
|
190
190
|
|
|
191
191
|
Action tools (click, type, press, scroll, goto) auto-return a fresh snapshot so the LLM always sees the result. 300ms settle delay after actions for DOM updates.
|
|
@@ -199,14 +199,20 @@ barebrowse open https://example.com # Start daemon + navigate
|
|
|
199
199
|
barebrowse snapshot # → .barebrowse/page-<timestamp>.yml
|
|
200
200
|
barebrowse click 8 # Click element ref=8
|
|
201
201
|
barebrowse type 12 hello world # Type into element ref=12
|
|
202
|
-
barebrowse
|
|
203
|
-
barebrowse
|
|
202
|
+
barebrowse back # Go back in history
|
|
203
|
+
barebrowse upload 7 /path/to/file.pdf # Upload file to file input
|
|
204
|
+
barebrowse pdf # → .barebrowse/page-<timestamp>.pdf
|
|
205
|
+
barebrowse wait-for --text="Success" # Wait for content to appear
|
|
206
|
+
barebrowse tabs # List open tabs
|
|
207
|
+
barebrowse save-state # → .barebrowse/state-<timestamp>.json
|
|
204
208
|
barebrowse close # Kill daemon + browser
|
|
205
209
|
```
|
|
206
210
|
|
|
207
|
-
|
|
211
|
+
**Open flags:** `--mode=headless|headed|hybrid`, `--proxy=URL`, `--viewport=WxH`, `--storage-state=FILE`, `--no-cookies`, `--browser=firefox|chromium`, `--timeout=N`
|
|
208
212
|
|
|
209
|
-
|
|
213
|
+
Session lifecycle: `open` spawns a background daemon holding a `connect()` session. Subsequent commands POST to the daemon over HTTP (localhost). `close` shuts everything down. JS dialogs (alert/confirm/prompt) are auto-dismissed and logged.
|
|
214
|
+
|
|
215
|
+
Full command reference: `commands/barebrowse/SKILL.md` (Claude Code) or `commands/barebrowse.md` (other agents)
|
|
210
216
|
|
|
211
217
|
## MCP wrapper
|
|
212
218
|
|
|
@@ -228,11 +234,13 @@ barebrowse ships an MCP server for direct use with Claude Desktop, Cursor, or an
|
|
|
228
234
|
}
|
|
229
235
|
```
|
|
230
236
|
|
|
231
|
-
|
|
237
|
+
12 tools exposed: `browse` (one-shot), `goto`, `snapshot`, `click`, `type`, `press`, `scroll`, `back`, `forward`, `drag`, `upload`, `pdf`.
|
|
232
238
|
|
|
233
239
|
Action tools return `'ok'` -- the agent calls `snapshot` explicitly to observe. This avoids double-token output since MCP tool calls are cheap to chain.
|
|
234
240
|
|
|
235
|
-
Session
|
|
241
|
+
Session runs in hybrid mode (headless with automatic headed fallback on bot detection). `goto` injects cookies from the user's browsers (Chromium + Firefox merged) before navigation for authenticated access.
|
|
242
|
+
|
|
243
|
+
Session tools share a singleton page, lazy-created on first use.
|
|
236
244
|
|
|
237
245
|
## Architecture
|
|
238
246
|
|
package/cli.js
CHANGED
|
@@ -286,7 +286,7 @@ function install() {
|
|
|
286
286
|
|
|
287
287
|
function installSkill() {
|
|
288
288
|
const thisDir = fileURLToPath(new URL('.', import.meta.url));
|
|
289
|
-
const src = join(thisDir, '
|
|
289
|
+
const src = join(thisDir, 'commands', 'barebrowse', 'SKILL.md');
|
|
290
290
|
|
|
291
291
|
if (!existsSync(src)) {
|
|
292
292
|
console.error('SKILL.md not found in package. Reinstall barebrowse.');
|
|
@@ -408,6 +408,6 @@ MCP:
|
|
|
408
408
|
As a library:
|
|
409
409
|
import { browse, connect } from 'barebrowse';
|
|
410
410
|
|
|
411
|
-
More: see README.md or barebrowse.
|
|
411
|
+
More: see README.md or commands/barebrowse.md
|
|
412
412
|
`);
|
|
413
413
|
}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: barebrowse
|
|
3
|
+
description: Browser automation using the user's real browser with real cookies. Handles consent walls, login sessions, and bot detection automatically.
|
|
4
|
+
allowed-tools: Bash(barebrowse:*)
|
|
5
|
+
---
|
|
6
|
+
# barebrowse CLI — Browser Automation for Agents
|
|
7
|
+
|
|
8
|
+
Browse any URL using the user's real browser with real cookies. Returns pruned ARIA snapshots (40-90% smaller than raw) with `[ref=N]` markers for interaction. Handles cookie consent, login sessions, JS dialogs, and bot detection automatically.
|
|
9
|
+
|
|
10
|
+
## Quick Start
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
barebrowse open https://example.com # Start session + navigate
|
|
14
|
+
barebrowse snapshot # Get ARIA snapshot → .barebrowse/page-*.yml
|
|
15
|
+
barebrowse click 8 # Click element with ref=8
|
|
16
|
+
barebrowse snapshot # See result
|
|
17
|
+
barebrowse close # End session
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
All output files go to `.barebrowse/` in the current directory. Read them with the Read tool when needed.
|
|
21
|
+
|
|
22
|
+
## Commands
|
|
23
|
+
|
|
24
|
+
### Session Lifecycle
|
|
25
|
+
|
|
26
|
+
| Command | Description |
|
|
27
|
+
|---------|-------------|
|
|
28
|
+
| `barebrowse open [url] [flags]` | Start browser session. Optionally navigate to URL. |
|
|
29
|
+
| `barebrowse close` | Close session and kill browser. |
|
|
30
|
+
| `barebrowse status` | Check if session is running. |
|
|
31
|
+
|
|
32
|
+
**Open flags:**
|
|
33
|
+
- `--mode=headless|headed|hybrid` — Browser mode (default: headless)
|
|
34
|
+
- `--no-cookies` — Skip cookie injection
|
|
35
|
+
- `--browser=firefox|chromium` — Cookie source
|
|
36
|
+
- `--prune-mode=act|read` — Default pruning mode
|
|
37
|
+
- `--timeout=N` — Navigation timeout in ms
|
|
38
|
+
- `--proxy=URL` — HTTP/SOCKS proxy server
|
|
39
|
+
- `--viewport=WxH` — Viewport size (e.g. 1280x720)
|
|
40
|
+
- `--storage-state=FILE` — Load cookies/localStorage from JSON file
|
|
41
|
+
|
|
42
|
+
### Navigation
|
|
43
|
+
|
|
44
|
+
| Command | Output |
|
|
45
|
+
|---------|--------|
|
|
46
|
+
| `barebrowse goto <url>` | Navigates, waits for load, dismisses consent. Prints "ok". |
|
|
47
|
+
| `barebrowse back` | Go back in browser history. |
|
|
48
|
+
| `barebrowse forward` | Go forward in browser history. |
|
|
49
|
+
| `barebrowse snapshot` | ARIA snapshot → `.barebrowse/page-<timestamp>.yml` |
|
|
50
|
+
| `barebrowse snapshot --mode=read` | Read mode: keeps all text (for content extraction) |
|
|
51
|
+
| `barebrowse screenshot` | Screenshot → `.barebrowse/screenshot-<timestamp>.png` |
|
|
52
|
+
| `barebrowse pdf [--landscape]` | PDF export → `.barebrowse/page-<timestamp>.pdf` |
|
|
53
|
+
|
|
54
|
+
### Interaction
|
|
55
|
+
|
|
56
|
+
| Command | Description |
|
|
57
|
+
|---------|-------------|
|
|
58
|
+
| `barebrowse click <ref>` | Click element (scrolls into view first) |
|
|
59
|
+
| `barebrowse type <ref> <text>` | Type text into element |
|
|
60
|
+
| `barebrowse fill <ref> <text>` | Clear existing content + type new text |
|
|
61
|
+
| `barebrowse press <key>` | Press key: Enter, Tab, Escape, Backspace, Delete, arrows, Space |
|
|
62
|
+
| `barebrowse scroll <deltaY>` | Scroll page (positive=down, negative=up) |
|
|
63
|
+
| `barebrowse hover <ref>` | Hover over element (triggers tooltips) |
|
|
64
|
+
| `barebrowse select <ref> <value>` | Select dropdown option |
|
|
65
|
+
| `barebrowse drag <fromRef> <toRef>` | Drag element to another element |
|
|
66
|
+
| `barebrowse upload <ref> <files..>` | Upload file(s) to a file input element |
|
|
67
|
+
|
|
68
|
+
### Tabs
|
|
69
|
+
|
|
70
|
+
| Command | Description |
|
|
71
|
+
|---------|-------------|
|
|
72
|
+
| `barebrowse tabs` | List open tabs (index, url, title) |
|
|
73
|
+
| `barebrowse tab <index>` | Switch to tab by index |
|
|
74
|
+
|
|
75
|
+
### Debugging
|
|
76
|
+
|
|
77
|
+
| Command | Output |
|
|
78
|
+
|---------|--------|
|
|
79
|
+
| `barebrowse eval <expression>` | Evaluate JS in page, print result |
|
|
80
|
+
| `barebrowse wait-idle` | Wait for network idle (no requests for 500ms) |
|
|
81
|
+
| `barebrowse wait-for [opts]` | Wait for content to appear on page |
|
|
82
|
+
| `barebrowse console-logs` | Console logs → `.barebrowse/console-<timestamp>.json` |
|
|
83
|
+
| `barebrowse network-log` | Network log → `.barebrowse/network-<timestamp>.json` |
|
|
84
|
+
| `barebrowse network-log --failed` | Only failed/4xx/5xx requests |
|
|
85
|
+
| `barebrowse dialog-log` | JS dialog log → `.barebrowse/dialogs-<timestamp>.json` |
|
|
86
|
+
| `barebrowse save-state` | Cookies + localStorage → `.barebrowse/state-<timestamp>.json` |
|
|
87
|
+
|
|
88
|
+
**wait-for flags:**
|
|
89
|
+
- `--text=STRING` — Wait for text to appear in page body
|
|
90
|
+
- `--selector=CSS` — Wait for CSS selector to match
|
|
91
|
+
- `--timeout=N` — Max wait time in ms (default: 30000)
|
|
92
|
+
|
|
93
|
+
## Snapshot Format
|
|
94
|
+
|
|
95
|
+
The snapshot is a YAML-like ARIA tree. Each line is one node:
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
- WebArea "Example Domain" [ref=1]
|
|
99
|
+
- heading "Example Domain" [level=1] [ref=3]
|
|
100
|
+
- paragraph [ref=5]
|
|
101
|
+
- StaticText "This domain is for use in illustrative examples." [ref=6]
|
|
102
|
+
- link "More information..." [ref=8]
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
- `[ref=N]` — Use this number with click, type, fill, hover, select, drag, upload
|
|
106
|
+
- Refs change on every snapshot — always take a fresh snapshot before interacting
|
|
107
|
+
- **act mode** (default): interactive elements + labels — for clicking, typing, navigating
|
|
108
|
+
- **read mode**: all text content — for reading articles, extracting data
|
|
109
|
+
|
|
110
|
+
## Workflow Pattern
|
|
111
|
+
|
|
112
|
+
1. `barebrowse open <url>` — start session
|
|
113
|
+
2. `barebrowse snapshot` — observe page (read the .yml file)
|
|
114
|
+
3. Decide action based on snapshot content
|
|
115
|
+
4. `barebrowse click/type/fill/press/scroll/drag/upload <ref>` — act
|
|
116
|
+
5. `barebrowse snapshot` — observe result (refs are now different!)
|
|
117
|
+
6. Repeat 3-5 until goal achieved
|
|
118
|
+
7. `barebrowse close` — clean up
|
|
119
|
+
|
|
120
|
+
## Tips
|
|
121
|
+
|
|
122
|
+
- **Always snapshot before interacting** — refs are ephemeral and change every time
|
|
123
|
+
- **Use `fill` instead of `type`** when replacing existing text in input fields
|
|
124
|
+
- **Use `--mode=read`** for snapshot when you need to extract article content or data
|
|
125
|
+
- **Use `back`/`forward`** to navigate browser history instead of re-entering URLs
|
|
126
|
+
- **Use `upload`** for file inputs — pass absolute paths to the files
|
|
127
|
+
- **Use `wait-for`** when content loads asynchronously — more reliable than `wait-idle`
|
|
128
|
+
- **Check `dialog-log`** if JS alerts/confirms were auto-dismissed during your session
|
|
129
|
+
- **Use `save-state`** to persist cookies/localStorage for later sessions via `--storage-state`
|
|
130
|
+
- **Check `console-logs`** when page behavior seems wrong — JS errors show up there
|
|
131
|
+
- **Check `network-log --failed`** to debug missing content or broken API calls
|
|
132
|
+
- **Use `eval`** as an escape hatch when the ARIA tree doesn't show what you need
|
|
133
|
+
- **One session per project** — `.barebrowse/` is project-scoped
|
|
134
|
+
- For bot-detected sites, use `--mode=headed` (requires browser with `--remote-debugging-port=9222`)
|
|
@@ -70,7 +70,7 @@ Every action returns a **pruned ARIA snapshot** -- the agent's view of the page
|
|
|
70
70
|
| **Consent behind iframe overlay** | JS `.click()` via `DOM.resolveNode` bypasses z-index/overlay issues | Both |
|
|
71
71
|
| **Permission prompts** (location, notifications, camera, mic) | Launch flags + CDP `Browser.setPermission` auto-deny | Both |
|
|
72
72
|
| **Media autoplay blocked** | `--autoplay-policy=no-user-gesture-required` | Both |
|
|
73
|
-
| **Login walls** |
|
|
73
|
+
| **Login walls** | All-browser cookie merge (Firefox + Chromium), CDP injection (user's real sessions) | Both |
|
|
74
74
|
| **Pre-filled form inputs** | `type({ clear: true })` selects all + deletes before typing | Both |
|
|
75
75
|
| **Off-screen elements** | `DOM.scrollIntoViewIfNeeded` before every click | Both |
|
|
76
76
|
| **Form submission** | `press('Enter')` with proper `text: '\r'` triggers onsubmit | Both |
|
|
@@ -86,8 +86,6 @@ Every action returns a **pruned ARIA snapshot** -- the agent's view of the page
|
|
|
86
86
|
|
|
87
87
|
| Obstacle | What's Needed | Difficulty |
|
|
88
88
|
|----------|--------------|------------|
|
|
89
|
-
| File upload | `Input.setFiles` via CDP | Low |
|
|
90
|
-
| Drag and drop | `Input.dispatchDragEvent` sequence | Medium |
|
|
91
89
|
| Infinite scroll | Scroll + wait for new content strategy | Medium |
|
|
92
90
|
| CAPTCHAs | Cannot solve -- headed mode lets user solve manually | N/A |
|
|
93
91
|
| Cross-origin iframes | Frame tree traversal via CDP | Medium |
|
|
@@ -251,9 +249,10 @@ Chrome permission prompts (location, notifications, camera, mic, etc.) are suppr
|
|
|
251
249
|
- No user prompt ever appears -- agents browse without interruption
|
|
252
250
|
|
|
253
251
|
### Cross-browser cookie injection -- done
|
|
254
|
-
|
|
255
|
-
- `browse()`: auto-injects cookies before navigation (opt-out with `{ cookies: false }`)
|
|
252
|
+
Auto mode merges cookies from all detected browsers (Chromium + Firefox, last-write-wins by name+domain). No need to use Chromium as your daily browser.
|
|
253
|
+
- `browse()`: auto-injects merged cookies before navigation (opt-out with `{ cookies: false }`)
|
|
256
254
|
- `connect()`: manual injection via `page.injectCookies(url, { browser: 'firefox' })`
|
|
255
|
+
- MCP `goto`: auto-injects cookies before every navigation
|
|
257
256
|
- Proven: YouTube login session transferred from Firefox -> headed Chromium -> video playback
|
|
258
257
|
|
|
259
258
|
### Stealth patches -- done
|
|
@@ -292,7 +291,7 @@ const result = await loop.run(messages, tools);
|
|
|
292
291
|
await close();
|
|
293
292
|
```
|
|
294
293
|
|
|
295
|
-
|
|
294
|
+
13 tools: browse, goto, snapshot, click, type, press, scroll, select, back, forward, drag, upload, screenshot.
|
|
296
295
|
Action tools auto-return snapshot (300ms settle delay). The LLM always sees the result.
|
|
297
296
|
|
|
298
297
|
### MCP server
|
|
@@ -310,8 +309,9 @@ Raw JSON-RPC 2.0 over stdio. Zero SDK dependencies. `npm install barebrowse` the
|
|
|
310
309
|
}
|
|
311
310
|
```
|
|
312
311
|
|
|
313
|
-
|
|
312
|
+
12 tools: browse (one-shot), goto, snapshot, click, type, press, scroll, back, forward, drag, upload, pdf.
|
|
314
313
|
Action tools return `'ok'` -- agent calls `snapshot` explicitly (MCP tool calls are cheap to chain).
|
|
314
|
+
Session runs in hybrid mode (headless + automatic headed fallback on bot detection). `goto` injects cookies from the user's browsers (Chromium + Firefox merged) before navigation.
|
|
315
315
|
Session tools share a singleton page, lazy-created on first use.
|
|
316
316
|
|
|
317
317
|
### CLI session -- for coding agents + human devs
|
|
@@ -328,11 +328,11 @@ barebrowse close # Kill daemon + browser
|
|
|
328
328
|
|
|
329
329
|
Architecture: `open` spawns a detached child process running an HTTP server on a random localhost port. Session state stored in `.barebrowse/session.json`. Subsequent commands POST to the daemon. `close` sends shutdown, daemon calls `page.close()` + `process.exit(0)`.
|
|
330
330
|
|
|
331
|
-
Full commands: open, close, status, goto, snapshot, screenshot, click, type, fill, press, scroll, hover, select, eval, wait-idle, console-logs, network-log.
|
|
331
|
+
Full commands: open, close, status, goto, back, forward, snapshot, screenshot, pdf, click, type, fill, press, scroll, hover, select, drag, upload, tabs, tab, eval, wait-idle, wait-for, console-logs, network-log, dialog-log, save-state.
|
|
332
332
|
|
|
333
333
|
Self-sufficiency features (console/network capture, eval) let agents debug without guessing -- they see JS errors and failed requests directly.
|
|
334
334
|
|
|
335
|
-
SKILL.md (
|
|
335
|
+
SKILL.md (`commands/barebrowse/SKILL.md`) teaches Claude Code the CLI commands. Install with `barebrowse install --skill`.
|
|
336
336
|
|
|
337
337
|
---
|
|
338
338
|
|
|
@@ -353,7 +353,7 @@ barebrowse = the eyes + hands (browse, read, interact with the web)
|
|
|
353
353
|
- **Linux first.** Tested on Fedora/KDE. macOS/Windows cookie extraction paths exist in auth.js but untested.
|
|
354
354
|
- **Node >= 22.** Built-in WebSocket, built-in SQLite.
|
|
355
355
|
- **Not a server.** Library that agents import. Wrap as MCP (included) or HTTP if needed.
|
|
356
|
-
- **Not cross-platform tested.**
|
|
356
|
+
- **Not cross-platform tested.** Tested on Linux only. Published to npm as `barebrowse`.
|
|
357
357
|
|
|
358
358
|
---
|
|
359
359
|
|
|
@@ -381,14 +381,20 @@ barebrowse/
|
|
|
381
381
|
│ ├── headed-demo.js # Interactive demo: Wikipedia → DuckDuckGo
|
|
382
382
|
│ └── yt-demo.js # YouTube demo: Firefox cookies → search → play video
|
|
383
383
|
├── docs/
|
|
384
|
-
│ ├──
|
|
385
|
-
│ ├──
|
|
386
|
-
│ ├──
|
|
387
|
-
│
|
|
384
|
+
│ ├── README.md # Documentation navigation guide
|
|
385
|
+
│ ├── 00-context/ # vision, assumptions, system-state (this file)
|
|
386
|
+
│ ├── 01-product/ # prd.md
|
|
387
|
+
│ ├── 03-logs/ # decisions, implementation, bugs, validation, insights
|
|
388
|
+
│ ├── 04-process/ # dev-workflow, definition-of-done, testing (64 tests)
|
|
389
|
+
│ └── archive/ # poc-plan.md
|
|
388
390
|
├── mcp-server.js # MCP server (JSON-RPC 2.0 over stdio)
|
|
389
391
|
├── cli.js # CLI entry: session commands, MCP, browse, install
|
|
390
392
|
├── .mcp.json # MCP server config for Claude Desktop / Cursor
|
|
391
393
|
├── barebrowse.context.md # LLM-consumable integration guide
|
|
394
|
+
├── commands/
|
|
395
|
+
│ ├── barebrowse.md # CLI command reference (any agent)
|
|
396
|
+
│ └── barebrowse/
|
|
397
|
+
│ └── SKILL.md # CLI command reference (Claude Code skill)
|
|
392
398
|
├── package.json
|
|
393
399
|
├── README.md
|
|
394
400
|
└── CLAUDE.md
|
|
@@ -120,4 +120,150 @@ Full end-to-end validation of every CLI command against real websites.
|
|
|
120
120
|
|
|
121
121
|
---
|
|
122
122
|
|
|
123
|
+
## New features manual validation (v0.4.0)
|
|
124
|
+
|
|
125
|
+
All tested against live sites via CLI session from `/tmp`.
|
|
126
|
+
|
|
127
|
+
### Navigation: back/forward
|
|
128
|
+
|
|
129
|
+
| Command | Result |
|
|
130
|
+
|---------|--------|
|
|
131
|
+
| `open https://example.com` | Session started |
|
|
132
|
+
| `goto https://wikipedia.org` | "ok" |
|
|
133
|
+
| `back` | "ok" — returned to example.com |
|
|
134
|
+
| `forward` | "ok" — returned to wikipedia.org |
|
|
135
|
+
|
|
136
|
+
### File upload
|
|
137
|
+
|
|
138
|
+
| Command | Result |
|
|
139
|
+
|---------|--------|
|
|
140
|
+
| `goto 'data:text/html,<input type="file" id="f"><script>...</script>'` | "ok" |
|
|
141
|
+
| `snapshot` | `button "Choose File" [ref=7]` |
|
|
142
|
+
| `upload 7 /tmp/test-upload.txt` | "ok" |
|
|
143
|
+
| `eval 'document.title'` | `"uploaded"` — onchange fired, confirmed working |
|
|
144
|
+
|
|
145
|
+
### PDF export
|
|
146
|
+
|
|
147
|
+
| Command | Result |
|
|
148
|
+
|---------|--------|
|
|
149
|
+
| (on wikipedia.org) `pdf` | `.barebrowse/page-*.pdf` — 200,716 bytes |
|
|
150
|
+
|
|
151
|
+
### Tabs
|
|
152
|
+
|
|
153
|
+
| Command | Result |
|
|
154
|
+
|---------|--------|
|
|
155
|
+
| `tabs` | `[{"index":0,"url":"https://www.wikipedia.org/","title":"Wikipedia",...}, {"index":1,"url":"about:blank",...}]` |
|
|
156
|
+
|
|
157
|
+
### Wait-for
|
|
158
|
+
|
|
159
|
+
| Command | Result |
|
|
160
|
+
|---------|--------|
|
|
161
|
+
| `wait-for --text=Wikipedia` | "ok" — found text immediately |
|
|
162
|
+
| `wait-for --selector=body` | "ok" — found selector immediately |
|
|
163
|
+
|
|
164
|
+
### JS dialog auto-dismiss
|
|
165
|
+
|
|
166
|
+
| Command | Result |
|
|
167
|
+
|---------|--------|
|
|
168
|
+
| `eval 'alert("hello from dialog"); "done"'` | `"done"` — alert auto-dismissed, eval continued |
|
|
169
|
+
| `dialog-log` | `.barebrowse/dialogs-*.json (1 entries)` — dialog logged with type, message, timestamp |
|
|
170
|
+
|
|
171
|
+
### Save state
|
|
172
|
+
|
|
173
|
+
| Command | Result |
|
|
174
|
+
|---------|--------|
|
|
175
|
+
| `save-state` | `.barebrowse/state-*.json` — 2,836 bytes (cookies + localStorage) |
|
|
176
|
+
|
|
177
|
+
### Viewport flag
|
|
178
|
+
|
|
179
|
+
| Command | Result |
|
|
180
|
+
|---------|--------|
|
|
181
|
+
| `open https://example.com --viewport=800x600` | Session started |
|
|
182
|
+
| `eval 'window.innerWidth + "x" + window.innerHeight'` | `"800x600"` — confirmed |
|
|
183
|
+
|
|
184
|
+
### Drag (wired, needs drag-and-drop UI for visual test)
|
|
185
|
+
|
|
186
|
+
Wired through interact.js → index.js → daemon.js → cli.js. Mouse event sequence: mousePressed at source → mouseMoved to midpoint → mouseMoved to target → mouseReleased at target. Requires a drag-and-drop UI to validate visually.
|
|
187
|
+
|
|
188
|
+
### Proxy flag
|
|
189
|
+
|
|
190
|
+
Wired through cli.js → daemon.js → chromium.js → `--proxy-server` Chromium launch arg. Requires a proxy server to validate.
|
|
191
|
+
|
|
192
|
+
### Storage-state flag
|
|
193
|
+
|
|
194
|
+
Wired through cli.js → daemon.js → connect() → `Network.setCookies` on startup. Loads from JSON file produced by `save-state`.
|
|
195
|
+
|
|
196
|
+
---
|
|
197
|
+
|
|
198
|
+
## MCP server validation (v0.4.1)
|
|
199
|
+
|
|
200
|
+
All 12 MCP tools tested live via Claude Code MCP integration. Stats line (`# X chars → Y chars (N% pruned)`) confirmed on every snapshot.
|
|
201
|
+
|
|
202
|
+
### Tools tested successfully (10/12)
|
|
203
|
+
|
|
204
|
+
| Tool | Test | Result |
|
|
205
|
+
|------|------|--------|
|
|
206
|
+
| `browse` | One-shot HN | `51,397 → 26,983 (48% pruned)` — stats line present |
|
|
207
|
+
| `goto` | DDG, Wikipedia, data: URLs | All navigated successfully |
|
|
208
|
+
| `snapshot` | Multiple pages | Stats line on every snapshot, pruning working |
|
|
209
|
+
| `click` | Wikipedia "About Wikipedia" link | Navigated to target page |
|
|
210
|
+
| `type` | DDG search box `barebrowse npm` | Text entered correctly |
|
|
211
|
+
| `press` | Enter to submit DDG search | Search submitted (CAPTCHA returned — expected in headless mode) |
|
|
212
|
+
| `scroll` | 500px down on Wikipedia:About | Scrolled successfully |
|
|
213
|
+
| `back` | After Wikipedia:About → CDP page | Returned to previous page |
|
|
214
|
+
| `forward` | After back | Returned to Wikipedia:About |
|
|
215
|
+
| `pdf` | Wikipedia:About | 380K base64 PDF generated |
|
|
216
|
+
|
|
217
|
+
### Tools tested with known limitations (2/12)
|
|
218
|
+
|
|
219
|
+
| Tool | Test | Result |
|
|
220
|
+
|------|------|--------|
|
|
221
|
+
| `upload` | data: page with file input | `ok` returned, file set via DOM.setFileInputFiles. onchange fires but result text pruned in act mode (non-interactive content). Works in integration tests. |
|
|
222
|
+
| `drag` | data: page with draggable divs | Mouse events dispatched but HTML5 drag/drop dataTransfer not populated via CDP synthetic events. Known CDP limitation (same as Playwright). |
|
|
223
|
+
|
|
224
|
+
### Observations
|
|
225
|
+
|
|
226
|
+
- DDG returned a CAPTCHA in headless mode ("Select all squares containing a duck") — expected; hybrid mode handles this
|
|
227
|
+
- Stats line format: `# 42,367 chars → 5,453 chars (87% pruned)` — present on all pruned snapshots
|
|
228
|
+
- Size reduction (by character count) observed: 37% (Wikipedia) to 88% (example.com)
|
|
229
|
+
|
|
230
|
+
---
|
|
231
|
+
|
|
232
|
+
## MCP cookies + hybrid fallback validation (v0.4.2)
|
|
233
|
+
|
|
234
|
+
Three changes tested: all-browser cookie merge in auth.js, hybrid mode for connect(), cookie injection + hybrid in MCP goto.
|
|
235
|
+
|
|
236
|
+
### Cookie injection — login-walled sites via MCP goto
|
|
237
|
+
|
|
238
|
+
| Site | Logged In? | Details |
|
|
239
|
+
|------|-----------|---------|
|
|
240
|
+
| **Gmail** | Yes | Full inbox visible: Compose, labels, 4 emails. Required domain-stripping fix (`mail.google.com` → `google.com`) to capture parent-domain cookies (SID, HSID, etc.). 47 cookies merged from Firefox + Chromium. |
|
|
241
|
+
| **YouTube** | Yes | Personalized feed: tabs for Linux, AI, Electrical Engineering. Recommendations include Claude Code videos, KDE Plasma. Account buttons visible. |
|
|
242
|
+
| **LinkedIn** | Yes | Full feed as Amr Hassan: Home, My Network, Jobs, Messaging, Notifications. Posts visible. Stealth patches + cookies bypassed LinkedIn's aggressive bot detection. |
|
|
243
|
+
| **Amazon.nl** | No (expected) | Not logged in but consent dismissed, search + product pages worked. Cookie injection had no effect (no Amazon session in Firefox). |
|
|
244
|
+
| **GitHub** | No | Shows generic homepage with "Sign in". No GitHub session cookies in Firefox. |
|
|
245
|
+
|
|
246
|
+
### Bot detection — hybrid fallback
|
|
247
|
+
|
|
248
|
+
| Site | Headless Result | Hybrid Fallback | Final Result |
|
|
249
|
+
|------|----------------|-----------------|--------------|
|
|
250
|
+
| **Google Search** | Full results, no CAPTCHA | Not triggered (stealth sufficient) | Pass — logged in as Amr Hassan |
|
|
251
|
+
| **Reddit** | "Prove your humanity" + reCAPTCHA | Triggered → connected to headed Chromium on 9222 | Pass — full feed with posts, logged in |
|
|
252
|
+
| **LinkedIn** | Loaded fine with stealth + cookies | Not triggered | Pass |
|
|
253
|
+
|
|
254
|
+
### Bug fixes discovered during validation
|
|
255
|
+
|
|
256
|
+
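1. **Domain stripping in authenticate()**: `mail.google.com` extracted only 9 cookies (subdomain-specific). Fix: strip the hostname to its last two labels (`google.com`) → 47 cookies including all auth cookies (SID, HSID, SSID, APISID, SAPISID). Note: this is not a true registrable-domain lookup, so multi-part suffixes such as `example.co.uk` reduce to `co.uk`.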
|
|
257
|
+
2. **Reddit challenge detection**: Block page shows "Prove your humanity" and "File a ticket" — neither matched existing challenge phrases. Added both to `isChallengePage()`.
|
|
258
|
+
|
|
259
|
+
### connect() hybrid mode
|
|
260
|
+
|
|
261
|
+
Tested `connect({ mode: 'hybrid' })` with Reddit: headless detected challenge → killed browser → connected to headed Chromium → Reddit loaded with full content. Same code path as MCP session.
|
|
262
|
+
|
|
263
|
+
### All-browser cookie merge
|
|
264
|
+
|
|
265
|
+
`extractCookies({ domain: 'google.com' })` in auto mode: Chromium cookies merged first, then Firefox cookies (last-write-wins by `name@domain`). 47 cookies total for google.com. Previous behavior: stopped at first browser found (Chromium only, missed Firefox session).
|
|
266
|
+
|
|
267
|
+
---
|
|
268
|
+
|
|
123
269
|
*Add new validation entries when testing against new sites or features.*
|
|
@@ -152,6 +152,27 @@ Tests real interactions: clicking, typing, scrolling, form submission, and navig
|
|
|
152
152
|
|
|
153
153
|
---
|
|
154
154
|
|
|
155
|
+
## Manual validation (v0.4.0 features)
|
|
156
|
+
|
|
157
|
+
Features added in v0.4.0 are manually validated but not yet in the automated test suite. See `docs/03-logs/validation-log.md` for full results.
|
|
158
|
+
|
|
159
|
+
| Feature | Validation method | Result |
|
|
160
|
+
|---------|-------------------|--------|
|
|
161
|
+
| `back` / `forward` | example.com → wikipedia → back → forward | ok |
|
|
162
|
+
| `upload <ref> <files..>` | data: URL with file input, verified onchange fired | ok |
|
|
163
|
+
| `pdf` | Wikipedia export, 200KB PDF | ok |
|
|
164
|
+
| `tabs` | Listed 2 tabs with urls/titles | ok |
|
|
165
|
+
| `wait-for --text` | Found "Wikipedia" text | ok |
|
|
166
|
+
| `wait-for --selector` | Found `body` selector | ok |
|
|
167
|
+
| `dialog-log` | alert() auto-dismissed, 1 entry logged | ok |
|
|
168
|
+
| `save-state` | 2.8KB cookies + localStorage JSON | ok |
|
|
169
|
+
| `--viewport=WxH` | 800x600, confirmed via innerWidth/innerHeight | ok |
|
|
170
|
+
| `drag` | Wired through all layers, needs drag UI to visually test | wired only |
|
|
171
|
+
| `--proxy` | Wired to Chromium launch arg, needs proxy to test | wired only |
|
|
172
|
+
| `--storage-state` | Wired to Network.setCookies, loads from save-state output | wired only |
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
155
176
|
## Writing new tests
|
|
156
177
|
|
|
157
178
|
Follow the existing pattern:
|
package/docs/README.md
CHANGED
|
@@ -50,6 +50,7 @@
|
|
|
50
50
|
|------|---------|
|
|
51
51
|
| `README.md` | Public-facing project overview |
|
|
52
52
|
| `barebrowse.context.md` | LLM-consumable integration guide (full API, gotchas, wiring) |
|
|
53
|
-
|
|
|
53
|
+
| `commands/barebrowse.md` | CLI command reference for any agent (same as SKILL.md without frontmatter) |
|
|
54
|
+
| `commands/barebrowse/SKILL.md` | CLI command reference for Claude Code (copy to `.claude/skills/`) |
|
|
54
55
|
| `CHANGELOG.md` | Detailed version-by-version changelog |
|
|
55
56
|
| `CLAUDE.md` | AI agent instructions for this project |
|
package/docs/skill-template.md
CHANGED
|
@@ -92,15 +92,15 @@ Both require the underlying tool to be installed. Choose based on your agent's c
|
|
|
92
92
|
|
|
93
93
|
## Example: barebrowse
|
|
94
94
|
|
|
95
|
-
barebrowse ships its skill at
|
|
95
|
+
barebrowse ships its skill at `commands/barebrowse/SKILL.md`:
|
|
96
96
|
|
|
97
97
|
```bash
|
|
98
98
|
# Claude Code — project
|
|
99
|
-
cp node_modules/barebrowse
|
|
99
|
+
cp node_modules/barebrowse/commands/barebrowse/SKILL.md .claude/skills/barebrowse/SKILL.md
|
|
100
100
|
|
|
101
101
|
# Claude Code — global
|
|
102
102
|
barebrowse install --skill
|
|
103
103
|
|
|
104
104
|
# Other agents — project or global
|
|
105
|
-
cp node_modules/barebrowse
|
|
105
|
+
cp node_modules/barebrowse/commands/barebrowse/SKILL.md .barebrowse/commands/SKILL.md
|
|
106
106
|
```
|
package/mcp-server.js
CHANGED
|
@@ -14,14 +14,14 @@ import { browse, connect } from './src/index.js';
|
|
|
14
14
|
let _page = null;
|
|
15
15
|
|
|
16
16
|
async function getPage() {
|
|
17
|
-
if (!_page) _page = await connect();
|
|
17
|
+
if (!_page) _page = await connect({ mode: 'hybrid' });
|
|
18
18
|
return _page;
|
|
19
19
|
}
|
|
20
20
|
|
|
21
21
|
const TOOLS = [
|
|
22
22
|
{
|
|
23
23
|
name: 'browse',
|
|
24
|
-
description: '
|
|
24
|
+
description: 'Browse a URL in a real browser. Use instead of web fetch when the page needs JavaScript, login cookies, consent dismissal, or bot detection. Returns a pruned ARIA snapshot with [ref=N] markers for interaction. Stateless — does not use the session page.',
|
|
25
25
|
inputSchema: {
|
|
26
26
|
type: 'object',
|
|
27
27
|
properties: {
|
|
@@ -33,7 +33,7 @@ const TOOLS = [
|
|
|
33
33
|
},
|
|
34
34
|
{
|
|
35
35
|
name: 'goto',
|
|
36
|
-
description: 'Navigate the session page to a URL. Returns ok — call snapshot to observe.',
|
|
36
|
+
description: 'Navigate the session page to a URL. Injects cookies from the user\'s browser for authenticated access. Returns ok — call snapshot to observe.',
|
|
37
37
|
inputSchema: {
|
|
38
38
|
type: 'object',
|
|
39
39
|
properties: {
|
|
@@ -146,6 +146,7 @@ async function handleToolCall(name, args) {
|
|
|
146
146
|
|
|
147
147
|
case 'goto': {
|
|
148
148
|
const page = await getPage();
|
|
149
|
+
try { await page.injectCookies(args.url); } catch {}
|
|
149
150
|
await page.goto(args.url);
|
|
150
151
|
return 'ok';
|
|
151
152
|
}
|
|
@@ -217,7 +218,7 @@ async function handleMessage(msg) {
|
|
|
217
218
|
return jsonrpcResponse(id, {
|
|
218
219
|
protocolVersion: '2024-11-05',
|
|
219
220
|
capabilities: { tools: {} },
|
|
220
|
-
serverInfo: { name: 'barebrowse', version: '0.
|
|
221
|
+
serverInfo: { name: 'barebrowse', version: '0.4.2' },
|
|
221
222
|
});
|
|
222
223
|
}
|
|
223
224
|
|
package/package.json
CHANGED
package/src/auth.js
CHANGED
|
@@ -232,14 +232,20 @@ export function extractCookies(opts = {}) {
|
|
|
232
232
|
return extractChromiumCookies(path, domain);
|
|
233
233
|
}
|
|
234
234
|
|
|
235
|
-
// Auto
|
|
235
|
+
// Auto: try all browsers, merge (last-write-wins by name+domain)
|
|
236
|
+
const all = new Map();
|
|
236
237
|
const chromium = findChromiumCookieDb();
|
|
237
|
-
if (chromium)
|
|
238
|
-
|
|
238
|
+
if (chromium) {
|
|
239
|
+
for (const c of extractChromiumCookies(chromium.path, domain))
|
|
240
|
+
all.set(`${c.name}@${c.domain}`, c);
|
|
241
|
+
}
|
|
239
242
|
const firefox = findFirefoxCookieDb();
|
|
240
|
-
if (firefox)
|
|
241
|
-
|
|
242
|
-
|
|
243
|
+
if (firefox) {
|
|
244
|
+
for (const c of extractFirefoxCookies(firefox, domain))
|
|
245
|
+
all.set(`${c.name}@${c.domain}`, c);
|
|
246
|
+
}
|
|
247
|
+
if (all.size === 0) throw new Error('No browser cookie database found');
|
|
248
|
+
return [...all.values()];
|
|
243
249
|
}
|
|
244
250
|
|
|
245
251
|
/**
|
|
@@ -270,7 +276,11 @@ export async function injectCookies(session, cookies) {
|
|
|
270
276
|
* @param {object} [opts] - Options passed to extractCookies
|
|
271
277
|
*/
|
|
272
278
|
export async function authenticate(session, url, opts = {}) {
|
|
273
|
-
|
|
279
|
+
// Strip to registrable domain so mail.google.com → google.com
|
|
280
|
+
// This ensures parent-domain cookies (.google.com) are included
|
|
281
|
+
const hostname = new URL(url).hostname.replace(/^www\./, '');
|
|
282
|
+
const parts = hostname.split('.');
|
|
283
|
+
const domain = parts.length > 2 ? parts.slice(-2).join('.') : hostname;
|
|
274
284
|
const cookies = extractCookies({ ...opts, domain });
|
|
275
285
|
if (cookies.length > 0) {
|
|
276
286
|
await injectCookies(session, cookies);
|
package/src/index.js
CHANGED
|
@@ -95,13 +95,16 @@ export async function browse(url, opts = {}) {
|
|
|
95
95
|
}
|
|
96
96
|
|
|
97
97
|
// Step 6: Prune for agent consumption
|
|
98
|
+
const raw = formatTree(tree);
|
|
98
99
|
let snapshot;
|
|
99
100
|
if (opts.prune !== false) {
|
|
100
101
|
const pruned = pruneTree(tree, { mode: opts.pruneMode || 'act' });
|
|
101
102
|
snapshot = formatTree(pruned);
|
|
102
103
|
} else {
|
|
103
|
-
snapshot =
|
|
104
|
+
snapshot = raw;
|
|
104
105
|
}
|
|
106
|
+
const stats = `# ${raw.length.toLocaleString()} chars → ${snapshot.length.toLocaleString()} chars (${Math.round((1 - snapshot.length / raw.length) * 100)}% pruned)`;
|
|
107
|
+
snapshot = stats + '\n' + snapshot;
|
|
105
108
|
|
|
106
109
|
// Step 7: Clean up
|
|
107
110
|
await cdp.send('Target.closeTarget', { targetId: page.targetId });
|
|
@@ -117,7 +120,7 @@ export async function browse(url, opts = {}) {
|
|
|
117
120
|
* Connect to a browser for a long-lived interactive session.
|
|
118
121
|
*
|
|
119
122
|
* @param {object} [opts]
|
|
120
|
-
* @param {'headless'|'headed'} [opts.mode='headless'] - Browser mode
|
|
123
|
+
* @param {'headless'|'headed'|'hybrid'} [opts.mode='headless'] - Browser mode
|
|
121
124
|
* @param {number} [opts.port=9222] - CDP port for headed mode
|
|
122
125
|
* @returns {Promise<object>} Page handle with goto, snapshot, close
|
|
123
126
|
*/
|
|
@@ -135,7 +138,7 @@ export async function connect(opts = {}) {
|
|
|
135
138
|
cdp = await createCDP(browser.wsUrl);
|
|
136
139
|
}
|
|
137
140
|
|
|
138
|
-
|
|
141
|
+
let page = await createPage(cdp, mode !== 'headed', { viewport: opts.viewport });
|
|
139
142
|
let refMap = new Map();
|
|
140
143
|
|
|
141
144
|
// Suppress permission prompts for all modes
|
|
@@ -154,17 +157,20 @@ export async function connect(opts = {}) {
|
|
|
154
157
|
|
|
155
158
|
// Auto-dismiss JS dialogs (alert, confirm, prompt)
|
|
156
159
|
const dialogLog = [];
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
160
|
+
function setupDialogHandler(session) {
|
|
161
|
+
session.on('Page.javascriptDialogOpening', async (params) => {
|
|
162
|
+
dialogLog.push({
|
|
163
|
+
type: params.type,
|
|
164
|
+
message: params.message,
|
|
165
|
+
timestamp: new Date().toISOString(),
|
|
166
|
+
});
|
|
167
|
+
await session.send('Page.handleJavaScriptDialog', {
|
|
168
|
+
accept: params.type !== 'beforeunload',
|
|
169
|
+
promptText: params.defaultPrompt || '',
|
|
170
|
+
});
|
|
166
171
|
});
|
|
167
|
-
}
|
|
172
|
+
}
|
|
173
|
+
setupDialogHandler(page.session);
|
|
168
174
|
|
|
169
175
|
return {
|
|
170
176
|
async goto(url, timeout = 30000) {
|
|
@@ -172,6 +178,25 @@ export async function connect(opts = {}) {
|
|
|
172
178
|
if (opts.consent !== false) {
|
|
173
179
|
await dismissConsent(page.session);
|
|
174
180
|
}
|
|
181
|
+
|
|
182
|
+
// Hybrid fallback: if bot-blocked, retry with headed browser
|
|
183
|
+
if (mode === 'hybrid') {
|
|
184
|
+
const { tree } = await ariaTree(page);
|
|
185
|
+
if (isChallengePage(tree)) {
|
|
186
|
+
await cdp.send('Target.closeTarget', { targetId: page.targetId });
|
|
187
|
+
cdp.close();
|
|
188
|
+
if (browser) { browser.process.kill(); browser = null; }
|
|
189
|
+
|
|
190
|
+
const port = opts.port || 9222;
|
|
191
|
+
const wsUrl = await getDebugUrl(port);
|
|
192
|
+
cdp = await createCDP(wsUrl);
|
|
193
|
+
page = await createPage(cdp, false, { viewport: opts.viewport });
|
|
194
|
+
setupDialogHandler(page.session);
|
|
195
|
+
await suppressPermissions(cdp);
|
|
196
|
+
await navigate(page, url, timeout);
|
|
197
|
+
if (opts.consent !== false) await dismissConsent(page.session);
|
|
198
|
+
}
|
|
199
|
+
}
|
|
175
200
|
},
|
|
176
201
|
|
|
177
202
|
async goBack() {
|
|
@@ -195,9 +220,12 @@ export async function connect(opts = {}) {
|
|
|
195
220
|
async snapshot(pruneOpts) {
|
|
196
221
|
const result = await ariaTree(page);
|
|
197
222
|
refMap = result.refMap;
|
|
198
|
-
|
|
223
|
+
const raw = formatTree(result.tree);
|
|
224
|
+
if (pruneOpts === false) return raw;
|
|
199
225
|
const pruned = pruneTree(result.tree, { mode: pruneOpts?.mode || 'act' });
|
|
200
|
-
|
|
226
|
+
const out = formatTree(pruned);
|
|
227
|
+
const stats = `# ${raw.length.toLocaleString()} chars → ${out.length.toLocaleString()} chars (${Math.round((1 - out.length / raw.length) * 100)}% pruned)`;
|
|
228
|
+
return stats + '\n' + out;
|
|
201
229
|
},
|
|
202
230
|
|
|
203
231
|
async click(ref) {
|
|
@@ -541,7 +569,9 @@ function isChallengePage(tree) {
|
|
|
541
569
|
'checking your browser',
|
|
542
570
|
'please wait',
|
|
543
571
|
'verify you are human',
|
|
572
|
+
'prove your humanity',
|
|
544
573
|
'attention required',
|
|
574
|
+
'file a ticket',
|
|
545
575
|
];
|
|
546
576
|
const lower = text.toLowerCase();
|
|
547
577
|
return challengePhrases.some((p) => lower.includes(p));
|
|
File without changes
|