barebrowse 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mcp.json +8 -0
- package/CHANGELOG.md +100 -0
- package/CLAUDE.md +22 -0
- package/README.md +123 -43
- package/barebrowse.context.md +261 -0
- package/cli.js +156 -0
- package/docs/blueprint.md +361 -0
- package/docs/testing.md +202 -0
- package/mcp-server.js +216 -0
- package/package.json +22 -9
- package/src/aria.js +69 -0
- package/src/auth.js +279 -0
- package/src/bareagent.js +161 -0
- package/src/cdp.js +148 -0
- package/src/chromium.js +148 -0
- package/src/consent.js +210 -0
- package/src/index.js +186 -10
- package/src/interact.js +208 -0
- package/src/prune.js +472 -0
- package/src/stealth.js +51 -0
package/cli.js
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* cli.js -- barebrowse CLI entry point.
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* npx barebrowse mcp Start the MCP server (JSON-RPC over stdio)
|
|
7
|
+
* npx barebrowse install Auto-configure MCP in Claude Desktop / Cursor / Claude Code
|
|
8
|
+
* npx barebrowse browse <url> One-shot browse, print snapshot to stdout
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'node:fs';
|
|
12
|
+
import { join } from 'node:path';
|
|
13
|
+
import { homedir, platform } from 'node:os';
|
|
14
|
+
|
|
15
|
+
// Command dispatch: the first CLI argument selects the sub-command.
// `install` is a hoisted function declaration defined further down this file.
const cmd = process.argv[2];

if (cmd === 'mcp') {
  // Importing the module starts the MCP server as a side effect of its top-level code.
  await import('./mcp-server.js');
} else if (cmd === 'install') {
  install();
} else if (cmd === 'browse' && process.argv[3]) {
  // Loaded lazily so `mcp`, `install` and the help text do not pay for the browser stack.
  const { browse } = await import('./src/index.js');
  const url = process.argv[3];
  const mode = process.argv[4] || 'headless';
  try {
    const snapshot = await browse(url, { mode });
    // Exit from the write callback: process.exit() does not wait for pending
    // asynchronous stream writes, so exiting immediately could truncate a large
    // snapshot when stdout is a pipe.
    process.stdout.write(snapshot + '\n', () => process.exit(0));
  } catch (err) {
    // Same reasoning for the error message on stderr.
    process.stderr.write(`Error: ${err.message}\n`, () => process.exit(1));
  }
} else {
  // No command, unknown command, or `browse` without a URL: print usage.
  process.stdout.write(`barebrowse -- CDP-direct browsing for autonomous agents

Usage:
barebrowse mcp Start MCP server (JSON-RPC over stdio)
barebrowse install Auto-configure MCP for Claude Desktop / Cursor / Claude Code
barebrowse browse <url> One-shot browse, print ARIA snapshot

As a library:
import { browse, connect } from 'barebrowse';

As bareagent tools:
import { createBrowseTools } from 'barebrowse/bareagent';

More: see README.md or barebrowse.context.md
`);
}
|
|
53
|
+
|
|
54
|
+
// --- MCP auto-installer ---
|
|
55
|
+
|
|
56
|
+
/**
 * Add a `barebrowse` entry to the `mcpServers` section of every MCP client
 * config returned by detectTargets().
 *
 * When no client is detected, prints the JSON snippet to add manually.
 * A config that already has a `barebrowse` entry is left untouched but still
 * counts as a success. Per-target failures are reported and do not stop the
 * remaining targets from being processed.
 */
function install() {
  const serverEntry = { command: 'npx', args: ['barebrowse', 'mcp'] };
  const clients = detectTargets();

  if (clients.length === 0) {
    console.log('No MCP clients detected. You can manually add this to your MCP config:\n');
    console.log(JSON.stringify({ mcpServers: { barebrowse: serverEntry } }, null, 2));
    console.log('\nSupported clients: Claude Desktop, Cursor, Claude Code');
    return;
  }

  let succeeded = 0;

  for (const { name, path } of clients) {
    try {
      const config = readJsonOrEmpty(path);
      config.mcpServers ||= {};

      if (config.mcpServers.barebrowse) {
        console.log(` ${name}: already configured`);
      } else {
        config.mcpServers.barebrowse = serverEntry;

        // The config file may not exist yet; make sure its directory does.
        mkdirSync(join(path, '..'), { recursive: true });

        writeFileSync(path, JSON.stringify(config, null, 2) + '\n');
        console.log(` ${name}: installed -> ${path}`);
      }
      succeeded += 1;
    } catch (err) {
      console.log(` ${name}: failed (${err.message})`);
    }
  }

  if (succeeded === 0) return;

  console.log('\nDone. Restart your MCP client to pick up the new server.');
  console.log('Tools available: browse, goto, snapshot, click, type, press, scroll');
}
|
|
103
|
+
|
|
104
|
+
/**
 * Work out which MCP client config files the installer should write to.
 *
 * - Claude Desktop: included when its per-OS config directory exists
 *   (the config file itself does not have to exist yet).
 * - Cursor: included when `~/.cursor` exists (darwin, linux, win32 only).
 * - Claude Code: included when the current working directory looks like a
 *   project, i.e. contains `package.json` or `.git`.
 *
 * @returns {{ name: string, path: string }[]} Targets in the order listed above.
 */
function detectTargets() {
  const userHome = homedir();
  const osName = platform();
  const projectDir = process.cwd();
  const found = [];

  // Claude Desktop: config directory relative to the home dir, per platform.
  const desktopDirByOs = {
    darwin: ['Library', 'Application Support', 'Claude'],
    linux: ['.config', 'Claude'],
    win32: ['AppData', 'Roaming', 'Claude'],
  };
  const isKnownOs = Object.hasOwn(desktopDirByOs, osName);

  if (isKnownOs) {
    const desktopDir = join(userHome, ...desktopDirByOs[osName]);
    if (existsSync(desktopDir)) {
      found.push({
        name: 'Claude Desktop',
        path: join(desktopDir, 'claude_desktop_config.json'),
      });
    }
  }

  // Cursor: same location on every supported platform.
  if (isKnownOs) {
    const cursorDir = join(userHome, '.cursor');
    if (existsSync(cursorDir)) {
      found.push({ name: 'Cursor', path: join(cursorDir, 'mcp.json') });
    }
  }

  // Claude Code: project-level .mcp.json, only offered inside a project directory.
  const inProject = ['package.json', '.git'].some((marker) => existsSync(join(projectDir, marker)));
  if (inProject) {
    found.push({ name: 'Claude Code (this project)', path: join(projectDir, '.mcp.json') });
  }

  return found;
}
|
|
149
|
+
|
|
150
|
+
/**
 * Read a JSON config file, treating a missing or blank file as an empty config.
 *
 * Only "file does not exist" and "file is empty" map to `{}`. Any other
 * problem (unreadable file, malformed JSON, non-object top level) throws, so
 * that a caller about to write the config back cannot silently replace an
 * existing file it failed to understand.
 *
 * @param {string} path - Path of the JSON file to read.
 * @returns {object} The parsed top-level object, or `{}` when the file is missing or blank.
 * @throws {Error} When the file cannot be read (other than ENOENT), is not
 *   valid JSON, or its top-level value is not a plain object.
 */
function readJsonOrEmpty(path) {
  let raw;
  try {
    raw = readFileSync(path, 'utf8');
  } catch (err) {
    if (err.code === 'ENOENT') return {};
    throw err;
  }

  // Drop a UTF-8 byte-order mark, which JSON.parse would reject.
  const text = raw.replace(/^\uFEFF/, '');
  if (text.trim() === '') return {};

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch (err) {
    throw new Error(`invalid JSON in ${path}: ${err.message}`, { cause: err });
  }

  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new Error(`expected a JSON object in ${path}`);
  }
  return parsed;
}
|
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
# barebrowse -- Blueprint
|
|
2
|
+
|
|
3
|
+
Vanilla JS library. CDP-direct. URL in, pruned ARIA snapshot out.
|
|
4
|
+
No Playwright, no bundled browser, no build step.
|
|
5
|
+
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## What It Does
|
|
9
|
+
|
|
10
|
+
Gives autonomous agents authenticated access to the web through the user's own Chromium browser.
|
|
11
|
+
|
|
12
|
+
```js
|
|
13
|
+
import { browse, connect } from 'barebrowse';
|
|
14
|
+
|
|
15
|
+
// One-shot: read a page
|
|
16
|
+
const snapshot = await browse('https://any-page.com');
|
|
17
|
+
|
|
18
|
+
// Session: navigate, interact, observe
|
|
19
|
+
const page = await connect();
|
|
20
|
+
await page.goto('https://any-page.com');
|
|
21
|
+
console.log(await page.snapshot());
|
|
22
|
+
await page.click('8'); // ref from snapshot
|
|
23
|
+
await page.type('3', 'hello');
|
|
24
|
+
await page.scroll(500);
|
|
25
|
+
await page.close();
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Capabilities
|
|
31
|
+
|
|
32
|
+
Every action returns a **pruned ARIA snapshot** -- the agent's view of the page after each move. The snapshot is a YAML-like tree with `[ref=N]` markers on interactive elements. The agent reads the snapshot, picks a ref, acts, then reads the next snapshot. This is the observe-think-act loop.
|
|
33
|
+
|
|
34
|
+
### Actions
|
|
35
|
+
|
|
36
|
+
| Action | Method | What It Does | Status |
|
|
37
|
+
|--------|--------|-------------|--------|
|
|
38
|
+
| Navigate | `page.goto(url)` | Load a URL, wait for page load, dismiss consent | Done |
|
|
39
|
+
| Snapshot | `page.snapshot()` | Pruned ARIA tree (47-95% token reduction) | Done |
|
|
40
|
+
| Click | `page.click(ref)` | Scroll into view, mouse press+release at element center | Done |
|
|
41
|
+
| Type | `page.type(ref, text)` | Focus element, insert text (fast batch mode) | Done |
|
|
42
|
+
| Type (clear) | `page.type(ref, text, { clear: true })` | Select-all + delete, then type (replaces pre-filled content) | Done |
|
|
43
|
+
| Type (key events) | `page.type(ref, text, { keyEvents: true })` | Char-by-char keyDown/keyUp (triggers JS handlers) | Done |
|
|
44
|
+
| Press | `page.press(key)` | Special key: Enter, Tab, Escape, Backspace, Delete, arrows, Home/End, PageUp/Down, Space | Done |
|
|
45
|
+
| Scroll | `page.scroll(deltaY)` | Mouse wheel event (positive=down, negative=up) | Done |
|
|
46
|
+
| Hover | `page.hover(ref)` | Move mouse to element center (triggers hover styles/tooltips) | Done |
|
|
47
|
+
| Select | `page.select(ref, value)` | Set `<select>` value or click custom dropdown option | Done |
|
|
48
|
+
| Screenshot | `page.screenshot(opts)` | `Page.captureScreenshot`, returns base64 string | Done |
|
|
49
|
+
| Wait for nav | `page.waitForNavigation()` | Promise.race of loadEventFired + frameNavigated (SPA-aware) | Done |
|
|
50
|
+
| Wait for idle | `page.waitForNetworkIdle(opts)` | Resolve when no pending requests for N ms (default 500) | Done |
|
|
51
|
+
| Inject cookies | `page.injectCookies(url, opts)` | Extract cookies from Firefox/Chromium, inject via CDP | Done |
|
|
52
|
+
| Raw CDP | `page.cdp.send(method, params)` | Escape hatch for any CDP command | Done |
|
|
53
|
+
| Close | `page.close()` | Close page target, disconnect CDP, kill browser (if headless) | Done |
|
|
54
|
+
|
|
55
|
+
### Obstacle course -- what barebrowse handles automatically
|
|
56
|
+
|
|
57
|
+
| Obstacle | How It's Handled | Mode |
|
|
58
|
+
|----------|-----------------|------|
|
|
59
|
+
| **Cookie consent walls** (GDPR) | ARIA tree scan, jsClick accept button. 7 languages: EN, NL, DE, FR, ES, IT, PT | Both |
|
|
60
|
+
| **Consent in dialog role** | Detect `dialog`/`alertdialog` with consent hints, click accept inside | Both |
|
|
61
|
+
| **Consent outside dialog** (BBC SourcePoint) | Fallback global button scan when dialog has no accept button | Both |
|
|
62
|
+
| **Consent behind iframe overlay** | JS `.click()` via `DOM.resolveNode` bypasses z-index/overlay issues | Both |
|
|
63
|
+
| **Permission prompts** (location, notifications, camera, mic) | Launch flags + CDP `Browser.setPermission` auto-deny | Both |
|
|
64
|
+
| **Media autoplay blocked** | `--autoplay-policy=no-user-gesture-required` | Both |
|
|
65
|
+
| **Login walls** | Firefox cookie extraction, CDP injection (user's real sessions) | Both |
|
|
66
|
+
| **Pre-filled form inputs** | `type({ clear: true })` selects all + deletes before typing | Both |
|
|
67
|
+
| **Off-screen elements** | `DOM.scrollIntoViewIfNeeded` before every click | Both |
|
|
68
|
+
| **Form submission** | `press('Enter')` with proper `text: '\r'` triggers onsubmit | Both |
|
|
69
|
+
| **Tab between fields** | `press('Tab')` with `text: '\t'` moves focus | Both |
|
|
70
|
+
| **SPA navigation** (YouTube, GitHub) | `waitForNavigation()` uses frameNavigated + loadEventFired race | Both |
|
|
71
|
+
| **Bot detection** (Google, Reddit) | Stealth patches (headless) + headed mode with real cookies | Both |
|
|
72
|
+
| **`navigator.webdriver`** | Stealth patches: webdriver, plugins, languages, chrome object | Headless |
|
|
73
|
+
| **Profile locking** | Unique temp dir per headless instance (`/tmp/barebrowse-<pid>-<ts>`) | Headless |
|
|
74
|
+
| **ARIA noise** | 9-step pruning: wrapper collapse, noise removal, landmark promotion | Both |
|
|
75
|
+
|
|
76
|
+
### Not yet handled
|
|
77
|
+
|
|
78
|
+
| Obstacle | What's Needed | Difficulty |
|
|
79
|
+
|----------|--------------|------------|
|
|
80
|
+
| File upload | `Input.setFiles` via CDP | Low |
|
|
81
|
+
| Drag and drop | `Input.dispatchDragEvent` sequence | Medium |
|
|
82
|
+
| Infinite scroll | Scroll + wait for new content strategy | Medium |
|
|
83
|
+
| CAPTCHAs | Cannot solve -- headed mode lets user solve manually | N/A |
|
|
84
|
+
| Cross-origin iframes | Frame tree traversal via CDP | Medium |
|
|
85
|
+
| Canvas/WebGL | Opaque to ARIA -- needs screenshot + vision model | Hard |
|
|
86
|
+
|
|
87
|
+
### Tested sites (16+ sites, 8 countries, all consent dismissed)
|
|
88
|
+
|
|
89
|
+
| Site | Consent | Cookies | Interactions | Notes |
|
|
90
|
+
|------|---------|---------|-------------|-------|
|
|
91
|
+
| google.com | NL dialog dismissed | Firefox injection | Search (combobox + Enter) | Bot-blocks headless |
|
|
92
|
+
| youtube.com | Bypassed via cookies | Firefox injection | Search + video playback | Full e2e demo, SPA nav |
|
|
93
|
+
| bbc.com | SourcePoint dismissed | -- | -- | Button outside dialog |
|
|
94
|
+
| wikipedia.org | -- | -- | Link click + navigation | Clean, no consent |
|
|
95
|
+
| github.com | -- | -- | SPA navigation | Needs settle time |
|
|
96
|
+
| duckduckgo.com | -- | -- | Search + results | Headless-friendly |
|
|
97
|
+
| news.ycombinator.com | -- | -- | Story link click | Clean, simple DOM |
|
|
98
|
+
| amazon.de | Banner dismissed | -- | -- | |
|
|
99
|
+
| theguardian.com | CMP dismissed | -- | -- | |
|
|
100
|
+
| spiegel.de | CMP dismissed | -- | -- | German |
|
|
101
|
+
| lemonde.fr | CMP dismissed | -- | -- | French |
|
|
102
|
+
| elpais.com | CMP dismissed | -- | -- | Spanish |
|
|
103
|
+
| corriere.it | CMP dismissed | -- | -- | Italian |
|
|
104
|
+
| nos.nl | CMP dismissed | -- | -- | Dutch |
|
|
105
|
+
| bild.de | CMP dismissed | -- | -- | German |
|
|
106
|
+
| nu.nl | CMP dismissed | -- | -- | Dutch |
|
|
107
|
+
| booking.com | Banner dismissed | -- | -- | |
|
|
108
|
+
| nytimes.com | -- | -- | -- | No consent wall |
|
|
109
|
+
| stackoverflow.com | Footer link only | -- | -- | Not blocking |
|
|
110
|
+
| cnn.com | -- | -- | -- | No consent wall |
|
|
111
|
+
| reddit.com | -- | -- | Fallback to old.reddit | Bot-blocks headless |
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Architecture
|
|
116
|
+
|
|
117
|
+
### Full pipeline: browse(url) or connect() -> goto(url)
|
|
118
|
+
|
|
119
|
+
```
|
|
120
|
+
1. LAUNCH chromium.js finds installed browser
|
|
121
|
+
Headless: spawn fresh Chromium with permission flags
|
|
122
|
+
Headed: connect to running browser on CDP port
|
|
123
|
+
Hybrid: try headless, detect challenge page, fallback to headed
|
|
124
|
+
|
|
125
|
+
2. CDP CONNECTION cdp.js opens WebSocket to browser
|
|
126
|
+
Creates page target, attaches flattened session
|
|
127
|
+
Enables Page, Network, DOM domains
|
|
128
|
+
|
|
129
|
+
3. STEALTH stealth.js (headless only)
|
|
130
|
+
Page.addScriptToEvaluateOnNewDocument before any page scripts
|
|
131
|
+
Patches: navigator.webdriver, plugins, languages, chrome object
|
|
132
|
+
|
|
133
|
+
4. PERMISSIONS Browser.setPermission denies all prompts
|
|
134
|
+
geo, notifications, camera, mic, midi, sensors, idle
|
|
135
|
+
|
|
136
|
+
5. AUTH auth.js extracts cookies from user's browser
|
|
137
|
+
Firefox: SQLite cookies.sqlite (plaintext)
|
|
138
|
+
Chromium: SQLite Cookies + AES decrypt via keyring
|
|
139
|
+
Injects via Network.setCookie before navigation
|
|
140
|
+
|
|
141
|
+
6. NAVIGATE Page.navigate(url), wait for Page.loadEventFired
|
|
142
|
+
500ms settle for dynamic content
|
|
143
|
+
|
|
144
|
+
7. CONSENT consent.js scans ARIA tree post-load
|
|
145
|
+
Finds dialog/alertdialog with consent hints
|
|
146
|
+
Falls back to global button scan (BBC SourcePoint pattern)
|
|
147
|
+
jsClick via DOM.resolveNode (bypasses iframe overlays)
|
|
148
|
+
|
|
149
|
+
8. SNAPSHOT Accessibility.getFullAXTree -> nested tree (aria.js)
|
|
150
|
+
prune.js: 9-step pipeline (47-95% token reduction)
|
|
151
|
+
Output: YAML-like text with [ref=N] markers
|
|
152
|
+
|
|
153
|
+
9. INTERACT interact.js dispatches real CDP Input events
|
|
154
|
+
click: scrollIntoView -> getBoxModel -> mousePressed/Released
|
|
155
|
+
type: DOM.focus -> insertText or keyDown/keyUp per char
|
|
156
|
+
press: special keys (Enter, Tab, Escape, arrows, etc.)
|
|
157
|
+
scroll: mouseWheel events
|
|
158
|
+
hover: mouseMoved at element center
|
|
159
|
+
select: set <select> value or click custom dropdown option
|
|
160
|
+
|
|
161
|
+
10. OBSERVE AGAIN Back to step 8. Refs are ephemeral -- fresh snapshot needed.
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
### Module table
|
|
165
|
+
|
|
166
|
+
Eleven modules, 2,396 lines, zero required dependencies.
|
|
167
|
+
|
|
168
|
+
| Module | Lines | Purpose |
|
|
169
|
+
|---|---|---|
|
|
170
|
+
| `src/index.js` | 434 | Public API: `browse()`, `connect()`, screenshot, network idle, hybrid |
|
|
171
|
+
| `src/cdp.js` | 148 | WebSocket CDP client, flattened sessions |
|
|
172
|
+
| `src/chromium.js` | 148 | Find/launch Chromium browsers, permission-suppressing flags |
|
|
173
|
+
| `src/aria.js` | 69 | Format ARIA tree as YAML-like text |
|
|
174
|
+
| `src/auth.js` | 279 | Cookie extraction (Chromium AES + keyring, Firefox), CDP injection |
|
|
175
|
+
| `src/prune.js` | 472 | ARIA pruning pipeline (9-step, ported from mcprune) |
|
|
176
|
+
| `src/interact.js` | 208 | Click, type, press, scroll, hover, select |
|
|
177
|
+
| `src/consent.js` | 210 | Auto-dismiss cookie consent dialogs, 7 languages |
|
|
178
|
+
| `src/stealth.js` | 51 | Navigator patches for headless anti-detection |
|
|
179
|
+
| `src/bareagent.js` | 161 | Tool adapter for bareagent Loop |
|
|
180
|
+
| `mcp-server.js` | 216 | MCP server (JSON-RPC 2.0 over stdio) |
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## What's Built
|
|
185
|
+
|
|
186
|
+
### Headless mode -- done
|
|
187
|
+
Spawn a fresh Chromium, navigate, snapshot, close. Default mode.
|
|
188
|
+
- Cookie extraction from user's Firefox or Chromium profile
|
|
189
|
+
- Cookie injection via `Network.setCookie` before navigation
|
|
190
|
+
- ARIA tree extraction via `Accessibility.getFullAXTree`
|
|
191
|
+
- 9-step pruning: landmarks, noise removal, wrapper collapsing, context filtering
|
|
192
|
+
- 47-95% token reduction depending on page complexity
|
|
193
|
+
- Permission prompts auto-suppressed (notifications, geolocation, camera, mic)
|
|
194
|
+
- Stealth patches: `navigator.webdriver`, plugins, languages, chrome object
|
|
195
|
+
|
|
196
|
+
### Headed mode -- done
|
|
197
|
+
Connect to an already-running browser on a CDP debug port.
|
|
198
|
+
- Same ARIA + prune pipeline
|
|
199
|
+
- Manual cookie injection via `page.injectCookies(url, { browser })` (e.g. inject Firefox cookies into headed Chromium)
|
|
200
|
+
- Permission prompts suppressed via CDP `Browser.setPermission`
|
|
201
|
+
- User must launch browser with `--remote-debugging-port=9222`
|
|
202
|
+
|
|
203
|
+
### Hybrid mode -- done
|
|
204
|
+
Try headless first. If bot-blocked (Cloudflare, etc.), fall back to headed automatically.
|
|
205
|
+
- Detection: heuristic on ARIA tree for challenge phrases ("Just a moment", "Checking your browser")
|
|
206
|
+
- Fallback: kill headless, connect to user's running browser on port 9222, re-navigate
|
|
207
|
+
- One flag: `mode: 'hybrid'`
|
|
208
|
+
|
|
209
|
+
### Interactions -- done, real-world tested
|
|
210
|
+
On `connect()` sessions: `click(ref)`, `type(ref, text, opts)`, `press(key)`, `scroll(deltaY)`, `hover(ref)`, `select(ref, value)`, `screenshot()`, `waitForNavigation()`, `waitForNetworkIdle()`, `injectCookies(url, opts)`.
|
|
211
|
+
- Refs come from ARIA snapshot (`[ref=N]` markers)
|
|
212
|
+
- Click: `DOM.scrollIntoViewIfNeeded` -> `DOM.getBoxModel` -> center -> `Input.dispatchMouseEvent`
|
|
213
|
+
- Type: `DOM.focus` + `Input.insertText` (fast) or `Input.dispatchKeyEvent` (triggers handlers)
|
|
214
|
+
- Type with `{ clear: true }`: select-all (Ctrl+A) + delete before typing
|
|
215
|
+
- Press: special keys (Enter, Tab, Escape, Backspace, arrows) with proper key/code/keyCode
|
|
216
|
+
- Scroll: `Input.dispatchMouseEvent` mouseWheel
|
|
217
|
+
- Hover: `DOM.scrollIntoViewIfNeeded` -> `Input.dispatchMouseEvent` mouseMoved
|
|
218
|
+
- Select: native `<select>` (set value + change event) or custom dropdown (click + find option)
|
|
219
|
+
- Screenshot: `Page.captureScreenshot` -> base64 string (png/jpeg/webp)
|
|
220
|
+
- WaitForNavigation: `Promise.race` of `Page.loadEventFired` + `Page.frameNavigated` (SPA-aware)
|
|
221
|
+
- WaitForNetworkIdle: track pending requests, resolve when 0 for N ms
|
|
222
|
+
|
|
223
|
+
**Real-world tested against:** Google, Wikipedia, GitHub (SPA), Hacker News, DuckDuckGo, YouTube (search + video playback), example.com
|
|
224
|
+
|
|
225
|
+
### Cookie consent auto-dismiss -- done
|
|
226
|
+
Automatically detects and dismisses GDPR/cookie consent dialogs after page load.
|
|
227
|
+
- Scans ARIA tree for `dialog`/`alertdialog` with consent-related content
|
|
228
|
+
- Falls back to global button scan for sites that don't use dialog roles (e.g. BBC SourcePoint)
|
|
229
|
+
- Uses JS `.click()` via `DOM.resolveNode` + `Runtime.callFunctionOn` to bypass iframe overlays
|
|
230
|
+
- Multi-language: EN, NL, DE, FR, ES, IT, PT button text patterns
|
|
231
|
+
- Opt-out via `{ consent: false }`
|
|
232
|
+
- Works in both headless and headed modes
|
|
233
|
+
|
|
234
|
+
**Tested against 16+ sites across 8 countries, 0 consent dialogs remaining.**
|
|
235
|
+
|
|
236
|
+
### Permission suppression -- done
|
|
237
|
+
Chrome permission prompts (location, notifications, camera, mic, etc.) are suppressed automatically.
|
|
238
|
+
- Headless: launch flags (`--disable-notifications`, `--autoplay-policy=no-user-gesture-required`, `--use-fake-device-for-media-stream`, `--use-fake-ui-for-media-stream`, `--disable-features=MediaRouter`)
|
|
239
|
+
- Both modes: CDP `Browser.setPermission` denies geolocation, notifications, midi, audioCapture, videoCapture, sensors, idleDetection, etc.
|
|
240
|
+
- No user prompt ever appears -- agents browse without interruption
|
|
241
|
+
|
|
242
|
+
### Cross-browser cookie injection -- done
|
|
243
|
+
Firefox cookies (user's default browser) extracted from SQLite -> injected into headless or headed Chromium via CDP `Network.setCookie`. No need to use Chromium as daily browser.
|
|
244
|
+
- `browse()`: auto-injects cookies before navigation (opt-out with `{ cookies: false }`)
|
|
245
|
+
- `connect()`: manual injection via `page.injectCookies(url, { browser: 'firefox' })`
|
|
246
|
+
- Proven: YouTube login session transferred from Firefox -> headed Chromium -> video playback
|
|
247
|
+
|
|
248
|
+
### Stealth patches -- done
|
|
249
|
+
Anti-detection for headless mode via `Page.addScriptToEvaluateOnNewDocument` (runs before page scripts).
|
|
250
|
+
- `navigator.webdriver` -> undefined
|
|
251
|
+
- `navigator.plugins` -> fake 3 plugins
|
|
252
|
+
- `navigator.languages` -> `['en-US', 'en']`
|
|
253
|
+
- `window.chrome` -> fake object
|
|
254
|
+
- `Permissions.prototype.query` -> notifications return 'prompt'
|
|
255
|
+
- Applied automatically in headless mode
|
|
256
|
+
|
|
257
|
+
### Tests -- 54 passing
|
|
258
|
+
- 16 unit tests (pruning logic)
|
|
259
|
+
- 7 unit tests (cookie extraction -- 2 skip when Chromium profile locked)
|
|
260
|
+
- 5 unit tests (CDP client + browser launch)
|
|
261
|
+
- 11 integration tests (end-to-end browse pipeline)
|
|
262
|
+
- 15 integration tests (real-world interactions: data: URL fixture + live sites)
|
|
263
|
+
|
|
264
|
+
---
|
|
265
|
+
|
|
266
|
+
## Integrations
|
|
267
|
+
|
|
268
|
+
### bareagent -- tool adapter
|
|
269
|
+
|
|
270
|
+
`createBrowseTools(opts)` returns bareagent-compatible tools for the Loop:
|
|
271
|
+
|
|
272
|
+
```js
|
|
273
|
+
import { Loop } from 'bare-agent';
|
|
274
|
+
import { Anthropic } from 'bare-agent/providers';
|
|
275
|
+
import { createBrowseTools } from 'barebrowse/bareagent';
|
|
276
|
+
|
|
277
|
+
const { tools, close } = createBrowseTools();
|
|
278
|
+
const loop = new Loop({ provider: new Anthropic({ apiKey }) });
|
|
279
|
+
const result = await loop.run(messages, tools);
|
|
280
|
+
await close();
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
9 tools: browse, goto, snapshot, click, type, press, scroll, select, screenshot.
|
|
284
|
+
Action tools auto-return snapshot (300ms settle delay). The LLM always sees the result.
|
|
285
|
+
|
|
286
|
+
### MCP server
|
|
287
|
+
|
|
288
|
+
Raw JSON-RPC 2.0 over stdio. Zero SDK dependencies. Run `npm install barebrowse`, then add this to your MCP client config:
|
|
289
|
+
|
|
290
|
+
```json
|
|
291
|
+
{
|
|
292
|
+
"mcpServers": {
|
|
293
|
+
"barebrowse": {
|
|
294
|
+
"command": "npx",
|
|
295
|
+
"args": ["barebrowse", "mcp"]
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
7 tools: browse (one-shot), goto, snapshot, click, type, press, scroll.
|
|
302
|
+
Action tools return `'ok'` -- agent calls `snapshot` explicitly (MCP tool calls are cheap to chain).
|
|
303
|
+
Session tools share a singleton page, lazy-created on first use.
|
|
304
|
+
|
|
305
|
+
---
|
|
306
|
+
|
|
307
|
+
## Ecosystem
|
|
308
|
+
|
|
309
|
+
```
|
|
310
|
+
bareagent = the brain (orchestration, LLM loop, memory, retries)
|
|
311
|
+
barebrowse = the eyes + hands (browse, read, interact with the web)
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
**barebrowse is a library.** bareagent imports it as a capability. barebrowse doesn't know about bareagent. bareagent doesn't know about CDP. Clean boundary. Each ships and tests independently.
|
|
315
|
+
|
|
316
|
+
---
|
|
317
|
+
|
|
318
|
+
## Constraints
|
|
319
|
+
|
|
320
|
+
- **Chromium-only.** CDP protocol. Covers Chrome, Chromium, Edge, Brave, Vivaldi, Arc, Opera (~80% desktop share). Firefox later via WebDriver BiDi.
|
|
321
|
+
- **Linux first.** Tested on Fedora/KDE. macOS/Windows cookie extraction paths exist in auth.js but are untested.
|
|
322
|
+
- **Node >= 22.** Built-in WebSocket, built-in SQLite.
|
|
323
|
+
- **Not a server.** Library that agents import. Wrap as MCP (included) or HTTP if needed.
|
|
324
|
+
- **Not cross-platform tested.** Developed and tested on a single Linux machine; behavior on other platforms is unverified.
|
|
325
|
+
|
|
326
|
+
---
|
|
327
|
+
|
|
328
|
+
## File Map
|
|
329
|
+
|
|
330
|
+
```
|
|
331
|
+
barebrowse/
|
|
332
|
+
├── src/
|
|
333
|
+
│ ├── index.js # Public API: browse(), connect(), screenshot, network idle, hybrid
|
|
334
|
+
│ ├── cdp.js # WebSocket CDP client
|
|
335
|
+
│ ├── chromium.js # Find/launch Chromium, permission flags
|
|
336
|
+
│ ├── aria.js # ARIA tree formatting
|
|
337
|
+
│ ├── auth.js # Cookie extraction + injection
|
|
338
|
+
│ ├── prune.js # ARIA pruning (9-step pipeline)
|
|
339
|
+
│ ├── interact.js # Click, type, press, scroll, hover, select
|
|
340
|
+
│ ├── consent.js # Auto-dismiss cookie consent dialogs
|
|
341
|
+
│ ├── stealth.js # Navigator patches for headless anti-detection
|
|
342
|
+
│ └── bareagent.js # Tool adapter for bareagent Loop
|
|
343
|
+
├── test/
|
|
344
|
+
│ ├── unit/ # prune, auth, cdp tests
|
|
345
|
+
│ └── integration/ # browse + interact tests (real sites)
|
|
346
|
+
├── examples/
|
|
347
|
+
│ ├── headed-demo.js # Interactive demo: Wikipedia → DuckDuckGo
|
|
348
|
+
│ └── yt-demo.js # YouTube demo: Firefox cookies → search → play video
|
|
349
|
+
├── docs/
|
|
350
|
+
│ ├── prd.md # Decisions + rationale (reference)
|
|
351
|
+
│ ├── poc-plan.md # Original POC phases + DoD
|
|
352
|
+
│ ├── blueprint.md # This file
|
|
353
|
+
│ └── testing.md # Test guide: pyramid, all 54 tests, CI strategy
|
|
354
|
+
├── mcp-server.js # MCP server (JSON-RPC 2.0 over stdio)
|
|
355
|
+
├── cli.js # CLI entry: `npx barebrowse mcp`, `npx barebrowse install`, or `npx barebrowse browse <url>`
|
|
356
|
+
├── .mcp.json # Project-level MCP server config (Claude Code)
|
|
357
|
+
├── barebrowse.context.md # LLM-consumable integration guide
|
|
358
|
+
├── package.json
|
|
359
|
+
├── README.md
|
|
360
|
+
└── CLAUDE.md
|
|
361
|
+
```
|