barebrowse 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +70 -0
- package/package.json +19 -0
- package/src/index.js +258 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 hamr0
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# barebrowse
|
|
2
|
+
|
|
3
|
+
Vanilla JS library. CDP-direct. URL in, pruned ARIA snapshot out.
|
|
4
|
+
No Playwright, no bundled browser, no build step.
|
|
5
|
+
|
|
6
|
+
## What It Does
|
|
7
|
+
|
|
8
|
+
Gives autonomous agents authenticated access to the web through the user's own Chromium browser.
|
|
9
|
+
|
|
10
|
+
```js
|
|
11
|
+
import { browse, connect } from 'barebrowse';
|
|
12
|
+
|
|
13
|
+
// One-shot: read a page
|
|
14
|
+
const snapshot = await browse('https://any-page.com');
|
|
15
|
+
|
|
16
|
+
// Session: navigate, interact, observe
|
|
17
|
+
const page = await connect();
|
|
18
|
+
await page.goto('https://any-page.com');
|
|
19
|
+
console.log(await page.snapshot());
|
|
20
|
+
await page.click('8'); // ref from snapshot
|
|
21
|
+
await page.type('3', 'hello');
|
|
22
|
+
await page.scroll(500);
|
|
23
|
+
await page.close();
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Features
|
|
27
|
+
|
|
28
|
+
- **CDP direct** — no Playwright, no 200MB download, uses your installed Chromium
|
|
29
|
+
- **ARIA snapshots** — semantic, token-efficient output for LLMs
|
|
30
|
+
- **Built-in pruning** — 47-95% token reduction via 9-step pipeline
|
|
31
|
+
- **Cookie extraction** — authenticated browsing from your existing sessions (Chromium + Firefox)
|
|
32
|
+
- **Interactions** — click, type, scroll via CDP Input domain
|
|
33
|
+
- **Three modes** — headless (default), headed (connect to running browser), hybrid (planned)
|
|
34
|
+
|
|
35
|
+
## Architecture
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
URL → chromium.js (find/launch browser)
|
|
39
|
+
→ cdp.js (WebSocket CDP client)
|
|
40
|
+
→ auth.js (extract cookies → inject via CDP)
|
|
41
|
+
→ Page.navigate
|
|
42
|
+
→ aria.js (Accessibility.getFullAXTree → nested tree)
|
|
43
|
+
→ prune.js (9-step role-based pruning)
|
|
44
|
+
→ interact.js (click/type/scroll via Input domain)
|
|
45
|
+
→ agent-ready snapshot
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Seven modules, ~1,400 lines, zero required dependencies.
|
|
49
|
+
|
|
50
|
+
## Requirements
|
|
51
|
+
|
|
52
|
+
- Node.js >= 22
|
|
53
|
+
- Any installed Chromium-based browser (Chrome, Chromium, Brave, Edge, Vivaldi, Arc, Opera)
|
|
54
|
+
|
|
55
|
+
## Ecosystem
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
bareagent = the brain (orchestration, LLM loop, memory, retries)
|
|
59
|
+
barebrowse = the eyes + hands (browse, read, interact with the web)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
barebrowse is a library. bareagent imports it as a capability.
|
|
63
|
+
|
|
64
|
+
## Status
|
|
65
|
+
|
|
66
|
+
Early development. API may change.
|
|
67
|
+
|
|
68
|
+
## License
|
|
69
|
+
|
|
70
|
+
MIT
|
package/package.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "barebrowse",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Authenticated web browsing for autonomous agents via CDP",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "src/index.js",
|
|
7
|
+
"exports": "./src/index.js",
|
|
8
|
+
"engines": {
|
|
9
|
+
"node": ">=22"
|
|
10
|
+
},
|
|
11
|
+
"scripts": {
|
|
12
|
+
"test": "node --test test/**/*.test.js"
|
|
13
|
+
},
|
|
14
|
+
"license": "MIT",
|
|
15
|
+
"files": [
|
|
16
|
+
"README.md",
|
|
17
|
+
"LICENSE"
|
|
18
|
+
]
|
|
19
|
+
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* barebrowse — Authenticated web browsing for autonomous agents via CDP.
|
|
3
|
+
*
|
|
4
|
+
* One package. One import. Three modes.
|
|
5
|
+
*
|
|
6
|
+
* Usage:
|
|
7
|
+
* import { browse, connect } from 'barebrowse';
|
|
8
|
+
* const snapshot = await browse('https://example.com');
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { launch, getDebugUrl } from './chromium.js';
|
|
12
|
+
import { createCDP } from './cdp.js';
|
|
13
|
+
import { formatTree } from './aria.js';
|
|
14
|
+
import { authenticate } from './auth.js';
|
|
15
|
+
import { prune as pruneTree } from './prune.js';
|
|
16
|
+
import { click as cdpClick, type as cdpType, scroll as cdpScroll, press as cdpPress } from './interact.js';
|
|
17
|
+
|
|
18
|
+
/**
 * Browse a URL and return an ARIA snapshot.
 * This is the primary API — URL in, agent-ready snapshot out.
 *
 * Any mode other than 'headed' currently launches a fresh browser
 * ('hybrid' has no dedicated handling yet and takes the same path).
 *
 * @param {string} url - The URL to browse
 * @param {object} [opts]
 * @param {'headless'|'headed'|'hybrid'} [opts.mode='headless'] - Browser mode
 * @param {boolean} [opts.cookies=true] - Attempt cookie injection before navigating;
 *   pass `false` to skip. Failures are ignored so public pages still work.
 * @param {string} [opts.browser] - Forwarded unchanged to `authenticate()`
 *   (presumably selects the cookie source browser — confirm in auth.js)
 * @param {boolean} [opts.prune=true] - Apply ARIA pruning; pass `false` for the raw tree
 * @param {string} [opts.pruneMode='act'] - Forwarded to the pruner as its `mode`
 * @param {number} [opts.timeout=30000] - Navigation timeout in ms
 * @param {number} [opts.port=9222] - CDP port for headed mode
 * @returns {Promise<string>} ARIA snapshot text
 */
export async function browse(url, opts = {}) {
  const mode = opts.mode || 'headless';
  const timeout = opts.timeout || 30000;

  let browser = null;
  let cdp = null;
  let page = null;

  try {
    // Step 1: Get a CDP connection
    if (mode === 'headed') {
      const port = opts.port || 9222;
      const wsUrl = await getDebugUrl(port);
      cdp = await createCDP(wsUrl);
    } else {
      // headless (hybrid fallback logic comes in Phase 4)
      browser = await launch();
      cdp = await createCDP(browser.wsUrl);
    }

    // Step 2: Create a new page target and attach
    page = await createPage(cdp);

    // Step 3: Cookie injection — extract from user's browser, inject via CDP
    if (opts.cookies !== false) {
      try {
        await authenticate(page.session, url, { browser: opts.browser });
      } catch {
        // Deliberate best-effort: no cookies found — continue without auth
        // (public pages still work)
      }
    }

    // Step 4: Navigate and wait for load
    await navigate(page, url, timeout);

    // Step 5: Get ARIA tree
    const { tree } = await ariaTree(page);

    // Step 6: Prune for agent consumption (unless explicitly disabled)
    if (opts.prune === false) return formatTree(tree);
    const pruned = pruneTree(tree, { mode: opts.pruneMode || 'act' });
    return formatTree(pruned);
  } finally {
    // Step 7: Clean up on success AND failure. In headed mode the tab lives in
    // the user's own browser, so it must be closed even when navigation or
    // snapshotting threw; otherwise every failed call leaks a tab.
    try {
      if (cdp && page) {
        try {
          await cdp.send('Target.closeTarget', { targetId: page.targetId });
        } catch {
          // Best-effort: a failure to close the tab must not mask the
          // snapshot result or the original error.
        }
      }
      if (cdp) cdp.close();
    } finally {
      // Always reap the browser we launched, even if closing the socket threw.
      if (browser) browser.process.kill();
    }
  }
}
|
|
86
|
+
|
|
87
|
+
/**
 * Connect to a browser for a long-lived interactive session.
 *
 * In 'headed' mode this attaches to an already-running browser via its debug
 * port; in any other mode it launches a new browser that is killed by
 * `close()`. If setup fails part-way, everything acquired so far is released
 * before the error is rethrown.
 *
 * Element refs accepted by `click` and `type` are looked up in the map built
 * by the most recent `snapshot()` call, so `snapshot()` must be called first
 * (and again after the page changes).
 *
 * @param {object} [opts]
 * @param {'headless'|'headed'} [opts.mode='headless'] - Browser mode
 * @param {number} [opts.port=9222] - CDP port for headed mode
 * @returns {Promise<object>} Page handle with goto, snapshot, click, type,
 *   scroll, press, waitForNavigation, close, and the raw `cdp` session
 */
export async function connect(opts = {}) {
  const mode = opts.mode || 'headless';
  let browser = null;
  let cdp = null;
  let page;

  try {
    if (mode === 'headed') {
      const port = opts.port || 9222;
      const wsUrl = await getDebugUrl(port);
      cdp = await createCDP(wsUrl);
    } else {
      browser = await launch();
      cdp = await createCDP(browser.wsUrl);
    }

    page = await createPage(cdp);
  } catch (err) {
    // Setup failed: without this the launched browser process (and the CDP
    // socket) would be orphaned, since the caller never receives a handle
    // to close.
    try {
      if (cdp) cdp.close();
    } finally {
      if (browser) browser.process.kill();
    }
    throw err;
  }

  // ref -> backendDOMNodeId, replaced wholesale on every snapshot()
  let refMap = new Map();

  // Translate a snapshot ref into a backend DOM node id, or throw if unknown.
  const resolveRef = (ref) => {
    const backendNodeId = refMap.get(ref);
    if (!backendNodeId) throw new Error(`No element found for ref "${ref}"`);
    return backendNodeId;
  };

  return {
    /** Navigate the page to `url` and wait for load (timeout in ms). */
    async goto(url, timeout = 30000) {
      await navigate(page, url, timeout);
    },

    /**
     * Capture the ARIA tree as text and refresh the ref map.
     * Pass `false` for the unpruned tree, or `{ mode }` to pick a prune mode
     * (defaults to 'act').
     */
    async snapshot(pruneOpts) {
      const result = await ariaTree(page);
      refMap = result.refMap;
      if (pruneOpts === false) return formatTree(result.tree);
      const pruned = pruneTree(result.tree, { mode: pruneOpts?.mode || 'act' });
      return formatTree(pruned);
    },

    /** Click the element identified by a ref from the latest snapshot. */
    async click(ref) {
      await cdpClick(page.session, resolveRef(ref));
    },

    /** Type `text` into the element identified by a ref from the latest snapshot. */
    async type(ref, text, opts) {
      await cdpType(page.session, resolveRef(ref), text, opts);
    },

    /** Scroll the page by `deltaY`. */
    async scroll(deltaY) {
      await cdpScroll(page.session, deltaY);
    },

    /** Press a key. */
    async press(key) {
      await cdpPress(page.session, key);
    },

    /** Wait for the next `Page.loadEventFired` (timeout in ms). */
    waitForNavigation(timeout = 30000) {
      return page.session.once('Page.loadEventFired', timeout);
    },

    /** Raw CDP session for escape hatch */
    cdp: page.session,

    /**
     * Close the page target, the CDP connection, and (if we launched it) the
     * browser. A failure to close the target still propagates, but only after
     * the connection and browser process have been released.
     */
    async close() {
      try {
        await cdp.send('Target.closeTarget', { targetId: page.targetId });
      } finally {
        try {
          cdp.close();
        } finally {
          if (browser) browser.process.kill();
        }
      }
    },
  };
}
|
|
159
|
+
|
|
160
|
+
// --- Internal helpers ---
|
|
161
|
+
|
|
162
|
+
/**
 * Create a new page target and return a session-scoped handle.
 *
 * Opens an `about:blank` target, attaches to it in flat-session mode, and
 * enables the Page, Network, and DOM domains on the resulting session.
 * If anything fails after the target was created, the target is closed
 * (best-effort) before the original error is rethrown, so a failed setup
 * does not leave a stray tab behind.
 *
 * @param {object} cdp - Browser-level CDP client exposing `send(method, params)`
 *   and `session(sessionId)`
 * @returns {Promise<{session: object, targetId: string, sessionId: string}>}
 */
async function createPage(cdp) {
  const { targetId } = await cdp.send('Target.createTarget', { url: 'about:blank' });

  try {
    const { sessionId } = await cdp.send('Target.attachToTarget', {
      targetId,
      flatten: true,
    });

    const session = cdp.session(sessionId);

    // Enable required CDP domains on this page
    await session.send('Page.enable');
    await session.send('Network.enable');
    await session.send('DOM.enable');

    return { session, targetId, sessionId };
  } catch (err) {
    try {
      await cdp.send('Target.closeTarget', { targetId });
    } catch {
      // Best-effort cleanup: the setup error below is the one worth reporting.
    }
    throw err;
  }
}
|
|
181
|
+
|
|
182
|
+
/**
 * Navigate to a URL and wait for the page to load.
 *
 * The load-event listener is registered before `Page.navigate` is sent, so
 * the wait is already in place when the navigation starts. After the load
 * event, a fixed 500 ms pause gives dynamic content a moment to settle.
 *
 * @param {object} page - Page handle from `createPage` (uses `page.session`)
 * @param {string} url - Destination URL
 * @param {number} [timeout=30000] - Passed to `session.once` as the wait limit in ms
 * @returns {Promise<void>}
 */
async function navigate(page, url, timeout = 30000) {
  const loadPromise = Promise.resolve(page.session.once('Page.loadEventFired', timeout));

  // If `Page.navigate` rejects below, nobody ever awaits loadPromise; its
  // eventual rejection would then surface as an unhandled rejection.
  // Attaching a no-op handler marks it as handled without changing what
  // `await loadPromise` observes on the normal path.
  loadPromise.catch(() => {});

  await page.session.send('Page.navigate', { url });
  await loadPromise;

  // Brief settle time for dynamic content
  await new Promise((resolve) => setTimeout(resolve, 500));
}
|
|
192
|
+
|
|
193
|
+
/**
 * Fetch the page's full accessibility tree over CDP.
 *
 * Enables the Accessibility domain, requests every AX node, and returns both
 * the nested tree (via `buildTree`) and a lookup from AX node id to the
 * node's `backendDOMNodeId` for nodes that have one.
 *
 * @param {object} page - Page handle from `createPage` (uses `page.session`)
 * @returns {Promise<{tree: object|null, refMap: Map}>}
 */
async function ariaTree(page) {
  const { session } = page;

  await session.send('Accessibility.enable');
  const { nodes } = await session.send('Accessibility.getFullAXTree');

  const tree = buildTree(nodes);

  // Single pass over the raw CDP nodes: AX nodeId -> backendDOMNodeId.
  // Nodes without a backing DOM node are left out of the map.
  const refMap = new Map();
  for (const { nodeId, backendDOMNodeId } of nodes) {
    if (!backendDOMNodeId) continue;
    refMap.set(nodeId, backendDOMNodeId);
  }

  return { tree, refMap };
}
|
|
211
|
+
|
|
212
|
+
/**
 * Turn CDP's flat AXNode list into a nested tree.
 *
 * Linking relies solely on each node's `parentId` (never `childIds`), and a
 * node is attached to at most one parent. The first node without a
 * `parentId` becomes the root. Nodes whose parent id is not present in the
 * list are not attached anywhere.
 *
 * @param {Array<object>} nodes - Raw AX nodes from `Accessibility.getFullAXTree`
 * @returns {object|null} Root tree node, or null for empty/missing input
 */
function buildTree(nodes) {
  if (!nodes || nodes.length === 0) return null;

  const byId = new Map();
  const attached = new Set(); // ids already placed under a parent

  // Pass 1: one tree node per AX node, keyed by nodeId
  for (const raw of nodes) {
    const entry = {
      nodeId: raw.nodeId,
      backendDOMNodeId: raw.backendDOMNodeId,
      role: raw.role?.value || '',
      name: raw.name?.value || '',
      properties: extractProps(raw.properties),
      ignored: raw.ignored || false,
      children: [],
    };
    byId.set(raw.nodeId, entry);
  }

  // Pass 2: hang each node under its parent, using parentId only
  let root = null;
  for (const raw of nodes) {
    const entry = byId.get(raw.nodeId);

    if (!raw.parentId) {
      // Parentless node: only the first one seen is kept as root
      if (!root) root = entry;
      continue;
    }

    if (attached.has(raw.nodeId)) continue;

    const parentEntry = byId.get(raw.parentId);
    if (!parentEntry) continue;

    parentEntry.children.push(entry);
    attached.add(raw.nodeId);
  }

  return root;
}
|
|
252
|
+
|
|
253
|
+
/**
 * Flatten a CDP AX property list into a plain `{ name: value }` object.
 *
 * Each entry contributes `entry.value.value` under `entry.name`; an entry
 * with no `value` yields `undefined`. When a name repeats, the last entry wins.
 *
 * @param {Array<object>} [props] - AX node `properties` array
 * @returns {object} Name-to-value map; empty when `props` is missing
 */
function extractProps(props) {
  const flat = {};
  if (!props) return flat;

  for (const { name, value } of props) {
    flat[name] = value?.value;
  }

  return flat;
}
|