barebrowse 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +70 -0
  3. package/package.json +19 -0
  4. package/src/index.js +258 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 hamr0
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,70 @@
1
+ # barebrowse
2
+
3
+ Vanilla JS library. CDP-direct. URL in, pruned ARIA snapshot out.
4
+ No Playwright, no bundled browser, no build step.
5
+
6
+ ## What It Does
7
+
8
+ Gives autonomous agents authenticated access to the web through the user's own Chromium browser.
9
+
10
+ ```js
11
+ import { browse, connect } from 'barebrowse';
12
+
13
+ // One-shot: read a page
14
+ const snapshot = await browse('https://any-page.com');
15
+
16
+ // Session: navigate, interact, observe
17
+ const page = await connect();
18
+ await page.goto('https://any-page.com');
19
+ console.log(await page.snapshot());
20
+ await page.click('8'); // ref from snapshot
21
+ await page.type('3', 'hello');
22
+ await page.scroll(500);
23
+ await page.close();
24
+ ```
25
+
26
+ ## Features
27
+
28
+ - **CDP direct** — no Playwright, no 200MB download, uses your installed Chromium
29
+ - **ARIA snapshots** — semantic, token-efficient output for LLMs
30
+ - **Built-in pruning** — 47-95% token reduction via 9-step pipeline
31
+ - **Cookie extraction** — authenticated browsing from your existing sessions (Chromium + Firefox)
32
+ - **Interactions** — click, type, scroll via CDP Input domain
33
+ - **Three modes** — headless (default), headed (connect to running browser), hybrid (planned)
34
+
35
+ ## Architecture
36
+
37
+ ```
38
+ URL → chromium.js (find/launch browser)
39
+ → cdp.js (WebSocket CDP client)
40
+ → auth.js (extract cookies → inject via CDP)
41
+ → Page.navigate
42
+ → aria.js (Accessibility.getFullAXTree → nested tree)
43
+ → prune.js (9-step role-based pruning)
44
+ → interact.js (click/type/scroll via Input domain)
45
+ → agent-ready snapshot
46
+ ```
47
+
48
+ Seven modules, ~1,400 lines, zero required dependencies.
49
+
50
+ ## Requirements
51
+
52
+ - Node.js >= 22
53
+ - Any installed Chromium-based browser (Chrome, Chromium, Brave, Edge, Vivaldi, Arc, Opera)
54
+
55
+ ## Ecosystem
56
+
57
+ ```
58
+ bareagent = the brain (orchestration, LLM loop, memory, retries)
59
+ barebrowse = the eyes + hands (browse, read, interact with the web)
60
+ ```
61
+
62
+ barebrowse is a library. bareagent imports it as a capability.
63
+
64
+ ## Status
65
+
66
+ Early development. API may change.
67
+
68
+ ## License
69
+
70
+ MIT
package/package.json ADDED
@@ -0,0 +1,19 @@
1
{
  "name": "barebrowse",
  "version": "0.1.0",
  "description": "Authenticated web browsing for autonomous agents via CDP",
  "type": "module",
  "main": "src/index.js",
  "exports": "./src/index.js",
  "engines": {
    "node": ">=22"
  },
  "scripts": {
    "test": "node --test test/**/*.test.js"
  },
  "license": "MIT",
  "files": [
    "src",
    "README.md",
    "LICENSE"
  ]
}
package/src/index.js ADDED
@@ -0,0 +1,258 @@
1
+ /**
2
+ * barebrowse — Authenticated web browsing for autonomous agents via CDP.
3
+ *
4
+ * One package. One import. Three modes.
5
+ *
6
+ * Usage:
7
+ * import { browse, connect } from 'barebrowse';
8
+ * const snapshot = await browse('https://example.com');
9
+ */
10
+
11
+ import { launch, getDebugUrl } from './chromium.js';
12
+ import { createCDP } from './cdp.js';
13
+ import { formatTree } from './aria.js';
14
+ import { authenticate } from './auth.js';
15
+ import { prune as pruneTree } from './prune.js';
16
+ import { click as cdpClick, type as cdpType, scroll as cdpScroll, press as cdpPress } from './interact.js';
17
+
18
/**
 * Browse a URL and return an ARIA snapshot.
 * This is the primary API — URL in, agent-ready snapshot out.
 *
 * Pipeline: obtain a CDP connection, open a fresh page target, inject cookies
 * (best effort), navigate, read the accessibility tree, optionally prune it,
 * and format it as text. The page target, the CDP connection and any browser
 * launched here are released on every exit path, including failures.
 *
 * @param {string} url - The URL to browse
 * @param {object} [opts]
 * @param {'headless'|'headed'|'hybrid'} [opts.mode='headless'] - Browser mode.
 *   'headed' attaches to an already-running browser through its debug port;
 *   every other value launches a browser.
 * @param {boolean} [opts.cookies=true] - Inject user's cookies; pass false to skip
 * @param {*} [opts.browser] - Forwarded to authenticate() as its `browser` option
 * @param {boolean} [opts.prune=true] - Apply ARIA pruning; pass false for the raw tree
 * @param {string} [opts.pruneMode='act'] - Forwarded to the pruner as its `mode` option
 * @param {number} [opts.timeout=30000] - Navigation timeout in ms
 * @param {number} [opts.port=9222] - CDP port for headed mode
 * @returns {Promise<string>} ARIA snapshot text
 */
export async function browse(url, opts = {}) {
  const mode = opts.mode || 'headless';
  const timeout = opts.timeout || 30000;

  let browser = null;
  let cdp = null;
  let page = null;

  try {
    // Step 1: Get a CDP connection
    if (mode === 'headed') {
      const port = opts.port || 9222;
      const wsUrl = await getDebugUrl(port);
      cdp = await createCDP(wsUrl);
    } else {
      // headless (hybrid currently takes this path as well)
      browser = await launch();
      cdp = await createCDP(browser.wsUrl);
    }

    // Step 2: Create a new page target and attach
    page = await createPage(cdp);

    // Step 3: Cookie injection — extract from user's browser, inject via CDP
    if (opts.cookies !== false) {
      try {
        await authenticate(page.session, url, { browser: opts.browser });
      } catch {
        // Deliberate best effort: without cookies, public pages still work.
      }
    }

    // Step 4: Navigate and wait for load
    await navigate(page, url, timeout);

    // Step 5: Get ARIA tree
    const { tree } = await ariaTree(page);

    // Step 6: Prune for agent consumption (unless explicitly disabled)
    if (opts.prune === false) return formatTree(tree);
    return formatTree(pruneTree(tree, { mode: opts.pruneMode || 'act' }));
  } finally {
    // Step 7: Clean up. The target is closed here rather than on the success
    // path only, so a failed navigation in headed mode does not leave a stray
    // tab open in the user's browser.
    if (page) {
      try {
        await cdp.send('Target.closeTarget', { targetId: page.targetId });
      } catch {
        // Best effort: a failed tab close must not mask the snapshot or the
        // original error, and must not prevent the cleanup below.
      }
    }
    if (cdp) cdp.close();
    if (browser) browser.process.kill();
  }
}
86
+
87
/**
 * Connect to a browser for a long-lived interactive session.
 *
 * In 'headed' mode this attaches to an already-running browser through its
 * debug port; otherwise it launches a browser. A fresh page target is then
 * created. If any setup step fails, the CDP connection and the launched
 * browser (when present) are released before the error is rethrown.
 *
 * @param {object} [opts]
 * @param {'headless'|'headed'} [opts.mode='headless'] - Browser mode
 * @param {number} [opts.port=9222] - CDP port for headed mode
 * @returns {Promise<object>} Page handle exposing goto, snapshot, click, type,
 *   scroll, press, waitForNavigation, the raw `cdp` session, and close
 */
export async function connect(opts = {}) {
  const mode = opts.mode || 'headless';
  let browser = null;
  let cdp = null;
  let page;

  try {
    if (mode === 'headed') {
      const port = opts.port || 9222;
      const wsUrl = await getDebugUrl(port);
      cdp = await createCDP(wsUrl);
    } else {
      browser = await launch();
      cdp = await createCDP(browser.wsUrl);
    }
    page = await createPage(cdp);
  } catch (err) {
    // Setup failed part-way: do not leak the socket or the browser process.
    if (cdp) cdp.close();
    if (browser) browser.process.kill();
    throw err;
  }

  // ref → backend DOM node id, refreshed by every snapshot() call.
  let refMap = new Map();

  // Look up a snapshot ref. The exact value is tried first, then its string
  // form, so a numeric ref such as 8 matches a map keyed by '8'.
  const resolveRef = (ref) => {
    const backendNodeId = refMap.get(ref) ?? refMap.get(String(ref));
    if (!backendNodeId) throw new Error(`No element found for ref "${ref}"`);
    return backendNodeId;
  };

  return {
    /** Navigate the page to `url` and wait for it to load. */
    async goto(url, timeout = 30000) {
      await navigate(page, url, timeout);
    },

    /**
     * Take an ARIA snapshot and refresh the ref map used by click/type.
     * Pass `false` for the unpruned tree, or `{ mode }` to pick a prune mode.
     */
    async snapshot(pruneOpts) {
      const result = await ariaTree(page);
      refMap = result.refMap;
      if (pruneOpts === false) return formatTree(result.tree);
      const pruned = pruneTree(result.tree, { mode: pruneOpts?.mode || 'act' });
      return formatTree(pruned);
    },

    /** Click the element identified by a ref from the latest snapshot. */
    async click(ref) {
      await cdpClick(page.session, resolveRef(ref));
    },

    /** Type `text` into the element identified by a ref from the latest snapshot. */
    async type(ref, text, opts) {
      await cdpType(page.session, resolveRef(ref), text, opts);
    },

    async scroll(deltaY) {
      await cdpScroll(page.session, deltaY);
    },

    async press(key) {
      await cdpPress(page.session, key);
    },

    waitForNavigation(timeout = 30000) {
      return page.session.once('Page.loadEventFired', timeout);
    },

    /** Raw CDP session for escape hatch */
    cdp: page.session,

    /**
     * Close the page target, the CDP connection and any launched browser.
     * The connection and process are released even if closing the target
     * fails; that failure is still reported to the caller.
     */
    async close() {
      try {
        await cdp.send('Target.closeTarget', { targetId: page.targetId });
      } finally {
        cdp.close();
        if (browser) browser.process.kill();
      }
    },
  };
}
159
+
160
+ // --- Internal helpers ---
161
+
162
/**
 * Open a blank page target, attach to it with a flattened session, and
 * enable the Page, Network and DOM domains on that session.
 *
 * @param {object} cdp - Browser-level CDP client providing send() and session()
 * @returns {Promise<{session: object, targetId: string, sessionId: string}>}
 *   The session-scoped client together with the ids that identify the page
 */
async function createPage(cdp) {
  const created = await cdp.send('Target.createTarget', { url: 'about:blank' });
  const targetId = created.targetId;

  const attached = await cdp.send('Target.attachToTarget', { targetId, flatten: true });
  const sessionId = attached.sessionId;

  const session = cdp.session(sessionId);

  // Domains are enabled one at a time, in this order.
  const enableCommands = ['Page.enable', 'Network.enable', 'DOM.enable'];
  for (const command of enableCommands) {
    await session.send(command);
  }

  return { session, targetId, sessionId };
}
181
+
182
/**
 * Navigate to a URL and wait for the page to load.
 *
 * The load-event wait is registered before the navigation command is sent so
 * the event cannot be missed. After the load event, a fixed 500 ms pause
 * gives dynamic content a moment to settle.
 *
 * @param {{session: object}} page - Page handle as returned by createPage()
 * @param {string} url - Destination URL
 * @param {number} [timeout=30000] - Passed to session.once() as the wait limit in ms
 * @returns {Promise<void>}
 */
async function navigate(page, url, timeout = 30000) {
  const loadPromise = page.session.once('Page.loadEventFired', timeout);
  // If Page.navigate itself fails we never reach `await loadPromise`; without
  // a handler, its later rejection would surface as an unhandled rejection
  // and, under Node's default policy, terminate the process. Mark it handled
  // here; the real outcome is still observed by the await below.
  Promise.resolve(loadPromise).catch(() => {});
  await page.session.send('Page.navigate', { url });
  await loadPromise;
  // Brief settle time for dynamic content
  await new Promise((resolve) => setTimeout(resolve, 500));
}
192
+
193
/**
 * Fetch the page's full accessibility tree over CDP.
 *
 * @param {{session: object}} page - Page handle as returned by createPage()
 * @returns {Promise<{tree: object|null, refMap: Map}>} `tree` is the nested
 *   form produced by buildTree(); `refMap` maps each AX node id to its
 *   backend DOM node id, for nodes that have one
 */
async function ariaTree(page) {
  await page.session.send('Accessibility.enable');
  const response = await page.session.send('Accessibility.getFullAXTree');
  const nodes = response.nodes;

  const tree = buildTree(nodes);

  // ref → backendDOMNodeId, taken straight from the raw CDP nodes; nodes
  // without a backing DOM node are left out.
  const refMap = new Map(
    nodes
      .filter((axNode) => axNode.backendDOMNodeId)
      .map((axNode) => [axNode.nodeId, axNode.backendDOMNodeId]),
  );

  return { tree, refMap };
}
211
+
212
/**
 * Turn CDP's flat AXNode list into a nested tree.
 *
 * Linking uses each node's parentId only (childIds are ignored), so a node is
 * attached to at most one parent. Nodes whose parent is not in the list are
 * not attached anywhere. The root is the first node that has no parentId.
 *
 * @param {Array<object>} nodes - Raw AX nodes from Accessibility.getFullAXTree
 * @returns {object|null} Root tree node, or null when there are no nodes or
 *   no node without a parentId
 */
function buildTree(nodes) {
  if (!nodes || nodes.length === 0) return null;

  // Pass 1: one simplified tree node per raw node, indexed by node id.
  const byId = new Map();
  for (const raw of nodes) {
    byId.set(raw.nodeId, {
      nodeId: raw.nodeId,
      backendDOMNodeId: raw.backendDOMNodeId,
      role: raw.role?.value || '',
      name: raw.name?.value || '',
      properties: extractProps(raw.properties),
      ignored: raw.ignored || false,
      children: [],
    });
  }

  // Pass 2: attach children to parents and pick the root.
  const attached = new Set(); // ids already placed under a parent
  let root = null;
  for (const { nodeId, parentId } of nodes) {
    if (!parentId) {
      // Only the first parentless node becomes the root.
      if (!root) root = byId.get(nodeId);
      continue;
    }
    if (attached.has(nodeId)) continue;

    const parent = byId.get(parentId);
    if (!parent) continue;

    parent.children.push(byId.get(nodeId));
    attached.add(nodeId);
  }

  return root;
}
252
+
253
/**
 * Flatten a CDP AX property list into a plain `{ name: value }` object.
 *
 * @param {Iterable<{name: string, value?: {value: *}}>} [props] - Raw property
 *   entries; a missing list yields an empty object
 * @returns {object} One key per property name, holding that property's inner
 *   `value.value` (undefined when the entry has no value)
 */
function extractProps(props) {
  const flattened = {};
  if (props) {
    for (const { name, value } of props) {
      flattened[name] = value?.value;
    }
  }
  return flattened;
}