barebrowse 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.mcp.json +8 -0
- package/CHANGELOG.md +100 -0
- package/CLAUDE.md +22 -0
- package/README.md +123 -43
- package/barebrowse.context.md +261 -0
- package/cli.js +156 -0
- package/docs/blueprint.md +361 -0
- package/docs/testing.md +202 -0
- package/mcp-server.js +216 -0
- package/package.json +22 -9
- package/src/aria.js +69 -0
- package/src/auth.js +279 -0
- package/src/bareagent.js +161 -0
- package/src/cdp.js +148 -0
- package/src/chromium.js +148 -0
- package/src/consent.js +210 -0
- package/src/index.js +186 -10
- package/src/interact.js +208 -0
- package/src/prune.js +472 -0
- package/src/stealth.js +51 -0
package/docs/testing.md
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# barebrowse -- Testing Guide
|
|
2
|
+
|
|
3
|
+
## Run all tests
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
node --test test/unit/*.test.js test/integration/*.test.js
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
54 tests, 5 files, ~45s on a typical machine. No test framework -- uses Node's built-in `node:test` runner.
|
|
10
|
+
|
|
11
|
+
## Test pyramid
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
/ E2E \ 15 tests — real websites (Google, Wikipedia, GitHub, etc.)
|
|
15
|
+
/----------\
|
|
16
|
+
/ Integration \ 11 tests — full browse/connect pipeline against example.com, HN
|
|
17
|
+
/----------------\
|
|
18
|
+
/ Unit \ 28 tests — pruning, cookie extraction, CDP client, browser launch
|
|
19
|
+
/--------------------\
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
Unit tests are fast and isolated. Integration tests launch a real headless Chromium. E2E tests (part of interact.test.js) hit live websites — they require internet and may be slower or flaky on CI.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Unit tests (28 tests)
|
|
27
|
+
|
|
28
|
+
### `test/unit/prune.test.js` -- 16 tests
|
|
29
|
+
|
|
30
|
+
Tests the 9-step ARIA pruning pipeline in isolation. No browser, no network.
|
|
31
|
+
|
|
32
|
+
| # | Test | What it validates |
|
|
33
|
+
|---|------|-------------------|
|
|
34
|
+
| 1 | returns null for empty tree | prune(null) returns null |
|
|
35
|
+
| 2 | unwraps RootWebArea | Root container node stripped from output |
|
|
36
|
+
| 3 | keeps interactive elements in act mode | Buttons, links, textboxes survive pruning |
|
|
37
|
+
| 4 | drops paragraphs in act mode | Non-interactive text removed in act mode |
|
|
38
|
+
| 5 | keeps paragraphs in browse mode | Text content preserved in browse/read mode |
|
|
39
|
+
| 6 | drops InlineTextBox noise | Low-level rendering nodes always filtered |
|
|
40
|
+
| 7 | keeps headings | h1/h2 headings preserved in browse mode |
|
|
41
|
+
| 8 | drops description headings in act mode | Only primary h1 kept, secondary headings removed |
|
|
42
|
+
| 9 | collapses unnamed structural wrappers | Nested generic divs flattened, children promoted |
|
|
43
|
+
| 10 | keeps named groups | Radiogroup/radio elements preserved |
|
|
44
|
+
| 11 | drops separators | Separator/hr nodes always removed |
|
|
45
|
+
| 12 | drops images in act mode, keeps named in browse | Act strips all images, browse keeps named ones |
|
|
46
|
+
| 13 | trims combobox to just name + selected value | Combobox children (options list) stripped |
|
|
47
|
+
| 14 | uses context keywords to condense non-matching cards | Context filtering collapses irrelevant list items |
|
|
48
|
+
| 15 | extracts main landmark when present | Act mode keeps only main content area |
|
|
49
|
+
| 16 | handles pages without landmarks (HN-style) | Pruning works on flat, landmark-less pages |
|
|
50
|
+
|
|
51
|
+
### `test/unit/auth.test.js` -- 7 tests
|
|
52
|
+
|
|
53
|
+
Tests cookie extraction from the local filesystem. Reads real browser cookie databases.
|
|
54
|
+
|
|
55
|
+
| # | Test | What it validates |
|
|
56
|
+
|---|------|-------------------|
|
|
57
|
+
| 1 | auto-detects a browser and returns cookies | extractCookies() finds Firefox or Chromium and returns array |
|
|
58
|
+
| 2 | returns cookies with correct shape | Each cookie has name, value, domain, path, secure, httpOnly, sameSite, expires |
|
|
59
|
+
| 3 | filters by domain | Domain filter parameter restricts results |
|
|
60
|
+
| 4 | extracts from firefox explicitly | `{ browser: 'firefox' }` parameter works |
|
|
61
|
+
| 5 | throws for non-existent browser | Error thrown for unknown browser string |
|
|
62
|
+
| 6 | cookies have non-empty values | All returned cookies have non-empty value strings |
|
|
63
|
+
| 7 | sameSite is a valid value | sameSite is one of 'None', 'Lax', or 'Strict' |
|
|
64
|
+
|
|
65
|
+
Note: 2 tests may skip when Chromium profile is locked by a running instance (AES decryption needs keyring access).
|
|
66
|
+
|
|
67
|
+
### `test/unit/cdp.test.js` -- 5 tests
|
|
68
|
+
|
|
69
|
+
Tests browser discovery, launch, CDP WebSocket client, and session handling.
|
|
70
|
+
|
|
71
|
+
| # | Test | What it validates |
|
|
72
|
+
|---|------|-------------------|
|
|
73
|
+
| 1 | finds a Chromium-based browser | findBrowser() returns path to chromium/chrome/brave/edge |
|
|
74
|
+
| 2 | launches headless Chromium and returns WebSocket URL | launch() returns valid ws:// URL, port, and live process |
|
|
75
|
+
| 3 | connects to browser and sends commands | createCDP() connects, Browser.getVersion responds |
|
|
76
|
+
| 4 | creates session-scoped handles | Target.createTarget + session() dispatches to correct target |
|
|
77
|
+
| 5 | gets accessibility tree from a page | Accessibility.getFullAXTree returns nodes with role/name |
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Integration tests (11 tests)
|
|
82
|
+
|
|
83
|
+
### `test/integration/browse.test.js` -- 11 tests
|
|
84
|
+
|
|
85
|
+
Tests the full `browse()` and `connect()` pipeline end-to-end against real pages.
|
|
86
|
+
|
|
87
|
+
| # | Suite | Test | What it validates |
|
|
88
|
+
|---|-------|------|-------------------|
|
|
89
|
+
| 1 | browse() | returns ARIA snapshot for a public page | browse('example.com') returns non-empty snapshot with title |
|
|
90
|
+
| 2 | browse() | includes heading and ref markers | Snapshot contains roles and [ref=N] markers |
|
|
91
|
+
| 3 | browse() | prunes by default (act mode) | Pruned output smaller than raw ARIA tree |
|
|
92
|
+
| 4 | browse() | browse mode preserves paragraphs | pruneMode: 'browse' keeps text content |
|
|
93
|
+
| 5 | browse() | act mode drops paragraphs | pruneMode: 'act' removes non-interactive text |
|
|
94
|
+
| 6 | browse() | handles complex pages with significant reduction | Hacker News pruned by at least 20% |
|
|
95
|
+
| 7 | browse() | can disable cookies | cookies: false works without error |
|
|
96
|
+
| 8 | browse() | can disable pruning | prune: false keeps raw RootWebArea |
|
|
97
|
+
| 9 | connect() | creates a long-lived session and navigates | connect() + goto() + snapshot() works |
|
|
98
|
+
| 10 | connect() | supports multiple navigations in one session | Multiple goto() calls on same page |
|
|
99
|
+
| 11 | connect() | snapshot accepts prune: false for raw output | snapshot(false) preserves full tree |
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
## E2E tests (15 tests)
|
|
104
|
+
|
|
105
|
+
### `test/integration/interact.test.js` -- 15 tests
|
|
106
|
+
|
|
107
|
+
Tests real interactions: clicking, typing, scrolling, form submission, and navigation. Uses a local `data:` URL fixture for deterministic tests, plus live websites for real-world coverage.
|
|
108
|
+
|
|
109
|
+
#### Data URL fixture tests (7 tests)
|
|
110
|
+
|
|
111
|
+
| # | Test | What it validates |
|
|
112
|
+
|---|------|-------------------|
|
|
113
|
+
| 1 | click sets button result text | page.click(ref) triggers onclick handler |
|
|
114
|
+
| 2 | type fills an empty input | page.type(ref, text) fills empty textbox |
|
|
115
|
+
| 3 | type with clear replaces existing text | { clear: true } replaces prefilled input |
|
|
116
|
+
| 4 | click on offscreen element scrolls into view first | Auto-scroll before click on element at 3000px |
|
|
117
|
+
| 5 | press Enter submits a form | page.press('Enter') triggers form onsubmit |
|
|
118
|
+
| 6 | press throws on unknown key | Error thrown for unrecognized key names |
|
|
119
|
+
| 7 | link click + waitForNavigation navigates | Cross-page navigation via click + waitForNavigation |
|
|
120
|
+
|
|
121
|
+
#### Live website tests (8 tests)
|
|
122
|
+
|
|
123
|
+
| # | Site | Test | What it validates |
|
|
124
|
+
|---|------|------|-------------------|
|
|
125
|
+
| 1 | Google | search and navigate results | type() + press('Enter') + waitForNavigation() on Google |
|
|
126
|
+
| 2 | Wikipedia | navigate article links | click() + waitForNavigation() on Wikipedia article links |
|
|
127
|
+
| 3 | GitHub | navigate SPA repo links | click() works for SPA navigation (no loadEventFired) |
|
|
128
|
+
| 4 | DuckDuckGo | search query and verify results | type() + press('Enter') + navigation on DDG |
|
|
129
|
+
| 5 | Hacker News | load homepage and navigate to a story | click() + waitForNavigation() on HN story links |
|
|
130
|
+
| 6 | Reddit (old) | load and navigate to a post | Page navigation with fallback to www.reddit.com |
|
|
131
|
+
| 7 | Firefox cookies | extract and inject into CDP session | extractCookies() + injectCookies() workflow |
|
|
132
|
+
| 8 | Firefox cookies | extractCookies with firefox returns array | Explicit browser parameter returns proper array |
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
## Writing new tests
|
|
137
|
+
|
|
138
|
+
Follow the existing pattern:
|
|
139
|
+
|
|
140
|
+
```javascript
|
|
141
|
+
import { describe, it } from 'node:test';
|
|
142
|
+
import assert from 'node:assert/strict';
|
|
143
|
+
import { connect } from '../../src/index.js';
|
|
144
|
+
|
|
145
|
+
describe('my feature', () => {
|
|
146
|
+
it('does the thing', async () => {
|
|
147
|
+
const page = await connect();
|
|
148
|
+
try {
|
|
149
|
+
await page.goto('https://example.com');
|
|
150
|
+
const snap = await page.snapshot();
|
|
151
|
+
assert.ok(snap.includes('Example Domain'));
|
|
152
|
+
} finally {
|
|
153
|
+
await page.close();
|
|
154
|
+
}
|
|
155
|
+
});
|
|
156
|
+
});
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Key conventions:
|
|
160
|
+
- Always `page.close()` in a `finally` block to avoid leaked browser processes
|
|
161
|
+
- Use `data:` URL fixtures for deterministic tests (no network dependency)
|
|
162
|
+
- Real-site tests go in interact.test.js, grouped by site in `describe()` blocks
|
|
163
|
+
- Use `assert.ok()` and `assert.strictEqual()` from `node:assert/strict`
|
|
164
|
+
- No test framework dependencies -- `node:test` only
|
|
165
|
+
|
|
166
|
+
### Data URL fixture pattern
|
|
167
|
+
|
|
168
|
+
For testing interactions without network:
|
|
169
|
+
|
|
170
|
+
```javascript
|
|
171
|
+
const FIXTURE = `data:text/html,${encodeURIComponent(`
|
|
172
|
+
<html><body>
|
|
173
|
+
<button onclick="document.getElementById('r').textContent='clicked'">Click Me</button>
|
|
174
|
+
<div id="r"></div>
|
|
175
|
+
</body></html>
|
|
176
|
+
`)}`;
|
|
177
|
+
|
|
178
|
+
it('clicks the button', async () => {
|
|
179
|
+
const page = await connect();
|
|
180
|
+
try {
|
|
181
|
+
await page.goto(FIXTURE);
|
|
182
|
+
const snap = await page.snapshot({ mode: 'browse' });
|
|
183
|
+
const ref = findRef(snap, 'button', 'Click Me');
|
|
184
|
+
await page.click(ref);
|
|
185
|
+
const snap2 = await page.snapshot({ mode: 'browse' });
|
|
186
|
+
assert.ok(snap2.includes('clicked'));
|
|
187
|
+
} finally {
|
|
188
|
+
await page.close();
|
|
189
|
+
}
|
|
190
|
+
});
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## CI considerations
|
|
196
|
+
|
|
197
|
+
- Unit tests: fast, no network, always safe to run
|
|
198
|
+
- Integration tests: need Chromium installed and network access to example.com and Hacker News (failures caused by those sites being unreachable are tolerated)
|
|
199
|
+
- E2E tests: need internet, may be flaky (sites change, rate limits, geo-blocks)
|
|
200
|
+
- Recommended CI split: run unit + integration always, E2E on manual trigger or nightly
|
|
201
|
+
- Each test launches/kills its own browser instance -- no shared state between tests
|
|
202
|
+
- Auth tests may skip when Chromium profile is locked by a running instance
|
package/mcp-server.js
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* mcp-server.js — MCP server for barebrowse.
|
|
4
|
+
*
|
|
5
|
+
* Raw JSON-RPC 2.0 over stdio. No SDK dependency.
|
|
6
|
+
* 7 tools: browse (one-shot), goto, snapshot, click, type, press, scroll.
|
|
7
|
+
*
|
|
8
|
+
* Session tools share a singleton page, lazy-created on first use.
|
|
9
|
+
* Action tools return 'ok' — agent calls snapshot explicitly to observe.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { browse, connect } from './src/index.js';
|
|
13
|
+
|
|
14
|
+
let _page = null;
|
|
15
|
+
|
|
16
|
+
// In-flight connect() promise, shared by callers that arrive while the session
// page is still being created. Reset to null once the attempt settles.
let _connecting = null;

/**
 * Return the singleton session page, creating it on first use.
 *
 * Callers that overlap while the first connect() is still pending all receive
 * the same page instead of each opening their own session (which would leak
 * every page except the last one stored in `_page`).
 *
 * @returns {Promise<object>} The shared page returned by connect().
 * @throws Whatever connect() rejects with; a later call retries from scratch.
 */
async function getPage() {
  if (_page) return _page;

  if (!_connecting) {
    _connecting = connect()
      .then((page) => {
        _page = page;
        return page;
      })
      .finally(() => {
        // Success is now cached in _page; failure must not poison later calls.
        _connecting = null;
      });
  }
  return _connecting;
}
|
|
20
|
+
|
|
21
|
+
// Tool catalogue advertised through `tools/list`. Each entry carries the tool
// name, a human-readable description, and a JSON Schema for its arguments.
const TOOLS = (() => {
  // Schema for a plain string argument.
  const text = (description) => ({ type: 'string', description });

  // Object schema for a tool's arguments; `required` is left out entirely
  // when the tool has no mandatory arguments.
  const schema = (properties, required) =>
    required
      ? { type: 'object', properties, required }
      : { type: 'object', properties };

  const tool = (name, description, inputSchema) => ({ name, description, inputSchema });

  return [
    tool(
      'browse',
      'One-shot: navigate to a URL and return a pruned ARIA snapshot. Stateless — does not use the session page.',
      schema(
        {
          url: text('URL to browse'),
          mode: {
            type: 'string',
            enum: ['headless', 'headed', 'hybrid'],
            description: 'Browser mode (default: headless)',
          },
        },
        ['url'],
      ),
    ),
    tool(
      'goto',
      'Navigate the session page to a URL. Returns ok — call snapshot to observe.',
      schema({ url: text('URL to navigate to') }, ['url']),
    ),
    tool(
      'snapshot',
      'Get the current ARIA snapshot of the session page. Returns a YAML-like tree with [ref=N] markers on interactive elements.',
      schema({}),
    ),
    tool(
      'click',
      'Click an element by its ref from the snapshot. Returns ok — call snapshot to observe.',
      schema({ ref: text('Element ref from snapshot (e.g. "8")') }, ['ref']),
    ),
    tool(
      'type',
      'Type text into an element by its ref. Returns ok — call snapshot to observe.',
      schema(
        {
          ref: text('Element ref from snapshot'),
          text: text('Text to type'),
          clear: { type: 'boolean', description: 'Clear existing content first (default: false)' },
        },
        ['ref', 'text'],
      ),
    ),
    tool(
      'press',
      'Press a special key (Enter, Tab, Escape, Backspace, Delete, ArrowUp/Down/Left/Right, Home, End, PageUp, PageDown, Space). Returns ok.',
      schema({ key: text('Key name') }, ['key']),
    ),
    tool(
      'scroll',
      'Scroll the page. Positive deltaY scrolls down, negative scrolls up. Returns ok.',
      schema(
        { deltaY: { type: 'number', description: 'Pixels to scroll (positive=down, negative=up)' } },
        ['deltaY'],
      ),
    ),
  ];
})();
|
|
97
|
+
|
|
98
|
+
/**
 * Execute one MCP tool by name.
 *
 * `browse` is one-shot and goes straight to browse(); every other tool runs
 * against the shared session page obtained from getPage(). Action tools
 * resolve to the string 'ok'; `browse` and `snapshot` resolve to whatever the
 * underlying call returns.
 *
 * @param {string} name - Tool name as listed in TOOLS.
 * @param {object} args - Tool arguments supplied by the client.
 * @returns {Promise<*>} The tool result.
 * @throws {Error} `Unknown tool: <name>` when no tool matches.
 */
async function handleToolCall(name, args) {
  if (name === 'browse') {
    return await browse(args.url, { mode: args.mode });
  }

  // Session tools, keyed by name. Each receives the session page.
  const sessionTools = new Map([
    ['goto', async (page) => {
      await page.goto(args.url);
      return 'ok';
    }],
    ['snapshot', async (page) => await page.snapshot()],
    ['click', async (page) => {
      await page.click(args.ref);
      return 'ok';
    }],
    ['type', async (page) => {
      await page.type(args.ref, args.text, { clear: args.clear });
      return 'ok';
    }],
    ['press', async (page) => {
      await page.press(args.key);
      return 'ok';
    }],
    ['scroll', async (page) => {
      await page.scroll(args.deltaY);
      return 'ok';
    }],
  ]);

  const run = sessionTools.get(name);
  // Reject unknown names before touching getPage(), so a bad call never
  // creates the session page as a side effect.
  if (!run) {
    throw new Error(`Unknown tool: ${name}`);
  }

  const sessionPage = await getPage();
  return run(sessionPage);
}
|
|
136
|
+
|
|
137
|
+
/**
 * Serialize a successful JSON-RPC 2.0 response.
 *
 * @param {*} id - Id of the request being answered.
 * @param {*} result - Result payload.
 * @returns {string} JSON text of the response envelope.
 */
function jsonrpcResponse(id, result) {
  const envelope = { jsonrpc: '2.0', id, result };
  return JSON.stringify(envelope);
}
|
|
140
|
+
|
|
141
|
+
/**
 * Serialize a JSON-RPC 2.0 error response.
 *
 * @param {*} id - Id of the failed request.
 * @param {number} code - JSON-RPC error code.
 * @param {string} message - Human-readable error description.
 * @returns {string} JSON text of the error envelope.
 */
function jsonrpcError(id, code, message) {
  const error = { code, message };
  const envelope = { jsonrpc: '2.0', id, error };
  return JSON.stringify(envelope);
}
|
|
144
|
+
|
|
145
|
+
/**
 * Dispatch one decoded JSON-RPC 2.0 message and build the serialized reply.
 *
 * Handles `initialize`, `ping`, `tools/list` and `tools/call`. Tool failures
 * are reported inside a successful response with `isError: true`; protocol
 * problems are reported as JSON-RPC errors.
 *
 * @param {*} msg - Parsed message from the client.
 * @returns {Promise<string|null>} JSON text to send back, or null when the
 *   message is a notification and must not be answered.
 */
async function handleMessage(msg) {
  // Anything that is not a single request object cannot be destructured.
  if (msg === null || typeof msg !== 'object' || Array.isArray(msg)) {
    return jsonrpcError(null, -32600, 'Invalid Request');
  }

  const { id, method, params } = msg;

  // A message without an id is a notification (e.g. notifications/initialized,
  // notifications/cancelled). JSON-RPC forbids replying to those, even with
  // an error.
  if (id === undefined) {
    return null;
  }

  if (method === 'initialize') {
    return jsonrpcResponse(id, {
      protocolVersion: '2024-11-05',
      capabilities: { tools: {} },
      serverInfo: { name: 'barebrowse', version: '0.2.1' },
    });
  }

  // Liveness probe: answered with an empty result.
  if (method === 'ping') {
    return jsonrpcResponse(id, {});
  }

  if (method === 'tools/list') {
    return jsonrpcResponse(id, { tools: TOOLS });
  }

  if (method === 'tools/call') {
    const { name, arguments: args } = params ?? {};
    if (typeof name !== 'string') {
      return jsonrpcError(id, -32602, 'Invalid params: tools/call requires a string "name"');
    }
    try {
      const result = await handleToolCall(name, args || {});
      return jsonrpcResponse(id, {
        content: [{ type: 'text', text: typeof result === 'string' ? result : JSON.stringify(result) }],
      });
    } catch (err) {
      // Tool failures go back as tool output so the agent can read them.
      return jsonrpcResponse(id, {
        content: [{ type: 'text', text: `Error: ${err?.message ?? String(err)}` }],
        isError: true,
      });
    }
  }

  return jsonrpcError(id, -32601, `Method not found: ${method}`);
}
|
|
181
|
+
|
|
182
|
+
// --- Stdio transport ---
|
|
183
|
+
|
|
184
|
+
// Unconsumed stdin text: everything after the last newline seen so far.
let buffer = '';

// Tail of the processing chain. Every line is appended here so messages are
// handled strictly in arrival order, one at a time, even when they arrive in
// separate chunks while an earlier tool call is still awaiting.
let pending = Promise.resolve();

/**
 * Parse and handle one newline-delimited JSON-RPC message, writing the reply
 * (if any) to stdout.
 *
 * Only a JSON.parse failure is reported as -32700 Parse error; a failure
 * raised while handling a well-formed message is reported as -32603 Internal
 * error against that message's id.
 *
 * @param {string} line - One non-empty, trimmed line read from stdin.
 * @returns {Promise<void>}
 */
async function processLine(line) {
  let msg;
  try {
    msg = JSON.parse(line);
  } catch (err) {
    process.stdout.write(jsonrpcError(null, -32700, `Parse error: ${err.message}`) + '\n');
    return;
  }

  try {
    const response = await handleMessage(msg);
    if (response) {
      process.stdout.write(response + '\n');
    }
  } catch (err) {
    process.stdout.write(
      jsonrpcError(msg?.id ?? null, -32603, `Internal error: ${err?.message ?? String(err)}`) + '\n',
    );
  }
}

process.stdin.setEncoding('utf8');
process.stdin.on('data', (chunk) => {
  buffer += chunk;
  let newlineIdx;
  while ((newlineIdx = buffer.indexOf('\n')) !== -1) {
    const line = buffer.slice(0, newlineIdx).trim();
    buffer = buffer.slice(newlineIdx + 1);
    if (!line) continue;

    // Queue rather than await: the listener stays synchronous, so a second
    // 'data' event cannot start handling messages ahead of earlier ones.
    pending = pending.then(() => processLine(line));
  }
});
|
|
206
|
+
|
|
207
|
+
// On SIGINT or SIGTERM: close the session page if one was created (ignoring
// any error from close), then exit with status 0.
for (const signal of ['SIGINT', 'SIGTERM']) {
  process.on(signal, async () => {
    if (_page) {
      await _page.close().catch(() => {});
    }
    process.exit(0);
  });
}
|
package/package.json
CHANGED
|
@@ -1,19 +1,32 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "barebrowse",
|
|
3
|
-
"version": "0.1
|
|
4
|
-
"description": "Authenticated web browsing for autonomous agents via CDP",
|
|
3
|
+
"version": "0.2.1",
|
|
4
|
+
"description": "Authenticated web browsing for autonomous agents via CDP. URL in, pruned ARIA snapshot out.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
7
|
-
"exports":
|
|
7
|
+
"exports": {
|
|
8
|
+
".": "./src/index.js",
|
|
9
|
+
"./bareagent": "./src/bareagent.js"
|
|
10
|
+
},
|
|
11
|
+
"bin": {
|
|
12
|
+
"barebrowse": "./cli.js"
|
|
13
|
+
},
|
|
8
14
|
"engines": {
|
|
9
15
|
"node": ">=22"
|
|
10
16
|
},
|
|
11
17
|
"scripts": {
|
|
12
|
-
"test": "node --test test
|
|
18
|
+
"test": "node --test test/unit/*.test.js test/integration/*.test.js"
|
|
13
19
|
},
|
|
14
|
-
"
|
|
15
|
-
|
|
16
|
-
"
|
|
17
|
-
"
|
|
18
|
-
|
|
20
|
+
"keywords": [
|
|
21
|
+
"browser",
|
|
22
|
+
"cdp",
|
|
23
|
+
"chromium",
|
|
24
|
+
"aria",
|
|
25
|
+
"accessibility",
|
|
26
|
+
"web-scraping",
|
|
27
|
+
"agent",
|
|
28
|
+
"mcp",
|
|
29
|
+
"headless"
|
|
30
|
+
],
|
|
31
|
+
"license": "MIT"
|
|
19
32
|
}
|
package/src/aria.js
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* aria.js — Format ARIA accessibility tree nodes for agent consumption.
|
|
3
|
+
*
|
|
4
|
+
* Takes a nested tree (built from CDP's Accessibility.getFullAXTree)
|
|
5
|
+
* and formats it as readable YAML-like text, similar to Playwright's ariaSnapshot.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Low-level rendering roles that are noise for agents. Built once at module
// load instead of on every recursive call.
const SKIP_ROLES = new Set(['InlineTextBox', 'LineBreak']);

/**
 * Format a nested ARIA tree as readable text output.
 *
 * Output format (one node per line, indented by depth):
 *   - role "name" [props] [ref=nodeId]
 *
 * Ignored nodes emit no line of their own; their children are formatted at
 * the ignored node's depth. Nodes whose role is in SKIP_ROLES are dropped
 * together with their subtree. A node without a `children` array is treated
 * as a leaf.
 *
 * @param {object} node - Tree node { role, name, properties, children, ignored, nodeId }
 * @param {number} [depth=0] - Current indentation depth
 * @returns {string} Formatted ARIA tree text ('' when nothing is emitted)
 */
export function formatTree(node, depth = 0) {
  if (!node) return '';

  const children = node.children ?? [];

  // Skip ignored nodes but still process their children. Each child is its
  // own line (or block of lines), so they are joined with newlines; children
  // that produce no output are dropped so no blank lines appear.
  if (node.ignored) {
    return children
      .map((c) => formatTree(c, depth))
      .filter(Boolean)
      .join('\n');
  }

  if (SKIP_ROLES.has(node.role)) return '';

  // NOTE(review): the indent unit is reproduced exactly as it appears in this
  // view; confirm its width against the checked-in file.
  const indent = ' '.repeat(depth);
  const lines = [];

  // Build line: "- role "name" [properties] [ref=id]"
  let line = `${indent}- ${node.role || 'none'}`;

  if (node.name) {
    line += ` "${node.name}"`;
  }

  // Notable properties that agents care about
  const props = node.properties || {};
  const propParts = [];
  if (props.checked !== undefined) propParts.push(`checked=${props.checked}`);
  if (props.disabled) propParts.push('disabled');
  if (props.expanded !== undefined) propParts.push(`expanded=${props.expanded}`);
  if (props.level) propParts.push(`level=${props.level}`);
  if (props.selected) propParts.push('selected');
  if (props.required) propParts.push('required');
  if (props.value !== undefined && props.value !== '') propParts.push(`value="${props.value}"`);

  if (propParts.length > 0) {
    line += ` [${propParts.join(', ')}]`;
  }

  // Node ID as ref — agents use this to target interactions
  if (node.nodeId) {
    line += ` [ref=${node.nodeId}]`;
  }

  lines.push(line);

  // Recurse into children
  for (const child of children) {
    const childText = formatTree(child, depth + 1);
    if (childText) lines.push(childText);
  }

  return lines.join('\n');
}
|