barebrowse 0.7.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +213 -0
- package/LICENSE +202 -21
- package/NOTICE +8 -0
- package/README.md +37 -10
- package/barebrowse.context.md +43 -18
- package/cli.js +114 -3
- package/mcp-server.js +272 -68
- package/package.json +2 -2
- package/src/bareagent.js +33 -0
- package/src/chromium.js +115 -5
- package/src/consent.js +3 -8
- package/src/daemon.js +13 -0
- package/src/index.js +429 -132
- package/src/network-idle.js +62 -0
- package/src/stealth.js +87 -6
package/mcp-server.js
CHANGED
|
@@ -10,8 +10,47 @@
|
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
12
|
import { browse, connect } from './src/index.js';
|
|
13
|
-
import { mkdirSync, writeFileSync } from 'node:fs';
|
|
14
|
-
import { join } from 'node:path';
|
|
13
|
+
import { mkdirSync, writeFileSync, readFileSync } from 'node:fs';
|
|
14
|
+
import { join, dirname } from 'node:path';
|
|
15
|
+
import { pathToFileURL, fileURLToPath } from 'node:url';
|
|
16
|
+
|
|
17
|
+
// Read version from package.json so serverInfo.version doesn't drift behind
|
|
18
|
+
// release bumps (pre-fix this was hardcoded 0.7.1 while package.json was 0.8.0).
|
|
19
|
+
const _pkgPath = join(dirname(fileURLToPath(import.meta.url)), 'package.json');
|
|
20
|
+
const PKG_VERSION = JSON.parse(readFileSync(_pkgPath, 'utf8')).version;
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Per-tool timeouts (ms). One blanket 30s was too short for SPA cold loads
|
|
24
|
+
* (goto regularly exceeded it on slow sites) and too long for instant ops
|
|
25
|
+
* like scroll. The split below is the H5 plan:
|
|
26
|
+
* - navigation (goto/reload): 60s
|
|
27
|
+
* - browser-history nav (back/forward): 30s
|
|
28
|
+
* - interactive ops (click/type/press/scroll/hover/select/drag): 15s
|
|
29
|
+
* - read-only ops (snapshot/eval): 15s; tabs: 5s; wait_for: 60s (wait_for has its own
|
|
30
|
+
* internal deadline; this is the outer cap)
|
|
31
|
+
* - heavy I/O (pdf/screenshot/upload): 45s
|
|
32
|
+
* Exported so tests can pin the contract.
|
|
33
|
+
*/
|
|
34
|
+
export const TIMEOUTS = {
|
|
35
|
+
goto: 60000,
|
|
36
|
+
reload: 60000,
|
|
37
|
+
back: 30000,
|
|
38
|
+
forward: 30000,
|
|
39
|
+
snapshot: 15000,
|
|
40
|
+
click: 15000,
|
|
41
|
+
type: 15000,
|
|
42
|
+
press: 15000,
|
|
43
|
+
scroll: 15000,
|
|
44
|
+
hover: 15000,
|
|
45
|
+
select: 15000,
|
|
46
|
+
drag: 15000,
|
|
47
|
+
tabs: 5000,
|
|
48
|
+
eval: 15000,
|
|
49
|
+
wait_for: 60000,
|
|
50
|
+
upload: 45000,
|
|
51
|
+
pdf: 45000,
|
|
52
|
+
screenshot: 45000,
|
|
53
|
+
};
|
|
15
54
|
|
|
16
55
|
// Optional: privacy assessment via wearehere
|
|
17
56
|
let assessFn = null;
|
|
@@ -27,12 +66,17 @@ function isTransient(err) {
|
|
|
27
66
|
}
|
|
28
67
|
|
|
29
68
|
/**
|
|
30
|
-
*
|
|
31
|
-
*
|
|
69
|
+
* Run fn with a per-attempt timeout. On transient failure (CDP death OR
|
|
70
|
+
* timeout), reset the session. If `retry` is true (default), retry once on
|
|
71
|
+
* a fresh page; if false, rethrow without retrying — required for
|
|
72
|
+
* non-idempotent ops (click/type/etc.) where a partial first attempt
|
|
73
|
+
* shouldn't be replayed against a blank fresh page.
|
|
32
74
|
* @param {Function} fn - async function to execute
|
|
33
75
|
* @param {number} timeoutMs - per-attempt timeout in ms
|
|
76
|
+
* @param {object} [opts]
|
|
77
|
+
* @param {boolean} [opts.retry=true] - whether to retry once on transient failure
|
|
34
78
|
*/
|
|
35
|
-
async function withRetry(fn, timeoutMs) {
|
|
79
|
+
async function withRetry(fn, timeoutMs, { retry = true } = {}) {
|
|
36
80
|
async function attempt() {
|
|
37
81
|
if (!timeoutMs) return await fn();
|
|
38
82
|
let timer;
|
|
@@ -48,8 +92,9 @@ async function withRetry(fn, timeoutMs) {
|
|
|
48
92
|
return await attempt();
|
|
49
93
|
} catch (err) {
|
|
50
94
|
if (!isTransient(err)) throw err;
|
|
51
|
-
// Transient failure — reset session
|
|
95
|
+
// Transient failure — reset session so the next request gets a fresh page.
|
|
52
96
|
_page = null;
|
|
97
|
+
if (!retry) throw err;
|
|
53
98
|
return await attempt();
|
|
54
99
|
}
|
|
55
100
|
}
|
|
@@ -96,10 +141,10 @@ function acquireAssessSlot() {
|
|
|
96
141
|
}
|
|
97
142
|
|
|
98
143
|
|
|
99
|
-
const TOOLS = [
|
|
144
|
+
export const TOOLS = [
|
|
100
145
|
{
|
|
101
146
|
name: 'browse',
|
|
102
|
-
description: '
|
|
147
|
+
description: 'One-shot headless browse — fetches a URL through a real browser (executes JS, injects cookies, dismisses consent, evades bot detection). Only when plain HTTP fetch can\'t render the page. Returns a pruned ARIA snapshot with [ref=N] markers. Stateless — for multi-step interaction use goto.',
|
|
103
148
|
inputSchema: {
|
|
104
149
|
type: 'object',
|
|
105
150
|
properties: {
|
|
@@ -112,7 +157,7 @@ const TOOLS = [
|
|
|
112
157
|
},
|
|
113
158
|
{
|
|
114
159
|
name: 'goto',
|
|
115
|
-
description: '
|
|
160
|
+
description: 'Open URL in a persistent interactive browser session (pair with snapshot/click/type/press for multi-step flows). Use when the task needs clicking, typing, or form submission. Injects auth cookies. Returns ok — call snapshot to observe.',
|
|
116
161
|
inputSchema: {
|
|
117
162
|
type: 'object',
|
|
118
163
|
properties: {
|
|
@@ -221,8 +266,92 @@ const TOOLS = [
|
|
|
221
266
|
},
|
|
222
267
|
},
|
|
223
268
|
},
|
|
269
|
+
{
|
|
270
|
+
name: 'reload',
|
|
271
|
+
description: 'Reload the current page in the session. Returns ok — call snapshot to observe.',
|
|
272
|
+
inputSchema: {
|
|
273
|
+
type: 'object',
|
|
274
|
+
properties: {
|
|
275
|
+
ignoreCache: { type: 'boolean', description: 'Bypass HTTP cache (hard reload). Default: false.' },
|
|
276
|
+
},
|
|
277
|
+
},
|
|
278
|
+
},
|
|
279
|
+
{
|
|
280
|
+
name: 'screenshot',
|
|
281
|
+
description: 'Capture a screenshot of the current page. Saves to .barebrowse/screenshot-*.png (or .jpeg/.webp) and returns the file path. Use the file with your image tools.',
|
|
282
|
+
inputSchema: {
|
|
283
|
+
type: 'object',
|
|
284
|
+
properties: {
|
|
285
|
+
format: { type: 'string', enum: ['png', 'jpeg', 'webp'], description: 'Image format (default: png)' },
|
|
286
|
+
quality: { type: 'number', description: 'JPEG/WebP quality 0-100 (default: 80, ignored for PNG)' },
|
|
287
|
+
},
|
|
288
|
+
},
|
|
289
|
+
},
|
|
290
|
+
{
|
|
291
|
+
name: 'wait_for',
|
|
292
|
+
description: 'Wait for visible text or a CSS selector to appear on the current page. Returns ok when found, throws on timeout.',
|
|
293
|
+
inputSchema: {
|
|
294
|
+
type: 'object',
|
|
295
|
+
properties: {
|
|
296
|
+
text: { type: 'string', description: 'Substring that must appear in document.body.innerText' },
|
|
297
|
+
selector: { type: 'string', description: 'CSS selector that must match document.querySelector' },
|
|
298
|
+
timeout: { type: 'number', description: 'Timeout in ms (default: 30000)' },
|
|
299
|
+
},
|
|
300
|
+
},
|
|
301
|
+
},
|
|
302
|
+
{
|
|
303
|
+
name: 'tabs',
|
|
304
|
+
description: 'List open tabs in the session, or switch to one by index. Returns JSON array of { index, url, title } or "ok" after switch.',
|
|
305
|
+
inputSchema: {
|
|
306
|
+
type: 'object',
|
|
307
|
+
properties: {
|
|
308
|
+
switchTo: { type: 'number', description: 'Tab index to activate. Omit to just list tabs.' },
|
|
309
|
+
},
|
|
310
|
+
},
|
|
311
|
+
},
|
|
312
|
+
{
|
|
313
|
+
name: 'select',
|
|
314
|
+
description: 'Set the value of a <select> dropdown (or custom listbox) by ref. Returns ok.',
|
|
315
|
+
inputSchema: {
|
|
316
|
+
type: 'object',
|
|
317
|
+
properties: {
|
|
318
|
+
ref: { type: 'string', description: 'Element ref from snapshot' },
|
|
319
|
+
value: { type: 'string', description: 'Option value or visible text to select' },
|
|
320
|
+
},
|
|
321
|
+
required: ['ref', 'value'],
|
|
322
|
+
},
|
|
323
|
+
},
|
|
324
|
+
{
|
|
325
|
+
name: 'hover',
|
|
326
|
+
description: 'Hover over an element by ref (triggers tooltips, hover menus). Returns ok.',
|
|
327
|
+
inputSchema: {
|
|
328
|
+
type: 'object',
|
|
329
|
+
properties: {
|
|
330
|
+
ref: { type: 'string', description: 'Element ref from snapshot' },
|
|
331
|
+
},
|
|
332
|
+
required: ['ref'],
|
|
333
|
+
},
|
|
334
|
+
},
|
|
224
335
|
];
|
|
225
336
|
|
|
337
|
+
// Powerful escape hatch — guarded behind an explicit env-var opt-in.
|
|
338
|
+
// Runtime.evaluate in the user's authenticated session lets an agent read
|
|
339
|
+
// cookies/localStorage, dispatch arbitrary events, hit any endpoint, etc.
|
|
340
|
+
// Off by default; flip BAREBROWSE_MCP_EVAL=1 to enable.
|
|
341
|
+
if (process.env.BAREBROWSE_MCP_EVAL === '1') {
|
|
342
|
+
TOOLS.push({
|
|
343
|
+
name: 'eval',
|
|
344
|
+
description: 'Run a JavaScript expression in the current page and return the result. POWERFUL: full access to the authenticated session — DOM, cookies, localStorage, fetch. Enabled because BAREBROWSE_MCP_EVAL=1 is set.',
|
|
345
|
+
inputSchema: {
|
|
346
|
+
type: 'object',
|
|
347
|
+
properties: {
|
|
348
|
+
expression: { type: 'string', description: 'JavaScript expression to evaluate' },
|
|
349
|
+
},
|
|
350
|
+
required: ['expression'],
|
|
351
|
+
},
|
|
352
|
+
});
|
|
353
|
+
}
|
|
354
|
+
|
|
226
355
|
// Add assess tool if wearehere is installed
|
|
227
356
|
if (assessFn) {
|
|
228
357
|
TOOLS.push({
|
|
@@ -261,7 +390,7 @@ async function handleToolCall(name, args) {
|
|
|
261
390
|
try { await page.injectCookies(args.url); } catch {}
|
|
262
391
|
await page.goto(args.url);
|
|
263
392
|
return 'ok';
|
|
264
|
-
},
|
|
393
|
+
}, TIMEOUTS.goto);
|
|
265
394
|
case 'snapshot': return withRetry(async () => {
|
|
266
395
|
const page = await getPage();
|
|
267
396
|
const text = await page.snapshot();
|
|
@@ -271,22 +400,22 @@ async function handleToolCall(name, args) {
|
|
|
271
400
|
return `Snapshot (${text.length} chars) saved to ${file}`;
|
|
272
401
|
}
|
|
273
402
|
return text;
|
|
274
|
-
},
|
|
403
|
+
}, TIMEOUTS.snapshot);
|
|
275
404
|
case 'click': return withRetry(async () => {
|
|
276
405
|
const page = await getPage();
|
|
277
406
|
await page.click(args.ref);
|
|
278
407
|
return 'ok';
|
|
279
|
-
},
|
|
408
|
+
}, TIMEOUTS.click, { retry: false });
|
|
280
409
|
case 'type': return withRetry(async () => {
|
|
281
410
|
const page = await getPage();
|
|
282
411
|
await page.type(args.ref, args.text, { clear: args.clear });
|
|
283
412
|
return 'ok';
|
|
284
|
-
},
|
|
413
|
+
}, TIMEOUTS.type, { retry: false });
|
|
285
414
|
case 'press': return withRetry(async () => {
|
|
286
415
|
const page = await getPage();
|
|
287
416
|
await page.press(args.key);
|
|
288
417
|
return 'ok';
|
|
289
|
-
},
|
|
418
|
+
}, TIMEOUTS.press, { retry: false });
|
|
290
419
|
case 'scroll': return withRetry(async () => {
|
|
291
420
|
const page = await getPage();
|
|
292
421
|
let dy = args.deltaY;
|
|
@@ -298,31 +427,90 @@ async function handleToolCall(name, args) {
|
|
|
298
427
|
}
|
|
299
428
|
await page.scroll(dy);
|
|
300
429
|
return 'ok';
|
|
301
|
-
},
|
|
430
|
+
}, TIMEOUTS.scroll, { retry: false });
|
|
302
431
|
case 'back': return withRetry(async () => {
|
|
303
432
|
const page = await getPage();
|
|
304
433
|
await page.goBack();
|
|
305
434
|
return 'ok';
|
|
306
|
-
},
|
|
435
|
+
}, TIMEOUTS.back, { retry: false });
|
|
307
436
|
case 'forward': return withRetry(async () => {
|
|
308
437
|
const page = await getPage();
|
|
309
438
|
await page.goForward();
|
|
310
439
|
return 'ok';
|
|
311
|
-
},
|
|
440
|
+
}, TIMEOUTS.forward, { retry: false });
|
|
312
441
|
case 'drag': return withRetry(async () => {
|
|
313
442
|
const page = await getPage();
|
|
314
443
|
await page.drag(args.fromRef, args.toRef);
|
|
315
444
|
return 'ok';
|
|
316
|
-
},
|
|
445
|
+
}, TIMEOUTS.drag, { retry: false });
|
|
317
446
|
case 'upload': return withRetry(async () => {
|
|
318
447
|
const page = await getPage();
|
|
319
448
|
await page.upload(args.ref, args.files);
|
|
320
449
|
return 'ok';
|
|
321
|
-
},
|
|
450
|
+
}, TIMEOUTS.upload, { retry: false });
|
|
322
451
|
case 'pdf': return withRetry(async () => {
|
|
323
452
|
const page = await getPage();
|
|
324
453
|
return await page.pdf({ landscape: args.landscape });
|
|
325
|
-
},
|
|
454
|
+
}, TIMEOUTS.pdf);
|
|
455
|
+
case 'reload': return withRetry(async () => {
|
|
456
|
+
const page = await getPage();
|
|
457
|
+
await page.reload({ ignoreCache: !!args.ignoreCache });
|
|
458
|
+
return 'ok';
|
|
459
|
+
}, TIMEOUTS.reload);
|
|
460
|
+
case 'screenshot': return withRetry(async () => {
|
|
461
|
+
const page = await getPage();
|
|
462
|
+
const format = args.format || 'png';
|
|
463
|
+
const b64 = await page.screenshot({ format, quality: args.quality });
|
|
464
|
+
mkdirSync(OUTPUT_DIR, { recursive: true });
|
|
465
|
+
const ts = new Date().toISOString().replace(/[:.]/g, '-');
|
|
466
|
+
const file = join(OUTPUT_DIR, `screenshot-${ts}.${format}`);
|
|
467
|
+
writeFileSync(file, Buffer.from(b64, 'base64'));
|
|
468
|
+
return file;
|
|
469
|
+
}, TIMEOUTS.screenshot);
|
|
470
|
+
case 'wait_for': return withRetry(async () => {
|
|
471
|
+
const page = await getPage();
|
|
472
|
+
await page.waitFor({ text: args.text, selector: args.selector, timeout: args.timeout });
|
|
473
|
+
return 'ok';
|
|
474
|
+
}, TIMEOUTS.wait_for, { retry: false });
|
|
475
|
+
case 'tabs': return withRetry(async () => {
|
|
476
|
+
const page = await getPage();
|
|
477
|
+
if (typeof args.switchTo === 'number') {
|
|
478
|
+
await page.switchTab(args.switchTo);
|
|
479
|
+
return 'ok';
|
|
480
|
+
}
|
|
481
|
+
const list = await page.tabs();
|
|
482
|
+
return JSON.stringify(list, null, 2);
|
|
483
|
+
}, TIMEOUTS.tabs, { retry: false });
|
|
484
|
+
case 'select': return withRetry(async () => {
|
|
485
|
+
const page = await getPage();
|
|
486
|
+
await page.select(args.ref, args.value);
|
|
487
|
+
return 'ok';
|
|
488
|
+
}, TIMEOUTS.select, { retry: false });
|
|
489
|
+
case 'hover': return withRetry(async () => {
|
|
490
|
+
const page = await getPage();
|
|
491
|
+
await page.hover(args.ref);
|
|
492
|
+
return 'ok';
|
|
493
|
+
}, TIMEOUTS.hover, { retry: false });
|
|
494
|
+
case 'eval': {
|
|
495
|
+
// Only reachable when BAREBROWSE_MCP_EVAL=1 — the tool isn't registered
|
|
496
|
+
// otherwise, but this guard is the second line of defense in case the
|
|
497
|
+
// env var changes between tools/list and tools/call.
|
|
498
|
+
if (process.env.BAREBROWSE_MCP_EVAL !== '1') {
|
|
499
|
+
throw new Error('eval is disabled. Set BAREBROWSE_MCP_EVAL=1 to enable.');
|
|
500
|
+
}
|
|
501
|
+
return withRetry(async () => {
|
|
502
|
+
const page = await getPage();
|
|
503
|
+
const { result, exceptionDetails } = await page.cdp.send('Runtime.evaluate', {
|
|
504
|
+
expression: args.expression,
|
|
505
|
+
returnByValue: true,
|
|
506
|
+
awaitPromise: true,
|
|
507
|
+
});
|
|
508
|
+
if (exceptionDetails) {
|
|
509
|
+
throw new Error(exceptionDetails.text + (exceptionDetails.exception?.description ? `: ${exceptionDetails.exception.description}` : ''));
|
|
510
|
+
}
|
|
511
|
+
return result.value === undefined ? 'undefined' : JSON.stringify(result.value);
|
|
512
|
+
}, TIMEOUTS.eval, { retry: false });
|
|
513
|
+
}
|
|
326
514
|
case 'assess': {
|
|
327
515
|
if (!assessFn) throw new Error('wearehere is not installed. Run: npm install wearehere');
|
|
328
516
|
const releaseSlot = await acquireAssessSlot();
|
|
@@ -391,7 +579,7 @@ async function handleMessage(msg) {
|
|
|
391
579
|
return jsonrpcResponse(id, {
|
|
392
580
|
protocolVersion: '2024-11-05',
|
|
393
581
|
capabilities: { tools: {} },
|
|
394
|
-
serverInfo: { name: 'barebrowse', version: '0.7.1' },
|
|
582
|
+
serverInfo: { name: 'barebrowse', version: PKG_VERSION },
|
|
395
583
|
});
|
|
396
584
|
}
|
|
397
585
|
|
|
@@ -423,55 +611,71 @@ async function handleMessage(msg) {
|
|
|
423
611
|
}
|
|
424
612
|
|
|
425
613
|
// --- Stdio transport ---
|
|
614
|
+
//
|
|
615
|
+
// Exported as runStdio() so callers (notably cli.js) can explicitly start the
|
|
616
|
+
// JSON-RPC loop. The previous "auto-start when isMain" guard broke the
|
|
617
|
+
// `npx barebrowse mcp` path because cli.js launches the server via
|
|
618
|
+
// `await import('./mcp-server.js')` — process.argv[1] is cli.js, not
|
|
619
|
+
// mcp-server.js, so isMain was false and the loop never started. Both the
|
|
620
|
+
// direct `node mcp-server.js` invocation and the cli.js path now call
|
|
621
|
+
// runStdio() explicitly. Tests import TIMEOUTS/TOOLS without calling it.
|
|
622
|
+
|
|
623
|
+
export function runStdio() {
|
|
624
|
+
// One-line startup banner to stderr (stderr because stdout is the JSON-RPC
|
|
625
|
+
// channel — must not contain non-JSON-RPC bytes). Captured by Claude Code's
|
|
626
|
+
// MCP log, makes "I added barebrowse twice and got the wrong one" issues
|
|
627
|
+
// diagnosable: the path here is the absolute file actually being served,
|
|
628
|
+
// so a scope conflict shows two different paths in two log files.
|
|
629
|
+
const _selfPath = fileURLToPath(import.meta.url);
|
|
630
|
+
process.stderr.write(`barebrowse mcp v${PKG_VERSION} | serving from ${_selfPath} | pid ${process.pid}\n`);
|
|
631
|
+
|
|
632
|
+
let buffer = '';
|
|
633
|
+
|
|
634
|
+
process.stdin.setEncoding('utf8');
|
|
635
|
+
process.stdin.on('data', (chunk) => {
|
|
636
|
+
buffer += chunk;
|
|
637
|
+
|
|
638
|
+
let newlineIdx;
|
|
639
|
+
while ((newlineIdx = buffer.indexOf('\n')) !== -1) {
|
|
640
|
+
const line = buffer.slice(0, newlineIdx).trim();
|
|
641
|
+
buffer = buffer.slice(newlineIdx + 1);
|
|
642
|
+
if (!line) continue;
|
|
426
643
|
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
process.stdin.setEncoding('utf8');
|
|
430
|
-
process.stdin.on('data', (chunk) => {
|
|
431
|
-
buffer += chunk;
|
|
432
|
-
|
|
433
|
-
let newlineIdx;
|
|
434
|
-
while ((newlineIdx = buffer.indexOf('\n')) !== -1) {
|
|
435
|
-
const line = buffer.slice(0, newlineIdx).trim();
|
|
436
|
-
buffer = buffer.slice(newlineIdx + 1);
|
|
437
|
-
if (!line) continue;
|
|
438
|
-
|
|
439
|
-
try {
|
|
440
|
-
const msg = JSON.parse(line);
|
|
441
|
-
|
|
442
|
-
handleMessage(msg).then((response) => {
|
|
443
|
-
if (response) {
|
|
644
|
+
try {
|
|
645
|
+
const msg = JSON.parse(line);
|
|
444
646
|
|
|
445
|
-
|
|
647
|
+
handleMessage(msg).then((response) => {
|
|
648
|
+
if (response) {
|
|
649
|
+
process.stdout.write(response + '\n');
|
|
650
|
+
}
|
|
651
|
+
}).catch((err) => {
|
|
652
|
+
process.stdout.write(jsonrpcError(msg.id, -32700, `Error: ${err.message}`) + '\n');
|
|
653
|
+
});
|
|
654
|
+
} catch (err) {
|
|
655
|
+
process.stdout.write(jsonrpcError(null, -32700, `Parse error: ${err.message}`) + '\n');
|
|
656
|
+
}
|
|
657
|
+
}
|
|
658
|
+
});
|
|
446
659
|
|
|
447
|
-
|
|
448
|
-
|
|
660
|
+
// Prevent unhandled rejections and uncaught exceptions from crashing the server.
|
|
661
|
+
// Browser OOM/crash rejects all pending CDP promises — some may not be awaited.
|
|
662
|
+
process.on('unhandledRejection', () => { _page = null; });
|
|
663
|
+
process.on('uncaughtException', () => { _page = null; });
|
|
449
664
|
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
665
|
+
// Clean up on exit
|
|
666
|
+
process.on('SIGINT', async () => {
|
|
667
|
+
if (_page) await _page.close().catch(() => {});
|
|
668
|
+
process.exit(0);
|
|
669
|
+
});
|
|
670
|
+
process.on('SIGTERM', async () => {
|
|
671
|
+
if (_page) await _page.close().catch(() => {});
|
|
672
|
+
process.exit(0);
|
|
673
|
+
});
|
|
674
|
+
}
|
|
453
675
|
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
// Browser OOM/crash rejects all pending CDP promises — some may not be awaited.
|
|
461
|
-
process.on('unhandledRejection', (err) => {
|
|
462
|
-
_page = null;
|
|
463
|
-
});
|
|
464
|
-
process.on('uncaughtException', (err) => {
|
|
465
|
-
_page = null;
|
|
466
|
-
});
|
|
467
|
-
|
|
468
|
-
// Clean up on exit
|
|
469
|
-
process.on('SIGINT', async () => {
|
|
470
|
-
if (_page) await _page.close().catch(() => {});
|
|
471
|
-
process.exit(0);
|
|
472
|
-
});
|
|
473
|
-
|
|
474
|
-
process.on('SIGTERM', async () => {
|
|
475
|
-
if (_page) await _page.close().catch(() => {});
|
|
476
|
-
process.exit(0);
|
|
477
|
-
});
|
|
676
|
+
// Direct invocation (`node mcp-server.js`) still works without cli.js — auto-
|
|
677
|
+
// start if this file IS process.argv[1]. The cli.js path imports + calls
|
|
678
|
+
// runStdio() explicitly so we never depend on argv[1] matching.
|
|
679
|
+
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
|
|
680
|
+
runStdio();
|
|
681
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "barebrowse",
|
|
3
|
-
"version": "0.7.1",
|
|
3
|
+
"version": "0.9.0",
|
|
4
4
|
"description": "Authenticated web browsing for autonomous agents via CDP. URL in, pruned ARIA snapshot out.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
@@ -31,5 +31,5 @@
|
|
|
31
31
|
"optionalDependencies": {
|
|
32
32
|
"wearehere": "^1.0.0"
|
|
33
33
|
},
|
|
34
|
-
"license": "
|
|
34
|
+
"license": "Apache-2.0"
|
|
35
35
|
}
|
package/src/bareagent.js
CHANGED
|
@@ -244,6 +244,39 @@ export function createBrowseTools(opts = {}) {
|
|
|
244
244
|
return await page.screenshot({ format });
|
|
245
245
|
},
|
|
246
246
|
},
|
|
247
|
+
{
|
|
248
|
+
name: 'reload',
|
|
249
|
+
description: 'Reload the current page. Returns the updated snapshot.',
|
|
250
|
+
parameters: {
|
|
251
|
+
type: 'object',
|
|
252
|
+
properties: {
|
|
253
|
+
ignoreCache: { type: 'boolean', description: 'Bypass HTTP cache (hard reload). Default: false.' },
|
|
254
|
+
},
|
|
255
|
+
},
|
|
256
|
+
execute: async ({ ignoreCache } = {}) => actionAndSnapshot((page) => page.reload({ ignoreCache })),
|
|
257
|
+
},
|
|
258
|
+
{
|
|
259
|
+
name: 'wait_for',
|
|
260
|
+
description: 'Wait for visible text or a CSS selector to appear on the current page. Returns the updated snapshot once found.',
|
|
261
|
+
parameters: {
|
|
262
|
+
type: 'object',
|
|
263
|
+
properties: {
|
|
264
|
+
text: { type: 'string', description: 'Substring that must appear in document.body.innerText' },
|
|
265
|
+
selector: { type: 'string', description: 'CSS selector that must match document.querySelector' },
|
|
266
|
+
timeout: { type: 'number', description: 'Timeout in ms (default: 30000)' },
|
|
267
|
+
},
|
|
268
|
+
},
|
|
269
|
+
execute: async ({ text, selector, timeout } = {}) => actionAndSnapshot((page) => page.waitFor({ text, selector, timeout })),
|
|
270
|
+
},
|
|
271
|
+
{
|
|
272
|
+
name: 'downloads',
|
|
273
|
+
description: 'List files captured via Content-Disposition: attachment downloads during this session. Returns JSON array of { url, suggestedFilename, savedPath, state, totalBytes, receivedBytes } per file.',
|
|
274
|
+
parameters: { type: 'object', properties: {} },
|
|
275
|
+
execute: async () => {
|
|
276
|
+
const page = await getPage();
|
|
277
|
+
return JSON.stringify(page.downloads.map((d) => ({ ...d })), null, 2);
|
|
278
|
+
},
|
|
279
|
+
},
|
|
247
280
|
];
|
|
248
281
|
|
|
249
282
|
// Add assess tool if wearehere is installed
|
package/src/chromium.js
CHANGED
|
@@ -6,7 +6,47 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import { execSync, spawn } from 'node:child_process';
|
|
9
|
-
import { existsSync } from 'node:fs';
|
|
9
|
+
import { existsSync, rmSync } from 'node:fs';
|
|
10
|
+
|
|
11
|
+
// Track launched browsers so we can clean them up if the parent crashes.
|
|
12
|
+
// Registered exit handlers (one-time) iterate this set on shutdown.
|
|
13
|
+
const activeBrowsers = new Set();
|
|
14
|
+
let exitHandlersRegistered = false;
|
|
15
|
+
|
|
16
|
+
function reapAllSync() {
|
|
17
|
+
const toReap = [...activeBrowsers];
|
|
18
|
+
activeBrowsers.clear();
|
|
19
|
+
// Send SIGKILL to everything first so the kernel reaps in parallel
|
|
20
|
+
for (const b of toReap) {
|
|
21
|
+
try { if (!b.process.killed) b.process.kill('SIGKILL'); } catch {}
|
|
22
|
+
}
|
|
23
|
+
// Then poll each for actual death before removing its profile dir —
|
|
24
|
+
// Chromium can hold file handles briefly even after SIGKILL, which would
|
|
25
|
+
// race rmSync. Cap the wait so a stuck process can't hang shutdown.
|
|
26
|
+
for (const b of toReap) {
|
|
27
|
+
for (let i = 0; i < 20; i++) {
|
|
28
|
+
try { process.kill(b.process.pid, 0); } catch { break; }
|
|
29
|
+
try { execSync('sleep 0.05'); } catch {}
|
|
30
|
+
}
|
|
31
|
+
if (b.ownedProfileDir) {
|
|
32
|
+
try { rmSync(b.ownedProfileDir, { recursive: true, force: true }); } catch {}
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
function registerExitHandlers() {
|
|
38
|
+
if (exitHandlersRegistered) return;
|
|
39
|
+
exitHandlersRegistered = true;
|
|
40
|
+
// 'exit' is sync-only — must use synchronous APIs (SIGKILL, rmSync)
|
|
41
|
+
process.once('exit', reapAllSync);
|
|
42
|
+
for (const sig of ['SIGINT', 'SIGTERM', 'SIGHUP']) {
|
|
43
|
+
process.once(sig, () => {
|
|
44
|
+
reapAllSync();
|
|
45
|
+
// Re-raise default behavior so the parent's exit code matches the signal
|
|
46
|
+
process.kill(process.pid, sig);
|
|
47
|
+
});
|
|
48
|
+
}
|
|
49
|
+
}
|
|
10
50
|
|
|
11
51
|
// Common Chromium binary paths by platform (Linux focus for POC)
|
|
12
52
|
const CANDIDATES = [
|
|
@@ -75,6 +115,14 @@ export async function launch(opts = {}) {
|
|
|
75
115
|
'--disable-sync',
|
|
76
116
|
'--disable-translate',
|
|
77
117
|
'--mute-audio',
|
|
118
|
+
// Force every iframe (same-origin included) into its own renderer so it
|
|
119
|
+
// gets a dedicated CDP session via Target.setAutoAttach. Without this,
|
|
120
|
+
// same-origin iframes stay in the parent process — getFullAXTree still
|
|
121
|
+
// works via frameId, but Input.dispatchMouseEvent on the parent session
|
|
122
|
+
// uses parent-viewport coords while DOM.getBoxModel for iframe-internal
|
|
123
|
+
// nodes returns frame-local coords, so clicks land off-target. The OOPIF
|
|
124
|
+
// path side-steps that: each frame has its own Input domain.
|
|
125
|
+
'--site-per-process',
|
|
78
126
|
// Headless-only flags
|
|
79
127
|
...(!opts.headed ? ['--headless=new', '--hide-scrollbars'] : []),
|
|
80
128
|
// Suppress permission prompts (location, notifications, camera, mic, etc.)
|
|
@@ -90,12 +138,14 @@ export async function launch(opts = {}) {
|
|
|
90
138
|
args.push(`--proxy-server=${opts.proxy}`);
|
|
91
139
|
}
|
|
92
140
|
|
|
141
|
+
// Track the temp profile dir only when we create one — caller-supplied dirs
|
|
142
|
+
// are the caller's to manage. ownedProfileDir gets rm'd in cleanupBrowser.
|
|
143
|
+
let ownedProfileDir = null;
|
|
93
144
|
if (opts.userDataDir) {
|
|
94
145
|
args.push(`--user-data-dir=${opts.userDataDir}`);
|
|
95
146
|
} else {
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
args.push(`--user-data-dir=/tmp/barebrowse-${process.pid}-${Date.now()}`);
|
|
147
|
+
ownedProfileDir = `/tmp/barebrowse-${process.pid}-${Date.now()}`;
|
|
148
|
+
args.push(`--user-data-dir=${ownedProfileDir}`);
|
|
99
149
|
}
|
|
100
150
|
|
|
101
151
|
// about:blank as initial page
|
|
@@ -138,7 +188,52 @@ export async function launch(opts = {}) {
|
|
|
138
188
|
// Extract port from wsUrl
|
|
139
189
|
const actualPort = parseInt(new URL(wsUrl).port, 10);
|
|
140
190
|
|
|
141
|
-
|
|
191
|
+
const browser = { wsUrl, process: child, port: actualPort, ownedProfileDir };
|
|
192
|
+
|
|
193
|
+
// Register for parent-crash reaping. Auto-untrack on natural exit so
|
|
194
|
+
// a normally-exited browser doesn't leave a stale entry around.
|
|
195
|
+
registerExitHandlers();
|
|
196
|
+
activeBrowsers.add(browser);
|
|
197
|
+
child.once('exit', () => activeBrowsers.delete(browser));
|
|
198
|
+
|
|
199
|
+
return browser;
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
/**
|
|
203
|
+
* Kill a launched browser and remove its temp profile dir (if we created one).
|
|
204
|
+
* Waits up to 2s for the process to actually exit before unlinking the dir —
|
|
205
|
+
* Chromium can still hold files briefly after SIGTERM, which races rmSync.
|
|
206
|
+
* Safe to call on partially-failed launches or already-dead processes.
|
|
207
|
+
* @returns {Promise<void>}
|
|
208
|
+
*/
|
|
209
|
+
export async function cleanupBrowser(browser) {
|
|
210
|
+
if (!browser) return;
|
|
211
|
+
activeBrowsers.delete(browser);
|
|
212
|
+
if (browser.process && !browser.process.killed && browser.process.exitCode === null) {
|
|
213
|
+
const exited = new Promise((resolve) => {
|
|
214
|
+
const timer = setTimeout(resolve, 2000);
|
|
215
|
+
browser.process.once('exit', () => { clearTimeout(timer); resolve(); });
|
|
216
|
+
});
|
|
217
|
+
try { browser.process.kill(); } catch {}
|
|
218
|
+
await exited;
|
|
219
|
+
}
|
|
220
|
+
if (browser.ownedProfileDir) {
|
|
221
|
+
// Chromium can still flush files for ~hundreds of ms after exit; with
|
|
222
|
+
// --site-per-process (added in H2) every iframe is its own renderer
|
|
223
|
+
// process, each with its own pending file handles, so the old 10×100ms
|
|
224
|
+
// window (1s) wasn't always enough under parallel test load. Now
|
|
225
|
+
// 25×100ms (2.5s) plus a polling jitter to avoid every concurrent
|
|
226
|
+
// cleanup hammering at the same tick.
|
|
227
|
+
for (let i = 0; i < 25; i++) {
|
|
228
|
+
try {
|
|
229
|
+
rmSync(browser.ownedProfileDir, { recursive: true, force: true });
|
|
230
|
+
break;
|
|
231
|
+
} catch (err) {
|
|
232
|
+
if (err.code !== 'ENOTEMPTY' && err.code !== 'EBUSY') break;
|
|
233
|
+
await new Promise((r) => setTimeout(r, 100 + Math.floor(Math.random() * 50)));
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
}
|
|
142
237
|
}
|
|
143
238
|
|
|
144
239
|
/**
|
|
@@ -152,3 +247,18 @@ export async function getDebugUrl(port) {
|
|
|
152
247
|
const data = await res.json();
|
|
153
248
|
return data.webSocketDebuggerUrl;
|
|
154
249
|
}
|
|
250
|
+
|
|
251
|
+
/**
|
|
252
|
+
* Attach to a Chromium already running with --remote-debugging-port=<port>.
|
|
253
|
+
* Returns the same shape as launch() but with process: null and
|
|
254
|
+
* ownedProfileDir: null — cleanupBrowser() becomes a no-op so we never
|
|
255
|
+
* kill a browser we did not start or remove a profile we do not own.
|
|
256
|
+
* @param {object} opts
|
|
257
|
+
* @param {number} opts.port - The debug port the running browser is listening on
|
|
258
|
+
* @returns {Promise<{wsUrl: string, process: null, port: number, ownedProfileDir: null}>}
|
|
259
|
+
*/
|
|
260
|
+
export async function attach({ port }) {
|
|
261
|
+
if (!port) throw new Error('attach({ port }) requires a port number');
|
|
262
|
+
const wsUrl = await getDebugUrl(port);
|
|
263
|
+
return { wsUrl, process: null, port, ownedProfileDir: null };
|
|
264
|
+
}
|
package/src/consent.js
CHANGED
|
@@ -290,14 +290,9 @@ function findAcceptButton(dialogId, nodes, nodeMap, parentMap) {
|
|
|
290
290
|
* Only matches strong patterns (not single-word fallbacks) to avoid false positives.
|
|
291
291
|
*/
|
|
292
292
|
function tryGlobalConsentButton(nodes, session) {
|
|
293
|
-
//
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
return src.includes('\\s') || src.includes('\\b.*\\b.*\\b');
|
|
297
|
-
});
|
|
298
|
-
|
|
299
|
-
// Actually, let's just use all non-single-word patterns
|
|
300
|
-
const safePatterns = ACCEPT_PATTERNS.slice(0, -3); // exclude ^accept$, ^agree$, ^ok$
|
|
293
|
+
// Multi-word patterns only — exclude the bare ^accept$/^agree$/^ok$ from
|
|
294
|
+
// ACCEPT_PATTERNS so we don't false-match unrelated buttons page-wide.
|
|
295
|
+
const safePatterns = ACCEPT_PATTERNS.slice(0, -3);
|
|
301
296
|
|
|
302
297
|
for (const pattern of safePatterns) {
|
|
303
298
|
for (const node of nodes) {
|