barebrowse 0.7.1 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +249 -0
- package/LICENSE +202 -21
- package/NOTICE +8 -0
- package/README.md +39 -10
- package/barebrowse.context.md +45 -18
- package/cli.js +114 -3
- package/mcp-server.js +276 -70
- package/package.json +2 -2
- package/src/bareagent.js +43 -4
- package/src/chromium.js +115 -5
- package/src/consent.js +3 -8
- package/src/daemon.js +13 -0
- package/src/index.js +440 -135
- package/src/network-idle.js +62 -0
- package/src/prune.js +2 -1
- package/src/stealth.js +87 -6
package/mcp-server.js
CHANGED
|
@@ -10,8 +10,47 @@
|
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
12
|
import { browse, connect } from './src/index.js';
|
|
13
|
-
import { mkdirSync, writeFileSync } from 'node:fs';
|
|
14
|
-
import { join } from 'node:path';
|
|
13
|
+
import { mkdirSync, writeFileSync, readFileSync } from 'node:fs';
|
|
14
|
+
import { join, dirname } from 'node:path';
|
|
15
|
+
import { pathToFileURL, fileURLToPath } from 'node:url';
|
|
16
|
+
|
|
17
|
+
// Read version from package.json so serverInfo.version doesn't drift behind
|
|
18
|
+
// release bumps (pre-fix this was hardcoded 0.7.1 while package.json was 0.8.0).
|
|
19
|
+
const _pkgPath = join(dirname(fileURLToPath(import.meta.url)), 'package.json');
|
|
20
|
+
const PKG_VERSION = JSON.parse(readFileSync(_pkgPath, 'utf8')).version;
|
|
21
|
+
|
|
22
|
+
/**
|
|
23
|
+
* Per-tool timeouts (ms). One blanket 30s was too short for SPA cold loads
|
|
24
|
+
* (goto regularly exceeded it on slow sites) and too long for instant ops
|
|
25
|
+
* like scroll. The split below is the H5 plan:
|
|
26
|
+
* - navigation (goto/reload): 60s
|
|
27
|
+
* - browser-history nav (back/forward): 30s
|
|
28
|
+
* - interactive ops (click/type/press/scroll/hover/select/drag): 15s
|
|
29
|
+
* - read-only ops: snapshot/eval 15s, tabs 5s, wait_for 60s (wait_for has its own
|
|
30
|
+
* internal deadline; this is the outer cap)
|
|
31
|
+
* - heavy I/O (pdf/screenshot/upload): 45s
|
|
32
|
+
* Exported so tests can pin the contract.
|
|
33
|
+
*/
|
|
34
|
+
export const TIMEOUTS = {
|
|
35
|
+
goto: 60000,
|
|
36
|
+
reload: 60000,
|
|
37
|
+
back: 30000,
|
|
38
|
+
forward: 30000,
|
|
39
|
+
snapshot: 15000,
|
|
40
|
+
click: 15000,
|
|
41
|
+
type: 15000,
|
|
42
|
+
press: 15000,
|
|
43
|
+
scroll: 15000,
|
|
44
|
+
hover: 15000,
|
|
45
|
+
select: 15000,
|
|
46
|
+
drag: 15000,
|
|
47
|
+
tabs: 5000,
|
|
48
|
+
eval: 15000,
|
|
49
|
+
wait_for: 60000,
|
|
50
|
+
upload: 45000,
|
|
51
|
+
pdf: 45000,
|
|
52
|
+
screenshot: 45000,
|
|
53
|
+
};
|
|
15
54
|
|
|
16
55
|
// Optional: privacy assessment via wearehere
|
|
17
56
|
let assessFn = null;
|
|
@@ -27,12 +66,17 @@ function isTransient(err) {
|
|
|
27
66
|
}
|
|
28
67
|
|
|
29
68
|
/**
|
|
30
|
-
*
|
|
31
|
-
*
|
|
69
|
+
* Run fn with a per-attempt timeout. On transient failure (CDP death OR
|
|
70
|
+
* timeout), reset the session. If `retry` is true (default), retry once on
|
|
71
|
+
* a fresh page; if false, rethrow without retrying — required for
|
|
72
|
+
* non-idempotent ops (click/type/etc.) where a partial first attempt
|
|
73
|
+
* shouldn't be replayed against a blank fresh page.
|
|
32
74
|
* @param {Function} fn - async function to execute
|
|
33
75
|
* @param {number} timeoutMs - per-attempt timeout in ms
|
|
76
|
+
* @param {object} [opts]
|
|
77
|
+
* @param {boolean} [opts.retry=true] - whether to retry once on transient failure
|
|
34
78
|
*/
|
|
35
|
-
async function withRetry(fn, timeoutMs) {
|
|
79
|
+
async function withRetry(fn, timeoutMs, { retry = true } = {}) {
|
|
36
80
|
async function attempt() {
|
|
37
81
|
if (!timeoutMs) return await fn();
|
|
38
82
|
let timer;
|
|
@@ -48,8 +92,9 @@ async function withRetry(fn, timeoutMs) {
|
|
|
48
92
|
return await attempt();
|
|
49
93
|
} catch (err) {
|
|
50
94
|
if (!isTransient(err)) throw err;
|
|
51
|
-
// Transient failure — reset session
|
|
95
|
+
// Transient failure — reset session so the next request gets a fresh page.
|
|
52
96
|
_page = null;
|
|
97
|
+
if (!retry) throw err;
|
|
53
98
|
return await attempt();
|
|
54
99
|
}
|
|
55
100
|
}
|
|
@@ -96,15 +141,16 @@ function acquireAssessSlot() {
|
|
|
96
141
|
}
|
|
97
142
|
|
|
98
143
|
|
|
99
|
-
const TOOLS = [
|
|
144
|
+
export const TOOLS = [
|
|
100
145
|
{
|
|
101
146
|
name: 'browse',
|
|
102
|
-
description: '
|
|
147
|
+
description: 'One-shot headless browse — fetches a URL through a real browser (executes JS, injects cookies, dismisses consent, evades bot detection). Only when plain HTTP fetch can\'t render the page. Returns a pruned ARIA snapshot with [ref=N] markers. Stateless — for multi-step interaction use goto.',
|
|
103
148
|
inputSchema: {
|
|
104
149
|
type: 'object',
|
|
105
150
|
properties: {
|
|
106
151
|
url: { type: 'string', description: 'URL to browse' },
|
|
107
152
|
mode: { type: 'string', enum: ['headless', 'headed', 'hybrid'], description: 'Browser mode (default: headless)' },
|
|
153
|
+
pruneMode: { type: 'string', enum: ['act', 'read'], description: 'Pruning mode. "act" (default) keeps interactive elements and short labels — best for clicking/filling. "read" keeps paragraphs, headings, and long text — best for articles, docs, and content extraction. If the page is content-heavy and act-mode returns mostly empty, retry with "read".' },
|
|
108
154
|
maxChars: { type: 'number', description: 'Max chars to return inline. Larger snapshots are saved to .barebrowse/ and a file path is returned instead. Default: 30000.' },
|
|
109
155
|
},
|
|
110
156
|
required: ['url'],
|
|
@@ -112,7 +158,7 @@ const TOOLS = [
|
|
|
112
158
|
},
|
|
113
159
|
{
|
|
114
160
|
name: 'goto',
|
|
115
|
-
description: '
|
|
161
|
+
description: 'Open URL in a persistent interactive browser session (pair with snapshot/click/type/press for multi-step flows). Use when the task needs clicking, typing, or form submission. Injects auth cookies. Returns ok — call snapshot to observe.',
|
|
116
162
|
inputSchema: {
|
|
117
163
|
type: 'object',
|
|
118
164
|
properties: {
|
|
@@ -127,6 +173,7 @@ const TOOLS = [
|
|
|
127
173
|
inputSchema: {
|
|
128
174
|
type: 'object',
|
|
129
175
|
properties: {
|
|
176
|
+
pruneMode: { type: 'string', enum: ['act', 'read'], description: 'Pruning mode. "act" (default) keeps interactive elements and short labels — best for clicking/filling. "read" keeps paragraphs, headings, and long text — best for articles, docs, and content extraction. If a previous snapshot looked empty on a content-heavy page, retry with "read".' },
|
|
130
177
|
maxChars: { type: 'number', description: 'Max chars to return inline. Larger snapshots are saved to .barebrowse/ and a file path is returned instead. Default: 30000.' },
|
|
131
178
|
},
|
|
132
179
|
},
|
|
@@ -221,8 +268,92 @@ const TOOLS = [
|
|
|
221
268
|
},
|
|
222
269
|
},
|
|
223
270
|
},
|
|
271
|
+
{
|
|
272
|
+
name: 'reload',
|
|
273
|
+
description: 'Reload the current page in the session. Returns ok — call snapshot to observe.',
|
|
274
|
+
inputSchema: {
|
|
275
|
+
type: 'object',
|
|
276
|
+
properties: {
|
|
277
|
+
ignoreCache: { type: 'boolean', description: 'Bypass HTTP cache (hard reload). Default: false.' },
|
|
278
|
+
},
|
|
279
|
+
},
|
|
280
|
+
},
|
|
281
|
+
{
|
|
282
|
+
name: 'screenshot',
|
|
283
|
+
description: 'Capture a screenshot of the current page. Saves to .barebrowse/screenshot-*.png (or .jpeg/.webp) and returns the file path. Use the file with your image tools.',
|
|
284
|
+
inputSchema: {
|
|
285
|
+
type: 'object',
|
|
286
|
+
properties: {
|
|
287
|
+
format: { type: 'string', enum: ['png', 'jpeg', 'webp'], description: 'Image format (default: png)' },
|
|
288
|
+
quality: { type: 'number', description: 'JPEG/WebP quality 0-100 (default: 80, ignored for PNG)' },
|
|
289
|
+
},
|
|
290
|
+
},
|
|
291
|
+
},
|
|
292
|
+
{
|
|
293
|
+
name: 'wait_for',
|
|
294
|
+
description: 'Wait for visible text or a CSS selector to appear on the current page. Returns ok when found, throws on timeout.',
|
|
295
|
+
inputSchema: {
|
|
296
|
+
type: 'object',
|
|
297
|
+
properties: {
|
|
298
|
+
text: { type: 'string', description: 'Substring that must appear in document.body.innerText' },
|
|
299
|
+
selector: { type: 'string', description: 'CSS selector that must match document.querySelector' },
|
|
300
|
+
timeout: { type: 'number', description: 'Timeout in ms (default: 30000)' },
|
|
301
|
+
},
|
|
302
|
+
},
|
|
303
|
+
},
|
|
304
|
+
{
|
|
305
|
+
name: 'tabs',
|
|
306
|
+
description: 'List open tabs in the session, or switch to one by index. Returns JSON array of { index, url, title } or "ok" after switch.',
|
|
307
|
+
inputSchema: {
|
|
308
|
+
type: 'object',
|
|
309
|
+
properties: {
|
|
310
|
+
switchTo: { type: 'number', description: 'Tab index to activate. Omit to just list tabs.' },
|
|
311
|
+
},
|
|
312
|
+
},
|
|
313
|
+
},
|
|
314
|
+
{
|
|
315
|
+
name: 'select',
|
|
316
|
+
description: 'Set the value of a <select> dropdown (or custom listbox) by ref. Returns ok.',
|
|
317
|
+
inputSchema: {
|
|
318
|
+
type: 'object',
|
|
319
|
+
properties: {
|
|
320
|
+
ref: { type: 'string', description: 'Element ref from snapshot' },
|
|
321
|
+
value: { type: 'string', description: 'Option value or visible text to select' },
|
|
322
|
+
},
|
|
323
|
+
required: ['ref', 'value'],
|
|
324
|
+
},
|
|
325
|
+
},
|
|
326
|
+
{
|
|
327
|
+
name: 'hover',
|
|
328
|
+
description: 'Hover over an element by ref (triggers tooltips, hover menus). Returns ok.',
|
|
329
|
+
inputSchema: {
|
|
330
|
+
type: 'object',
|
|
331
|
+
properties: {
|
|
332
|
+
ref: { type: 'string', description: 'Element ref from snapshot' },
|
|
333
|
+
},
|
|
334
|
+
required: ['ref'],
|
|
335
|
+
},
|
|
336
|
+
},
|
|
224
337
|
];
|
|
225
338
|
|
|
339
|
+
// Powerful escape hatch — guarded behind an explicit env-var opt-in.
|
|
340
|
+
// Runtime.evaluate in the user's authenticated session lets an agent read
|
|
341
|
+
// cookies/localStorage, dispatch arbitrary events, hit any endpoint, etc.
|
|
342
|
+
// Off by default; flip BAREBROWSE_MCP_EVAL=1 to enable.
|
|
343
|
+
if (process.env.BAREBROWSE_MCP_EVAL === '1') {
|
|
344
|
+
TOOLS.push({
|
|
345
|
+
name: 'eval',
|
|
346
|
+
description: 'Run a JavaScript expression in the current page and return the result. POWERFUL: full access to the authenticated session — DOM, cookies, localStorage, fetch. Enabled because BAREBROWSE_MCP_EVAL=1 is set.',
|
|
347
|
+
inputSchema: {
|
|
348
|
+
type: 'object',
|
|
349
|
+
properties: {
|
|
350
|
+
expression: { type: 'string', description: 'JavaScript expression to evaluate' },
|
|
351
|
+
},
|
|
352
|
+
required: ['expression'],
|
|
353
|
+
},
|
|
354
|
+
});
|
|
355
|
+
}
|
|
356
|
+
|
|
226
357
|
// Add assess tool if wearehere is installed
|
|
227
358
|
if (assessFn) {
|
|
228
359
|
TOOLS.push({
|
|
@@ -245,7 +376,7 @@ async function handleToolCall(name, args) {
|
|
|
245
376
|
case 'browse': {
|
|
246
377
|
let timer;
|
|
247
378
|
const text = await Promise.race([
|
|
248
|
-
browse(args.url, { mode: args.mode }),
|
|
379
|
+
browse(args.url, { mode: args.mode, pruneMode: args.pruneMode }),
|
|
249
380
|
new Promise((_, rej) => { timer = setTimeout(() => rej(new Error('browse timed out after 60s')), 60000); }),
|
|
250
381
|
]);
|
|
251
382
|
clearTimeout(timer);
|
|
@@ -261,32 +392,32 @@ async function handleToolCall(name, args) {
|
|
|
261
392
|
try { await page.injectCookies(args.url); } catch {}
|
|
262
393
|
await page.goto(args.url);
|
|
263
394
|
return 'ok';
|
|
264
|
-
},
|
|
395
|
+
}, TIMEOUTS.goto);
|
|
265
396
|
case 'snapshot': return withRetry(async () => {
|
|
266
397
|
const page = await getPage();
|
|
267
|
-
const text = await page.snapshot();
|
|
398
|
+
const text = await page.snapshot(args.pruneMode ? { mode: args.pruneMode } : undefined);
|
|
268
399
|
const limit = args.maxChars ?? MAX_CHARS_DEFAULT;
|
|
269
400
|
if (text.length > limit) {
|
|
270
401
|
const file = saveSnapshot(text);
|
|
271
402
|
return `Snapshot (${text.length} chars) saved to ${file}`;
|
|
272
403
|
}
|
|
273
404
|
return text;
|
|
274
|
-
},
|
|
405
|
+
}, TIMEOUTS.snapshot);
|
|
275
406
|
case 'click': return withRetry(async () => {
|
|
276
407
|
const page = await getPage();
|
|
277
408
|
await page.click(args.ref);
|
|
278
409
|
return 'ok';
|
|
279
|
-
},
|
|
410
|
+
}, TIMEOUTS.click, { retry: false });
|
|
280
411
|
case 'type': return withRetry(async () => {
|
|
281
412
|
const page = await getPage();
|
|
282
413
|
await page.type(args.ref, args.text, { clear: args.clear });
|
|
283
414
|
return 'ok';
|
|
284
|
-
},
|
|
415
|
+
}, TIMEOUTS.type, { retry: false });
|
|
285
416
|
case 'press': return withRetry(async () => {
|
|
286
417
|
const page = await getPage();
|
|
287
418
|
await page.press(args.key);
|
|
288
419
|
return 'ok';
|
|
289
|
-
},
|
|
420
|
+
}, TIMEOUTS.press, { retry: false });
|
|
290
421
|
case 'scroll': return withRetry(async () => {
|
|
291
422
|
const page = await getPage();
|
|
292
423
|
let dy = args.deltaY;
|
|
@@ -298,31 +429,90 @@ async function handleToolCall(name, args) {
|
|
|
298
429
|
}
|
|
299
430
|
await page.scroll(dy);
|
|
300
431
|
return 'ok';
|
|
301
|
-
},
|
|
432
|
+
}, TIMEOUTS.scroll, { retry: false });
|
|
302
433
|
case 'back': return withRetry(async () => {
|
|
303
434
|
const page = await getPage();
|
|
304
435
|
await page.goBack();
|
|
305
436
|
return 'ok';
|
|
306
|
-
},
|
|
437
|
+
}, TIMEOUTS.back, { retry: false });
|
|
307
438
|
case 'forward': return withRetry(async () => {
|
|
308
439
|
const page = await getPage();
|
|
309
440
|
await page.goForward();
|
|
310
441
|
return 'ok';
|
|
311
|
-
},
|
|
442
|
+
}, TIMEOUTS.forward, { retry: false });
|
|
312
443
|
case 'drag': return withRetry(async () => {
|
|
313
444
|
const page = await getPage();
|
|
314
445
|
await page.drag(args.fromRef, args.toRef);
|
|
315
446
|
return 'ok';
|
|
316
|
-
},
|
|
447
|
+
}, TIMEOUTS.drag, { retry: false });
|
|
317
448
|
case 'upload': return withRetry(async () => {
|
|
318
449
|
const page = await getPage();
|
|
319
450
|
await page.upload(args.ref, args.files);
|
|
320
451
|
return 'ok';
|
|
321
|
-
},
|
|
452
|
+
}, TIMEOUTS.upload, { retry: false });
|
|
322
453
|
case 'pdf': return withRetry(async () => {
|
|
323
454
|
const page = await getPage();
|
|
324
455
|
return await page.pdf({ landscape: args.landscape });
|
|
325
|
-
},
|
|
456
|
+
}, TIMEOUTS.pdf);
|
|
457
|
+
case 'reload': return withRetry(async () => {
|
|
458
|
+
const page = await getPage();
|
|
459
|
+
await page.reload({ ignoreCache: !!args.ignoreCache });
|
|
460
|
+
return 'ok';
|
|
461
|
+
}, TIMEOUTS.reload);
|
|
462
|
+
case 'screenshot': return withRetry(async () => {
|
|
463
|
+
const page = await getPage();
|
|
464
|
+
const format = args.format || 'png';
|
|
465
|
+
const b64 = await page.screenshot({ format, quality: args.quality });
|
|
466
|
+
mkdirSync(OUTPUT_DIR, { recursive: true });
|
|
467
|
+
const ts = new Date().toISOString().replace(/[:.]/g, '-');
|
|
468
|
+
const file = join(OUTPUT_DIR, `screenshot-${ts}.${format}`);
|
|
469
|
+
writeFileSync(file, Buffer.from(b64, 'base64'));
|
|
470
|
+
return file;
|
|
471
|
+
}, TIMEOUTS.screenshot);
|
|
472
|
+
case 'wait_for': return withRetry(async () => {
|
|
473
|
+
const page = await getPage();
|
|
474
|
+
await page.waitFor({ text: args.text, selector: args.selector, timeout: args.timeout });
|
|
475
|
+
return 'ok';
|
|
476
|
+
}, TIMEOUTS.wait_for, { retry: false });
|
|
477
|
+
case 'tabs': return withRetry(async () => {
|
|
478
|
+
const page = await getPage();
|
|
479
|
+
if (typeof args.switchTo === 'number') {
|
|
480
|
+
await page.switchTab(args.switchTo);
|
|
481
|
+
return 'ok';
|
|
482
|
+
}
|
|
483
|
+
const list = await page.tabs();
|
|
484
|
+
return JSON.stringify(list, null, 2);
|
|
485
|
+
}, TIMEOUTS.tabs, { retry: false });
|
|
486
|
+
case 'select': return withRetry(async () => {
|
|
487
|
+
const page = await getPage();
|
|
488
|
+
await page.select(args.ref, args.value);
|
|
489
|
+
return 'ok';
|
|
490
|
+
}, TIMEOUTS.select, { retry: false });
|
|
491
|
+
case 'hover': return withRetry(async () => {
|
|
492
|
+
const page = await getPage();
|
|
493
|
+
await page.hover(args.ref);
|
|
494
|
+
return 'ok';
|
|
495
|
+
}, TIMEOUTS.hover, { retry: false });
|
|
496
|
+
case 'eval': {
|
|
497
|
+
// Only reachable when BAREBROWSE_MCP_EVAL=1 — the tool isn't registered
|
|
498
|
+
// otherwise, but this guard is the second line of defense in case the
|
|
499
|
+
// env var changes between tools/list and tools/call.
|
|
500
|
+
if (process.env.BAREBROWSE_MCP_EVAL !== '1') {
|
|
501
|
+
throw new Error('eval is disabled. Set BAREBROWSE_MCP_EVAL=1 to enable.');
|
|
502
|
+
}
|
|
503
|
+
return withRetry(async () => {
|
|
504
|
+
const page = await getPage();
|
|
505
|
+
const { result, exceptionDetails } = await page.cdp.send('Runtime.evaluate', {
|
|
506
|
+
expression: args.expression,
|
|
507
|
+
returnByValue: true,
|
|
508
|
+
awaitPromise: true,
|
|
509
|
+
});
|
|
510
|
+
if (exceptionDetails) {
|
|
511
|
+
throw new Error(exceptionDetails.text + (exceptionDetails.exception?.description ? `: ${exceptionDetails.exception.description}` : ''));
|
|
512
|
+
}
|
|
513
|
+
return result.value === undefined ? 'undefined' : JSON.stringify(result.value);
|
|
514
|
+
}, TIMEOUTS.eval, { retry: false });
|
|
515
|
+
}
|
|
326
516
|
case 'assess': {
|
|
327
517
|
if (!assessFn) throw new Error('wearehere is not installed. Run: npm install wearehere');
|
|
328
518
|
const releaseSlot = await acquireAssessSlot();
|
|
@@ -391,7 +581,7 @@ async function handleMessage(msg) {
|
|
|
391
581
|
return jsonrpcResponse(id, {
|
|
392
582
|
protocolVersion: '2024-11-05',
|
|
393
583
|
capabilities: { tools: {} },
|
|
394
|
-
serverInfo: { name: 'barebrowse', version:
|
|
584
|
+
serverInfo: { name: 'barebrowse', version: PKG_VERSION },
|
|
395
585
|
});
|
|
396
586
|
}
|
|
397
587
|
|
|
@@ -423,55 +613,71 @@ async function handleMessage(msg) {
|
|
|
423
613
|
}
|
|
424
614
|
|
|
425
615
|
// --- Stdio transport ---
|
|
616
|
+
//
|
|
617
|
+
// Exported as runStdio() so callers (notably cli.js) can explicitly start the
|
|
618
|
+
// JSON-RPC loop. The previous "auto-start when isMain" guard broke the
|
|
619
|
+
// `npx barebrowse mcp` path because cli.js launches the server via
|
|
620
|
+
// `await import('./mcp-server.js')` — process.argv[1] is cli.js, not
|
|
621
|
+
// mcp-server.js, so isMain was false and the loop never started. Both the
|
|
622
|
+
// direct `node mcp-server.js` invocation and the cli.js path now call
|
|
623
|
+
// runStdio() explicitly. Tests import TIMEOUTS/TOOLS without calling it.
|
|
624
|
+
|
|
625
|
+
export function runStdio() {
|
|
626
|
+
// One-line startup banner to stderr (stderr because stdout is the JSON-RPC
|
|
627
|
+
// channel — must not contain non-JSON-RPC bytes). Captured by Claude Code's
|
|
628
|
+
// MCP log, makes "I added barebrowse twice and got the wrong one" issues
|
|
629
|
+
// diagnosable: the path here is the absolute file actually being served,
|
|
630
|
+
// so a scope conflict shows two different paths in two log files.
|
|
631
|
+
const _selfPath = fileURLToPath(import.meta.url);
|
|
632
|
+
process.stderr.write(`barebrowse mcp v${PKG_VERSION} | serving from ${_selfPath} | pid ${process.pid}\n`);
|
|
633
|
+
|
|
634
|
+
let buffer = '';
|
|
635
|
+
|
|
636
|
+
process.stdin.setEncoding('utf8');
|
|
637
|
+
process.stdin.on('data', (chunk) => {
|
|
638
|
+
buffer += chunk;
|
|
639
|
+
|
|
640
|
+
let newlineIdx;
|
|
641
|
+
while ((newlineIdx = buffer.indexOf('\n')) !== -1) {
|
|
642
|
+
const line = buffer.slice(0, newlineIdx).trim();
|
|
643
|
+
buffer = buffer.slice(newlineIdx + 1);
|
|
644
|
+
if (!line) continue;
|
|
426
645
|
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
process.stdin.setEncoding('utf8');
|
|
430
|
-
process.stdin.on('data', (chunk) => {
|
|
431
|
-
buffer += chunk;
|
|
432
|
-
|
|
433
|
-
let newlineIdx;
|
|
434
|
-
while ((newlineIdx = buffer.indexOf('\n')) !== -1) {
|
|
435
|
-
const line = buffer.slice(0, newlineIdx).trim();
|
|
436
|
-
buffer = buffer.slice(newlineIdx + 1);
|
|
437
|
-
if (!line) continue;
|
|
438
|
-
|
|
439
|
-
try {
|
|
440
|
-
const msg = JSON.parse(line);
|
|
441
|
-
|
|
442
|
-
handleMessage(msg).then((response) => {
|
|
443
|
-
if (response) {
|
|
646
|
+
try {
|
|
647
|
+
const msg = JSON.parse(line);
|
|
444
648
|
|
|
445
|
-
|
|
649
|
+
handleMessage(msg).then((response) => {
|
|
650
|
+
if (response) {
|
|
651
|
+
process.stdout.write(response + '\n');
|
|
652
|
+
}
|
|
653
|
+
}).catch((err) => {
|
|
654
|
+
process.stdout.write(jsonrpcError(msg.id, -32700, `Error: ${err.message}`) + '\n');
|
|
655
|
+
});
|
|
656
|
+
} catch (err) {
|
|
657
|
+
process.stdout.write(jsonrpcError(null, -32700, `Parse error: ${err.message}`) + '\n');
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
});
|
|
446
661
|
|
|
447
|
-
|
|
448
|
-
|
|
662
|
+
// Prevent unhandled rejections and uncaught exceptions from crashing the server.
|
|
663
|
+
// Browser OOM/crash rejects all pending CDP promises — some may not be awaited.
|
|
664
|
+
process.on('unhandledRejection', () => { _page = null; });
|
|
665
|
+
process.on('uncaughtException', () => { _page = null; });
|
|
449
666
|
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
667
|
+
// Clean up on exit
|
|
668
|
+
process.on('SIGINT', async () => {
|
|
669
|
+
if (_page) await _page.close().catch(() => {});
|
|
670
|
+
process.exit(0);
|
|
671
|
+
});
|
|
672
|
+
process.on('SIGTERM', async () => {
|
|
673
|
+
if (_page) await _page.close().catch(() => {});
|
|
674
|
+
process.exit(0);
|
|
675
|
+
});
|
|
676
|
+
}
|
|
453
677
|
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
// Browser OOM/crash rejects all pending CDP promises — some may not be awaited.
|
|
461
|
-
process.on('unhandledRejection', (err) => {
|
|
462
|
-
_page = null;
|
|
463
|
-
});
|
|
464
|
-
process.on('uncaughtException', (err) => {
|
|
465
|
-
_page = null;
|
|
466
|
-
});
|
|
467
|
-
|
|
468
|
-
// Clean up on exit
|
|
469
|
-
process.on('SIGINT', async () => {
|
|
470
|
-
if (_page) await _page.close().catch(() => {});
|
|
471
|
-
process.exit(0);
|
|
472
|
-
});
|
|
473
|
-
|
|
474
|
-
process.on('SIGTERM', async () => {
|
|
475
|
-
if (_page) await _page.close().catch(() => {});
|
|
476
|
-
process.exit(0);
|
|
477
|
-
});
|
|
678
|
+
// Direct invocation (`node mcp-server.js`) still works without cli.js — auto-
|
|
679
|
+
// start if this file IS process.argv[1]. The cli.js path imports + calls
|
|
680
|
+
// runStdio() explicitly so we never depend on argv[1] matching.
|
|
681
|
+
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
|
|
682
|
+
runStdio();
|
|
683
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "barebrowse",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.9.1",
|
|
4
4
|
"description": "Authenticated web browsing for autonomous agents via CDP. URL in, pruned ARIA snapshot out.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "src/index.js",
|
|
@@ -31,5 +31,5 @@
|
|
|
31
31
|
"optionalDependencies": {
|
|
32
32
|
"wearehere": "^1.0.0"
|
|
33
33
|
},
|
|
34
|
-
"license": "
|
|
34
|
+
"license": "Apache-2.0"
|
|
35
35
|
}
|
package/src/bareagent.js
CHANGED
|
@@ -50,10 +50,11 @@ export function createBrowseTools(opts = {}) {
|
|
|
50
50
|
type: 'object',
|
|
51
51
|
properties: {
|
|
52
52
|
url: { type: 'string', description: 'URL to browse' },
|
|
53
|
+
pruneMode: { type: 'string', enum: ['act', 'read'], description: '"act" (default) for interactive elements only; "read" for paragraphs and long text (articles/docs).' },
|
|
53
54
|
},
|
|
54
55
|
required: ['url'],
|
|
55
56
|
},
|
|
56
|
-
execute: async ({ url }) => await browse(url, opts),
|
|
57
|
+
execute: async ({ url, pruneMode }) => await browse(url, pruneMode ? { ...opts, pruneMode } : opts),
|
|
57
58
|
},
|
|
58
59
|
{
|
|
59
60
|
name: 'goto',
|
|
@@ -70,10 +71,15 @@ export function createBrowseTools(opts = {}) {
|
|
|
70
71
|
{
|
|
71
72
|
name: 'snapshot',
|
|
72
73
|
description: 'Get the current ARIA snapshot. Returns a YAML-like tree with [ref=N] markers on interactive elements.',
|
|
73
|
-
parameters: {
|
|
74
|
-
|
|
74
|
+
parameters: {
|
|
75
|
+
type: 'object',
|
|
76
|
+
properties: {
|
|
77
|
+
pruneMode: { type: 'string', enum: ['act', 'read'], description: '"act" (default) for interactive elements only; "read" for paragraphs and long text (articles/docs).' },
|
|
78
|
+
},
|
|
79
|
+
},
|
|
80
|
+
execute: async ({ pruneMode } = {}) => {
|
|
75
81
|
const page = await getPage();
|
|
76
|
-
return await page.snapshot();
|
|
82
|
+
return await page.snapshot(pruneMode ? { mode: pruneMode } : undefined);
|
|
77
83
|
},
|
|
78
84
|
},
|
|
79
85
|
{
|
|
@@ -244,6 +250,39 @@ export function createBrowseTools(opts = {}) {
|
|
|
244
250
|
return await page.screenshot({ format });
|
|
245
251
|
},
|
|
246
252
|
},
|
|
253
|
+
{
|
|
254
|
+
name: 'reload',
|
|
255
|
+
description: 'Reload the current page. Returns the updated snapshot.',
|
|
256
|
+
parameters: {
|
|
257
|
+
type: 'object',
|
|
258
|
+
properties: {
|
|
259
|
+
ignoreCache: { type: 'boolean', description: 'Bypass HTTP cache (hard reload). Default: false.' },
|
|
260
|
+
},
|
|
261
|
+
},
|
|
262
|
+
execute: async ({ ignoreCache } = {}) => actionAndSnapshot((page) => page.reload({ ignoreCache })),
|
|
263
|
+
},
|
|
264
|
+
{
|
|
265
|
+
name: 'wait_for',
|
|
266
|
+
description: 'Wait for visible text or a CSS selector to appear on the current page. Returns the updated snapshot once found.',
|
|
267
|
+
parameters: {
|
|
268
|
+
type: 'object',
|
|
269
|
+
properties: {
|
|
270
|
+
text: { type: 'string', description: 'Substring that must appear in document.body.innerText' },
|
|
271
|
+
selector: { type: 'string', description: 'CSS selector that must match document.querySelector' },
|
|
272
|
+
timeout: { type: 'number', description: 'Timeout in ms (default: 30000)' },
|
|
273
|
+
},
|
|
274
|
+
},
|
|
275
|
+
execute: async ({ text, selector, timeout } = {}) => actionAndSnapshot((page) => page.waitFor({ text, selector, timeout })),
|
|
276
|
+
},
|
|
277
|
+
{
|
|
278
|
+
name: 'downloads',
|
|
279
|
+
description: 'List files captured via Content-Disposition: attachment downloads during this session. Returns JSON array of { url, suggestedFilename, savedPath, state, totalBytes, receivedBytes } per file.',
|
|
280
|
+
parameters: { type: 'object', properties: {} },
|
|
281
|
+
execute: async () => {
|
|
282
|
+
const page = await getPage();
|
|
283
|
+
return JSON.stringify(page.downloads.map((d) => ({ ...d })), null, 2);
|
|
284
|
+
},
|
|
285
|
+
},
|
|
247
286
|
];
|
|
248
287
|
|
|
249
288
|
// Add assess tool if wearehere is installed
|