barebrowse 0.10.1 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +100 -0
- package/README.md +13 -0
- package/barebrowse.context.md +14 -2
- package/cli.js +8 -0
- package/package.json +38 -6
- package/src/auth.js +35 -10
- package/src/bareagent.js +16 -5
- package/src/cdp.js +4 -2
- package/src/chromium.js +19 -6
- package/src/daemon.js +54 -12
- package/src/index.js +50 -2
- package/src/network-idle.js +4 -1
- package/src/prune.js +1 -1
- package/src/session-client.js +6 -2
- package/src/url-guard.js +138 -0
- package/src/wearehere.d.ts +6 -0
- package/types/aria.d.ts +17 -0
- package/types/auth.d.ts +35 -0
- package/types/bareagent.d.ts +25 -0
- package/types/blocklist.d.ts +21 -0
- package/types/cdp.d.ts +16 -0
- package/types/chromium.d.ts +58 -0
- package/types/consent.d.ts +9 -0
- package/types/daemon.d.ts +10 -0
- package/types/index.d.ts +138 -0
- package/types/interact.d.ts +79 -0
- package/types/network-idle.d.ts +19 -0
- package/types/prune.d.ts +13 -0
- package/types/session-client.d.ts +19 -0
- package/types/stealth.d.ts +14 -0
- package/types/url-guard.d.ts +26 -0
- package/commands/barebrowse/SKILL.md +0 -133
- package/commands/barebrowse.md +0 -132
package/src/daemon.js
CHANGED
|
@@ -8,9 +8,25 @@
|
|
|
8
8
|
import { createServer } from 'node:http';
|
|
9
9
|
import { spawn } from 'node:child_process';
|
|
10
10
|
import { writeFileSync, mkdirSync, existsSync, readFileSync, unlinkSync, chmodSync } from 'node:fs';
|
|
11
|
+
import { randomBytes, timingSafeEqual } from 'node:crypto';
|
|
11
12
|
import { join, resolve } from 'node:path';
|
|
12
13
|
import { connect } from './index.js';
|
|
13
14
|
|
|
15
|
+
/**
 * Owner-only file write helper — daemon artifacts can hold authenticated content.
 *
 * The `mode` option of writeFileSync only takes effect when the file is
 * created; a file that already exists keeps whatever permissions it had.
 * The follow-up chmod tightens a pre-existing file to 0600 as well.
 *
 * @param {string} path - Destination file path.
 * @param {string|Buffer} data - Content to write.
 */
function writeFilePrivate(path, data) {
  writeFileSync(path, data, { mode: 0o600 });
  try {
    chmodSync(path, 0o600);
  } catch {
    // Best effort: the content is already written; a filesystem that rejects
    // chmod should not turn a successful write into a failure.
  }
}
|
|
19
|
+
|
|
20
|
+
/**
 * Constant-time token compare; false on any length/format mismatch.
 *
 * Lengths are compared in bytes (not UTF-16 code units) so the buffers handed
 * to timingSafeEqual are always the same size. An empty or non-string
 * `expected` never matches, so a missing token cannot authenticate an
 * empty header.
 *
 * @param {string} expected - The secret token the caller must present.
 * @param {unknown} got - Value received from the client (e.g. a request header).
 * @returns {boolean} true only when `got` is a string equal to `expected`.
 */
function tokenMatches(expected, got) {
  if (typeof expected !== 'string' || expected.length === 0) return false;
  if (typeof got !== 'string') return false;
  const want = Buffer.from(expected);
  const have = Buffer.from(got);
  // timingSafeEqual throws on unequal lengths; the token length is not secret.
  if (have.length !== want.length) return false;
  return timingSafeEqual(have, want);
}
|
|
29
|
+
|
|
14
30
|
const SESSION_FILE = 'session.json';
|
|
15
31
|
|
|
16
32
|
/**
|
|
@@ -19,7 +35,7 @@ const SESSION_FILE = 'session.json';
|
|
|
19
35
|
*/
|
|
20
36
|
export async function startDaemon(opts, outputDir, initialUrl) {
|
|
21
37
|
const absDir = resolve(outputDir);
|
|
22
|
-
mkdirSync(absDir, { recursive: true });
|
|
38
|
+
mkdirSync(absDir, { recursive: true, mode: 0o700 });
|
|
23
39
|
|
|
24
40
|
// Clean stale session
|
|
25
41
|
const sessionPath = join(absDir, SESSION_FILE);
|
|
@@ -44,6 +60,8 @@ export async function startDaemon(opts, outputDir, initialUrl) {
|
|
|
44
60
|
if (Array.isArray(opts.blockUrls)) {
|
|
45
61
|
for (const p of opts.blockUrls) args.push('--block-urls', p);
|
|
46
62
|
}
|
|
63
|
+
if (opts.blockPrivateNetwork) args.push('--block-private-network');
|
|
64
|
+
if (opts.uploadDir) args.push('--upload-dir', opts.uploadDir);
|
|
47
65
|
|
|
48
66
|
const child = spawn(process.execPath, args, {
|
|
49
67
|
detached: true,
|
|
@@ -75,7 +93,13 @@ export async function startDaemon(opts, outputDir, initialUrl) {
|
|
|
75
93
|
*/
|
|
76
94
|
export async function runDaemon(opts, outputDir, initialUrl) {
|
|
77
95
|
const absDir = resolve(outputDir);
|
|
78
|
-
mkdirSync(absDir, { recursive: true });
|
|
96
|
+
mkdirSync(absDir, { recursive: true, mode: 0o700 });
|
|
97
|
+
|
|
98
|
+
// Per-session auth token. The daemon binds to loopback, but loopback is
|
|
99
|
+
// shared across local users — without a token any local user/process could
|
|
100
|
+
// POST /command and drive the authenticated browser (incl. `eval`). The
|
|
101
|
+
// token is written into session.json (mode 0600) so only the owner reads it.
|
|
102
|
+
const authToken = randomBytes(32).toString('hex');
|
|
79
103
|
|
|
80
104
|
// Connect to browser
|
|
81
105
|
const page = await connect({
|
|
@@ -88,6 +112,8 @@ export async function runDaemon(opts, outputDir, initialUrl) {
|
|
|
88
112
|
downloadPath: opts.downloadPath,
|
|
89
113
|
blockAds: opts.blockAds,
|
|
90
114
|
blockUrls: opts.blockUrls,
|
|
115
|
+
blockPrivateNetwork: opts.blockPrivateNetwork,
|
|
116
|
+
uploadDir: opts.uploadDir,
|
|
91
117
|
});
|
|
92
118
|
|
|
93
119
|
// Console log capture
|
|
@@ -161,7 +187,7 @@ export async function runDaemon(opts, outputDir, initialUrl) {
|
|
|
161
187
|
const text = await page.snapshot({ mode: pruneMode });
|
|
162
188
|
const ts = new Date().toISOString().replace(/[:.]/g, '-');
|
|
163
189
|
const file = join(absDir, `page-${ts}.yml`);
|
|
164
|
-
|
|
190
|
+
writeFilePrivate(file, text);
|
|
165
191
|
return { ok: true, file };
|
|
166
192
|
},
|
|
167
193
|
|
|
@@ -170,7 +196,7 @@ export async function runDaemon(opts, outputDir, initialUrl) {
|
|
|
170
196
|
const ts = new Date().toISOString().replace(/[:.]/g, '-');
|
|
171
197
|
const ext = format || 'png';
|
|
172
198
|
const file = join(absDir, `screenshot-${ts}.${ext}`);
|
|
173
|
-
|
|
199
|
+
writeFilePrivate(file, Buffer.from(data, 'base64'));
|
|
174
200
|
return { ok: true, file };
|
|
175
201
|
},
|
|
176
202
|
|
|
@@ -244,7 +270,7 @@ export async function runDaemon(opts, outputDir, initialUrl) {
|
|
|
244
270
|
const data = await page.pdf({ landscape });
|
|
245
271
|
const ts = new Date().toISOString().replace(/[:.]/g, '-');
|
|
246
272
|
const file = join(absDir, `page-${ts}.pdf`);
|
|
247
|
-
|
|
273
|
+
writeFilePrivate(file, Buffer.from(data, 'base64'));
|
|
248
274
|
return { ok: true, file };
|
|
249
275
|
},
|
|
250
276
|
|
|
@@ -273,7 +299,7 @@ export async function runDaemon(opts, outputDir, initialUrl) {
|
|
|
273
299
|
async 'dialog-log'() {
|
|
274
300
|
const ts = new Date().toISOString().replace(/[:.]/g, '-');
|
|
275
301
|
const file = join(absDir, `dialogs-${ts}.json`);
|
|
276
|
-
|
|
302
|
+
writeFilePrivate(file, JSON.stringify(page.dialogLog, null, 2));
|
|
277
303
|
return { ok: true, file, count: page.dialogLog.length };
|
|
278
304
|
},
|
|
279
305
|
|
|
@@ -304,7 +330,7 @@ export async function runDaemon(opts, outputDir, initialUrl) {
|
|
|
304
330
|
if (level) logs = logs.filter((l) => l.type === level);
|
|
305
331
|
const ts = new Date().toISOString().replace(/[:.]/g, '-');
|
|
306
332
|
const file = join(absDir, `console-${ts}.json`);
|
|
307
|
-
|
|
333
|
+
writeFilePrivate(file, JSON.stringify(logs, null, 2));
|
|
308
334
|
if (clear) consoleLogs.length = 0;
|
|
309
335
|
return { ok: true, file, count: logs.length };
|
|
310
336
|
},
|
|
@@ -314,7 +340,7 @@ export async function runDaemon(opts, outputDir, initialUrl) {
|
|
|
314
340
|
if (failed) logs = logs.filter((l) => l.status === 0 || l.status >= 400);
|
|
315
341
|
const ts = new Date().toISOString().replace(/[:.]/g, '-');
|
|
316
342
|
const file = join(absDir, `network-${ts}.json`);
|
|
317
|
-
|
|
343
|
+
writeFilePrivate(file, JSON.stringify(logs, null, 2));
|
|
318
344
|
return { ok: true, file, count: logs.length };
|
|
319
345
|
},
|
|
320
346
|
|
|
@@ -346,6 +372,14 @@ export async function runDaemon(opts, outputDir, initialUrl) {
|
|
|
346
372
|
return;
|
|
347
373
|
}
|
|
348
374
|
|
|
375
|
+
// Require the per-session token. Rejects any local process that hasn't
|
|
376
|
+
// read session.json (which is owner-only). Constant-time compare.
|
|
377
|
+
if (!tokenMatches(authToken, req.headers['x-barebrowse-token'])) {
|
|
378
|
+
res.writeHead(401, { 'Content-Type': 'application/json' });
|
|
379
|
+
res.end(JSON.stringify({ ok: false, error: 'Unauthorized: missing or invalid token' }));
|
|
380
|
+
return;
|
|
381
|
+
}
|
|
382
|
+
|
|
349
383
|
let body = '';
|
|
350
384
|
for await (const chunk of req) body += chunk;
|
|
351
385
|
|
|
@@ -382,17 +416,25 @@ export async function runDaemon(opts, outputDir, initialUrl) {
|
|
|
382
416
|
}
|
|
383
417
|
});
|
|
384
418
|
|
|
385
|
-
|
|
419
|
+
/** @type {Promise<void>} */
|
|
420
|
+
const listening = new Promise((resolve) => {
|
|
386
421
|
server.listen(0, '127.0.0.1', () => resolve());
|
|
387
422
|
});
|
|
423
|
+
await listening;
|
|
388
424
|
|
|
389
|
-
const
|
|
425
|
+
const address = server.address();
|
|
426
|
+
if (!address || typeof address === 'string') {
|
|
427
|
+
throw new Error('Daemon server failed to bind to a TCP port');
|
|
428
|
+
}
|
|
429
|
+
const port = address.port;
|
|
390
430
|
|
|
391
|
-
// Write session.json so parent/clients can find us
|
|
431
|
+
// Write session.json so parent/clients can find us. Owner-only: it carries
|
|
432
|
+
// the auth token that gates /command.
|
|
392
433
|
const sessionPath = join(absDir, SESSION_FILE);
|
|
393
|
-
|
|
434
|
+
writeFilePrivate(sessionPath, JSON.stringify({
|
|
394
435
|
port,
|
|
395
436
|
pid: process.pid,
|
|
437
|
+
token: authToken,
|
|
396
438
|
startedAt: new Date().toISOString(),
|
|
397
439
|
}));
|
|
398
440
|
|
package/src/index.js
CHANGED
|
@@ -18,7 +18,9 @@ import { dismissConsent } from './consent.js';
|
|
|
18
18
|
import { applyStealth } from './stealth.js';
|
|
19
19
|
import { DEFAULT_BLOCKLIST } from './blocklist.js';
|
|
20
20
|
import { waitForNetworkIdle } from './network-idle.js';
|
|
21
|
+
import { assertNavigable, assertUploadAllowed } from './url-guard.js';
|
|
21
22
|
import { join as pathJoin } from 'node:path';
|
|
23
|
+
import { chmodSync } from 'node:fs';
|
|
22
24
|
|
|
23
25
|
/**
|
|
24
26
|
* Browse a URL and return an ARIA snapshot.
|
|
@@ -35,12 +37,27 @@ import { join as pathJoin } from 'node:path';
|
|
|
35
37
|
* See src/blocklist.js for the default set. Set false to disable.
|
|
36
38
|
* @param {string[]} [opts.blockUrls] - Extra URL glob patterns to block,
|
|
37
39
|
* merged with the default unless blockAds:false.
|
|
40
|
+
* @param {boolean} [opts.allowLocalUrls=false] - Permit navigation to local-
|
|
41
|
+
* resource schemes (file:, view-source:, chrome:, …). Blocked by default.
|
|
42
|
+
* @param {boolean} [opts.blockPrivateNetwork=false] - Reject navigation to
|
|
43
|
+
* loopback / RFC-1918 / link-local / cloud-metadata hosts (SSRF guard).
|
|
44
|
+
* @param {string} [opts.proxy] - Proxy server (e.g. 'http://host:port').
|
|
45
|
+
* @param {string} [opts.binary] - Path to browser binary (auto-detected if omitted).
|
|
46
|
+
* @param {string} [opts.userDataDir] - Browser profile directory.
|
|
47
|
+
* @param {{width: number, height: number}} [opts.viewport] - Viewport dimensions.
|
|
48
|
+
* @param {string} [opts.browser] - Source browser for cookie extraction.
|
|
49
|
+
* @param {boolean} [opts.consent=true] - Auto-dismiss cookie consent dialogs.
|
|
50
|
+
* @param {'act'|'browse'|'navigate'|'full'|'read'} [opts.pruneMode='act'] - Pruning mode.
|
|
38
51
|
* @returns {Promise<string>} ARIA snapshot text
|
|
39
52
|
*/
|
|
40
53
|
export async function browse(url, opts = {}) {
|
|
41
54
|
const mode = opts.mode || 'headless';
|
|
42
55
|
const timeout = opts.timeout || 30000;
|
|
43
56
|
|
|
57
|
+
// Reject local-resource schemes (and optionally private hosts) before we
|
|
58
|
+
// spend a browser launch on a URL we won't navigate to.
|
|
59
|
+
assertNavigable(url, { allowLocalUrls: opts.allowLocalUrls, blockPrivateNetwork: opts.blockPrivateNetwork });
|
|
60
|
+
|
|
44
61
|
let browser = null;
|
|
45
62
|
let cdp = null;
|
|
46
63
|
// Forward caller-supplied launch knobs (binary, userDataDir, proxy) into
|
|
@@ -154,6 +171,23 @@ export async function browse(url, opts = {}) {
|
|
|
154
171
|
* attached to and follows the session across switchTab() until close.
|
|
155
172
|
* @param {string[]} [opts.blockUrls] - Extra URL glob patterns to block,
|
|
156
173
|
* merged with the default unless blockAds is false.
|
|
174
|
+
* @param {boolean} [opts.allowLocalUrls=false] - Permit navigation to local-
|
|
175
|
+
* resource schemes (file:, view-source:, chrome:, …). Blocked by default
|
|
176
|
+
* because a prompt-injected agent could use them to read local files.
|
|
177
|
+
* @param {boolean} [opts.blockPrivateNetwork=false] - Reject navigation to
|
|
178
|
+
* loopback / RFC-1918 / link-local / cloud-metadata hosts (SSRF guard).
|
|
179
|
+
* Off by default so localhost dev-server browsing keeps working.
|
|
180
|
+
* @param {string} [opts.uploadDir] - When set, upload() rejects any file that
|
|
181
|
+
* does not resolve (symlinks included) inside this directory. Sandboxes the
|
|
182
|
+
* agent's file-upload capability. Default: no restriction.
|
|
183
|
+
* @param {string} [opts.proxy] - Proxy server (e.g. 'http://host:port').
|
|
184
|
+
* @param {string} [opts.binary] - Path to browser binary (auto-detected if omitted).
|
|
185
|
+
* @param {string} [opts.userDataDir] - Browser profile directory.
|
|
186
|
+
* @param {{width: number, height: number}} [opts.viewport] - Viewport dimensions.
|
|
187
|
+
* @param {boolean} [opts.consent=true] - Auto-dismiss cookie consent dialogs.
|
|
188
|
+
* @param {string} [opts.storageState] - Path to a storage-state JSON file
|
|
189
|
+
* (cookies + localStorage) to load before navigation.
|
|
190
|
+
* @param {'act'|'browse'|'navigate'|'full'|'read'} [opts.pruneMode='act'] - Pruning mode.
|
|
157
191
|
* @returns {Promise<object>} Page handle with goto, snapshot, close
|
|
158
192
|
*/
|
|
159
193
|
export async function connect(opts = {}) {
|
|
@@ -164,12 +198,17 @@ export async function connect(opts = {}) {
|
|
|
164
198
|
// Forward caller-supplied launch knobs into every launch() below,
|
|
165
199
|
// including hybrid-fallback re-launches inside goto().
|
|
166
200
|
const launchOpts = { proxy: opts.proxy, binary: opts.binary, userDataDir: opts.userDataDir };
|
|
201
|
+
// Navigation safety policy, applied on every goto()/createTab().goto().
|
|
202
|
+
const urlGuard = { allowLocalUrls: opts.allowLocalUrls, blockPrivateNetwork: opts.blockPrivateNetwork };
|
|
203
|
+
// Optional upload sandbox: when set, upload() rejects files outside this dir.
|
|
204
|
+
// assertUploadAllowed resolves it (realpath) at check time.
|
|
205
|
+
const uploadDir = opts.uploadDir || null;
|
|
167
206
|
|
|
168
207
|
if (attachMode) {
|
|
169
208
|
// Reuse the user's running browser — do not launch, do not own the
|
|
170
209
|
// profile. cleanupBrowser() is a no-op on this shape (process: null,
|
|
171
210
|
// ownedProfileDir: null), which is the whole point.
|
|
172
|
-
browser = await attach({ port: opts.port });
|
|
211
|
+
browser = await attach({ port: opts.port ?? 0 });
|
|
173
212
|
cdp = await createCDP(browser.wsUrl);
|
|
174
213
|
} else if (mode === 'headed') {
|
|
175
214
|
browser = await launch({ ...launchOpts, headed: true });
|
|
@@ -312,6 +351,7 @@ export async function connect(opts = {}) {
|
|
|
312
351
|
|
|
313
352
|
return {
|
|
314
353
|
async goto(url, timeout = 30000) {
|
|
354
|
+
assertNavigable(url, urlGuard);
|
|
315
355
|
// Refs from the previous page are about to become invalid — clear
|
|
316
356
|
// before navigating so a stale click(ref) errors clearly instead of
|
|
317
357
|
// silently resolving to whatever backendNodeId happens to still be in
|
|
@@ -467,6 +507,10 @@ export async function connect(opts = {}) {
|
|
|
467
507
|
async upload(ref, files) {
|
|
468
508
|
const entry = refMap.get(ref);
|
|
469
509
|
if (!entry) throw new Error(`No element found for ref "${ref}"`);
|
|
510
|
+
// Upload sandbox: when uploadDir is set, every path must resolve
|
|
511
|
+
// (symlinks included, via realpath) inside it. Stops a prompt-injected
|
|
512
|
+
// agent from attaching ~/.ssh/id_rsa or other arbitrary local files.
|
|
513
|
+
assertUploadAllowed(files, uploadDir);
|
|
470
514
|
await cdpUpload(entry.session, entry.backendNodeId, files);
|
|
471
515
|
},
|
|
472
516
|
|
|
@@ -535,7 +579,10 @@ export async function connect(opts = {}) {
|
|
|
535
579
|
});
|
|
536
580
|
const state = { cookies, localStorage: JSON.parse(result.value || '{}') };
|
|
537
581
|
const { writeFileSync } = await import('node:fs');
|
|
538
|
-
|
|
582
|
+
// State holds cookies + localStorage (session tokens) — write owner-only
|
|
583
|
+
// so a multi-user host can't read another user's credentials off disk.
|
|
584
|
+
writeFileSync(filePath, JSON.stringify(state, null, 2), { mode: 0o600 });
|
|
585
|
+
try { chmodSync(filePath, 0o600); } catch { /* best effort if pre-existing */ }
|
|
539
586
|
},
|
|
540
587
|
|
|
541
588
|
get botBlocked() { return botBlocked; },
|
|
@@ -590,6 +637,7 @@ export async function connect(opts = {}) {
|
|
|
590
637
|
let tabBotBlocked = false;
|
|
591
638
|
return {
|
|
592
639
|
async goto(url, timeout = 30000) {
|
|
640
|
+
assertNavigable(url, urlGuard);
|
|
593
641
|
await navigate(tab, url, timeout);
|
|
594
642
|
if (opts.consent !== false) {
|
|
595
643
|
await dismissConsent(tab.session);
|
package/src/network-idle.js
CHANGED
|
@@ -12,12 +12,14 @@
|
|
|
12
12
|
* @param {object} [opts]
|
|
13
13
|
* @param {number} [opts.timeout=30000] - Max wait time before reject
|
|
14
14
|
* @param {number} [opts.idle=500] - Required idle duration before resolve
|
|
15
|
+
* @returns {Promise<void>}
|
|
15
16
|
*/
|
|
16
17
|
export function waitForNetworkIdle(session, opts = {}) {
|
|
17
18
|
const timeout = opts.timeout || 30000;
|
|
18
19
|
const idle = opts.idle || 500;
|
|
19
20
|
|
|
20
|
-
|
|
21
|
+
/** @type {Promise<void>} */
|
|
22
|
+
const settled = new Promise((resolve, reject) => {
|
|
21
23
|
const pending = new Set();
|
|
22
24
|
let timer = null;
|
|
23
25
|
const unsubs = [];
|
|
@@ -59,4 +61,5 @@ export function waitForNetworkIdle(session, opts = {}) {
|
|
|
59
61
|
// Start check immediately (might already be idle)
|
|
60
62
|
check();
|
|
61
63
|
});
|
|
64
|
+
return settled;
|
|
62
65
|
}
|
package/src/prune.js
CHANGED
|
@@ -60,7 +60,7 @@ const SKIP_ROLES = new Set([
|
|
|
60
60
|
*
|
|
61
61
|
* @param {object} tree - Root node from buildTree() (CDP format)
|
|
62
62
|
* @param {object} [options]
|
|
63
|
-
* @param {'act'|'browse'|'navigate'|'full'} [options.mode='act'] - Pruning mode
|
|
63
|
+
* @param {'act'|'browse'|'navigate'|'full'|'read'} [options.mode='act'] - Pruning mode ('read' is an alias for 'browse')
|
|
64
64
|
* @param {string} [options.context=''] - Search context for relevance filtering
|
|
65
65
|
* @returns {object|null} Pruned tree
|
|
66
66
|
*/
|
package/src/session-client.js
CHANGED
|
@@ -13,7 +13,7 @@ const SESSION_FILE = 'session.json';
|
|
|
13
13
|
|
|
14
14
|
/**
|
|
15
15
|
* Read session.json from the output directory.
|
|
16
|
-
* @returns {{ port: number, pid: number, startedAt: string } | null}
|
|
16
|
+
* @returns {{ port: number, pid: number, token?: string, startedAt: string } | null}
|
|
17
17
|
*/
|
|
18
18
|
export function readSession(outputDir) {
|
|
19
19
|
const sessionPath = join(resolve(outputDir), SESSION_FILE);
|
|
@@ -53,7 +53,11 @@ export async function sendCommand(command, args, outputDir) {
|
|
|
53
53
|
try {
|
|
54
54
|
res = await fetch(`http://127.0.0.1:${session.port}/command`, {
|
|
55
55
|
method: 'POST',
|
|
56
|
-
headers: {
|
|
56
|
+
headers: {
|
|
57
|
+
'Content-Type': 'application/json',
|
|
58
|
+
// Authenticate to the daemon with the per-session token from session.json.
|
|
59
|
+
...(session.token ? { 'x-barebrowse-token': session.token } : {}),
|
|
60
|
+
},
|
|
57
61
|
body: JSON.stringify({ command, args }),
|
|
58
62
|
signal: AbortSignal.timeout(60000),
|
|
59
63
|
});
|
package/src/url-guard.js
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* url-guard.js — Navigation safety checks for goto()/browse().
|
|
3
|
+
*
|
|
4
|
+
* Closes two confirmed vectors for an autonomous (and therefore
|
|
5
|
+
* prompt-injectable) agent:
|
|
6
|
+
* 1. Local-resource schemes (file:, view-source:, chrome:, …) that let a
|
|
7
|
+
* page-sourced instruction read local files or browser internals.
|
|
8
|
+
* 2. Optional private-network blocking (loopback, RFC-1918, link-local,
|
|
9
|
+
* cloud-metadata) to stop SSRF to internal services.
|
|
10
|
+
*
|
|
11
|
+
* Scheme blocking is on by default; private-network blocking is opt-in
|
|
12
|
+
* (blockPrivateNetwork) so localhost dev-server browsing keeps working.
|
|
13
|
+
*
|
|
14
|
+
* Limitation: private-network checks match the URL hostname only. A public
|
|
15
|
+
* DNS name that resolves to a private IP (DNS rebinding) is NOT caught here —
|
|
16
|
+
* that needs connection-time IP inspection. Documented, not silently assumed.
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { realpathSync } from 'node:fs';
|
|
20
|
+
import { resolve, sep } from 'node:path';
|
|
21
|
+
|
|
22
|
+
// Schemes safe to navigate to. Everything else is treated as a local-resource
|
|
23
|
+
// or browser-internal scheme and blocked unless allowLocalUrls is set.
|
|
24
|
+
// data:/blob:/about: stay allowed: opaque origins, no file:// or cross-origin
|
|
25
|
+
// read, and data: is the library's test-fixture mechanism.
|
|
26
|
+
const ALLOWED_SCHEMES = new Set(['http:', 'https:', 'data:', 'blob:', 'about:']);
|
|
27
|
+
|
|
28
|
+
/**
 * Decide whether a hostname names a private, loopback, link-local or
 * otherwise internal host.
 *
 * Accepts the hostname exactly as the WHATWG URL parser reports it: IPv6
 * literals may be bracketed, IPv4-mapped IPv6 arrives in hex form
 * (`::ffff:7f00:1`), and a fully-qualified name may carry a trailing dot.
 * The check is purely lexical — no DNS resolution is performed.
 *
 * @param {string} host - hostname (IPv6 brackets optional)
 * @returns {boolean} true if it names a private/loopback/link-local/internal host
 */
function isPrivateHost(host) {
  const h = host
    .toLowerCase()
    .replace(/^\[|\]$/g, '') // strip IPv6 brackets
    .replace(/\.$/, ''); // "localhost." is the same host as "localhost"

  // Internal hostnames
  if (h === 'localhost' || h.endsWith('.localhost')) return true;
  if (h.endsWith('.local') || h.endsWith('.internal')) return true;
  if (h === 'metadata.google.internal') return true;

  // IPv4 (incl. ranges)
  const v4 = h.match(/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/);
  if (v4) {
    const [a, b] = [Number(v4[1]), Number(v4[2])];
    if (a === 127) return true; // loopback 127.0.0.0/8
    if (a === 10) return true; // 10.0.0.0/8
    if (a === 0) return true; // 0.0.0.0/8
    if (a === 169 && b === 254) return true; // link-local / cloud metadata
    if (a === 172 && b >= 16 && b <= 31) return true; // 172.16.0.0/12
    if (a === 192 && b === 168) return true; // 192.168.0.0/16
    return false;
  }

  // IPv6 — gated on the host actually being an IPv6 literal (contains a
  // colon). Without this gate, ordinary hostnames like "fcbarcelona.com" or
  // "fdic.gov" would match the fc00::/7 ULA prefix check and be wrongly blocked.
  if (h.includes(':')) {
    if (h === '::1' || h === '::') return true; // loopback / unspecified
    if (/^fe[89ab][0-9a-f]:/.test(h)) return true; // link-local fe80::/10 (fe80–febf)
    if (h.startsWith('fc') || h.startsWith('fd')) return true; // fc00::/7 ULA

    // IPv4-mapped IPv6, dotted form (e.g. ::ffff:127.0.0.1)
    const dotted = h.match(/::ffff:(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$/);
    if (dotted) return isPrivateHost(dotted[1]);

    // IPv4-mapped IPv6, hex form (e.g. ::ffff:7f00:1). This is what
    // URL#hostname returns for "[::ffff:127.0.0.1]", so it is the form that
    // actually reaches this function from a parsed URL.
    const hex = h.match(/^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/);
    if (hex) {
      const hi = parseInt(hex[1], 16);
      const lo = parseInt(hex[2], 16);
      return isPrivateHost(`${hi >> 8}.${hi & 0xff}.${lo >> 8}.${lo & 0xff}`);
    }
    return false;
  }

  return false;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Navigation gate: throws when `url` must not be visited under the policy in
 * `opts`, and returns normally otherwise.
 *
 * Checks run in order: the URL must parse; its scheme must be on the allow
 * list unless `allowLocalUrls` is set; and, when `blockPrivateNetwork` is
 * set, an http(s) URL must not name a private/internal host.
 *
 * @param {string} url
 * @param {object} [opts]
 * @param {boolean} [opts.allowLocalUrls=false] - permit file:/chrome:/etc.
 * @param {boolean} [opts.blockPrivateNetwork=false] - reject loopback/RFC-1918/metadata.
 * @throws {Error} when the URL is invalid or rejected by the policy.
 */
export function assertNavigable(url, opts = {}) {
  let target;
  try {
    target = new URL(url);
  } catch {
    throw new Error(`Refusing to navigate: not a valid URL (${String(url).slice(0, 80)})`);
  }

  const scheme = target.protocol;
  const schemePermitted = opts.allowLocalUrls || ALLOWED_SCHEMES.has(scheme);
  if (!schemePermitted) {
    throw new Error(
      `Refusing to navigate to "${scheme}" URL — local-resource and ` +
      `browser-internal schemes are blocked (reads local files / browser state). ` +
      `Pass { allowLocalUrls: true } to override.`
    );
  }

  // The private-network guard is opt-in and only meaningful for web schemes.
  if (!opts.blockPrivateNetwork) return;
  if (scheme !== 'http:' && scheme !== 'https:') return;

  const hostname = target.hostname;
  if (hostname && isPrivateHost(hostname)) {
    throw new Error(
      `Refusing to navigate to private/internal host "${hostname}" — ` +
      `blockPrivateNetwork is enabled (SSRF guard). ` +
      `Unset it to allow localhost / internal browsing.`
    );
  }
}
|
|
105
|
+
|
|
106
|
+
/**
 * Throw if any file in `files` resolves outside `uploadDir`. Both the base
 * dir and each file are resolved through realpath, so symlinks (in either the
 * base path — e.g. macOS /tmp → /private/tmp — or the file) can't be used to
 * escape the sandbox or to false-reject a legitimate file.
 * No-op when `uploadDir` is falsy (no restriction configured).
 *
 * Containment is a path-prefix test on the resolved paths. The prefix always
 * ends in exactly one separator, so a sibling such as `<dir>-evil` is not
 * mistaken for a child, and a base that is itself a filesystem root (which
 * already ends in a separator) still matches its children.
 *
 * @param {string|string[]} files
 * @param {string|null} uploadDir
 * @throws {Error} when uploadDir or a file cannot be resolved, or a file lies
 *   outside uploadDir.
 */
export function assertUploadAllowed(files, uploadDir) {
  if (!uploadDir) return;

  let baseReal;
  try {
    baseReal = realpathSync(resolve(uploadDir));
  } catch {
    throw new Error(`upload: uploadDir does not exist or is unreadable (${uploadDir})`);
  }
  // Avoid producing "//" when the base is "/" (or "C:\\" + "\\" on Windows).
  const prefix = baseReal.endsWith(sep) ? baseReal : baseReal + sep;

  const list = Array.isArray(files) ? files : [files];
  for (const f of list) {
    let real;
    try {
      real = realpathSync(resolve(String(f)));
    } catch {
      throw new Error(`upload: cannot resolve "${f}" (must exist inside uploadDir)`);
    }
    if (real !== baseReal && !real.startsWith(prefix)) {
      throw new Error(`upload: "${f}" is outside the allowed uploadDir (${uploadDir})`);
    }
  }
}
|
|
136
|
+
|
|
137
|
+
// Exported for unit tests.
|
|
138
|
+
export { isPrivateHost };
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
// Ambient shim for the optional 'wearehere' dependency.
|
|
2
|
+
// It is dynamically imported and may not be installed; this declaration
|
|
3
|
+
// satisfies the typechecker without pulling in a hard dependency.
|
|
4
|
+
declare module 'wearehere' {
|
|
5
|
+
export function assess(...args: any[]): Promise<any>;
|
|
6
|
+
}
|
package/types/aria.d.ts
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* aria.js — Format ARIA accessibility tree nodes for agent consumption.
|
|
3
|
+
*
|
|
4
|
+
* Takes a nested tree (built from CDP's Accessibility.getFullAXTree)
|
|
5
|
+
* and formats it as readable YAML-like text, similar to Playwright's ariaSnapshot.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Format a nested ARIA tree as readable text output.
|
|
9
|
+
*
|
|
10
|
+
* Output format (one node per line, indented):
|
|
11
|
+
* - role "name" [props] [ref=nodeId]
|
|
12
|
+
*
|
|
13
|
+
* @param {object} node - Tree node { role, name, properties, children, ignored, nodeId }
|
|
14
|
+
* @param {number} [depth=0] - Current indentation depth
|
|
15
|
+
* @returns {string} Formatted ARIA tree text
|
|
16
|
+
*/
|
|
17
|
+
export function formatTree(node: object, depth?: number): string;
|
package/types/auth.d.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extract cookies from the user's browser, auto-detecting which browser to use.
|
|
3
|
+
* @param {object} [opts]
|
|
4
|
+
* @param {string} [opts.browser] - 'chromium', 'chrome', 'brave', 'edge', 'firefox', or 'auto'
|
|
5
|
+
* @param {string} [opts.domain] - Filter by domain
|
|
6
|
+
* @returns {Array<object>} Cookies in CDP-compatible format
|
|
7
|
+
*/
|
|
8
|
+
export function extractCookies(opts?: {
|
|
9
|
+
browser?: string | undefined;
|
|
10
|
+
domain?: string | undefined;
|
|
11
|
+
}): Array<object>;
|
|
12
|
+
/**
|
|
13
|
+
* Inject cookies into a CDP session via Network.setCookie.
|
|
14
|
+
* @param {object} session - CDP session handle (from cdp.session())
|
|
15
|
+
* @param {Array<object>} cookies - Cookies from extractCookies()
|
|
16
|
+
*/
|
|
17
|
+
export function injectCookies(session: object, cookies: Array<object>): Promise<void>;
|
|
18
|
+
/**
|
|
19
|
+
* RFC 6265 domain-match: does `host` belong to a cookie declared for
|
|
20
|
+
* `cookieDomain`? Leading dot on the cookie domain is ignored (host-only
|
|
21
|
+
* vs domain cookies are matched the same here, intentionally — we want
|
|
22
|
+
* parent-domain cookies like .google.com to apply to mail.google.com).
|
|
23
|
+
* @param {string} host - target hostname (e.g. 'mail.google.com')
|
|
24
|
+
* @param {string} cookieDomain - cookie's host_key (e.g. '.google.com')
|
|
25
|
+
* @returns {boolean}
|
|
26
|
+
*/
|
|
27
|
+
export function cookieDomainMatch(host: string, cookieDomain: string): boolean;
|
|
28
|
+
/**
|
|
29
|
+
* Extract cookies for a URL and inject them into a CDP session.
|
|
30
|
+
* Convenience function combining extractCookies + injectCookies.
|
|
31
|
+
* @param {object} session - CDP session handle
|
|
32
|
+
* @param {string} url - URL to extract cookies for
|
|
33
|
+
* @param {object} [opts] - Options passed to extractCookies
|
|
34
|
+
*/
|
|
35
|
+
export function authenticate(session: object, url: string, opts?: object): Promise<number>;
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @typedef {object} BrowseTool
|
|
3
|
+
* @property {string} name
|
|
4
|
+
* @property {string} description
|
|
5
|
+
* @property {object} parameters - JSON-schema-shaped parameter spec
|
|
6
|
+
* @property {(args?: any) => Promise<any>} execute
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* Create bareagent-compatible browse tools.
|
|
10
|
+
* @param {object} [opts] - Options passed to connect() for session tools
|
|
11
|
+
* @returns {{ tools: Array, close: () => Promise<void> }}
|
|
12
|
+
*/
|
|
13
|
+
export function createBrowseTools(opts?: object): {
|
|
14
|
+
tools: any[];
|
|
15
|
+
close: () => Promise<void>;
|
|
16
|
+
};
|
|
17
|
+
export type BrowseTool = {
|
|
18
|
+
name: string;
|
|
19
|
+
description: string;
|
|
20
|
+
/**
|
|
21
|
+
* - JSON-schema-shaped parameter spec
|
|
22
|
+
*/
|
|
23
|
+
parameters: object;
|
|
24
|
+
execute: (args?: any) => Promise<any>;
|
|
25
|
+
};
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* blocklist.js — Ad/tracker URL patterns for CDP Network.setBlockedURLs.
|
|
3
|
+
*
|
|
4
|
+
* Curated by real-world frequency, not pulled wholesale from Peter Lowe /
|
|
5
|
+
* EasyList. CDP does linear pattern matching per request, so 3,000-entry
|
|
6
|
+
* lists add ~150ms cumulative cost on a typical page for ~5% extra coverage
|
|
7
|
+
* (long-tail regional networks the agent rarely encounters). The set below
|
|
8
|
+
* is ~120 patterns covering the trackers that actually show up in agent
|
|
9
|
+
* traffic: Google/FB/Amazon/MS/Adobe ad+analytics, the major SaaS analytics
|
|
10
|
+
* stacks (Segment/Amplitude/Mixpanel/HubSpot/Hotjar/FullStory/Heap/Mouseflow),
|
|
11
|
+
* session-replay (LogRocket/Crazy Egg/Optimizely/VWO), content-recommendation
|
|
12
|
+
* (Taboola/Outbrain/Criteo), and the consumer-pixel cluster (LinkedIn/Twitter/
|
|
13
|
+
* TikTok/Snap/Pinterest/Reddit).
|
|
14
|
+
*
|
|
15
|
+
* Patterns are CDP-format globs: '*' matches any character run.
|
|
16
|
+
*
|
|
17
|
+
* To extend at runtime, pass connect({ blockUrls: [...] }) — your patterns
|
|
18
|
+
* are merged with this default. To turn the default off entirely, pass
|
|
19
|
+
* { blockAds: false }.
|
|
20
|
+
*/
|
|
21
|
+
export const DEFAULT_BLOCKLIST: string[];
|
package/types/cdp.d.ts
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* cdp.js — Minimal Chrome DevTools Protocol client over WebSocket.
|
|
3
|
+
*
|
|
4
|
+
* Sends JSON-RPC commands, receives responses and events.
|
|
5
|
+
* Uses Node 22's built-in WebSocket (no external deps).
|
|
6
|
+
*
|
|
7
|
+
* Supports flattened sessions: when a sessionId is provided,
|
|
8
|
+
* it's sent at the top level of the message (not inside params).
|
|
9
|
+
* Events from sessions are also dispatched by sessionId.
|
|
10
|
+
*/
|
|
11
|
+
/**
|
|
12
|
+
* Create a CDP client connected to the given WebSocket URL.
|
|
13
|
+
* @param {string} wsUrl - WebSocket URL (ws://127.0.0.1:PORT/devtools/...)
|
|
14
|
+
* @returns {Promise<object>} CDP client ({ send, on, once, session, close })
|
|
15
|
+
*/
|
|
16
|
+
export function createCDP(wsUrl: string): Promise<object>;
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Find the first available Chromium binary on the system.
|
|
3
|
+
* @returns {string} Path to the binary
|
|
4
|
+
* @throws {Error} If no Chromium browser is found
|
|
5
|
+
*/
|
|
6
|
+
export function findBrowser(): string;
|
|
7
|
+
/**
|
|
8
|
+
* Launch a Chromium instance with CDP enabled.
|
|
9
|
+
* @param {object} [opts]
|
|
10
|
+
* @param {string} [opts.binary] - Path to browser binary (auto-detected if omitted)
|
|
11
|
+
* @param {number} [opts.port=0] - CDP port (0 = random available port)
|
|
12
|
+
* @param {string} [opts.userDataDir] - Browser profile directory
|
|
13
|
+
* @param {boolean} [opts.headed=false] - Launch in headed mode (with visible window)
|
|
14
|
+
* @param {string} [opts.proxy] - Proxy server (e.g. 'http://host:port')
|
|
15
|
+
* @returns {Promise<{wsUrl: string, process: import('node:child_process').ChildProcess, port: number}>}
|
|
16
|
+
*/
|
|
17
|
+
export function launch(opts?: {
|
|
18
|
+
binary?: string | undefined;
|
|
19
|
+
port?: number | undefined;
|
|
20
|
+
userDataDir?: string | undefined;
|
|
21
|
+
headed?: boolean | undefined;
|
|
22
|
+
proxy?: string | undefined;
|
|
23
|
+
}): Promise<{
|
|
24
|
+
wsUrl: string;
|
|
25
|
+
process: import("node:child_process").ChildProcess;
|
|
26
|
+
port: number;
|
|
27
|
+
}>;
|
|
28
|
+
/**
|
|
29
|
+
* Kill a launched browser and remove its temp profile dir (if we created one).
|
|
30
|
+
* Waits up to 2s for the process to actually exit before unlinking the dir —
|
|
31
|
+
* Chromium can still hold files briefly after SIGTERM, which races rmSync.
|
|
32
|
+
* Safe to call on partially-failed launches or already-dead processes.
|
|
33
|
+
* @returns {Promise<void>}
|
|
34
|
+
*/
|
|
35
|
+
export function cleanupBrowser(browser: any): Promise<void>;
|
|
36
|
+
/**
|
|
37
|
+
* Get the CDP WebSocket URL for a browser already running with --remote-debugging-port.
|
|
38
|
+
* @param {number} port - The debug port
|
|
39
|
+
* @returns {Promise<string>} WebSocket URL
|
|
40
|
+
*/
|
|
41
|
+
export function getDebugUrl(port: number): Promise<string>;
|
|
42
|
+
/**
|
|
43
|
+
* Attach to a Chromium already running with --remote-debugging-port=<port>.
|
|
44
|
+
* Returns the same shape as launch() but with process: null and
|
|
45
|
+
* ownedProfileDir: null — cleanupBrowser() becomes a no-op so we never
|
|
46
|
+
* kill a browser we did not start or remove a profile we do not own.
|
|
47
|
+
* @param {object} opts
|
|
48
|
+
* @param {number} opts.port - The debug port the running browser is listening on
|
|
49
|
+
* @returns {Promise<{wsUrl: string, process: null, port: number, ownedProfileDir: null}>}
|
|
50
|
+
*/
|
|
51
|
+
export function attach({ port }: {
|
|
52
|
+
port: number;
|
|
53
|
+
}): Promise<{
|
|
54
|
+
wsUrl: string;
|
|
55
|
+
process: null;
|
|
56
|
+
port: number;
|
|
57
|
+
ownedProfileDir: null;
|
|
58
|
+
}>;
|