browserclaw 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,9 +1,10 @@
1
1
  import os from 'os';
2
- import path from 'path';
2
+ import path, { normalize, resolve, sep } from 'path';
3
3
  import fs from 'fs';
4
4
  import net from 'net';
5
5
  import { spawn, execFileSync } from 'child_process';
6
6
  import { devices, chromium } from 'playwright-core';
7
+ import { lookup } from 'dns/promises';
7
8
 
8
9
  // src/chrome-launcher.ts
9
10
  var CHROMIUM_BUNDLE_IDS = /* @__PURE__ */ new Set([
@@ -263,12 +264,12 @@ function resolveBrowserExecutable(opts) {
263
264
  return null;
264
265
  }
265
266
  async function ensurePortAvailable(port) {
266
- await new Promise((resolve, reject) => {
267
+ await new Promise((resolve2, reject) => {
267
268
  const tester = net.createServer().once("error", (err) => {
268
269
  if (err.code === "EADDRINUSE") reject(new Error(`Port ${port} is already in use`));
269
270
  else reject(err);
270
271
  }).once("listening", () => {
271
- tester.close(() => resolve());
272
+ tester.close(() => resolve2());
272
273
  }).listen(port);
273
274
  });
274
275
  }
@@ -338,11 +339,13 @@ function resolveUserDataDir(profileName) {
338
339
  const configDir = process.env.XDG_CONFIG_HOME ?? path.join(os.homedir(), ".config");
339
340
  return path.join(configDir, "browserclaw", "profiles", profileName, "user-data");
340
341
  }
341
- async function isChromeReachable(cdpUrl, timeoutMs = 500) {
342
+ async function isChromeReachable(cdpUrl, timeoutMs = 500, authToken) {
342
343
  const ctrl = new AbortController();
343
344
  const t = setTimeout(() => ctrl.abort(), timeoutMs);
344
345
  try {
345
- const res = await fetch(`${cdpUrl.replace(/\/+$/, "")}/json/version`, { signal: ctrl.signal });
346
+ const headers = {};
347
+ if (authToken) headers["Authorization"] = `Bearer ${authToken}`;
348
+ const res = await fetch(`${cdpUrl.replace(/\/+$/, "")}/json/version`, { signal: ctrl.signal, headers });
346
349
  return res.ok;
347
350
  } catch {
348
351
  return false;
@@ -350,11 +353,13 @@ async function isChromeReachable(cdpUrl, timeoutMs = 500) {
350
353
  clearTimeout(t);
351
354
  }
352
355
  }
353
- async function getChromeWebSocketUrl(cdpUrl, timeoutMs = 500) {
356
+ async function getChromeWebSocketUrl(cdpUrl, timeoutMs = 500, authToken) {
354
357
  const ctrl = new AbortController();
355
358
  const t = setTimeout(() => ctrl.abort(), timeoutMs);
356
359
  try {
357
- const res = await fetch(`${cdpUrl.replace(/\/+$/, "")}/json/version`, { signal: ctrl.signal });
360
+ const headers = {};
361
+ if (authToken) headers["Authorization"] = `Bearer ${authToken}`;
362
+ const res = await fetch(`${cdpUrl.replace(/\/+$/, "")}/json/version`, { signal: ctrl.signal, headers });
358
363
  if (!res.ok) return null;
359
364
  const data = await res.json();
360
365
  return String(data?.webSocketDebuggerUrl ?? "").trim() || null;
@@ -620,7 +625,7 @@ function restoreRoleRefsForTarget(opts) {
620
625
  state.roleRefsFrameSelector = entry.frameSelector;
621
626
  state.roleRefsMode = entry.mode;
622
627
  }
623
- async function connectBrowser(cdpUrl) {
628
+ async function connectBrowser(cdpUrl, authToken) {
624
629
  const normalized = normalizeCdpUrl(cdpUrl);
625
630
  if (cached?.cdpUrl === normalized) return cached;
626
631
  const existing = connectingByUrl.get(normalized);
@@ -630,9 +635,11 @@ async function connectBrowser(cdpUrl) {
630
635
  for (let attempt = 0; attempt < 3; attempt++) {
631
636
  try {
632
637
  const timeout = 5e3 + attempt * 2e3;
633
- const endpoint = await getChromeWebSocketUrl(normalized, timeout).catch(() => null) ?? normalized;
634
- const browser = await chromium.connectOverCDP(endpoint, { timeout });
635
- const connected = { browser, cdpUrl: normalized };
638
+ const endpoint = await getChromeWebSocketUrl(normalized, timeout, authToken).catch(() => null) ?? normalized;
639
+ const headers = {};
640
+ if (authToken) headers["Authorization"] = `Bearer ${authToken}`;
641
+ const browser = await chromium.connectOverCDP(endpoint, { timeout, headers });
642
+ const connected = { browser, cdpUrl: normalized, authToken };
636
643
  cached = connected;
637
644
  observeBrowser(browser);
638
645
  browser.on("disconnected", () => {
@@ -692,7 +699,9 @@ async function findPageByTargetId(browser, targetId, cdpUrl) {
692
699
  if (cdpUrl) {
693
700
  try {
694
701
  const listUrl = `${cdpUrl.replace(/\/+$/, "").replace(/^ws:/, "http:").replace(/\/cdp$/, "")}/json/list`;
695
- const response = await fetch(listUrl);
702
+ const headers = {};
703
+ if (cached?.authToken) headers["Authorization"] = `Bearer ${cached.authToken}`;
704
+ const response = await fetch(listUrl, { headers });
696
705
  if (response.ok) {
697
706
  const targets = await response.json();
698
707
  const target = targets.find((t) => t.id === targetId);
@@ -1017,6 +1026,7 @@ async function snapshotAi(opts) {
1017
1026
  if (!maybe._snapshotForAI) {
1018
1027
  throw new Error("Playwright _snapshotForAI is not available. Upgrade playwright-core to >= 1.50.");
1019
1028
  }
1029
+ const sourceUrl = page.url();
1020
1030
  const result = await maybe._snapshotForAI({
1021
1031
  timeout: normalizeTimeoutMs(opts.timeoutMs, 5e3, 6e4),
1022
1032
  track: "response"
@@ -1043,7 +1053,12 @@ async function snapshotAi(opts) {
1043
1053
  snapshot: built.snapshot,
1044
1054
  refs: built.refs,
1045
1055
  stats: getRoleSnapshotStats(built.snapshot, built.refs),
1046
- untrusted: true
1056
+ untrusted: true,
1057
+ contentMeta: {
1058
+ sourceUrl,
1059
+ contentType: "browser-snapshot",
1060
+ capturedAt: (/* @__PURE__ */ new Date()).toISOString()
1061
+ }
1047
1062
  };
1048
1063
  }
1049
1064
 
@@ -1051,6 +1066,7 @@ async function snapshotAi(opts) {
1051
1066
  async function snapshotRole(opts) {
1052
1067
  const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
1053
1068
  ensurePageState(page);
1069
+ const sourceUrl = page.url();
1054
1070
  const frameSelector = opts.frameSelector?.trim() || "";
1055
1071
  const selector = opts.selector?.trim() || "";
1056
1072
  const locator = frameSelector ? selector ? page.frameLocator(frameSelector).locator(selector) : page.frameLocator(frameSelector).locator(":root") : selector ? page.locator(selector) : page.locator(":root");
@@ -1068,19 +1084,33 @@ async function snapshotRole(opts) {
1068
1084
  snapshot: built.snapshot,
1069
1085
  refs: built.refs,
1070
1086
  stats: getRoleSnapshotStats(built.snapshot, built.refs),
1071
- untrusted: true
1087
+ untrusted: true,
1088
+ contentMeta: {
1089
+ sourceUrl,
1090
+ contentType: "browser-snapshot",
1091
+ capturedAt: (/* @__PURE__ */ new Date()).toISOString()
1092
+ }
1072
1093
  };
1073
1094
  }
1074
1095
  async function snapshotAria(opts) {
1075
1096
  const limit = Math.max(1, Math.min(2e3, Math.floor(opts.limit ?? 500)));
1076
1097
  const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
1077
1098
  ensurePageState(page);
1099
+ const sourceUrl = page.url();
1078
1100
  const session = await page.context().newCDPSession(page);
1079
1101
  try {
1080
1102
  await session.send("Accessibility.enable").catch(() => {
1081
1103
  });
1082
1104
  const res = await session.send("Accessibility.getFullAXTree");
1083
- return { nodes: formatAriaNodes(Array.isArray(res?.nodes) ? res.nodes : [], limit), untrusted: true };
1105
+ return {
1106
+ nodes: formatAriaNodes(Array.isArray(res?.nodes) ? res.nodes : [], limit),
1107
+ untrusted: true,
1108
+ contentMeta: {
1109
+ sourceUrl,
1110
+ contentType: "browser-aria-tree",
1111
+ capturedAt: (/* @__PURE__ */ new Date()).toISOString()
1112
+ }
1113
+ };
1084
1114
  } finally {
1085
1115
  await session.detach().catch(() => {
1086
1116
  });
@@ -1271,7 +1301,7 @@ async function armDialogViaPlaywright(opts) {
1271
1301
  const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
1272
1302
  ensurePageState(page);
1273
1303
  const timeout = normalizeTimeoutMs(opts.timeoutMs, 3e4, 12e4);
1274
- return new Promise((resolve, reject) => {
1304
+ return new Promise((resolve2, reject) => {
1275
1305
  const timer = setTimeout(() => {
1276
1306
  page.removeListener("dialog", handler);
1277
1307
  reject(new Error(`No dialog appeared within ${timeout}ms`));
@@ -1284,7 +1314,7 @@ async function armDialogViaPlaywright(opts) {
1284
1314
  } else {
1285
1315
  await dialog.dismiss();
1286
1316
  }
1287
- resolve();
1317
+ resolve2();
1288
1318
  } catch (err) {
1289
1319
  reject(err);
1290
1320
  }
@@ -1296,7 +1326,7 @@ async function armFileUploadViaPlaywright(opts) {
1296
1326
  const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
1297
1327
  ensurePageState(page);
1298
1328
  const timeout = normalizeTimeoutMs(opts.timeoutMs, 3e4, 12e4);
1299
- return new Promise((resolve, reject) => {
1329
+ return new Promise((resolve2, reject) => {
1300
1330
  const timer = setTimeout(() => {
1301
1331
  page.removeListener("filechooser", handler);
1302
1332
  reject(new Error(`No file chooser appeared within ${timeout}ms`));
@@ -1305,7 +1335,7 @@ async function armFileUploadViaPlaywright(opts) {
1305
1335
  clearTimeout(timer);
1306
1336
  try {
1307
1337
  await fc.setFiles(opts.paths ?? []);
1308
- resolve();
1338
+ resolve2();
1309
1339
  } catch (err) {
1310
1340
  reject(err);
1311
1341
  }
@@ -1322,11 +1352,82 @@ async function pressKeyViaPlaywright(opts) {
1322
1352
  ensurePageState(page);
1323
1353
  await page.keyboard.press(key, { delay: Math.max(0, Math.floor(opts.delayMs ?? 0)) });
1324
1354
  }
1355
/**
 * Validate that an output path is safe to write to.
 *
 * The path is normalized first, so interior `..` segments that stay inside
 * the starting directory (e.g. `a/../b`) collapse away; only a path that
 * still climbs out of its starting point after normalization is rejected.
 *
 * @param {string} path2 - Destination path supplied by the caller.
 * @param {string[]} [allowedRoots] - Optional directories. When non-empty,
 *   the resolved path must equal one of them or lie beneath one of them.
 * @throws {Error} If the path is missing or not a string, if it still
 *   contains a `..` segment after normalization, or if it falls outside
 *   every allowed root.
 */
function assertSafeOutputPath(path2, allowedRoots) {
  if (!path2 || typeof path2 !== "string") {
    throw new Error("Output path is required.");
  }
  const normalized = normalize(path2);
  // Compare whole segments rather than substrings so that ordinary file
  // names containing two dots ("report..final.pdf") are not rejected.
  // Both separator styles are split on so a Windows-style traversal string
  // is still refused on POSIX hosts.
  const climbsOut = normalized.split(/[\\/]+/).includes("..");
  if (climbsOut) {
    throw new Error(`Unsafe output path: directory traversal detected in "${path2}".`);
  }
  if (!allowedRoots?.length) {
    return;
  }
  const resolved = resolve(normalized);
  const withinRoot = allowedRoots.some((root) => {
    const normalizedRoot = resolve(root);
    if (resolved === normalizedRoot) {
      return true;
    }
    // A filesystem root ("/" or "C:\") already ends with the separator;
    // appending another one would produce a prefix nothing can match.
    const prefix = normalizedRoot.endsWith(sep) ? normalizedRoot : `${normalizedRoot}${sep}`;
    return resolved.startsWith(prefix);
  });
  if (!withinRoot) {
    throw new Error(`Unsafe output path: "${path2}" is outside allowed directories.`);
  }
}
1374
/**
 * Report whether an IP address literal belongs to a loopback, private,
 * link-local, carrier-grade-NAT or unspecified range.
 *
 * Accepts IPv4 dotted-quad and IPv6 literals, optionally wrapped in the
 * square brackets that `URL.hostname` adds and optionally carrying a zone
 * id (`%eth0`). Anything that is not a valid IP literal (for example a DNS
 * name such as `fdic.gov`) yields `false`, so names that merely start with
 * `fc`, `fd` or `10.` are never mistaken for addresses.
 *
 * @param {string} ip - IP address literal to classify.
 * @returns {boolean} `true` when the address is in an internal range.
 * @throws {TypeError} If `ip` is not a string.
 */
function isInternalIP(ip) {
  if (typeof ip !== "string") {
    // Callers wrap this in try/catch and treat a failure as "blocked";
    // silently returning false here would turn that into "allowed".
    throw new TypeError("isInternalIP expects an IP address string.");
  }
  const literal = ip
    .trim()
    .toLowerCase()
    .replace(/^\[(.*)\]$/, "$1")
    .replace(/%.*$/, "");

  // Classify an IPv4 address from its first two octets.
  const isInternalV4 = (a, b) =>
    a === 0 || // 0.0.0.0/8 "this network" (includes 0.0.0.0)
    a === 10 || // 10.0.0.0/8
    a === 127 || // 127.0.0.0/8 loopback
    (a === 100 && b >= 64 && b <= 127) || // 100.64.0.0/10 carrier-grade NAT
    (a === 169 && b === 254) || // 169.254.0.0/16 link-local
    (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12
    (a === 192 && b === 168); // 192.168.0.0/16

  const family = net.isIP(literal);
  if (family === 4) {
    const [first, second] = literal.split(".").map(Number);
    return isInternalV4(first, second);
  }
  if (family !== 6) {
    return false;
  }

  // Expand the IPv6 literal into eight 16-bit words so that every spelling
  // of the same address ("::1", "0:0:0:0:0:0:0:1", "::ffff:127.0.0.1",
  // "::ffff:7f00:1") is classified identically.
  let text = literal;
  const dottedTail = /^(.*:)(\d+)\.(\d+)\.(\d+)\.(\d+)$/.exec(text);
  if (dottedTail) {
    const [, prefix, o1, o2, o3, o4] = dottedTail;
    const high = (Number(o1) << 8) | Number(o2);
    const low = (Number(o3) << 8) | Number(o4);
    text = `${prefix}${high.toString(16)}:${low.toString(16)}`;
  }
  const [head, tail] = text.split("::");
  const headGroups = head ? head.split(":") : [];
  const tailGroups = tail ? tail.split(":") : [];
  const zeroFill = tail === undefined ? 0 : 8 - headGroups.length - tailGroups.length;
  const words = [...headGroups, ...Array(zeroFill).fill("0"), ...tailGroups].map((group) =>
    Number.parseInt(group, 16)
  );
  const leadingZeros = (count) => words.slice(0, count).every((word) => word === 0);

  if (leadingZeros(7) && words[7] <= 1) return true; // "::" unspecified and "::1" loopback
  if ((words[0] & 0xffc0) === 0xfe80) return true; // fe80::/10 link-local
  if ((words[0] & 0xfe00) === 0xfc00) return true; // fc00::/7 unique local
  if (leadingZeros(5) && words[5] === 0xffff) {
    // ::ffff:a.b.c.d — IPv4-mapped; judge the embedded IPv4 address.
    return isInternalV4(words[6] >> 8, words[6] & 0xff);
  }
  return false;
}
1392
/**
 * Synchronously decide, from the URL text alone, whether a URL targets an
 * internal host. No DNS resolution is performed.
 *
 * A URL is considered internal when it cannot be parsed, when its host is
 * `localhost` (with or without a trailing dot), when its host ends in
 * `.local`, `.internal` or `.localhost`, or when its host is an IP literal
 * that `isInternalIP` classifies as internal.
 *
 * @param {string} url - URL to inspect.
 * @returns {boolean} `true` when the URL should be treated as internal.
 */
function isInternalUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    // Unparseable input is treated as internal so callers fail closed.
    return true;
  }
  // "localhost." is the fully-qualified spelling of "localhost"; drop the
  // trailing dot so it cannot slip past the name checks below.
  const hostname = parsed.hostname.toLowerCase().replace(/\.$/, "");
  if (hostname === "localhost") {
    return true;
  }
  const internalSuffixes = [".local", ".internal", ".localhost"];
  if (internalSuffixes.some((suffix) => hostname.endsWith(suffix))) {
    return true;
  }
  // URL.hostname keeps the square brackets around IPv6 literals; remove
  // them, and only consult the IP classifier for genuine IP literals so
  // DNS names are never matched against address prefixes.
  const literal = hostname.replace(/^\[(.*)\]$/, "$1");
  return net.isIP(literal) !== 0 && isInternalIP(literal);
}
1407
/**
 * Decide whether a URL targets an internal host, resolving its hostname
 * through DNS when the textual check alone does not flag it.
 *
 * Every address the resolver returns is checked, so a name that maps to
 * both a public and an internal address is still reported as internal.
 * Any parse or resolution failure is reported as internal (fail closed).
 *
 * NOTE(review): the lookup here is separate from whatever resolution the
 * browser performs when it later loads the URL, so this presumably does not
 * protect against a name whose records change in between — confirm whether
 * that matters for callers.
 *
 * @param {string} url - URL to inspect.
 * @returns {Promise<boolean>} `true` when the URL should be treated as internal.
 */
async function isInternalUrlResolved(url) {
  if (isInternalUrl(url)) {
    return true;
  }
  try {
    const { hostname } = new URL(url);
    // `all: true` yields every resolved address instead of just the first.
    const addresses = await lookup(hostname, { all: true });
    if (addresses.length === 0) {
      // Nothing to vet (e.g. an empty hostname): refuse rather than guess.
      return true;
    }
    return addresses.some(({ address }) => isInternalIP(address));
  } catch {
    // Deliberate: an unresolvable or malformed target is treated as blocked.
    return true;
  }
}
1325
1423
 
1326
1424
  // src/actions/navigation.ts
1327
1425
  async function navigateViaPlaywright(opts) {
1328
1426
  const url = String(opts.url ?? "").trim();
1329
1427
  if (!url) throw new Error("url is required");
1428
+ if (!opts.allowInternal && await isInternalUrlResolved(url)) {
1429
+ throw new Error(`Navigation to internal/loopback address blocked: "${url}". Set allowInternal: true if this is intentional.`);
1430
+ }
1330
1431
  const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
1331
1432
  ensurePageState(page);
1332
1433
  await page.goto(url, { timeout: normalizeTimeoutMs(opts.timeoutMs, 2e4) });
@@ -1348,11 +1449,14 @@ async function listPagesViaPlaywright(opts) {
1348
1449
  return results;
1349
1450
  }
1350
1451
  async function createPageViaPlaywright(opts) {
1452
+ const targetUrl = (opts.url ?? "").trim() || "about:blank";
1453
+ if (targetUrl !== "about:blank" && !opts.allowInternal && await isInternalUrlResolved(targetUrl)) {
1454
+ throw new Error(`Navigation to internal/loopback address blocked: "${targetUrl}". Set allowInternal: true if this is intentional.`);
1455
+ }
1351
1456
  const { browser } = await connectBrowser(opts.cdpUrl);
1352
1457
  const context = browser.contexts()[0] ?? await browser.newContext();
1353
1458
  const page = await context.newPage();
1354
1459
  ensurePageState(page);
1355
- const targetUrl = (opts.url ?? "").trim() || "about:blank";
1356
1460
  if (targetUrl !== "about:blank") {
1357
1461
  await page.goto(targetUrl, { timeout: normalizeTimeoutMs(void 0, 2e4) });
1358
1462
  }
@@ -1498,6 +1602,7 @@ async function evaluateViaPlaywright(opts) {
1498
1602
 
1499
1603
  // src/actions/download.ts
1500
1604
  async function downloadViaPlaywright(opts) {
1605
+ assertSafeOutputPath(opts.path, opts.allowedOutputRoots);
1501
1606
  const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
1502
1607
  ensurePageState(page);
1503
1608
  restoreRoleRefsForTarget({ cdpUrl: opts.cdpUrl, targetId: opts.targetId, page });
@@ -1524,6 +1629,7 @@ async function waitForDownloadViaPlaywright(opts) {
1524
1629
  const timeout = normalizeTimeoutMs(opts.timeoutMs, 3e4, 12e4);
1525
1630
  const download = await page.waitForEvent("download", { timeout });
1526
1631
  const savePath = opts.path ?? download.suggestedFilename();
1632
+ assertSafeOutputPath(savePath, opts.allowedOutputRoots);
1527
1633
  await download.saveAs(savePath);
1528
1634
  return {
1529
1635
  url: download.url(),
@@ -1707,6 +1813,7 @@ async function traceStartViaPlaywright(opts) {
1707
1813
  });
1708
1814
  }
1709
1815
  async function traceStopViaPlaywright(opts) {
1816
+ assertSafeOutputPath(opts.path, opts.allowedOutputRoots);
1710
1817
  const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
1711
1818
  ensurePageState(page);
1712
1819
  const context = page.context();
@@ -1852,10 +1959,12 @@ async function storageClearViaPlaywright(opts) {
1852
1959
  var CrawlPage = class {
1853
1960
  cdpUrl;
1854
1961
  targetId;
1962
+ allowInternal;
1855
1963
  /** @internal */
1856
- constructor(cdpUrl, targetId) {
1964
+ constructor(cdpUrl, targetId, allowInternal = false) {
1857
1965
  this.cdpUrl = cdpUrl;
1858
1966
  this.targetId = targetId;
1967
+ this.allowInternal = allowInternal;
1859
1968
  }
1860
1969
  /** The CDP target ID for this page. Use this to identify the page in multi-tab scenarios. */
1861
1970
  get id() {
@@ -2187,7 +2296,8 @@ var CrawlPage = class {
2187
2296
  cdpUrl: this.cdpUrl,
2188
2297
  targetId: this.targetId,
2189
2298
  url,
2190
- timeoutMs: opts?.timeoutMs
2299
+ timeoutMs: opts?.timeoutMs,
2300
+ allowInternal: this.allowInternal
2191
2301
  });
2192
2302
  }
2193
2303
  /**
@@ -2374,12 +2484,14 @@ var CrawlPage = class {
2374
2484
  * Stop recording a trace and save it to a file.
2375
2485
  *
2376
2486
  * @param path - File path to save the trace (e.g. `'trace.zip'`)
2487
+ * @param opts - Options (allowedOutputRoots: constrain output to specific directories)
2377
2488
  */
2378
- async traceStop(path2) {
2489
+ async traceStop(path2, opts) {
2379
2490
  return traceStopViaPlaywright({
2380
2491
  cdpUrl: this.cdpUrl,
2381
2492
  targetId: this.targetId,
2382
- path: path2
2493
+ path: path2,
2494
+ allowedOutputRoots: opts?.allowedOutputRoots
2383
2495
  });
2384
2496
  }
2385
2497
  /**
@@ -2567,7 +2679,8 @@ var CrawlPage = class {
2567
2679
  targetId: this.targetId,
2568
2680
  ref,
2569
2681
  path: path2,
2570
- timeoutMs: opts?.timeoutMs
2682
+ timeoutMs: opts?.timeoutMs,
2683
+ allowedOutputRoots: opts?.allowedOutputRoots
2571
2684
  });
2572
2685
  }
2573
2686
  /**
@@ -2583,7 +2696,8 @@ var CrawlPage = class {
2583
2696
  cdpUrl: this.cdpUrl,
2584
2697
  targetId: this.targetId,
2585
2698
  path: opts?.path,
2586
- timeoutMs: opts?.timeoutMs
2699
+ timeoutMs: opts?.timeoutMs,
2700
+ allowedOutputRoots: opts?.allowedOutputRoots
2587
2701
  });
2588
2702
  }
2589
2703
  // ── Emulation ───────────────────────────────────────────────
@@ -2713,10 +2827,12 @@ var CrawlPage = class {
2713
2827
  };
2714
2828
  var BrowserClaw = class _BrowserClaw {
2715
2829
  cdpUrl;
2830
+ allowInternal;
2716
2831
  chrome;
2717
- constructor(cdpUrl, chrome) {
2832
+ constructor(cdpUrl, chrome, allowInternal = false) {
2718
2833
  this.cdpUrl = cdpUrl;
2719
2834
  this.chrome = chrome;
2835
+ this.allowInternal = allowInternal;
2720
2836
  }
2721
2837
  /**
2722
2838
  * Launch a new Chrome instance and connect to it.
@@ -2744,7 +2860,7 @@ var BrowserClaw = class _BrowserClaw {
2744
2860
  static async launch(opts = {}) {
2745
2861
  const chrome = await launchChrome(opts);
2746
2862
  const cdpUrl = `http://127.0.0.1:${chrome.cdpPort}`;
2747
- return new _BrowserClaw(cdpUrl, chrome);
2863
+ return new _BrowserClaw(cdpUrl, chrome, opts.allowInternal);
2748
2864
  }
2749
2865
  /**
2750
2866
  * Connect to an already-running Chrome instance via its CDP endpoint.
@@ -2760,12 +2876,12 @@ var BrowserClaw = class _BrowserClaw {
2760
2876
  * const browser = await BrowserClaw.connect('http://localhost:9222');
2761
2877
  * ```
2762
2878
  */
2763
- static async connect(cdpUrl) {
2764
- if (!await isChromeReachable(cdpUrl, 3e3)) {
2879
+ static async connect(cdpUrl, opts) {
2880
+ if (!await isChromeReachable(cdpUrl, 3e3, opts?.authToken)) {
2765
2881
  throw new Error(`Cannot connect to Chrome at ${cdpUrl}. Is Chrome running with --remote-debugging-port?`);
2766
2882
  }
2767
- await connectBrowser(cdpUrl);
2768
- return new _BrowserClaw(cdpUrl, null);
2883
+ await connectBrowser(cdpUrl, opts?.authToken);
2884
+ return new _BrowserClaw(cdpUrl, null, opts?.allowInternal);
2769
2885
  }
2770
2886
  /**
2771
2887
  * Open a URL in a new tab and return the page handle.
@@ -2780,8 +2896,8 @@ var BrowserClaw = class _BrowserClaw {
2780
2896
  * ```
2781
2897
  */
2782
2898
  async open(url) {
2783
- const tab = await createPageViaPlaywright({ cdpUrl: this.cdpUrl, url });
2784
- return new CrawlPage(this.cdpUrl, tab.targetId);
2899
+ const tab = await createPageViaPlaywright({ cdpUrl: this.cdpUrl, url, allowInternal: this.allowInternal });
2900
+ return new CrawlPage(this.cdpUrl, tab.targetId, this.allowInternal);
2785
2901
  }
2786
2902
  /**
2787
2903
  * Get a CrawlPage handle for the currently active tab.
@@ -2794,7 +2910,7 @@ var BrowserClaw = class _BrowserClaw {
2794
2910
  if (!pages.length) throw new Error("No pages available. Use browser.open(url) to create a tab.");
2795
2911
  const tid = await pageTargetId(pages[0]).catch(() => null);
2796
2912
  if (!tid) throw new Error("Failed to get targetId for the current page.");
2797
- return new CrawlPage(this.cdpUrl, tid);
2913
+ return new CrawlPage(this.cdpUrl, tid, this.allowInternal);
2798
2914
  }
2799
2915
  /**
2800
2916
  * List all open tabs.
@@ -2829,7 +2945,7 @@ var BrowserClaw = class _BrowserClaw {
2829
2945
  * @returns CrawlPage for the specified tab
2830
2946
  */
2831
2947
  page(targetId) {
2832
- return new CrawlPage(this.cdpUrl, targetId);
2948
+ return new CrawlPage(this.cdpUrl, targetId, this.allowInternal);
2833
2949
  }
2834
2950
  /** The CDP endpoint URL for this browser connection. */
2835
2951
  get url() {