browserclaw 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -6,6 +6,7 @@ var fs = require('fs');
6
6
  var net = require('net');
7
7
  var child_process = require('child_process');
8
8
  var playwrightCore = require('playwright-core');
9
+ var promises = require('dns/promises');
9
10
 
10
11
  function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
11
12
 
@@ -272,12 +273,12 @@ function resolveBrowserExecutable(opts) {
272
273
  return null;
273
274
  }
274
275
  async function ensurePortAvailable(port) {
275
- await new Promise((resolve, reject) => {
276
+ await new Promise((resolve2, reject) => {
276
277
  const tester = net__default.default.createServer().once("error", (err) => {
277
278
  if (err.code === "EADDRINUSE") reject(new Error(`Port ${port} is already in use`));
278
279
  else reject(err);
279
280
  }).once("listening", () => {
280
- tester.close(() => resolve());
281
+ tester.close(() => resolve2());
281
282
  }).listen(port);
282
283
  });
283
284
  }
@@ -347,11 +348,13 @@ function resolveUserDataDir(profileName) {
347
348
  const configDir = process.env.XDG_CONFIG_HOME ?? path__default.default.join(os__default.default.homedir(), ".config");
348
349
  return path__default.default.join(configDir, "browserclaw", "profiles", profileName, "user-data");
349
350
  }
350
- async function isChromeReachable(cdpUrl, timeoutMs = 500) {
351
+ async function isChromeReachable(cdpUrl, timeoutMs = 500, authToken) {
351
352
  const ctrl = new AbortController();
352
353
  const t = setTimeout(() => ctrl.abort(), timeoutMs);
353
354
  try {
354
- const res = await fetch(`${cdpUrl.replace(/\/+$/, "")}/json/version`, { signal: ctrl.signal });
355
+ const headers = {};
356
+ if (authToken) headers["Authorization"] = `Bearer ${authToken}`;
357
+ const res = await fetch(`${cdpUrl.replace(/\/+$/, "")}/json/version`, { signal: ctrl.signal, headers });
355
358
  return res.ok;
356
359
  } catch {
357
360
  return false;
@@ -359,11 +362,13 @@ async function isChromeReachable(cdpUrl, timeoutMs = 500) {
359
362
  clearTimeout(t);
360
363
  }
361
364
  }
362
- async function getChromeWebSocketUrl(cdpUrl, timeoutMs = 500) {
365
+ async function getChromeWebSocketUrl(cdpUrl, timeoutMs = 500, authToken) {
363
366
  const ctrl = new AbortController();
364
367
  const t = setTimeout(() => ctrl.abort(), timeoutMs);
365
368
  try {
366
- const res = await fetch(`${cdpUrl.replace(/\/+$/, "")}/json/version`, { signal: ctrl.signal });
369
+ const headers = {};
370
+ if (authToken) headers["Authorization"] = `Bearer ${authToken}`;
371
+ const res = await fetch(`${cdpUrl.replace(/\/+$/, "")}/json/version`, { signal: ctrl.signal, headers });
367
372
  if (!res.ok) return null;
368
373
  const data = await res.json();
369
374
  return String(data?.webSocketDebuggerUrl ?? "").trim() || null;
@@ -629,7 +634,7 @@ function restoreRoleRefsForTarget(opts) {
629
634
  state.roleRefsFrameSelector = entry.frameSelector;
630
635
  state.roleRefsMode = entry.mode;
631
636
  }
632
- async function connectBrowser(cdpUrl) {
637
+ async function connectBrowser(cdpUrl, authToken) {
633
638
  const normalized = normalizeCdpUrl(cdpUrl);
634
639
  if (cached?.cdpUrl === normalized) return cached;
635
640
  const existing = connectingByUrl.get(normalized);
@@ -639,9 +644,11 @@ async function connectBrowser(cdpUrl) {
639
644
  for (let attempt = 0; attempt < 3; attempt++) {
640
645
  try {
641
646
  const timeout = 5e3 + attempt * 2e3;
642
- const endpoint = await getChromeWebSocketUrl(normalized, timeout).catch(() => null) ?? normalized;
643
- const browser = await playwrightCore.chromium.connectOverCDP(endpoint, { timeout });
644
- const connected = { browser, cdpUrl: normalized };
647
+ const endpoint = await getChromeWebSocketUrl(normalized, timeout, authToken).catch(() => null) ?? normalized;
648
+ const headers = {};
649
+ if (authToken) headers["Authorization"] = `Bearer ${authToken}`;
650
+ const browser = await playwrightCore.chromium.connectOverCDP(endpoint, { timeout, headers });
651
+ const connected = { browser, cdpUrl: normalized, authToken };
645
652
  cached = connected;
646
653
  observeBrowser(browser);
647
654
  browser.on("disconnected", () => {
@@ -701,7 +708,9 @@ async function findPageByTargetId(browser, targetId, cdpUrl) {
701
708
  if (cdpUrl) {
702
709
  try {
703
710
  const listUrl = `${cdpUrl.replace(/\/+$/, "").replace(/^ws:/, "http:").replace(/\/cdp$/, "")}/json/list`;
704
- const response = await fetch(listUrl);
711
+ const headers = {};
712
+ if (cached?.authToken) headers["Authorization"] = `Bearer ${cached.authToken}`;
713
+ const response = await fetch(listUrl, { headers });
705
714
  if (response.ok) {
706
715
  const targets = await response.json();
707
716
  const target = targets.find((t) => t.id === targetId);
@@ -1026,6 +1035,7 @@ async function snapshotAi(opts) {
1026
1035
  if (!maybe._snapshotForAI) {
1027
1036
  throw new Error("Playwright _snapshotForAI is not available. Upgrade playwright-core to >= 1.50.");
1028
1037
  }
1038
+ const sourceUrl = page.url();
1029
1039
  const result = await maybe._snapshotForAI({
1030
1040
  timeout: normalizeTimeoutMs(opts.timeoutMs, 5e3, 6e4),
1031
1041
  track: "response"
@@ -1052,7 +1062,12 @@ async function snapshotAi(opts) {
1052
1062
  snapshot: built.snapshot,
1053
1063
  refs: built.refs,
1054
1064
  stats: getRoleSnapshotStats(built.snapshot, built.refs),
1055
- untrusted: true
1065
+ untrusted: true,
1066
+ contentMeta: {
1067
+ sourceUrl,
1068
+ contentType: "browser-snapshot",
1069
+ capturedAt: (/* @__PURE__ */ new Date()).toISOString()
1070
+ }
1056
1071
  };
1057
1072
  }
1058
1073
 
@@ -1060,6 +1075,7 @@ async function snapshotAi(opts) {
1060
1075
  async function snapshotRole(opts) {
1061
1076
  const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
1062
1077
  ensurePageState(page);
1078
+ const sourceUrl = page.url();
1063
1079
  const frameSelector = opts.frameSelector?.trim() || "";
1064
1080
  const selector = opts.selector?.trim() || "";
1065
1081
  const locator = frameSelector ? selector ? page.frameLocator(frameSelector).locator(selector) : page.frameLocator(frameSelector).locator(":root") : selector ? page.locator(selector) : page.locator(":root");
@@ -1077,19 +1093,33 @@ async function snapshotRole(opts) {
1077
1093
  snapshot: built.snapshot,
1078
1094
  refs: built.refs,
1079
1095
  stats: getRoleSnapshotStats(built.snapshot, built.refs),
1080
- untrusted: true
1096
+ untrusted: true,
1097
+ contentMeta: {
1098
+ sourceUrl,
1099
+ contentType: "browser-snapshot",
1100
+ capturedAt: (/* @__PURE__ */ new Date()).toISOString()
1101
+ }
1081
1102
  };
1082
1103
  }
1083
1104
  async function snapshotAria(opts) {
1084
1105
  const limit = Math.max(1, Math.min(2e3, Math.floor(opts.limit ?? 500)));
1085
1106
  const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
1086
1107
  ensurePageState(page);
1108
+ const sourceUrl = page.url();
1087
1109
  const session = await page.context().newCDPSession(page);
1088
1110
  try {
1089
1111
  await session.send("Accessibility.enable").catch(() => {
1090
1112
  });
1091
1113
  const res = await session.send("Accessibility.getFullAXTree");
1092
- return { nodes: formatAriaNodes(Array.isArray(res?.nodes) ? res.nodes : [], limit), untrusted: true };
1114
+ return {
1115
+ nodes: formatAriaNodes(Array.isArray(res?.nodes) ? res.nodes : [], limit),
1116
+ untrusted: true,
1117
+ contentMeta: {
1118
+ sourceUrl,
1119
+ contentType: "browser-aria-tree",
1120
+ capturedAt: (/* @__PURE__ */ new Date()).toISOString()
1121
+ }
1122
+ };
1093
1123
  } finally {
1094
1124
  await session.detach().catch(() => {
1095
1125
  });
@@ -1280,7 +1310,7 @@ async function armDialogViaPlaywright(opts) {
1280
1310
  const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
1281
1311
  ensurePageState(page);
1282
1312
  const timeout = normalizeTimeoutMs(opts.timeoutMs, 3e4, 12e4);
1283
- return new Promise((resolve, reject) => {
1313
+ return new Promise((resolve2, reject) => {
1284
1314
  const timer = setTimeout(() => {
1285
1315
  page.removeListener("dialog", handler);
1286
1316
  reject(new Error(`No dialog appeared within ${timeout}ms`));
@@ -1293,7 +1323,7 @@ async function armDialogViaPlaywright(opts) {
1293
1323
  } else {
1294
1324
  await dialog.dismiss();
1295
1325
  }
1296
- resolve();
1326
+ resolve2();
1297
1327
  } catch (err) {
1298
1328
  reject(err);
1299
1329
  }
@@ -1305,7 +1335,7 @@ async function armFileUploadViaPlaywright(opts) {
1305
1335
  const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
1306
1336
  ensurePageState(page);
1307
1337
  const timeout = normalizeTimeoutMs(opts.timeoutMs, 3e4, 12e4);
1308
- return new Promise((resolve, reject) => {
1338
+ return new Promise((resolve2, reject) => {
1309
1339
  const timer = setTimeout(() => {
1310
1340
  page.removeListener("filechooser", handler);
1311
1341
  reject(new Error(`No file chooser appeared within ${timeout}ms`));
@@ -1314,7 +1344,7 @@ async function armFileUploadViaPlaywright(opts) {
1314
1344
  clearTimeout(timer);
1315
1345
  try {
1316
1346
  await fc.setFiles(opts.paths ?? []);
1317
- resolve();
1347
+ resolve2();
1318
1348
  } catch (err) {
1319
1349
  reject(err);
1320
1350
  }
@@ -1331,11 +1361,82 @@ async function pressKeyViaPlaywright(opts) {
1331
1361
  ensurePageState(page);
1332
1362
  await page.keyboard.press(key, { delay: Math.max(0, Math.floor(opts.delayMs ?? 0)) });
1333
1363
  }
1364
/**
 * Validate a caller-supplied output file path before anything is written to it.
 *
 * The check is purely lexical (no filesystem access, symlinks are not resolved):
 *  - the path must be a non-empty string;
 *  - after normalization it must not contain a `..` path segment, i.e. it must
 *    not climb out of the directory it starts in;
 *  - when `allowedRoots` is a non-empty array, the resolved path must be one of
 *    the roots or live underneath one of them.
 *
 * @param {string} path2 - Output path to validate.
 * @param {string[]} [allowedRoots] - Directories the output may be written to.
 *   When omitted or empty, only the traversal check is applied.
 * @returns {void}
 * @throws {Error} If the path is missing, traverses upward, or falls outside
 *   every allowed root.
 */
function assertSafeOutputPath(path2, allowedRoots) {
  if (!path2 || typeof path2 !== "string") {
    throw new Error("Output path is required.");
  }
  const normalized = path.normalize(path2);
  // Compare whole segments rather than substrings so that ordinary file names
  // containing two dots (e.g. "report..final.pdf") are not rejected. Both
  // separator styles are split on, which stays conservative on every platform.
  const climbsUpward = normalized.split(/[\\/]+/).includes("..");
  if (climbsUpward) {
    throw new Error(`Unsafe output path: directory traversal detected in "${path2}".`);
  }
  if (!allowedRoots?.length) {
    return;
  }
  const resolved = path.resolve(normalized);
  const withinRoot = allowedRoots.some((root) => {
    // path.relative handles roots that already end in a separator (such as the
    // filesystem root), which a plain `startsWith(root + sep)` test gets wrong.
    const relative = path.relative(path.resolve(root), resolved);
    if (relative === "") return true;
    // An absolute result means the two paths share no common root
    // (for example different drives on Windows).
    if (path.isAbsolute(relative)) return false;
    return relative.split(path.sep)[0] !== "..";
  });
  if (!withinRoot) {
    throw new Error(`Unsafe output path: "${path2}" is outside allowed directories.`);
  }
}
1383
/**
 * Report whether an IP address literal belongs to a loopback, private,
 * link-local, carrier-grade NAT, unique-local or unspecified range.
 *
 * Only genuine IP literals are classified; any other string (for example a DNS
 * name that merely starts with "fd" or "10.") yields `false`. Surrounding
 * brackets, as produced by `URL#hostname` for IPv6, and an IPv6 zone id are
 * ignored.
 *
 * @param {string} ip - IPv4 or IPv6 address literal.
 * @returns {boolean} `true` when the address is in an internal range.
 */
function isInternalIP(ip) {
  const addr = String(ip)
    .trim()
    .toLowerCase()
    .replace(/^\[(.*)\]$/, "$1")
    .replace(/%.*$/, "");
  if (net.isIPv4(addr)) return isInternalIPv4(addr);
  if (net.isIPv6(addr)) return isInternalIPv6(addr);
  return false;
}

/** Classify a validated dotted-quad IPv4 address. */
function isInternalIPv4(addr) {
  const [a, b] = addr.split(".").map(Number);
  return (
    a === 0 || // 0.0.0.0/8 "this network", includes 0.0.0.0
    a === 10 || // 10.0.0.0/8 private
    a === 127 || // 127.0.0.0/8 loopback
    (a === 100 && b >= 64 && b <= 127) || // 100.64.0.0/10 carrier-grade NAT
    (a === 169 && b === 254) || // 169.254.0.0/16 link-local
    (a === 172 && b >= 16 && b <= 31) || // 172.16.0.0/12 private
    (a === 192 && b === 168) // 192.168.0.0/16 private
  );
}

/** Classify a validated IPv6 address (lower-case, no brackets, no zone id). */
function isInternalIPv6(addr) {
  const h = expandIPv6Hextets(addr);
  const firstFiveZero = h.slice(0, 5).every((part) => part === 0);
  if (firstFiveZero && h[5] === 0xffff) {
    // IPv4-mapped (::ffff:a.b.c.d, also when written in hex as ::ffff:7f00:1):
    // classify the embedded IPv4 address.
    return isInternalIPv4(`${h[6] >> 8}.${h[6] & 0xff}.${h[7] >> 8}.${h[7] & 0xff}`);
  }
  if (firstFiveZero && h[5] === 0 && h[6] === 0 && (h[7] === 0 || h[7] === 1)) {
    return true; // :: (unspecified) and ::1 (loopback)
  }
  if ((h[0] & 0xffc0) === 0xfe80) return true; // fe80::/10 link-local
  if ((h[0] & 0xfe00) === 0xfc00) return true; // fc00::/7 unique-local
  return false;
}

/** Expand a valid IPv6 literal into its eight 16-bit groups as numbers. */
function expandIPv6Hextets(addr) {
  let text = addr;
  const lastColon = text.lastIndexOf(":");
  const tail = text.slice(lastColon + 1);
  if (tail.includes(".")) {
    // Trailing dotted-quad notation occupies the last two groups.
    const [a, b, c, d] = tail.split(".").map(Number);
    const high = ((a << 8) | b).toString(16);
    const low = ((c << 8) | d).toString(16);
    text = `${text.slice(0, lastColon + 1)}${high}:${low}`;
  }
  const [head, rest] = text.split("::");
  const headParts = head ? head.split(":") : [];
  const restParts = rest ? rest.split(":") : [];
  // "::" stands for however many zero groups are needed to reach eight.
  const missing = rest === undefined ? 0 : 8 - headParts.length - restParts.length;
  return [...headParts, ...Array(missing).fill("0"), ...restParts].map((part) =>
    Number.parseInt(part, 16)
  );
}
1401
/**
 * Synchronous, DNS-free check of whether a URL points at an internal host.
 *
 * A URL counts as internal when it cannot be parsed, when its host is
 * `localhost`, when its host is an IP literal that `isInternalIP` classifies
 * as internal, or when its host ends in `.local`, `.internal` or `.localhost`.
 *
 * @param {string} url - URL to inspect.
 * @returns {boolean} `true` when the URL is unparsable or targets an internal host.
 */
function isInternalUrl(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    // Fail closed: anything that is not a valid URL is treated as internal.
    return true;
  }
  // URL#hostname keeps the brackets around IPv6 literals and a trailing root
  // dot ("localhost."); strip both so the comparisons below see the bare host.
  const hostname = parsed.hostname
    .toLowerCase()
    .replace(/^\[|\]$/g, "")
    .replace(/\.$/, "");
  if (hostname === "localhost") return true;
  // Only hand real IP literals to the IP classifier so that ordinary domain
  // names cannot be mistaken for addresses because of how they start.
  if (net.isIP(hostname) !== 0 && isInternalIP(hostname)) return true;
  return [".local", ".internal", ".localhost"].some((suffix) => hostname.endsWith(suffix));
}
1416
/**
 * Like `isInternalUrl`, but additionally resolves the host through DNS and
 * reports the URL as internal when any resolved address is internal.
 *
 * Fails closed: unparsable URLs, URLs without a host, lookup failures and empty
 * lookup results are all reported as internal.
 *
 * NOTE(review): this is a point-in-time check. The browser performs its own
 * resolution when it navigates, so a host whose DNS answer changes between this
 * check and the navigation is not covered here.
 *
 * @param {string} url - URL to inspect.
 * @returns {Promise<boolean>} `true` when the URL should be treated as internal.
 */
async function isInternalUrlResolved(url) {
  if (isInternalUrl(url)) return true;
  let hostname;
  try {
    // URL#hostname wraps IPv6 literals in brackets, which dns lookup rejects.
    hostname = new URL(url).hostname.replace(/^\[|\]$/g, "");
  } catch {
    return true;
  }
  // Host-less URLs (file:, data:, ...) have nothing to resolve; keep them blocked.
  if (!hostname) return true;
  try {
    // Inspect every record: a name may resolve to a mix of public and
    // internal addresses, and any of them could be the one that gets used.
    const records = await promises.lookup(hostname, { all: true });
    if (records.length === 0) return true;
    return records.some(({ address }) => isInternalIP(address));
  } catch {
    return true;
  }
}
1334
1432
 
1335
1433
  // src/actions/navigation.ts
1336
1434
  async function navigateViaPlaywright(opts) {
1337
1435
  const url = String(opts.url ?? "").trim();
1338
1436
  if (!url) throw new Error("url is required");
1437
+ if (!opts.allowInternal && await isInternalUrlResolved(url)) {
1438
+ throw new Error(`Navigation to internal/loopback address blocked: "${url}". Set allowInternal: true if this is intentional.`);
1439
+ }
1339
1440
  const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
1340
1441
  ensurePageState(page);
1341
1442
  await page.goto(url, { timeout: normalizeTimeoutMs(opts.timeoutMs, 2e4) });
@@ -1357,11 +1458,14 @@ async function listPagesViaPlaywright(opts) {
1357
1458
  return results;
1358
1459
  }
1359
1460
  async function createPageViaPlaywright(opts) {
1461
+ const targetUrl = (opts.url ?? "").trim() || "about:blank";
1462
+ if (targetUrl !== "about:blank" && !opts.allowInternal && await isInternalUrlResolved(targetUrl)) {
1463
+ throw new Error(`Navigation to internal/loopback address blocked: "${targetUrl}". Set allowInternal: true if this is intentional.`);
1464
+ }
1360
1465
  const { browser } = await connectBrowser(opts.cdpUrl);
1361
1466
  const context = browser.contexts()[0] ?? await browser.newContext();
1362
1467
  const page = await context.newPage();
1363
1468
  ensurePageState(page);
1364
- const targetUrl = (opts.url ?? "").trim() || "about:blank";
1365
1469
  if (targetUrl !== "about:blank") {
1366
1470
  await page.goto(targetUrl, { timeout: normalizeTimeoutMs(void 0, 2e4) });
1367
1471
  }
@@ -1507,6 +1611,7 @@ async function evaluateViaPlaywright(opts) {
1507
1611
 
1508
1612
  // src/actions/download.ts
1509
1613
  async function downloadViaPlaywright(opts) {
1614
+ assertSafeOutputPath(opts.path, opts.allowedOutputRoots);
1510
1615
  const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
1511
1616
  ensurePageState(page);
1512
1617
  restoreRoleRefsForTarget({ cdpUrl: opts.cdpUrl, targetId: opts.targetId, page });
@@ -1533,6 +1638,7 @@ async function waitForDownloadViaPlaywright(opts) {
1533
1638
  const timeout = normalizeTimeoutMs(opts.timeoutMs, 3e4, 12e4);
1534
1639
  const download = await page.waitForEvent("download", { timeout });
1535
1640
  const savePath = opts.path ?? download.suggestedFilename();
1641
+ assertSafeOutputPath(savePath, opts.allowedOutputRoots);
1536
1642
  await download.saveAs(savePath);
1537
1643
  return {
1538
1644
  url: download.url(),
@@ -1716,6 +1822,7 @@ async function traceStartViaPlaywright(opts) {
1716
1822
  });
1717
1823
  }
1718
1824
  async function traceStopViaPlaywright(opts) {
1825
+ assertSafeOutputPath(opts.path, opts.allowedOutputRoots);
1719
1826
  const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
1720
1827
  ensurePageState(page);
1721
1828
  const context = page.context();
@@ -1861,10 +1968,12 @@ async function storageClearViaPlaywright(opts) {
1861
1968
  var CrawlPage = class {
1862
1969
  cdpUrl;
1863
1970
  targetId;
1971
+ allowInternal;
1864
1972
  /** @internal */
1865
- constructor(cdpUrl, targetId) {
1973
+ constructor(cdpUrl, targetId, allowInternal = false) {
1866
1974
  this.cdpUrl = cdpUrl;
1867
1975
  this.targetId = targetId;
1976
+ this.allowInternal = allowInternal;
1868
1977
  }
1869
1978
  /** The CDP target ID for this page. Use this to identify the page in multi-tab scenarios. */
1870
1979
  get id() {
@@ -2196,7 +2305,8 @@ var CrawlPage = class {
2196
2305
  cdpUrl: this.cdpUrl,
2197
2306
  targetId: this.targetId,
2198
2307
  url,
2199
- timeoutMs: opts?.timeoutMs
2308
+ timeoutMs: opts?.timeoutMs,
2309
+ allowInternal: this.allowInternal
2200
2310
  });
2201
2311
  }
2202
2312
  /**
@@ -2383,12 +2493,14 @@ var CrawlPage = class {
2383
2493
  * Stop recording a trace and save it to a file.
2384
2494
  *
2385
2495
  * @param path - File path to save the trace (e.g. `'trace.zip'`)
2496
+ * @param opts - Options (allowedOutputRoots: constrain output to specific directories)
2386
2497
  */
2387
- async traceStop(path2) {
2498
+ async traceStop(path2, opts) {
2388
2499
  return traceStopViaPlaywright({
2389
2500
  cdpUrl: this.cdpUrl,
2390
2501
  targetId: this.targetId,
2391
- path: path2
2502
+ path: path2,
2503
+ allowedOutputRoots: opts?.allowedOutputRoots
2392
2504
  });
2393
2505
  }
2394
2506
  /**
@@ -2576,7 +2688,8 @@ var CrawlPage = class {
2576
2688
  targetId: this.targetId,
2577
2689
  ref,
2578
2690
  path: path2,
2579
- timeoutMs: opts?.timeoutMs
2691
+ timeoutMs: opts?.timeoutMs,
2692
+ allowedOutputRoots: opts?.allowedOutputRoots
2580
2693
  });
2581
2694
  }
2582
2695
  /**
@@ -2592,7 +2705,8 @@ var CrawlPage = class {
2592
2705
  cdpUrl: this.cdpUrl,
2593
2706
  targetId: this.targetId,
2594
2707
  path: opts?.path,
2595
- timeoutMs: opts?.timeoutMs
2708
+ timeoutMs: opts?.timeoutMs,
2709
+ allowedOutputRoots: opts?.allowedOutputRoots
2596
2710
  });
2597
2711
  }
2598
2712
  // ── Emulation ───────────────────────────────────────────────
@@ -2722,10 +2836,12 @@ var CrawlPage = class {
2722
2836
  };
2723
2837
  var BrowserClaw = class _BrowserClaw {
2724
2838
  cdpUrl;
2839
+ allowInternal;
2725
2840
  chrome;
2726
- constructor(cdpUrl, chrome) {
2841
+ constructor(cdpUrl, chrome, allowInternal = false) {
2727
2842
  this.cdpUrl = cdpUrl;
2728
2843
  this.chrome = chrome;
2844
+ this.allowInternal = allowInternal;
2729
2845
  }
2730
2846
  /**
2731
2847
  * Launch a new Chrome instance and connect to it.
@@ -2753,7 +2869,7 @@ var BrowserClaw = class _BrowserClaw {
2753
2869
  static async launch(opts = {}) {
2754
2870
  const chrome = await launchChrome(opts);
2755
2871
  const cdpUrl = `http://127.0.0.1:${chrome.cdpPort}`;
2756
- return new _BrowserClaw(cdpUrl, chrome);
2872
+ return new _BrowserClaw(cdpUrl, chrome, opts.allowInternal);
2757
2873
  }
2758
2874
  /**
2759
2875
  * Connect to an already-running Chrome instance via its CDP endpoint.
@@ -2769,12 +2885,12 @@ var BrowserClaw = class _BrowserClaw {
2769
2885
  * const browser = await BrowserClaw.connect('http://localhost:9222');
2770
2886
  * ```
2771
2887
  */
2772
- static async connect(cdpUrl) {
2773
- if (!await isChromeReachable(cdpUrl, 3e3)) {
2888
+ static async connect(cdpUrl, opts) {
2889
+ if (!await isChromeReachable(cdpUrl, 3e3, opts?.authToken)) {
2774
2890
  throw new Error(`Cannot connect to Chrome at ${cdpUrl}. Is Chrome running with --remote-debugging-port?`);
2775
2891
  }
2776
- await connectBrowser(cdpUrl);
2777
- return new _BrowserClaw(cdpUrl, null);
2892
+ await connectBrowser(cdpUrl, opts?.authToken);
2893
+ return new _BrowserClaw(cdpUrl, null, opts?.allowInternal);
2778
2894
  }
2779
2895
  /**
2780
2896
  * Open a URL in a new tab and return the page handle.
@@ -2789,8 +2905,8 @@ var BrowserClaw = class _BrowserClaw {
2789
2905
  * ```
2790
2906
  */
2791
2907
  async open(url) {
2792
- const tab = await createPageViaPlaywright({ cdpUrl: this.cdpUrl, url });
2793
- return new CrawlPage(this.cdpUrl, tab.targetId);
2908
+ const tab = await createPageViaPlaywright({ cdpUrl: this.cdpUrl, url, allowInternal: this.allowInternal });
2909
+ return new CrawlPage(this.cdpUrl, tab.targetId, this.allowInternal);
2794
2910
  }
2795
2911
  /**
2796
2912
  * Get a CrawlPage handle for the currently active tab.
@@ -2803,7 +2919,7 @@ var BrowserClaw = class _BrowserClaw {
2803
2919
  if (!pages.length) throw new Error("No pages available. Use browser.open(url) to create a tab.");
2804
2920
  const tid = await pageTargetId(pages[0]).catch(() => null);
2805
2921
  if (!tid) throw new Error("Failed to get targetId for the current page.");
2806
- return new CrawlPage(this.cdpUrl, tid);
2922
+ return new CrawlPage(this.cdpUrl, tid, this.allowInternal);
2807
2923
  }
2808
2924
  /**
2809
2925
  * List all open tabs.
@@ -2838,7 +2954,7 @@ var BrowserClaw = class _BrowserClaw {
2838
2954
  * @returns CrawlPage for the specified tab
2839
2955
  */
2840
2956
  page(targetId) {
2841
- return new CrawlPage(this.cdpUrl, targetId);
2957
+ return new CrawlPage(this.cdpUrl, targetId, this.allowInternal);
2842
2958
  }
2843
2959
  /** The CDP endpoint URL for this browser connection. */
2844
2960
  get url() {