browserclaw 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1295,7 +1295,20 @@ async function fetchChromeVersion(cdpUrl, timeoutMs = 500, authToken) {
1295
1295
  async function isChromeReachable(cdpUrl, timeoutMs = 500, authToken) {
1296
1296
  if (isWebSocketUrl(cdpUrl)) return await canOpenWebSocket(cdpUrl, timeoutMs);
1297
1297
  const version = await fetchChromeVersion(cdpUrl, timeoutMs, authToken);
1298
- return Boolean(version);
1298
+ if (version !== null) return true;
1299
+ let isLoopback = false;
1300
+ try {
1301
+ const u = new URL(cdpUrl.startsWith("http") ? cdpUrl : `http://${cdpUrl}`);
1302
+ isLoopback = isLoopbackHost(u.hostname);
1303
+ } catch {
1304
+ }
1305
+ if (!isLoopback) return false;
1306
+ for (let i = 0; i < 2; i++) {
1307
+ await new Promise((r) => setTimeout(r, 150));
1308
+ const retry = await fetchChromeVersion(cdpUrl, timeoutMs, authToken);
1309
+ if (retry !== null) return true;
1310
+ }
1311
+ return false;
1299
1312
  }
1300
1313
  async function getChromeWebSocketUrl(cdpUrl, timeoutMs = 500, authToken) {
1301
1314
  if (isWebSocketUrl(cdpUrl)) return cdpUrl;
@@ -1487,6 +1500,198 @@ async function stopChrome(running, timeoutMs = 2500) {
1487
1500
  }
1488
1501
  }
1489
1502
 
1503
+ // src/stealth.ts
1504
+ var STEALTH_SCRIPT = `(function() {
1505
+ 'use strict';
1506
+ function p(fn) { try { fn(); } catch(_) {} }
1507
+
1508
+ // \u2500\u2500 1. navigator.webdriver \u2192 undefined \u2500\u2500
1509
+ p(function() {
1510
+ Object.defineProperty(navigator, 'webdriver', { get: function() { return undefined; }, configurable: true });
1511
+ });
1512
+
1513
+ // \u2500\u2500 2. navigator.plugins + mimeTypes (only if empty \u2014 Chrome 92+ populates them natively) \u2500\u2500
1514
+ p(function() {
1515
+ if (navigator.plugins && navigator.plugins.length > 0) return;
1516
+
1517
+ function FakePlugin(name, fn, desc, mimes) {
1518
+ this.name = name; this.filename = fn; this.description = desc; this.length = mimes.length;
1519
+ for (var i = 0; i < mimes.length; i++) { this[i] = mimes[i]; mimes[i].enabledPlugin = this; }
1520
+ }
1521
+ FakePlugin.prototype.item = function(i) { return this[i] || null; };
1522
+ FakePlugin.prototype.namedItem = function(n) {
1523
+ for (var i = 0; i < this.length; i++) if (this[i].type === n) return this[i];
1524
+ return null;
1525
+ };
1526
+
1527
+ function M(type, suf, desc) { this.type = type; this.suffixes = suf; this.description = desc; }
1528
+
1529
+ var m1 = new M('application/pdf', 'pdf', 'Portable Document Format');
1530
+ var m2 = new M('application/x-google-chrome-pdf', 'pdf', 'Portable Document Format');
1531
+ var m3 = new M('application/x-nacl', '', 'Native Client Executable');
1532
+ var m4 = new M('application/x-pnacl', '', 'Portable Native Client Executable');
1533
+
1534
+ var plugins = [
1535
+ new FakePlugin('Chrome PDF Plugin', 'internal-pdf-viewer', 'Portable Document Format', [m1]),
1536
+ new FakePlugin('Chrome PDF Viewer', 'mhjfbmdgcfjbbpaeojofohoefgiehjai', '', [m2]),
1537
+ new FakePlugin('Native Client', 'internal-nacl-plugin', '', [m3, m4]),
1538
+ ];
1539
+
1540
+ function makeIterable(arr, items) {
1541
+ arr.length = items.length;
1542
+ for (var i = 0; i < items.length; i++) arr[i] = items[i];
1543
+ arr[Symbol.iterator] = function() {
1544
+ var idx = 0;
1545
+ return { next: function() {
1546
+ return idx < items.length ? { value: items[idx++], done: false } : { done: true };
1547
+ }};
1548
+ };
1549
+ }
1550
+
1551
+ var pa = { item: function(i) { return plugins[i] || null; },
1552
+ namedItem: function(n) { for (var i = 0; i < plugins.length; i++) if (plugins[i].name === n) return plugins[i]; return null; },
1553
+ refresh: function() {} };
1554
+ makeIterable(pa, plugins);
1555
+ Object.defineProperty(navigator, 'plugins', { get: function() { return pa; } });
1556
+
1557
+ var allMimes = [m1, m2, m3, m4];
1558
+ var ma = { item: function(i) { return allMimes[i] || null; },
1559
+ namedItem: function(n) { for (var i = 0; i < allMimes.length; i++) if (allMimes[i].type === n) return allMimes[i]; return null; } };
1560
+ makeIterable(ma, allMimes);
1561
+ Object.defineProperty(navigator, 'mimeTypes', { get: function() { return ma; } });
1562
+ });
1563
+
1564
+ // \u2500\u2500 3. navigator.languages (cached + frozen so identity check passes) \u2500\u2500
1565
+ p(function() {
1566
+ if (!navigator.languages || navigator.languages.length === 0) {
1567
+ var langs = Object.freeze(['en-US', 'en']);
1568
+ Object.defineProperty(navigator, 'languages', { get: function() { return langs; } });
1569
+ }
1570
+ });
1571
+
1572
+ // \u2500\u2500 4. window.chrome \u2500\u2500
1573
+ p(function() {
1574
+ if (window.chrome && window.chrome.runtime && window.chrome.runtime.connect) return;
1575
+
1576
+ var chrome = window.chrome || {};
1577
+ var noop = function() {};
1578
+ var evtStub = { addListener: noop, removeListener: noop, hasListeners: function() { return false; } };
1579
+ chrome.runtime = chrome.runtime || {};
1580
+ chrome.runtime.onMessage = chrome.runtime.onMessage || evtStub;
1581
+ chrome.runtime.onConnect = chrome.runtime.onConnect || evtStub;
1582
+ chrome.runtime.sendMessage = chrome.runtime.sendMessage || noop;
1583
+ chrome.runtime.connect = chrome.runtime.connect || function() {
1584
+ return { onMessage: { addListener: noop }, postMessage: noop, disconnect: noop };
1585
+ };
1586
+ if (chrome.runtime.id === undefined) chrome.runtime.id = undefined;
1587
+ if (!chrome.loadTimes) chrome.loadTimes = function() { return {}; };
1588
+ if (!chrome.csi) chrome.csi = function() { return {}; };
1589
+ if (!chrome.app) {
1590
+ chrome.app = {
1591
+ isInstalled: false,
1592
+ InstallState: { INSTALLED: 'installed', NOT_INSTALLED: 'not_installed', DISABLED: 'disabled' },
1593
+ RunningState: { CANNOT_RUN: 'cannot_run', READY_TO_RUN: 'ready_to_run', RUNNING: 'running' },
1594
+ getDetails: function() { return null; },
1595
+ getIsInstalled: function() { return false; },
1596
+ runningState: function() { return 'cannot_run'; },
1597
+ };
1598
+ }
1599
+
1600
+ if (!window.chrome) {
1601
+ Object.defineProperty(window, 'chrome', { value: chrome, writable: false, enumerable: true, configurable: false });
1602
+ }
1603
+ });
1604
+
1605
+ // \u2500\u2500 5. Permissions API consistency \u2500\u2500
1606
+ p(function() {
1607
+ var orig = navigator.permissions.query.bind(navigator.permissions);
1608
+ function q(params) {
1609
+ if (params.name === 'notifications') {
1610
+ return Promise.resolve({
1611
+ state: typeof Notification !== 'undefined' ? Notification.permission : 'prompt',
1612
+ name: 'notifications', onchange: null,
1613
+ addEventListener: function(){}, removeEventListener: function(){}, dispatchEvent: function(){ return true; },
1614
+ });
1615
+ }
1616
+ return orig(params);
1617
+ }
1618
+ q.toString = function() { return 'function query() { [native code] }'; };
1619
+ navigator.permissions.query = q;
1620
+ });
1621
+
1622
+ // \u2500\u2500 6. WebGL vendor / renderer \u2500\u2500
1623
+ p(function() {
1624
+ var h = {
1625
+ apply: function(target, self, args) {
1626
+ var param = args[0];
1627
+ if (param === 0x9245) return 'Intel Inc.';
1628
+ if (param === 0x9246) return 'Intel Iris OpenGL Engine';
1629
+ return Reflect.apply(target, self, args);
1630
+ }
1631
+ };
1632
+ if (typeof WebGLRenderingContext !== 'undefined')
1633
+ WebGLRenderingContext.prototype.getParameter = new Proxy(WebGLRenderingContext.prototype.getParameter, h);
1634
+ if (typeof WebGL2RenderingContext !== 'undefined')
1635
+ WebGL2RenderingContext.prototype.getParameter = new Proxy(WebGL2RenderingContext.prototype.getParameter, h);
1636
+ });
1637
+
1638
+ // \u2500\u2500 7. Notification.permission \u2500\u2500
1639
+ p(function() {
1640
+ if (typeof Notification !== 'undefined' && Notification.permission === 'denied') {
1641
+ Object.defineProperty(Notification, 'permission', { get: function() { return 'default'; }, configurable: true });
1642
+ }
1643
+ });
1644
+
1645
+ // \u2500\u2500 8. navigator.connection (cached so identity check passes) \u2500\u2500
1646
+ p(function() {
1647
+ if (navigator.connection) return;
1648
+ var conn = {
1649
+ effectiveType: '4g', rtt: 50, downlink: 10, saveData: false, onchange: null,
1650
+ addEventListener: function(){}, removeEventListener: function(){}, dispatchEvent: function(){ return true; },
1651
+ };
1652
+ Object.defineProperty(navigator, 'connection', { get: function() { return conn; } });
1653
+ });
1654
+
1655
+ // \u2500\u2500 9. Iframe contentWindow.chrome \u2500\u2500
1656
+ // Handled by patch 4 \u2014 chrome object is now on window, propagates to iframes on same origin.
1657
+
1658
+ // \u2500\u2500 10. console method toString \u2500\u2500
1659
+ p(function() {
1660
+ ['log','info','warn','error','debug','table','trace'].forEach(function(n) {
1661
+ if (console[n]) {
1662
+ console[n].toString = function() { return 'function ' + n + '() { [native code] }'; };
1663
+ }
1664
+ });
1665
+ });
1666
+
1667
+ // \u2500\u2500 11. Headless-mode window / screen fixes \u2500\u2500
1668
+ p(function() {
1669
+ if (window.outerWidth === 0)
1670
+ Object.defineProperty(window, 'outerWidth', { get: function() { return window.innerWidth || 1920; } });
1671
+ if (window.outerHeight === 0)
1672
+ Object.defineProperty(window, 'outerHeight', { get: function() { return (window.innerHeight || 1080) + 85; } });
1673
+ });
1674
+
1675
+ p(function() {
1676
+ if (screen.colorDepth === 0) {
1677
+ Object.defineProperty(screen, 'colorDepth', { get: function() { return 24; } });
1678
+ Object.defineProperty(screen, 'pixelDepth', { get: function() { return 24; } });
1679
+ }
1680
+ });
1681
+
1682
+ // \u2500\u2500 12. navigator.hardwareConcurrency \u2500\u2500
1683
+ p(function() {
1684
+ if (!navigator.hardwareConcurrency)
1685
+ Object.defineProperty(navigator, 'hardwareConcurrency', { get: function() { return 4; } });
1686
+ });
1687
+
1688
+ // \u2500\u2500 13. navigator.deviceMemory \u2500\u2500
1689
+ p(function() {
1690
+ if (!navigator.deviceMemory)
1691
+ Object.defineProperty(navigator, 'deviceMemory', { get: function() { return 8; } });
1692
+ });
1693
+ })()`;
1694
+
1490
1695
  // src/connection.ts
1491
1696
  var BrowserTabNotFoundError = class extends Error {
1492
1697
  constructor(message = "Tab not found") {
@@ -1727,7 +1932,6 @@ function ensurePageState(page) {
1727
1932
  }
1728
1933
  return state;
1729
1934
  }
1730
- var STEALTH_SCRIPT = `Object.defineProperty(navigator, 'webdriver', { get: () => undefined })`;
1731
1935
  function applyStealthToPage(page) {
1732
1936
  page.evaluate(STEALTH_SCRIPT).catch((e) => {
1733
1937
  if (process.env.DEBUG !== void 0 && process.env.DEBUG !== "")
@@ -3547,6 +3751,99 @@ async function setTimezoneViaPlaywright(opts) {
3547
3751
  });
3548
3752
  }
3549
3753
 
3754
+ // src/anti-bot.ts
3755
+ var DETECT_CHALLENGE_SCRIPT = `(function() {
3756
+ var title = (document.title || '').toLowerCase();
3757
+
3758
+ // Cloudflare JS challenge
3759
+ if (title === 'just a moment...'
3760
+ || document.querySelector('#challenge-running, #cf-please-wait, #challenge-form')
3761
+ || title.indexOf('checking your browser') !== -1) {
3762
+ return { kind: 'cloudflare-js', message: 'Cloudflare JS challenge' };
3763
+ }
3764
+
3765
+ // Cloudflare block page (needs body text \u2014 read lazily)
3766
+ var body = null;
3767
+ function getBody() { if (body === null) body = (document.body && document.body.textContent) || ''; return body; }
3768
+
3769
+ if (title.indexOf('attention required') !== -1
3770
+ || (document.querySelector('.cf-error-details') && getBody().indexOf('blocked') !== -1)) {
3771
+ return { kind: 'cloudflare-block', message: 'Cloudflare block page' };
3772
+ }
3773
+
3774
+ // Cloudflare Turnstile
3775
+ if (document.querySelector('.cf-turnstile, iframe[src*="challenges.cloudflare.com"]')) {
3776
+ return { kind: 'cloudflare-turnstile', message: 'Cloudflare Turnstile challenge' };
3777
+ }
3778
+
3779
+ // hCaptcha
3780
+ if (document.querySelector('.h-captcha, iframe[src*="hcaptcha.com"]')) {
3781
+ return { kind: 'hcaptcha', message: 'hCaptcha challenge' };
3782
+ }
3783
+
3784
+ // reCAPTCHA
3785
+ if (document.querySelector('.g-recaptcha, iframe[src*="google.com/recaptcha"]')) {
3786
+ return { kind: 'recaptcha', message: 'reCAPTCHA challenge' };
3787
+ }
3788
+
3789
+ // Generic access-denied / rate-limit pages (only read body for short pages)
3790
+ var b = getBody();
3791
+ if (b.length < 5000) {
3792
+ if (/access denied|403 forbidden/i.test(title) || /access denied/i.test(b)) {
3793
+ return { kind: 'blocked', message: 'Access denied' };
3794
+ }
3795
+ if (/\\b429\\b/i.test(title) || /too many requests|rate limit/i.test(b)) {
3796
+ return { kind: 'rate-limited', message: 'Rate limited' };
3797
+ }
3798
+ }
3799
+
3800
+ return null;
3801
+ })()`;
3802
+ function parseChallengeResult(raw) {
3803
+ if (raw !== null && typeof raw === "object" && "kind" in raw) {
3804
+ return raw;
3805
+ }
3806
+ return null;
3807
+ }
3808
+ async function detectChallengeViaPlaywright(opts) {
3809
+ const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
3810
+ ensurePageState(page);
3811
+ return parseChallengeResult(await page.evaluate(DETECT_CHALLENGE_SCRIPT));
3812
+ }
3813
+ async function waitForChallengeViaPlaywright(opts) {
3814
+ const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
3815
+ ensurePageState(page);
3816
+ const timeout = normalizeTimeoutMs(opts.timeoutMs, 15e3);
3817
+ const poll = Math.max(250, Math.min(5e3, opts.pollMs ?? 500));
3818
+ const detect = async () => parseChallengeResult(await page.evaluate(DETECT_CHALLENGE_SCRIPT));
3819
+ const initial = await detect();
3820
+ if (initial === null) return { resolved: true, challenge: null };
3821
+ if (initial.kind === "cloudflare-js") {
3822
+ try {
3823
+ await page.waitForFunction(
3824
+ "document.title.toLowerCase() !== 'just a moment...' && !document.querySelector('#challenge-running')",
3825
+ void 0,
3826
+ { timeout }
3827
+ );
3828
+ await page.waitForLoadState("domcontentloaded", { timeout: 5e3 }).catch(() => {
3829
+ });
3830
+ const after = await detect();
3831
+ return { resolved: after === null, challenge: after };
3832
+ } catch {
3833
+ const after = await detect();
3834
+ return { resolved: after === null, challenge: after };
3835
+ }
3836
+ }
3837
+ const deadline = Date.now() + timeout;
3838
+ while (Date.now() < deadline) {
3839
+ await page.waitForTimeout(poll);
3840
+ const current = await detect();
3841
+ if (current === null) return { resolved: true, challenge: null };
3842
+ }
3843
+ const final = await detect();
3844
+ return { resolved: final === null, challenge: final };
3845
+ }
3846
+
3550
3847
  // src/capture/activity.ts
3551
3848
  function consolePriority(level) {
3552
3849
  switch (level) {
@@ -5151,6 +5448,53 @@ var CrawlPage = class {
5151
5448
  name
5152
5449
  });
5153
5450
  }
5451
+ // ── Anti-Bot ──────────────────────────────────────────────────
5452
+ /**
5453
+ * Detect whether the page is showing an anti-bot challenge
5454
+ * (Cloudflare, hCaptcha, reCAPTCHA, access-denied, rate-limit, etc.).
5455
+ *
5456
+ * Returns `null` if no challenge is detected.
5457
+ *
5458
+ * @example
5459
+ * ```ts
5460
+ * const challenge = await page.detectChallenge();
5461
+ * if (challenge) {
5462
+ * console.log(challenge.kind); // 'cloudflare-js'
5463
+ * console.log(challenge.message); // 'Cloudflare JS challenge'
5464
+ * }
5465
+ * ```
5466
+ */
5467
+ async detectChallenge() {
5468
+ return detectChallengeViaPlaywright({ cdpUrl: this.cdpUrl, targetId: this.targetId });
5469
+ }
5470
+ /**
5471
+ * Wait for an anti-bot challenge to resolve on its own.
5472
+ *
5473
+ * Cloudflare JS challenges typically auto-resolve in ~5 seconds.
5474
+ * CAPTCHA challenges will only resolve if solved in a visible browser window.
5475
+ *
5476
+ * @param opts.timeoutMs - Maximum wait time (default: `15000`)
5477
+ * @param opts.pollMs - Poll interval (default: `500`)
5478
+ * @returns Whether the challenge resolved, and the remaining challenge info if not
5479
+ *
5480
+ * @example
5481
+ * ```ts
5482
+ * await page.goto('https://example.com');
5483
+ * const challenge = await page.detectChallenge();
5484
+ * if (challenge?.kind === 'cloudflare-js') {
5485
+ * const { resolved } = await page.waitForChallenge({ timeoutMs: 20000 });
5486
+ * if (!resolved) throw new Error('Challenge did not resolve');
5487
+ * }
5488
+ * ```
5489
+ */
5490
+ async waitForChallenge(opts) {
5491
+ return waitForChallengeViaPlaywright({
5492
+ cdpUrl: this.cdpUrl,
5493
+ targetId: this.targetId,
5494
+ timeoutMs: opts?.timeoutMs,
5495
+ pollMs: opts?.pollMs
5496
+ });
5497
+ }
5154
5498
  };
5155
5499
  var BrowserClaw = class _BrowserClaw {
5156
5500
  cdpUrl;
@@ -5300,12 +5644,14 @@ exports.BrowserClaw = BrowserClaw;
5300
5644
  exports.BrowserTabNotFoundError = BrowserTabNotFoundError;
5301
5645
  exports.CrawlPage = CrawlPage;
5302
5646
  exports.InvalidBrowserNavigationUrlError = InvalidBrowserNavigationUrlError;
5647
+ exports.STEALTH_SCRIPT = STEALTH_SCRIPT;
5303
5648
  exports.assertBrowserNavigationAllowed = assertBrowserNavigationAllowed;
5304
5649
  exports.assertBrowserNavigationRedirectChainAllowed = assertBrowserNavigationRedirectChainAllowed;
5305
5650
  exports.assertBrowserNavigationResultAllowed = assertBrowserNavigationResultAllowed;
5306
5651
  exports.assertSafeUploadPaths = assertSafeUploadPaths;
5307
5652
  exports.batchViaPlaywright = batchViaPlaywright;
5308
5653
  exports.createPinnedLookup = createPinnedLookup;
5654
+ exports.detectChallengeViaPlaywright = detectChallengeViaPlaywright;
5309
5655
  exports.ensureContextState = ensureContextState;
5310
5656
  exports.executeSingleAction = executeSingleAction;
5311
5657
  exports.forceDisconnectPlaywrightForTarget = forceDisconnectPlaywrightForTarget;
@@ -5324,6 +5670,7 @@ exports.resolvePageByTargetIdOrThrow = resolvePageByTargetIdOrThrow;
5324
5670
  exports.resolvePinnedHostnameWithPolicy = resolvePinnedHostnameWithPolicy;
5325
5671
  exports.resolveStrictExistingUploadPaths = resolveStrictExistingUploadPaths;
5326
5672
  exports.sanitizeUntrustedFileName = sanitizeUntrustedFileName;
5673
+ exports.waitForChallengeViaPlaywright = waitForChallengeViaPlaywright;
5327
5674
  exports.withBrowserNavigationPolicy = withBrowserNavigationPolicy;
5328
5675
  exports.withPageScopedCdpClient = withPageScopedCdpClient;
5329
5676
  exports.withPlaywrightPageCdpSession = withPlaywrightPageCdpSession;