browserclaw 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1295,7 +1295,7 @@ async function fetchChromeVersion(cdpUrl, timeoutMs = 500, authToken) {
1295
1295
  /**
   * Report whether the Chrome DevTools endpoint at `cdpUrl` can be reached.
   *
   * WebSocket URLs are probed with `canOpenWebSocket`; any other URL is probed
   * by calling `fetchChromeVersion` (which also receives `authToken`).
   *
   * @param cdpUrl - CDP endpoint to probe
   * @param timeoutMs - Probe timeout forwarded to the helper (default: `500`)
   * @param authToken - Optional token forwarded to `fetchChromeVersion`
   * @returns `true` when the probe succeeds
   */
  async function isChromeReachable(cdpUrl, timeoutMs = 500, authToken) {
1296
1296
  if (isWebSocketUrl(cdpUrl)) return await canOpenWebSocket(cdpUrl, timeoutMs);
1297
1297
  const version = await fetchChromeVersion(cdpUrl, timeoutMs, authToken);
1298
  // Diff note: 0.7.1 replaces the truthiness check below with an explicit
  // `!== null` comparison — presumably fetchChromeVersion signals failure
  // with null; confirm against its definition.
- return Boolean(version);
1298
+ return version !== null;
1299
1299
  }
1300
1300
  async function getChromeWebSocketUrl(cdpUrl, timeoutMs = 500, authToken) {
1301
1301
  if (isWebSocketUrl(cdpUrl)) return cdpUrl;
@@ -1487,6 +1487,198 @@ async function stopChrome(running, timeoutMs = 2500) {
1487
1487
  }
1488
1488
  }
1489
1489
 
1490
+ // src/stealth.ts
1491
/**
 * Page-side script (an IIFE in source form) that patches browser globals which
 * commonly reveal automation: navigator.webdriver, plugins/mimeTypes,
 * languages, window.chrome, the Permissions API, WebGL vendor strings,
 * Notification.permission, navigator.connection, console method toString,
 * headless window/screen metrics, hardwareConcurrency and deviceMemory.
 *
 * Every patch runs inside `p()`, which swallows exceptions on purpose so that
 * one failing patch never prevents the remaining ones from being applied.
 *
 * The string is evaluated in the page, so it deliberately sticks to ES5-style
 * syntax and must not contain backticks or template placeholders.
 */
var STEALTH_SCRIPT = `(function() {
  'use strict';
  function p(fn) { try { fn(); } catch(_) {} }

  // \u2500\u2500 1. navigator.webdriver \u2192 undefined \u2500\u2500
  p(function() {
    Object.defineProperty(navigator, 'webdriver', { get: function() { return undefined; }, configurable: true });
  });

  // \u2500\u2500 2. navigator.plugins + mimeTypes (only if empty \u2014 Chrome 92+ populates them natively) \u2500\u2500
  p(function() {
    if (navigator.plugins && navigator.plugins.length > 0) return;

    function FakePlugin(name, fn, desc, mimes) {
      this.name = name; this.filename = fn; this.description = desc; this.length = mimes.length;
      for (var i = 0; i < mimes.length; i++) { this[i] = mimes[i]; mimes[i].enabledPlugin = this; }
    }
    FakePlugin.prototype.item = function(i) { return this[i] || null; };
    FakePlugin.prototype.namedItem = function(n) {
      for (var i = 0; i < this.length; i++) if (this[i].type === n) return this[i];
      return null;
    };

    function M(type, suf, desc) { this.type = type; this.suffixes = suf; this.description = desc; }

    var m1 = new M('application/pdf', 'pdf', 'Portable Document Format');
    var m2 = new M('application/x-google-chrome-pdf', 'pdf', 'Portable Document Format');
    var m3 = new M('application/x-nacl', '', 'Native Client Executable');
    var m4 = new M('application/x-pnacl', '', 'Portable Native Client Executable');

    var plugins = [
      new FakePlugin('Chrome PDF Plugin', 'internal-pdf-viewer', 'Portable Document Format', [m1]),
      new FakePlugin('Chrome PDF Viewer', 'mhjfbmdgcfjbbpaeojofohoefgiehjai', '', [m2]),
      new FakePlugin('Native Client', 'internal-nacl-plugin', '', [m3, m4]),
    ];

    function makeIterable(arr, items) {
      arr.length = items.length;
      for (var i = 0; i < items.length; i++) arr[i] = items[i];
      arr[Symbol.iterator] = function() {
        var idx = 0;
        return { next: function() {
          return idx < items.length ? { value: items[idx++], done: false } : { done: true };
        }};
      };
    }

    var pa = { item: function(i) { return plugins[i] || null; },
      namedItem: function(n) { for (var i = 0; i < plugins.length; i++) if (plugins[i].name === n) return plugins[i]; return null; },
      refresh: function() {} };
    makeIterable(pa, plugins);
    Object.defineProperty(navigator, 'plugins', { get: function() { return pa; } });

    var allMimes = [m1, m2, m3, m4];
    var ma = { item: function(i) { return allMimes[i] || null; },
      namedItem: function(n) { for (var i = 0; i < allMimes.length; i++) if (allMimes[i].type === n) return allMimes[i]; return null; } };
    makeIterable(ma, allMimes);
    Object.defineProperty(navigator, 'mimeTypes', { get: function() { return ma; } });
  });

  // \u2500\u2500 3. navigator.languages (cached + frozen so identity check passes) \u2500\u2500
  p(function() {
    if (!navigator.languages || navigator.languages.length === 0) {
      var langs = Object.freeze(['en-US', 'en']);
      Object.defineProperty(navigator, 'languages', { get: function() { return langs; } });
    }
  });

  // \u2500\u2500 4. window.chrome \u2500\u2500
  p(function() {
    if (window.chrome && window.chrome.runtime && window.chrome.runtime.connect) return;

    var chrome = window.chrome || {};
    var noop = function() {};
    var evtStub = { addListener: noop, removeListener: noop, hasListeners: function() { return false; } };
    chrome.runtime = chrome.runtime || {};
    chrome.runtime.onMessage = chrome.runtime.onMessage || evtStub;
    chrome.runtime.onConnect = chrome.runtime.onConnect || evtStub;
    chrome.runtime.sendMessage = chrome.runtime.sendMessage || noop;
    chrome.runtime.connect = chrome.runtime.connect || function() {
      return { onMessage: { addListener: noop }, postMessage: noop, disconnect: noop };
    };
    if (chrome.runtime.id === undefined) chrome.runtime.id = undefined;
    if (!chrome.loadTimes) chrome.loadTimes = function() { return {}; };
    if (!chrome.csi) chrome.csi = function() { return {}; };
    if (!chrome.app) {
      chrome.app = {
        isInstalled: false,
        InstallState: { INSTALLED: 'installed', NOT_INSTALLED: 'not_installed', DISABLED: 'disabled' },
        RunningState: { CANNOT_RUN: 'cannot_run', READY_TO_RUN: 'ready_to_run', RUNNING: 'running' },
        getDetails: function() { return null; },
        getIsInstalled: function() { return false; },
        runningState: function() { return 'cannot_run'; },
      };
    }

    if (!window.chrome) {
      Object.defineProperty(window, 'chrome', { value: chrome, writable: false, enumerable: true, configurable: false });
    }
  });

  // \u2500\u2500 5. Permissions API consistency \u2500\u2500
  p(function() {
    var orig = navigator.permissions.query.bind(navigator.permissions);
    function q(params) {
      // A missing descriptor is left to the native query so the caller gets the
      // native rejected promise instead of a synchronous TypeError from this wrapper.
      if (params && params.name === 'notifications') {
        var perm = typeof Notification !== 'undefined' ? Notification.permission : 'default';
        return Promise.resolve({
          // Notification.permission uses 'default' where PermissionStatus.state uses 'prompt'.
          state: perm === 'default' ? 'prompt' : perm,
          name: 'notifications', onchange: null,
          addEventListener: function(){}, removeEventListener: function(){}, dispatchEvent: function(){ return true; },
        });
      }
      return orig(params);
    }
    q.toString = function() { return 'function query() { [native code] }'; };
    navigator.permissions.query = q;
  });

  // \u2500\u2500 6. WebGL vendor / renderer \u2500\u2500
  p(function() {
    var h = {
      apply: function(target, self, args) {
        var param = args[0];
        if (param === 0x9245) return 'Intel Inc.';
        if (param === 0x9246) return 'Intel Iris OpenGL Engine';
        return Reflect.apply(target, self, args);
      }
    };
    if (typeof WebGLRenderingContext !== 'undefined')
      WebGLRenderingContext.prototype.getParameter = new Proxy(WebGLRenderingContext.prototype.getParameter, h);
    if (typeof WebGL2RenderingContext !== 'undefined')
      WebGL2RenderingContext.prototype.getParameter = new Proxy(WebGL2RenderingContext.prototype.getParameter, h);
  });

  // \u2500\u2500 7. Notification.permission \u2500\u2500
  p(function() {
    if (typeof Notification !== 'undefined' && Notification.permission === 'denied') {
      Object.defineProperty(Notification, 'permission', { get: function() { return 'default'; }, configurable: true });
    }
  });

  // \u2500\u2500 8. navigator.connection (cached so identity check passes) \u2500\u2500
  p(function() {
    if (navigator.connection) return;
    var conn = {
      effectiveType: '4g', rtt: 50, downlink: 10, saveData: false, onchange: null,
      addEventListener: function(){}, removeEventListener: function(){}, dispatchEvent: function(){ return true; },
    };
    Object.defineProperty(navigator, 'connection', { get: function() { return conn; } });
  });

  // \u2500\u2500 9. Iframe contentWindow.chrome \u2500\u2500
  // Handled by patch 4 \u2014 chrome object is now on window, propagates to iframes on same origin.

  // \u2500\u2500 10. console method toString \u2500\u2500
  p(function() {
    ['log','info','warn','error','debug','table','trace'].forEach(function(n) {
      if (console[n]) {
        console[n].toString = function() { return 'function ' + n + '() { [native code] }'; };
      }
    });
  });

  // \u2500\u2500 11. Headless-mode window / screen fixes \u2500\u2500
  p(function() {
    if (window.outerWidth === 0)
      Object.defineProperty(window, 'outerWidth', { get: function() { return window.innerWidth || 1920; } });
    if (window.outerHeight === 0)
      Object.defineProperty(window, 'outerHeight', { get: function() { return (window.innerHeight || 1080) + 85; } });
  });

  p(function() {
    if (screen.colorDepth === 0) {
      Object.defineProperty(screen, 'colorDepth', { get: function() { return 24; } });
      Object.defineProperty(screen, 'pixelDepth', { get: function() { return 24; } });
    }
  });

  // \u2500\u2500 12. navigator.hardwareConcurrency \u2500\u2500
  p(function() {
    if (!navigator.hardwareConcurrency)
      Object.defineProperty(navigator, 'hardwareConcurrency', { get: function() { return 4; } });
  });

  // \u2500\u2500 13. navigator.deviceMemory \u2500\u2500
  p(function() {
    if (!navigator.deviceMemory)
      Object.defineProperty(navigator, 'deviceMemory', { get: function() { return 8; } });
  });
})()`;
1681
+
1490
1682
  // src/connection.ts
1491
1683
  var BrowserTabNotFoundError = class extends Error {
1492
1684
  constructor(message = "Tab not found") {
@@ -1727,7 +1919,6 @@ function ensurePageState(page) {
1727
1919
  }
1728
1920
  return state;
1729
1921
  }
1730
- var STEALTH_SCRIPT = `Object.defineProperty(navigator, 'webdriver', { get: () => undefined })`;
1731
1922
  function applyStealthToPage(page) {
1732
1923
  page.evaluate(STEALTH_SCRIPT).catch((e) => {
1733
1924
  if (process.env.DEBUG !== void 0 && process.env.DEBUG !== "")
@@ -1849,6 +2040,8 @@ async function disconnectBrowser() {
1849
2040
  }
1850
2041
  }
1851
2042
  for (const cur of cachedByCdpUrl.values()) {
2043
+ if (cur.onDisconnected && typeof cur.browser.off === "function")
2044
+ cur.browser.off("disconnected", cur.onDisconnected);
1852
2045
  await cur.browser.close().catch(() => {
1853
2046
  });
1854
2047
  }
@@ -3547,6 +3740,99 @@ async function setTimezoneViaPlaywright(opts) {
3547
3740
  });
3548
3741
  }
3549
3742
 
3743
+ // src/anti-bot.ts
3744
// Page-side detector (an IIFE in source form) passed to page.evaluate.
// It returns { kind, message } for the first check that matches, or null when
// none match. Checks run in this order: Cloudflare JS challenge, Cloudflare
// block page, Cloudflare Turnstile, hCaptcha, reCAPTCHA, then generic
// access-denied / rate-limit text (the last two only when the body text is
// shorter than 5000 characters). Body text is read at most once and cached.
+ var DETECT_CHALLENGE_SCRIPT = `(function() {
3745
+ var title = (document.title || '').toLowerCase();
3746
+
3747
+ // Cloudflare JS challenge
3748
+ if (title === 'just a moment...'
3749
+ || document.querySelector('#challenge-running, #cf-please-wait, #challenge-form')
3750
+ || title.indexOf('checking your browser') !== -1) {
3751
+ return { kind: 'cloudflare-js', message: 'Cloudflare JS challenge' };
3752
+ }
3753
+
3754
+ // Cloudflare block page (needs body text \u2014 read lazily)
3755
+ var body = null;
3756
+ function getBody() { if (body === null) body = (document.body && document.body.textContent) || ''; return body; }
3757
+
3758
+ if (title.indexOf('attention required') !== -1
3759
+ || (document.querySelector('.cf-error-details') && getBody().indexOf('blocked') !== -1)) {
3760
+ return { kind: 'cloudflare-block', message: 'Cloudflare block page' };
3761
+ }
3762
+
3763
+ // Cloudflare Turnstile
3764
+ if (document.querySelector('.cf-turnstile, iframe[src*="challenges.cloudflare.com"]')) {
3765
+ return { kind: 'cloudflare-turnstile', message: 'Cloudflare Turnstile challenge' };
3766
+ }
3767
+
3768
+ // hCaptcha
3769
+ if (document.querySelector('.h-captcha, iframe[src*="hcaptcha.com"]')) {
3770
+ return { kind: 'hcaptcha', message: 'hCaptcha challenge' };
3771
+ }
3772
+
3773
+ // reCAPTCHA
3774
+ if (document.querySelector('.g-recaptcha, iframe[src*="google.com/recaptcha"]')) {
3775
+ return { kind: 'recaptcha', message: 'reCAPTCHA challenge' };
3776
+ }
3777
+
3778
+ // Generic access-denied / rate-limit pages (only read body for short pages)
3779
+ var b = getBody();
3780
+ if (b.length < 5000) {
3781
+ if (/access denied|403 forbidden/i.test(title) || /access denied/i.test(b)) {
3782
+ return { kind: 'blocked', message: 'Access denied' };
3783
+ }
3784
+ if (/\\b429\\b/i.test(title) || /too many requests|rate limit/i.test(b)) {
3785
+ return { kind: 'rate-limited', message: 'Rate limited' };
3786
+ }
3787
+ }
3788
+
3789
+ return null;
3790
+ })()`;
3791
/**
 * Narrow the raw value returned by the in-page detector.
 *
 * @param raw - Whatever `page.evaluate` produced
 * @returns `raw` itself when it is a non-null object carrying a `kind`
 *   property, otherwise `null`
 */
function parseChallengeResult(raw) {
  const isObject = typeof raw === "object" && raw !== null;
  return isObject && "kind" in raw ? raw : null;
}
3797
/**
 * Run the challenge detector once against the page identified by `opts`.
 *
 * @param opts.cdpUrl - CDP endpoint of the browser that owns the page
 * @param opts.targetId - Target id of the page to inspect
 * @returns The detected challenge object, or `null` when none was detected
 */
async function detectChallengeViaPlaywright(opts) {
  const { cdpUrl, targetId } = opts;
  const page = await getPageForTargetId({ cdpUrl, targetId });
  ensurePageState(page);
  const raw = await page.evaluate(DETECT_CHALLENGE_SCRIPT);
  return parseChallengeResult(raw);
}
3802
/**
 * Wait until the page no longer shows an anti-bot challenge, or the timeout
 * elapses.
 *
 * A Cloudflare JS challenge is awaited with `page.waitForFunction`; every
 * other challenge kind is polled with the detector script.
 *
 * @param opts.cdpUrl - CDP endpoint of the browser that owns the page
 * @param opts.targetId - Target id of the page to watch
 * @param opts.timeoutMs - Maximum wait, normalized by `normalizeTimeoutMs`
 *   with a fallback of `15000`
 * @param opts.pollMs - Poll interval (default `500`, clamped to 250–5000)
 * @returns `{ resolved, challenge }`, where `challenge` is the last challenge
 *   seen, or `null` once it has cleared
 */
async function waitForChallengeViaPlaywright(opts) {
  const page = await getPageForTargetId({ cdpUrl: opts.cdpUrl, targetId: opts.targetId });
  ensurePageState(page);
  const timeout = normalizeTimeoutMs(opts.timeoutMs, 15e3);

  // A NaN interval would pass straight through Math.min/Math.max and reach
  // waitForTimeout, so fall back to the default first.
  const requestedPoll = Number(opts.pollMs ?? 500);
  const poll = Math.max(250, Math.min(5e3, Number.isNaN(requestedPoll) ? 500 : requestedPoll));

  const detect = async () => parseChallengeResult(await page.evaluate(DETECT_CHALLENGE_SCRIPT));

  const initial = await detect();
  if (initial === null) return { resolved: true, challenge: null };

  if (initial.kind === "cloudflare-js") {
    // Mirrors every condition the detector uses for 'cloudflare-js'. If the
    // predicate checked less than the detector, it could be true immediately
    // and the function would report "unresolved" without waiting at all.
    const cloudflareCleared = `(function() {
      var t = (document.title || '').toLowerCase();
      return t !== 'just a moment...'
        && t.indexOf('checking your browser') === -1
        && !document.querySelector('#challenge-running, #cf-please-wait, #challenge-form');
    })()`;
    let after;
    try {
      await page.waitForFunction(cloudflareCleared, void 0, { timeout });
      // Best effort: the challenge usually ends in a navigation, and a
      // load-state timeout here should not fail the whole wait.
      await page.waitForLoadState("domcontentloaded", { timeout: 5e3 }).catch(() => {
      });
      after = await detect();
    } catch {
      // The wait timed out (or the detect above failed); report what the page
      // shows now.
      after = await detect();
    }
    return { resolved: after === null, challenge: after };
  }

  const deadline = Date.now() + timeout;
  let latest = initial;
  for (let remaining = timeout; remaining > 0; remaining = deadline - Date.now()) {
    // Never sleep past the deadline.
    await page.waitForTimeout(Math.min(poll, remaining));
    latest = await detect();
    if (latest === null) return { resolved: true, challenge: null };
  }
  return { resolved: false, challenge: latest };
}
3835
+
3550
3836
  // src/capture/activity.ts
3551
3837
  function consolePriority(level) {
3552
3838
  switch (level) {
@@ -5151,6 +5437,53 @@ var CrawlPage = class {
5151
5437
  name
5152
5438
  });
5153
5439
  }
5440
+ // ── Anti-Bot ──────────────────────────────────────────────────
5441
+ /**
5442
+ * Detect whether the page is showing an anti-bot challenge
5443
+ * (Cloudflare, hCaptcha, reCAPTCHA, access-denied, rate-limit, etc.).
5444
+ *
5445
+ * Returns `null` if no challenge is detected.
5446
+ *
5447
+ * @example
5448
+ * ```ts
5449
+ * const challenge = await page.detectChallenge();
5450
+ * if (challenge) {
5451
+ * console.log(challenge.kind); // 'cloudflare-js'
5452
+ * console.log(challenge.message); // 'Cloudflare JS challenge'
5453
+ * }
5454
+ * ```
5455
+ */
5456
+ async detectChallenge() {
5457
+ return detectChallengeViaPlaywright({ cdpUrl: this.cdpUrl, targetId: this.targetId });
5458
+ }
5459
+ /**
5460
+ * Wait for an anti-bot challenge to resolve on its own.
5461
+ *
5462
+ * Cloudflare JS challenges typically auto-resolve in ~5 seconds.
5463
+ * CAPTCHA challenges will only resolve if solved in a visible browser window.
5464
+ *
5465
+ * @param opts.timeoutMs - Maximum wait time (default: `15000`)
5466
+ * @param opts.pollMs - Poll interval (default: `500`)
5467
+ * @returns Whether the challenge resolved, and the remaining challenge info if not
5468
+ *
5469
+ * @example
5470
+ * ```ts
5471
+ * await page.goto('https://example.com');
5472
+ * const challenge = await page.detectChallenge();
5473
+ * if (challenge?.kind === 'cloudflare-js') {
5474
+ * const { resolved } = await page.waitForChallenge({ timeoutMs: 20000 });
5475
+ * if (!resolved) throw new Error('Challenge did not resolve');
5476
+ * }
5477
+ * ```
5478
+ */
5479
+ async waitForChallenge(opts) {
5480
+ return waitForChallengeViaPlaywright({
5481
+ cdpUrl: this.cdpUrl,
5482
+ targetId: this.targetId,
5483
+ timeoutMs: opts?.timeoutMs,
5484
+ pollMs: opts?.pollMs
5485
+ });
5486
+ }
5154
5487
  };
5155
5488
  var BrowserClaw = class _BrowserClaw {
5156
5489
  cdpUrl;
@@ -5300,12 +5633,14 @@ exports.BrowserClaw = BrowserClaw;
5300
5633
  exports.BrowserTabNotFoundError = BrowserTabNotFoundError;
5301
5634
  exports.CrawlPage = CrawlPage;
5302
5635
  exports.InvalidBrowserNavigationUrlError = InvalidBrowserNavigationUrlError;
5636
+ exports.STEALTH_SCRIPT = STEALTH_SCRIPT;
5303
5637
  exports.assertBrowserNavigationAllowed = assertBrowserNavigationAllowed;
5304
5638
  exports.assertBrowserNavigationRedirectChainAllowed = assertBrowserNavigationRedirectChainAllowed;
5305
5639
  exports.assertBrowserNavigationResultAllowed = assertBrowserNavigationResultAllowed;
5306
5640
  exports.assertSafeUploadPaths = assertSafeUploadPaths;
5307
5641
  exports.batchViaPlaywright = batchViaPlaywright;
5308
5642
  exports.createPinnedLookup = createPinnedLookup;
5643
+ exports.detectChallengeViaPlaywright = detectChallengeViaPlaywright;
5309
5644
  exports.ensureContextState = ensureContextState;
5310
5645
  exports.executeSingleAction = executeSingleAction;
5311
5646
  exports.forceDisconnectPlaywrightForTarget = forceDisconnectPlaywrightForTarget;
@@ -5324,6 +5659,7 @@ exports.resolvePageByTargetIdOrThrow = resolvePageByTargetIdOrThrow;
5324
5659
  exports.resolvePinnedHostnameWithPolicy = resolvePinnedHostnameWithPolicy;
5325
5660
  exports.resolveStrictExistingUploadPaths = resolveStrictExistingUploadPaths;
5326
5661
  exports.sanitizeUntrustedFileName = sanitizeUntrustedFileName;
5662
+ exports.waitForChallengeViaPlaywright = waitForChallengeViaPlaywright;
5327
5663
  exports.withBrowserNavigationPolicy = withBrowserNavigationPolicy;
5328
5664
  exports.withPageScopedCdpClient = withPageScopedCdpClient;
5329
5665
  exports.withPlaywrightPageCdpSession = withPlaywrightPageCdpSession;