browserclaw 0.5.8 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1566 -1144
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +111 -21
- package/dist/index.d.ts +111 -21
- package/dist/index.js +1556 -1137
- package/dist/index.js.map +1 -1
- package/package.json +14 -2
package/dist/index.d.cts
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
|
-
import * as playwright_core from 'playwright-core';
|
|
2
1
|
import { BrowserContext, Page, CDPSession } from 'playwright-core';
|
|
3
|
-
import
|
|
4
|
-
import { lookup as lookup$1 } from 'node:dns';
|
|
5
|
-
import { lookup } from 'node:dns/promises';
|
|
2
|
+
import { lookup } from 'node:dns';
|
|
3
|
+
import { lookup as lookup$1 } from 'node:dns/promises';
|
|
6
4
|
|
|
7
5
|
/** A single action within a batch. */
|
|
8
6
|
type BatchAction = {
|
|
@@ -58,11 +56,11 @@ type BatchAction = {
|
|
|
58
56
|
timeoutMs?: number;
|
|
59
57
|
} | {
|
|
60
58
|
kind: 'fill';
|
|
61
|
-
fields:
|
|
59
|
+
fields: {
|
|
62
60
|
ref: string;
|
|
63
61
|
type?: string;
|
|
64
62
|
value?: string | number | boolean;
|
|
65
|
-
}
|
|
63
|
+
}[];
|
|
66
64
|
targetId?: string;
|
|
67
65
|
timeoutMs?: number;
|
|
68
66
|
} | {
|
|
@@ -547,11 +545,27 @@ interface HttpCredentials {
|
|
|
547
545
|
interface ContextState {
|
|
548
546
|
traceActive: boolean;
|
|
549
547
|
}
|
|
548
|
+
/** The kind of anti-bot challenge detected on a page. */
|
|
549
|
+
type ChallengeKind = 'cloudflare-js' | 'cloudflare-block' | 'cloudflare-turnstile' | 'hcaptcha' | 'recaptcha' | 'blocked' | 'rate-limited';
|
|
550
|
+
/** Information about a detected anti-bot challenge. */
|
|
551
|
+
interface ChallengeInfo {
|
|
552
|
+
/** What type of challenge is present */
|
|
553
|
+
kind: ChallengeKind;
|
|
554
|
+
/** Human-readable description */
|
|
555
|
+
message: string;
|
|
556
|
+
}
|
|
557
|
+
/** Result of waiting for an anti-bot challenge to resolve. */
|
|
558
|
+
interface ChallengeWaitResult {
|
|
559
|
+
/** Whether the challenge cleared within the timeout */
|
|
560
|
+
resolved: boolean;
|
|
561
|
+
/** The challenge still present (null if resolved) */
|
|
562
|
+
challenge: ChallengeInfo | null;
|
|
563
|
+
}
|
|
550
564
|
/** Result of DNS pinning resolution — hostname locked to resolved addresses. */
|
|
551
565
|
interface PinnedHostname {
|
|
552
566
|
hostname: string;
|
|
553
567
|
addresses: string[];
|
|
554
|
-
lookup: typeof
|
|
568
|
+
lookup: typeof lookup;
|
|
555
569
|
}
|
|
556
570
|
|
|
557
571
|
/**
|
|
@@ -925,7 +939,7 @@ declare class CrawlPage {
|
|
|
925
939
|
type?: 'png' | 'jpeg';
|
|
926
940
|
}): Promise<{
|
|
927
941
|
buffer: Buffer;
|
|
928
|
-
labels:
|
|
942
|
+
labels: {
|
|
929
943
|
ref: string;
|
|
930
944
|
index: number;
|
|
931
945
|
box: {
|
|
@@ -934,7 +948,7 @@ declare class CrawlPage {
|
|
|
934
948
|
width: number;
|
|
935
949
|
height: number;
|
|
936
950
|
};
|
|
937
|
-
}
|
|
951
|
+
}[];
|
|
938
952
|
skipped: string[];
|
|
939
953
|
}>;
|
|
940
954
|
/**
|
|
@@ -1022,7 +1036,7 @@ declare class CrawlPage {
|
|
|
1022
1036
|
*
|
|
1023
1037
|
* @returns Array of cookie objects
|
|
1024
1038
|
*/
|
|
1025
|
-
cookies(): Promise<Awaited<ReturnType<
|
|
1039
|
+
cookies(): Promise<Awaited<ReturnType<BrowserContext['cookies']>>>;
|
|
1026
1040
|
/**
|
|
1027
1041
|
* Set a cookie in the browser context.
|
|
1028
1042
|
*
|
|
@@ -1164,6 +1178,46 @@ declare class CrawlPage {
|
|
|
1164
1178
|
* ```
|
|
1165
1179
|
*/
|
|
1166
1180
|
setDevice(name: string): Promise<void>;
|
|
1181
|
+
/**
|
|
1182
|
+
* Detect whether the page is showing an anti-bot challenge
|
|
1183
|
+
* (Cloudflare, hCaptcha, reCAPTCHA, access-denied, rate-limit, etc.).
|
|
1184
|
+
*
|
|
1185
|
+
* Returns `null` if no challenge is detected.
|
|
1186
|
+
*
|
|
1187
|
+
* @example
|
|
1188
|
+
* ```ts
|
|
1189
|
+
* const challenge = await page.detectChallenge();
|
|
1190
|
+
* if (challenge) {
|
|
1191
|
+
* console.log(challenge.kind); // 'cloudflare-js'
|
|
1192
|
+
* console.log(challenge.message); // 'Cloudflare JS challenge'
|
|
1193
|
+
* }
|
|
1194
|
+
* ```
|
|
1195
|
+
*/
|
|
1196
|
+
detectChallenge(): Promise<ChallengeInfo | null>;
|
|
1197
|
+
/**
|
|
1198
|
+
* Wait for an anti-bot challenge to resolve on its own.
|
|
1199
|
+
*
|
|
1200
|
+
* Cloudflare JS challenges typically auto-resolve in ~5 seconds.
|
|
1201
|
+
* CAPTCHA challenges will only resolve if solved in a visible browser window.
|
|
1202
|
+
*
|
|
1203
|
+
* @param opts.timeoutMs - Maximum wait time (default: `15000`)
|
|
1204
|
+
* @param opts.pollMs - Poll interval (default: `500`)
|
|
1205
|
+
* @returns Whether the challenge resolved, and the remaining challenge info if not
|
|
1206
|
+
*
|
|
1207
|
+
* @example
|
|
1208
|
+
* ```ts
|
|
1209
|
+
* await page.goto('https://example.com');
|
|
1210
|
+
* const challenge = await page.detectChallenge();
|
|
1211
|
+
* if (challenge?.kind === 'cloudflare-js') {
|
|
1212
|
+
* const { resolved } = await page.waitForChallenge({ timeoutMs: 20000 });
|
|
1213
|
+
* if (!resolved) throw new Error('Challenge did not resolve');
|
|
1214
|
+
* }
|
|
1215
|
+
* ```
|
|
1216
|
+
*/
|
|
1217
|
+
waitForChallenge(opts?: {
|
|
1218
|
+
timeoutMs?: number;
|
|
1219
|
+
pollMs?: number;
|
|
1220
|
+
}): Promise<ChallengeWaitResult>;
|
|
1167
1221
|
}
|
|
1168
1222
|
/**
|
|
1169
1223
|
* Main entry point for browserclaw.
|
|
@@ -1296,7 +1350,7 @@ declare function isChromeReachable(cdpUrl: string, timeoutMs?: number, authToken
|
|
|
1296
1350
|
declare function getChromeWebSocketUrl(cdpUrl: string, timeoutMs?: number, authToken?: string): Promise<string | null>;
|
|
1297
1351
|
declare function isChromeCdpReady(cdpUrl: string, timeoutMs?: number, handshakeTimeoutMs?: number): Promise<boolean>;
|
|
1298
1352
|
|
|
1299
|
-
type LookupFn = typeof lookup;
|
|
1353
|
+
type LookupFn = typeof lookup$1;
|
|
1300
1354
|
/**
|
|
1301
1355
|
* Thrown when a navigation URL is blocked by SSRF policy.
|
|
1302
1356
|
* Callers can catch this specifically to distinguish navigation blocks
|
|
@@ -1306,14 +1360,14 @@ declare class InvalidBrowserNavigationUrlError extends Error {
|
|
|
1306
1360
|
constructor(message: string);
|
|
1307
1361
|
}
|
|
1308
1362
|
/** Options for browser navigation SSRF policy. */
|
|
1309
|
-
|
|
1363
|
+
interface BrowserNavigationPolicyOptions {
|
|
1310
1364
|
ssrfPolicy?: SsrfPolicy;
|
|
1311
|
-
}
|
|
1365
|
+
}
|
|
1312
1366
|
/** Playwright-compatible request interface for redirect chain inspection. */
|
|
1313
|
-
|
|
1367
|
+
interface BrowserNavigationRequestLike {
|
|
1314
1368
|
url(): string;
|
|
1315
1369
|
redirectedFrom(): BrowserNavigationRequestLike | null;
|
|
1316
|
-
}
|
|
1370
|
+
}
|
|
1317
1371
|
/** Build a BrowserNavigationPolicyOptions from an SsrfPolicy. */
|
|
1318
1372
|
declare function withBrowserNavigationPolicy(ssrfPolicy?: SsrfPolicy): BrowserNavigationPolicyOptions;
|
|
1319
1373
|
/**
|
|
@@ -1323,8 +1377,8 @@ declare function withBrowserNavigationPolicy(ssrfPolicy?: SsrfPolicy): BrowserNa
|
|
|
1323
1377
|
declare function createPinnedLookup(params: {
|
|
1324
1378
|
hostname: string;
|
|
1325
1379
|
addresses: string[];
|
|
1326
|
-
fallback?: typeof lookup
|
|
1327
|
-
}): typeof lookup
|
|
1380
|
+
fallback?: typeof lookup;
|
|
1381
|
+
}): typeof lookup;
|
|
1328
1382
|
/**
|
|
1329
1383
|
* Resolve DNS for a hostname and validate resolved addresses against SSRF policy.
|
|
1330
1384
|
* Returns a PinnedHostname with pre-resolved addresses and a pinned lookup function.
|
|
@@ -1401,14 +1455,12 @@ declare class BrowserTabNotFoundError extends Error {
|
|
|
1401
1455
|
declare function withPlaywrightPageCdpSession<T>(page: Page, fn: (session: CDPSession) => Promise<T>): Promise<T>;
|
|
1402
1456
|
/**
|
|
1403
1457
|
* Run a function with a page-scoped CDP client.
|
|
1404
|
-
* For extension relay endpoints, routes through the raw CDP websocket.
|
|
1405
|
-
* Otherwise, uses a Playwright CDP session.
|
|
1406
1458
|
*/
|
|
1407
1459
|
declare function withPageScopedCdpClient<T>(opts: {
|
|
1408
1460
|
cdpUrl: string;
|
|
1409
1461
|
page: Page;
|
|
1410
1462
|
targetId?: string;
|
|
1411
|
-
fn: (send: (method: string, params?: Record<string, unknown>) => Promise<
|
|
1463
|
+
fn: (send: (method: string, params?: Record<string, unknown>) => Promise<unknown>) => Promise<T>;
|
|
1412
1464
|
}): Promise<T>;
|
|
1413
1465
|
declare function ensureContextState(context: BrowserContext): ContextState;
|
|
1414
1466
|
/**
|
|
@@ -1458,4 +1510,42 @@ declare function getRestoredPageForTarget(opts: {
|
|
|
1458
1510
|
targetId?: string;
|
|
1459
1511
|
}): Promise<Page>;
|
|
1460
1512
|
|
|
1461
|
-
|
|
1513
|
+
/**
|
|
1514
|
+
* Comprehensive browser stealth evasions.
|
|
1515
|
+
*
|
|
1516
|
+
* Injected via `addInitScript()` (runs before any page JS) and via
|
|
1517
|
+
* `page.evaluate()` for already-loaded pages. Each patch is wrapped
|
|
1518
|
+
* in try/catch so a single failure never breaks the rest.
|
|
1519
|
+
*
|
|
1520
|
+
* Covers: navigator.webdriver, plugins, languages, window.chrome,
|
|
1521
|
+
* Permissions API, WebGL fingerprint, Notification.permission,
|
|
1522
|
+
* navigator.connection, console toString, headless-mode quirks,
|
|
1523
|
+
* hardwareConcurrency, and deviceMemory.
|
|
1524
|
+
*/
|
|
1525
|
+
declare const STEALTH_SCRIPT = "(function() {\n 'use strict';\n function p(fn) { try { fn(); } catch(_) {} }\n\n // \u2500\u2500 1. navigator.webdriver \u2192 undefined \u2500\u2500\n p(function() {\n Object.defineProperty(navigator, 'webdriver', { get: function() { return undefined; }, configurable: true });\n });\n\n // \u2500\u2500 2. navigator.plugins + mimeTypes (only if empty \u2014 Chrome 92+ populates them natively) \u2500\u2500\n p(function() {\n if (navigator.plugins && navigator.plugins.length > 0) return;\n\n function FakePlugin(name, fn, desc, mimes) {\n this.name = name; this.filename = fn; this.description = desc; this.length = mimes.length;\n for (var i = 0; i < mimes.length; i++) { this[i] = mimes[i]; mimes[i].enabledPlugin = this; }\n }\n FakePlugin.prototype.item = function(i) { return this[i] || null; };\n FakePlugin.prototype.namedItem = function(n) {\n for (var i = 0; i < this.length; i++) if (this[i].type === n) return this[i];\n return null;\n };\n\n function M(type, suf, desc) { this.type = type; this.suffixes = suf; this.description = desc; }\n\n var m1 = new M('application/pdf', 'pdf', 'Portable Document Format');\n var m2 = new M('application/x-google-chrome-pdf', 'pdf', 'Portable Document Format');\n var m3 = new M('application/x-nacl', '', 'Native Client Executable');\n var m4 = new M('application/x-pnacl', '', 'Portable Native Client Executable');\n\n var plugins = [\n new FakePlugin('Chrome PDF Plugin', 'internal-pdf-viewer', 'Portable Document Format', [m1]),\n new FakePlugin('Chrome PDF Viewer', 'mhjfbmdgcfjbbpaeojofohoefgiehjai', '', [m2]),\n new FakePlugin('Native Client', 'internal-nacl-plugin', '', [m3, m4]),\n ];\n\n function makeIterable(arr, items) {\n arr.length = items.length;\n for (var i = 0; i < items.length; i++) arr[i] = items[i];\n arr[Symbol.iterator] = function() {\n var idx = 0;\n return { next: function() {\n return idx < items.length ? { value: items[idx++], done: false } : { done: true };\n }};\n };\n }\n\n var pa = { item: function(i) { return plugins[i] || null; },\n namedItem: function(n) { for (var i = 0; i < plugins.length; i++) if (plugins[i].name === n) return plugins[i]; return null; },\n refresh: function() {} };\n makeIterable(pa, plugins);\n Object.defineProperty(navigator, 'plugins', { get: function() { return pa; } });\n\n var allMimes = [m1, m2, m3, m4];\n var ma = { item: function(i) { return allMimes[i] || null; },\n namedItem: function(n) { for (var i = 0; i < allMimes.length; i++) if (allMimes[i].type === n) return allMimes[i]; return null; } };\n makeIterable(ma, allMimes);\n Object.defineProperty(navigator, 'mimeTypes', { get: function() { return ma; } });\n });\n\n // \u2500\u2500 3. navigator.languages (cached + frozen so identity check passes) \u2500\u2500\n p(function() {\n if (!navigator.languages || navigator.languages.length === 0) {\n var langs = Object.freeze(['en-US', 'en']);\n Object.defineProperty(navigator, 'languages', { get: function() { return langs; } });\n }\n });\n\n // \u2500\u2500 4. window.chrome \u2500\u2500\n p(function() {\n if (window.chrome && window.chrome.runtime && window.chrome.runtime.connect) return;\n\n var chrome = window.chrome || {};\n var noop = function() {};\n var evtStub = { addListener: noop, removeListener: noop, hasListeners: function() { return false; } };\n chrome.runtime = chrome.runtime || {};\n chrome.runtime.onMessage = chrome.runtime.onMessage || evtStub;\n chrome.runtime.onConnect = chrome.runtime.onConnect || evtStub;\n chrome.runtime.sendMessage = chrome.runtime.sendMessage || noop;\n chrome.runtime.connect = chrome.runtime.connect || function() {\n return { onMessage: { addListener: noop }, postMessage: noop, disconnect: noop };\n };\n if (chrome.runtime.id === undefined) chrome.runtime.id = undefined;\n if (!chrome.loadTimes) chrome.loadTimes = function() { return {}; };\n if (!chrome.csi) chrome.csi = function() { return {}; };\n if (!chrome.app) {\n chrome.app = {\n isInstalled: false,\n InstallState: { INSTALLED: 'installed', NOT_INSTALLED: 'not_installed', DISABLED: 'disabled' },\n RunningState: { CANNOT_RUN: 'cannot_run', READY_TO_RUN: 'ready_to_run', RUNNING: 'running' },\n getDetails: function() { return null; },\n getIsInstalled: function() { return false; },\n runningState: function() { return 'cannot_run'; },\n };\n }\n\n if (!window.chrome) {\n Object.defineProperty(window, 'chrome', { value: chrome, writable: false, enumerable: true, configurable: false });\n }\n });\n\n // \u2500\u2500 5. Permissions API consistency \u2500\u2500\n p(function() {\n var orig = navigator.permissions.query.bind(navigator.permissions);\n function q(params) {\n if (params.name === 'notifications') {\n return Promise.resolve({\n state: typeof Notification !== 'undefined' ? Notification.permission : 'prompt',\n name: 'notifications', onchange: null,\n addEventListener: function(){}, removeEventListener: function(){}, dispatchEvent: function(){ return true; },\n });\n }\n return orig(params);\n }\n q.toString = function() { return 'function query() { [native code] }'; };\n navigator.permissions.query = q;\n });\n\n // \u2500\u2500 6. WebGL vendor / renderer \u2500\u2500\n p(function() {\n var h = {\n apply: function(target, self, args) {\n var param = args[0];\n if (param === 0x9245) return 'Intel Inc.';\n if (param === 0x9246) return 'Intel Iris OpenGL Engine';\n return Reflect.apply(target, self, args);\n }\n };\n if (typeof WebGLRenderingContext !== 'undefined')\n WebGLRenderingContext.prototype.getParameter = new Proxy(WebGLRenderingContext.prototype.getParameter, h);\n if (typeof WebGL2RenderingContext !== 'undefined')\n WebGL2RenderingContext.prototype.getParameter = new Proxy(WebGL2RenderingContext.prototype.getParameter, h);\n });\n\n // \u2500\u2500 7. Notification.permission \u2500\u2500\n p(function() {\n if (typeof Notification !== 'undefined' && Notification.permission === 'denied') {\n Object.defineProperty(Notification, 'permission', { get: function() { return 'default'; }, configurable: true });\n }\n });\n\n // \u2500\u2500 8. navigator.connection (cached so identity check passes) \u2500\u2500\n p(function() {\n if (navigator.connection) return;\n var conn = {\n effectiveType: '4g', rtt: 50, downlink: 10, saveData: false, onchange: null,\n addEventListener: function(){}, removeEventListener: function(){}, dispatchEvent: function(){ return true; },\n };\n Object.defineProperty(navigator, 'connection', { get: function() { return conn; } });\n });\n\n // \u2500\u2500 9. Iframe contentWindow.chrome \u2500\u2500\n // Handled by patch 4 \u2014 chrome object is now on window, propagates to iframes on same origin.\n\n // \u2500\u2500 10. console method toString \u2500\u2500\n p(function() {\n ['log','info','warn','error','debug','table','trace'].forEach(function(n) {\n if (console[n]) {\n console[n].toString = function() { return 'function ' + n + '() { [native code] }'; };\n }\n });\n });\n\n // \u2500\u2500 11. Headless-mode window / screen fixes \u2500\u2500\n p(function() {\n if (window.outerWidth === 0)\n Object.defineProperty(window, 'outerWidth', { get: function() { return window.innerWidth || 1920; } });\n if (window.outerHeight === 0)\n Object.defineProperty(window, 'outerHeight', { get: function() { return (window.innerHeight || 1080) + 85; } });\n });\n\n p(function() {\n if (screen.colorDepth === 0) {\n Object.defineProperty(screen, 'colorDepth', { get: function() { return 24; } });\n Object.defineProperty(screen, 'pixelDepth', { get: function() { return 24; } });\n }\n });\n\n // \u2500\u2500 12. navigator.hardwareConcurrency \u2500\u2500\n p(function() {\n if (!navigator.hardwareConcurrency)\n Object.defineProperty(navigator, 'hardwareConcurrency', { get: function() { return 4; } });\n });\n\n // \u2500\u2500 13. navigator.deviceMemory \u2500\u2500\n p(function() {\n if (!navigator.deviceMemory)\n Object.defineProperty(navigator, 'deviceMemory', { get: function() { return 8; } });\n });\n})()";
|
|
1526
|
+
|
|
1527
|
+
/**
|
|
1528
|
+
* Detect whether the current page is showing an anti-bot challenge.
|
|
1529
|
+
* Returns `null` if no challenge is detected.
|
|
1530
|
+
*/
|
|
1531
|
+
declare function detectChallengeViaPlaywright(opts: {
|
|
1532
|
+
cdpUrl: string;
|
|
1533
|
+
targetId?: string;
|
|
1534
|
+
}): Promise<ChallengeInfo | null>;
|
|
1535
|
+
/**
|
|
1536
|
+
* Wait for an anti-bot challenge to resolve on its own (e.g. Cloudflare JS challenge).
|
|
1537
|
+
*
|
|
1538
|
+
* Returns `{ resolved: true }` if the challenge cleared within the timeout,
|
|
1539
|
+
* or `{ resolved: false, challenge }` with the still-present challenge info.
|
|
1540
|
+
*
|
|
1541
|
+
* For challenges that require human interaction (CAPTCHA), this will time out
|
|
1542
|
+
* unless the user solves the challenge in the visible browser window.
|
|
1543
|
+
*/
|
|
1544
|
+
declare function waitForChallengeViaPlaywright(opts: {
|
|
1545
|
+
cdpUrl: string;
|
|
1546
|
+
targetId?: string;
|
|
1547
|
+
timeoutMs?: number;
|
|
1548
|
+
pollMs?: number;
|
|
1549
|
+
}): Promise<ChallengeWaitResult>;
|
|
1550
|
+
|
|
1551
|
+
export { type AriaNode, type AriaSnapshotResult, type BatchAction, type BatchActionResult, BrowserClaw, type BrowserNavigationPolicyOptions, type BrowserNavigationRequestLike, type BrowserTab, BrowserTabNotFoundError, type ChallengeInfo, type ChallengeKind, type ChallengeWaitResult, type ChromeExecutable, type ChromeKind, type ClickOptions, type ColorScheme, type ConnectOptions, type ConsoleMessage, type ContextState, type CookieData, CrawlPage, type DialogOptions, type DownloadResult, type FormField, type FrameEvalResult, type GeolocationOptions, type HttpCredentials, InvalidBrowserNavigationUrlError, type LaunchOptions, type LookupFn, type NetworkRequest, type PageError, type PinnedHostname, type ResponseBodyResult, type RoleRefInfo, type RoleRefs, STEALTH_SCRIPT, type ScreenshotOptions, type SnapshotOptions, type SnapshotResult, type SnapshotStats, type SsrfPolicy, type StorageKind, type TraceStartOptions, type TypeOptions, type UntrustedContentMeta, type WaitOptions, assertBrowserNavigationAllowed, assertBrowserNavigationRedirectChainAllowed, assertBrowserNavigationResultAllowed, assertSafeUploadPaths, batchViaPlaywright, createPinnedLookup, detectChallengeViaPlaywright, ensureContextState, executeSingleAction, forceDisconnectPlaywrightForTarget, getChromeWebSocketUrl, getRestoredPageForTarget, isChromeCdpReady, isChromeReachable, normalizeCdpHttpBaseForJsonEndpoints, parseRoleRef, requireRef, requireRefOrSelector, requiresInspectableBrowserNavigationRedirects, resolveBoundedDelayMs, resolveInteractionTimeoutMs, resolvePageByTargetIdOrThrow, resolvePinnedHostnameWithPolicy, resolveStrictExistingUploadPaths, sanitizeUntrustedFileName, waitForChallengeViaPlaywright, withBrowserNavigationPolicy, withPageScopedCdpClient, withPlaywrightPageCdpSession, writeViaSiblingTempPath };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
|
-
import * as playwright_core from 'playwright-core';
|
|
2
1
|
import { BrowserContext, Page, CDPSession } from 'playwright-core';
|
|
3
|
-
import
|
|
4
|
-
import { lookup as lookup$1 } from 'node:dns';
|
|
5
|
-
import { lookup } from 'node:dns/promises';
|
|
2
|
+
import { lookup } from 'node:dns';
|
|
3
|
+
import { lookup as lookup$1 } from 'node:dns/promises';
|
|
6
4
|
|
|
7
5
|
/** A single action within a batch. */
|
|
8
6
|
type BatchAction = {
|
|
@@ -58,11 +56,11 @@ type BatchAction = {
|
|
|
58
56
|
timeoutMs?: number;
|
|
59
57
|
} | {
|
|
60
58
|
kind: 'fill';
|
|
61
|
-
fields:
|
|
59
|
+
fields: {
|
|
62
60
|
ref: string;
|
|
63
61
|
type?: string;
|
|
64
62
|
value?: string | number | boolean;
|
|
65
|
-
}
|
|
63
|
+
}[];
|
|
66
64
|
targetId?: string;
|
|
67
65
|
timeoutMs?: number;
|
|
68
66
|
} | {
|
|
@@ -547,11 +545,27 @@ interface HttpCredentials {
|
|
|
547
545
|
interface ContextState {
|
|
548
546
|
traceActive: boolean;
|
|
549
547
|
}
|
|
548
|
+
/** The kind of anti-bot challenge detected on a page. */
|
|
549
|
+
type ChallengeKind = 'cloudflare-js' | 'cloudflare-block' | 'cloudflare-turnstile' | 'hcaptcha' | 'recaptcha' | 'blocked' | 'rate-limited';
|
|
550
|
+
/** Information about a detected anti-bot challenge. */
|
|
551
|
+
interface ChallengeInfo {
|
|
552
|
+
/** What type of challenge is present */
|
|
553
|
+
kind: ChallengeKind;
|
|
554
|
+
/** Human-readable description */
|
|
555
|
+
message: string;
|
|
556
|
+
}
|
|
557
|
+
/** Result of waiting for an anti-bot challenge to resolve. */
|
|
558
|
+
interface ChallengeWaitResult {
|
|
559
|
+
/** Whether the challenge cleared within the timeout */
|
|
560
|
+
resolved: boolean;
|
|
561
|
+
/** The challenge still present (null if resolved) */
|
|
562
|
+
challenge: ChallengeInfo | null;
|
|
563
|
+
}
|
|
550
564
|
/** Result of DNS pinning resolution — hostname locked to resolved addresses. */
|
|
551
565
|
interface PinnedHostname {
|
|
552
566
|
hostname: string;
|
|
553
567
|
addresses: string[];
|
|
554
|
-
lookup: typeof
|
|
568
|
+
lookup: typeof lookup;
|
|
555
569
|
}
|
|
556
570
|
|
|
557
571
|
/**
|
|
@@ -925,7 +939,7 @@ declare class CrawlPage {
|
|
|
925
939
|
type?: 'png' | 'jpeg';
|
|
926
940
|
}): Promise<{
|
|
927
941
|
buffer: Buffer;
|
|
928
|
-
labels:
|
|
942
|
+
labels: {
|
|
929
943
|
ref: string;
|
|
930
944
|
index: number;
|
|
931
945
|
box: {
|
|
@@ -934,7 +948,7 @@ declare class CrawlPage {
|
|
|
934
948
|
width: number;
|
|
935
949
|
height: number;
|
|
936
950
|
};
|
|
937
|
-
}
|
|
951
|
+
}[];
|
|
938
952
|
skipped: string[];
|
|
939
953
|
}>;
|
|
940
954
|
/**
|
|
@@ -1022,7 +1036,7 @@ declare class CrawlPage {
|
|
|
1022
1036
|
*
|
|
1023
1037
|
* @returns Array of cookie objects
|
|
1024
1038
|
*/
|
|
1025
|
-
cookies(): Promise<Awaited<ReturnType<
|
|
1039
|
+
cookies(): Promise<Awaited<ReturnType<BrowserContext['cookies']>>>;
|
|
1026
1040
|
/**
|
|
1027
1041
|
* Set a cookie in the browser context.
|
|
1028
1042
|
*
|
|
@@ -1164,6 +1178,46 @@ declare class CrawlPage {
|
|
|
1164
1178
|
* ```
|
|
1165
1179
|
*/
|
|
1166
1180
|
setDevice(name: string): Promise<void>;
|
|
1181
|
+
/**
|
|
1182
|
+
* Detect whether the page is showing an anti-bot challenge
|
|
1183
|
+
* (Cloudflare, hCaptcha, reCAPTCHA, access-denied, rate-limit, etc.).
|
|
1184
|
+
*
|
|
1185
|
+
* Returns `null` if no challenge is detected.
|
|
1186
|
+
*
|
|
1187
|
+
* @example
|
|
1188
|
+
* ```ts
|
|
1189
|
+
* const challenge = await page.detectChallenge();
|
|
1190
|
+
* if (challenge) {
|
|
1191
|
+
* console.log(challenge.kind); // 'cloudflare-js'
|
|
1192
|
+
* console.log(challenge.message); // 'Cloudflare JS challenge'
|
|
1193
|
+
* }
|
|
1194
|
+
* ```
|
|
1195
|
+
*/
|
|
1196
|
+
detectChallenge(): Promise<ChallengeInfo | null>;
|
|
1197
|
+
/**
|
|
1198
|
+
* Wait for an anti-bot challenge to resolve on its own.
|
|
1199
|
+
*
|
|
1200
|
+
* Cloudflare JS challenges typically auto-resolve in ~5 seconds.
|
|
1201
|
+
* CAPTCHA challenges will only resolve if solved in a visible browser window.
|
|
1202
|
+
*
|
|
1203
|
+
* @param opts.timeoutMs - Maximum wait time (default: `15000`)
|
|
1204
|
+
* @param opts.pollMs - Poll interval (default: `500`)
|
|
1205
|
+
* @returns Whether the challenge resolved, and the remaining challenge info if not
|
|
1206
|
+
*
|
|
1207
|
+
* @example
|
|
1208
|
+
* ```ts
|
|
1209
|
+
* await page.goto('https://example.com');
|
|
1210
|
+
* const challenge = await page.detectChallenge();
|
|
1211
|
+
* if (challenge?.kind === 'cloudflare-js') {
|
|
1212
|
+
* const { resolved } = await page.waitForChallenge({ timeoutMs: 20000 });
|
|
1213
|
+
* if (!resolved) throw new Error('Challenge did not resolve');
|
|
1214
|
+
* }
|
|
1215
|
+
* ```
|
|
1216
|
+
*/
|
|
1217
|
+
waitForChallenge(opts?: {
|
|
1218
|
+
timeoutMs?: number;
|
|
1219
|
+
pollMs?: number;
|
|
1220
|
+
}): Promise<ChallengeWaitResult>;
|
|
1167
1221
|
}
|
|
1168
1222
|
/**
|
|
1169
1223
|
* Main entry point for browserclaw.
|
|
@@ -1296,7 +1350,7 @@ declare function isChromeReachable(cdpUrl: string, timeoutMs?: number, authToken
|
|
|
1296
1350
|
declare function getChromeWebSocketUrl(cdpUrl: string, timeoutMs?: number, authToken?: string): Promise<string | null>;
|
|
1297
1351
|
declare function isChromeCdpReady(cdpUrl: string, timeoutMs?: number, handshakeTimeoutMs?: number): Promise<boolean>;
|
|
1298
1352
|
|
|
1299
|
-
type LookupFn = typeof lookup;
|
|
1353
|
+
type LookupFn = typeof lookup$1;
|
|
1300
1354
|
/**
|
|
1301
1355
|
* Thrown when a navigation URL is blocked by SSRF policy.
|
|
1302
1356
|
* Callers can catch this specifically to distinguish navigation blocks
|
|
@@ -1306,14 +1360,14 @@ declare class InvalidBrowserNavigationUrlError extends Error {
|
|
|
1306
1360
|
constructor(message: string);
|
|
1307
1361
|
}
|
|
1308
1362
|
/** Options for browser navigation SSRF policy. */
|
|
1309
|
-
|
|
1363
|
+
interface BrowserNavigationPolicyOptions {
|
|
1310
1364
|
ssrfPolicy?: SsrfPolicy;
|
|
1311
|
-
}
|
|
1365
|
+
}
|
|
1312
1366
|
/** Playwright-compatible request interface for redirect chain inspection. */
|
|
1313
|
-
|
|
1367
|
+
interface BrowserNavigationRequestLike {
|
|
1314
1368
|
url(): string;
|
|
1315
1369
|
redirectedFrom(): BrowserNavigationRequestLike | null;
|
|
1316
|
-
}
|
|
1370
|
+
}
|
|
1317
1371
|
/** Build a BrowserNavigationPolicyOptions from an SsrfPolicy. */
|
|
1318
1372
|
declare function withBrowserNavigationPolicy(ssrfPolicy?: SsrfPolicy): BrowserNavigationPolicyOptions;
|
|
1319
1373
|
/**
|
|
@@ -1323,8 +1377,8 @@ declare function withBrowserNavigationPolicy(ssrfPolicy?: SsrfPolicy): BrowserNa
|
|
|
1323
1377
|
declare function createPinnedLookup(params: {
|
|
1324
1378
|
hostname: string;
|
|
1325
1379
|
addresses: string[];
|
|
1326
|
-
fallback?: typeof lookup
|
|
1327
|
-
}): typeof lookup
|
|
1380
|
+
fallback?: typeof lookup;
|
|
1381
|
+
}): typeof lookup;
|
|
1328
1382
|
/**
|
|
1329
1383
|
* Resolve DNS for a hostname and validate resolved addresses against SSRF policy.
|
|
1330
1384
|
* Returns a PinnedHostname with pre-resolved addresses and a pinned lookup function.
|
|
@@ -1401,14 +1455,12 @@ declare class BrowserTabNotFoundError extends Error {
|
|
|
1401
1455
|
declare function withPlaywrightPageCdpSession<T>(page: Page, fn: (session: CDPSession) => Promise<T>): Promise<T>;
|
|
1402
1456
|
/**
|
|
1403
1457
|
* Run a function with a page-scoped CDP client.
|
|
1404
|
-
* For extension relay endpoints, routes through the raw CDP websocket.
|
|
1405
|
-
* Otherwise, uses a Playwright CDP session.
|
|
1406
1458
|
*/
|
|
1407
1459
|
declare function withPageScopedCdpClient<T>(opts: {
|
|
1408
1460
|
cdpUrl: string;
|
|
1409
1461
|
page: Page;
|
|
1410
1462
|
targetId?: string;
|
|
1411
|
-
fn: (send: (method: string, params?: Record<string, unknown>) => Promise<
|
|
1463
|
+
fn: (send: (method: string, params?: Record<string, unknown>) => Promise<unknown>) => Promise<T>;
|
|
1412
1464
|
}): Promise<T>;
|
|
1413
1465
|
declare function ensureContextState(context: BrowserContext): ContextState;
|
|
1414
1466
|
/**
|
|
@@ -1458,4 +1510,42 @@ declare function getRestoredPageForTarget(opts: {
|
|
|
1458
1510
|
targetId?: string;
|
|
1459
1511
|
}): Promise<Page>;
|
|
1460
1512
|
|
|
1461
|
-
|
|
1513
|
+
/**
|
|
1514
|
+
* Comprehensive browser stealth evasions.
|
|
1515
|
+
*
|
|
1516
|
+
* Injected via `addInitScript()` (runs before any page JS) and via
|
|
1517
|
+
* `page.evaluate()` for already-loaded pages. Each patch is wrapped
|
|
1518
|
+
* in try/catch so a single failure never breaks the rest.
|
|
1519
|
+
*
|
|
1520
|
+
* Covers: navigator.webdriver, plugins, languages, window.chrome,
|
|
1521
|
+
* Permissions API, WebGL fingerprint, Notification.permission,
|
|
1522
|
+
* navigator.connection, console toString, headless-mode quirks,
|
|
1523
|
+
* hardwareConcurrency, and deviceMemory.
|
|
1524
|
+
*/
|
|
1525
|
+
declare const STEALTH_SCRIPT = "(function() {\n 'use strict';\n function p(fn) { try { fn(); } catch(_) {} }\n\n // \u2500\u2500 1. navigator.webdriver \u2192 undefined \u2500\u2500\n p(function() {\n Object.defineProperty(navigator, 'webdriver', { get: function() { return undefined; }, configurable: true });\n });\n\n // \u2500\u2500 2. navigator.plugins + mimeTypes (only if empty \u2014 Chrome 92+ populates them natively) \u2500\u2500\n p(function() {\n if (navigator.plugins && navigator.plugins.length > 0) return;\n\n function FakePlugin(name, fn, desc, mimes) {\n this.name = name; this.filename = fn; this.description = desc; this.length = mimes.length;\n for (var i = 0; i < mimes.length; i++) { this[i] = mimes[i]; mimes[i].enabledPlugin = this; }\n }\n FakePlugin.prototype.item = function(i) { return this[i] || null; };\n FakePlugin.prototype.namedItem = function(n) {\n for (var i = 0; i < this.length; i++) if (this[i].type === n) return this[i];\n return null;\n };\n\n function M(type, suf, desc) { this.type = type; this.suffixes = suf; this.description = desc; }\n\n var m1 = new M('application/pdf', 'pdf', 'Portable Document Format');\n var m2 = new M('application/x-google-chrome-pdf', 'pdf', 'Portable Document Format');\n var m3 = new M('application/x-nacl', '', 'Native Client Executable');\n var m4 = new M('application/x-pnacl', '', 'Portable Native Client Executable');\n\n var plugins = [\n new FakePlugin('Chrome PDF Plugin', 'internal-pdf-viewer', 'Portable Document Format', [m1]),\n new FakePlugin('Chrome PDF Viewer', 'mhjfbmdgcfjbbpaeojofohoefgiehjai', '', [m2]),\n new FakePlugin('Native Client', 'internal-nacl-plugin', '', [m3, m4]),\n ];\n\n function makeIterable(arr, items) {\n arr.length = items.length;\n for (var i = 0; i < items.length; i++) arr[i] = items[i];\n arr[Symbol.iterator] = function() {\n var idx = 0;\n return { next: function() {\n return idx < items.length ? { value: items[idx++], done: false } : { done: true };\n }};\n };\n }\n\n var pa = { item: function(i) { return plugins[i] || null; },\n namedItem: function(n) { for (var i = 0; i < plugins.length; i++) if (plugins[i].name === n) return plugins[i]; return null; },\n refresh: function() {} };\n makeIterable(pa, plugins);\n Object.defineProperty(navigator, 'plugins', { get: function() { return pa; } });\n\n var allMimes = [m1, m2, m3, m4];\n var ma = { item: function(i) { return allMimes[i] || null; },\n namedItem: function(n) { for (var i = 0; i < allMimes.length; i++) if (allMimes[i].type === n) return allMimes[i]; return null; } };\n makeIterable(ma, allMimes);\n Object.defineProperty(navigator, 'mimeTypes', { get: function() { return ma; } });\n });\n\n // \u2500\u2500 3. navigator.languages (cached + frozen so identity check passes) \u2500\u2500\n p(function() {\n if (!navigator.languages || navigator.languages.length === 0) {\n var langs = Object.freeze(['en-US', 'en']);\n Object.defineProperty(navigator, 'languages', { get: function() { return langs; } });\n }\n });\n\n // \u2500\u2500 4. window.chrome \u2500\u2500\n p(function() {\n if (window.chrome && window.chrome.runtime && window.chrome.runtime.connect) return;\n\n var chrome = window.chrome || {};\n var noop = function() {};\n var evtStub = { addListener: noop, removeListener: noop, hasListeners: function() { return false; } };\n chrome.runtime = chrome.runtime || {};\n chrome.runtime.onMessage = chrome.runtime.onMessage || evtStub;\n chrome.runtime.onConnect = chrome.runtime.onConnect || evtStub;\n chrome.runtime.sendMessage = chrome.runtime.sendMessage || noop;\n chrome.runtime.connect = chrome.runtime.connect || function() {\n return { onMessage: { addListener: noop }, postMessage: noop, disconnect: noop };\n };\n if (chrome.runtime.id === undefined) chrome.runtime.id = undefined;\n if (!chrome.loadTimes) chrome.loadTimes = function() { return {}; };\n if (!chrome.csi) chrome.csi = function() { return {}; };\n if (!chrome.app) {\n chrome.app = {\n isInstalled: false,\n InstallState: { INSTALLED: 'installed', NOT_INSTALLED: 'not_installed', DISABLED: 'disabled' },\n RunningState: { CANNOT_RUN: 'cannot_run', READY_TO_RUN: 'ready_to_run', RUNNING: 'running' },\n getDetails: function() { return null; },\n getIsInstalled: function() { return false; },\n runningState: function() { return 'cannot_run'; },\n };\n }\n\n if (!window.chrome) {\n Object.defineProperty(window, 'chrome', { value: chrome, writable: false, enumerable: true, configurable: false });\n }\n });\n\n // \u2500\u2500 5. Permissions API consistency \u2500\u2500\n p(function() {\n var orig = navigator.permissions.query.bind(navigator.permissions);\n function q(params) {\n if (params.name === 'notifications') {\n return Promise.resolve({\n state: typeof Notification !== 'undefined' ? Notification.permission : 'prompt',\n name: 'notifications', onchange: null,\n addEventListener: function(){}, removeEventListener: function(){}, dispatchEvent: function(){ return true; },\n });\n }\n return orig(params);\n }\n q.toString = function() { return 'function query() { [native code] }'; };\n navigator.permissions.query = q;\n });\n\n // \u2500\u2500 6. WebGL vendor / renderer \u2500\u2500\n p(function() {\n var h = {\n apply: function(target, self, args) {\n var param = args[0];\n if (param === 0x9245) return 'Intel Inc.';\n if (param === 0x9246) return 'Intel Iris OpenGL Engine';\n return Reflect.apply(target, self, args);\n }\n };\n if (typeof WebGLRenderingContext !== 'undefined')\n WebGLRenderingContext.prototype.getParameter = new Proxy(WebGLRenderingContext.prototype.getParameter, h);\n if (typeof WebGL2RenderingContext !== 'undefined')\n WebGL2RenderingContext.prototype.getParameter = new Proxy(WebGL2RenderingContext.prototype.getParameter, h);\n });\n\n // \u2500\u2500 7. Notification.permission \u2500\u2500\n p(function() {\n if (typeof Notification !== 'undefined' && Notification.permission === 'denied') {\n Object.defineProperty(Notification, 'permission', { get: function() { return 'default'; }, configurable: true });\n }\n });\n\n // \u2500\u2500 8. navigator.connection (cached so identity check passes) \u2500\u2500\n p(function() {\n if (navigator.connection) return;\n var conn = {\n effectiveType: '4g', rtt: 50, downlink: 10, saveData: false, onchange: null,\n addEventListener: function(){}, removeEventListener: function(){}, dispatchEvent: function(){ return true; },\n };\n Object.defineProperty(navigator, 'connection', { get: function() { return conn; } });\n });\n\n // \u2500\u2500 9. Iframe contentWindow.chrome \u2500\u2500\n // Handled by patch 4 \u2014 chrome object is now on window, propagates to iframes on same origin.\n\n // \u2500\u2500 10. console method toString \u2500\u2500\n p(function() {\n ['log','info','warn','error','debug','table','trace'].forEach(function(n) {\n if (console[n]) {\n console[n].toString = function() { return 'function ' + n + '() { [native code] }'; };\n }\n });\n });\n\n // \u2500\u2500 11. Headless-mode window / screen fixes \u2500\u2500\n p(function() {\n if (window.outerWidth === 0)\n Object.defineProperty(window, 'outerWidth', { get: function() { return window.innerWidth || 1920; } });\n if (window.outerHeight === 0)\n Object.defineProperty(window, 'outerHeight', { get: function() { return (window.innerHeight || 1080) + 85; } });\n });\n\n p(function() {\n if (screen.colorDepth === 0) {\n Object.defineProperty(screen, 'colorDepth', { get: function() { return 24; } });\n Object.defineProperty(screen, 'pixelDepth', { get: function() { return 24; } });\n }\n });\n\n // \u2500\u2500 12. navigator.hardwareConcurrency \u2500\u2500\n p(function() {\n if (!navigator.hardwareConcurrency)\n Object.defineProperty(navigator, 'hardwareConcurrency', { get: function() { return 4; } });\n });\n\n // \u2500\u2500 13. navigator.deviceMemory \u2500\u2500\n p(function() {\n if (!navigator.deviceMemory)\n Object.defineProperty(navigator, 'deviceMemory', { get: function() { return 8; } });\n });\n})()";
|
|
1526
|
+
|
|
1527
|
+
/**
|
|
1528
|
+
* Detect whether the current page is showing an anti-bot challenge.
|
|
1529
|
+
* Returns `null` if no challenge is detected.
|
|
1530
|
+
*/
|
|
1531
|
+
declare function detectChallengeViaPlaywright(opts: {
|
|
1532
|
+
cdpUrl: string;
|
|
1533
|
+
targetId?: string;
|
|
1534
|
+
}): Promise<ChallengeInfo | null>;
|
|
1535
|
+
/**
|
|
1536
|
+
* Wait for an anti-bot challenge to resolve on its own (e.g. Cloudflare JS challenge).
|
|
1537
|
+
*
|
|
1538
|
+
* Returns `{ resolved: true }` if the challenge cleared within the timeout,
|
|
1539
|
+
* or `{ resolved: false, challenge }` with the still-present challenge info.
|
|
1540
|
+
*
|
|
1541
|
+
* For challenges that require human interaction (CAPTCHA), this will time out
|
|
1542
|
+
* unless the user solves the challenge in the visible browser window.
|
|
1543
|
+
*/
|
|
1544
|
+
declare function waitForChallengeViaPlaywright(opts: {
|
|
1545
|
+
cdpUrl: string;
|
|
1546
|
+
targetId?: string;
|
|
1547
|
+
timeoutMs?: number;
|
|
1548
|
+
pollMs?: number;
|
|
1549
|
+
}): Promise<ChallengeWaitResult>;
|
|
1550
|
+
|
|
1551
|
+
export { type AriaNode, type AriaSnapshotResult, type BatchAction, type BatchActionResult, BrowserClaw, type BrowserNavigationPolicyOptions, type BrowserNavigationRequestLike, type BrowserTab, BrowserTabNotFoundError, type ChallengeInfo, type ChallengeKind, type ChallengeWaitResult, type ChromeExecutable, type ChromeKind, type ClickOptions, type ColorScheme, type ConnectOptions, type ConsoleMessage, type ContextState, type CookieData, CrawlPage, type DialogOptions, type DownloadResult, type FormField, type FrameEvalResult, type GeolocationOptions, type HttpCredentials, InvalidBrowserNavigationUrlError, type LaunchOptions, type LookupFn, type NetworkRequest, type PageError, type PinnedHostname, type ResponseBodyResult, type RoleRefInfo, type RoleRefs, STEALTH_SCRIPT, type ScreenshotOptions, type SnapshotOptions, type SnapshotResult, type SnapshotStats, type SsrfPolicy, type StorageKind, type TraceStartOptions, type TypeOptions, type UntrustedContentMeta, type WaitOptions, assertBrowserNavigationAllowed, assertBrowserNavigationRedirectChainAllowed, assertBrowserNavigationResultAllowed, assertSafeUploadPaths, batchViaPlaywright, createPinnedLookup, detectChallengeViaPlaywright, ensureContextState, executeSingleAction, forceDisconnectPlaywrightForTarget, getChromeWebSocketUrl, getRestoredPageForTarget, isChromeCdpReady, isChromeReachable, normalizeCdpHttpBaseForJsonEndpoints, parseRoleRef, requireRef, requireRefOrSelector, requiresInspectableBrowserNavigationRedirects, resolveBoundedDelayMs, resolveInteractionTimeoutMs, resolvePageByTargetIdOrThrow, resolvePinnedHostnameWithPolicy, resolveStrictExistingUploadPaths, sanitizeUntrustedFileName, waitForChallengeViaPlaywright, withBrowserNavigationPolicy, withPageScopedCdpClient, withPlaywrightPageCdpSession, writeViaSiblingTempPath };
|