@trusty-squire/mcp 0.8.15 → 0.8.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bot/agent.d.ts +42 -3
- package/dist/bot/agent.d.ts.map +1 -1
- package/dist/bot/agent.js +2423 -272
- package/dist/bot/agent.js.map +1 -1
- package/dist/bot/browser.d.ts +31 -3
- package/dist/bot/browser.d.ts.map +1 -1
- package/dist/bot/browser.js +872 -113
- package/dist/bot/browser.js.map +1 -1
- package/dist/bot/captcha-solver-2captcha.d.ts +12 -0
- package/dist/bot/captcha-solver-2captcha.d.ts.map +1 -1
- package/dist/bot/captcha-solver-2captcha.js +28 -5
- package/dist/bot/captcha-solver-2captcha.js.map +1 -1
- package/dist/bot/google-login.d.ts.map +1 -1
- package/dist/bot/google-login.js +39 -0
- package/dist/bot/google-login.js.map +1 -1
- package/dist/bot/index.d.ts +1 -1
- package/dist/bot/index.d.ts.map +1 -1
- package/dist/bot/oauth-providers.d.ts.map +1 -1
- package/dist/bot/oauth-providers.js +13 -3
- package/dist/bot/oauth-providers.js.map +1 -1
- package/dist/bot/promote-to-skill.d.ts +2 -1
- package/dist/bot/promote-to-skill.d.ts.map +1 -1
- package/dist/bot/promote-to-skill.js +26 -0
- package/dist/bot/promote-to-skill.js.map +1 -1
- package/dist/bot/replay-skill.d.ts.map +1 -1
- package/dist/bot/replay-skill.js +237 -32
- package/dist/bot/replay-skill.js.map +1 -1
- package/dist/bot/xvfb.d.ts.map +1 -1
- package/dist/bot/xvfb.js +8 -3
- package/dist/bot/xvfb.js.map +1 -1
- package/dist/install/cli.d.ts +5 -0
- package/dist/install/cli.d.ts.map +1 -1
- package/dist/install/cli.js +33 -8
- package/dist/install/cli.js.map +1 -1
- package/dist/tools/signup-telemetry.d.ts +2 -2
- package/dist/tools/signup-telemetry.d.ts.map +1 -1
- package/dist/tools/signup-telemetry.js.map +1 -1
- package/package.json +2 -1
package/dist/bot/browser.js
CHANGED
|
@@ -31,10 +31,11 @@ import { startXvfb, xvfbAvailable } from "./xvfb.js";
|
|
|
31
31
|
// the CJS modules lazily (the stealth toolchain only ships CJS) and treat
|
|
32
32
|
// stealth as best-effort — a missing dep should never crash the bot.
|
|
33
33
|
const require = createRequire(import.meta.url);
|
|
34
|
-
// Whether the operator asked for the CDP-hardened launcher
|
|
35
|
-
//
|
|
36
|
-
//
|
|
37
|
-
//
|
|
34
|
+
// Whether the operator asked for the CDP-hardened launcher (patchright,
|
|
35
|
+
// which runs evaluations in an isolated world and removes the automation
|
|
36
|
+
// tells — mainWorldExecution, navigator.webdriver — that Turnstile /
|
|
37
|
+
// reCAPTCHA-v3 score on). Flag-gated so it can be A/B'd against the
|
|
38
|
+
// stealth baseline — see docs/DESIGN-antibot-hardening.md.
|
|
38
39
|
function cdpHardeningRequested() {
|
|
39
40
|
const v = process.env.BOT_CDP_HARDENED;
|
|
40
41
|
return v === "1" || v === "true" || v === "on";
|
|
@@ -43,7 +44,7 @@ let cachedChromium = null;
|
|
|
43
44
|
// The stealth profile the cached launcher actually represents. Set the
|
|
44
45
|
// first time getChromium() resolves a launcher and read back via
|
|
45
46
|
// BrowserController.stealthProfile for the CaptchaEvent A/B tag. A
|
|
46
|
-
//
|
|
47
|
+
// patchright load failure degrades it to "baseline" truthfully rather
|
|
47
48
|
// than over-claiming "cdp_hardened" on a run that never got the patch.
|
|
48
49
|
let activeStealthProfile = "baseline";
|
|
49
50
|
function activeStealthProfileValue() {
|
|
@@ -54,39 +55,31 @@ function getChromium() {
|
|
|
54
55
|
return cachedChromium;
|
|
55
56
|
const hardened = cdpHardeningRequested();
|
|
56
57
|
try {
|
|
57
|
-
const { addExtra } = require("playwright-extra");
|
|
58
|
-
const stealth = require("puppeteer-extra-plugin-stealth");
|
|
59
|
-
let baseLauncher = baseChromium;
|
|
60
58
|
if (hardened) {
|
|
61
|
-
//
|
|
62
|
-
//
|
|
63
|
-
//
|
|
64
|
-
//
|
|
65
|
-
//
|
|
66
|
-
//
|
|
67
|
-
//
|
|
68
|
-
//
|
|
69
|
-
//
|
|
70
|
-
//
|
|
71
|
-
//
|
|
72
|
-
//
|
|
73
|
-
//
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
}
|
|
77
|
-
const rebrowser = require("rebrowser-playwright-core");
|
|
78
|
-
baseLauncher = rebrowser.chromium;
|
|
59
|
+
// patchright — a maintained Playwright fork that runs every
|
|
60
|
+
// evaluation in an ISOLATED world (so the bot's DOM probing is
|
|
61
|
+
// invisible to a page that traps DOM methods → closes the
|
|
62
|
+
// `mainWorldExecution` tell) and handles `navigator.webdriver`
|
|
63
|
+
// natively + correctly. Verified ALL-GREEN against the maintained
|
|
64
|
+
// rebrowser bot-detector (mainWorldExecution, navigatorWebdriver,
|
|
65
|
+
// viewport, runtimeEnableLeak all clean). It drives real Chrome
|
|
66
|
+
// (channel) directly — the earlier rebrowser fork couldn't, which is
|
|
67
|
+
// why the old hardened arm was forced onto bundled chromium and then
|
|
68
|
+
// crashed the OAuth flow. NO playwright-extra/stealth wrap here: the
|
|
69
|
+
// stealth plugin's manual `navigator.webdriver` defineProperty
|
|
70
|
+
// RE-ADDS a detectable property (proven counterproductive) — patchright
|
|
71
|
+
// does it right. See docs/DESIGN-antibot-hardening.md.
|
|
72
|
+
const patchright = require("patchright");
|
|
73
|
+
cachedChromium = patchright.chromium;
|
|
79
74
|
activeStealthProfile = "cdp_hardened";
|
|
75
|
+
return cachedChromium;
|
|
80
76
|
}
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
// same stealth wrap (Codex review: a bare import swap would NOT
|
|
88
|
-
// repoint the stealth-wrapped launcher — it must go through addExtra).
|
|
89
|
-
const extra = addExtra(baseLauncher);
|
|
77
|
+
// Baseline: playwright-extra + stealth (unchanged). addExtra(baseChromium)
|
|
78
|
+
// is exactly what playwright-extra's default `chromium` export already is.
|
|
79
|
+
const { addExtra } = require("playwright-extra");
|
|
80
|
+
const stealth = require("puppeteer-extra-plugin-stealth");
|
|
81
|
+
activeStealthProfile = "baseline";
|
|
82
|
+
const extra = addExtra(baseChromium);
|
|
90
83
|
extra.use(stealth());
|
|
91
84
|
cachedChromium = extra;
|
|
92
85
|
}
|
|
@@ -100,6 +93,31 @@ function getChromium() {
|
|
|
100
93
|
}
|
|
101
94
|
return cachedChromium;
|
|
102
95
|
}
|
|
96
|
+
// Map a cookie jar to the OAuth providers that have a LIVE logged-in session.
|
|
97
|
+
// The auth cookies that mean "signed in": GitHub → `user_session`; Google →
|
|
98
|
+
// any of the *SID session cookies (NID / CONSENT / 1P_JAR are set even when
|
|
99
|
+
// logged out, so they are deliberately NOT signals). Host-scoped so a
|
|
100
|
+
// google.com cookie can't pass for github. Cookie NAMES + presence only;
|
|
101
|
+
// values are checked for non-triviality, never logged. Exported for tests.
|
|
102
|
+
/**
 * Map a cookie jar to the OAuth providers that have a live signed-in session.
 *
 * A provider counts as live when at least one cookie (a) belongs to the
 * provider's host or one of its subdomains, (b) carries one of the provider's
 * session-cookie names, and (c) has a non-trivial value (more than 10 chars).
 * Only names and value lengths are inspected; values are never returned.
 *
 * @param {Array<{domain: string, name: string, value: string}>} cookies
 *   Cookie records. Anything that is not an array yields an empty result, and
 *   entries lacking a string `domain`, `name` or `value` are ignored instead
 *   of throwing.
 * @returns {string[]} Provider ids ("github", "google") in signature order.
 */
export function sessionProvidersFromCookies(cookies) {
    if (!Array.isArray(cookies))
        return [];
    const SIGNATURES = [
        { provider: "github", host: /(^|\.)github\.com$/i, names: ["user_session"] },
        {
            provider: "google",
            host: /(^|\.)google\.com$/i,
            names: ["SID", "__Secure-1PSID", "__Secure-3PSID"],
        },
    ];
    // A malformed record can never prove a session, so it is simply skipped.
    const isWellFormed = (c) => c !== null &&
        typeof c === "object" &&
        typeof c.domain === "string" &&
        typeof c.name === "string" &&
        typeof c.value === "string";
    const live = [];
    for (const sig of SIGNATURES) {
        const present = cookies.some((c) => isWellFormed(c) &&
            // Strip a single leading dot so ".github.com" and "github.com"
            // are treated alike by the host-scoped pattern.
            sig.host.test(c.domain.replace(/^\./, "")) &&
            sig.names.includes(c.name) &&
            c.value.length > 10);
        if (present)
            live.push(sig.provider);
    }
    return live;
}
|
|
103
121
|
function isCaptchaVariant(v) {
|
|
104
122
|
return (v === "turnstile" ||
|
|
105
123
|
v === "recaptcha_v2" ||
|
|
@@ -194,6 +212,49 @@ async function detectChromiumChannel() {
|
|
|
194
212
|
}
|
|
195
213
|
return null;
|
|
196
214
|
}
|
|
215
|
+
// Classify an anti-bot interstitial page from its (title + body) text.
|
|
216
|
+
// `onInterstitial` matches the static Cloudflare/Turnstile challenge copy.
|
|
217
|
+
// `verificationPassed` is the signal the challenge SUCCEEDED — but
|
|
218
|
+
// Cloudflare leaves the static "Just a moment / Performing security
|
|
219
|
+
// verification" copy ON THE PAGE even after it appends "Verification
|
|
220
|
+
// successful. Waiting for…", so `onInterstitial` alone wrongly reads as
|
|
221
|
+
// "still blocked" and the bot bails as anti_bot_blocked — exactly what
|
|
222
|
+
// stranded codesandbox/lambda-labs once patchright started PASSING the
|
|
223
|
+
// challenge. When the challenge passed, the redirect is just racing/
|
|
224
|
+
// stuck; the caller should be patient + reload, not give up. Exported
|
|
225
|
+
// for unit tests.
|
|
226
|
+
/**
 * Classify anti-bot interstitial copy taken from a page's title + body text.
 *
 * @param {string} text Combined page text to inspect (matched case-insensitively).
 * @returns {{onInterstitial: boolean, verificationPassed: boolean}}
 *   `onInterstitial` — the static challenge copy is present;
 *   `verificationPassed` — copy announcing a passed challenge is present.
 *   Both can be true at once, because the two patterns are tested
 *   independently against the same text.
 */
export function classifyInterstitialText(text) {
    const challengeCopy = /just a moment|performing security verification|verifying you are human|checking your browser|attention required/i;
    const passedCopy = /verification successful|you are (now )?verified|success!|challenge[- ]?(passed|complete)/i;
    return {
        onInterstitial: challengeCopy.test(text),
        verificationPassed: passedCopy.test(text),
    };
}
|
|
231
|
+
// After a Cloudflare managed challenge PASSES, the cf_clearance cookie is
|
|
232
|
+
// set but the URL still carries Cloudflare's single-use challenge token
|
|
233
|
+
// (`__cf_chl_rt_tk`, `__cf_chl_tk`, `__cf_chl_f_tk`, …). Cloudflare's own
|
|
234
|
+
// client-side redirect to the cleared page can stall — especially over a
|
|
235
|
+
// high-latency residential tunnel, where the meta-refresh/JS hop never
|
|
236
|
+
// fires inside our wait budget. Re-navigating to the SAME url with those
|
|
237
|
+
// one-shot tokens stripped serves the real page directly (the clearance
|
|
238
|
+
// cookie now satisfies the edge), instead of waiting on the stuck redirect.
|
|
239
|
+
// Returns the cleaned URL, or null when there's no challenge token to strip
|
|
240
|
+
// (nothing this can do better than a plain reload). Exported for unit tests.
|
|
241
|
+
/**
 * Remove Cloudflare's single-use challenge tokens (query parameters whose
 * name starts with `__cf_chl`, case-insensitively) from a URL.
 *
 * The raw query string is filtered pair-by-pair rather than mutated through
 * `URLSearchParams`: mutating `searchParams` re-serializes the WHOLE query
 * (`%20` becomes `+`, a bare `flag` becomes `flag=`), which would rewrite
 * parameters that have nothing to do with the challenge. Here every untouched
 * parameter keeps its original bytes.
 *
 * @param {string} rawUrl Absolute URL to clean.
 * @returns {string|null} The cleaned URL, or null when `rawUrl` does not parse
 *   or carries no challenge parameter to strip.
 */
export function stripCloudflareChallengeParams(rawUrl) {
    let parsed;
    try {
        parsed = new URL(rawUrl);
    }
    catch {
        return null;
    }
    const query = parsed.search.startsWith("?") ? parsed.search.slice(1) : parsed.search;
    if (query === "")
        return null;
    const isChallengePair = (pair) => {
        const eq = pair.indexOf("=");
        const rawKey = (eq === -1 ? pair : pair.slice(0, eq)).replace(/\+/g, " ");
        let key;
        try {
            key = decodeURIComponent(rawKey);
        }
        catch {
            // Malformed percent-escape: compare the raw spelling instead.
            key = rawKey;
        }
        return key.toLowerCase().startsWith("__cf_chl");
    };
    const pairs = query.split("&");
    const kept = pairs.filter((pair) => !isChallengePair(pair));
    if (kept.length === pairs.length)
        return null;
    // An empty search drops the "?" entirely.
    parsed.search = kept.join("&");
    return parsed.toString();
}
|
|
197
258
|
export class BrowserController {
|
|
198
259
|
// The persistent browser context. Persistent (launchPersistentContext)
|
|
199
260
|
// rather than an ephemeral context so the profile carries the user's
|
|
@@ -259,8 +320,8 @@ export class BrowserController {
|
|
|
259
320
|
return this.proxyServer;
|
|
260
321
|
}
|
|
261
322
|
// The stealth profile the most recent .start() launched under:
|
|
262
|
-
// "cdp_hardened" when the
|
|
263
|
-
// (BOT_CDP_HARDENED set +
|
|
323
|
+
// "cdp_hardened" when the patchright launcher actually loaded
|
|
324
|
+
// (BOT_CDP_HARDENED set + patchright present), else "baseline". Surfaced
|
|
264
325
|
// for the CaptchaEvent A/B tag. Throws before .start() — same reason
|
|
265
326
|
// as channel/proxied.
|
|
266
327
|
get stealthProfile() {
|
|
@@ -367,30 +428,19 @@ export class BrowserController {
|
|
|
367
428
|
// decide on executablePath below.
|
|
368
429
|
const launcher = getChromium();
|
|
369
430
|
const hardened = activeStealthProfileValue() === "cdp_hardened";
|
|
370
|
-
//
|
|
371
|
-
//
|
|
372
|
-
//
|
|
373
|
-
//
|
|
374
|
-
//
|
|
375
|
-
// the
|
|
376
|
-
|
|
377
|
-
// executablePath are mutually exclusive in Playwright, so we drop the
|
|
378
|
-
// channel here. (This makes the hardened arm use bundled chromium vs
|
|
379
|
-
// the baseline's real Chrome — a known A/B confound, documented in
|
|
380
|
-
// DESIGN-antibot-hardening.md.)
|
|
381
|
-
const hardenedExecutablePath = hardened ? baseChromium.executablePath() : null;
|
|
382
|
-
const effectiveChannel = hardened ? null : channel;
|
|
383
|
-
// Keep telemetry honest: report what actually launched.
|
|
384
|
-
this.launchedChannel = effectiveChannel;
|
|
431
|
+
// Both launchers drive real Chrome via `channel`: baseline through
|
|
432
|
+
// playwright+stealth, hardened through patchright. patchright closes
|
|
433
|
+
// the automation tells at the protocol layer and drives real Chrome
|
|
434
|
+
// directly — so it no longer needs the bundled-chromium pin the old
|
|
435
|
+
// rebrowser fork required (the pin is what crashed the OAuth flow and
|
|
436
|
+
// confounded the A/B). One binary for both arms.
|
|
437
|
+
this.launchedChannel = channel;
|
|
385
438
|
const context = await launchWithProfileGate(this.profileDir, () => launcher.launchPersistentContext(this.profileDir, {
|
|
386
439
|
headless: chromeHeadless,
|
|
387
440
|
...(chromeEnv !== undefined ? { env: chromeEnv } : {}),
|
|
388
441
|
// `channel:` selects a real installed browser over the bundled
|
|
389
|
-
// binary
|
|
390
|
-
...(
|
|
391
|
-
...(hardenedExecutablePath !== null
|
|
392
|
-
? { executablePath: hardenedExecutablePath }
|
|
393
|
-
: {}),
|
|
442
|
+
// binary (omitted when channel detection found nothing).
|
|
443
|
+
...(channel !== null ? { channel } : {}),
|
|
394
444
|
// `proxy:` routes egress through a residential proxy — only for
|
|
395
445
|
// datacenter-class egress (see resolveProxy()).
|
|
396
446
|
...(proxy !== null ? { proxy } : {}),
|
|
@@ -398,9 +448,33 @@ export class BrowserController {
|
|
|
398
448
|
"--disable-blink-features=AutomationControlled",
|
|
399
449
|
"--no-sandbox",
|
|
400
450
|
"--disable-dev-shm-usage",
|
|
451
|
+
// Enable software WebGL on the GPU-less Xvfb host. Without this,
|
|
452
|
+
// Chrome 120+ disables WebGL entirely (getContext("webgl") → null),
|
|
453
|
+
// which MEASURED (2026-06-04) as the bot's one real fingerprint gap:
|
|
454
|
+
// a browser with NO WebGL is itself an anti-bot tell (reCAPTCHA
|
|
455
|
+
// Enterprise / device-fingerprinting weight it). SwiftShader gives a
|
|
456
|
+
// real WebGL context. MEASURED 2026-06-04: with this on, WebGL reports
|
|
457
|
+
// a Mesa/llvmpipe software renderer and the reCAPTCHA v3 score stays
|
|
458
|
+
// 1.0 — a strict improvement over "no WebGL at all", which more
|
|
459
|
+
// fingerprint libs treat as suspicious than a software renderer. The
|
|
460
|
+
// rc.33 init-script below TRIES to spoof the renderer string to a real
|
|
461
|
+
// Intel GPU, but it is INERT under patchright (hardened) — see its
|
|
462
|
+
// comment. A clean GPU-string spoof under patchright needs binary-level
|
|
463
|
+
// support; tracked as a follow-up, not blocking (score is already 1.0).
|
|
464
|
+
"--enable-unsafe-swiftshader",
|
|
465
|
+
"--ignore-gpu-blocklist",
|
|
401
466
|
],
|
|
402
|
-
viewport:
|
|
403
|
-
|
|
467
|
+
// `viewport: null` makes the page use the REAL OS window size
|
|
468
|
+
// instead of a hardcoded value. The old fixed 1280×720 is exactly
|
|
469
|
+
// Playwright's device-emulation default and is flagged by anti-bot
|
|
470
|
+
// detectors as "default Playwright viewport"; the real window
|
|
471
|
+
// (sized by the Xvfb display) reads as an ordinary browser.
|
|
472
|
+
viewport: null,
|
|
473
|
+
// No `userAgent` override: a real Chrome (channel) supplies a UA
|
|
474
|
+
// that AGREES with navigator.userAgentData + the binary version.
|
|
475
|
+
// The old hardcoded "Chrome/131" string mismatched the actual
|
|
476
|
+
// binary (148) — a UA-vs-userAgentData inconsistency that is itself
|
|
477
|
+
// a fingerprint tell. Let the browser report its own coherent UA.
|
|
404
478
|
// locale stays en-US deliberately: matching it to the proxy
|
|
405
479
|
// country would render signup pages in that language, and the
|
|
406
480
|
// Claude vision form-planner expects English.
|
|
@@ -424,33 +498,58 @@ export class BrowserController {
|
|
|
424
498
|
...(geo?.geolocation !== undefined ? { geolocation: geo.geolocation } : {}),
|
|
425
499
|
}));
|
|
426
500
|
this.context = context;
|
|
427
|
-
// Patch
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
//
|
|
432
|
-
//
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
//
|
|
439
|
-
//
|
|
440
|
-
//
|
|
441
|
-
//
|
|
442
|
-
|
|
501
|
+
// Patch navigator.webdriver — BASELINE ONLY. Measured against the
|
|
502
|
+
// rebrowser bot-detector, this manual `defineProperty` is
|
|
503
|
+
// COUNTERPRODUCTIVE under patchright: it re-adds `webdriver` as an own
|
|
504
|
+
// property the detector then flags, whereas patchright removes it
|
|
505
|
+
// correctly at the source. So in hardened mode we leave it to
|
|
506
|
+
// patchright; only the stealth baseline gets the manual patch.
|
|
507
|
+
if (!hardened) {
|
|
508
|
+
await context.addInitScript(() => {
|
|
509
|
+
Object.defineProperty(navigator, "webdriver", { get: () => undefined });
|
|
510
|
+
});
|
|
511
|
+
}
|
|
512
|
+
// rc.33 / 2026-06-04 — spoof the WebGL UNMASKED vendor+renderer toward a
|
|
513
|
+
// stock Intel GPU, so the software Mesa/llvmpipe string (--enable-unsafe-
|
|
514
|
+
// swiftshader gives us a context, but llvmpipe is itself a VM/headless
|
|
515
|
+
// tell) doesn't read through. Applied TWO ways because patchright
|
|
516
|
+
// (hardened) isolates document-start scripts from the page's main world:
|
|
517
|
+
// • addInitScript — document-start; the effective path in the stealth
|
|
518
|
+
// BASELINE (non-patchright).
|
|
519
|
+
// • re-applied via page.evaluate on every navigation — the ONLY path that
|
|
520
|
+
// reaches the MAIN world under patchright. MEASURED 2026-06-04:
|
|
521
|
+
// addInitScript AND raw CDP Page.addScriptToEvaluateOnNewDocument both
|
|
522
|
+
// land in patchright's isolated world (renderer stayed llvmpipe);
|
|
523
|
+
// page.evaluate does not (renderer became Intel), and the v3 score held
|
|
524
|
+
// at 1.0. Idempotent via a marker so the per-nav re-apply is cheap, and
|
|
525
|
+
// getParameter.toString() is masked to the original native source so
|
|
526
|
+
// the patch itself isn't a tell. Only strings change, not rendering.
|
|
527
|
+
const installWebglSpoof = () => {
|
|
443
528
|
const VENDOR_WEBGL = 0x9245; // UNMASKED_VENDOR_WEBGL
|
|
444
529
|
const RENDERER_WEBGL = 0x9246; // UNMASKED_RENDERER_WEBGL
|
|
445
530
|
const spoof = (proto) => {
|
|
531
|
+
// The marker lives on the prototype so re-application is a no-op; the
|
|
532
|
+
// cast is the one typed-alternative-exhausted spot (adding an ad-hoc
|
|
533
|
+
// brand to a DOM prototype).
|
|
534
|
+
const marked = proto;
|
|
535
|
+
if (marked.__tsWebglPatched === true)
|
|
536
|
+
return;
|
|
446
537
|
const orig = proto.getParameter;
|
|
538
|
+
const native = orig.toString();
|
|
447
539
|
proto.getParameter = function (p) {
|
|
448
540
|
if (p === VENDOR_WEBGL)
|
|
449
|
-
return "
|
|
450
|
-
if (p === RENDERER_WEBGL)
|
|
451
|
-
return "Intel(R) UHD Graphics 620";
|
|
541
|
+
return "Google Inc. (Intel)";
|
|
542
|
+
if (p === RENDERER_WEBGL) {
|
|
543
|
+
return "ANGLE (Intel, Mesa Intel(R) UHD Graphics 620 (KBL GT2), OpenGL 4.6)";
|
|
544
|
+
}
|
|
452
545
|
return orig.call(this, p);
|
|
453
546
|
};
|
|
547
|
+
Object.defineProperty(proto.getParameter, "toString", {
|
|
548
|
+
value: () => native,
|
|
549
|
+
configurable: true,
|
|
550
|
+
writable: true,
|
|
551
|
+
});
|
|
552
|
+
marked.__tsWebglPatched = true;
|
|
454
553
|
};
|
|
455
554
|
if (typeof WebGLRenderingContext !== "undefined") {
|
|
456
555
|
spoof(WebGLRenderingContext.prototype);
|
|
@@ -458,8 +557,26 @@ export class BrowserController {
|
|
|
458
557
|
if (typeof WebGL2RenderingContext !== "undefined") {
|
|
459
558
|
spoof(WebGL2RenderingContext.prototype);
|
|
460
559
|
}
|
|
461
|
-
}
|
|
560
|
+
};
|
|
561
|
+
await context.addInitScript(installWebglSpoof);
|
|
462
562
|
this.page = context.pages()[0] ?? (await context.newPage());
|
|
563
|
+
// Re-apply on every navigation — the main-world reach patchright's isolated
|
|
564
|
+
// init world denies us. framenavigated fires at navigation-commit (before
|
|
565
|
+
// most page JS), so a late WebGL query (reCAPTCHA scores seconds in) sees
|
|
566
|
+
// the spoofed strings; a document-start fingerprinter could still race it.
|
|
567
|
+
const reapplyWebglSpoof = () => {
|
|
568
|
+
const pg = this.page;
|
|
569
|
+
if (pg === null)
|
|
570
|
+
return;
|
|
571
|
+
void pg.evaluate(installWebglSpoof).catch(() => {
|
|
572
|
+
// mid-navigation / closed page — the next navigation re-applies.
|
|
573
|
+
});
|
|
574
|
+
};
|
|
575
|
+
this.page.on("framenavigated", (frame) => {
|
|
576
|
+
if (this.page !== null && frame === this.page.mainFrame())
|
|
577
|
+
reapplyWebglSpoof();
|
|
578
|
+
});
|
|
579
|
+
this.page.on("load", reapplyWebglSpoof);
|
|
463
580
|
// rc.33 — captcha tracing. When UNIVERSAL_BOT_CAPTCHA_TRACE=1 is
|
|
464
581
|
// set, log every response from Cloudflare/Google's challenge
|
|
465
582
|
// endpoints plus any console message that mentions captcha-y
|
|
@@ -878,6 +995,87 @@ export class BrowserController {
|
|
|
878
995
|
await this.page.check(selector, { force: true });
|
|
879
996
|
}
|
|
880
997
|
}
|
|
998
|
+
// Deterministic pre-submit guard: tick every visible, unchecked,
|
|
999
|
+
// non-disabled REQUIRED-AGREEMENT checkbox (terms/privacy/consent),
|
|
1000
|
+
// while never touching marketing/newsletter opt-ins.
|
|
1001
|
+
//
|
|
1002
|
+
// Why this exists separate from the LLM planner: amplitude's signup
|
|
1003
|
+
// has a required TOS checkbox the planner skipped (it read the
|
|
1004
|
+
// adjacent data-storage card-radios as the whole cluster being
|
|
1005
|
+
// "ambiguous radios"), and amplitude does NOT disable submit when the
|
|
1006
|
+
// box is unticked — so the click silently no-ops and the bot then
|
|
1007
|
+
// waits forever for a verification mail that never sends. This runs on
|
|
1008
|
+
// EVERY submit, not only the `submit_disabled` path in clickSubmit().
|
|
1009
|
+
//
|
|
1010
|
+
// Returns the labels/testids it checked (for step logging); empty when
|
|
1011
|
+
// it ticked nothing.
|
|
1012
|
+
async checkRequiredAgreementBoxes() {
|
|
1013
|
+
if (!this.page)
|
|
1014
|
+
throw new Error("Browser not started");
|
|
1015
|
+
// Best-effort: a page-eval failure (navigation mid-call, detached
|
|
1016
|
+
// frame) must never fail the parent submit — return nothing.
|
|
1017
|
+
try {
|
|
1018
|
+
return await this.page.evaluate(() => {
|
|
1019
|
+
// These two regexes MUST stay byte-identical with
|
|
1020
|
+
// AGREEMENT_TEXT_RE / MARKETING_TEXT_RE in this module — the
|
|
1021
|
+
// page realm can't import, so they're inlined here.
|
|
1022
|
+
const agreementRe = /terms|tos\b|privacy|consent|policy|i agree|agree to|acknowledge|gdpr/i;
|
|
1023
|
+
const marketingRe = /newsletter|updates|offers|product tips|marketing|promotional|receive emails|opt[- ]?in to|subscribe/i;
|
|
1024
|
+
const checked = [];
|
|
1025
|
+
const boxes = Array.from(document.querySelectorAll('input[type="checkbox"]'));
|
|
1026
|
+
for (const box of boxes) {
|
|
1027
|
+
if (box.checked || box.disabled)
|
|
1028
|
+
continue;
|
|
1029
|
+
const rect = box.getBoundingClientRect();
|
|
1030
|
+
if (rect.width <= 0 || rect.height <= 0)
|
|
1031
|
+
continue;
|
|
1032
|
+
// Associated text = attributes + a label[for=id] + nearest
|
|
1033
|
+
// ancestor <label> + the immediately following sibling text.
|
|
1034
|
+
const parts = [
|
|
1035
|
+
box.getAttribute("data-testid") ?? "",
|
|
1036
|
+
box.getAttribute("name") ?? "",
|
|
1037
|
+
box.id,
|
|
1038
|
+
box.getAttribute("aria-label") ?? "",
|
|
1039
|
+
];
|
|
1040
|
+
if (box.id) {
|
|
1041
|
+
const forLabel = document.querySelector(`label[for="${CSS.escape(box.id)}"]`);
|
|
1042
|
+
if (forLabel)
|
|
1043
|
+
parts.push(forLabel.textContent ?? "");
|
|
1044
|
+
}
|
|
1045
|
+
const ancestorLabel = box.closest("label");
|
|
1046
|
+
if (ancestorLabel)
|
|
1047
|
+
parts.push(ancestorLabel.textContent ?? "");
|
|
1048
|
+
const sibling = box.nextSibling;
|
|
1049
|
+
if (sibling && sibling.textContent)
|
|
1050
|
+
parts.push(sibling.textContent);
|
|
1051
|
+
if (box.nextElementSibling) {
|
|
1052
|
+
parts.push(box.nextElementSibling.textContent ?? "");
|
|
1053
|
+
}
|
|
1054
|
+
const text = parts.join(" ");
|
|
1055
|
+
if (!agreementRe.test(text) || marketingRe.test(text))
|
|
1056
|
+
continue;
|
|
1057
|
+
// React/Vue controlled inputs ignore a bare `.checked = true`:
|
|
1058
|
+
// their state lives in the framework, updated only by the real
|
|
1059
|
+
// event flow. Set the property AND dispatch input/change AND a
|
|
1060
|
+
// synthetic click so the controlled binding observes the flip.
|
|
1061
|
+
box.checked = true;
|
|
1062
|
+
box.dispatchEvent(new Event("input", { bubbles: true }));
|
|
1063
|
+
box.dispatchEvent(new Event("change", { bubbles: true }));
|
|
1064
|
+
box.click();
|
|
1065
|
+
const label = box.getAttribute("data-testid") ||
|
|
1066
|
+
box.getAttribute("name") ||
|
|
1067
|
+
box.id ||
|
|
1068
|
+
box.getAttribute("aria-label") ||
|
|
1069
|
+
"agreement-checkbox";
|
|
1070
|
+
checked.push(label);
|
|
1071
|
+
}
|
|
1072
|
+
return checked;
|
|
1073
|
+
});
|
|
1074
|
+
}
|
|
1075
|
+
catch {
|
|
1076
|
+
return [];
|
|
1077
|
+
}
|
|
1078
|
+
}
|
|
881
1079
|
// Scroll a Terms-of-Service style modal to the bottom so the gated
|
|
882
1080
|
// "Accept" button enables. Railway's signup is the canonical case:
|
|
883
1081
|
// a modal with a virtualized ToS list watches real `scroll` /
|
|
@@ -1754,6 +1952,11 @@ export class BrowserController {
|
|
|
1754
1952
|
const recaptcha = document.querySelector('textarea[name="g-recaptcha-response"]');
|
|
1755
1953
|
if (recaptcha !== null && recaptcha.value.length > 0)
|
|
1756
1954
|
return true;
|
|
1955
|
+
// hCaptcha populates its own response textarea on a passed
|
|
1956
|
+
// checkbox (plausible). Same shape as reCAPTCHA's.
|
|
1957
|
+
const hcaptcha = document.querySelector('textarea[name="h-captcha-response"]');
|
|
1958
|
+
if (hcaptcha !== null && hcaptcha.value.length > 0)
|
|
1959
|
+
return true;
|
|
1757
1960
|
// Some Turnstile installs use a managed mode that emits its
|
|
1758
1961
|
// own attribute on the host div when solved.
|
|
1759
1962
|
const cfManaged = document.querySelector(".cf-turnstile[data-state='success']");
|
|
@@ -1791,6 +1994,34 @@ export class BrowserController {
|
|
|
1791
1994
|
async findCaptchaWidget() {
|
|
1792
1995
|
if (!this.page)
|
|
1793
1996
|
throw new Error("Browser not started");
|
|
1997
|
+
// An INVISIBLE reCAPTCHA (api2/anchor with size=invisible — the
|
|
1998
|
+
// bottom-right badge) is score-mode: there is no checkbox to click, and
|
|
1999
|
+
// its token is emitted only when the form's submit handler calls
|
|
2000
|
+
// grecaptcha.execute(). It must NOT be treated as a solvable visible
|
|
2001
|
+
// widget. MEASURED on amplitude (2026-06-04): the badge iframe is
|
|
2002
|
+
// ~256×60, so it cleared the size filter below and got "found" + clicked;
|
|
2003
|
+
// the pre-submit token-poll then timed out and the bot escalated to
|
|
2004
|
+
// 2Captcha, which can't solve a score-mode widget (ERROR_CAPTCHA_
|
|
2005
|
+
// UNSOLVABLE) → captcha_blocked — even though our v3 score is ~1.0 and a
|
|
2006
|
+
// plain form-submit would have passed silently. Detect "invisible-only"
|
|
2007
|
+
// (badge present, no visible checkbox anchor, no rendered bframe grid) and
|
|
2008
|
+
// skip reCAPTCHA entirely so the signup proceeds to submit.
|
|
2009
|
+
const recaptchaInvisibleOnly = await this.page
|
|
2010
|
+
.evaluate(() => {
|
|
2011
|
+
const q = (s) => document.querySelector(s) !== null;
|
|
2012
|
+
const visibleAnchor = Array.from(document.querySelectorAll('iframe[src*="recaptcha/api2/anchor"]')).some((f) => !/size=invisible/.test(f.src));
|
|
2013
|
+
const bframe = (() => {
|
|
2014
|
+
const f = document.querySelector('iframe[src*="recaptcha/api2/bframe"]');
|
|
2015
|
+
if (f === null)
|
|
2016
|
+
return false;
|
|
2017
|
+
const r = f.getBoundingClientRect();
|
|
2018
|
+
return r.width > 30 && r.height > 30;
|
|
2019
|
+
})();
|
|
2020
|
+
const invisiblePresent = q('iframe[src*="recaptcha/api2/anchor"][src*="size=invisible"]') ||
|
|
2021
|
+
q(".grecaptcha-badge");
|
|
2022
|
+
return invisiblePresent && !visibleAnchor && !bframe;
|
|
2023
|
+
})
|
|
2024
|
+
.catch(() => false);
|
|
1794
2025
|
// Phase 1: widget shape with polling. page.locator (unlike the
|
|
1795
2026
|
// querySelector in detectCaptchaVariant) pierces OPEN shadow roots,
|
|
1796
2027
|
// so the Cloudflare iframe is reachable even on modern shadow-DOM
|
|
@@ -1803,11 +2034,19 @@ export class BrowserController {
|
|
|
1803
2034
|
// reCAPTCHA v2: src contains "recaptcha/api2"
|
|
1804
2035
|
const iframeCandidates = [
|
|
1805
2036
|
{ kind: "turnstile", selector: 'iframe[src*="challenges.cloudflare.com"]' },
|
|
1806
|
-
|
|
2037
|
+
// Visible reCAPTCHA only — the size=invisible anchor (score-mode badge)
|
|
2038
|
+
// is handled by the recaptchaInvisibleOnly skip above.
|
|
2039
|
+
{ kind: "recaptcha", selector: 'iframe[src*="recaptcha/api2/anchor"]:not([src*="size=invisible"])' },
|
|
2040
|
+
// hCaptcha's checkbox iframe (the anchor frame). Plausible and other
|
|
2041
|
+
// hCaptcha sites render this; clicking it ticks the box the same way
|
|
2042
|
+
// Turnstile/reCAPTCHA do.
|
|
2043
|
+
{ kind: "hcaptcha", selector: 'iframe[src*="hcaptcha.com"][src*="frame=checkbox"]' },
|
|
2044
|
+
{ kind: "hcaptcha", selector: 'iframe[src*="newassets.hcaptcha.com"]' },
|
|
1807
2045
|
// Host-div fallbacks (light DOM) — preferred order keeps the iframe
|
|
1808
2046
|
// first when present (more precise click target).
|
|
1809
2047
|
{ kind: "turnstile", selector: ".cf-turnstile" },
|
|
1810
2048
|
{ kind: "turnstile", selector: "#clerk-captcha" },
|
|
2049
|
+
{ kind: "hcaptcha", selector: ".h-captcha" },
|
|
1811
2050
|
];
|
|
1812
2051
|
const iframeDeadline = Date.now() + 5000;
|
|
1813
2052
|
while (Date.now() < iframeDeadline) {
|
|
@@ -1834,8 +2073,14 @@ export class BrowserController {
|
|
|
1834
2073
|
const hostCandidates = [
|
|
1835
2074
|
{ kind: "turnstile", selector: 'input[name="cf-turnstile-response"]' },
|
|
1836
2075
|
{ kind: "recaptcha", selector: 'textarea[name="g-recaptcha-response"]' },
|
|
2076
|
+
{ kind: "hcaptcha", selector: 'textarea[name="h-captcha-response"]' },
|
|
1837
2077
|
];
|
|
1838
2078
|
for (const { kind, selector } of hostCandidates) {
|
|
2079
|
+
// The invisible reCAPTCHA's hidden g-recaptcha-response textarea lives
|
|
2080
|
+
// INSIDE the .grecaptcha-badge (~256×60), so the walk-up below would
|
|
2081
|
+
// return the badge box and we'd click it — the exact bug. Skip it.
|
|
2082
|
+
if (kind === "recaptcha" && recaptchaInvisibleOnly)
|
|
2083
|
+
continue;
|
|
1839
2084
|
const locator = this.page.locator(selector);
|
|
1840
2085
|
const count = await locator.count();
|
|
1841
2086
|
if (count === 0)
|
|
@@ -1912,11 +2157,14 @@ export class BrowserController {
|
|
|
1912
2157
|
else if (present('iframe[src*="hcaptcha.com"]')) {
|
|
1913
2158
|
variant = "hcaptcha";
|
|
1914
2159
|
}
|
|
1915
|
-
else if (present('iframe[src*="recaptcha/api2/anchor"]')) {
|
|
2160
|
+
else if (present('iframe[src*="recaptcha/api2/anchor"]:not([src*="size=invisible"])')) {
|
|
2161
|
+
// VISIBLE checkbox anchor (size=normal) → clickable v2.
|
|
1916
2162
|
variant = "recaptcha_v2";
|
|
1917
2163
|
}
|
|
1918
|
-
else if (present(".grecaptcha-badge")
|
|
1919
|
-
|
|
2164
|
+
else if (present(".grecaptcha-badge") ||
|
|
2165
|
+
present('iframe[src*="recaptcha/api2/anchor"][src*="size=invisible"]')) {
|
|
2166
|
+
// Badge / size=invisible anchor and no clickable checkbox →
|
|
2167
|
+
// score-mode reCAPTCHA (passes on submit, nothing to click).
|
|
1920
2168
|
variant = "recaptcha_v3";
|
|
1921
2169
|
}
|
|
1922
2170
|
return { variant, challengeRendered };
|
|
@@ -1937,25 +2185,37 @@ export class BrowserController {
|
|
|
1937
2185
|
// help). Reads from the standard places sites declare it:
|
|
1938
2186
|
// 1. <div class="g-recaptcha" data-sitekey="...">
|
|
1939
2187
|
// 2. <iframe src="...?k=SITEKEY&..."> (api2/anchor frame)
|
|
1940
|
-
//
|
|
2188
|
+
//
|
|
2189
|
+
// CRITICAL: only ever returns a GENUINE reCAPTCHA key. hCaptcha
|
|
2190
|
+
// (`.h-captcha`) and Turnstile (`.cf-turnstile`) ALSO publish a
|
|
2191
|
+
// `data-sitekey` attribute, so a bare `[data-sitekey]` selector
|
|
2192
|
+
// grabs the wrong provider's key and the caller ships it to
|
|
2193
|
+
// 2Captcha's `userrecaptcha` endpoint → ERROR_WRONG_GOOGLEKEY (the
|
|
2194
|
+
// plausible/hCaptcha case). The authoritative discriminator is the
|
|
2195
|
+
// key FORMAT: reCAPTCHA public keys always start with `6L`; hCaptcha
|
|
2196
|
+
// keys are UUIDs (`bc609205-…`); Turnstile keys start with `0x`. We
|
|
2197
|
+
// both scope the selector away from the other widgets AND gate on
|
|
2198
|
+
// the `6L` prefix, so no non-reCAPTCHA key can ever leak through.
|
|
1941
2199
|
async extractRecaptchaSitekey() {
|
|
1942
2200
|
if (!this.page)
|
|
1943
2201
|
throw new Error("Browser not started");
|
|
1944
2202
|
try {
|
|
1945
2203
|
const sitekey = await this.page.evaluate(() => {
|
|
1946
|
-
|
|
1947
|
-
|
|
1948
|
-
|
|
1949
|
-
|
|
1950
|
-
|
|
2204
|
+
const isRecaptchaKey = (k) => k !== null && /^6L/.test(k) && k.length > 30;
|
|
2205
|
+
// 1. data-sitekey, but NOT on an hCaptcha/Turnstile widget (or
|
|
2206
|
+
// nested inside one). Those publish data-sitekey too.
|
|
2207
|
+
const anchors = Array.from(document.querySelectorAll("[data-sitekey]")).filter((el) => el.closest(".h-captcha, .cf-turnstile") === null);
|
|
2208
|
+
for (const el of anchors) {
|
|
2209
|
+
const k = el.getAttribute("data-sitekey");
|
|
2210
|
+
if (isRecaptchaKey(k))
|
|
1951
2211
|
return k;
|
|
1952
2212
|
}
|
|
1953
|
-
// 2. The api2 iframe src carries ?k=SITEKEY.
|
|
2213
|
+
// 2. The api2/enterprise iframe src carries ?k=SITEKEY.
|
|
1954
2214
|
const iframes = Array.from(document.querySelectorAll('iframe[src*="recaptcha/api2"], iframe[src*="recaptcha/enterprise"]'));
|
|
1955
2215
|
for (const ifr of iframes) {
|
|
1956
2216
|
const url = new URL(ifr.src);
|
|
1957
2217
|
const k = url.searchParams.get("k");
|
|
1958
|
-
if (k
|
|
2218
|
+
if (isRecaptchaKey(k))
|
|
1959
2219
|
return k;
|
|
1960
2220
|
}
|
|
1961
2221
|
return null;
|
|
@@ -2029,6 +2289,161 @@ export class BrowserController {
|
|
|
2029
2289
|
return false;
|
|
2030
2290
|
}
|
|
2031
2291
|
}
|
|
2292
|
+
// Mint the score token for an INVISIBLE reCAPTCHA by calling
|
|
2293
|
+
// grecaptcha.execute() ourselves, then wait for g-recaptcha-response to
|
|
2294
|
+
// populate. MEASURED on amplitude (2026-06-04): an invisible reCAPTCHA's
|
|
2295
|
+
// token only exists once execute() runs, and amplitude's form REQUIRES it —
|
|
2296
|
+
// merely skipping the badge (not clicking it) left the textarea empty and
|
|
2297
|
+
// the submit silently no-op'd. With our ~1.0 v3 score, execute() returns a
|
|
2298
|
+
// passing token in ~1-3s, so the subsequent submit carries a valid token.
|
|
2299
|
+
// Handles both standard (grecaptcha) and enterprise (grecaptcha.enterprise)
|
|
2300
|
+
// namespaces. Returns true once a token is present. Best-effort: a missing
|
|
2301
|
+
// grecaptcha or an execute() throw resolves false (the form may still mint
|
|
2302
|
+
// it on its own submit handler).
|
|
2303
|
+
async triggerInvisibleRecaptcha(timeoutMs = 9000) {
|
|
2304
|
+
if (!this.page)
|
|
2305
|
+
throw new Error("Browser not started");
|
|
2306
|
+
const tokenPresent = () => this.page.evaluate(() => {
|
|
2307
|
+
const ta = document.querySelector('textarea[name="g-recaptcha-response"], textarea[id^="g-recaptcha-response"]');
|
|
2308
|
+
return ta !== null && ta.value.length > 0;
|
|
2309
|
+
}).catch(() => false);
|
|
2310
|
+
if (await tokenPresent())
|
|
2311
|
+
return true;
|
|
2312
|
+
const fired = await this.page
|
|
2313
|
+
.evaluate(() => {
|
|
2314
|
+
const w = window;
|
|
2315
|
+
const g = w.grecaptcha;
|
|
2316
|
+
if (g === undefined)
|
|
2317
|
+
return false;
|
|
2318
|
+
let any = false;
|
|
2319
|
+
const ids = (() => {
|
|
2320
|
+
try {
|
|
2321
|
+
return Object.keys(w.___grecaptcha_cfg?.clients ?? {});
|
|
2322
|
+
}
|
|
2323
|
+
catch {
|
|
2324
|
+
return [];
|
|
2325
|
+
}
|
|
2326
|
+
})();
|
|
2327
|
+
for (const id of ids) {
|
|
2328
|
+
const n = Number(id);
|
|
2329
|
+
if (!Number.isFinite(n))
|
|
2330
|
+
continue;
|
|
2331
|
+
try {
|
|
2332
|
+
g.enterprise?.execute?.(n);
|
|
2333
|
+
any = true;
|
|
2334
|
+
}
|
|
2335
|
+
catch {
|
|
2336
|
+
/* not this namespace */
|
|
2337
|
+
}
|
|
2338
|
+
try {
|
|
2339
|
+
g.execute?.(n);
|
|
2340
|
+
any = true;
|
|
2341
|
+
}
|
|
2342
|
+
catch {
|
|
2343
|
+
/* widget already executed / wrong namespace */
|
|
2344
|
+
}
|
|
2345
|
+
}
|
|
2346
|
+
// Fallback: no enumerable clients — try the bare (first-widget) call,
|
|
2347
|
+
// enterprise first (a v2-invisible page exposes plain execute()).
|
|
2348
|
+
if (!any) {
|
|
2349
|
+
try {
|
|
2350
|
+
if (typeof g.enterprise?.execute === "function") {
|
|
2351
|
+
g.enterprise.execute();
|
|
2352
|
+
any = true;
|
|
2353
|
+
}
|
|
2354
|
+
else if (typeof g.execute === "function") {
|
|
2355
|
+
g.execute();
|
|
2356
|
+
any = true;
|
|
2357
|
+
}
|
|
2358
|
+
}
|
|
2359
|
+
catch {
|
|
2360
|
+
return false;
|
|
2361
|
+
}
|
|
2362
|
+
}
|
|
2363
|
+
return any;
|
|
2364
|
+
})
|
|
2365
|
+
.catch(() => false);
|
|
2366
|
+
if (!fired)
|
|
2367
|
+
return false;
|
|
2368
|
+
const start = Date.now();
|
|
2369
|
+
while (Date.now() - start < timeoutMs) {
|
|
2370
|
+
await this.sleep(500);
|
|
2371
|
+
if (await tokenPresent())
|
|
2372
|
+
return true;
|
|
2373
|
+
}
|
|
2374
|
+
return false;
|
|
2375
|
+
}
|
|
2376
|
+
// Tier 3 hCaptcha support — extract the hCaptcha sitekey so 2Captcha
|
|
2377
|
+
// can solve it. hCaptcha publishes its key on `.h-captcha[data-sitekey]`
|
|
2378
|
+
// or in the checkbox iframe's `?sitekey=` query. Keys are UUIDs (the
|
|
2379
|
+
// reCAPTCHA `6L` guard in extractRecaptchaSitekey deliberately rejects
|
|
2380
|
+
// them, which is why hCaptcha needs its own extractor). Returns null
|
|
2381
|
+
// when no hCaptcha widget is present.
|
|
2382
|
+
async extractHcaptchaSitekey() {
|
|
2383
|
+
if (!this.page)
|
|
2384
|
+
throw new Error("Browser not started");
|
|
2385
|
+
try {
|
|
2386
|
+
return await this.page.evaluate(() => {
|
|
2387
|
+
const div = document.querySelector(".h-captcha[data-sitekey], [data-hcaptcha-sitekey]");
|
|
2388
|
+
if (div !== null) {
|
|
2389
|
+
const k = div.getAttribute("data-sitekey") ??
|
|
2390
|
+
div.getAttribute("data-hcaptcha-sitekey");
|
|
2391
|
+
if (k !== null && k.length > 10)
|
|
2392
|
+
return k;
|
|
2393
|
+
}
|
|
2394
|
+
const iframe = document.querySelector('iframe[src*="hcaptcha.com"]');
|
|
2395
|
+
if (iframe !== null) {
|
|
2396
|
+
const k = new URL(iframe.src).searchParams.get("sitekey");
|
|
2397
|
+
if (k !== null && k.length > 10)
|
|
2398
|
+
return k;
|
|
2399
|
+
}
|
|
2400
|
+
return null;
|
|
2401
|
+
});
|
|
2402
|
+
}
|
|
2403
|
+
catch {
|
|
2404
|
+
return null;
|
|
2405
|
+
}
|
|
2406
|
+
}
|
|
2407
|
+
// Inject a 2Captcha-resolved hCaptcha token into the page's
|
|
2408
|
+
// h-captcha-response textarea(s) and fire the widget's data-callback
|
|
2409
|
+
// if the page registered one. Mirrors injectRecaptchaToken; hCaptcha
|
|
2410
|
+
// also mirrors the response token into a g-recaptcha-response textarea
|
|
2411
|
+
// on some compat installs, so populate both names if present.
|
|
2412
|
+
async injectHcaptchaToken(token) {
|
|
2413
|
+
if (!this.page)
|
|
2414
|
+
throw new Error("Browser not started");
|
|
2415
|
+
try {
|
|
2416
|
+
return await this.page.evaluate((tok) => {
|
|
2417
|
+
const inputs = Array.from(document.querySelectorAll('textarea[name="h-captcha-response"], textarea[id^="h-captcha-response"], textarea[name="g-recaptcha-response"]'));
|
|
2418
|
+
if (inputs.length === 0)
|
|
2419
|
+
return false;
|
|
2420
|
+
for (const input of inputs) {
|
|
2421
|
+
input.value = tok;
|
|
2422
|
+
input.dispatchEvent(new Event("input", { bubbles: true }));
|
|
2423
|
+
input.dispatchEvent(new Event("change", { bubbles: true }));
|
|
2424
|
+
}
|
|
2425
|
+
// Fire the data-callback the page registered on the .h-captcha
|
|
2426
|
+
// host (hCaptcha calls it by name on window). Best-effort — the
|
|
2427
|
+
// populated textarea is what server-side validation reads.
|
|
2428
|
+
try {
|
|
2429
|
+
const host = document.querySelector(".h-captcha[data-callback]");
|
|
2430
|
+
const name = host?.getAttribute("data-callback");
|
|
2431
|
+
if (name !== null && name !== undefined) {
|
|
2432
|
+
const fn = window[name];
|
|
2433
|
+
if (typeof fn === "function")
|
|
2434
|
+
fn(tok);
|
|
2435
|
+
}
|
|
2436
|
+
}
|
|
2437
|
+
catch {
|
|
2438
|
+
// no named callback — DOM injection stands.
|
|
2439
|
+
}
|
|
2440
|
+
return true;
|
|
2441
|
+
}, token);
|
|
2442
|
+
}
|
|
2443
|
+
catch {
|
|
2444
|
+
return false;
|
|
2445
|
+
}
|
|
2446
|
+
}
|
|
2032
2447
|
// Small mouse wiggle near the current position. Used during prewarm
|
|
2033
2448
|
// so the page sees pointer events before we navigate away.
|
|
2034
2449
|
async jitterMouse() {
|
|
@@ -2810,35 +3225,110 @@ export class BrowserController {
|
|
|
2810
3225
|
async waitForAntiBotInterstitialToClear(timeoutMs) {
|
|
2811
3226
|
if (!this.page)
|
|
2812
3227
|
return;
|
|
2813
|
-
|
|
2814
|
-
|
|
2815
|
-
|
|
2816
|
-
|
|
3228
|
+
const first = await this.pollUntilInterstitialClears(timeoutMs);
|
|
3229
|
+
// Never saw an interstitial, or saw one and it cleared on its own —
|
|
3230
|
+
// nothing more to do.
|
|
3231
|
+
if (!first.detected || first.cleared)
|
|
2817
3232
|
return;
|
|
3233
|
+
// Still on the interstitial at the deadline. If Cloudflare reported
|
|
3234
|
+
// the challenge PASSED ("Verification successful"), the redirect is
|
|
3235
|
+
// just racing/stuck — be patient through ANOTHER full window before
|
|
3236
|
+
// touching anything (a reload mid-redirect can re-arm the challenge).
|
|
3237
|
+
if (first.verificationPassed) {
|
|
3238
|
+
const patient = await this.pollUntilInterstitialClears(timeoutMs);
|
|
3239
|
+
if (patient.cleared)
|
|
3240
|
+
return;
|
|
3241
|
+
// "Verification successful" but the page never advances is the
|
|
3242
|
+
// signature of a STALE cf_clearance cookie — issued on a prior visit
|
|
3243
|
+
// (often a different egress IP), which CF matches ("successful") but
|
|
3244
|
+
// the origin then rejects, looping forever on "Waiting for the page
|
|
3245
|
+
// to load." MEASURED: a clean profile clears codesandbox's challenge
|
|
3246
|
+
// in ~12s; the stale cookie is what stalls the shared profile. Drop
|
|
3247
|
+
// the CF cookies to force a FRESH challenge, then reload.
|
|
3248
|
+
if (await this.clearCloudflareCookiesAndRetry(timeoutMs))
|
|
3249
|
+
return;
|
|
3250
|
+
// Or the auto-redirect simply stalled with a still-valid clearance —
|
|
3251
|
+
// re-navigate past the one-shot challenge token.
|
|
3252
|
+
if (await this.forceNavigatePastClearedChallenge())
|
|
3253
|
+
return;
|
|
2818
3254
|
}
|
|
2819
|
-
//
|
|
2820
|
-
//
|
|
2821
|
-
//
|
|
2822
|
-
//
|
|
2823
|
-
//
|
|
2824
|
-
//
|
|
2825
|
-
// block — fingerprint/IP — reload won't help, but the caller's
|
|
2826
|
-
// inventory diagnostic will still surface the block.)
|
|
3255
|
+
// Force the real page: now that the cf_clearance cookie is set, a
|
|
3256
|
+
// reload often renders it. domcontentloaded (not networkidle) — the
|
|
3257
|
+
// real page is usually a heavy SPA that never reaches networkidle, so
|
|
3258
|
+
// waiting for it just burns the budget back into a timeout. (If it's a
|
|
3259
|
+
// server-side risk-score block — fingerprint/IP — reload won't help,
|
|
3260
|
+
// but the caller's inventory diagnostic will still surface the block.)
|
|
2827
3261
|
try {
|
|
2828
|
-
await this.page.reload({ waitUntil: "
|
|
3262
|
+
await this.page.reload({ waitUntil: "domcontentloaded", timeout: 15_000 });
|
|
2829
3263
|
}
|
|
2830
3264
|
catch {
|
|
2831
3265
|
// reload failed — proceed with what's there
|
|
2832
3266
|
}
|
|
2833
3267
|
await this.pollUntilInterstitialClears(Math.max(5000, timeoutMs / 2));
|
|
2834
3268
|
}
|
|
2835
|
-
//
|
|
2836
|
-
//
|
|
2837
|
-
|
|
3269
|
+
// Drop Cloudflare's anti-bot cookies (cf_clearance + __cf_bm) so the next
|
|
3270
|
+
// request triggers a FRESH managed challenge, then reload and wait for it
|
|
3271
|
+
// to clear. Scoped to cookie NAME — only CF's own cookies are removed, so
|
|
3272
|
+
// an OAuth provider's session on accounts.google.com / github.com is
|
|
3273
|
+
// untouched. A fresh challenge on a residential IP clears in ~12-15s, so
|
|
3274
|
+
// we give it a generous window. Returns true if the interstitial is gone.
|
|
3275
|
+
async clearCloudflareCookiesAndRetry(timeoutMs) {
|
|
3276
|
+
if (!this.page || !this.context)
|
|
3277
|
+
return false;
|
|
3278
|
+
try {
|
|
3279
|
+
await this.context.clearCookies({ name: "cf_clearance" });
|
|
3280
|
+
await this.context.clearCookies({ name: "__cf_bm" });
|
|
3281
|
+
}
|
|
3282
|
+
catch {
|
|
3283
|
+
// clearCookies filter unsupported / failed — nothing to retry on.
|
|
3284
|
+
return false;
|
|
3285
|
+
}
|
|
3286
|
+
try {
|
|
3287
|
+
await this.page.reload({ waitUntil: "domcontentloaded", timeout: 20_000 });
|
|
3288
|
+
}
|
|
3289
|
+
catch {
|
|
3290
|
+
// reload failed — still give the poll a chance below.
|
|
3291
|
+
}
|
|
3292
|
+
const after = await this.pollUntilInterstitialClears(Math.max(20_000, timeoutMs));
|
|
3293
|
+
return after.cleared || !after.detected;
|
|
3294
|
+
}
|
|
3295
|
+
// With a CONFIRMED Cloudflare pass, re-navigate to the current URL with
|
|
3296
|
+
// the one-shot `__cf_chl_*` challenge token stripped — the cf_clearance
|
|
3297
|
+
// cookie is already set, so the edge serves the real page instead of the
|
|
3298
|
+
// stuck redirect. Returns true if the interstitial is gone afterwards.
|
|
3299
|
+
// Returns false (caller falls back to a plain reload) when there's no
|
|
3300
|
+
// token to strip or the navigation didn't clear the gate.
|
|
3301
|
+
async forceNavigatePastClearedChallenge() {
|
|
2838
3302
|
if (!this.page)
|
|
2839
3303
|
return false;
|
|
3304
|
+
const cleaned = stripCloudflareChallengeParams(this.page.url());
|
|
3305
|
+
if (!cleaned)
|
|
3306
|
+
return false;
|
|
3307
|
+
try {
|
|
3308
|
+
await this.page.goto(cleaned, {
|
|
3309
|
+
waitUntil: "domcontentloaded",
|
|
3310
|
+
timeout: 15_000,
|
|
3311
|
+
});
|
|
3312
|
+
}
|
|
3313
|
+
catch {
|
|
3314
|
+
return false;
|
|
3315
|
+
}
|
|
3316
|
+
const after = await this.pollUntilInterstitialClears(Math.max(5000, 8000));
|
|
3317
|
+
// cleared = saw it then it went away; !detected = the real page rendered
|
|
3318
|
+
// immediately (no interstitial on the post-nav page at all).
|
|
3319
|
+
return after.cleared || !after.detected;
|
|
3320
|
+
}
|
|
3321
|
+
// One poll loop. `detected` = an interstitial was observed at least
|
|
3322
|
+
// once; `cleared` = it was observed AND then went away (vs. still there
|
|
3323
|
+
// at the deadline); `verificationPassed` = Cloudflare reported the
|
|
3324
|
+
// challenge succeeded at some point during the wait (see
|
|
3325
|
+
// classifyInterstitialText).
|
|
3326
|
+
async pollUntilInterstitialClears(timeoutMs) {
|
|
3327
|
+
if (!this.page)
|
|
3328
|
+
return { detected: false, cleared: false, verificationPassed: false };
|
|
2840
3329
|
const deadline = Date.now() + timeoutMs;
|
|
2841
3330
|
let detected = false;
|
|
3331
|
+
let verificationPassed = false;
|
|
2842
3332
|
while (Date.now() < deadline) {
|
|
2843
3333
|
let title = "";
|
|
2844
3334
|
let bodyText = "";
|
|
@@ -2850,19 +3340,21 @@ export class BrowserController {
|
|
|
2850
3340
|
await new Promise((r) => setTimeout(r, 500));
|
|
2851
3341
|
continue;
|
|
2852
3342
|
}
|
|
2853
|
-
const
|
|
2854
|
-
if (
|
|
3343
|
+
const c = classifyInterstitialText(title + " " + bodyText);
|
|
3344
|
+
if (c.verificationPassed)
|
|
3345
|
+
verificationPassed = true;
|
|
3346
|
+
if (!c.onInterstitial) {
|
|
2855
3347
|
if (detected) {
|
|
2856
3348
|
// Give the freshly-revealed page a tick to hydrate before
|
|
2857
3349
|
// the inventory scan.
|
|
2858
3350
|
await new Promise((r) => setTimeout(r, 800));
|
|
2859
3351
|
}
|
|
2860
|
-
return detected;
|
|
3352
|
+
return { detected, cleared: detected, verificationPassed };
|
|
2861
3353
|
}
|
|
2862
3354
|
detected = true;
|
|
2863
3355
|
await new Promise((r) => setTimeout(r, 1000));
|
|
2864
3356
|
}
|
|
2865
|
-
return detected;
|
|
3357
|
+
return { detected, cleared: false, verificationPassed };
|
|
2866
3358
|
}
|
|
2867
3359
|
// Walk the live DOM (piercing open shadow roots) and return every
|
|
2868
3360
|
// visible interactive element with a bot-computed selector (F3 T1).
|
|
@@ -2903,7 +3395,16 @@ export class BrowserController {
|
|
|
2903
3395
|
// whole inventory with "Cannot read properties of undefined
|
|
2904
3396
|
// (reading 'querySelectorAll')", failing the run before the
|
|
2905
3397
|
// planner ever saw the page. Skip such a node instead.
|
|
2906
|
-
|
|
3398
|
+
//
|
|
3399
|
+
// `== null` (not `=== null`) is load-bearing: `el.shadowRoot` is
|
|
3400
|
+
// typed `ShadowRoot | null`, but a detached/closed custom element
|
|
3401
|
+
// can yield `undefined` at runtime. The recursion below calls
|
|
3402
|
+
// `walk(el.shadowRoot)` whenever it isn't `null`, so an `undefined`
|
|
3403
|
+
// shadowRoot reaches here and `typeof undefined.querySelectorAll`
|
|
3404
|
+
// THROWS before the typeof guard can fire — exactly the #59
|
|
3405
|
+
// redis-cloud crash, which recurred 2026-06-03 even with the
|
|
3406
|
+
// null-only guard in place. The loose check covers both.
|
|
3407
|
+
if (root == null || typeof root.querySelectorAll !== "function")
|
|
2907
3408
|
return;
|
|
2908
3409
|
root.querySelectorAll(SELECTOR).forEach((n) => collected.push(n));
|
|
2909
3410
|
root.querySelectorAll("*").forEach((el) => {
|
|
@@ -3262,16 +3763,250 @@ export class BrowserController {
|
|
|
3262
3763
|
// best-effort — the agent's consent loop re-reads state regardless
|
|
3263
3764
|
}
|
|
3264
3765
|
}
|
|
3766
|
+
// Does the page sign in with Google via Google Identity Services (GSI)
|
|
3767
|
+
// rather than classic OAuth redirect? GSI renders its button in a
|
|
3768
|
+
// cross-origin iframe (accounts.google.com/gsi/button) and/or exposes the
|
|
3769
|
+
// `google.accounts.id` JS API; on use it raises a browser-native FedCM
|
|
3770
|
+
// dialog or a popup and returns a JWT to a JS callback — there is NO
|
|
3771
|
+
// redirect, so the classic startOAuth flow can't drive it. Detecting this
|
|
3772
|
+
// is what lets the agent route to tryGoogleGsiLogin instead.
|
|
3773
|
+
async hasGoogleGsiAffordance() {
|
|
3774
|
+
if (!this.page)
|
|
3775
|
+
return false;
|
|
3776
|
+
try {
|
|
3777
|
+
return await this.page.evaluate(() => {
|
|
3778
|
+
if (document.querySelector('iframe[src*="accounts.google.com/gsi/"]') !== null) {
|
|
3779
|
+
return true;
|
|
3780
|
+
}
|
|
3781
|
+
// On-demand One-Tap: the page loads the GSI client script but renders
|
|
3782
|
+
// no static button and may not have initialized `google.accounts.id`
|
|
3783
|
+
// yet (amplitude, clerk). A plain click on the in-page "Sign in with
|
|
3784
|
+
// Google" affordance never redirects, so the bot used to falsely
|
|
3785
|
+
// conclude "signed in" and bounce to login. Treat the loaded client
|
|
3786
|
+
// script as a GSI affordance so the agent routes through
|
|
3787
|
+
// tryGoogleGsiLogin, which now raises One-Tap programmatically.
|
|
3788
|
+
if (document.querySelector('script[src*="accounts.google.com/gsi/client"]') !== null) {
|
|
3789
|
+
return true;
|
|
3790
|
+
}
|
|
3791
|
+
const g = window.google;
|
|
3792
|
+
return typeof g?.accounts?.id !== "undefined";
|
|
3793
|
+
});
|
|
3794
|
+
}
|
|
3795
|
+
catch {
|
|
3796
|
+
return false;
|
|
3797
|
+
}
|
|
3798
|
+
}
|
|
3799
|
+
// Drive a Google Identity Services / FedCM sign-in. Two variants are
|
|
3800
|
+
// handled:
|
|
3801
|
+
// - FedCM: clicking the GSI widget raises a browser-NATIVE credential
|
|
3802
|
+
// dialog (no DOM, no popup — invisible to Playwright). We enable the
|
|
3803
|
+
// CDP FedCm domain up front and auto-select the first account when
|
|
3804
|
+
// FedCm.dialogShown fires. The page's JS callback then receives the
|
|
3805
|
+
// JWT and establishes the session.
|
|
3806
|
+
// - Popup: older GSI opens a Google account-chooser window; we adopt it
|
|
3807
|
+
// like startOAuth does so the consent loop can drive it.
|
|
3808
|
+
// Returns how it resolved. The caller then runs the SAME post-OAuth
|
|
3809
|
+
// settle/consent/post-verify path as the redirect flow.
|
|
3810
|
+
async tryGoogleGsiLogin(triggerSelector, timeoutMs = 25_000) {
|
|
3811
|
+
if (!this.page || !this.context)
|
|
3812
|
+
throw new Error("Browser not started");
|
|
3813
|
+
this.oauthProductPage = this.page;
|
|
3814
|
+
let fedcmResolved = false;
|
|
3815
|
+
let cdp = null;
|
|
3816
|
+
try {
|
|
3817
|
+
cdp = await this.context.newCDPSession(this.page);
|
|
3818
|
+
await cdp.send("FedCm.enable", { disableRejectionDelay: true });
|
|
3819
|
+
cdp.on("FedCm.dialogShown", (ev) => {
|
|
3820
|
+
const e = ev;
|
|
3821
|
+
const dialogId = e.dialogId;
|
|
3822
|
+
if (dialogId === undefined)
|
|
3823
|
+
return;
|
|
3824
|
+
void (async () => {
|
|
3825
|
+
// A ConfirmIdpLogin dialog has no account list — it's the "Continue
|
|
3826
|
+
// as / sign in to Google" confirmation that precedes the account
|
|
3827
|
+
// chooser. selectAccount would error on it, so drive the confirm
|
|
3828
|
+
// button directly and skip selectAccount for this dialog type.
|
|
3829
|
+
if (e.dialogType === "ConfirmIdpLogin") {
|
|
3830
|
+
try {
|
|
3831
|
+
await cdp.send("FedCm.clickDialogButton", {
|
|
3832
|
+
dialogId,
|
|
3833
|
+
dialogButton: "ConfirmIdpLoginContinue",
|
|
3834
|
+
});
|
|
3835
|
+
}
|
|
3836
|
+
catch {
|
|
3837
|
+
// method/param may not apply to this build/dialog — non-fatal;
|
|
3838
|
+
// a subsequent AccountChooser dialog still resolves via select.
|
|
3839
|
+
}
|
|
3840
|
+
return;
|
|
3841
|
+
}
|
|
3842
|
+
try {
|
|
3843
|
+
// Pick the first account on the account-chooser dialog.
|
|
3844
|
+
await cdp.send("FedCm.selectAccount", { dialogId, accountIndex: 0 });
|
|
3845
|
+
fedcmResolved = true;
|
|
3846
|
+
}
|
|
3847
|
+
catch {
|
|
3848
|
+
// dialog dismissed or already resolved
|
|
3849
|
+
}
|
|
3850
|
+
if (!fedcmResolved) {
|
|
3851
|
+
// Some flows surface a "Continue as <name>" confirm even on the
|
|
3852
|
+
// account dialog; selectAccount alone usually completes it, but
|
|
3853
|
+
// when it didn't, try the confirm button as a fallback. Failure
|
|
3854
|
+
// is non-fatal — the popup/none path still applies.
|
|
3855
|
+
try {
|
|
3856
|
+
await cdp.send("FedCm.clickDialogButton", {
|
|
3857
|
+
dialogId,
|
|
3858
|
+
dialogButton: "ConfirmIdpLoginContinue",
|
|
3859
|
+
});
|
|
3860
|
+
fedcmResolved = true;
|
|
3861
|
+
}
|
|
3862
|
+
catch {
|
|
3863
|
+
// button absent or not applicable — degrade to popup/none
|
|
3864
|
+
}
|
|
3865
|
+
}
|
|
3866
|
+
})();
|
|
3867
|
+
});
|
|
3868
|
+
}
|
|
3869
|
+
catch {
|
|
3870
|
+
cdp = null; // FedCm domain unavailable — the popup path still works
|
|
3871
|
+
}
|
|
3872
|
+
const popupPromise = this.context
|
|
3873
|
+
.waitForEvent("page", { timeout: timeoutMs })
|
|
3874
|
+
.then((p) => p)
|
|
3875
|
+
.catch(() => null);
|
|
3876
|
+
await this.click(triggerSelector);
|
|
3877
|
+
// On-demand One-Tap: when the page loaded the GSI client but rendered no
|
|
3878
|
+
// static button, the click above hits an in-page affordance that never
|
|
3879
|
+
// raises a dialog on its own. If neither a FedCM dialog nor a popup has
|
|
3880
|
+
// appeared shortly after the click, ask GSI to raise One-Tap itself.
|
|
3881
|
+
// `google.accounts.id.prompt()` triggers the FedCM dialog our handler is
|
|
3882
|
+
// already listening for. Guarded — `window.google.accounts.id` may be
|
|
3883
|
+
// undefined (no-op) and any failure must degrade to the popup/none path.
|
|
3884
|
+
if (cdp !== null) {
|
|
3885
|
+
const promptDeadline = Date.now() + Math.min(4_000, timeoutMs);
|
|
3886
|
+
while (Date.now() < promptDeadline &&
|
|
3887
|
+
!fedcmResolved &&
|
|
3888
|
+
this.context.pages().length <= 1) {
|
|
3889
|
+
await this.sleep(250);
|
|
3890
|
+
}
|
|
3891
|
+
if (!fedcmResolved && this.context.pages().length <= 1) {
|
|
3892
|
+
try {
|
|
3893
|
+
await this.page.evaluate(() => {
|
|
3894
|
+
const g = window.google;
|
|
3895
|
+
const id = g?.accounts?.id;
|
|
3896
|
+
if (id !== undefined && typeof id.prompt === "function") {
|
|
3897
|
+
id.prompt();
|
|
3898
|
+
}
|
|
3899
|
+
});
|
|
3900
|
+
}
|
|
3901
|
+
catch {
|
|
3902
|
+
// GSI not initialized / prompt unavailable — popup/none still apply
|
|
3903
|
+
}
|
|
3904
|
+
}
|
|
3905
|
+
}
|
|
3906
|
+
// Resolve when a popup opens OR FedCM completes OR we hit the deadline.
|
|
3907
|
+
const fedcmWait = (async () => {
|
|
3908
|
+
const deadline = Date.now() + timeoutMs;
|
|
3909
|
+
while (Date.now() < deadline && !fedcmResolved) {
|
|
3910
|
+
await this.sleep(250);
|
|
3911
|
+
}
|
|
3912
|
+
return null;
|
|
3913
|
+
})();
|
|
3914
|
+
const popup = await Promise.race([popupPromise, fedcmWait]);
|
|
3915
|
+
if (cdp !== null) {
|
|
3916
|
+
try {
|
|
3917
|
+
await cdp.send("FedCm.disable");
|
|
3918
|
+
}
|
|
3919
|
+
catch {
|
|
3920
|
+
// best-effort
|
|
3921
|
+
}
|
|
3922
|
+
}
|
|
3923
|
+
if (popup !== null && popup !== this.page && !popup.isClosed()) {
|
|
3924
|
+
this.page = popup;
|
|
3925
|
+
try {
|
|
3926
|
+
await this.page.waitForLoadState("domcontentloaded", { timeout: 15_000 });
|
|
3927
|
+
}
|
|
3928
|
+
catch {
|
|
3929
|
+
// consent loop re-reads regardless
|
|
3930
|
+
}
|
|
3931
|
+
return { ok: true, via: "popup" };
|
|
3932
|
+
}
|
|
3933
|
+
if (fedcmResolved) {
|
|
3934
|
+
// Credential delivered to the page's JS callback — give the app a beat
|
|
3935
|
+
// to exchange it for a session and redirect.
|
|
3936
|
+
try {
|
|
3937
|
+
await this.page.waitForLoadState("domcontentloaded", { timeout: 10_000 });
|
|
3938
|
+
}
|
|
3939
|
+
catch {
|
|
3940
|
+
// best-effort
|
|
3941
|
+
}
|
|
3942
|
+
return { ok: true, via: "fedcm" };
|
|
3943
|
+
}
|
|
3944
|
+
return { ok: false, via: "none" };
|
|
3945
|
+
}
|
|
3265
3946
|
// URL of the active page (the OAuth page mid-handshake, the product
|
|
3266
3947
|
// page otherwise). Cheap — no screenshot, unlike getState().
|
|
3267
3948
|
currentUrl() {
|
|
3268
3949
|
return this.page !== null ? this.page.url() : "";
|
|
3269
3950
|
}
|
|
3951
|
+
// Fetch a URL's final response (following redirects) and return its
|
|
3952
|
+
// status, final URL, and body text — or null on any failure.
|
|
3953
|
+
//
|
|
3954
|
+
// WHY the CONTEXT request API (this.context.request) and not global
|
|
3955
|
+
// fetch / a fresh node http client: the context's APIRequestContext
|
|
3956
|
+
// shares the BrowserContext's proxy + cookie jar, so this egresses
|
|
3957
|
+
// through the SAME residential tunnel the real navigation uses. That
|
|
3958
|
+
// makes a probe here representative of what the browser would actually
|
|
3959
|
+
// land on (same IP reputation, same cf_clearance cookie) — and needs no
|
|
3960
|
+
// separate SOCKS/HTTP-proxy plumbing. Used by the signup-URL resolver to
|
|
3961
|
+
// distinguish a stale /signup that serves a login SPA from the real
|
|
3962
|
+
// signup form, BEFORE committing to a ~6-minute navigation.
|
|
3963
|
+
//
|
|
3964
|
+
// Bounded (15s, ≤10 redirects) and non-throwing — the resolver treats
|
|
3965
|
+
// null as "couldn't tell" and escalates.
|
|
3966
|
+
async fetchText(url) {
|
|
3967
|
+
if (this.context === null)
|
|
3968
|
+
return null;
|
|
3969
|
+
try {
|
|
3970
|
+
const response = await this.context.request.get(url, {
|
|
3971
|
+
maxRedirects: 10,
|
|
3972
|
+
timeout: 15_000,
|
|
3973
|
+
// We inspect 404/redirect bodies ourselves; don't let a non-2xx
|
|
3974
|
+
// throw before we can classify it.
|
|
3975
|
+
failOnStatusCode: false,
|
|
3976
|
+
});
|
|
3977
|
+
return {
|
|
3978
|
+
finalUrl: response.url(),
|
|
3979
|
+
status: response.status(),
|
|
3980
|
+
bodyText: await response.text(),
|
|
3981
|
+
};
|
|
3982
|
+
}
|
|
3983
|
+
catch {
|
|
3984
|
+
return null;
|
|
3985
|
+
}
|
|
3986
|
+
}
|
|
3270
3987
|
// True when the active OAuth page is gone — for the popup flow, the
|
|
3271
3988
|
// popup closing IS the signal the handshake finished.
|
|
3272
3989
|
oauthPageClosed() {
|
|
3273
3990
|
return this.page === null || this.page.isClosed();
|
|
3274
3991
|
}
|
|
3992
|
+
// Which OAuth providers have a LIVE session in this profile's cookie jar.
|
|
3993
|
+
// The logged-in-providers.json marker is a memo that drifts out of sync
|
|
3994
|
+
// (a --force-relogin clears it, a misclassified run clears it, a parallel
|
|
3995
|
+
// run overwrites it) — so a session that is genuinely live in the cookies
|
|
3996
|
+
// can go invisible to provider selection, which is exactly how a warm
|
|
3997
|
+
// GitHub session got skipped in favour of a broken Google path. The cookie
|
|
3998
|
+
// jar is the ground truth: read it directly. Cookie NAMES + presence only;
|
|
3999
|
+
// values are never read into logs. Best-effort — a read failure returns [].
|
|
4000
|
+
async detectSessionProviders() {
|
|
4001
|
+
if (this.context === null)
|
|
4002
|
+
return [];
|
|
4003
|
+
try {
|
|
4004
|
+
return sessionProvidersFromCookies(await this.context.cookies());
|
|
4005
|
+
}
|
|
4006
|
+
catch {
|
|
4007
|
+
return [];
|
|
4008
|
+
}
|
|
4009
|
+
}
|
|
3275
4010
|
// Advance a provider's consent / account-chooser screen by one click
|
|
3276
4011
|
// — the scope-gated auto-approve (T7/T13). Returns false when no
|
|
3277
4012
|
// approve control is present — the agent then aborts rather than
|
|
@@ -3446,7 +4181,11 @@ export function pickClickLocator(locator, count) {
|
|
|
3446
4181
|
export function collectAcrossShadowRoots(root, selector) {
|
|
3447
4182
|
const collected = [];
|
|
3448
4183
|
const walk = (r) => {
|
|
3449
|
-
|
|
4184
|
+
// `== null` (not `=== null`) covers both null and undefined — the
|
|
4185
|
+
// recursion below calls walk() on any non-null shadowRoot, so an
|
|
4186
|
+
// `undefined` one reaches here and `typeof undefined.querySelectorAll`
|
|
4187
|
+
// would throw before the typeof guard fired (#59 redis-cloud).
|
|
4188
|
+
if (r == null || typeof r.querySelectorAll !== "function")
|
|
3450
4189
|
return;
|
|
3451
4190
|
Array.from(r.querySelectorAll(selector)).forEach((n) => collected.push(n));
|
|
3452
4191
|
Array.from(r.querySelectorAll("*")).forEach((el) => {
|
|
@@ -3473,6 +4212,26 @@ export function pickSubmitButtonIndex(texts) {
|
|
|
3473
4212
|
});
|
|
3474
4213
|
return bestIndex;
|
|
3475
4214
|
}
|
|
4215
|
+
// ───────────── required-agreement checkbox guard ─────────────
|
|
4216
|
+
// Patterns shared by the pure helper below and the in-page evaluate in
|
|
4217
|
+
// `checkRequiredAgreementBoxes`. The evaluate runs in the page realm and
|
|
4218
|
+
// can't import, so the same two regexes are inlined there verbatim —
|
|
4219
|
+
// keep them BYTE-IDENTICAL with these.
|
|
4220
|
+
const AGREEMENT_TEXT_RE = /terms|tos\b|privacy|consent|policy|i agree|agree to|acknowledge|gdpr/i;
|
|
4221
|
+
const MARKETING_TEXT_RE = /newsletter|updates|offers|product tips|marketing|promotional|receive emails|opt[- ]?in to|subscribe/i;
|
|
4222
|
+
// True when a checkbox's associated text reads as a REQUIRED agreement
|
|
4223
|
+
// (terms/privacy/consent) and NOT as a marketing/newsletter opt-in.
|
|
4224
|
+
//
|
|
4225
|
+
// Why a deterministic check instead of trusting the LLM planner:
|
|
4226
|
+
// amplitude's signup renders the required TOS checkbox next to a pair of
|
|
4227
|
+
// data-storage-location card-radios; the planner mistook the whole
|
|
4228
|
+
// cluster for "ambiguous radios" and skipped the box, and amplitude's
|
|
4229
|
+
// submit isn't disabled when it's unticked — so the form silently
|
|
4230
|
+
// no-ops. We must never flip a marketing opt-in on the user's behalf,
|
|
4231
|
+
// hence the explicit marketing exclusion.
|
|
4232
|
+
// Decide whether a checkbox's associated text reads as a required
// agreement rather than a marketing opt-in.
//   text — the label/nearby text associated with the checkbox.
// Returns true only when the text matches AGREEMENT_TEXT_RE and does not
// match MARKETING_TEXT_RE.
export function isAgreementCheckboxText(text) {
    // Marketing wording vetoes the match outright, even when agreement
    // wording is also present.
    if (MARKETING_TEXT_RE.test(text))
        return false;
    return AGREEMENT_TEXT_RE.test(text);
}
|
|
3476
4235
|
// Parse a UNIVERSAL_BOT_PROXY_URL — e.g. "http://user:pass@host:8080" or
|
|
3477
4236
|
// "socks5://host:1080" — into Playwright's proxy option shape. Playwright
|
|
3478
4237
|
// wants credentials separate from `server`, so we split them out and
|