@trusty-squire/mcp 0.8.15 → 0.8.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/dist/bot/agent.d.ts +42 -3
  2. package/dist/bot/agent.d.ts.map +1 -1
  3. package/dist/bot/agent.js +2423 -272
  4. package/dist/bot/agent.js.map +1 -1
  5. package/dist/bot/browser.d.ts +31 -3
  6. package/dist/bot/browser.d.ts.map +1 -1
  7. package/dist/bot/browser.js +872 -113
  8. package/dist/bot/browser.js.map +1 -1
  9. package/dist/bot/captcha-solver-2captcha.d.ts +12 -0
  10. package/dist/bot/captcha-solver-2captcha.d.ts.map +1 -1
  11. package/dist/bot/captcha-solver-2captcha.js +28 -5
  12. package/dist/bot/captcha-solver-2captcha.js.map +1 -1
  13. package/dist/bot/google-login.d.ts.map +1 -1
  14. package/dist/bot/google-login.js +39 -0
  15. package/dist/bot/google-login.js.map +1 -1
  16. package/dist/bot/index.d.ts +1 -1
  17. package/dist/bot/index.d.ts.map +1 -1
  18. package/dist/bot/oauth-providers.d.ts.map +1 -1
  19. package/dist/bot/oauth-providers.js +13 -3
  20. package/dist/bot/oauth-providers.js.map +1 -1
  21. package/dist/bot/promote-to-skill.d.ts +2 -1
  22. package/dist/bot/promote-to-skill.d.ts.map +1 -1
  23. package/dist/bot/promote-to-skill.js +26 -0
  24. package/dist/bot/promote-to-skill.js.map +1 -1
  25. package/dist/bot/replay-skill.d.ts.map +1 -1
  26. package/dist/bot/replay-skill.js +237 -32
  27. package/dist/bot/replay-skill.js.map +1 -1
  28. package/dist/bot/xvfb.d.ts.map +1 -1
  29. package/dist/bot/xvfb.js +8 -3
  30. package/dist/bot/xvfb.js.map +1 -1
  31. package/dist/install/cli.d.ts +5 -0
  32. package/dist/install/cli.d.ts.map +1 -1
  33. package/dist/install/cli.js +33 -8
  34. package/dist/install/cli.js.map +1 -1
  35. package/dist/tools/signup-telemetry.d.ts +2 -2
  36. package/dist/tools/signup-telemetry.d.ts.map +1 -1
  37. package/dist/tools/signup-telemetry.js.map +1 -1
  38. package/package.json +2 -1
@@ -31,10 +31,11 @@ import { startXvfb, xvfbAvailable } from "./xvfb.js";
31
31
  // the CJS modules lazily (the stealth toolchain only ships CJS) and treat
32
32
  // stealth as best-effort — a missing dep should never crash the bot.
33
33
  const require = createRequire(import.meta.url);
34
- // Whether the operator asked for the CDP-hardened launcher
35
- // (rebrowser-playwright-core, which closes the Runtime.enable leak that
36
- // Turnstile / reCAPTCHA-v3 score on). Flag-gated so it can be A/B'd
37
- // against the baseline launcher see docs/DESIGN-antibot-hardening.md.
34
+ // Whether the operator asked for the CDP-hardened launcher (patchright,
35
+ // which runs evaluations in an isolated world and removes the automation
36
+ // tells — mainWorldExecution, navigator.webdriver — that Turnstile /
37
+ // reCAPTCHA-v3 score on). Flag-gated so it can be A/B'd against the
38
+ // stealth baseline — see docs/DESIGN-antibot-hardening.md.
38
39
  function cdpHardeningRequested() {
39
40
  const v = process.env.BOT_CDP_HARDENED;
40
41
  return v === "1" || v === "true" || v === "on";
@@ -43,7 +44,7 @@ let cachedChromium = null;
43
44
  // The stealth profile the cached launcher actually represents. Set the
44
45
  // first time getChromium() resolves a launcher and read back via
45
46
  // BrowserController.stealthProfile for the CaptchaEvent A/B tag. A
46
- // rebrowser load failure degrades it to "baseline" truthfully rather
47
+ // patchright load failure degrades it to "baseline" truthfully rather
47
48
  // than over-claiming "cdp_hardened" on a run that never got the patch.
48
49
  let activeStealthProfile = "baseline";
49
50
  function activeStealthProfileValue() {
@@ -54,39 +55,31 @@ function getChromium() {
54
55
  return cachedChromium;
55
56
  const hardened = cdpHardeningRequested();
56
57
  try {
57
- const { addExtra } = require("playwright-extra");
58
- const stealth = require("puppeteer-extra-plugin-stealth");
59
- let baseLauncher = baseChromium;
60
58
  if (hardened) {
61
- // rebrowser-playwright-core defers/isolates the Runtime.enable CDP
62
- // call. Fix mode = `addBinding`, NOT `alwaysIsolated`: the latter
63
- // gives the cleanest leak closure on a static page but CRASHES the
64
- // bot's real flow a live harvest run died with
65
- // "Target page, context or browser has been closed" at the OAuth
66
- // scan, because forcing every page.evaluate into an isolated world
67
- // breaks the prewarm/multi-page juggling. `addBinding` keeps
68
- // main-world evaluate (the flow works) while still avoiding the
69
- // leaky Runtime.enable call it closes the sourceUrl/UtilityScript
70
- // tell but leaves mainWorldExecution detectable. That partial
71
- // closure is the price of a launcher that actually completes
72
- // signups. See DESIGN-antibot-hardening.md D3 (revised). Set BEFORE
73
- // the require so the patch reads it; honor an operator pin.
74
- if (process.env.REBROWSER_PATCHES_RUNTIME_FIX_MODE === undefined) {
75
- process.env.REBROWSER_PATCHES_RUNTIME_FIX_MODE = "addBinding";
76
- }
77
- const rebrowser = require("rebrowser-playwright-core");
78
- baseLauncher = rebrowser.chromium;
59
+ // patchright — a maintained Playwright fork that runs every
60
+ // evaluation in an ISOLATED world (so the bot's DOM probing is
61
+ // invisible to a page that traps DOM methods — closes the
62
+ // `mainWorldExecution` tell) and handles `navigator.webdriver`
63
+ // natively + correctly. Verified ALL-GREEN against the maintained
64
+ // rebrowser bot-detector (mainWorldExecution, navigatorWebdriver,
65
+ // viewport, runtimeEnableLeak all clean). It drives real Chrome
66
+ // (channel) directly — the earlier rebrowser fork couldn't, which is
67
+ // why the old hardened arm was forced onto bundled chromium and then
68
+ // crashed the OAuth flow. NO playwright-extra/stealth wrap here: the
69
+ // stealth plugin's manual `navigator.webdriver` defineProperty
70
+ // RE-ADDS a detectable property (proven counterproductive) — patchright
71
+ // does it right. See docs/DESIGN-antibot-hardening.md.
72
+ const patchright = require("patchright");
73
+ cachedChromium = patchright.chromium;
79
74
  activeStealthProfile = "cdp_hardened";
75
+ return cachedChromium;
80
76
  }
81
- else {
82
- activeStealthProfile = "baseline";
83
- }
84
- // addExtra(baseChromium) is exactly what playwright-extra's default
85
- // `chromium` export already is, so the baseline path is unchanged;
86
- // the hardened path swaps in the rebrowser launcher underneath the
87
- // same stealth wrap (Codex review: a bare import swap would NOT
88
- // repoint the stealth-wrapped launcher — it must go through addExtra).
89
- const extra = addExtra(baseLauncher);
77
+ // Baseline: playwright-extra + stealth (unchanged). addExtra(baseChromium)
78
+ // is exactly what playwright-extra's default `chromium` export already is.
79
+ const { addExtra } = require("playwright-extra");
80
+ const stealth = require("puppeteer-extra-plugin-stealth");
81
+ activeStealthProfile = "baseline";
82
+ const extra = addExtra(baseChromium);
90
83
  extra.use(stealth());
91
84
  cachedChromium = extra;
92
85
  }
@@ -100,6 +93,31 @@ function getChromium() {
100
93
  }
101
94
  return cachedChromium;
102
95
  }
96
+ // Map a cookie jar to the OAuth providers that have a LIVE logged-in session.
97
+ // The auth cookies that mean "signed in": GitHub → `user_session`; Google →
98
+ // any of the *SID session cookies (NID / CONSENT / 1P_JAR are set even when
99
+ // logged out, so they are deliberately NOT signals). Host-scoped so a
100
+ // google.com cookie can't pass for github. Cookie NAMES + presence only;
101
+ // values are checked for non-triviality, never logged. Exported for tests.
102
/**
 * Map a cookie jar to the OAuth providers that have a live logged-in session.
 *
 * A provider counts as "live" when the jar holds at least one cookie whose
 * domain belongs to the provider's host, whose name is one of the provider's
 * session-cookie names, and whose value is longer than 10 characters (a
 * non-triviality check; the value itself is never logged or returned).
 *
 * Best-effort by design: a missing / non-array jar yields `[]`, and entries
 * that are not objects with string `domain`, `name` and `value` fields are
 * ignored instead of throwing.
 *
 * @param {Array<{domain: string, name: string, value: string}>} cookies
 *   Cookie records to inspect.
 * @returns {string[]} Provider ids ("github", "google") in signature order.
 */
export function sessionProvidersFromCookies(cookies) {
    // A cookie jar that failed to load should read as "no sessions", not crash.
    if (!Array.isArray(cookies))
        return [];
    const SIGNATURES = [
        { provider: "github", host: /(^|\.)github\.com$/i, names: ["user_session"] },
        {
            provider: "google",
            host: /(^|\.)google\.com$/i,
            names: ["SID", "__Secure-1PSID", "__Secure-3PSID"],
        },
    ];
    // Only entries with the three string fields we read can be evaluated;
    // anything else (null, partial records) is skipped.
    const isWellFormed = (c) => c !== null &&
        typeof c === "object" &&
        typeof c.domain === "string" &&
        typeof c.name === "string" &&
        typeof c.value === "string";
    const usable = cookies.filter(isWellFormed);
    const hasLiveCookie = (sig) => usable.some((c) => 
    // Leading dot (domain-cookie form) is dropped before the host test.
    sig.host.test(c.domain.replace(/^\./, "")) &&
        sig.names.includes(c.name) &&
        c.value.length > 10);
    return SIGNATURES.filter(hasLiveCookie).map((sig) => sig.provider);
}
103
121
  function isCaptchaVariant(v) {
104
122
  return (v === "turnstile" ||
105
123
  v === "recaptcha_v2" ||
@@ -194,6 +212,49 @@ async function detectChromiumChannel() {
194
212
  }
195
213
  return null;
196
214
  }
215
+ // Classify an anti-bot interstitial page from its (title + body) text.
216
+ // `onInterstitial` matches the static Cloudflare/Turnstile challenge copy.
217
+ // `verificationPassed` is the signal the challenge SUCCEEDED — but
218
+ // Cloudflare leaves the static "Just a moment / Performing security
219
+ // verification" copy ON THE PAGE even after it appends "Verification
220
+ // successful. Waiting for…", so `onInterstitial` alone wrongly reads as
221
+ // "still blocked" and the bot bails as anti_bot_blocked — exactly what
222
+ // stranded codesandbox/lambda-labs once patchright started PASSING the
223
+ // challenge. When the challenge passed, the redirect is just racing/
224
+ // stuck; the caller should be patient + reload, not give up. Exported
225
+ // for unit tests.
226
/**
 * Classify anti-bot interstitial text (page title + body).
 *
 * @param {string} text Text to scan.
 * @returns {{onInterstitial: boolean, verificationPassed: boolean}}
 *   `onInterstitial` — the static challenge copy is present;
 *   `verificationPassed` — success copy is present (may co-occur with the
 *   challenge copy, since both can be on the page at once).
 */
export function classifyInterstitialText(text) {
    const CHALLENGE_COPY = /just a moment|performing security verification|verifying you are human|checking your browser|attention required/i;
    const SUCCESS_COPY = /verification successful|you are (now )?verified|success!|challenge[- ]?(passed|complete)/i;
    return {
        onInterstitial: CHALLENGE_COPY.test(text),
        verificationPassed: SUCCESS_COPY.test(text),
    };
}
231
+ // After a Cloudflare managed challenge PASSES, the cf_clearance cookie is
232
+ // set but the URL still carries Cloudflare's single-use challenge token
233
+ // (`__cf_chl_rt_tk`, `__cf_chl_tk`, `__cf_chl_f_tk`, …). Cloudflare's own
234
+ // client-side redirect to the cleared page can stall — especially over a
235
+ // high-latency residential tunnel, where the meta-refresh/JS hop never
236
+ // fires inside our wait budget. Re-navigating to the SAME url with those
237
+ // one-shot tokens stripped serves the real page directly (the clearance
238
+ // cookie now satisfies the edge), instead of waiting on the stuck redirect.
239
+ // Returns the cleaned URL, or null when there's no challenge token to strip
240
+ // (nothing this can do better than a plain reload). Exported for unit tests.
241
/**
 * Remove Cloudflare's one-shot challenge tokens (query parameters whose name
 * starts with `__cf_chl`, case-insensitive) from a URL.
 *
 * @param {string} rawUrl Absolute URL to clean.
 * @returns {string|null} The URL without challenge parameters, or `null`
 *   when `rawUrl` does not parse or carries no such parameter.
 */
export function stripCloudflareChallengeParams(rawUrl) {
    let parsed;
    try {
        parsed = new URL(rawUrl);
    }
    catch {
        return null;
    }
    // Snapshot the matching names first so deletion never races iteration.
    const challengeKeys = Array.from(parsed.searchParams.keys()).filter((name) => name.toLowerCase().startsWith("__cf_chl"));
    if (challengeKeys.length === 0)
        return null;
    challengeKeys.forEach((name) => parsed.searchParams.delete(name));
    return parsed.toString();
}
197
258
  export class BrowserController {
198
259
  // The persistent browser context. Persistent (launchPersistentContext)
199
260
  // rather than an ephemeral context so the profile carries the user's
@@ -259,8 +320,8 @@ export class BrowserController {
259
320
  return this.proxyServer;
260
321
  }
261
322
  // The stealth profile the most recent .start() launched under:
262
- // "cdp_hardened" when the rebrowser launcher actually loaded
263
- // (BOT_CDP_HARDENED set + fork present), else "baseline". Surfaced
323
+ // "cdp_hardened" when the patchright launcher actually loaded
324
+ // (BOT_CDP_HARDENED set + patchright present), else "baseline". Surfaced
264
325
  // for the CaptchaEvent A/B tag. Throws before .start() — same reason
265
326
  // as channel/proxied.
266
327
  get stealthProfile() {
@@ -367,30 +428,19 @@ export class BrowserController {
367
428
  // decide on executablePath below.
368
429
  const launcher = getChromium();
369
430
  const hardened = activeStealthProfileValue() === "cdp_hardened";
370
- // In hardened mode, ALWAYS drive the bundled (playwright-matched)
371
- // chromium via executablePath and IGNORE the real-Chrome channel.
372
- // rebrowser-playwright-core's older CDP driver cannot reliably drive
373
- // an arbitrary *new* real Chrome (observed: Chrome 148
374
- // "Target page, context or browser has been closed" mid-flow), while
375
- // the spike validated it against the bundled chromium revision. The
376
- // bundled binary is the version the driver expects; channel +
377
- // executablePath are mutually exclusive in Playwright, so we drop the
378
- // channel here. (This makes the hardened arm use bundled chromium vs
379
- // the baseline's real Chrome — a known A/B confound, documented in
380
- // DESIGN-antibot-hardening.md.)
381
- const hardenedExecutablePath = hardened ? baseChromium.executablePath() : null;
382
- const effectiveChannel = hardened ? null : channel;
383
- // Keep telemetry honest: report what actually launched.
384
- this.launchedChannel = effectiveChannel;
431
+ // Both launchers drive real Chrome via `channel`: baseline through
432
+ // playwright+stealth, hardened through patchright. patchright closes
433
+ // the automation tells at the protocol layer and drives real Chrome
434
+ // directly, so it no longer needs the bundled-chromium pin the old
435
+ // rebrowser fork required (the pin is what crashed the OAuth flow and
436
+ // confounded the A/B). One binary for both arms.
437
+ this.launchedChannel = channel;
385
438
  const context = await launchWithProfileGate(this.profileDir, () => launcher.launchPersistentContext(this.profileDir, {
386
439
  headless: chromeHeadless,
387
440
  ...(chromeEnv !== undefined ? { env: chromeEnv } : {}),
388
441
  // `channel:` selects a real installed browser over the bundled
389
- // binary; omitted in hardened mode and when null.
390
- ...(effectiveChannel !== null ? { channel: effectiveChannel } : {}),
391
- ...(hardenedExecutablePath !== null
392
- ? { executablePath: hardenedExecutablePath }
393
- : {}),
442
+ // binary (omitted when channel detection found nothing).
443
+ ...(channel !== null ? { channel } : {}),
394
444
  // `proxy:` routes egress through a residential proxy — only for
395
445
  // datacenter-class egress (see resolveProxy()).
396
446
  ...(proxy !== null ? { proxy } : {}),
@@ -398,9 +448,33 @@ export class BrowserController {
398
448
  "--disable-blink-features=AutomationControlled",
399
449
  "--no-sandbox",
400
450
  "--disable-dev-shm-usage",
451
+ // Enable software WebGL on the GPU-less Xvfb host. Without this,
452
+ // Chrome 120+ disables WebGL entirely (getContext("webgl") → null),
453
+ // which MEASURED (2026-06-04) as the bot's one real fingerprint gap:
454
+ // a browser with NO WebGL is itself an anti-bot tell (reCAPTCHA
455
+ // Enterprise / device-fingerprinting weight it). SwiftShader gives a
456
+ // real WebGL context. MEASURED 2026-06-04: with this on, WebGL reports
457
+ // a Mesa/llvmpipe software renderer and the reCAPTCHA v3 score stays
458
+ // 1.0 — a strict improvement over "no WebGL at all", which more
459
+ // fingerprint libs treat as suspicious than a software renderer. The
460
+ // rc.33 init-script below TRIES to spoof the renderer string to a real
461
+ // Intel GPU, but it is INERT under patchright (hardened) — see its
462
+ // comment. A clean GPU-string spoof under patchright needs binary-level
463
+ // support; tracked as a follow-up, not blocking (score is already 1.0).
464
+ "--enable-unsafe-swiftshader",
465
+ "--ignore-gpu-blocklist",
401
466
  ],
402
- viewport: { width: 1280, height: 720 },
403
- userAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
467
+ // `viewport: null` makes the page use the REAL OS window size
468
+ // instead of a hardcoded value. The old fixed 1280×720 is exactly
469
+ // Playwright's device-emulation default and is flagged by anti-bot
470
+ // detectors as "default Playwright viewport"; the real window
471
+ // (sized by the Xvfb display) reads as an ordinary browser.
472
+ viewport: null,
473
+ // No `userAgent` override: a real Chrome (channel) supplies a UA
474
+ // that AGREES with navigator.userAgentData + the binary version.
475
+ // The old hardcoded "Chrome/131" string mismatched the actual
476
+ // binary (148) — a UA-vs-userAgentData inconsistency that is itself
477
+ // a fingerprint tell. Let the browser report its own coherent UA.
404
478
  // locale stays en-US deliberately: matching it to the proxy
405
479
  // country would render signup pages in that language, and the
406
480
  // Claude vision form-planner expects English.
@@ -424,33 +498,58 @@ export class BrowserController {
424
498
  ...(geo?.geolocation !== undefined ? { geolocation: geo.geolocation } : {}),
425
499
  }));
426
500
  this.context = context;
427
- // Patch the navigator.webdriver flag most anti-bot heuristics look here.
428
- await context.addInitScript(() => {
429
- Object.defineProperty(navigator, "webdriver", { get: () => undefined });
430
- });
431
- // rc.33 spoof WebGL renderer/vendor. Under Xvfb (or any non-GPU
432
- // host) Chrome falls back to SwiftShader, which reports
433
- // UNMASKED_VENDOR_WEBGL = "Google Inc. (Google)"
434
- // UNMASKED_RENDERER_WEBGL = "ANGLE (Google, ...SwiftShader...)"
435
- // Both strings are on every published anti-bot fingerprint
436
- // blocklist; Cloudflare Turnstile responds with error 600010
437
- // ("internal client execution error") rather than even trying to
438
- // grade the click. Override the two parameter codes on both
439
- // WebGL1 and WebGL2 prototypes to look like a stock Intel laptop
440
- // GPU. Doesn't change actual rendering only the strings the
441
- // fingerprint probe reads back.
442
- await context.addInitScript(() => {
501
+ // Patch navigator.webdriver — BASELINE ONLY. Measured against the
502
+ // rebrowser bot-detector, this manual `defineProperty` is
503
+ // COUNTERPRODUCTIVE under patchright: it re-adds `webdriver` as an own
504
+ // property the detector then flags, whereas patchright removes it
505
+ // correctly at the source. So in hardened mode we leave it to
506
+ // patchright; only the stealth baseline gets the manual patch.
507
+ if (!hardened) {
508
+ await context.addInitScript(() => {
509
+ Object.defineProperty(navigator, "webdriver", { get: () => undefined });
510
+ });
511
+ }
512
+ // rc.33 / 2026-06-04 — spoof the WebGL UNMASKED vendor+renderer toward a
513
+ // stock Intel GPU, so the software Mesa/llvmpipe string (--enable-unsafe-
514
+ // swiftshader gives us a context, but llvmpipe is itself a VM/headless
515
+ // tell) doesn't read through. Applied TWO ways because patchright
516
+ // (hardened) isolates document-start scripts from the page's main world:
517
+ // • addInitScript — document-start; the effective path in the stealth
518
+ // BASELINE (non-patchright).
519
+ // • re-applied via page.evaluate on every navigation — the ONLY path that
520
+ // reaches the MAIN world under patchright. MEASURED 2026-06-04:
521
+ // addInitScript AND raw CDP Page.addScriptToEvaluateOnNewDocument both
522
+ // land in patchright's isolated world (renderer stayed llvmpipe);
523
+ // page.evaluate does not (renderer became Intel), and the v3 score held
524
+ // at 1.0. Idempotent via a marker so the per-nav re-apply is cheap, and
525
+ // getParameter.toString() is masked to the original native source so
526
+ // the patch itself isn't a tell. Only strings change, not rendering.
527
+ const installWebglSpoof = () => {
443
528
  const VENDOR_WEBGL = 0x9245; // UNMASKED_VENDOR_WEBGL
444
529
  const RENDERER_WEBGL = 0x9246; // UNMASKED_RENDERER_WEBGL
445
530
  const spoof = (proto) => {
531
+ // The marker lives on the prototype so re-application is a no-op; the
532
+ // cast is the one typed-alternative-exhausted spot (adding an ad-hoc
533
+ // brand to a DOM prototype).
534
+ const marked = proto;
535
+ if (marked.__tsWebglPatched === true)
536
+ return;
446
537
  const orig = proto.getParameter;
538
+ const native = orig.toString();
447
539
  proto.getParameter = function (p) {
448
540
  if (p === VENDOR_WEBGL)
449
- return "Intel Inc.";
450
- if (p === RENDERER_WEBGL)
451
- return "Intel(R) UHD Graphics 620";
541
+ return "Google Inc. (Intel)";
542
+ if (p === RENDERER_WEBGL) {
543
+ return "ANGLE (Intel, Mesa Intel(R) UHD Graphics 620 (KBL GT2), OpenGL 4.6)";
544
+ }
452
545
  return orig.call(this, p);
453
546
  };
547
+ Object.defineProperty(proto.getParameter, "toString", {
548
+ value: () => native,
549
+ configurable: true,
550
+ writable: true,
551
+ });
552
+ marked.__tsWebglPatched = true;
454
553
  };
455
554
  if (typeof WebGLRenderingContext !== "undefined") {
456
555
  spoof(WebGLRenderingContext.prototype);
@@ -458,8 +557,26 @@ export class BrowserController {
458
557
  if (typeof WebGL2RenderingContext !== "undefined") {
459
558
  spoof(WebGL2RenderingContext.prototype);
460
559
  }
461
- });
560
+ };
561
+ await context.addInitScript(installWebglSpoof);
462
562
  this.page = context.pages()[0] ?? (await context.newPage());
563
+ // Re-apply on every navigation — the main-world reach patchright's isolated
564
+ // init world denies us. framenavigated fires at navigation-commit (before
565
+ // most page JS), so a late WebGL query (reCAPTCHA scores seconds in) sees
566
+ // the spoofed strings; a document-start fingerprinter could still race it.
567
+ const reapplyWebglSpoof = () => {
568
+ const pg = this.page;
569
+ if (pg === null)
570
+ return;
571
+ void pg.evaluate(installWebglSpoof).catch(() => {
572
+ // mid-navigation / closed page — the next navigation re-applies.
573
+ });
574
+ };
575
+ this.page.on("framenavigated", (frame) => {
576
+ if (this.page !== null && frame === this.page.mainFrame())
577
+ reapplyWebglSpoof();
578
+ });
579
+ this.page.on("load", reapplyWebglSpoof);
463
580
  // rc.33 — captcha tracing. When UNIVERSAL_BOT_CAPTCHA_TRACE=1 is
464
581
  // set, log every response from Cloudflare/Google's challenge
465
582
  // endpoints plus any console message that mentions captcha-y
@@ -878,6 +995,87 @@ export class BrowserController {
878
995
  await this.page.check(selector, { force: true });
879
996
  }
880
997
  }
998
+ // Deterministic pre-submit guard: tick every visible, unchecked,
999
+ // non-disabled REQUIRED-AGREEMENT checkbox (terms/privacy/consent),
1000
+ // while never touching marketing/newsletter opt-ins.
1001
+ //
1002
+ // Why this exists separate from the LLM planner: amplitude's signup
1003
+ // has a required TOS checkbox the planner skipped (it read the
1004
+ // adjacent data-storage card-radios as the whole cluster being
1005
+ // "ambiguous radios"), and amplitude does NOT disable submit when the
1006
+ // box is unticked — so the click silently no-ops and the bot then
1007
+ // waits forever for a verification mail that never sends. This runs on
1008
+ // EVERY submit, not only the `submit_disabled` path in clickSubmit().
1009
+ //
1010
+ // Returns the labels/testids it checked (for step logging); empty when
1011
+ // it ticked nothing.
1012
+ async checkRequiredAgreementBoxes() {
1013
+ if (!this.page)
1014
+ throw new Error("Browser not started");
1015
+ // Best-effort: a page-eval failure (navigation mid-call, detached
1016
+ // frame) must never fail the parent submit — return nothing.
1017
+ try {
1018
+ return await this.page.evaluate(() => {
1019
+ // These two regexes MUST stay byte-identical with
1020
+ // AGREEMENT_TEXT_RE / MARKETING_TEXT_RE in this module — the
1021
+ // page realm can't import, so they're inlined here.
1022
+ const agreementRe = /terms|tos\b|privacy|consent|policy|i agree|agree to|acknowledge|gdpr/i;
1023
+ const marketingRe = /newsletter|updates|offers|product tips|marketing|promotional|receive emails|opt[- ]?in to|subscribe/i;
1024
+ const checked = [];
1025
+ const boxes = Array.from(document.querySelectorAll('input[type="checkbox"]'));
1026
+ for (const box of boxes) {
1027
+ if (box.checked || box.disabled)
1028
+ continue;
1029
+ const rect = box.getBoundingClientRect();
1030
+ if (rect.width <= 0 || rect.height <= 0)
1031
+ continue;
1032
+ // Associated text = attributes + a label[for=id] + nearest
1033
+ // ancestor <label> + the immediately following sibling text.
1034
+ const parts = [
1035
+ box.getAttribute("data-testid") ?? "",
1036
+ box.getAttribute("name") ?? "",
1037
+ box.id,
1038
+ box.getAttribute("aria-label") ?? "",
1039
+ ];
1040
+ if (box.id) {
1041
+ const forLabel = document.querySelector(`label[for="${CSS.escape(box.id)}"]`);
1042
+ if (forLabel)
1043
+ parts.push(forLabel.textContent ?? "");
1044
+ }
1045
+ const ancestorLabel = box.closest("label");
1046
+ if (ancestorLabel)
1047
+ parts.push(ancestorLabel.textContent ?? "");
1048
+ const sibling = box.nextSibling;
1049
+ if (sibling && sibling.textContent)
1050
+ parts.push(sibling.textContent);
1051
+ if (box.nextElementSibling) {
1052
+ parts.push(box.nextElementSibling.textContent ?? "");
1053
+ }
1054
+ const text = parts.join(" ");
1055
+ if (!agreementRe.test(text) || marketingRe.test(text))
1056
+ continue;
1057
+ // React/Vue controlled inputs ignore a bare `.checked = true`:
1058
+ // their state lives in the framework, updated only by the real
1059
+ // event flow. Set the property AND dispatch input/change AND a
1060
+ // synthetic click so the controlled binding observes the flip.
1061
+ box.checked = true;
1062
+ box.dispatchEvent(new Event("input", { bubbles: true }));
1063
+ box.dispatchEvent(new Event("change", { bubbles: true }));
1064
+ box.click();
1065
+ const label = box.getAttribute("data-testid") ||
1066
+ box.getAttribute("name") ||
1067
+ box.id ||
1068
+ box.getAttribute("aria-label") ||
1069
+ "agreement-checkbox";
1070
+ checked.push(label);
1071
+ }
1072
+ return checked;
1073
+ });
1074
+ }
1075
+ catch {
1076
+ return [];
1077
+ }
1078
+ }
881
1079
  // Scroll a Terms-of-Service style modal to the bottom so the gated
882
1080
  // "Accept" button enables. Railway's signup is the canonical case:
883
1081
  // a modal with a virtualized ToS list watches real `scroll` /
@@ -1754,6 +1952,11 @@ export class BrowserController {
1754
1952
  const recaptcha = document.querySelector('textarea[name="g-recaptcha-response"]');
1755
1953
  if (recaptcha !== null && recaptcha.value.length > 0)
1756
1954
  return true;
1955
+ // hCaptcha populates its own response textarea on a passed
1956
+ // checkbox (plausible). Same shape as reCAPTCHA's.
1957
+ const hcaptcha = document.querySelector('textarea[name="h-captcha-response"]');
1958
+ if (hcaptcha !== null && hcaptcha.value.length > 0)
1959
+ return true;
1757
1960
  // Some Turnstile installs use a managed mode that emits its
1758
1961
  // own attribute on the host div when solved.
1759
1962
  const cfManaged = document.querySelector(".cf-turnstile[data-state='success']");
@@ -1791,6 +1994,34 @@ export class BrowserController {
1791
1994
  async findCaptchaWidget() {
1792
1995
  if (!this.page)
1793
1996
  throw new Error("Browser not started");
1997
+ // An INVISIBLE reCAPTCHA (api2/anchor with size=invisible — the
1998
+ // bottom-right badge) is score-mode: there is no checkbox to click, and
1999
+ // its token is emitted only when the form's submit handler calls
2000
+ // grecaptcha.execute(). It must NOT be treated as a solvable visible
2001
+ // widget. MEASURED on amplitude (2026-06-04): the badge iframe is
2002
+ // ~256×60, so it cleared the size filter below and got "found" + clicked;
2003
+ // the pre-submit token-poll then timed out and the bot escalated to
2004
+ // 2Captcha, which can't solve a score-mode widget (ERROR_CAPTCHA_
2005
+ // UNSOLVABLE) → captcha_blocked — even though our v3 score is ~1.0 and a
2006
+ // plain form-submit would have passed silently. Detect "invisible-only"
2007
+ // (badge present, no visible checkbox anchor, no rendered bframe grid) and
2008
+ // skip reCAPTCHA entirely so the signup proceeds to submit.
2009
+ const recaptchaInvisibleOnly = await this.page
2010
+ .evaluate(() => {
2011
+ const q = (s) => document.querySelector(s) !== null;
2012
+ const visibleAnchor = Array.from(document.querySelectorAll('iframe[src*="recaptcha/api2/anchor"]')).some((f) => !/size=invisible/.test(f.src));
2013
+ const bframe = (() => {
2014
+ const f = document.querySelector('iframe[src*="recaptcha/api2/bframe"]');
2015
+ if (f === null)
2016
+ return false;
2017
+ const r = f.getBoundingClientRect();
2018
+ return r.width > 30 && r.height > 30;
2019
+ })();
2020
+ const invisiblePresent = q('iframe[src*="recaptcha/api2/anchor"][src*="size=invisible"]') ||
2021
+ q(".grecaptcha-badge");
2022
+ return invisiblePresent && !visibleAnchor && !bframe;
2023
+ })
2024
+ .catch(() => false);
1794
2025
  // Phase 1: widget shape with polling. page.locator (unlike the
1795
2026
  // querySelector in detectCaptchaVariant) pierces OPEN shadow roots,
1796
2027
  // so the Cloudflare iframe is reachable even on modern shadow-DOM
@@ -1803,11 +2034,19 @@ export class BrowserController {
1803
2034
  // reCAPTCHA v2: src contains "recaptcha/api2"
1804
2035
  const iframeCandidates = [
1805
2036
  { kind: "turnstile", selector: 'iframe[src*="challenges.cloudflare.com"]' },
1806
- { kind: "recaptcha", selector: 'iframe[src*="recaptcha/api2"]' },
2037
+ // Visible reCAPTCHA only — the size=invisible anchor (score-mode badge)
2038
+ // is handled by the recaptchaInvisibleOnly skip above.
2039
+ { kind: "recaptcha", selector: 'iframe[src*="recaptcha/api2/anchor"]:not([src*="size=invisible"])' },
2040
+ // hCaptcha's checkbox iframe (the anchor frame). Plausible and other
2041
+ // hCaptcha sites render this; clicking it ticks the box the same way
2042
+ // Turnstile/reCAPTCHA do.
2043
+ { kind: "hcaptcha", selector: 'iframe[src*="hcaptcha.com"][src*="frame=checkbox"]' },
2044
+ { kind: "hcaptcha", selector: 'iframe[src*="newassets.hcaptcha.com"]' },
1807
2045
  // Host-div fallbacks (light DOM) — preferred order keeps the iframe
1808
2046
  // first when present (more precise click target).
1809
2047
  { kind: "turnstile", selector: ".cf-turnstile" },
1810
2048
  { kind: "turnstile", selector: "#clerk-captcha" },
2049
+ { kind: "hcaptcha", selector: ".h-captcha" },
1811
2050
  ];
1812
2051
  const iframeDeadline = Date.now() + 5000;
1813
2052
  while (Date.now() < iframeDeadline) {
@@ -1834,8 +2073,14 @@ export class BrowserController {
1834
2073
  const hostCandidates = [
1835
2074
  { kind: "turnstile", selector: 'input[name="cf-turnstile-response"]' },
1836
2075
  { kind: "recaptcha", selector: 'textarea[name="g-recaptcha-response"]' },
2076
+ { kind: "hcaptcha", selector: 'textarea[name="h-captcha-response"]' },
1837
2077
  ];
1838
2078
  for (const { kind, selector } of hostCandidates) {
2079
+ // The invisible reCAPTCHA's hidden g-recaptcha-response textarea lives
2080
+ // INSIDE the .grecaptcha-badge (~256×60), so the walk-up below would
2081
+ // return the badge box and we'd click it — the exact bug. Skip it.
2082
+ if (kind === "recaptcha" && recaptchaInvisibleOnly)
2083
+ continue;
1839
2084
  const locator = this.page.locator(selector);
1840
2085
  const count = await locator.count();
1841
2086
  if (count === 0)
@@ -1912,11 +2157,14 @@ export class BrowserController {
1912
2157
  else if (present('iframe[src*="hcaptcha.com"]')) {
1913
2158
  variant = "hcaptcha";
1914
2159
  }
1915
- else if (present('iframe[src*="recaptcha/api2/anchor"]')) {
2160
+ else if (present('iframe[src*="recaptcha/api2/anchor"]:not([src*="size=invisible"])')) {
2161
+ // VISIBLE checkbox anchor (size=normal) → clickable v2.
1916
2162
  variant = "recaptcha_v2";
1917
2163
  }
1918
- else if (present(".grecaptcha-badge")) {
1919
- // Badge but no clickable anchor → score-mode reCAPTCHA.
2164
+ else if (present(".grecaptcha-badge") ||
2165
+ present('iframe[src*="recaptcha/api2/anchor"][src*="size=invisible"]')) {
2166
+ // Badge / size=invisible anchor and no clickable checkbox →
2167
+ // score-mode reCAPTCHA (passes on submit, nothing to click).
1920
2168
  variant = "recaptcha_v3";
1921
2169
  }
1922
2170
  return { variant, challengeRendered };
@@ -1937,25 +2185,37 @@ export class BrowserController {
1937
2185
  // help). Reads from the standard places sites declare it:
1938
2186
  // 1. <div class="g-recaptcha" data-sitekey="...">
1939
2187
  // 2. <iframe src="...?k=SITEKEY&..."> (api2/anchor frame)
1940
- // 3. <script>...sitekey: '...'...</script> via window globals
2188
+ //
2189
+ // CRITICAL: only ever returns a GENUINE reCAPTCHA key. hCaptcha
2190
+ // (`.h-captcha`) and Turnstile (`.cf-turnstile`) ALSO publish a
2191
+ // `data-sitekey` attribute, so a bare `[data-sitekey]` selector
2192
+ // grabs the wrong provider's key and the caller ships it to
2193
+ // 2Captcha's `userrecaptcha` endpoint → ERROR_WRONG_GOOGLEKEY (the
2194
+ // plausible/hCaptcha case). The authoritative discriminator is the
2195
+ // key FORMAT: reCAPTCHA public keys always start with `6L`; hCaptcha
2196
+ // keys are UUIDs (`bc609205-…`); Turnstile keys start with `0x`. We
2197
+ // both scope the selector away from the other widgets AND gate on
2198
+ // the `6L` prefix, so no non-reCAPTCHA key can ever leak through.
1941
2199
  async extractRecaptchaSitekey() {
1942
2200
  if (!this.page)
1943
2201
  throw new Error("Browser not started");
1944
2202
  try {
1945
2203
  const sitekey = await this.page.evaluate(() => {
1946
- // 1. div[data-sitekey] the standard reCAPTCHA v2 anchor.
1947
- const div = document.querySelector("[data-sitekey], div.g-recaptcha[data-sitekey]");
1948
- if (div !== null) {
1949
- const k = div.getAttribute("data-sitekey");
1950
- if (k !== null && k.length > 10)
2204
+ const isRecaptchaKey = (k) => k !== null && /^6L/.test(k) && k.length > 30;
2205
+ // 1. data-sitekey, but NOT on an hCaptcha/Turnstile widget (or
2206
+ // nested inside one). Those publish data-sitekey too.
2207
+ const anchors = Array.from(document.querySelectorAll("[data-sitekey]")).filter((el) => el.closest(".h-captcha, .cf-turnstile") === null);
2208
+ for (const el of anchors) {
2209
+ const k = el.getAttribute("data-sitekey");
2210
+ if (isRecaptchaKey(k))
1951
2211
  return k;
1952
2212
  }
1953
- // 2. The api2 iframe src carries ?k=SITEKEY.
2213
+ // 2. The api2/enterprise iframe src carries ?k=SITEKEY.
1954
2214
  const iframes = Array.from(document.querySelectorAll('iframe[src*="recaptcha/api2"], iframe[src*="recaptcha/enterprise"]'));
1955
2215
  for (const ifr of iframes) {
1956
2216
  const url = new URL(ifr.src);
1957
2217
  const k = url.searchParams.get("k");
1958
- if (k !== null && k.length > 10)
2218
+ if (isRecaptchaKey(k))
1959
2219
  return k;
1960
2220
  }
1961
2221
  return null;
@@ -2029,6 +2289,161 @@ export class BrowserController {
2029
2289
  return false;
2030
2290
  }
2031
2291
  }
2292
+ // Mint the score token for an INVISIBLE reCAPTCHA by calling
2293
+ // grecaptcha.execute() ourselves, then wait for g-recaptcha-response to
2294
+ // populate. MEASURED on amplitude (2026-06-04): an invisible reCAPTCHA's
2295
+ // token only exists once execute() runs, and amplitude's form REQUIRES it —
2296
+ // merely skipping the badge (not clicking it) left the textarea empty and
2297
+ // the submit silently no-op'd. With our ~1.0 v3 score, execute() returns a
2298
+ // passing token in ~1-3s, so the subsequent submit carries a valid token.
2299
+ // Handles both standard (grecaptcha) and enterprise (grecaptcha.enterprise)
2300
+ // namespaces. Returns true once a token is present. Best-effort: a missing
2301
+ // grecaptcha or an execute() throw resolves false (the form may still mint
2302
+ // it on its own submit handler).
2303
+ async triggerInvisibleRecaptcha(timeoutMs = 9000) {
2304
+ if (!this.page)
2305
+ throw new Error("Browser not started");
2306
+ const tokenPresent = () => this.page.evaluate(() => {
2307
+ const ta = document.querySelector('textarea[name="g-recaptcha-response"], textarea[id^="g-recaptcha-response"]');
2308
+ return ta !== null && ta.value.length > 0;
2309
+ }).catch(() => false);
2310
+ if (await tokenPresent())
2311
+ return true;
2312
+ const fired = await this.page
2313
+ .evaluate(() => {
2314
+ const w = window;
2315
+ const g = w.grecaptcha;
2316
+ if (g === undefined)
2317
+ return false;
2318
+ let any = false;
2319
+ const ids = (() => {
2320
+ try {
2321
+ return Object.keys(w.___grecaptcha_cfg?.clients ?? {});
2322
+ }
2323
+ catch {
2324
+ return [];
2325
+ }
2326
+ })();
2327
+ for (const id of ids) {
2328
+ const n = Number(id);
2329
+ if (!Number.isFinite(n))
2330
+ continue;
2331
+ try {
2332
+ g.enterprise?.execute?.(n);
2333
+ any = true;
2334
+ }
2335
+ catch {
2336
+ /* not this namespace */
2337
+ }
2338
+ try {
2339
+ g.execute?.(n);
2340
+ any = true;
2341
+ }
2342
+ catch {
2343
+ /* widget already executed / wrong namespace */
2344
+ }
2345
+ }
2346
+ // Fallback: no enumerable clients — try the bare (first-widget) call,
2347
+ // enterprise first (a v2-invisible page exposes plain execute()).
2348
+ if (!any) {
2349
+ try {
2350
+ if (typeof g.enterprise?.execute === "function") {
2351
+ g.enterprise.execute();
2352
+ any = true;
2353
+ }
2354
+ else if (typeof g.execute === "function") {
2355
+ g.execute();
2356
+ any = true;
2357
+ }
2358
+ }
2359
+ catch {
2360
+ return false;
2361
+ }
2362
+ }
2363
+ return any;
2364
+ })
2365
+ .catch(() => false);
2366
+ if (!fired)
2367
+ return false;
2368
+ const start = Date.now();
2369
+ while (Date.now() - start < timeoutMs) {
2370
+ await this.sleep(500);
2371
+ if (await tokenPresent())
2372
+ return true;
2373
+ }
2374
+ return false;
2375
+ }
2376
+ // Tier 3 hCaptcha support — extract the hCaptcha sitekey so 2Captcha
2377
+ // can solve it. hCaptcha publishes its key on `.h-captcha[data-sitekey]`
2378
+ // or in the checkbox iframe's `?sitekey=` query. Keys are UUIDs (the
2379
+ // reCAPTCHA `6L` guard in extractRecaptchaSitekey deliberately rejects
2380
+ // them, which is why hCaptcha needs its own extractor). Returns null
2381
+ // when no hCaptcha widget is present.
2382
+ async extractHcaptchaSitekey() {
2383
+ if (!this.page)
2384
+ throw new Error("Browser not started");
2385
+ try {
2386
+ return await this.page.evaluate(() => {
2387
+ const div = document.querySelector(".h-captcha[data-sitekey], [data-hcaptcha-sitekey]");
2388
+ if (div !== null) {
2389
+ const k = div.getAttribute("data-sitekey") ??
2390
+ div.getAttribute("data-hcaptcha-sitekey");
2391
+ if (k !== null && k.length > 10)
2392
+ return k;
2393
+ }
2394
+ const iframe = document.querySelector('iframe[src*="hcaptcha.com"]');
2395
+ if (iframe !== null) {
2396
+ const k = new URL(iframe.src).searchParams.get("sitekey");
2397
+ if (k !== null && k.length > 10)
2398
+ return k;
2399
+ }
2400
+ return null;
2401
+ });
2402
+ }
2403
+ catch {
2404
+ return null;
2405
+ }
2406
+ }
2407
+ // Inject a 2Captcha-resolved hCaptcha token into the page's
2408
+ // h-captcha-response textarea(s) and fire the widget's data-callback
2409
+ // if the page registered one. Mirrors injectRecaptchaToken; hCaptcha
2410
+ // also mirrors the response token into a g-recaptcha-response textarea
2411
+ // on some compat installs, so populate both names if present.
2412
+ async injectHcaptchaToken(token) {
2413
+ if (!this.page)
2414
+ throw new Error("Browser not started");
2415
+ try {
2416
+ return await this.page.evaluate((tok) => {
2417
+ const inputs = Array.from(document.querySelectorAll('textarea[name="h-captcha-response"], textarea[id^="h-captcha-response"], textarea[name="g-recaptcha-response"]'));
2418
+ if (inputs.length === 0)
2419
+ return false;
2420
+ for (const input of inputs) {
2421
+ input.value = tok;
2422
+ input.dispatchEvent(new Event("input", { bubbles: true }));
2423
+ input.dispatchEvent(new Event("change", { bubbles: true }));
2424
+ }
2425
+ // Fire the data-callback the page registered on the .h-captcha
2426
+ // host (hCaptcha calls it by name on window). Best-effort — the
2427
+ // populated textarea is what server-side validation reads.
2428
+ try {
2429
+ const host = document.querySelector(".h-captcha[data-callback]");
2430
+ const name = host?.getAttribute("data-callback");
2431
+ if (name !== null && name !== undefined) {
2432
+ const fn = window[name];
2433
+ if (typeof fn === "function")
2434
+ fn(tok);
2435
+ }
2436
+ }
2437
+ catch {
2438
+ // no named callback — DOM injection stands.
2439
+ }
2440
+ return true;
2441
+ }, token);
2442
+ }
2443
+ catch {
2444
+ return false;
2445
+ }
2446
+ }
2032
2447
  // Small mouse wiggle near the current position. Used during prewarm
2033
2448
  // so the page sees pointer events before we navigate away.
2034
2449
  async jitterMouse() {
@@ -2810,35 +3225,110 @@ export class BrowserController {
2810
3225
  async waitForAntiBotInterstitialToClear(timeoutMs) {
2811
3226
  if (!this.page)
2812
3227
  return;
2813
- let detected = await this.pollUntilInterstitialClears(timeoutMs);
2814
- if (!detected) {
2815
- // We either never saw an interstitial, or we saw one and it
2816
- // cleared on its own. Nothing more to do.
3228
+ const first = await this.pollUntilInterstitialClears(timeoutMs);
3229
+ // Never saw an interstitial, or saw one and it cleared on its own —
3230
+ // nothing more to do.
3231
+ if (!first.detected || first.cleared)
2817
3232
  return;
3233
+ // Still on the interstitial at the deadline. If Cloudflare reported
3234
+ // the challenge PASSED ("Verification successful"), the redirect is
3235
+ // just racing/stuck — be patient through ANOTHER full window before
3236
+ // touching anything (a reload mid-redirect can re-arm the challenge).
3237
+ if (first.verificationPassed) {
3238
+ const patient = await this.pollUntilInterstitialClears(timeoutMs);
3239
+ if (patient.cleared)
3240
+ return;
3241
+ // "Verification successful" but the page never advances is the
3242
+ // signature of a STALE cf_clearance cookie — issued on a prior visit
3243
+ // (often a different egress IP), which CF matches ("successful") but
3244
+ // the origin then rejects, looping forever on "Waiting for the page
3245
+ // to load." MEASURED: a clean profile clears codesandbox's challenge
3246
+ // in ~12s; the stale cookie is what stalls the shared profile. Drop
3247
+ // the CF cookies to force a FRESH challenge, then reload.
3248
+ if (await this.clearCloudflareCookiesAndRetry(timeoutMs))
3249
+ return;
3250
+ // Or the auto-redirect simply stalled with a still-valid clearance —
3251
+ // re-navigate past the one-shot challenge token.
3252
+ if (await this.forceNavigatePastClearedChallenge())
3253
+ return;
2818
3254
  }
2819
- // The interstitial outlived the wait. Cloudflare frequently shows
2820
- // "Verification successful. Wait" but then never fires the JS
2821
- // redirect the challenge passed, but the redirect script got
2822
- // stuck or the cookie set is racing the navigation. A single
2823
- // reload, now that the cf_clearance cookie is set, often lets the
2824
- // real page render. (If the issue is a server-side risk-score
2825
- // block — fingerprint/IP — reload won't help, but the caller's
2826
- // inventory diagnostic will still surface the block.)
3255
+ // Force the real page: now that the cf_clearance cookie is set, a
3256
+ // reload often renders it. domcontentloaded (not networkidle) — the
3257
+ // real page is usually a heavy SPA that never reaches networkidle, so
3258
+ // waiting for it just burns the budget back into a timeout. (If it's a
3259
+ // server-side risk-score block — fingerprint/IP — reload won't help,
3260
+ // but the caller's inventory diagnostic will still surface the block.)
2827
3261
  try {
2828
- await this.page.reload({ waitUntil: "networkidle", timeout: 10_000 });
3262
+ await this.page.reload({ waitUntil: "domcontentloaded", timeout: 15_000 });
2829
3263
  }
2830
3264
  catch {
2831
3265
  // reload failed — proceed with what's there
2832
3266
  }
2833
3267
  await this.pollUntilInterstitialClears(Math.max(5000, timeoutMs / 2));
2834
3268
  }
2835
- // One poll loop. Returns true if an interstitial was ever observed
2836
- // (cleared or still there at timeout), false if never seen.
2837
- async pollUntilInterstitialClears(timeoutMs) {
3269
+ // Drop Cloudflare's anti-bot cookies (cf_clearance + __cf_bm) so the next
3270
+ // request triggers a FRESH managed challenge, then reload and wait for it
3271
+ // to clear. Scoped to cookie NAME — only CF's own cookies are removed, so
3272
+ // an OAuth provider's session on accounts.google.com / github.com is
3273
+ // untouched. A fresh challenge on a residential IP clears in ~12-15s, so
3274
+ // we give it a generous window. Returns true if the interstitial is gone.
3275
+ async clearCloudflareCookiesAndRetry(timeoutMs) {
3276
+ if (!this.page || !this.context)
3277
+ return false;
3278
+ try {
3279
+ await this.context.clearCookies({ name: "cf_clearance" });
3280
+ await this.context.clearCookies({ name: "__cf_bm" });
3281
+ }
3282
+ catch {
3283
+ // clearCookies filter unsupported / failed — nothing to retry on.
3284
+ return false;
3285
+ }
3286
+ try {
3287
+ await this.page.reload({ waitUntil: "domcontentloaded", timeout: 20_000 });
3288
+ }
3289
+ catch {
3290
+ // reload failed — still give the poll a chance below.
3291
+ }
3292
+ const after = await this.pollUntilInterstitialClears(Math.max(20_000, timeoutMs));
3293
+ return after.cleared || !after.detected;
3294
+ }
3295
+ // With a CONFIRMED Cloudflare pass, re-navigate to the current URL with
3296
+ // the one-shot `__cf_chl_*` challenge token stripped — the cf_clearance
3297
+ // cookie is already set, so the edge serves the real page instead of the
3298
+ // stuck redirect. Returns true if the interstitial is gone afterwards.
3299
+ // Returns false (caller falls back to a plain reload) when there's no
3300
+ // token to strip or the navigation didn't clear the gate.
3301
+ async forceNavigatePastClearedChallenge() {
2838
3302
  if (!this.page)
2839
3303
  return false;
3304
+ const cleaned = stripCloudflareChallengeParams(this.page.url());
3305
+ if (!cleaned)
3306
+ return false;
3307
+ try {
3308
+ await this.page.goto(cleaned, {
3309
+ waitUntil: "domcontentloaded",
3310
+ timeout: 15_000,
3311
+ });
3312
+ }
3313
+ catch {
3314
+ return false;
3315
+ }
3316
+ const after = await this.pollUntilInterstitialClears(Math.max(5000, 8000));
3317
+ // cleared = saw it then it went away; !detected = the real page rendered
3318
+ // immediately (no interstitial on the post-nav page at all).
3319
+ return after.cleared || !after.detected;
3320
+ }
3321
+ // One poll loop. `detected` = an interstitial was observed at least
3322
+ // once; `cleared` = it was observed AND then went away (vs. still there
3323
+ // at the deadline); `verificationPassed` = Cloudflare reported the
3324
+ // challenge succeeded at some point during the wait (see
3325
+ // classifyInterstitialText).
3326
+ async pollUntilInterstitialClears(timeoutMs) {
3327
+ if (!this.page)
3328
+ return { detected: false, cleared: false, verificationPassed: false };
2840
3329
  const deadline = Date.now() + timeoutMs;
2841
3330
  let detected = false;
3331
+ let verificationPassed = false;
2842
3332
  while (Date.now() < deadline) {
2843
3333
  let title = "";
2844
3334
  let bodyText = "";
@@ -2850,19 +3340,21 @@ export class BrowserController {
2850
3340
  await new Promise((r) => setTimeout(r, 500));
2851
3341
  continue;
2852
3342
  }
2853
- const onInterstitial = /just a moment|performing security verification|verifying you are human|checking your browser|attention required/i.test(title + " " + bodyText);
2854
- if (!onInterstitial) {
3343
+ const c = classifyInterstitialText(title + " " + bodyText);
3344
+ if (c.verificationPassed)
3345
+ verificationPassed = true;
3346
+ if (!c.onInterstitial) {
2855
3347
  if (detected) {
2856
3348
  // Give the freshly-revealed page a tick to hydrate before
2857
3349
  // the inventory scan.
2858
3350
  await new Promise((r) => setTimeout(r, 800));
2859
3351
  }
2860
- return detected;
3352
+ return { detected, cleared: detected, verificationPassed };
2861
3353
  }
2862
3354
  detected = true;
2863
3355
  await new Promise((r) => setTimeout(r, 1000));
2864
3356
  }
2865
- return detected;
3357
+ return { detected, cleared: false, verificationPassed };
2866
3358
  }
2867
3359
  // Walk the live DOM (piercing open shadow roots) and return every
2868
3360
  // visible interactive element with a bot-computed selector (F3 T1).
@@ -2903,7 +3395,16 @@ export class BrowserController {
2903
3395
  // whole inventory with "Cannot read properties of undefined
2904
3396
  // (reading 'querySelectorAll')", failing the run before the
2905
3397
  // planner ever saw the page. Skip such a node instead.
2906
- if (root === null || typeof root.querySelectorAll !== "function")
3398
+ //
3399
+ // `== null` (not `=== null`) is load-bearing: `el.shadowRoot` is
3400
+ // typed `ShadowRoot | null`, but a detached/closed custom element
3401
+ // can yield `undefined` at runtime. The recursion below calls
3402
+ // `walk(el.shadowRoot)` whenever it isn't `null`, so an `undefined`
3403
+ // shadowRoot reaches here and `typeof undefined.querySelectorAll`
3404
+ // THROWS before the typeof guard can fire — exactly the #59
3405
+ // redis-cloud crash, which recurred 2026-06-03 even with the
3406
+ // null-only guard in place. The loose check covers both.
3407
+ if (root == null || typeof root.querySelectorAll !== "function")
2907
3408
  return;
2908
3409
  root.querySelectorAll(SELECTOR).forEach((n) => collected.push(n));
2909
3410
  root.querySelectorAll("*").forEach((el) => {
@@ -3262,16 +3763,250 @@ export class BrowserController {
3262
3763
  // best-effort — the agent's consent loop re-reads state regardless
3263
3764
  }
3264
3765
  }
3766
+ // Does the page sign in with Google via Google Identity Services (GSI)
3767
+ // rather than classic OAuth redirect? GSI renders its button in a
3768
+ // cross-origin iframe (accounts.google.com/gsi/button) and/or exposes the
3769
+ // `google.accounts.id` JS API; on use it raises a browser-native FedCM
3770
+ // dialog or a popup and returns a JWT to a JS callback — there is NO
3771
+ // redirect, so the classic startOAuth flow can't drive it. Detecting this
3772
+ // is what lets the agent route to tryGoogleGsiLogin instead.
3773
+ async hasGoogleGsiAffordance() {
3774
+ if (!this.page)
3775
+ return false;
3776
+ try {
3777
+ return await this.page.evaluate(() => {
3778
+ if (document.querySelector('iframe[src*="accounts.google.com/gsi/"]') !== null) {
3779
+ return true;
3780
+ }
3781
+ // On-demand One-Tap: the page loads the GSI client script but renders
3782
+ // no static button and may not have initialized `google.accounts.id`
3783
+ // yet (amplitude, clerk). A plain click on the in-page "Sign in with
3784
+ // Google" affordance never redirects, so the bot used to falsely
3785
+ // conclude "signed in" and bounce to login. Treat the loaded client
3786
+ // script as a GSI affordance so the agent routes through
3787
+ // tryGoogleGsiLogin, which now raises One-Tap programmatically.
3788
+ if (document.querySelector('script[src*="accounts.google.com/gsi/client"]') !== null) {
3789
+ return true;
3790
+ }
3791
+ const g = window.google;
3792
+ return typeof g?.accounts?.id !== "undefined";
3793
+ });
3794
+ }
3795
+ catch {
3796
+ return false;
3797
+ }
3798
+ }
3799
+ // Drive a Google Identity Services / FedCM sign-in. Two variants are
3800
+ // handled:
3801
+ // - FedCM: clicking the GSI widget raises a browser-NATIVE credential
3802
+ // dialog (no DOM, no popup — invisible to Playwright). We enable the
3803
+ // CDP FedCm domain up front and auto-select the first account when
3804
+ // FedCm.dialogShown fires. The page's JS callback then receives the
3805
+ // JWT and establishes the session.
3806
+ // - Popup: older GSI opens a Google account-chooser window; we adopt it
3807
+ // like startOAuth does so the consent loop can drive it.
3808
+ // Returns how it resolved. The caller then runs the SAME post-OAuth
3809
+ // settle/consent/post-verify path as the redirect flow.
3810
+ async tryGoogleGsiLogin(triggerSelector, timeoutMs = 25_000) {
3811
+ if (!this.page || !this.context)
3812
+ throw new Error("Browser not started");
3813
+ this.oauthProductPage = this.page;
3814
+ let fedcmResolved = false;
3815
+ let cdp = null;
3816
+ try {
3817
+ cdp = await this.context.newCDPSession(this.page);
3818
+ await cdp.send("FedCm.enable", { disableRejectionDelay: true });
3819
+ cdp.on("FedCm.dialogShown", (ev) => {
3820
+ const e = ev;
3821
+ const dialogId = e.dialogId;
3822
+ if (dialogId === undefined)
3823
+ return;
3824
+ void (async () => {
3825
+ // A ConfirmIdpLogin dialog has no account list — it's the "Continue
3826
+ // as / sign in to Google" confirmation that precedes the account
3827
+ // chooser. selectAccount would error on it, so drive the confirm
3828
+ // button directly and skip selectAccount for this dialog type.
3829
+ if (e.dialogType === "ConfirmIdpLogin") {
3830
+ try {
3831
+ await cdp.send("FedCm.clickDialogButton", {
3832
+ dialogId,
3833
+ dialogButton: "ConfirmIdpLoginContinue",
3834
+ });
3835
+ }
3836
+ catch {
3837
+ // method/param may not apply to this build/dialog — non-fatal;
3838
+ // a subsequent AccountChooser dialog still resolves via select.
3839
+ }
3840
+ return;
3841
+ }
3842
+ try {
3843
+ // Pick the first account on the account-chooser dialog.
3844
+ await cdp.send("FedCm.selectAccount", { dialogId, accountIndex: 0 });
3845
+ fedcmResolved = true;
3846
+ }
3847
+ catch {
3848
+ // dialog dismissed or already resolved
3849
+ }
3850
+ if (!fedcmResolved) {
3851
+ // Some flows surface a "Continue as <name>" confirm even on the
3852
+ // account dialog; selectAccount alone usually completes it, but
3853
+ // when it didn't, try the confirm button as a fallback. Failure
3854
+ // is non-fatal — the popup/none path still applies.
3855
+ try {
3856
+ await cdp.send("FedCm.clickDialogButton", {
3857
+ dialogId,
3858
+ dialogButton: "ConfirmIdpLoginContinue",
3859
+ });
3860
+ fedcmResolved = true;
3861
+ }
3862
+ catch {
3863
+ // button absent or not applicable — degrade to popup/none
3864
+ }
3865
+ }
3866
+ })();
3867
+ });
3868
+ }
3869
+ catch {
3870
+ cdp = null; // FedCm domain unavailable — the popup path still works
3871
+ }
3872
+ const popupPromise = this.context
3873
+ .waitForEvent("page", { timeout: timeoutMs })
3874
+ .then((p) => p)
3875
+ .catch(() => null);
3876
+ await this.click(triggerSelector);
3877
+ // On-demand One-Tap: when the page loaded the GSI client but rendered no
3878
+ // static button, the click above hits an in-page affordance that never
3879
+ // raises a dialog on its own. If neither a FedCM dialog nor a popup has
3880
+ // appeared shortly after the click, ask GSI to raise One-Tap itself.
3881
+ // `google.accounts.id.prompt()` triggers the FedCM dialog our handler is
3882
+ // already listening for. Guarded — `window.google.accounts.id` may be
3883
+ // undefined (no-op) and any failure must degrade to the popup/none path.
3884
+ if (cdp !== null) {
3885
+ const promptDeadline = Date.now() + Math.min(4_000, timeoutMs);
3886
+ while (Date.now() < promptDeadline &&
3887
+ !fedcmResolved &&
3888
+ this.context.pages().length <= 1) {
3889
+ await this.sleep(250);
3890
+ }
3891
+ if (!fedcmResolved && this.context.pages().length <= 1) {
3892
+ try {
3893
+ await this.page.evaluate(() => {
3894
+ const g = window.google;
3895
+ const id = g?.accounts?.id;
3896
+ if (id !== undefined && typeof id.prompt === "function") {
3897
+ id.prompt();
3898
+ }
3899
+ });
3900
+ }
3901
+ catch {
3902
+ // GSI not initialized / prompt unavailable — popup/none still apply
3903
+ }
3904
+ }
3905
+ }
3906
+ // Resolve when a popup opens OR FedCM completes OR we hit the deadline.
3907
+ const fedcmWait = (async () => {
3908
+ const deadline = Date.now() + timeoutMs;
3909
+ while (Date.now() < deadline && !fedcmResolved) {
3910
+ await this.sleep(250);
3911
+ }
3912
+ return null;
3913
+ })();
3914
+ const popup = await Promise.race([popupPromise, fedcmWait]);
3915
+ if (cdp !== null) {
3916
+ try {
3917
+ await cdp.send("FedCm.disable");
3918
+ }
3919
+ catch {
3920
+ // best-effort
3921
+ }
3922
+ }
3923
+ if (popup !== null && popup !== this.page && !popup.isClosed()) {
3924
+ this.page = popup;
3925
+ try {
3926
+ await this.page.waitForLoadState("domcontentloaded", { timeout: 15_000 });
3927
+ }
3928
+ catch {
3929
+ // consent loop re-reads regardless
3930
+ }
3931
+ return { ok: true, via: "popup" };
3932
+ }
3933
+ if (fedcmResolved) {
3934
+ // Credential delivered to the page's JS callback — give the app a beat
3935
+ // to exchange it for a session and redirect.
3936
+ try {
3937
+ await this.page.waitForLoadState("domcontentloaded", { timeout: 10_000 });
3938
+ }
3939
+ catch {
3940
+ // best-effort
3941
+ }
3942
+ return { ok: true, via: "fedcm" };
3943
+ }
3944
+ return { ok: false, via: "none" };
3945
+ }
3265
3946
  // URL of the active page (the OAuth page mid-handshake, the product
3266
3947
  // page otherwise). Cheap — no screenshot, unlike getState().
3267
3948
  currentUrl() {
3268
3949
  return this.page !== null ? this.page.url() : "";
3269
3950
  }
3951
+ // Fetch a URL's final response (following redirects) and return its
3952
+ // status, final URL, and body text — or null on any failure.
3953
+ //
3954
+ // WHY the CONTEXT request API (this.context.request) and not global
3955
+ // fetch / a fresh node http client: the context's APIRequestContext
3956
+ // shares the BrowserContext's proxy + cookie jar, so this egresses
3957
+ // through the SAME residential tunnel the real navigation uses. That
3958
+ // makes a probe here representative of what the browser would actually
3959
+ // land on (same IP reputation, same cf_clearance cookie) — and needs no
3960
+ // separate SOCKS/HTTP-proxy plumbing. Used by the signup-URL resolver to
3961
+ // distinguish a stale /signup that serves a login SPA from the real
3962
+ // signup form, BEFORE committing to a ~6-minute navigation.
3963
+ //
3964
+ // Bounded (15s, ≤10 redirects) and non-throwing — the resolver treats
3965
+ // null as "couldn't tell" and escalates.
3966
+ async fetchText(url) {
3967
+ if (this.context === null)
3968
+ return null;
3969
+ try {
3970
+ const response = await this.context.request.get(url, {
3971
+ maxRedirects: 10,
3972
+ timeout: 15_000,
3973
+ // We inspect 404/redirect bodies ourselves; don't let a non-2xx
3974
+ // throw before we can classify it.
3975
+ failOnStatusCode: false,
3976
+ });
3977
+ return {
3978
+ finalUrl: response.url(),
3979
+ status: response.status(),
3980
+ bodyText: await response.text(),
3981
+ };
3982
+ }
3983
+ catch {
3984
+ return null;
3985
+ }
3986
+ }
3270
3987
  // True when the active OAuth page is gone — for the popup flow, the
3271
3988
  // popup closing IS the signal the handshake finished.
3272
3989
  oauthPageClosed() {
3273
3990
  return this.page === null || this.page.isClosed();
3274
3991
  }
3992
+ // Which OAuth providers have a LIVE session in this profile's cookie jar.
3993
+ // The logged-in-providers.json marker is a memo that drifts out of sync
3994
+ // (a --force-relogin clears it, a misclassified run clears it, a parallel
3995
+ // run overwrites it) — so a session that is genuinely live in the cookies
3996
+ // can go invisible to provider selection, which is exactly how a warm
3997
+ // GitHub session got skipped in favour of a broken Google path. The cookie
3998
+ // jar is the ground truth: read it directly. Cookie NAMES + presence only;
3999
+ // values are never read into logs. Best-effort — a read failure returns [].
4000
+ async detectSessionProviders() {
4001
+ if (this.context === null)
4002
+ return [];
4003
+ try {
4004
+ return sessionProvidersFromCookies(await this.context.cookies());
4005
+ }
4006
+ catch {
4007
+ return [];
4008
+ }
4009
+ }
3275
4010
  // Advance a provider's consent / account-chooser screen by one click
3276
4011
  // — the scope-gated auto-approve (T7/T13). Returns false when no
3277
4012
  // approve control is present — the agent then aborts rather than
@@ -3446,7 +4181,11 @@ export function pickClickLocator(locator, count) {
3446
4181
  export function collectAcrossShadowRoots(root, selector) {
3447
4182
  const collected = [];
3448
4183
  const walk = (r) => {
3449
- if (r === null || typeof r.querySelectorAll !== "function")
4184
+ // `== null` (not `=== null`) covers both null and undefined — the
4185
+ // recursion below calls walk() on any non-null shadowRoot, so an
4186
+ // `undefined` one reaches here and `typeof undefined.querySelectorAll`
4187
+ // would throw before the typeof guard fired (#59 redis-cloud).
4188
+ if (r == null || typeof r.querySelectorAll !== "function")
3450
4189
  return;
3451
4190
  Array.from(r.querySelectorAll(selector)).forEach((n) => collected.push(n));
3452
4191
  Array.from(r.querySelectorAll("*")).forEach((el) => {
@@ -3473,6 +4212,26 @@ export function pickSubmitButtonIndex(texts) {
3473
4212
  });
3474
4213
  return bestIndex;
3475
4214
  }
// ───────────── required-agreement checkbox guard ─────────────
// Patterns shared by the pure helper below and the in-page evaluate in
// `checkRequiredAgreementBoxes`. That evaluate runs in the page realm and
// can't import, so the same two regexes are inlined there verbatim —
// keep them BYTE-IDENTICAL with these.
const AGREEMENT_TEXT_RE = /terms|tos\b|privacy|consent|policy|i agree|agree to|acknowledge|gdpr/i;
const MARKETING_TEXT_RE = /newsletter|updates|offers|product tips|marketing|promotional|receive emails|opt[- ]?in to|subscribe/i;
/**
 * Decide whether a checkbox's associated text reads as a REQUIRED
 * agreement (terms/privacy/consent) rather than a marketing/newsletter
 * opt-in.
 *
 * Why a deterministic check instead of trusting the LLM planner:
 * amplitude's signup renders the required TOS checkbox next to a pair of
 * data-storage-location card-radios; the planner mistook the whole cluster
 * for "ambiguous radios" and skipped the box, and amplitude's submit isn't
 * disabled when it's unticked — so the form silently no-ops. A marketing
 * opt-in must never be flipped on the user's behalf, hence the marketing
 * wording vetoes the match.
 *
 * @param {string} text - label/nearby text associated with the checkbox.
 * @returns {boolean} true only when agreement wording is present and no
 *   marketing wording is.
 */
export function isAgreementCheckboxText(text) {
    // Marketing wording always wins, even alongside "I agree …".
    if (MARKETING_TEXT_RE.test(text)) {
        return false;
    }
    return AGREEMENT_TEXT_RE.test(text);
}
3476
4235
  // Parse a UNIVERSAL_BOT_PROXY_URL — e.g. "http://user:pass@host:8080" or
3477
4236
  // "socks5://host:1080" — into Playwright's proxy option shape. Playwright
3478
4237
  // wants credentials separate from `server`, so we split them out and