@trusty-squire/mcp 0.8.16 → 0.8.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bot/agent.d.ts +33 -2
- package/dist/bot/agent.d.ts.map +1 -1
- package/dist/bot/agent.js +1747 -213
- package/dist/bot/agent.js.map +1 -1
- package/dist/bot/browser.d.ts +29 -1
- package/dist/bot/browser.d.ts.map +1 -1
- package/dist/bot/browser.js +796 -48
- package/dist/bot/browser.js.map +1 -1
- package/dist/bot/captcha-solver-2captcha.d.ts +12 -0
- package/dist/bot/captcha-solver-2captcha.d.ts.map +1 -1
- package/dist/bot/captcha-solver-2captcha.js +28 -5
- package/dist/bot/captcha-solver-2captcha.js.map +1 -1
- package/dist/bot/google-login.d.ts.map +1 -1
- package/dist/bot/google-login.js +39 -0
- package/dist/bot/google-login.js.map +1 -1
- package/dist/bot/index.d.ts +1 -1
- package/dist/bot/index.d.ts.map +1 -1
- package/dist/bot/oauth-providers.d.ts.map +1 -1
- package/dist/bot/oauth-providers.js +13 -3
- package/dist/bot/oauth-providers.js.map +1 -1
- package/dist/tools/signup-telemetry.d.ts +2 -2
- package/dist/tools/signup-telemetry.d.ts.map +1 -1
- package/dist/tools/signup-telemetry.js.map +1 -1
- package/package.json +1 -1
package/dist/bot/browser.js
CHANGED
|
@@ -93,6 +93,31 @@ function getChromium() {
|
|
|
93
93
|
}
|
|
94
94
|
return cachedChromium;
|
|
95
95
|
}
|
|
96
|
+
// Map a cookie jar to the OAuth providers that have a LIVE logged-in session.
|
|
97
|
+
// The auth cookies that mean "signed in": GitHub → `user_session`; Google →
|
|
98
|
+
// any of the *SID session cookies (NID / CONSENT / 1P_JAR are set even when
|
|
99
|
+
// logged out, so they are deliberately NOT signals). Host-scoped so a
|
|
100
|
+
// google.com cookie can't pass for github. Cookie NAMES + presence only;
|
|
101
|
+
// values are checked for non-triviality, never logged. Exported for tests.
|
|
102
|
+
// Report which OAuth providers have a live session in the given cookie jar.
// A provider qualifies when some cookie sits on the provider's host (or a
// subdomain), carries one of its auth-cookie names, and has a value longer
// than 10 characters. Only names and value lengths are inspected.
// Returns provider ids in signature order ("github" before "google").
export function sessionProvidersFromCookies(cookies) {
    const signatures = [
        { provider: "github", host: /(^|\.)github\.com$/i, names: ["user_session"] },
        {
            provider: "google",
            host: /(^|\.)google\.com$/i,
            names: ["SID", "__Secure-1PSID", "__Secure-3PSID"],
        },
    ];
    const isAuthCookie = (signature, cookie) => {
        // Cookie domains may be written with a leading dot (".github.com");
        // drop it so the host pattern sees the bare hostname.
        const bareDomain = cookie.domain.replace(/^\./, "");
        if (!signature.host.test(bareDomain))
            return false;
        if (!signature.names.includes(cookie.name))
            return false;
        // Reject trivially short values (empty / placeholder cookies).
        return cookie.value.length > 10;
    };
    return signatures
        .filter((signature) => cookies.some((cookie) => isAuthCookie(signature, cookie)))
        .map((signature) => signature.provider);
}
|
|
96
121
|
function isCaptchaVariant(v) {
|
|
97
122
|
return (v === "turnstile" ||
|
|
98
123
|
v === "recaptcha_v2" ||
|
|
@@ -187,6 +212,49 @@ async function detectChromiumChannel() {
|
|
|
187
212
|
}
|
|
188
213
|
return null;
|
|
189
214
|
}
|
|
215
|
+
// Classify an anti-bot interstitial page from its (title + body) text.
|
|
216
|
+
// `onInterstitial` matches the static Cloudflare/Turnstile challenge copy.
|
|
217
|
+
// `verificationPassed` is the signal the challenge SUCCEEDED — but
|
|
218
|
+
// Cloudflare leaves the static "Just a moment / Performing security
|
|
219
|
+
// verification" copy ON THE PAGE even after it appends "Verification
|
|
220
|
+
// successful. Waiting for…", so `onInterstitial` alone wrongly reads as
|
|
221
|
+
// "still blocked" and the bot bails as anti_bot_blocked — exactly what
|
|
222
|
+
// stranded codesandbox/lambda-labs once patchright started PASSING the
|
|
223
|
+
// challenge. When the challenge passed, the redirect is just racing/
|
|
224
|
+
// stuck; the caller should be patient + reload, not give up. Exported
|
|
225
|
+
// for unit tests.
|
|
226
|
+
// Classify page text (title + body) against two independent patterns:
//   onInterstitial     — the static anti-bot challenge copy is present.
//   verificationPassed — the "challenge succeeded" copy is present.
// Both flags can be true at once; the caller decides what that means.
export function classifyInterstitialText(text) {
    const challengeCopy = /just a moment|performing security verification|verifying you are human|checking your browser|attention required/i;
    const passedCopy = /verification successful|you are (now )?verified|success!|challenge[- ]?(passed|complete)/i;
    return {
        onInterstitial: challengeCopy.test(text),
        verificationPassed: passedCopy.test(text),
    };
}
|
|
231
|
+
// After a Cloudflare managed challenge PASSES, the cf_clearance cookie is
|
|
232
|
+
// set but the URL still carries Cloudflare's single-use challenge token
|
|
233
|
+
// (`__cf_chl_rt_tk`, `__cf_chl_tk`, `__cf_chl_f_tk`, …). Cloudflare's own
|
|
234
|
+
// client-side redirect to the cleared page can stall — especially over a
|
|
235
|
+
// high-latency residential tunnel, where the meta-refresh/JS hop never
|
|
236
|
+
// fires inside our wait budget. Re-navigating to the SAME url with those
|
|
237
|
+
// one-shot tokens stripped serves the real page directly (the clearance
|
|
238
|
+
// cookie now satisfies the edge), instead of waiting on the stuck redirect.
|
|
239
|
+
// Returns the cleaned URL, or null when there's no challenge token to strip
|
|
240
|
+
// (nothing this can do better than a plain reload). Exported for unit tests.
|
|
241
|
+
// Remove every query parameter whose name starts with "__cf_chl"
// (case-insensitive) from rawUrl.
// Returns the rewritten URL string, or null when rawUrl does not parse or
// carries no such parameter.
export function stripCloudflareChallengeParams(rawUrl) {
    let parsed;
    try {
        parsed = new URL(rawUrl);
    }
    catch {
        return null;
    }
    // Snapshot the matching names first so deletion never runs against a
    // live iterator.
    const challengeKeys = Array.from(parsed.searchParams.keys()).filter((name) => name.toLowerCase().startsWith("__cf_chl"));
    if (challengeKeys.length === 0)
        return null;
    challengeKeys.forEach((name) => {
        parsed.searchParams.delete(name);
    });
    return parsed.toString();
}
|
|
190
258
|
export class BrowserController {
|
|
191
259
|
// The persistent browser context. Persistent (launchPersistentContext)
|
|
192
260
|
// rather than an ephemeral context so the profile carries the user's
|
|
@@ -380,6 +448,21 @@ export class BrowserController {
|
|
|
380
448
|
"--disable-blink-features=AutomationControlled",
|
|
381
449
|
"--no-sandbox",
|
|
382
450
|
"--disable-dev-shm-usage",
|
|
451
|
+
// Enable software WebGL on the GPU-less Xvfb host. Without this,
|
|
452
|
+
// Chrome 120+ disables WebGL entirely (getContext("webgl") → null),
|
|
453
|
+
// which MEASURED (2026-06-04) as the bot's one real fingerprint gap:
|
|
454
|
+
// a browser with NO WebGL is itself an anti-bot tell (reCAPTCHA
|
|
455
|
+
// Enterprise / device-fingerprinting weight it). SwiftShader gives a
|
|
456
|
+
// real WebGL context. MEASURED 2026-06-04: with this on, WebGL reports
|
|
457
|
+
// a Mesa/llvmpipe software renderer and the reCAPTCHA v3 score stays
|
|
458
|
+
// 1.0 — a strict improvement over "no WebGL at all", which more
|
|
459
|
+
// fingerprint libs treat as suspicious than a software renderer. The
|
|
460
|
+
// rc.33 init-script below TRIES to spoof the renderer string to a real
|
|
461
|
+
// Intel GPU, but it is INERT under patchright (hardened) — see its
|
|
462
|
+
// comment. A clean GPU-string spoof under patchright needs binary-level
|
|
463
|
+
// support; tracked as a follow-up, not blocking (score is already 1.0).
|
|
464
|
+
"--enable-unsafe-swiftshader",
|
|
465
|
+
"--ignore-gpu-blocklist",
|
|
383
466
|
],
|
|
384
467
|
// `viewport: null` makes the page use the REAL OS window size
|
|
385
468
|
// instead of a hardcoded value. The old fixed 1280×720 is exactly
|
|
@@ -426,29 +509,47 @@ export class BrowserController {
|
|
|
426
509
|
Object.defineProperty(navigator, "webdriver", { get: () => undefined });
|
|
427
510
|
});
|
|
428
511
|
}
|
|
429
|
-
// rc.33 — spoof WebGL renderer
|
|
430
|
-
//
|
|
431
|
-
//
|
|
432
|
-
//
|
|
433
|
-
//
|
|
434
|
-
//
|
|
435
|
-
// (
|
|
436
|
-
//
|
|
437
|
-
//
|
|
438
|
-
//
|
|
439
|
-
//
|
|
440
|
-
|
|
512
|
+
// rc.33 / 2026-06-04 — spoof the WebGL UNMASKED vendor+renderer toward a
|
|
513
|
+
// stock Intel GPU, so the software Mesa/llvmpipe string (--enable-unsafe-
|
|
514
|
+
// swiftshader gives us a context, but llvmpipe is itself a VM/headless
|
|
515
|
+
// tell) doesn't read through. Applied TWO ways because patchright
|
|
516
|
+
// (hardened) isolates document-start scripts from the page's main world:
|
|
517
|
+
// • addInitScript — document-start; the effective path in the stealth
|
|
518
|
+
// BASELINE (non-patchright).
|
|
519
|
+
// • re-applied via page.evaluate on every navigation — the ONLY path that
|
|
520
|
+
// reaches the MAIN world under patchright. MEASURED 2026-06-04:
|
|
521
|
+
// addInitScript AND raw CDP Page.addScriptToEvaluateOnNewDocument both
|
|
522
|
+
// land in patchright's isolated world (renderer stayed llvmpipe);
|
|
523
|
+
// page.evaluate does not (renderer became Intel), and the v3 score held
|
|
524
|
+
// at 1.0. Idempotent via a marker so the per-nav re-apply is cheap, and
|
|
525
|
+
// getParameter.toString() is masked to the original native source so
|
|
526
|
+
// the patch itself isn't a tell. Only strings change, not rendering.
|
|
527
|
+
const installWebglSpoof = () => {
|
|
441
528
|
const VENDOR_WEBGL = 0x9245; // UNMASKED_VENDOR_WEBGL
|
|
442
529
|
const RENDERER_WEBGL = 0x9246; // UNMASKED_RENDERER_WEBGL
|
|
443
530
|
const spoof = (proto) => {
|
|
531
|
+
// The marker lives on the prototype so re-application is a no-op; the
|
|
532
|
+
// cast is the one typed-alternative-exhausted spot (adding an ad-hoc
|
|
533
|
+
// brand to a DOM prototype).
|
|
534
|
+
const marked = proto;
|
|
535
|
+
if (marked.__tsWebglPatched === true)
|
|
536
|
+
return;
|
|
444
537
|
const orig = proto.getParameter;
|
|
538
|
+
const native = orig.toString();
|
|
445
539
|
proto.getParameter = function (p) {
|
|
446
540
|
if (p === VENDOR_WEBGL)
|
|
447
|
-
return "
|
|
448
|
-
if (p === RENDERER_WEBGL)
|
|
449
|
-
return "Intel(R) UHD Graphics 620";
|
|
541
|
+
return "Google Inc. (Intel)";
|
|
542
|
+
if (p === RENDERER_WEBGL) {
|
|
543
|
+
return "ANGLE (Intel, Mesa Intel(R) UHD Graphics 620 (KBL GT2), OpenGL 4.6)";
|
|
544
|
+
}
|
|
450
545
|
return orig.call(this, p);
|
|
451
546
|
};
|
|
547
|
+
Object.defineProperty(proto.getParameter, "toString", {
|
|
548
|
+
value: () => native,
|
|
549
|
+
configurable: true,
|
|
550
|
+
writable: true,
|
|
551
|
+
});
|
|
552
|
+
marked.__tsWebglPatched = true;
|
|
452
553
|
};
|
|
453
554
|
if (typeof WebGLRenderingContext !== "undefined") {
|
|
454
555
|
spoof(WebGLRenderingContext.prototype);
|
|
@@ -456,8 +557,26 @@ export class BrowserController {
|
|
|
456
557
|
if (typeof WebGL2RenderingContext !== "undefined") {
|
|
457
558
|
spoof(WebGL2RenderingContext.prototype);
|
|
458
559
|
}
|
|
459
|
-
}
|
|
560
|
+
};
|
|
561
|
+
await context.addInitScript(installWebglSpoof);
|
|
460
562
|
this.page = context.pages()[0] ?? (await context.newPage());
|
|
563
|
+
// Re-apply on every navigation — the main-world reach patchright's isolated
|
|
564
|
+
// init world denies us. framenavigated fires at navigation-commit (before
|
|
565
|
+
// most page JS), so a late WebGL query (reCAPTCHA scores seconds in) sees
|
|
566
|
+
// the spoofed strings; a document-start fingerprinter could still race it.
|
|
567
|
+
const reapplyWebglSpoof = () => {
|
|
568
|
+
const pg = this.page;
|
|
569
|
+
if (pg === null)
|
|
570
|
+
return;
|
|
571
|
+
void pg.evaluate(installWebglSpoof).catch(() => {
|
|
572
|
+
// mid-navigation / closed page — the next navigation re-applies.
|
|
573
|
+
});
|
|
574
|
+
};
|
|
575
|
+
this.page.on("framenavigated", (frame) => {
|
|
576
|
+
if (this.page !== null && frame === this.page.mainFrame())
|
|
577
|
+
reapplyWebglSpoof();
|
|
578
|
+
});
|
|
579
|
+
this.page.on("load", reapplyWebglSpoof);
|
|
461
580
|
// rc.33 — captcha tracing. When UNIVERSAL_BOT_CAPTCHA_TRACE=1 is
|
|
462
581
|
// set, log every response from Cloudflare/Google's challenge
|
|
463
582
|
// endpoints plus any console message that mentions captcha-y
|
|
@@ -876,6 +995,87 @@ export class BrowserController {
|
|
|
876
995
|
await this.page.check(selector, { force: true });
|
|
877
996
|
}
|
|
878
997
|
}
|
|
998
|
+
// Deterministic pre-submit guard: tick every visible, unchecked,
|
|
999
|
+
// non-disabled REQUIRED-AGREEMENT checkbox (terms/privacy/consent),
|
|
1000
|
+
// while never touching marketing/newsletter opt-ins.
|
|
1001
|
+
//
|
|
1002
|
+
// Why this exists separate from the LLM planner: amplitude's signup
|
|
1003
|
+
// has a required TOS checkbox the planner skipped (it read the
|
|
1004
|
+
// adjacent data-storage card-radios as the whole cluster being
|
|
1005
|
+
// "ambiguous radios"), and amplitude does NOT disable submit when the
|
|
1006
|
+
// box is unticked — so the click silently no-ops and the bot then
|
|
1007
|
+
// waits forever for a verification mail that never sends. This runs on
|
|
1008
|
+
// EVERY submit, not only the `submit_disabled` path in clickSubmit().
|
|
1009
|
+
//
|
|
1010
|
+
// Returns the labels/testids it checked (for step logging); empty when
|
|
1011
|
+
// it ticked nothing.
|
|
1012
|
+
async checkRequiredAgreementBoxes() {
|
|
1013
|
+
if (!this.page)
|
|
1014
|
+
throw new Error("Browser not started");
|
|
1015
|
+
// Best-effort: a page-eval failure (navigation mid-call, detached
|
|
1016
|
+
// frame) must never fail the parent submit — return nothing.
|
|
1017
|
+
try {
|
|
1018
|
+
return await this.page.evaluate(() => {
|
|
1019
|
+
// These two regexes MUST stay byte-identical with
|
|
1020
|
+
// AGREEMENT_TEXT_RE / MARKETING_TEXT_RE in this module — the
|
|
1021
|
+
// page realm can't import, so they're inlined here.
|
|
1022
|
+
const agreementRe = /terms|tos\b|privacy|consent|policy|i agree|agree to|acknowledge|gdpr/i;
|
|
1023
|
+
const marketingRe = /newsletter|updates|offers|product tips|marketing|promotional|receive emails|opt[- ]?in to|subscribe/i;
|
|
1024
|
+
const checked = [];
|
|
1025
|
+
const boxes = Array.from(document.querySelectorAll('input[type="checkbox"]'));
|
|
1026
|
+
for (const box of boxes) {
|
|
1027
|
+
if (box.checked || box.disabled)
|
|
1028
|
+
continue;
|
|
1029
|
+
const rect = box.getBoundingClientRect();
|
|
1030
|
+
if (rect.width <= 0 || rect.height <= 0)
|
|
1031
|
+
continue;
|
|
1032
|
+
// Associated text = attributes + a label[for=id] + nearest
|
|
1033
|
+
// ancestor <label> + the immediately following sibling text.
|
|
1034
|
+
const parts = [
|
|
1035
|
+
box.getAttribute("data-testid") ?? "",
|
|
1036
|
+
box.getAttribute("name") ?? "",
|
|
1037
|
+
box.id,
|
|
1038
|
+
box.getAttribute("aria-label") ?? "",
|
|
1039
|
+
];
|
|
1040
|
+
if (box.id) {
|
|
1041
|
+
const forLabel = document.querySelector(`label[for="${CSS.escape(box.id)}"]`);
|
|
1042
|
+
if (forLabel)
|
|
1043
|
+
parts.push(forLabel.textContent ?? "");
|
|
1044
|
+
}
|
|
1045
|
+
const ancestorLabel = box.closest("label");
|
|
1046
|
+
if (ancestorLabel)
|
|
1047
|
+
parts.push(ancestorLabel.textContent ?? "");
|
|
1048
|
+
const sibling = box.nextSibling;
|
|
1049
|
+
if (sibling && sibling.textContent)
|
|
1050
|
+
parts.push(sibling.textContent);
|
|
1051
|
+
if (box.nextElementSibling) {
|
|
1052
|
+
parts.push(box.nextElementSibling.textContent ?? "");
|
|
1053
|
+
}
|
|
1054
|
+
const text = parts.join(" ");
|
|
1055
|
+
if (!agreementRe.test(text) || marketingRe.test(text))
|
|
1056
|
+
continue;
|
|
1057
|
+
// React/Vue controlled inputs ignore a bare `.checked = true`:
|
|
1058
|
+
// their state lives in the framework, updated only by the real
|
|
1059
|
+
// event flow. Set the property AND dispatch input/change AND a
|
|
1060
|
+
// synthetic click so the controlled binding observes the flip.
|
|
1061
|
+
box.checked = true;
|
|
1062
|
+
box.dispatchEvent(new Event("input", { bubbles: true }));
|
|
1063
|
+
box.dispatchEvent(new Event("change", { bubbles: true }));
|
|
1064
|
+
box.click();
|
|
1065
|
+
const label = box.getAttribute("data-testid") ||
|
|
1066
|
+
box.getAttribute("name") ||
|
|
1067
|
+
box.id ||
|
|
1068
|
+
box.getAttribute("aria-label") ||
|
|
1069
|
+
"agreement-checkbox";
|
|
1070
|
+
checked.push(label);
|
|
1071
|
+
}
|
|
1072
|
+
return checked;
|
|
1073
|
+
});
|
|
1074
|
+
}
|
|
1075
|
+
catch {
|
|
1076
|
+
return [];
|
|
1077
|
+
}
|
|
1078
|
+
}
|
|
879
1079
|
// Scroll a Terms-of-Service style modal to the bottom so the gated
|
|
880
1080
|
// "Accept" button enables. Railway's signup is the canonical case:
|
|
881
1081
|
// a modal with a virtualized ToS list watches real `scroll` /
|
|
@@ -1752,6 +1952,11 @@ export class BrowserController {
|
|
|
1752
1952
|
const recaptcha = document.querySelector('textarea[name="g-recaptcha-response"]');
|
|
1753
1953
|
if (recaptcha !== null && recaptcha.value.length > 0)
|
|
1754
1954
|
return true;
|
|
1955
|
+
// hCaptcha populates its own response textarea on a passed
|
|
1956
|
+
// checkbox (seen on Plausible's signup). Same shape as reCAPTCHA's.
|
|
1957
|
+
const hcaptcha = document.querySelector('textarea[name="h-captcha-response"]');
|
|
1958
|
+
if (hcaptcha !== null && hcaptcha.value.length > 0)
|
|
1959
|
+
return true;
|
|
1755
1960
|
// Some Turnstile installs use a managed mode that emits its
|
|
1756
1961
|
// own attribute on the host div when solved.
|
|
1757
1962
|
const cfManaged = document.querySelector(".cf-turnstile[data-state='success']");
|
|
@@ -1789,6 +1994,34 @@ export class BrowserController {
|
|
|
1789
1994
|
async findCaptchaWidget() {
|
|
1790
1995
|
if (!this.page)
|
|
1791
1996
|
throw new Error("Browser not started");
|
|
1997
|
+
// An INVISIBLE reCAPTCHA (api2/anchor with size=invisible — the
|
|
1998
|
+
// bottom-right badge) is score-mode: there is no checkbox to click, and
|
|
1999
|
+
// its token is emitted only when the form's submit handler calls
|
|
2000
|
+
// grecaptcha.execute(). It must NOT be treated as a solvable visible
|
|
2001
|
+
// widget. MEASURED on amplitude (2026-06-04): the badge iframe is
|
|
2002
|
+
// ~256×60, so it cleared the size filter below and got "found" + clicked;
|
|
2003
|
+
// the pre-submit token-poll then timed out and the bot escalated to
|
|
2004
|
+
// 2Captcha, which can't solve a score-mode widget (ERROR_CAPTCHA_
|
|
2005
|
+
// UNSOLVABLE) → captcha_blocked — even though our v3 score is ~1.0 and a
|
|
2006
|
+
// plain form-submit would have passed silently. Detect "invisible-only"
|
|
2007
|
+
// (badge present, no visible checkbox anchor, no rendered bframe grid) and
|
|
2008
|
+
// skip reCAPTCHA entirely so the signup proceeds to submit.
|
|
2009
|
+
const recaptchaInvisibleOnly = await this.page
|
|
2010
|
+
.evaluate(() => {
|
|
2011
|
+
const q = (s) => document.querySelector(s) !== null;
|
|
2012
|
+
const visibleAnchor = Array.from(document.querySelectorAll('iframe[src*="recaptcha/api2/anchor"]')).some((f) => !/size=invisible/.test(f.src));
|
|
2013
|
+
const bframe = (() => {
|
|
2014
|
+
const f = document.querySelector('iframe[src*="recaptcha/api2/bframe"]');
|
|
2015
|
+
if (f === null)
|
|
2016
|
+
return false;
|
|
2017
|
+
const r = f.getBoundingClientRect();
|
|
2018
|
+
return r.width > 30 && r.height > 30;
|
|
2019
|
+
})();
|
|
2020
|
+
const invisiblePresent = q('iframe[src*="recaptcha/api2/anchor"][src*="size=invisible"]') ||
|
|
2021
|
+
q(".grecaptcha-badge");
|
|
2022
|
+
return invisiblePresent && !visibleAnchor && !bframe;
|
|
2023
|
+
})
|
|
2024
|
+
.catch(() => false);
|
|
1792
2025
|
// Phase 1: widget shape with polling. page.locator (unlike the
|
|
1793
2026
|
// querySelector in detectCaptchaVariant) pierces OPEN shadow roots,
|
|
1794
2027
|
// so the Cloudflare iframe is reachable even on modern shadow-DOM
|
|
@@ -1801,11 +2034,19 @@ export class BrowserController {
|
|
|
1801
2034
|
// reCAPTCHA v2: src contains "recaptcha/api2"
|
|
1802
2035
|
const iframeCandidates = [
|
|
1803
2036
|
{ kind: "turnstile", selector: 'iframe[src*="challenges.cloudflare.com"]' },
|
|
1804
|
-
|
|
2037
|
+
// Visible reCAPTCHA only — the size=invisible anchor (score-mode badge)
|
|
2038
|
+
// is handled by the recaptchaInvisibleOnly skip above.
|
|
2039
|
+
{ kind: "recaptcha", selector: 'iframe[src*="recaptcha/api2/anchor"]:not([src*="size=invisible"])' },
|
|
2040
|
+
// hCaptcha's checkbox iframe (the anchor frame). Plausible and other
|
|
2041
|
+
// hCaptcha sites render this; clicking it ticks the box the same way
|
|
2042
|
+
// Turnstile/reCAPTCHA do.
|
|
2043
|
+
{ kind: "hcaptcha", selector: 'iframe[src*="hcaptcha.com"][src*="frame=checkbox"]' },
|
|
2044
|
+
{ kind: "hcaptcha", selector: 'iframe[src*="newassets.hcaptcha.com"]' },
|
|
1805
2045
|
// Host-div fallbacks (light DOM) — preferred order keeps the iframe
|
|
1806
2046
|
// first when present (more precise click target).
|
|
1807
2047
|
{ kind: "turnstile", selector: ".cf-turnstile" },
|
|
1808
2048
|
{ kind: "turnstile", selector: "#clerk-captcha" },
|
|
2049
|
+
{ kind: "hcaptcha", selector: ".h-captcha" },
|
|
1809
2050
|
];
|
|
1810
2051
|
const iframeDeadline = Date.now() + 5000;
|
|
1811
2052
|
while (Date.now() < iframeDeadline) {
|
|
@@ -1832,8 +2073,14 @@ export class BrowserController {
|
|
|
1832
2073
|
const hostCandidates = [
|
|
1833
2074
|
{ kind: "turnstile", selector: 'input[name="cf-turnstile-response"]' },
|
|
1834
2075
|
{ kind: "recaptcha", selector: 'textarea[name="g-recaptcha-response"]' },
|
|
2076
|
+
{ kind: "hcaptcha", selector: 'textarea[name="h-captcha-response"]' },
|
|
1835
2077
|
];
|
|
1836
2078
|
for (const { kind, selector } of hostCandidates) {
|
|
2079
|
+
// The invisible reCAPTCHA's hidden g-recaptcha-response textarea lives
|
|
2080
|
+
// INSIDE the .grecaptcha-badge (~256×60), so the walk-up below would
|
|
2081
|
+
// return the badge box and we'd click it — the exact bug. Skip it.
|
|
2082
|
+
if (kind === "recaptcha" && recaptchaInvisibleOnly)
|
|
2083
|
+
continue;
|
|
1837
2084
|
const locator = this.page.locator(selector);
|
|
1838
2085
|
const count = await locator.count();
|
|
1839
2086
|
if (count === 0)
|
|
@@ -1910,11 +2157,14 @@ export class BrowserController {
|
|
|
1910
2157
|
else if (present('iframe[src*="hcaptcha.com"]')) {
|
|
1911
2158
|
variant = "hcaptcha";
|
|
1912
2159
|
}
|
|
1913
|
-
else if (present('iframe[src*="recaptcha/api2/anchor"]')) {
|
|
2160
|
+
else if (present('iframe[src*="recaptcha/api2/anchor"]:not([src*="size=invisible"])')) {
|
|
2161
|
+
// VISIBLE checkbox anchor (size=normal) → clickable v2.
|
|
1914
2162
|
variant = "recaptcha_v2";
|
|
1915
2163
|
}
|
|
1916
|
-
else if (present(".grecaptcha-badge")
|
|
1917
|
-
|
|
2164
|
+
else if (present(".grecaptcha-badge") ||
|
|
2165
|
+
present('iframe[src*="recaptcha/api2/anchor"][src*="size=invisible"]')) {
|
|
2166
|
+
// Badge / size=invisible anchor and no clickable checkbox →
|
|
2167
|
+
// score-mode reCAPTCHA (passes on submit, nothing to click).
|
|
1918
2168
|
variant = "recaptcha_v3";
|
|
1919
2169
|
}
|
|
1920
2170
|
return { variant, challengeRendered };
|
|
@@ -1935,25 +2185,37 @@ export class BrowserController {
|
|
|
1935
2185
|
// help). Reads from the standard places sites declare it:
|
|
1936
2186
|
// 1. <div class="g-recaptcha" data-sitekey="...">
|
|
1937
2187
|
// 2. <iframe src="...?k=SITEKEY&..."> (api2/anchor frame)
|
|
1938
|
-
//
|
|
2188
|
+
//
|
|
2189
|
+
// CRITICAL: only ever returns a GENUINE reCAPTCHA key. hCaptcha
|
|
2190
|
+
// (`.h-captcha`) and Turnstile (`.cf-turnstile`) ALSO publish a
|
|
2191
|
+
// `data-sitekey` attribute, so a bare `[data-sitekey]` selector
|
|
2192
|
+
// grabs the wrong provider's key and the caller ships it to
|
|
2193
|
+
// 2Captcha's `userrecaptcha` endpoint → ERROR_WRONG_GOOGLEKEY (the
|
|
2194
|
+
// plausible/hCaptcha case). The authoritative discriminator is the
|
|
2195
|
+
// key FORMAT: reCAPTCHA public keys always start with `6L`; hCaptcha
|
|
2196
|
+
// keys are UUIDs (`bc609205-…`); Turnstile keys start with `0x`. We
|
|
2197
|
+
// both scope the selector away from the other widgets AND gate on
|
|
2198
|
+
// the `6L` prefix, so no non-reCAPTCHA key can ever leak through.
|
|
1939
2199
|
async extractRecaptchaSitekey() {
|
|
1940
2200
|
if (!this.page)
|
|
1941
2201
|
throw new Error("Browser not started");
|
|
1942
2202
|
try {
|
|
1943
2203
|
const sitekey = await this.page.evaluate(() => {
|
|
1944
|
-
|
|
1945
|
-
|
|
1946
|
-
|
|
1947
|
-
|
|
1948
|
-
|
|
2204
|
+
const isRecaptchaKey = (k) => k !== null && /^6L/.test(k) && k.length > 30;
|
|
2205
|
+
// 1. data-sitekey, but NOT on an hCaptcha/Turnstile widget (or
|
|
2206
|
+
// nested inside one). Those publish data-sitekey too.
|
|
2207
|
+
const anchors = Array.from(document.querySelectorAll("[data-sitekey]")).filter((el) => el.closest(".h-captcha, .cf-turnstile") === null);
|
|
2208
|
+
for (const el of anchors) {
|
|
2209
|
+
const k = el.getAttribute("data-sitekey");
|
|
2210
|
+
if (isRecaptchaKey(k))
|
|
1949
2211
|
return k;
|
|
1950
2212
|
}
|
|
1951
|
-
// 2. The api2 iframe src carries ?k=SITEKEY.
|
|
2213
|
+
// 2. The api2/enterprise iframe src carries ?k=SITEKEY.
|
|
1952
2214
|
const iframes = Array.from(document.querySelectorAll('iframe[src*="recaptcha/api2"], iframe[src*="recaptcha/enterprise"]'));
|
|
1953
2215
|
for (const ifr of iframes) {
|
|
1954
2216
|
const url = new URL(ifr.src);
|
|
1955
2217
|
const k = url.searchParams.get("k");
|
|
1956
|
-
if (k
|
|
2218
|
+
if (isRecaptchaKey(k))
|
|
1957
2219
|
return k;
|
|
1958
2220
|
}
|
|
1959
2221
|
return null;
|
|
@@ -2027,6 +2289,161 @@ export class BrowserController {
|
|
|
2027
2289
|
return false;
|
|
2028
2290
|
}
|
|
2029
2291
|
}
|
|
2292
|
+
// Mint the score token for an INVISIBLE reCAPTCHA by calling
|
|
2293
|
+
// grecaptcha.execute() ourselves, then wait for g-recaptcha-response to
|
|
2294
|
+
// populate. MEASURED on amplitude (2026-06-04): an invisible reCAPTCHA's
|
|
2295
|
+
// token only exists once execute() runs, and amplitude's form REQUIRES it —
|
|
2296
|
+
// merely skipping the badge (not clicking it) left the textarea empty and
|
|
2297
|
+
// the submit silently no-op'd. With our ~1.0 v3 score, execute() returns a
|
|
2298
|
+
// passing token in ~1-3s, so the subsequent submit carries a valid token.
|
|
2299
|
+
// Handles both standard (grecaptcha) and enterprise (grecaptcha.enterprise)
|
|
2300
|
+
// namespaces. Returns true once a token is present. Best-effort: a missing
|
|
2301
|
+
// grecaptcha or an execute() throw resolves false (the form may still mint
|
|
2302
|
+
// it on its own submit handler).
|
|
2303
|
+
async triggerInvisibleRecaptcha(timeoutMs = 9000) {
|
|
2304
|
+
if (!this.page)
|
|
2305
|
+
throw new Error("Browser not started");
|
|
2306
|
+
const tokenPresent = () => this.page.evaluate(() => {
|
|
2307
|
+
const ta = document.querySelector('textarea[name="g-recaptcha-response"], textarea[id^="g-recaptcha-response"]');
|
|
2308
|
+
return ta !== null && ta.value.length > 0;
|
|
2309
|
+
}).catch(() => false);
|
|
2310
|
+
if (await tokenPresent())
|
|
2311
|
+
return true;
|
|
2312
|
+
const fired = await this.page
|
|
2313
|
+
.evaluate(() => {
|
|
2314
|
+
const w = window;
|
|
2315
|
+
const g = w.grecaptcha;
|
|
2316
|
+
if (g === undefined)
|
|
2317
|
+
return false;
|
|
2318
|
+
let any = false;
|
|
2319
|
+
const ids = (() => {
|
|
2320
|
+
try {
|
|
2321
|
+
return Object.keys(w.___grecaptcha_cfg?.clients ?? {});
|
|
2322
|
+
}
|
|
2323
|
+
catch {
|
|
2324
|
+
return [];
|
|
2325
|
+
}
|
|
2326
|
+
})();
|
|
2327
|
+
for (const id of ids) {
|
|
2328
|
+
const n = Number(id);
|
|
2329
|
+
if (!Number.isFinite(n))
|
|
2330
|
+
continue;
|
|
2331
|
+
try {
|
|
2332
|
+
g.enterprise?.execute?.(n);
|
|
2333
|
+
any = true;
|
|
2334
|
+
}
|
|
2335
|
+
catch {
|
|
2336
|
+
/* not this namespace */
|
|
2337
|
+
}
|
|
2338
|
+
try {
|
|
2339
|
+
g.execute?.(n);
|
|
2340
|
+
any = true;
|
|
2341
|
+
}
|
|
2342
|
+
catch {
|
|
2343
|
+
/* widget already executed / wrong namespace */
|
|
2344
|
+
}
|
|
2345
|
+
}
|
|
2346
|
+
// Fallback: no enumerable clients — try the bare (first-widget) call,
|
|
2347
|
+
// enterprise first (a v2-invisible page exposes plain execute()).
|
|
2348
|
+
if (!any) {
|
|
2349
|
+
try {
|
|
2350
|
+
if (typeof g.enterprise?.execute === "function") {
|
|
2351
|
+
g.enterprise.execute();
|
|
2352
|
+
any = true;
|
|
2353
|
+
}
|
|
2354
|
+
else if (typeof g.execute === "function") {
|
|
2355
|
+
g.execute();
|
|
2356
|
+
any = true;
|
|
2357
|
+
}
|
|
2358
|
+
}
|
|
2359
|
+
catch {
|
|
2360
|
+
return false;
|
|
2361
|
+
}
|
|
2362
|
+
}
|
|
2363
|
+
return any;
|
|
2364
|
+
})
|
|
2365
|
+
.catch(() => false);
|
|
2366
|
+
if (!fired)
|
|
2367
|
+
return false;
|
|
2368
|
+
const start = Date.now();
|
|
2369
|
+
while (Date.now() - start < timeoutMs) {
|
|
2370
|
+
await this.sleep(500);
|
|
2371
|
+
if (await tokenPresent())
|
|
2372
|
+
return true;
|
|
2373
|
+
}
|
|
2374
|
+
return false;
|
|
2375
|
+
}
|
|
2376
|
+
// Tier 3 hCaptcha support — extract the hCaptcha sitekey so 2Captcha
|
|
2377
|
+
// can solve it. hCaptcha publishes its key on `.h-captcha[data-sitekey]`
|
|
2378
|
+
// or in the checkbox iframe's `?sitekey=` query. Keys are UUIDs (the
|
|
2379
|
+
// reCAPTCHA `6L` guard in extractRecaptchaSitekey deliberately rejects
|
|
2380
|
+
// them, which is why hCaptcha needs its own extractor). Returns null
|
|
2381
|
+
// when no hCaptcha widget is present.
|
|
2382
|
+
async extractHcaptchaSitekey() {
|
|
2383
|
+
if (!this.page)
|
|
2384
|
+
throw new Error("Browser not started");
|
|
2385
|
+
try {
|
|
2386
|
+
return await this.page.evaluate(() => {
|
|
2387
|
+
const div = document.querySelector(".h-captcha[data-sitekey], [data-hcaptcha-sitekey]");
|
|
2388
|
+
if (div !== null) {
|
|
2389
|
+
const k = div.getAttribute("data-sitekey") ??
|
|
2390
|
+
div.getAttribute("data-hcaptcha-sitekey");
|
|
2391
|
+
if (k !== null && k.length > 10)
|
|
2392
|
+
return k;
|
|
2393
|
+
}
|
|
2394
|
+
const iframe = document.querySelector('iframe[src*="hcaptcha.com"]');
|
|
2395
|
+
if (iframe !== null) {
|
|
2396
|
+
const k = new URL(iframe.src).searchParams.get("sitekey");
|
|
2397
|
+
if (k !== null && k.length > 10)
|
|
2398
|
+
return k;
|
|
2399
|
+
}
|
|
2400
|
+
return null;
|
|
2401
|
+
});
|
|
2402
|
+
}
|
|
2403
|
+
catch {
|
|
2404
|
+
return null;
|
|
2405
|
+
}
|
|
2406
|
+
}
|
|
2407
|
+
// Inject a 2Captcha-resolved hCaptcha token into the page's
|
|
2408
|
+
// h-captcha-response textarea(s) and fire the widget's data-callback
|
|
2409
|
+
// if the page registered one. Mirrors injectRecaptchaToken; hCaptcha
|
|
2410
|
+
// also mirrors the response token into a g-recaptcha-response textarea
|
|
2411
|
+
// on some compat installs, so populate both names if present.
|
|
2412
|
+
async injectHcaptchaToken(token) {
|
|
2413
|
+
if (!this.page)
|
|
2414
|
+
throw new Error("Browser not started");
|
|
2415
|
+
try {
|
|
2416
|
+
return await this.page.evaluate((tok) => {
|
|
2417
|
+
const inputs = Array.from(document.querySelectorAll('textarea[name="h-captcha-response"], textarea[id^="h-captcha-response"], textarea[name="g-recaptcha-response"]'));
|
|
2418
|
+
if (inputs.length === 0)
|
|
2419
|
+
return false;
|
|
2420
|
+
for (const input of inputs) {
|
|
2421
|
+
input.value = tok;
|
|
2422
|
+
input.dispatchEvent(new Event("input", { bubbles: true }));
|
|
2423
|
+
input.dispatchEvent(new Event("change", { bubbles: true }));
|
|
2424
|
+
}
|
|
2425
|
+
// Fire the data-callback the page registered on the .h-captcha
|
|
2426
|
+
// host (hCaptcha calls it by name on window). Best-effort — the
|
|
2427
|
+
// populated textarea is what server-side validation reads.
|
|
2428
|
+
try {
|
|
2429
|
+
const host = document.querySelector(".h-captcha[data-callback]");
|
|
2430
|
+
const name = host?.getAttribute("data-callback");
|
|
2431
|
+
if (name !== null && name !== undefined) {
|
|
2432
|
+
const fn = window[name];
|
|
2433
|
+
if (typeof fn === "function")
|
|
2434
|
+
fn(tok);
|
|
2435
|
+
}
|
|
2436
|
+
}
|
|
2437
|
+
catch {
|
|
2438
|
+
// no named callback — DOM injection stands.
|
|
2439
|
+
}
|
|
2440
|
+
return true;
|
|
2441
|
+
}, token);
|
|
2442
|
+
}
|
|
2443
|
+
catch {
|
|
2444
|
+
return false;
|
|
2445
|
+
}
|
|
2446
|
+
}
|
|
2030
2447
|
// Small mouse wiggle near the current position. Used during prewarm
|
|
2031
2448
|
// so the page sees pointer events before we navigate away.
|
|
2032
2449
|
async jitterMouse() {
|
|
@@ -2808,35 +3225,110 @@ export class BrowserController {
|
|
|
2808
3225
|
async waitForAntiBotInterstitialToClear(timeoutMs) {
|
|
2809
3226
|
if (!this.page)
|
|
2810
3227
|
return;
|
|
2811
|
-
|
|
2812
|
-
|
|
2813
|
-
|
|
2814
|
-
|
|
3228
|
+
const first = await this.pollUntilInterstitialClears(timeoutMs);
|
|
3229
|
+
// Never saw an interstitial, or saw one and it cleared on its own —
|
|
3230
|
+
// nothing more to do.
|
|
3231
|
+
if (!first.detected || first.cleared)
|
|
2815
3232
|
return;
|
|
3233
|
+
// Still on the interstitial at the deadline. If Cloudflare reported
|
|
3234
|
+
// the challenge PASSED ("Verification successful"), the redirect is
|
|
3235
|
+
// just racing/stuck — be patient through ANOTHER full window before
|
|
3236
|
+
// touching anything (a reload mid-redirect can re-arm the challenge).
|
|
3237
|
+
if (first.verificationPassed) {
|
|
3238
|
+
const patient = await this.pollUntilInterstitialClears(timeoutMs);
|
|
3239
|
+
if (patient.cleared)
|
|
3240
|
+
return;
|
|
3241
|
+
// "Verification successful" but the page never advances is the
|
|
3242
|
+
// signature of a STALE cf_clearance cookie — issued on a prior visit
|
|
3243
|
+
// (often a different egress IP), which CF matches ("successful") but
|
|
3244
|
+
// the origin then rejects, looping forever on "Waiting for the page
|
|
3245
|
+
// to load." MEASURED: a clean profile clears codesandbox's challenge
|
|
3246
|
+
// in ~12s; the stale cookie is what stalls the shared profile. Drop
|
|
3247
|
+
// the CF cookies to force a FRESH challenge, then reload.
|
|
3248
|
+
if (await this.clearCloudflareCookiesAndRetry(timeoutMs))
|
|
3249
|
+
return;
|
|
3250
|
+
// Or the auto-redirect simply stalled with a still-valid clearance —
|
|
3251
|
+
// re-navigate past the one-shot challenge token.
|
|
3252
|
+
if (await this.forceNavigatePastClearedChallenge())
|
|
3253
|
+
return;
|
|
2816
3254
|
}
|
|
2817
|
-
//
|
|
2818
|
-
//
|
|
2819
|
-
//
|
|
2820
|
-
//
|
|
2821
|
-
//
|
|
2822
|
-
//
|
|
2823
|
-
// block — fingerprint/IP — reload won't help, but the caller's
|
|
2824
|
-
// inventory diagnostic will still surface the block.)
|
|
3255
|
+
// Force the real page: now that the cf_clearance cookie is set, a
|
|
3256
|
+
// reload often renders it. domcontentloaded (not networkidle) — the
|
|
3257
|
+
// real page is usually a heavy SPA that never reaches networkidle, so
|
|
3258
|
+
// waiting for it just burns the budget back into a timeout. (If it's a
|
|
3259
|
+
// server-side risk-score block — fingerprint/IP — reload won't help,
|
|
3260
|
+
// but the caller's inventory diagnostic will still surface the block.)
|
|
2825
3261
|
try {
|
|
2826
|
-
await this.page.reload({ waitUntil: "
|
|
3262
|
+
await this.page.reload({ waitUntil: "domcontentloaded", timeout: 15_000 });
|
|
2827
3263
|
}
|
|
2828
3264
|
catch {
|
|
2829
3265
|
// reload failed — proceed with what's there
|
|
2830
3266
|
}
|
|
2831
3267
|
await this.pollUntilInterstitialClears(Math.max(5000, timeoutMs / 2));
|
|
2832
3268
|
}
|
|
2833
|
-
//
|
|
2834
|
-
//
|
|
2835
|
-
|
|
3269
|
+
// Drop Cloudflare's anti-bot cookies (cf_clearance + __cf_bm) so the next
|
|
3270
|
+
// request triggers a FRESH managed challenge, then reload and wait for it
|
|
3271
|
+
// to clear. Scoped to cookie NAME — only CF's own cookies are removed, so
|
|
3272
|
+
// an OAuth provider's session on accounts.google.com / github.com is
|
|
3273
|
+
// untouched. A fresh challenge on a residential IP clears in ~12-15s, so
|
|
3274
|
+
// we give it a generous window. Returns true if the interstitial is gone.
|
|
3275
|
+
async clearCloudflareCookiesAndRetry(timeoutMs) {
|
|
3276
|
+
if (!this.page || !this.context)
|
|
3277
|
+
return false;
|
|
3278
|
+
try {
|
|
3279
|
+
await this.context.clearCookies({ name: "cf_clearance" });
|
|
3280
|
+
await this.context.clearCookies({ name: "__cf_bm" });
|
|
3281
|
+
}
|
|
3282
|
+
catch {
|
|
3283
|
+
// clearCookies filter unsupported / failed — nothing to retry on.
|
|
3284
|
+
return false;
|
|
3285
|
+
}
|
|
3286
|
+
try {
|
|
3287
|
+
await this.page.reload({ waitUntil: "domcontentloaded", timeout: 20_000 });
|
|
3288
|
+
}
|
|
3289
|
+
catch {
|
|
3290
|
+
// reload failed — still give the poll a chance below.
|
|
3291
|
+
}
|
|
3292
|
+
const after = await this.pollUntilInterstitialClears(Math.max(20_000, timeoutMs));
|
|
3293
|
+
return after.cleared || !after.detected;
|
|
3294
|
+
}
|
|
3295
|
+
// With a CONFIRMED Cloudflare pass, re-navigate to the current URL with
|
|
3296
|
+
// the one-shot `__cf_chl_*` challenge token stripped — the cf_clearance
|
|
3297
|
+
// cookie is already set, so the edge serves the real page instead of the
|
|
3298
|
+
// stuck redirect. Returns true if the interstitial is gone afterwards.
|
|
3299
|
+
// Returns false (caller falls back to a plain reload) when there's no
|
|
3300
|
+
// token to strip or the navigation didn't clear the gate.
|
|
3301
|
+
async forceNavigatePastClearedChallenge() {
|
|
2836
3302
|
if (!this.page)
|
|
2837
3303
|
return false;
|
|
3304
|
+
const cleaned = stripCloudflareChallengeParams(this.page.url());
|
|
3305
|
+
if (!cleaned)
|
|
3306
|
+
return false;
|
|
3307
|
+
try {
|
|
3308
|
+
await this.page.goto(cleaned, {
|
|
3309
|
+
waitUntil: "domcontentloaded",
|
|
3310
|
+
timeout: 15_000,
|
|
3311
|
+
});
|
|
3312
|
+
}
|
|
3313
|
+
catch {
|
|
3314
|
+
return false;
|
|
3315
|
+
}
|
|
3316
|
+
const after = await this.pollUntilInterstitialClears(Math.max(5000, 8000));
|
|
3317
|
+
// cleared = saw it then it went away; !detected = the real page rendered
|
|
3318
|
+
// immediately (no interstitial on the post-nav page at all).
|
|
3319
|
+
return after.cleared || !after.detected;
|
|
3320
|
+
}
|
|
3321
|
+
// One poll loop. `detected` = an interstitial was observed at least
|
|
3322
|
+
// once; `cleared` = it was observed AND then went away (vs. still there
|
|
3323
|
+
// at the deadline); `verificationPassed` = Cloudflare reported the
|
|
3324
|
+
// challenge succeeded at some point during the wait (see
|
|
3325
|
+
// classifyInterstitialText).
|
|
3326
|
+
async pollUntilInterstitialClears(timeoutMs) {
|
|
3327
|
+
if (!this.page)
|
|
3328
|
+
return { detected: false, cleared: false, verificationPassed: false };
|
|
2838
3329
|
const deadline = Date.now() + timeoutMs;
|
|
2839
3330
|
let detected = false;
|
|
3331
|
+
let verificationPassed = false;
|
|
2840
3332
|
while (Date.now() < deadline) {
|
|
2841
3333
|
let title = "";
|
|
2842
3334
|
let bodyText = "";
|
|
@@ -2848,19 +3340,21 @@ export class BrowserController {
|
|
|
2848
3340
|
await new Promise((r) => setTimeout(r, 500));
|
|
2849
3341
|
continue;
|
|
2850
3342
|
}
|
|
2851
|
-
const
|
|
2852
|
-
if (
|
|
3343
|
+
const c = classifyInterstitialText(title + " " + bodyText);
|
|
3344
|
+
if (c.verificationPassed)
|
|
3345
|
+
verificationPassed = true;
|
|
3346
|
+
if (!c.onInterstitial) {
|
|
2853
3347
|
if (detected) {
|
|
2854
3348
|
// Give the freshly-revealed page a tick to hydrate before
|
|
2855
3349
|
// the inventory scan.
|
|
2856
3350
|
await new Promise((r) => setTimeout(r, 800));
|
|
2857
3351
|
}
|
|
2858
|
-
return detected;
|
|
3352
|
+
return { detected, cleared: detected, verificationPassed };
|
|
2859
3353
|
}
|
|
2860
3354
|
detected = true;
|
|
2861
3355
|
await new Promise((r) => setTimeout(r, 1000));
|
|
2862
3356
|
}
|
|
2863
|
-
return detected;
|
|
3357
|
+
return { detected, cleared: false, verificationPassed };
|
|
2864
3358
|
}
|
|
2865
3359
|
// Walk the live DOM (piercing open shadow roots) and return every
|
|
2866
3360
|
// visible interactive element with a bot-computed selector (F3 T1).
|
|
@@ -3269,16 +3763,250 @@ export class BrowserController {
|
|
|
3269
3763
|
// best-effort — the agent's consent loop re-reads state regardless
|
|
3270
3764
|
}
|
|
3271
3765
|
}
|
|
3766
|
+
// Does the page sign in with Google via Google Identity Services (GSI)
|
|
3767
|
+
// rather than classic OAuth redirect? GSI renders its button in a
|
|
3768
|
+
// cross-origin iframe (accounts.google.com/gsi/button) and/or exposes the
|
|
3769
|
+
// `google.accounts.id` JS API; on use it raises a browser-native FedCM
|
|
3770
|
+
// dialog or a popup and returns a JWT to a JS callback — there is NO
|
|
3771
|
+
// redirect, so the classic startOAuth flow can't drive it. Detecting this
|
|
3772
|
+
// is what lets the agent route to tryGoogleGsiLogin instead.
|
|
3773
|
+
async hasGoogleGsiAffordance() {
|
|
3774
|
+
if (!this.page)
|
|
3775
|
+
return false;
|
|
3776
|
+
try {
|
|
3777
|
+
return await this.page.evaluate(() => {
|
|
3778
|
+
if (document.querySelector('iframe[src*="accounts.google.com/gsi/"]') !== null) {
|
|
3779
|
+
return true;
|
|
3780
|
+
}
|
|
3781
|
+
// On-demand One-Tap: the page loads the GSI client script but renders
|
|
3782
|
+
// no static button and may not have initialized `google.accounts.id`
|
|
3783
|
+
// yet (amplitude, clerk). A plain click on the in-page "Sign in with
|
|
3784
|
+
// Google" affordance never redirects, so the bot used to falsely
|
|
3785
|
+
// conclude "signed in" and bounce to login. Treat the loaded client
|
|
3786
|
+
// script as a GSI affordance so the agent routes through
|
|
3787
|
+
// tryGoogleGsiLogin, which now raises One-Tap programmatically.
|
|
3788
|
+
if (document.querySelector('script[src*="accounts.google.com/gsi/client"]') !== null) {
|
|
3789
|
+
return true;
|
|
3790
|
+
}
|
|
3791
|
+
const g = window.google;
|
|
3792
|
+
return typeof g?.accounts?.id !== "undefined";
|
|
3793
|
+
});
|
|
3794
|
+
}
|
|
3795
|
+
catch {
|
|
3796
|
+
return false;
|
|
3797
|
+
}
|
|
3798
|
+
}
|
|
3799
|
+
// Drive a Google Identity Services / FedCM sign-in. Two variants are
|
|
3800
|
+
// handled:
|
|
3801
|
+
// - FedCM: clicking the GSI widget raises a browser-NATIVE credential
|
|
3802
|
+
// dialog (no DOM, no popup — invisible to Playwright). We enable the
|
|
3803
|
+
// CDP FedCm domain up front and auto-select the first account when
|
|
3804
|
+
// FedCm.dialogShown fires. The page's JS callback then receives the
|
|
3805
|
+
// JWT and establishes the session.
|
|
3806
|
+
// - Popup: older GSI opens a Google account-chooser window; we adopt it
|
|
3807
|
+
// like startOAuth does so the consent loop can drive it.
|
|
3808
|
+
// Returns how it resolved. The caller then runs the SAME post-OAuth
|
|
3809
|
+
// settle/consent/post-verify path as the redirect flow.
|
|
3810
|
+
async tryGoogleGsiLogin(triggerSelector, timeoutMs = 25_000) {
|
|
3811
|
+
if (!this.page || !this.context)
|
|
3812
|
+
throw new Error("Browser not started");
|
|
3813
|
+
this.oauthProductPage = this.page;
|
|
3814
|
+
let fedcmResolved = false;
|
|
3815
|
+
let cdp = null;
|
|
3816
|
+
try {
|
|
3817
|
+
cdp = await this.context.newCDPSession(this.page);
|
|
3818
|
+
await cdp.send("FedCm.enable", { disableRejectionDelay: true });
|
|
3819
|
+
cdp.on("FedCm.dialogShown", (ev) => {
|
|
3820
|
+
const e = ev;
|
|
3821
|
+
const dialogId = e.dialogId;
|
|
3822
|
+
if (dialogId === undefined)
|
|
3823
|
+
return;
|
|
3824
|
+
void (async () => {
|
|
3825
|
+
// A ConfirmIdpLogin dialog has no account list — it's the "Continue
|
|
3826
|
+
// as / sign in to Google" confirmation that precedes the account
|
|
3827
|
+
// chooser. selectAccount would error on it, so drive the confirm
|
|
3828
|
+
// button directly and skip selectAccount for this dialog type.
|
|
3829
|
+
if (e.dialogType === "ConfirmIdpLogin") {
|
|
3830
|
+
try {
|
|
3831
|
+
await cdp.send("FedCm.clickDialogButton", {
|
|
3832
|
+
dialogId,
|
|
3833
|
+
dialogButton: "ConfirmIdpLoginContinue",
|
|
3834
|
+
});
|
|
3835
|
+
}
|
|
3836
|
+
catch {
|
|
3837
|
+
// method/param may not apply to this build/dialog — non-fatal;
|
|
3838
|
+
// a subsequent AccountChooser dialog still resolves via select.
|
|
3839
|
+
}
|
|
3840
|
+
return;
|
|
3841
|
+
}
|
|
3842
|
+
try {
|
|
3843
|
+
// Pick the first account on the account-chooser dialog.
|
|
3844
|
+
await cdp.send("FedCm.selectAccount", { dialogId, accountIndex: 0 });
|
|
3845
|
+
fedcmResolved = true;
|
|
3846
|
+
}
|
|
3847
|
+
catch {
|
|
3848
|
+
// dialog dismissed or already resolved
|
|
3849
|
+
}
|
|
3850
|
+
if (!fedcmResolved) {
|
|
3851
|
+
// Some flows surface a "Continue as <name>" confirm even on the
|
|
3852
|
+
// account dialog; selectAccount alone usually completes it, but
|
|
3853
|
+
// when it didn't, try the confirm button as a fallback. Failure
|
|
3854
|
+
// is non-fatal — the popup/none path still applies.
|
|
3855
|
+
try {
|
|
3856
|
+
await cdp.send("FedCm.clickDialogButton", {
|
|
3857
|
+
dialogId,
|
|
3858
|
+
dialogButton: "ConfirmIdpLoginContinue",
|
|
3859
|
+
});
|
|
3860
|
+
fedcmResolved = true;
|
|
3861
|
+
}
|
|
3862
|
+
catch {
|
|
3863
|
+
// button absent or not applicable — degrade to popup/none
|
|
3864
|
+
}
|
|
3865
|
+
}
|
|
3866
|
+
})();
|
|
3867
|
+
});
|
|
3868
|
+
}
|
|
3869
|
+
catch {
|
|
3870
|
+
cdp = null; // FedCm domain unavailable — the popup path still works
|
|
3871
|
+
}
|
|
3872
|
+
const popupPromise = this.context
|
|
3873
|
+
.waitForEvent("page", { timeout: timeoutMs })
|
|
3874
|
+
.then((p) => p)
|
|
3875
|
+
.catch(() => null);
|
|
3876
|
+
await this.click(triggerSelector);
|
|
3877
|
+
// On-demand One-Tap: when the page loaded the GSI client but rendered no
|
|
3878
|
+
// static button, the click above hits an in-page affordance that never
|
|
3879
|
+
// raises a dialog on its own. If neither a FedCM dialog nor a popup has
|
|
3880
|
+
// appeared shortly after the click, ask GSI to raise One-Tap itself.
|
|
3881
|
+
// `google.accounts.id.prompt()` triggers the FedCM dialog our handler is
|
|
3882
|
+
// already listening for. Guarded — `window.google.accounts.id` may be
|
|
3883
|
+
// undefined (no-op) and any failure must degrade to the popup/none path.
|
|
3884
|
+
if (cdp !== null) {
|
|
3885
|
+
const promptDeadline = Date.now() + Math.min(4_000, timeoutMs);
|
|
3886
|
+
while (Date.now() < promptDeadline &&
|
|
3887
|
+
!fedcmResolved &&
|
|
3888
|
+
this.context.pages().length <= 1) {
|
|
3889
|
+
await this.sleep(250);
|
|
3890
|
+
}
|
|
3891
|
+
if (!fedcmResolved && this.context.pages().length <= 1) {
|
|
3892
|
+
try {
|
|
3893
|
+
await this.page.evaluate(() => {
|
|
3894
|
+
const g = window.google;
|
|
3895
|
+
const id = g?.accounts?.id;
|
|
3896
|
+
if (id !== undefined && typeof id.prompt === "function") {
|
|
3897
|
+
id.prompt();
|
|
3898
|
+
}
|
|
3899
|
+
});
|
|
3900
|
+
}
|
|
3901
|
+
catch {
|
|
3902
|
+
// GSI not initialized / prompt unavailable — popup/none still apply
|
|
3903
|
+
}
|
|
3904
|
+
}
|
|
3905
|
+
}
|
|
3906
|
+
// Resolve when a popup opens OR FedCM completes OR we hit the deadline.
|
|
3907
|
+
const fedcmWait = (async () => {
|
|
3908
|
+
const deadline = Date.now() + timeoutMs;
|
|
3909
|
+
while (Date.now() < deadline && !fedcmResolved) {
|
|
3910
|
+
await this.sleep(250);
|
|
3911
|
+
}
|
|
3912
|
+
return null;
|
|
3913
|
+
})();
|
|
3914
|
+
const popup = await Promise.race([popupPromise, fedcmWait]);
|
|
3915
|
+
if (cdp !== null) {
|
|
3916
|
+
try {
|
|
3917
|
+
await cdp.send("FedCm.disable");
|
|
3918
|
+
}
|
|
3919
|
+
catch {
|
|
3920
|
+
// best-effort
|
|
3921
|
+
}
|
|
3922
|
+
}
|
|
3923
|
+
if (popup !== null && popup !== this.page && !popup.isClosed()) {
|
|
3924
|
+
this.page = popup;
|
|
3925
|
+
try {
|
|
3926
|
+
await this.page.waitForLoadState("domcontentloaded", { timeout: 15_000 });
|
|
3927
|
+
}
|
|
3928
|
+
catch {
|
|
3929
|
+
// consent loop re-reads regardless
|
|
3930
|
+
}
|
|
3931
|
+
return { ok: true, via: "popup" };
|
|
3932
|
+
}
|
|
3933
|
+
if (fedcmResolved) {
|
|
3934
|
+
// Credential delivered to the page's JS callback — give the app a beat
|
|
3935
|
+
// to exchange it for a session and redirect.
|
|
3936
|
+
try {
|
|
3937
|
+
await this.page.waitForLoadState("domcontentloaded", { timeout: 10_000 });
|
|
3938
|
+
}
|
|
3939
|
+
catch {
|
|
3940
|
+
// best-effort
|
|
3941
|
+
}
|
|
3942
|
+
return { ok: true, via: "fedcm" };
|
|
3943
|
+
}
|
|
3944
|
+
return { ok: false, via: "none" };
|
|
3945
|
+
}
|
|
3272
3946
|
// URL of the active page (the OAuth page mid-handshake, the product
|
|
3273
3947
|
// page otherwise). Cheap — no screenshot, unlike getState().
|
|
3274
3948
|
currentUrl() {
|
|
3275
3949
|
return this.page !== null ? this.page.url() : "";
|
|
3276
3950
|
}
|
|
3951
|
+
// Fetch a URL's final response (following redirects) and return its
|
|
3952
|
+
// status, final URL, and body text — or null on any failure.
|
|
3953
|
+
//
|
|
3954
|
+
// WHY the CONTEXT request API (this.context.request) and not global
|
|
3955
|
+
// fetch / a fresh node http client: the context's APIRequestContext
|
|
3956
|
+
// shares the BrowserContext's proxy + cookie jar, so this egresses
|
|
3957
|
+
// through the SAME residential tunnel the real navigation uses. That
|
|
3958
|
+
// makes a probe here representative of what the browser would actually
|
|
3959
|
+
// land on (same IP reputation, same cf_clearance cookie) — and needs no
|
|
3960
|
+
// separate SOCKS/HTTP-proxy plumbing. Used by the signup-URL resolver to
|
|
3961
|
+
// distinguish a stale /signup that serves a login SPA from the real
|
|
3962
|
+
// signup form, BEFORE committing to a ~6-minute navigation.
|
|
3963
|
+
//
|
|
3964
|
+
// Bounded (15s, ≤10 redirects) and non-throwing — the resolver treats
|
|
3965
|
+
// null as "couldn't tell" and escalates.
|
|
3966
|
+
async fetchText(url) {
|
|
3967
|
+
if (this.context === null)
|
|
3968
|
+
return null;
|
|
3969
|
+
try {
|
|
3970
|
+
const response = await this.context.request.get(url, {
|
|
3971
|
+
maxRedirects: 10,
|
|
3972
|
+
timeout: 15_000,
|
|
3973
|
+
// We inspect 404/redirect bodies ourselves; don't let a non-2xx
|
|
3974
|
+
// throw before we can classify it.
|
|
3975
|
+
failOnStatusCode: false,
|
|
3976
|
+
});
|
|
3977
|
+
return {
|
|
3978
|
+
finalUrl: response.url(),
|
|
3979
|
+
status: response.status(),
|
|
3980
|
+
bodyText: await response.text(),
|
|
3981
|
+
};
|
|
3982
|
+
}
|
|
3983
|
+
catch {
|
|
3984
|
+
return null;
|
|
3985
|
+
}
|
|
3986
|
+
}
|
|
3277
3987
|
// True when the active OAuth page is gone — for the popup flow, the
|
|
3278
3988
|
// popup closing IS the signal the handshake finished.
|
|
3279
3989
|
oauthPageClosed() {
|
|
3280
3990
|
return this.page === null || this.page.isClosed();
|
|
3281
3991
|
}
|
|
3992
|
+
// Which OAuth providers have a LIVE session in this profile's cookie jar.
|
|
3993
|
+
// The logged-in-providers.json marker is a memo that drifts out of sync
|
|
3994
|
+
// (a --force-relogin clears it, a misclassified run clears it, a parallel
|
|
3995
|
+
// run overwrites it) — so a session that is genuinely live in the cookies
|
|
3996
|
+
// can go invisible to provider selection, which is exactly how a warm
|
|
3997
|
+
// GitHub session got skipped in favour of a broken Google path. The cookie
|
|
3998
|
+
// jar is the ground truth: read it directly. Cookie NAMES + presence only;
|
|
3999
|
+
// values are never read into logs. Best-effort — a read failure returns [].
|
|
4000
|
+
async detectSessionProviders() {
|
|
4001
|
+
if (this.context === null)
|
|
4002
|
+
return [];
|
|
4003
|
+
try {
|
|
4004
|
+
return sessionProvidersFromCookies(await this.context.cookies());
|
|
4005
|
+
}
|
|
4006
|
+
catch {
|
|
4007
|
+
return [];
|
|
4008
|
+
}
|
|
4009
|
+
}
|
|
3282
4010
|
// Advance a provider's consent / account-chooser screen by one click
|
|
3283
4011
|
// — the scope-gated auto-approve (T7/T13). Returns false when no
|
|
3284
4012
|
// approve control is present — the agent then aborts rather than
|
|
@@ -3484,6 +4212,26 @@ export function pickSubmitButtonIndex(texts) {
|
|
|
3484
4212
|
});
|
|
3485
4213
|
return bestIndex;
|
|
3486
4214
|
}
|
|
4215
|
+
// ───────────── required-agreement checkbox guard ─────────────
// Two patterns used by the pure helper below AND by the in-page evaluate
// in `checkRequiredAgreementBoxes`. Code evaluated in the page realm
// cannot import, so that evaluate carries its own inlined copy of both
// regexes — any edit here must be mirrored there so the two stay
// BYTE-IDENTICAL.
const AGREEMENT_TEXT_RE = /terms|tos\b|privacy|consent|policy|i agree|agree to|acknowledge|gdpr/i;
const MARKETING_TEXT_RE = /newsletter|updates|offers|product tips|marketing|promotional|receive emails|opt[- ]?in to|subscribe/i;
// Decide whether the text associated with a checkbox describes a REQUIRED
// agreement (terms / privacy / consent) as opposed to a marketing or
// newsletter opt-in. Returns true only when the text matches the
// agreement pattern and does NOT match the marketing pattern; both
// matches are case-insensitive.
//
// This is deterministic rather than left to the LLM planner because of
// amplitude's signup: its required TOS checkbox sits beside a pair of
// data-storage-location card-radios, the planner read the whole cluster
// as "ambiguous radios" and skipped the box, and the submit button is not
// disabled while the box is unticked — so the form silently no-ops. The
// marketing exclusion exists because a marketing opt-in must never be
// flipped on the user's behalf.
export function isAgreementCheckboxText(text) {
    // Marketing wording vetoes the match outright, even when agreement
    // wording is also present.
    if (MARKETING_TEXT_RE.test(text)) {
        return false;
    }
    return AGREEMENT_TEXT_RE.test(text);
}
|
|
3487
4235
|
// Parse a UNIVERSAL_BOT_PROXY_URL — e.g. "http://user:pass@host:8080" or
|
|
3488
4236
|
// "socks5://host:1080" — into Playwright's proxy option shape. Playwright
|
|
3489
4237
|
// wants credentials separate from `server`, so we split them out and
|