@trusty-squire/mcp 0.9.13 → 0.9.14-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bot/agent.d.ts +5 -1
- package/dist/bot/agent.d.ts.map +1 -1
- package/dist/bot/agent.js +496 -20
- package/dist/bot/agent.js.map +1 -1
- package/dist/bot/browser.d.ts +12 -0
- package/dist/bot/browser.d.ts.map +1 -1
- package/dist/bot/browser.js +838 -83
- package/dist/bot/browser.js.map +1 -1
- package/dist/bot/captcha-solver-2captcha.d.ts +18 -0
- package/dist/bot/captcha-solver-2captcha.d.ts.map +1 -1
- package/dist/bot/captcha-solver-2captcha.js +21 -0
- package/dist/bot/captcha-solver-2captcha.js.map +1 -1
- package/dist/bot/email-code-fetcher.d.ts +5 -0
- package/dist/bot/email-code-fetcher.d.ts.map +1 -0
- package/dist/bot/email-code-fetcher.js +33 -0
- package/dist/bot/email-code-fetcher.js.map +1 -0
- package/dist/bot/inbox-client.d.ts +1 -0
- package/dist/bot/inbox-client.d.ts.map +1 -1
- package/dist/bot/inbox-client.js +55 -15
- package/dist/bot/inbox-client.js.map +1 -1
- package/dist/bot/index.d.ts +0 -1
- package/dist/bot/index.d.ts.map +1 -1
- package/dist/bot/index.js +45 -19
- package/dist/bot/index.js.map +1 -1
- package/dist/bot/promote-to-skill.d.ts +2 -1
- package/dist/bot/promote-to-skill.d.ts.map +1 -1
- package/dist/bot/promote-to-skill.js +115 -6
- package/dist/bot/promote-to-skill.js.map +1 -1
- package/dist/bot/replay-skill.d.ts +17 -0
- package/dist/bot/replay-skill.d.ts.map +1 -1
- package/dist/bot/replay-skill.js +243 -10
- package/dist/bot/replay-skill.js.map +1 -1
- package/dist/bot/signup-lock.d.ts +17 -0
- package/dist/bot/signup-lock.d.ts.map +1 -0
- package/dist/bot/signup-lock.js +174 -0
- package/dist/bot/signup-lock.js.map +1 -0
- package/dist/tools/provision-any.d.ts.map +1 -1
- package/dist/tools/provision-any.js +25 -12
- package/dist/tools/provision-any.js.map +1 -1
- package/package.json +2 -2
- package/dist/bot/oauth-lock.d.ts +0 -2
- package/dist/bot/oauth-lock.d.ts.map +0 -1
- package/dist/bot/oauth-lock.js +0 -28
- package/dist/bot/oauth-lock.js.map +0 -1
package/dist/bot/browser.js
CHANGED
|
@@ -23,6 +23,9 @@
|
|
|
23
23
|
// agent.ts.
|
|
24
24
|
import { chromium as baseChromium } from "playwright";
|
|
25
25
|
import { createRequire } from "node:module";
|
|
26
|
+
import { Socket, createServer } from "node:net";
|
|
27
|
+
import { existsSync } from "node:fs";
|
|
28
|
+
import { spawn } from "node:child_process";
|
|
26
29
|
import { detectAsn } from "./asn.js";
|
|
27
30
|
import { CHROME_PROFILE_DIR, launchWithProfileGate, ProfileBusyError, reapLeakedProfileHolder, waitForProfileFree } from "./profile.js";
|
|
28
31
|
import { startXvfb, xvfbAvailable } from "./xvfb.js";
|
|
@@ -225,6 +228,87 @@ async function detectChromiumChannel() {
|
|
|
225
228
|
}
|
|
226
229
|
return null;
|
|
227
230
|
}
|
|
231
|
+
// Resolve the on-disk Chrome binary for a detected channel, for the
|
|
232
|
+
// self-launch path (see launchSelfManagedContext). Playwright launches a
|
|
233
|
+
// channel by name; we have to spawn the binary ourselves, so we need the
|
|
234
|
+
// path. Returns null when the channel is unknown / not found on disk
|
|
235
|
+
// (caller falls back to launchPersistentContext).
|
|
236
|
+
/**
 * Resolve the on-disk Chrome binary to spawn for a detected channel.
 *
 * Resolution order:
 *   1. `channel === null` (bundled Chromium) → `null`, no self-launch.
 *   2. `UNIVERSAL_BOT_CHROME_BINARY`, when set and non-empty, is authoritative:
 *      it is returned if it exists, otherwise `null` (the per-channel
 *      candidates are NOT consulted).
 *   3. The first existing entry of `CHANNEL_PATHS[channel]`.
 *
 * @param {string | null} channel - detected browser channel, or null.
 * @returns {string | null} path to the binary, or null when none was found.
 */
export function resolveChannelBinary(channel) {
    if (channel === null)
        return null; // bundled Chromium — no self-launch
    const explicit = process.env.UNIVERSAL_BOT_CHROME_BINARY;
    if (explicit !== undefined && explicit.length > 0) {
        if (existsSync(explicit))
            return explicit;
        // A set-but-wrong override used to be ignored silently, which made a
        // typo in the env var indistinguishable from "self-launch not
        // available". Say so on stderr; the return value is unchanged.
        console.error(`[universal-bot] UNIVERSAL_BOT_CHROME_BINARY=${explicit} does not exist — no self-launch binary resolved`);
        return null;
    }
    const candidates = CHANNEL_PATHS[channel] ?? [];
    // existsSync reports false instead of throwing, so no try/catch is needed.
    return candidates.find((candidate) => existsSync(candidate)) ?? null;
}
|
|
255
|
+
// Whether to launch Chrome ourselves and attach over CDP, instead of
|
|
256
|
+
// Playwright's launchPersistentContext.
|
|
257
|
+
//
|
|
258
|
+
// WHY THIS EXISTS — the single decisive finding (2026-06-12, fully
|
|
259
|
+
// reproduced + falsifiable; see STATE.md "Cloudflare-Turnstile wall").
|
|
260
|
+
// Cloudflare Turnstile's interactive challenge FAILS a Playwright/patchright
|
|
261
|
+
// launchPersistentContext-driven Chrome and PASSES a Chrome the operator
|
|
262
|
+
// launches itself and then attaches to over CDP — every other variable held
|
|
263
|
+
// constant (same box, same datacenter IP, same Xvfb display, same Chrome 148
|
|
264
|
+
// binary, same software-WebGL, same humanized click). The discriminator
|
|
265
|
+
// matrix:
|
|
266
|
+
// launchPersistentContext + CDP click → "Verification failed"
|
|
267
|
+
// launchPersistentContext + OS click → "Verification failed"
|
|
268
|
+
// plain google-chrome + OS click → "Success!"
|
|
269
|
+
// plain google-chrome + connectOverCDP + page.mouse → token issued (len816)
|
|
270
|
+
// So the tell is NEITHER the live CDP attachment NOR the click mechanism —
|
|
271
|
+
// it is specifically the launch flags/instrumentation Playwright injects at
|
|
272
|
+
// launchPersistentContext time. Self-launching the binary (no
|
|
273
|
+
// --enable-automation et al.) and attaching with connectOverCDP avoids it.
|
|
274
|
+
// Default-ON; opt out with BOT_SELF_LAUNCH=0 for the old path. Exported for tests.
|
|
275
|
+
/**
 * Whether Chrome should be spawned directly and attached to over CDP.
 *
 * Default-ON. Reads `BOT_SELF_LAUNCH`; the values `0`, `false` and `off`
 * opt out. The comparison ignores case and surrounding whitespace so that
 * `OFF`, `False` or a value with a stray trailing space opts out as the
 * operator intended instead of silently leaving self-launch enabled.
 *
 * @returns {boolean} true unless the environment explicitly opts out.
 */
export function selfLaunchEnabled() {
    const raw = process.env.BOT_SELF_LAUNCH;
    if (raw === undefined)
        return true;
    const v = raw.trim().toLowerCase();
    return v !== "0" && v !== "false" && v !== "off";
}
|
|
279
|
+
// Find an ephemeral TCP port for Chrome's --remote-debugging-port.
|
|
280
|
+
/**
 * Ask the OS for an unused loopback TCP port by binding port 0 on a
 * throwaway server, reading back the assigned port, and closing the server.
 *
 * @returns {Promise<number>} resolves with the port once the probe server
 *   has closed; rejects on a listen error or if no port could be read.
 */
function findFreePort() {
    return new Promise((resolve, reject) => {
        const probe = createServer();
        probe.on("error", reject);
        probe.listen(0, "127.0.0.1", () => {
            const bound = probe.address();
            const assigned = bound !== null && typeof bound === "object" ? bound.port : 0;
            probe.close(() => {
                if (assigned > 0) {
                    resolve(assigned);
                }
                else {
                    reject(new Error("no port"));
                }
            });
        });
    });
}
|
|
291
|
+
// Poll Chrome's DevTools HTTP endpoint until it answers (the browser is up
|
|
292
|
+
// and accepting CDP), or the deadline passes. Returns the base endpoint URL
|
|
293
|
+
// connectOverCDP accepts.
|
|
294
|
+
/**
 * Poll `http://127.0.0.1:<port>/json/version` until it answers with a 2xx
 * status, or until `deadlineMs` has elapsed.
 *
 * @param {number} port - the DevTools (remote debugging) port to probe.
 * @param {number} deadlineMs - total time budget in milliseconds.
 * @returns {Promise<string>} the base endpoint URL (`http://127.0.0.1:<port>`).
 * @throws {Error} when the endpoint never answered OK within the budget; the
 *   message carries the last failure (HTTP status or fetch error text).
 */
async function waitForDevtools(port, deadlineMs) {
    const base = `http://127.0.0.1:${port}`;
    const deadline = Date.now() + deadlineMs;
    let lastErr = "";
    for (;;) {
        const remaining = deadline - Date.now();
        if (remaining <= 0)
            break;
        try {
            // Cap the per-request timeout at the remaining budget so a hung
            // request cannot push the total wait past deadlineMs.
            const res = await fetch(`${base}/json/version`, {
                signal: AbortSignal.timeout(Math.min(2_000, remaining)),
            });
            // Only the status matters; cancel the unread body so the
            // connection is released rather than held until GC.
            await res.body?.cancel().catch(() => undefined);
            if (res.ok)
                return base;
            lastErr = `HTTP ${res.status}`;
        }
        catch (err) {
            lastErr = err instanceof Error ? err.message : String(err);
        }
        // Back off between probes, but never sleep beyond the deadline.
        const pause = Math.min(200, deadline - Date.now());
        if (pause > 0)
            await new Promise((r) => setTimeout(r, pause));
    }
    throw new Error(`Chrome DevTools endpoint never came up on ${base} (${lastErr})`);
}
|
|
228
312
|
// Classify an anti-bot interstitial page from its (title + body) text.
|
|
229
313
|
// `onInterstitial` matches the static Cloudflare/Turnstile challenge copy.
|
|
230
314
|
// `verificationPassed` is the signal the challenge SUCCEEDED — but
|
|
@@ -274,6 +358,11 @@ export class BrowserController {
|
|
|
274
358
|
// Google session across runs — see profile.ts / google-login.ts.
|
|
275
359
|
context = null;
|
|
276
360
|
page = null;
|
|
361
|
+
// Self-launch path (Turnstile-safe; see selfLaunchEnabled). When we spawn
|
|
362
|
+
// Chrome ourselves and attach over CDP, these hold the child process and
|
|
363
|
+
// the connected Browser so close() can tear both down.
|
|
364
|
+
childChrome = null;
|
|
365
|
+
cdpBrowser = null;
|
|
277
366
|
// True once launchPersistentContext succeeded this session. close() only
|
|
278
367
|
// reaps a leaked Chrome when WE launched one — so a ProfileBusyError thrown
|
|
279
368
|
// BEFORE launch (while waiting on a genuine concurrent holder) never kills
|
|
@@ -348,6 +437,63 @@ export class BrowserController {
|
|
|
348
437
|
}
|
|
349
438
|
return activeStealthProfileValue();
|
|
350
439
|
}
|
|
440
|
+
// Launch Chrome ourselves and attach over CDP — the Turnstile-safe launch
|
|
441
|
+
// (see selfLaunchEnabled for the proof). The profile dir is the SAME shared
|
|
442
|
+
// profile launchPersistentContext would use, so the OAuth session carries
|
|
443
|
+
// over. Options that launchPersistentContext takes at creation but a default
|
|
444
|
+
// (connectOverCDP) context can't are applied differently:
|
|
445
|
+
// • timezone → TZ env on the child (more authentic than a CDP override)
|
|
446
|
+
// • proxy → --proxy-server flag (auth-less only; the caller routes
|
|
447
|
+
// credentialed proxies to the old path)
|
|
448
|
+
// • viewport → --window-size (with viewport:null-equivalent: we never set
|
|
449
|
+
// an emulated viewport on the connected context)
|
|
450
|
+
// • locale/geo/permissions → applied post-connect by start()
|
|
451
|
+
async launchSelfManagedContext(params) {
|
|
452
|
+
const port = await findFreePort();
|
|
453
|
+
const argv = [
|
|
454
|
+
`--remote-debugging-port=${port}`,
|
|
455
|
+
"--remote-debugging-address=127.0.0.1",
|
|
456
|
+
`--user-data-dir=${this.profileDir}`,
|
|
457
|
+
"--no-first-run",
|
|
458
|
+
"--no-default-browser-check",
|
|
459
|
+
"--password-store=basic",
|
|
460
|
+
"--window-position=0,0",
|
|
461
|
+
`--window-size=${params.window.width},${params.window.height}`,
|
|
462
|
+
"--lang=en-US",
|
|
463
|
+
...params.args,
|
|
464
|
+
...(params.proxy !== null ? [`--proxy-server=${params.proxy.server}`] : []),
|
|
465
|
+
...(params.headless ? ["--headless=new"] : []),
|
|
466
|
+
"about:blank",
|
|
467
|
+
];
|
|
468
|
+
const child = spawn(params.binary, argv, { env: params.env, stdio: "ignore" });
|
|
469
|
+
this.childChrome = child;
|
|
470
|
+
let endpoint;
|
|
471
|
+
try {
|
|
472
|
+
endpoint = await waitForDevtools(port, 30_000);
|
|
473
|
+
}
|
|
474
|
+
catch (err) {
|
|
475
|
+
try {
|
|
476
|
+
child.kill("SIGKILL");
|
|
477
|
+
}
|
|
478
|
+
catch {
|
|
479
|
+
/* already gone */
|
|
480
|
+
}
|
|
481
|
+
this.childChrome = null;
|
|
482
|
+
throw err;
|
|
483
|
+
}
|
|
484
|
+
// Use the patchright launcher's connectOverCDP — it's the exact path the
|
|
485
|
+
// falsification experiment validated (its connect avoids Runtime.enable,
|
|
486
|
+
// which a plain attach would emit). The anti-detection that matters here
|
|
487
|
+
// is the LAUNCH (which we now own), not the connect.
|
|
488
|
+
const launcher = getChromium();
|
|
489
|
+
const browser = await launcher.connectOverCDP(endpoint);
|
|
490
|
+
this.cdpBrowser = browser;
|
|
491
|
+
const ctx = browser.contexts()[0];
|
|
492
|
+
if (ctx === undefined) {
|
|
493
|
+
throw new Error("self-launched Chrome exposed no default browser context");
|
|
494
|
+
}
|
|
495
|
+
return ctx;
|
|
496
|
+
}
|
|
351
497
|
async start() {
|
|
352
498
|
const channel = await detectChromiumChannel();
|
|
353
499
|
this.launchedChannel = channel;
|
|
@@ -407,7 +553,13 @@ export class BrowserController {
|
|
|
407
553
|
}
|
|
408
554
|
else if (xvfbAvailable()) {
|
|
409
555
|
try {
|
|
410
|
-
|
|
556
|
+
// 1920×1080 — the most common real desktop resolution. The old
|
|
557
|
+
// 1280×720 here was exactly Playwright's emulated-device viewport
|
|
558
|
+
// default (the code's own comments flag that as an anti-bot tell),
|
|
559
|
+
// and with viewport:null the page read it straight back. A 720p
|
|
560
|
+
// screen whose availHeight==height (no taskbar) is a headless
|
|
561
|
+
// signature strict Turnstiles (exa/cartesia) score against.
|
|
562
|
+
this.xvfb = await startXvfb({ width: 1920, height: 1080 });
|
|
411
563
|
chromeEnv = { ...process.env, DISPLAY: this.xvfb.display };
|
|
412
564
|
chromeHeadless = false;
|
|
413
565
|
this.launchedMode = "xvfb";
|
|
@@ -430,12 +582,31 @@ export class BrowserController {
|
|
|
430
582
|
// SingletonLock from a killed run, or wait our turn behind a live
|
|
431
583
|
// `mcp login` / another signup. Without this, launchPersistentContext
|
|
432
584
|
// aborts with "Failed to create a ProcessSingleton" and bricks the run.
|
|
433
|
-
|
|
585
|
+
let free = await waitForProfileFree(this.profileDir, {
|
|
434
586
|
deadlineMs: 120_000,
|
|
435
587
|
onWait: () => console.error("[universal-bot] bot Chrome profile is busy with another run — waiting…"),
|
|
436
588
|
});
|
|
437
589
|
if (!free) {
|
|
438
|
-
|
|
590
|
+
// A live-pid holder that never released within the deadline. The
|
|
591
|
+
// signup/discover loop is strictly serial (one run at a time), so a
|
|
592
|
+
// local holder that outlasts 120s is NOT a legitimate concurrent run —
|
|
593
|
+
// it's a leaked Chrome from a previously EXTERNALLY-killed run
|
|
594
|
+
// (run_timeout SIGKILL, OOM, reboot) whose JS `finally`/close() never
|
|
595
|
+
// executed, so reapLeakedProfileHolder never ran. waitForProfileFree
|
|
596
|
+
// only reclaims dead-pid / null locks, so this live orphan otherwise
|
|
597
|
+
// crashes every subsequent run with ProfileBusyError (MEASURED
|
|
598
|
+
// 2026-06-11: cyclic, railpack). A genuine concurrent `mcp login` would
|
|
599
|
+
// have released within the 120s wait — so by here, reaping the LOCAL
|
|
600
|
+
// holder (SIGKILL + clear singletons; no-ops on a remote-host holder)
|
|
601
|
+
// and retrying once is safe and recovers the run instead of failing it.
|
|
602
|
+
const reaped = reapLeakedProfileHolder(this.profileDir);
|
|
603
|
+
if (reaped) {
|
|
604
|
+
console.error("[universal-bot] reaped a leaked Chrome holding the profile (orphan from an externally-killed run) — retrying");
|
|
605
|
+
free = await waitForProfileFree(this.profileDir, { deadlineMs: 10_000 });
|
|
606
|
+
}
|
|
607
|
+
if (!free) {
|
|
608
|
+
throw new ProfileBusyError("bot Chrome profile is held by another run (a login or signup); retry shortly");
|
|
609
|
+
}
|
|
439
610
|
}
|
|
440
611
|
// T3: a PERSISTENT context. The profile dir carries the user's
|
|
441
612
|
// Google session (established by `mcp login` — see google-login.ts),
|
|
@@ -453,68 +624,87 @@ export class BrowserController {
|
|
|
453
624
|
// rebrowser fork required (the pin is what crashed the OAuth flow and
|
|
454
625
|
// confounded the A/B). One binary for both arms.
|
|
455
626
|
this.launchedChannel = channel;
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
//
|
|
488
|
-
//
|
|
489
|
-
//
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
//
|
|
506
|
-
//
|
|
507
|
-
//
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
}
|
|
627
|
+
// Launch args shared by BOTH paths (launchPersistentContext and the
|
|
628
|
+
// self-launch). See the per-flag rationale: swiftshader gives a real
|
|
629
|
+
// (software) WebGL context on the GPU-less Xvfb box; the others are the
|
|
630
|
+
// standard headless/sandbox flags. NOTE we deliberately do NOT include
|
|
631
|
+
// Playwright's automation flags (--enable-automation et al.) — on the
|
|
632
|
+
// self-launch path their ABSENCE is the whole fix.
|
|
633
|
+
const launchArgs = [
|
|
634
|
+
"--disable-blink-features=AutomationControlled",
|
|
635
|
+
"--no-sandbox",
|
|
636
|
+
"--disable-dev-shm-usage",
|
|
637
|
+
"--enable-unsafe-swiftshader",
|
|
638
|
+
"--ignore-gpu-blocklist",
|
|
639
|
+
];
|
|
640
|
+
// F10 clipboard + egress-matched geolocation permission, built once for
|
|
641
|
+
// either path. Typed as string[] (Playwright's grantPermissions /
|
|
642
|
+
// permissions option both accept it).
|
|
643
|
+
const grantedPermissions = [
|
|
644
|
+
...(geo?.geolocation !== undefined ? ["geolocation"] : []),
|
|
645
|
+
"clipboard-read",
|
|
646
|
+
"clipboard-write",
|
|
647
|
+
];
|
|
648
|
+
// Decide the launch path. Self-launch (Turnstile-safe) requires a real
|
|
649
|
+
// Chrome binary on disk AND an auth-less proxy (a credentialed proxy needs
|
|
650
|
+
// Playwright's native proxy auth, which only the launchPersistentContext
|
|
651
|
+
// path provides — so route those there).
|
|
652
|
+
const selfLaunchBinary = selfLaunchEnabled() ? resolveChannelBinary(channel) : null;
|
|
653
|
+
const proxyHasAuth = proxy !== null && typeof proxy.username === "string" && proxy.username.length > 0;
|
|
654
|
+
const useSelfLaunch = selfLaunchBinary !== null && !proxyHasAuth;
|
|
655
|
+
let context;
|
|
656
|
+
if (useSelfLaunch && selfLaunchBinary !== null) {
|
|
657
|
+
console.error(`[universal-bot] self-launch + connectOverCDP (Turnstile-safe launch) binary=${selfLaunchBinary}`);
|
|
658
|
+
// Window size matches the display surface so viewport reads as a real
|
|
659
|
+
// window (no emulated-viewport tell). TZ on the child makes Chrome
|
|
660
|
+
// report the egress timezone natively.
|
|
661
|
+
const window = this.launchedMode === "xvfb"
|
|
662
|
+
? { width: 1920, height: 1080 }
|
|
663
|
+
: { width: 1280, height: 1024 };
|
|
664
|
+
const selfEnv = {
|
|
665
|
+
...(chromeEnv ?? process.env),
|
|
666
|
+
TZ: geo?.timezoneId ?? "America/New_York",
|
|
667
|
+
};
|
|
668
|
+
context = await launchWithProfileGate(this.profileDir, () => this.launchSelfManagedContext({
|
|
669
|
+
binary: selfLaunchBinary,
|
|
670
|
+
headless: chromeHeadless,
|
|
671
|
+
args: launchArgs,
|
|
672
|
+
proxy,
|
|
673
|
+
env: selfEnv,
|
|
674
|
+
window,
|
|
675
|
+
}));
|
|
676
|
+
// Options the default (connectOverCDP) context can't take at creation —
|
|
677
|
+
// applied post-connect. Best-effort: a failure here is non-fatal (the
|
|
678
|
+
// signup proceeds; only clipboard-key-extraction / geo degrade).
|
|
679
|
+
try {
|
|
680
|
+
await context.grantPermissions(grantedPermissions);
|
|
681
|
+
if (geo?.geolocation !== undefined) {
|
|
682
|
+
await context.setGeolocation(geo.geolocation);
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
catch (err) {
|
|
686
|
+
console.error(`[universal-bot] post-connect context setup partial: ${err instanceof Error ? err.message : String(err)}`);
|
|
687
|
+
}
|
|
688
|
+
}
|
|
689
|
+
else {
|
|
690
|
+
if (selfLaunchEnabled() && selfLaunchBinary !== null && proxyHasAuth) {
|
|
691
|
+
console.error("[universal-bot] credentialed proxy → launchPersistentContext (self-launch can't carry proxy auth)");
|
|
692
|
+
}
|
|
693
|
+
// T3: a PERSISTENT context (the legacy path). The profile dir carries the
|
|
694
|
+
// user's Google session so the OAuth-first path reuses it.
|
|
695
|
+
context = await launchWithProfileGate(this.profileDir, () => launcher.launchPersistentContext(this.profileDir, {
|
|
696
|
+
headless: chromeHeadless,
|
|
697
|
+
...(chromeEnv !== undefined ? { env: chromeEnv } : {}),
|
|
698
|
+
...(channel !== null ? { channel } : {}),
|
|
699
|
+
...(proxy !== null ? { proxy } : {}),
|
|
700
|
+
args: [...launchArgs],
|
|
701
|
+
viewport: null,
|
|
702
|
+
locale: "en-US",
|
|
703
|
+
timezoneId: geo?.timezoneId ?? "America/New_York",
|
|
704
|
+
permissions: grantedPermissions,
|
|
705
|
+
...(geo?.geolocation !== undefined ? { geolocation: geo.geolocation } : {}),
|
|
706
|
+
}));
|
|
707
|
+
}
|
|
518
708
|
this.context = context;
|
|
519
709
|
// We own the profile now — close() may reap a leaked Chrome.
|
|
520
710
|
this.launchedContext = true;
|
|
@@ -577,6 +767,52 @@ export class BrowserController {
|
|
|
577
767
|
if (typeof WebGL2RenderingContext !== "undefined") {
|
|
578
768
|
spoof(WebGL2RenderingContext.prototype);
|
|
579
769
|
}
|
|
770
|
+
// Device-tell normalization. The headless harvester box reports 20
|
|
771
|
+
// logical cores (navigator.hardwareConcurrency) — a consumer residential
|
|
772
|
+
// device is 4-16. A 20-core Linux machine behind a "residential" IP is
|
|
773
|
+
// an internal inconsistency Cloudflare Turnstile scores against
|
|
774
|
+
// (MEASURED 2026-06-11: exa/cartesia Turnstile won't issue a token on a
|
|
775
|
+
// clean-fingerprint click; hwConcurrency=20 + Linux is the standout
|
|
776
|
+
// anomaly). Normalize to a common consumer profile. Same per-nav main-
|
|
777
|
+
// world application as the WebGL spoof — patchright denies init-world
|
|
778
|
+
// reach, and Turnstile reads these after the challenge script loads
|
|
779
|
+
// (seconds in), so the framenavigated re-apply wins the race. Defined on
|
|
780
|
+
// Navigator.prototype (where the native getters live) so there's no own-
|
|
781
|
+
// property tell on the instance.
|
|
782
|
+
const navProto = Navigator.prototype;
|
|
783
|
+
if (navProto.__tsDevicePatched !== true) {
|
|
784
|
+
try {
|
|
785
|
+
Object.defineProperty(Navigator.prototype, "hardwareConcurrency", {
|
|
786
|
+
get: () => 8,
|
|
787
|
+
configurable: true,
|
|
788
|
+
});
|
|
789
|
+
Object.defineProperty(Navigator.prototype, "deviceMemory", {
|
|
790
|
+
get: () => 8,
|
|
791
|
+
configurable: true,
|
|
792
|
+
});
|
|
793
|
+
// Screen availHeight tell: a headless Xvfb screen reports
|
|
794
|
+
// availHeight == height (no OS taskbar), whereas a real Windows
|
|
795
|
+
// desktop reserves ~40px for the taskbar (availHeight = height-40,
|
|
796
|
+
// availWidth = width). Reinstate that gap so the screen reads like
|
|
797
|
+
// an ordinary desktop, not a bare framebuffer. Guarded so it only
|
|
798
|
+
// applies when the two are currently equal (i.e. headless).
|
|
799
|
+
try {
|
|
800
|
+
if (screen.availHeight === screen.height) {
|
|
801
|
+
Object.defineProperty(Screen.prototype, "availHeight", {
|
|
802
|
+
get: () => screen.height - 40,
|
|
803
|
+
configurable: true,
|
|
804
|
+
});
|
|
805
|
+
}
|
|
806
|
+
}
|
|
807
|
+
catch {
|
|
808
|
+
// leave it
|
|
809
|
+
}
|
|
810
|
+
navProto.__tsDevicePatched = true;
|
|
811
|
+
}
|
|
812
|
+
catch {
|
|
813
|
+
// descriptor already locked by something else — leave it.
|
|
814
|
+
}
|
|
815
|
+
}
|
|
580
816
|
};
|
|
581
817
|
await context.addInitScript(installWebglSpoof);
|
|
582
818
|
this.page = context.pages()[0] ?? (await context.newPage());
|
|
@@ -698,6 +934,20 @@ export class BrowserController {
|
|
|
698
934
|
const asn = await detectAsn();
|
|
699
935
|
const asnClass = asn?.class ?? "unknown";
|
|
700
936
|
if (shouldRouteThroughProxy(asnClass, forceAlways)) {
|
|
937
|
+
// Proxy liveness probe. A dead proxy (gost crashed, Tailscale down) makes
|
|
938
|
+
// EVERY navigation time out for 60s and silently breaks the whole heal
|
|
939
|
+
// pass — MEASURED 2026-06-12: the Mac gost SOCKS5 went down and every
|
|
940
|
+
// discover died on page.goto Timeout. A cheap TCP connect to the SOCKS
|
|
941
|
+
// host tells us it's reachable; if not, fall back to DIRECT (the box's own
|
|
942
|
+
// datacenter egress) so the run still serves the services that don't block
|
|
943
|
+
// datacenter IPs, instead of dying entirely. Self-healing > silent stall.
|
|
944
|
+
const reachable = await isProxyReachable(proxy.server);
|
|
945
|
+
if (!reachable) {
|
|
946
|
+
console.error(`[universal-bot] proxy ${proxy.server} is UNREACHABLE — falling back to ` +
|
|
947
|
+
`DIRECT egress (datacenter IP; anti-bot services may block it, but far ` +
|
|
948
|
+
`better than every navigation timing out)`);
|
|
949
|
+
return null;
|
|
950
|
+
}
|
|
701
951
|
console.error(`[universal-bot] routing through residential proxy ` +
|
|
702
952
|
`(asn=${asnClass}${forceAlways ? ", forced" : ""})`);
|
|
703
953
|
return proxy;
|
|
@@ -731,11 +981,32 @@ export class BrowserController {
|
|
|
731
981
|
// The host is reachable on the next attempt — a single goto failure
|
|
732
982
|
// shouldn't fail the whole signup. Only retry these connection-level
|
|
733
983
|
// errors; HTTP statuses and selector/logic errors fall straight through.
|
|
734
|
-
|
|
984
|
+
// net::ERR_ABORTED — a navigation superseded by a redirect/JS-nav during
|
|
985
|
+
// the domcontentloaded wait. Usually transient (a redirect race on the
|
|
986
|
+
// first hit of an auth-gated portal — MEASURED 2026-06-11: defang's
|
|
987
|
+
// portal.defang.io aborted on the initial goto); a retry lands the
|
|
988
|
+
// settled page. Distinct from ERR_CONNECTION_ABORTED (a dropped socket).
|
|
989
|
+
const TRANSIENT_NET = /ERR_SOCKS_CONNECTION_FAILED|ERR_CONNECTION_(?:RESET|CLOSED|FAILED|ABORTED)|ERR_NETWORK_CHANGED|ERR_TIMED_OUT|ERR_NAME_NOT_RESOLVED|net::ERR_EMPTY_RESPONSE|net::ERR_ABORTED/i;
|
|
735
990
|
const MAX_GOTO_ATTEMPTS = 3;
|
|
736
991
|
for (let attempt = 1;; attempt++) {
|
|
737
992
|
try {
|
|
738
993
|
await this.page.goto(url, { waitUntil: "domcontentloaded", timeout: 60000 });
|
|
994
|
+
// A SOCKS/connection drop does NOT always throw: Chrome resolves
|
|
995
|
+
// domcontentloaded on its own `chrome-error://chromewebdata/`
|
|
996
|
+
// interstitial and goto returns cleanly. The bot then ran the whole
|
|
997
|
+
// planner on a dead error page and gave up after one round (MEASURED
|
|
998
|
+
// 2026-06-11: galileo/lancedb landed on chrome-error with the app
|
|
999
|
+
// host as the title, never retried). Treat a chrome-error landing as
|
|
1000
|
+
// the same transient class and retry it like a thrown net error.
|
|
1001
|
+
const landed = this.page.url();
|
|
1002
|
+
if (landed.startsWith("chrome-error://")) {
|
|
1003
|
+
if (attempt >= MAX_GOTO_ATTEMPTS) {
|
|
1004
|
+
throw new Error(`net::navigation landed on a Chrome error page for ${url} ` +
|
|
1005
|
+
`after ${attempt} attempts (transient proxy/host failure)`);
|
|
1006
|
+
}
|
|
1007
|
+
await this.sleep(1500 * attempt);
|
|
1008
|
+
continue;
|
|
1009
|
+
}
|
|
739
1010
|
break;
|
|
740
1011
|
}
|
|
741
1012
|
catch (err) {
|
|
@@ -904,6 +1175,148 @@ export class BrowserController {
|
|
|
904
1175
|
// score improvement.
|
|
905
1176
|
await locator.pressSequentially(text, { delay: rand(40, 110) });
|
|
906
1177
|
}
|
|
1178
|
+
// Best-effort scan for the SPECIFIC unfilled required field(s) blocking a
|
|
1179
|
+
// disabled submit. Returns a " Unfilled required field(s) — …" suffix for the
|
|
1180
|
+
// disabled-click error so the planner fills the right field instead of
|
|
1181
|
+
// re-clicking the dead button. Pure observation — never throws, never mutates.
|
|
1182
|
+
async unfilledRequiredHint() {
|
|
1183
|
+
if (!this.page)
|
|
1184
|
+
return "";
|
|
1185
|
+
try {
|
|
1186
|
+
const fields = await this.page.evaluate(() => {
|
|
1187
|
+
const out = [];
|
|
1188
|
+
const vis = (el) => {
|
|
1189
|
+
const r = el.getBoundingClientRect();
|
|
1190
|
+
return r.width > 0 && r.height > 0;
|
|
1191
|
+
};
|
|
1192
|
+
const label = (el) => {
|
|
1193
|
+
const al = el.getAttribute("aria-label");
|
|
1194
|
+
if (al && al.trim())
|
|
1195
|
+
return al.trim().slice(0, 40);
|
|
1196
|
+
const id = el.id;
|
|
1197
|
+
if (id) {
|
|
1198
|
+
const esc = window.CSS && CSS.escape ? CSS.escape(id) : id;
|
|
1199
|
+
const lab = document.querySelector(`label[for="${esc}"]`);
|
|
1200
|
+
if (lab && lab.textContent && lab.textContent.trim())
|
|
1201
|
+
return lab.textContent.trim().slice(0, 40);
|
|
1202
|
+
}
|
|
1203
|
+
const ph = el.getAttribute("placeholder");
|
|
1204
|
+
if (ph && ph.trim())
|
|
1205
|
+
return ph.trim().slice(0, 40);
|
|
1206
|
+
return (el.getAttribute("name") ?? el.tagName.toLowerCase()).slice(0, 40);
|
|
1207
|
+
};
|
|
1208
|
+
for (const el of Array.from(document.querySelectorAll("input[required],textarea[required],input[aria-required='true'],textarea[aria-required='true']"))) {
|
|
1209
|
+
if (!vis(el))
|
|
1210
|
+
continue;
|
|
1211
|
+
const inp = el;
|
|
1212
|
+
if (inp.type === "checkbox" || inp.type === "radio") {
|
|
1213
|
+
if (!inp.checked)
|
|
1214
|
+
out.push(`unchecked: ${label(el)}`);
|
|
1215
|
+
}
|
|
1216
|
+
else if (!inp.value || !inp.value.trim()) {
|
|
1217
|
+
out.push(`empty: ${label(el)}`);
|
|
1218
|
+
}
|
|
1219
|
+
}
|
|
1220
|
+
for (const el of Array.from(document.querySelectorAll("select"))) {
|
|
1221
|
+
if (vis(el) && !el.value)
|
|
1222
|
+
out.push(`unselected: ${label(el)}`);
|
|
1223
|
+
}
|
|
1224
|
+
for (const el of Array.from(document.querySelectorAll("[role='combobox'],[role='listbox']"))) {
|
|
1225
|
+
if (!vis(el))
|
|
1226
|
+
continue;
|
|
1227
|
+
const txt = (el.textContent ?? "").trim();
|
|
1228
|
+
if (txt.length === 0 || /^(select|choose|please|pick)\b/i.test(txt))
|
|
1229
|
+
out.push(`unselected: ${label(el)}`);
|
|
1230
|
+
}
|
|
1231
|
+
for (const grp of Array.from(document.querySelectorAll("[role='radiogroup']"))) {
|
|
1232
|
+
if (!vis(grp))
|
|
1233
|
+
continue;
|
|
1234
|
+
const chosen = grp.querySelector("[role='radio'][aria-checked='true'],input[type='radio']:checked");
|
|
1235
|
+
if (!chosen)
|
|
1236
|
+
out.push(`nothing chosen: ${label(grp)}`);
|
|
1237
|
+
}
|
|
1238
|
+
return Array.from(new Set(out)).slice(0, 5);
|
|
1239
|
+
});
|
|
1240
|
+
return fields.length > 0
|
|
1241
|
+
? ` Unfilled required field(s) — fill/select these first: ${fields.join("; ")}.`
|
|
1242
|
+
: "";
|
|
1243
|
+
}
|
|
1244
|
+
catch {
|
|
1245
|
+
return "";
|
|
1246
|
+
}
|
|
1247
|
+
}
|
|
1248
|
+
// Read any visible transient toast / alert / notification text. Validation
|
|
1249
|
+
// errors, rate-limits, and "operation failed" messages frequently appear as a
|
|
1250
|
+
// toast that auto-dismisses BEFORE the next round's capture — so a failed
|
|
1251
|
+
// submit looks like a SILENT no-op to the planner. Surfacing it turns the
|
|
1252
|
+
// no-op into a diagnosable reason. MEASURED 2026-06-11 (deepseek Sign-up
|
|
1253
|
+
// no-ops; the error is a ds-toast the round-start capture never sees).
|
|
1254
|
+
// `settleMs` lets the caller reuse a wait it was already going to do.
|
|
1255
|
+
async captureTransientAlert(settleMs = 600) {
|
|
1256
|
+
if (!this.page)
|
|
1257
|
+
return "";
|
|
1258
|
+
if (settleMs > 0)
|
|
1259
|
+
await this.sleep(settleMs);
|
|
1260
|
+
try {
|
|
1261
|
+
return await this.page.evaluate(() => {
|
|
1262
|
+
const sels = [
|
|
1263
|
+
"[role='alert']",
|
|
1264
|
+
"[aria-live='assertive']",
|
|
1265
|
+
".ds-toast-container",
|
|
1266
|
+
".ds-notification-container",
|
|
1267
|
+
".Toastify__toast",
|
|
1268
|
+
".ant-message-notice",
|
|
1269
|
+
".ant-notification-notice",
|
|
1270
|
+
".sonner-toast",
|
|
1271
|
+
"[data-sonner-toast]",
|
|
1272
|
+
".toast",
|
|
1273
|
+
".Toaster",
|
|
1274
|
+
];
|
|
1275
|
+
const vis = (el) => {
|
|
1276
|
+
const r = el.getBoundingClientRect();
|
|
1277
|
+
return r.width > 0 && r.height > 0;
|
|
1278
|
+
};
|
|
1279
|
+
for (const sel of sels) {
|
|
1280
|
+
for (const el of Array.from(document.querySelectorAll(sel))) {
|
|
1281
|
+
if (!vis(el))
|
|
1282
|
+
continue;
|
|
1283
|
+
const t = (el.textContent ?? "").replace(/\s+/g, " ").trim();
|
|
1284
|
+
if (t.length >= 2 && t.length <= 240)
|
|
1285
|
+
return t;
|
|
1286
|
+
}
|
|
1287
|
+
}
|
|
1288
|
+
// Second pass: INLINE field-validation errors (not a transient
|
|
1289
|
+
// toast). Many SPAs render "Please enter the verification code" /
|
|
1290
|
+
// "Invalid code" as a small element with an error-ish class or an
|
|
1291
|
+
// aria-invalid node rather than a toast — so the first pass misses
|
|
1292
|
+
// them and a failed submit reads as a silent no-op.
|
|
1293
|
+
// MEASURED 2026-06-11 (deepseek post-OTP submit).
|
|
1294
|
+
const errSels = [
|
|
1295
|
+
"[class*='error' i]",
|
|
1296
|
+
"[class*='invalid' i]",
|
|
1297
|
+
"[class*='danger' i]",
|
|
1298
|
+
"[class*='explain' i]", // antd/ds-form-item-explain
|
|
1299
|
+
"[aria-invalid='true']",
|
|
1300
|
+
];
|
|
1301
|
+
for (const sel of errSels) {
|
|
1302
|
+
for (const el of Array.from(document.querySelectorAll(sel))) {
|
|
1303
|
+
if (!vis(el))
|
|
1304
|
+
continue;
|
|
1305
|
+
// Leaf-ish only — skip containers that wrap the whole form.
|
|
1306
|
+
if (el.querySelector("input, button, form"))
|
|
1307
|
+
continue;
|
|
1308
|
+
const t = (el.textContent ?? "").replace(/\s+/g, " ").trim();
|
|
1309
|
+
if (t.length >= 3 && t.length <= 160)
|
|
1310
|
+
return t;
|
|
1311
|
+
}
|
|
1312
|
+
}
|
|
1313
|
+
return "";
|
|
1314
|
+
});
|
|
1315
|
+
}
|
|
1316
|
+
catch {
|
|
1317
|
+
return "";
|
|
1318
|
+
}
|
|
1319
|
+
}
|
|
907
1320
|
async click(selector) {
|
|
908
1321
|
if (!this.page)
|
|
909
1322
|
throw new Error("Browser not started");
|
|
@@ -916,14 +1329,39 @@ export class BrowserController {
|
|
|
916
1329
|
// dispatches input/change; `force` bypasses the visibility actionability
|
|
917
1330
|
// gate for the sr-only pattern. MEASURED 2026-06-09 (kinde tech-stack step).
|
|
918
1331
|
try {
|
|
919
|
-
const
|
|
1332
|
+
const probe = await this.page
|
|
920
1333
|
.$eval(selector, (el) => {
|
|
921
1334
|
const t = el;
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
1335
|
+
const inputKind = t.tagName === "INPUT" && (t.type === "radio" || t.type === "checkbox") ? t.type : "";
|
|
1336
|
+
return {
|
|
1337
|
+
inputKind,
|
|
1338
|
+
role: el.getAttribute("role") ?? "",
|
|
1339
|
+
text: (el.textContent ?? "").trim().slice(0, 80),
|
|
1340
|
+
};
|
|
925
1341
|
})
|
|
926
|
-
.catch(() => "");
|
|
1342
|
+
.catch(() => ({ inputKind: "", role: "", text: "" }));
|
|
1343
|
+
const inputKind = probe.inputKind;
|
|
1344
|
+
// Custom-combobox / listbox options (role=option|menuitem) — react-select,
|
|
1345
|
+
// Radix, downshift, MUI. Two failure modes the humanized RAW-COORDINATE
|
|
1346
|
+
// click hits: (1) the menu is a PORTAL that re-renders/repositions, so the
|
|
1347
|
+
// captured POSITIONAL selector (e.g. `div…>> nth=42`) resolves to the wrong
|
|
1348
|
+
// element at click time — nothing selects, planner loops (MEASURED
|
|
1349
|
+
// 2026-06-11, meilisearch Radix combobox); (2) options bind pointer/select
|
|
1350
|
+
// handlers a raw coordinate click misses. Fix: re-resolve by role+accessible
|
|
1351
|
+
// name (robust to portal/positional drift), and use the actionability-checked
|
|
1352
|
+
// locator click. Options are post-load, NOT the anti-bot-scored gate.
|
|
1353
|
+
if (probe.role === "option" || probe.role === "menuitem" || probe.role === "menuitemradio") {
|
|
1354
|
+
const role = probe.role;
|
|
1355
|
+
if (probe.text.length > 0) {
|
|
1356
|
+
const byName = this.page.getByRole(role, { name: probe.text, exact: false }).first();
|
|
1357
|
+
if ((await byName.count().catch(() => 0)) > 0) {
|
|
1358
|
+
await byName.click({ timeout: 8000 });
|
|
1359
|
+
return;
|
|
1360
|
+
}
|
|
1361
|
+
}
|
|
1362
|
+
await this.page.locator(selector).first().click({ timeout: 8000 });
|
|
1363
|
+
return;
|
|
1364
|
+
}
|
|
927
1365
|
if (inputKind === "radio" || inputKind === "checkbox") {
|
|
928
1366
|
// check() handles standard inputs; but a custom framework (kinde's kui)
|
|
929
1367
|
// binds its change handler via event delegation, and a force-check on an
|
|
@@ -1016,7 +1454,17 @@ export class BrowserController {
|
|
|
1016
1454
|
// fall through — click() below will produce the canonical error
|
|
1017
1455
|
}
|
|
1018
1456
|
const locator = this.page.locator(selector);
|
|
1019
|
-
|
|
1457
|
+
// The count can throw "Execution context was destroyed" when an
|
|
1458
|
+
// earlier fill already triggered a navigation/auto-submit (zilliz:
|
|
1459
|
+
// typing email+password redirects before we reach the submit click).
|
|
1460
|
+
// That race must NOT crash the whole signup — the page is already
|
|
1461
|
+
// moving on, so treat the submit as effectively done and let the
|
|
1462
|
+
// caller inspect the new page. MEASURED 2026-06-11 (zilliz /signup).
|
|
1463
|
+
const count = await locator.count().catch(() => -1);
|
|
1464
|
+
if (count < 0) {
|
|
1465
|
+
await this.page.waitForLoadState("domcontentloaded").catch(() => { });
|
|
1466
|
+
return;
|
|
1467
|
+
}
|
|
1020
1468
|
// A disabled submit means a required field or agreement checkbox
|
|
1021
1469
|
// wasn't satisfied — throw a distinct `submit_disabled` so the
|
|
1022
1470
|
// caller can re-plan to fix it, rather than wait out a generic
|
|
@@ -1080,15 +1528,62 @@ export class BrowserController {
|
|
|
1080
1528
|
// Verify it actually became checked; some checkboxes need the
|
|
1081
1529
|
// explicit `check()` call to flip state (e.g., styled labels
|
|
1082
1530
|
// that swallow the click event).
|
|
1083
|
-
|
|
1531
|
+
let isChecked = await this.page.locator(selector).isChecked();
|
|
1084
1532
|
if (!isChecked) {
|
|
1085
1533
|
await this.page.check(selector, { force: true });
|
|
1534
|
+
isChecked = await this.page.locator(selector).isChecked().catch(() => false);
|
|
1535
|
+
}
|
|
1536
|
+
// Mantine / Radix styled checkboxes: the hidden <input> can read
|
|
1537
|
+
// checked in the DOM while the library's React onChange never fired —
|
|
1538
|
+
// so the form's controlled state stays false and the gated submit
|
|
1539
|
+
// stays disabled even though isChecked() is true (MEASURED 2026-06-11:
|
|
1540
|
+
// friendliai's #agreedToServiceTerms cost a wasted round because the
|
|
1541
|
+
// first check didn't register the form state). Clicking the ASSOCIATED
|
|
1542
|
+
// LABEL fires the real onChange the library listens for. Best-effort.
|
|
1543
|
+
if (!isChecked) {
|
|
1544
|
+
const labelClicked = await this.clickAssociatedLabel(selector);
|
|
1545
|
+
if (!labelClicked)
|
|
1546
|
+
await this.page.check(selector, { force: true });
|
|
1086
1547
|
}
|
|
1087
1548
|
}
|
|
1088
1549
|
catch {
|
|
1089
1550
|
await this.page.check(selector, { force: true });
|
|
1090
1551
|
}
|
|
1091
1552
|
}
|
|
1553
|
+
// Click the <label> associated with a checkbox/radio input — either a
|
|
1554
|
+
// `<label for="<id>">` or the wrapping `<label>` ancestor. Mantine/Radix
|
|
1555
|
+
// render the real input visually-hidden inside a styled label; clicking the
|
|
1556
|
+
// label is what fires the library's onChange (a direct input check can
|
|
1557
|
+
// leave React's controlled state stale). Returns true if a label was
|
|
1558
|
+
// found + clicked. Best-effort — never throws.
|
|
1559
|
+
async clickAssociatedLabel(selector) {
|
|
1560
|
+
if (!this.page)
|
|
1561
|
+
return false;
|
|
1562
|
+
try {
|
|
1563
|
+
const id = await this.page
|
|
1564
|
+
.locator(selector)
|
|
1565
|
+
.first()
|
|
1566
|
+
.evaluate((el) => (el instanceof HTMLElement ? el.id : ""))
|
|
1567
|
+
.catch(() => "");
|
|
1568
|
+
if (id) {
|
|
1569
|
+
const forLabel = this.page.locator(`label[for="${id}"]`).first();
|
|
1570
|
+
if ((await forLabel.count()) > 0) {
|
|
1571
|
+
await forLabel.click({ timeout: 4000 });
|
|
1572
|
+
return true;
|
|
1573
|
+
}
|
|
1574
|
+
}
|
|
1575
|
+
// No `for=` label — try the wrapping <label> ancestor.
|
|
1576
|
+
const wrapping = this.page.locator(selector).locator("xpath=ancestor::label[1]").first();
|
|
1577
|
+
if ((await wrapping.count()) > 0) {
|
|
1578
|
+
await wrapping.click({ timeout: 4000 });
|
|
1579
|
+
return true;
|
|
1580
|
+
}
|
|
1581
|
+
}
|
|
1582
|
+
catch {
|
|
1583
|
+
// best-effort
|
|
1584
|
+
}
|
|
1585
|
+
return false;
|
|
1586
|
+
}
|
|
1092
1587
|
// Deterministic pre-submit guard: tick every visible, unchecked,
|
|
1093
1588
|
// non-disabled REQUIRED-AGREEMENT checkbox (terms/privacy/consent),
|
|
1094
1589
|
// while never touching marketing/newsletter opt-ins.
|
|
@@ -1723,16 +2218,42 @@ export class BrowserController {
|
|
|
1723
2218
|
// candidates. Matcher → filter by hasText (case-insensitive by
|
|
1724
2219
|
// default in Playwright). No matcher → first.
|
|
1725
2220
|
async pickComboboxOption(options, matcher) {
|
|
2221
|
+
let target = options.first();
|
|
1726
2222
|
if (matcher !== undefined) {
|
|
1727
2223
|
const filtered = options.filter({ hasText: matcher });
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
1733
|
-
|
|
2224
|
+
if ((await filtered.count()) > 0)
|
|
2225
|
+
target = filtered.first();
|
|
2226
|
+
}
|
|
2227
|
+
// cmdk (the command-menu library) does NOT commit a selection from the
|
|
2228
|
+
// bot's humanized page.mouse.click(x, y): cmdk re-renders + re-orders its
|
|
2229
|
+
// list as the search filters, so the cached click coordinates land on the
|
|
2230
|
+
// wrong row (or empty space), and cmdk's onSelect — bound to a real
|
|
2231
|
+
// pointer/click event ON the item, or Enter on the highlighted item —
|
|
2232
|
+
// never fires. The trigger keeps its placeholder and the gated submit
|
|
2233
|
+
// stays disabled (MEASURED 2026-06-11: meilisearch's /welcome-informations
|
|
2234
|
+
// "reasons" + "SDK" comboboxes looped the whole run). Detect cmdk/Radix
|
|
2235
|
+
// option items and commit via a real, re-resolved actionable click (plus a
|
|
2236
|
+
// pointer-event sequence as backup) instead of raw mouse coordinates.
|
|
2237
|
+
const isCmdkItem = await target
|
|
2238
|
+
.evaluate((el) => el.hasAttribute("cmdk-item") ||
|
|
2239
|
+
el.closest("[cmdk-root],[cmdk-list],[cmdk-group]") !== null)
|
|
2240
|
+
.catch(() => false);
|
|
2241
|
+
if (isCmdkItem) {
|
|
2242
|
+
await target.scrollIntoViewIfNeeded().catch(() => { });
|
|
2243
|
+
// Playwright's locator.click() re-resolves geometry and dispatches the
|
|
2244
|
+
// full trusted pointer/mouse sequence at the element's center — what
|
|
2245
|
+
// cmdk's onSelect actually listens for.
|
|
2246
|
+
await target.click({ timeout: 5000 }).catch(async () => {
|
|
2247
|
+
// Backup: dispatch the pointer pair directly, then Enter (the cmdk
|
|
2248
|
+
// input is focused after type-to-filter and highlights this item).
|
|
2249
|
+
await target.dispatchEvent("pointerdown").catch(() => { });
|
|
2250
|
+
await target.dispatchEvent("pointerup").catch(() => { });
|
|
2251
|
+
await this.page?.keyboard.press("Enter").catch(() => { });
|
|
2252
|
+
});
|
|
2253
|
+
await this.wait(0.5);
|
|
2254
|
+
return;
|
|
1734
2255
|
}
|
|
1735
|
-
await this.humanClickLocator(
|
|
2256
|
+
await this.humanClickLocator(target);
|
|
1736
2257
|
await this.wait(0.5);
|
|
1737
2258
|
}
|
|
1738
2259
|
// ───────────── humanization internals ─────────────
|
|
@@ -1818,11 +2339,17 @@ export class BrowserController {
|
|
|
1818
2339
|
await this.sleep(150);
|
|
1819
2340
|
}
|
|
1820
2341
|
if (isDisabled) {
|
|
2342
|
+
// Name the SPECIFIC unfilled required field(s) so the planner fills the
|
|
2343
|
+
// right one instead of re-clicking the dead submit. MEASURED 2026-06-11
|
|
2344
|
+
// (meilisearch/zilliz: planner clicked a disabled Next 4+ times because
|
|
2345
|
+
// the generic hint didn't say WHICH field blocked it). Feedback only.
|
|
2346
|
+
const hint = await this.unfilledRequiredHint();
|
|
1821
2347
|
throw new Error("target is disabled (HTML disabled or aria-disabled=true) after 6s — " +
|
|
1822
2348
|
"the click would no-op. A required precondition is unmet: an empty " +
|
|
1823
2349
|
"input, an unselected dropdown, an unchecked agreement checkbox, or " +
|
|
1824
2350
|
"a missing preset/permission choice. Do NOT retry this click — pick a " +
|
|
1825
|
-
"different action that fills the missing field first."
|
|
2351
|
+
"different action that fills the missing field first." +
|
|
2352
|
+
hint);
|
|
1826
2353
|
}
|
|
1827
2354
|
}
|
|
1828
2355
|
// Scroll the element into the viewport BEFORE measuring it. A
|
|
@@ -2151,7 +2678,15 @@ export class BrowserController {
|
|
|
2151
2678
|
continue;
|
|
2152
2679
|
for (let i = 0; i < count; i++) {
|
|
2153
2680
|
const el = locator.nth(i);
|
|
2154
|
-
|
|
2681
|
+
// Bounded + best-effort. boundingBox() carries Playwright's default
|
|
2682
|
+
// 30s actionability wait; an invisible-mode Turnstile (the kind
|
|
2683
|
+
// patchright + a residential IP pass silently) never stabilises into
|
|
2684
|
+
// a visible box, so the unguarded call burned the full 30s and THREW
|
|
2685
|
+
// — and because the form-fill runCaptchaGate path didn't catch it,
|
|
2686
|
+
// it aborted the whole signup (measured: cartesia, cron-job.org).
|
|
2687
|
+
// A short timeout + catch turns "no clickable widget here" into a
|
|
2688
|
+
// skip, matching the Phase-2 host walk-up's `.catch(() => null)`.
|
|
2689
|
+
const box = await el.boundingBox({ timeout: 1500 }).catch(() => null);
|
|
2155
2690
|
if (box === null)
|
|
2156
2691
|
continue;
|
|
2157
2692
|
if (box.width < 50 || box.height < 30)
|
|
@@ -2383,6 +2918,83 @@ export class BrowserController {
|
|
|
2383
2918
|
return false;
|
|
2384
2919
|
}
|
|
2385
2920
|
}
|
|
2921
|
+
// Cloudflare Turnstile sitekey. On the `.cf-turnstile` widget's
|
|
2922
|
+
// data-sitekey, or as the `0x…` path segment in the challenge iframe src
|
|
2923
|
+
// (challenges.cloudflare.com/.../0x4AAAAA…/…). Returns null when absent.
|
|
2924
|
+
async extractTurnstileSitekey() {
|
|
2925
|
+
if (!this.page)
|
|
2926
|
+
throw new Error("Browser not started");
|
|
2927
|
+
try {
|
|
2928
|
+
return await this.page.evaluate(() => {
|
|
2929
|
+
// Turnstile sitekeys are `0x` + ~22 base64url chars (e.g.
|
|
2930
|
+
// 0x4AAAAAADSpJWQOnICEKAwx). A site-embedded WIDGET exposes it; a
|
|
2931
|
+
// Cloudflare-MANAGED interstitial does not (it's injected, not in the
|
|
2932
|
+
// DOM) — those return null and the caller can't Tier-3 solve them.
|
|
2933
|
+
const isKey = (k) => k != null && /^0x[A-Za-z0-9_-]{18,}$/.test(k);
|
|
2934
|
+
// 1. data-sitekey on any element.
|
|
2935
|
+
for (const el of Array.from(document.querySelectorAll("[data-sitekey]"))) {
|
|
2936
|
+
const k = el.getAttribute("data-sitekey");
|
|
2937
|
+
if (isKey(k))
|
|
2938
|
+
return k;
|
|
2939
|
+
}
|
|
2940
|
+
// 2. ANY iframe src carrying a 0x… sitekey (the challenge iframe path,
|
|
2941
|
+
// or a query param). Not just challenges.cloudflare.com — some
|
|
2942
|
+
// embeds proxy it.
|
|
2943
|
+
for (const ifr of Array.from(document.querySelectorAll("iframe"))) {
|
|
2944
|
+
const src = ifr.src || "";
|
|
2945
|
+
const path = src.match(/\/(0x[A-Za-z0-9_-]{18,})(?:\/|$)/);
|
|
2946
|
+
if (path !== null && isKey(path[1]))
|
|
2947
|
+
return path[1] ?? null;
|
|
2948
|
+
try {
|
|
2949
|
+
const q = new URL(src).searchParams.get("sitekey");
|
|
2950
|
+
if (isKey(q))
|
|
2951
|
+
return q;
|
|
2952
|
+
}
|
|
2953
|
+
catch {
|
|
2954
|
+
/* relative/blank src */
|
|
2955
|
+
}
|
|
2956
|
+
}
|
|
2957
|
+
// 3. Inline HTML: `sitekey: '0x…'`, `data-sitekey="0x…"`,
|
|
2958
|
+
// `turnstile.render(el, { sitekey: '0x…' })`. Covers JS-config
|
|
2959
|
+
// widgets that never set a DOM attribute.
|
|
2960
|
+
const html = document.documentElement.outerHTML;
|
|
2961
|
+
const m = html.match(/data-sitekey=["'](0x[A-Za-z0-9_-]{18,})/i) ??
|
|
2962
|
+
html.match(/sitekey["'\s:=]{1,4}["'](0x[A-Za-z0-9_-]{18,})/i);
|
|
2963
|
+
if (m !== null && isKey(m[1]))
|
|
2964
|
+
return m[1] ?? null;
|
|
2965
|
+
return null;
|
|
2966
|
+
});
|
|
2967
|
+
}
|
|
2968
|
+
catch {
|
|
2969
|
+
return null;
|
|
2970
|
+
}
|
|
2971
|
+
}
|
|
2972
|
+
// Inject a 2Captcha-resolved Turnstile token into the page's
|
|
2973
|
+
// cf-turnstile-response input(s) + dispatch input/change so the form's
|
|
2974
|
+
// submit handler sees it. Turnstile exposes no public callback-read API
|
|
2975
|
+
// (unlike grecaptcha), so DOM injection + events is the reliable path; the
|
|
2976
|
+
// server-side validation reads the input value. Returns true if an input
|
|
2977
|
+
// was populated.
|
|
2978
|
+
async injectTurnstileToken(token) {
|
|
2979
|
+
if (!this.page)
|
|
2980
|
+
throw new Error("Browser not started");
|
|
2981
|
+
try {
|
|
2982
|
+
return await this.page.evaluate((tok) => {
|
|
2983
|
+
const inputs = Array.from(document.querySelectorAll('[name="cf-turnstile-response"], [name^="cf-turnstile-response"], input[id^="cf-chl-widget"]'));
|
|
2984
|
+
if (inputs.length === 0)
|
|
2985
|
+
return false;
|
|
2986
|
+
for (const input of inputs) {
|
|
2987
|
+
input.value = tok;
|
|
2988
|
+
input.dispatchEvent(new Event("input", { bubbles: true }));
|
|
2989
|
+
input.dispatchEvent(new Event("change", { bubbles: true }));
|
|
2990
|
+
}
|
|
2991
|
+
return true;
|
|
2992
|
+
}, token);
|
|
2993
|
+
}
|
|
2994
|
+
catch {
|
|
2995
|
+
return false;
|
|
2996
|
+
}
|
|
2997
|
+
}
|
|
2386
2998
|
// Mint the score token for an INVISIBLE reCAPTCHA by calling
|
|
2387
2999
|
// grecaptcha.execute() ourselves, then wait for g-recaptcha-response to
|
|
2388
3000
|
// populate. MEASURED on amplitude (2026-06-04): an invisible reCAPTCHA's
|
|
@@ -3139,8 +3751,14 @@ export class BrowserController {
|
|
|
3139
3751
|
return r.width > 2 && r.height > 2;
|
|
3140
3752
|
};
|
|
3141
3753
|
document.querySelectorAll("input, textarea").forEach((el) => {
|
|
3754
|
+
// Only text-shaped inputs can RENDER a credential. A checkbox/
|
|
3755
|
+
// radio/button's `value` is a markup constant, not page content —
|
|
3756
|
+
// zilliz's CookieScript banner ships `<input type="checkbox"
|
|
3757
|
+
// value="personalization">` and those words sit earlier in DOM
|
|
3758
|
+
// order than the real key, so the validator-shaped scan tier was
|
|
3759
|
+
// returning them as the "credential".
|
|
3142
3760
|
if (el instanceof HTMLInputElement &&
|
|
3143
|
-
|
|
3761
|
+
!["text", "search", "url", "tel", "number", "email", ""].includes(el.type)) {
|
|
3144
3762
|
return;
|
|
3145
3763
|
}
|
|
3146
3764
|
const value = el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement
|
|
@@ -3231,6 +3849,61 @@ export class BrowserController {
|
|
|
3231
3849
|
// No interactive element appeared in time — let the planner run
|
|
3232
3850
|
// anyway; it fails cleanly rather than hanging.
|
|
3233
3851
|
}
|
|
3852
|
+
// The generic wait above is satisfied by ANY interactive element —
|
|
3853
|
+
// on a signup page with marketing chrome (links, marketplace badges)
|
|
3854
|
+
// that fires while the actual auth widget is still an async spinner.
|
|
3855
|
+
// The bot then snapshots a form-less inventory and bails
|
|
3856
|
+
// `oauth_required` ("no email/password form"). MEASURED 2026-06-11
|
|
3857
|
+
// (zilliz /signup: right-panel spinner, marketing copy on the left).
|
|
3858
|
+
// So: if a loading spinner is visible AND no auth-form signal exists
|
|
3859
|
+
// yet, give the widget a bounded extra wait to hydrate.
|
|
3860
|
+
await this.waitForAuthWidgetHydration();
|
|
3861
|
+
}
|
|
3862
|
+
// Bounded poll for an auth-form signal when the page is still showing a
|
|
3863
|
+
// loading spinner. Strictly additive: returns immediately unless a
|
|
3864
|
+
// spinner is visible AND no auth signal (email/password input or a
|
|
3865
|
+
// provider/sign-up button) is present yet. Best-effort — never throws.
|
|
3866
|
+
async waitForAuthWidgetHydration(timeoutMs = 8_000) {
|
|
3867
|
+
if (!this.page)
|
|
3868
|
+
return;
|
|
3869
|
+
const deadline = Date.now() + timeoutMs;
|
|
3870
|
+
while (Date.now() < deadline) {
|
|
3871
|
+
try {
|
|
3872
|
+
const state = await this.page.evaluate(() => {
|
|
3873
|
+
const vis = (el) => {
|
|
3874
|
+
const r = el.getBoundingClientRect();
|
|
3875
|
+
return r.width > 0 && r.height > 0;
|
|
3876
|
+
};
|
|
3877
|
+
const anyVis = (sel) => Array.from(document.querySelectorAll(sel)).some(vis);
|
|
3878
|
+
// Auth signal: a real form input or a recognizable provider /
|
|
3879
|
+
// signup affordance.
|
|
3880
|
+
const hasAuthInput = anyVis('input[type="email"],input[type="password"],input[name="email" i],input[name="password" i]');
|
|
3881
|
+
let hasAuthButton = false;
|
|
3882
|
+
const re = /\b(sign\s?up|continue with|log ?in with|with google|with github|with sso|create account)\b/i;
|
|
3883
|
+
for (const el of Array.from(document.querySelectorAll('button,a[href],[role="button"]'))) {
|
|
3884
|
+
if (!vis(el))
|
|
3885
|
+
continue;
|
|
3886
|
+
if (re.test((el.textContent ?? "").trim())) {
|
|
3887
|
+
hasAuthButton = true;
|
|
3888
|
+
break;
|
|
3889
|
+
}
|
|
3890
|
+
}
|
|
3891
|
+
const spinnerVisible = anyVis('[role="progressbar"],[aria-busy="true"],[class*="spin" i],[class*="loading" i],[class*="loader" i],.ant-spin,.MuiCircularProgress-root');
|
|
3892
|
+
return { hasAuth: hasAuthInput || hasAuthButton, spinnerVisible };
|
|
3893
|
+
});
|
|
3894
|
+
// Done the moment an auth signal appears, or once nothing is
|
|
3895
|
+
// spinning anymore (no point waiting on a page that simply has
|
|
3896
|
+
// no auth widget — a true OAuth-less/blank page bails honestly).
|
|
3897
|
+
if (state.hasAuth)
|
|
3898
|
+
return;
|
|
3899
|
+
if (!state.spinnerVisible)
|
|
3900
|
+
return;
|
|
3901
|
+
}
|
|
3902
|
+
catch {
|
|
3903
|
+
return; // navigation / context teardown — let the caller proceed
|
|
3904
|
+
}
|
|
3905
|
+
await this.sleep(500);
|
|
3906
|
+
}
|
|
3234
3907
|
}
|
|
3235
3908
|
// rc.33 — wait for the DOM to grow past a minimum interactive-
|
|
3236
3909
|
// element count, polling every 500ms up to timeoutMs. The
|
|
@@ -4291,6 +4964,31 @@ export class BrowserController {
|
|
|
4291
4964
|
const count = await btn.count().catch(() => 0);
|
|
4292
4965
|
if (count === 0)
|
|
4293
4966
|
continue;
|
|
4967
|
+
// GitHub disables the Authorize button with a clickjacking-protection
|
|
4968
|
+
// COUNTDOWN (~3-8s) the first time you authorize an OAuth app that
|
|
4969
|
+
// requests org scopes (read:org). Clicking while disabled silently
|
|
4970
|
+
// no-ops and the URL never changes, so the whole consent bails
|
|
4971
|
+
// "no approve control" even though the button is right there
|
|
4972
|
+
// (MEASURED 2026-06-11: defang's "Authorize DefangLabs"). Poll up to
|
|
4973
|
+
// 12s for it to enable before clicking.
|
|
4974
|
+
{
|
|
4975
|
+
const deadline = Date.now() + 12_000;
|
|
4976
|
+
while (Date.now() < deadline) {
|
|
4977
|
+
const disabled = await btn
|
|
4978
|
+
.evaluate((el) => {
|
|
4979
|
+
if (el instanceof HTMLButtonElement || el instanceof HTMLInputElement) {
|
|
4980
|
+
if (el.disabled)
|
|
4981
|
+
return true;
|
|
4982
|
+
}
|
|
4983
|
+
const aria = el.getAttribute("aria-disabled");
|
|
4984
|
+
return aria === "true" || aria === "";
|
|
4985
|
+
})
|
|
4986
|
+
.catch(() => false);
|
|
4987
|
+
if (!disabled)
|
|
4988
|
+
break;
|
|
4989
|
+
await this.sleep(400);
|
|
4990
|
+
}
|
|
4991
|
+
}
|
|
4294
4992
|
try {
|
|
4295
4993
|
await btn.click({ timeout: 8000 });
|
|
4296
4994
|
}
|
|
@@ -4462,6 +5160,24 @@ export class BrowserController {
|
|
|
4462
5160
|
await capped(this.page.close(), 5_000);
|
|
4463
5161
|
if (this.context)
|
|
4464
5162
|
await capped(this.context.close(), 10_000);
|
|
5163
|
+
// Self-launch path: disconnect the CDP browser and SIGKILL the Chrome we
|
|
5164
|
+
// spawned. context.close() on a connectOverCDP context only disconnects —
|
|
5165
|
+
// it does NOT necessarily exit the browser process, which would leak the
|
|
5166
|
+
// SingletonLock and brick the next run (the reap below is the backstop, but
|
|
5167
|
+
// killing our own child directly is cleaner and faster).
|
|
5168
|
+
if (this.cdpBrowser) {
|
|
5169
|
+
await capped(this.cdpBrowser.close(), 5_000);
|
|
5170
|
+
this.cdpBrowser = null;
|
|
5171
|
+
}
|
|
5172
|
+
if (this.childChrome) {
|
|
5173
|
+
try {
|
|
5174
|
+
this.childChrome.kill("SIGKILL");
|
|
5175
|
+
}
|
|
5176
|
+
catch {
|
|
5177
|
+
/* already gone */
|
|
5178
|
+
}
|
|
5179
|
+
this.childChrome = null;
|
|
5180
|
+
}
|
|
4465
5181
|
// …and context.close() doesn't always kill the browser: headed Chrome
|
|
4466
5182
|
// under Xvfb / some patchright teardowns leave the main process alive
|
|
4467
5183
|
// holding the SingletonLock. A leaked browser makes the NEXT run wait
|
|
@@ -4581,6 +5297,45 @@ export function isAgreementCheckboxText(text) {
|
|
|
4581
5297
|
// and falls back to a direct connection.
|
|
4582
5298
|
//
|
|
4583
5299
|
// Exported for unit testing — URL parsing is the error-prone bit.
|
|
5300
|
+
// Cheap TCP liveness probe for a proxy `server` string ("socks5://host:port").
// A proxy listens on TCP, so a connect that succeeds within the timeout
// means it is up. Pure (no class state) so it can run before Chrome launches.
//
// server:    proxy URL; when it carries no port, 1080 is assumed for
//            socks* schemes and 8080 otherwise.
// timeoutMs: how long to wait for the TCP connect, in milliseconds.
// Resolves true on a successful connect; false on connect error, timeout,
// or a malformed server string. Never rejects.
export async function isProxyReachable(server, timeoutMs = 4000) {
    let host;
    let port;
    try {
        const u = new URL(server);
        host = u.hostname;
        // URL.hostname keeps the square brackets around an IPv6 literal
        // ("[::1]"); Socket.connect needs the bare address, otherwise the
        // bracketed form fails host resolution and the proxy looks dead.
        if (host.startsWith("[") && host.endsWith("]")) {
            host = host.slice(1, -1);
        }
        port = Number(u.port) || (u.protocol.startsWith("socks") ? 1080 : 8080);
    }
    catch {
        return false;
    }
    if (host.length === 0 || !Number.isFinite(port))
        return false;
    return await new Promise((resolve) => {
        const sock = new Socket();
        let settled = false;
        // Settle exactly once and always release the socket.
        const finish = (ok) => {
            if (settled)
                return;
            settled = true;
            try {
                sock.destroy();
            }
            catch {
                // already closed
            }
            resolve(ok);
        };
        sock.setTimeout(timeoutMs);
        sock.once("connect", () => finish(true));
        sock.once("timeout", () => finish(false));
        sock.once("error", () => finish(false));
        sock.connect(port, host);
    });
}
|
|
4584
5339
|
export function parseProxyUrl(raw) {
|
|
4585
5340
|
const u = new URL(raw.trim());
|
|
4586
5341
|
if (u.hostname.length === 0) {
|