@trusty-squire/mcp 0.9.16 → 0.9.17-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/dist/bot/affordance-probe.d.ts +15 -0
  2. package/dist/bot/affordance-probe.d.ts.map +1 -0
  3. package/dist/bot/affordance-probe.js +63 -0
  4. package/dist/bot/affordance-probe.js.map +1 -0
  5. package/dist/bot/agent.d.ts +31 -1
  6. package/dist/bot/agent.d.ts.map +1 -1
  7. package/dist/bot/agent.js +1826 -129
  8. package/dist/bot/agent.js.map +1 -1
  9. package/dist/bot/browser.d.ts +9 -0
  10. package/dist/bot/browser.d.ts.map +1 -1
  11. package/dist/bot/browser.js +281 -8
  12. package/dist/bot/browser.js.map +1 -1
  13. package/dist/bot/extraction.d.ts +18 -0
  14. package/dist/bot/extraction.d.ts.map +1 -0
  15. package/dist/bot/extraction.js +55 -0
  16. package/dist/bot/extraction.js.map +1 -0
  17. package/dist/bot/form-fill.d.ts +144 -0
  18. package/dist/bot/form-fill.d.ts.map +1 -0
  19. package/dist/bot/form-fill.js +320 -0
  20. package/dist/bot/form-fill.js.map +1 -0
  21. package/dist/bot/google-login.d.ts.map +1 -1
  22. package/dist/bot/google-login.js +6 -2
  23. package/dist/bot/google-login.js.map +1 -1
  24. package/dist/bot/llm-client.d.ts +12 -0
  25. package/dist/bot/llm-client.d.ts.map +1 -1
  26. package/dist/bot/llm-client.js +99 -0
  27. package/dist/bot/llm-client.js.map +1 -1
  28. package/dist/bot/nav-search.d.ts +80 -0
  29. package/dist/bot/nav-search.d.ts.map +1 -0
  30. package/dist/bot/nav-search.js +409 -0
  31. package/dist/bot/nav-search.js.map +1 -0
  32. package/dist/bot/oauth-flow.d.ts +48 -0
  33. package/dist/bot/oauth-flow.d.ts.map +1 -0
  34. package/dist/bot/oauth-flow.js +111 -0
  35. package/dist/bot/oauth-flow.js.map +1 -0
  36. package/dist/bot/onboarding-capture.d.ts +4 -0
  37. package/dist/bot/onboarding-capture.d.ts.map +1 -1
  38. package/dist/bot/onboarding-capture.js +5 -0
  39. package/dist/bot/onboarding-capture.js.map +1 -1
  40. package/dist/bot/redact.d.ts +1 -0
  41. package/dist/bot/redact.d.ts.map +1 -1
  42. package/dist/bot/redact.js +46 -0
  43. package/dist/bot/redact.js.map +1 -1
  44. package/dist/skill-registry-client.d.ts +4 -0
  45. package/dist/skill-registry-client.d.ts.map +1 -1
  46. package/dist/skill-registry-client.js +4 -0
  47. package/dist/skill-registry-client.js.map +1 -1
  48. package/dist/tools/provision-any.d.ts +21 -0
  49. package/dist/tools/provision-any.d.ts.map +1 -1
  50. package/dist/tools/provision-any.js +16 -7
  51. package/dist/tools/provision-any.js.map +1 -1
  52. package/dist/tools/signup-telemetry.d.ts +6 -0
  53. package/dist/tools/signup-telemetry.d.ts.map +1 -1
  54. package/dist/tools/signup-telemetry.js +4 -0
  55. package/dist/tools/signup-telemetry.js.map +1 -1
  56. package/package.json +1 -1
package/dist/bot/agent.js CHANGED
@@ -10,6 +10,9 @@
10
10
  import { rankAndCapInventory, scoreSignupButton } from "./browser.js";
11
11
  import { OAUTH_PROVIDERS, extractOAuthScopes, isGitHubDismissible2faSetup, isGitHubForced2faVerification, GITHUB_DISMISSIBLE_2FA_SKIP_TEXT, } from "./oauth-providers.js";
12
12
  import { extractGoogleNumberMatch, scrapeGoogleScopePhrases } from "./google-login.js";
13
+ import { decideOAuthStep } from "./oauth-flow.js";
14
+ import { decideFormFillStep, FORM_FILL_BUDGETS as B_FF, initialFormFillState, } from "./form-fill.js";
15
+ import { accumulateCandidate, hasFullHit, initialExtractionState, resolveExtraction, } from "./extraction.js";
13
16
  import { notifyHeightenedAuth } from "./notify-api.js";
14
17
  import { sendTelegramHeightenedAuth } from "./telegram-notify.js";
15
18
  import { TwoCaptchaSolver } from "./captcha-solver-2captcha.js";
@@ -18,6 +21,7 @@ import { readOperatorOtp, fromDomainFromUrl } from "./read-otp.js";
18
21
  import { loggedInProviders, clearProviderLoggedIn, markProviderLoggedIn, } from "./login-state.js";
19
22
  import { saveDebugSnapshot } from "./debug.js";
20
23
  import { captureOnboardingRound } from "./onboarding-capture.js";
24
+ import { runNavSearch, } from "./nav-search.js";
21
25
  import { wasRecentlyPrewarmed, recordPrewarmSuccess } from "./prewarm-cache.js";
22
26
  import { pickLLMPair, } from "./llm-client.js";
23
27
  import { getDomain } from "tldts";
@@ -55,11 +59,91 @@ const VERIFICATION_EXPECTED_PATTERNS = [
55
59
  "almost there",
56
60
  "one more step",
57
61
  ];
62
+ // A single-use verification link that lands on an "expired / already used /
63
+ // invalid" page. MEASURED on portkey (2026-06-17): a FRESH, seconds-old
64
+ // Firebase mode=verifyEmail oobCode rendered "Email Verification Failed — This
65
+ // link has expired" on the bot's FIRST navigation. A single-use action link is
66
+ // routinely burned by an upstream mail link-scanner (anti-phishing prefetch) —
67
+ // and following a Firebase verifyEmail link COMPLETES the verification
68
+ // server-side. So "expired" almost always means the email is ALREADY verified;
69
+ // the right move is to log in with the signup credentials and proceed, not bail.
70
+ // Exported for unit tests.
71
/**
 * Lower-case phrases that mark a verification-link landing page as
 * expired, invalid, or already consumed. Matched as plain substrings.
 */
const VERIFY_LINK_FAILED_PATTERNS = [
    "link has expired",
    "link is invalid",
    "link is no longer valid",
    "invalid or expired",
    "expired or invalid",
    "verification failed",
    "already been used",
    "already verified", // some apps say so outright → also "go log in"
];
/**
 * Reports whether page text contains any "verification link failed" phrase.
 * The comparison is case-insensitive.
 *
 * @param {string} pageText - Text of the page the verification link landed on.
 * @returns {boolean} true when at least one failure phrase occurs in the text.
 */
export function verificationLinkFailed(pageText) {
    const haystack = pageText.toLowerCase();
    for (const phrase of VERIFY_LINK_FAILED_PATTERNS) {
        if (haystack.includes(phrase)) {
            return true;
        }
    }
    return false;
}
85
+ // Firebase email-action links carry mode=verifyEmail + oobCode + apiKey as
86
+ // query params — even on a custom domain (portkey: app.portkey.ai/auth?…).
87
+ // Extract them so we can confirm the verification via Firebase's REST API
88
+ // directly: far lower latency than a browser SPA navigation (racing a short
89
+ // oobCode TTL) and it issues the single-use code exactly ONCE (no SPA
90
+ // double-submit). Returns null when the link isn't a Firebase verifyEmail
91
+ // action. Exported for unit tests.
92
/**
 * Extracts the Firebase email-verification parameters from an action link.
 *
 * @param {string} url - Candidate link, possibly lifted from an HTML mail body
 *   and therefore still carrying entity-encoded "&amp;" separators.
 * @returns {{apiKey: string, oobCode: string} | null} The parameters when the
 *   link is a `mode=verifyEmail` action carrying both `oobCode` and an
 *   `AIza…`-shaped `apiKey`; null for anything else, including unparseable input.
 */
export function parseFirebaseEmailAction(url) {
    let parsed;
    try {
        // Decode HTML-escaped ampersands first: otherwise searchParams sees
        // keys like "amp;oobCode" and every escaped link is rejected.
        parsed = new URL(url.replace(/&amp;/g, "&"));
    }
    catch {
        return null;
    }
    const params = parsed.searchParams;
    const oobCode = params.get("oobCode");
    const apiKey = params.get("apiKey");
    if (params.get("mode") !== "verifyEmail" || oobCode === null || apiKey === null) {
        return null;
    }
    // Firebase web API key shape
    if (!/^AIza[\w-]{20,}$/.test(apiKey)) {
        return null;
    }
    return { apiKey, oobCode };
}
109
+ // Apply a Firebase email-verification oobCode via the Identity Toolkit REST API
110
+ // — the same call the emailed link's SPA makes, issued directly so it runs the
111
+ // instant the mail lands and only once. The apiKey is the Firebase WEB api key
112
+ // (public by design). ok=true + the verified email on success; ok=false + the
113
+ // Firebase error (EXPIRED_OOB_CODE / INVALID_OOB_CODE) otherwise — which itself
114
+ // proves whether the code was alive at receipt. Exported for unit tests.
115
/**
 * Submits a Firebase email-verification oobCode to the Identity Toolkit
 * `accounts:update` endpoint with a single POST.
 *
 * Never rejects: transport errors and non-success responses are folded into
 * the returned object.
 *
 * @param {string} apiKey - Firebase web API key, sent as the `key` query param.
 * @param {string} oobCode - The code from the emailed link.
 * @returns {Promise<{ok: true, email?: string} | {ok: false, error: string}>}
 *   ok=true (plus the email when the response carries one) on an OK response
 *   that reports `emailVerified` or an email; otherwise ok=false with the
 *   response's error message, `http_<status>`, or the thrown error's message.
 */
export async function applyFirebaseEmailVerification(apiKey, oobCode) {
    try {
        const endpoint = `https://identitytoolkit.googleapis.com/v1/accounts:update?key=${encodeURIComponent(apiKey)}`;
        const response = await fetch(endpoint, {
            method: "POST",
            headers: { "content-type": "application/json" },
            body: JSON.stringify({ oobCode }),
        });
        // An unparseable body is treated as an empty payload, not a failure.
        const payload = await response.json().catch(() => ({}));
        const verified = response.ok && (payload.emailVerified === true || payload.email !== undefined);
        if (!verified) {
            return { ok: false, error: payload.error?.message ?? `http_${response.status}` };
        }
        const outcome = { ok: true };
        if (payload.email !== undefined) {
            outcome.email = payload.email;
        }
        return outcome;
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        return { ok: false, error: message };
    }
}
58
132
  // Short probe when, even after a settle, the post-submit page still never
59
133
  // prompted the user to check their email AND no account-created signal
60
134
  // appeared. Legitimate verification mail almost always lands inside a
61
135
  // minute; this catches the fast case without 300s of dead air.
62
136
  const VERIFICATION_PROBE_SECONDS = 45;
137
+ // Give-up ceiling for the "page says check your email" case (mail IS coming).
138
+ // These poll timeouts are pure GIVE-UP BOUNDS: the inbox server long-polls and
139
+ // early-exits the instant mail arrives, so a real email returns at ~its arrival
140
+ // time regardless of the ceiling — the ceiling only costs wall-clock when mail
141
+ // NEVER comes (young-domain withhold). Sized to the ARRIVAL TAIL, not the
142
+ // average: transactional mail lands <30s typically, with a greylist retry tail
143
+ // to ~60s, so 90s covers every real case with margin while failing a withheld
144
+ // mail 90s sooner (was 180). Tail-sized, NOT average-sized — abandoning real
145
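+ // mail loses a signup (OF#2). Env-tunable for an A/B.
+ // NOTE(review): the default coded below is 120s, not the 90s argued for
+ // above — confirm which value is intended.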
146
// Give-up ceiling in seconds, overridable via BOT_VERIFY_EMAIL_CEILING_S.
// Falls back to the 120s default when the variable is unset or does not parse
// to a non-negative integer, so a typo in the env var cannot turn the ceiling
// into NaN.
const VERIFY_EMAIL_CEILING_SECONDS = (() => {
    const parsed = Number.parseInt(process.env.BOT_VERIFY_EMAIL_CEILING_S ?? "", 10);
    return Number.isFinite(parsed) && parsed >= 0 ? parsed : 120;
})();
63
147
  // Settle window before the SECOND post-submit page read. SPA signups
64
148
  // (Postmark, ElevenLabs, Browserbase, Grafana Cloud, …) swap in their
65
149
  // "check your email" confirmation screen a beat AFTER submit. Reading the
@@ -74,9 +158,11 @@ const SUBMIT_SETTLE_SECONDS = 3;
74
158
  // fresh send (Postmark, SendGrid) routinely take longer than the 45s
75
159
  // probe. Polling 120s here — rather than bailing at 45s — is the
76
160
  // difference between catching that mail and a false `verification_not_sent`.
77
- // Still bounded so a genuinely-silent service doesn't hold the run for the
78
- // full 180s expected-email timeout.
79
- const SUBMITTED_PROBE_FLOOR_SECONDS = 120;
161
+ // Still bounded so a genuinely-silent service doesn't hold the run for the full
162
+ // ceiling. Tail-sized to ~90s (arrival tail incl. a greylist retry); since the
163
+ // long-poll early-exits on arrival, this only costs wall-clock when mail never
164
+ // comes — failing an inconclusive no-mail run 30s sooner (was 120). Env-tunable.
165
// Probe floor in seconds, overridable via BOT_VERIFY_INCONCLUSIVE_CEILING_S.
// Falls back to the 90s default when the variable is unset or does not parse
// to a non-negative integer, so a typo in the env var cannot turn the floor
// into NaN.
const SUBMITTED_PROBE_FLOOR_SECONDS = (() => {
    const parsed = Number.parseInt(process.env.BOT_VERIFY_INCONCLUSIVE_CEILING_S ?? "", 10);
    return Number.isFinite(parsed) && parsed >= 0 ? parsed : 90;
})();
80
166
  // Post-submit page text that means the submit was REJECTED, not accepted —
81
167
  // no account was created, so no verification mail is coming and even the
82
168
  // 45s probe is wasted. Lets the bot bail immediately instead of polling.
@@ -169,6 +255,74 @@ export function isAtPaywall(text) {
169
255
  }
170
256
  return false;
171
257
  }
258
+ // A service can complete the signup form / OAuth handshake and THEN drop the
259
+ // account into a manual-approval gate — a waiting room, a waitlist, a
260
+ // "request access / your account is pending approval / under review" screen —
261
+ // instead of granting a dashboard + API key. Baseten is the field example:
262
+ // the form submits, then a "waiting_room" / account-review screen appears and
263
+ // no key is obtainable autonomously.
264
+ //
265
+ // This is NOT a captcha and NOT an anti-bot block — it's a service-side human
266
+ // gate. Left undetected, the post-verify loop exhausts its budget and the run
267
+ // gets mislabeled (oauth_onboarding_failed / a generic no-credentials miss),
268
+ // which is misleading and can wrongly count toward skill demotion or send us
269
+ // chasing a non-existent code bug. We classify it as `onboarding_blocked` —
270
+ // the same terminal, human-pile, non-demoting status the billing wall uses —
271
+ // so the loop routes it to the manual pile and never advances the demote
272
+ // counter.
273
+ //
274
+ // Tuned for PRECISION over recall: every pattern requires explicit
275
+ // account-review / waitlist / pending-approval phrasing. A marketing tile that
276
+ // merely mentions "early access" as a feature must not trip it, so the verbs
277
+ // are scoped to the gate's own phrasing (you ARE on the list / access IS
278
+ // pending / the account IS under review).
279
/**
 * Case-insensitive phrasings of a manual-approval gate: waiting room,
 * waitlist, request-access, pending approval, account under review.
 */
const ACCOUNT_REVIEW_GATE_PATTERNS = [
    /\bwaiting\s+room\b/i,
    /\b(?:join|on|added\s+to)\s+(?:the\s+|our\s+)?waitlist\b/i,
    /\byou'?re\s+on\s+the\s+(?:list|waitlist)\b/i,
    /\brequest\s+(?:early\s+)?access\b/i,
    /\baccess\s+(?:is\s+)?pending\b/i,
    /\b(?:your\s+)?account\s+is\s+pending\b/i,
    /\bpending\s+approval\b/i,
    /\baccount\s+(?:is\s+)?(?:currently\s+)?under\s+review\b/i,
    /\byour\s+account\s+is\s+being\s+reviewed\b/i,
    /\bwe'?ll\s+email\s+you\s+when\b/i,
    /\bawaiting\s+(?:approval|access)\b/i,
];
/**
 * Reports whether page text matches any manual-approval-gate phrasing.
 * Pure over the text; exported for unit testing.
 *
 * @param {string} text - Page text to inspect.
 * @returns {boolean} true when any gate pattern matches.
 */
export function isAtAccountReviewGate(text) {
    for (const pattern of ACCOUNT_REVIEW_GATE_PATTERNS) {
        if (pattern.test(text)) {
            return true;
        }
    }
    return false;
}
298
+ // Decide whether a no-credential form-fill outcome is a manual-review gate.
299
+ // A verification timeout is the AUTHORITATIVE cause and must win: a pending
300
+ // "check your email / we sent a code" page can read as a review gate to
301
+ // isAtAccountReviewGate, so without this guard a verification_not_sent gets
302
+ // mislabeled onboarding_blocked (the anthropic regression). Only when
303
+ // verification did NOT fail is the review-gate text trusted. Pure, testable.
304
/**
 * Decides whether a no-credential outcome should be treated as a
 * manual-review gate. Any recorded verification failure takes precedence:
 * the page text is consulted only when `verificationFailed` is `undefined`.
 *
 * @param {*} verificationFailed - Verification-failure marker; only the exact
 *   value `undefined` counts as "no failure" (null or false do not).
 * @param {string} pageText - Page text passed to isAtAccountReviewGate.
 * @returns {boolean} true only when no failure is recorded and the text reads
 *   as a review gate.
 */
export function isOnboardingReviewGate(verificationFailed, pageText) {
    if (verificationFailed !== undefined) {
        return false;
    }
    return isAtAccountReviewGate(pageText);
}
307
+ // Closed / invite-only registration: the service does not accept new self-serve
308
+ // signups at all (turbopuffer: "Sign-ups are closed"). Distinct from a review
309
+ // gate (you signed up, awaiting approval) — here NO account can be created, so
310
+ // the run is terminally unservable and the service should be dequeued, not
311
+ // retried or mislabeled oauth_onboarding_failed (which implies a fixable nav
312
+ // bug). Precision-tuned: requires explicit closed/disabled/invite-only phrasing
313
+ // scoped to sign-up/registration, so a normal page mentioning "sign up" or an
314
+ // "invite your team" feature doesn't trip it. Pure over page text.
315
/**
 * Case-insensitive phrasings for closed, disabled, or invite-only
 * registration.
 */
const SIGNUPS_CLOSED_PATTERNS = [
    /\bsign[\s-]?ups?\s+(?:are|is)\s+(?:currently\s+)?(?:closed|disabled|paused|not\s+(?:open|available|being\s+accepted))\b/i,
    /\b(?:we\s+are|we're)\s+not\s+(?:currently\s+)?accepting\s+(?:new\s+)?(?:sign[\s-]?ups|registrations|users|accounts)\b/i,
    /\bregistration\s+(?:is\s+)?(?:currently\s+)?(?:closed|disabled)\b/i,
    /\b(?:sign[\s-]?up|registration|access)\s+is\s+(?:by\s+)?invite[\s-]?only\b/i,
    /\binvite[\s-]?only\s+(?:beta|access|signup|registration)\b/i,
    /\brequest\s+an\s+invite\b/i,
];
/**
 * Reports whether page text states that self-serve registration is closed or
 * invite-only. Pure over the text.
 *
 * @param {string} text - Page text to inspect.
 * @returns {boolean} true when any closed-signup pattern matches.
 */
export function isSignupsClosed(text) {
    for (const pattern of SIGNUPS_CLOSED_PATTERNS) {
        if (pattern.test(text)) {
            return true;
        }
    }
    return false;
}
172
326
  // S3: does this post-submit page text indicate the service genuinely
173
327
  // expects the user to confirm via email? Drives whether the bot polls the
174
328
  // full verification timeout or runs only a short probe. Exported so the
@@ -197,8 +351,9 @@ export class OAuthSessionNotPersistedError extends Error {
197
351
  // 0.8.2-rc.10 — common dashboard paths that vendors host their
198
352
  // per-account API key UI at. Ordered most-specific first so a
199
353
  // fallback navigate doesn't land short of the actual page. Returned
200
- // as an array of path-strings; the caller composes them onto the
201
- // origin of the currently-stuck URL and skips any already tried.
354
+ // as an array of path-strings; the caller composes them onto the APP
355
+ // origin (the signup/app URL the bot navigated to), NOT the auth/IdP
356
+ // origin it may be stuck on post-OAuth, and skips any already tried.
202
357
  //
203
358
  // Patterns harvested from Anthropic (settings/keys), Sentry
204
359
  // (settings/account/api/auth-tokens), Neon (settings#api-keys),
@@ -418,33 +573,162 @@ export function findCreateKeyAffordance(inventory) {
418
573
  candidates.sort((a, b) => b.score - a.score);
419
574
  return candidates[0].el;
420
575
  }
576
+ // A "name your key" confirm modal frequently labels its submit button
577
+ // generically — "Submit", "Create", "Generate", "Done" — with NO key noun, so
578
+ // findCreateKeyAffordance can't see it (groq: the page-level "Create API Key"
579
+ // opens a dialog whose only affirmative is a bare "Submit"). Worse, the
580
+ // page-level create button is still in the background DOM, so a naive
581
+ // findCreateKeyAffordance re-grabs IT and reopens the modal forever. This finds
582
+ // the modal's affirmative submit instead. Bounded to the modal shape — a text
583
+ // input (the name field) MUST be present — so a page-level Submit on an
584
+ // unrelated form can't be tripped. Excludes the just-clicked create button and
585
+ // any cancel/close control. Pure; exported for unit testing.
586
// Whole-label match for a generic affirmative button ("Submit", "Create key", "OK", …).
const KEY_MODAL_SUBMIT_AFFIRM = /^\s*(?:submit|create(?:\s+(?:api\s+)?key)?|generate(?:\s+key)?|confirm|done|save|add(?:\s+key)?|ok)\s*$/i;
// Any label containing a cancel/close-style word is never the submit.
const KEY_MODAL_NEGATIVE = /\b(?:cancel|close|back|dismiss|never\s*mind)\b/i;
/**
 * Finds the affirmative submit control of a "name your key" modal.
 *
 * Returns null unless the inventory holds at least one visible text-like
 * input (type null, text, search or email). Otherwise returns the first
 * visible clickable element whose combined label (visible text, aria-label,
 * title) is 1–24 characters, has no cancel-style word, and matches the
 * affirmative pattern.
 *
 * @param {Array<object>} inventory - Page element inventory entries.
 * @param {string} [excludeSelector] - Selector to skip; per the file's notes
 *   this is the create button that was just clicked.
 * @returns {object | null} The matching inventory entry, or null.
 */
export function findKeyModalSubmit(inventory, excludeSelector) {
    const isNameInput = (el) => {
        if (el.tag !== "input" || el.visible === false) {
            return false;
        }
        return el.type === null || /^(?:text|search|email)$/i.test(el.type);
    };
    if (!inventory.some(isNameInput)) {
        return null;
    }
    const isModalSubmit = (el) => {
        if (el.selector === excludeSelector) {
            return false;
        }
        const clickable = el.tag === "button" || el.tag === "a" || el.role === "button" || el.role === "link";
        if (!clickable || el.visible === false) {
            return false;
        }
        const parts = [el.visibleText, el.ariaLabel, el.title];
        const label = parts
            .filter((s) => s !== null && s !== undefined && s.length > 0)
            .join(" ")
            .trim();
        if (label.length === 0 || label.length > 24) {
            return false;
        }
        return !KEY_MODAL_NEGATIVE.test(label) && KEY_MODAL_SUBMIT_AFFIRM.test(label);
    };
    return inventory.find(isModalSubmit) ?? null;
}
613
+ // The name input inside a "name your key" modal — the first visible text-like
614
+ // input. Some vendors gate the submit on a non-empty name (groq), so the mint
615
+ // flow types one before clicking submit. Pure; exported for unit testing.
616
/**
 * Returns the first visible text-like input in the inventory: an `input`
 * whose type is null, text, search or email (case-insensitive).
 *
 * @param {Array<object>} inventory - Page element inventory entries.
 * @returns {object | null} The first matching entry, or null when none match.
 */
export function findKeyNameInput(inventory) {
    const isTextLikeInput = (el) => {
        if (el.tag !== "input" || el.visible === false) {
            return false;
        }
        return el.type === null || /^(?:text|search|email)$/i.test(el.type);
    };
    return inventory.find(isTextLikeInput) ?? null;
}
626
+ // An in-DOM nav link/affordance that points AT an API-keys / tokens page.
627
+ // Distinct from findCreateKeyAffordance (the "create key" button): this finds
628
+ // the LINK that navigates TO the keys page, so the bot can click the real
629
+ // target — whose href is the correct path — instead of GUESSING a URL from a
630
+ // fixed convention list (which 404s whenever a service hosts keys at a
631
+ // non-standard path: unify-ai's keys aren't at /keys, /api-keys, or
632
+ // /settings/api-keys, all of which 404). A human clicks the sidebar link; so
633
+ // should the bot. Exported, pure (operates on the inventory shape only).
634
// Path segment that looks like a keys/tokens page (includes the loose
// "keys", "tokens" and "developers" segments).
const API_KEYS_HREF = /\/(?:api[-_]?keys?|api[-_]?tokens?|access[-_]?tokens?|auth[-_]?tokens?|secret[-_]?keys?|personal[-_]?access[-_]?tokens?|developers?|keys?|tokens?)(?:[/?#]|$)/i;
// Label text naming an API/access/secret/auth key or token.
const API_KEYS_TEXT = /\b(?:api|access|secret|auth|personal\s+access)\s*(?:keys?|tokens?)\b/i;
/**
 * Picks the best in-page link or button that leads to an API-keys / tokens page.
 *
 * Candidates are visible clickable elements (a / button / role=link /
 * role=button) not present in `alreadyClicked`, whose href matches
 * API_KEYS_HREF or whose label text matches API_KEYS_TEXT. Elements whose
 * text matches CREATE_KEY_PHRASE are skipped unless they are anchors with a
 * matching href. Scoring: +4 href match, +2 "api key/token" text (else +1
 * for any other text match), +1 anchor tag, +1 in viewport. The
 * highest-scoring candidate wins; ties go to the earliest in the inventory.
 *
 * @param {Array<object>} inventory - Page element inventory entries.
 * @param {Set<string>} [alreadyClicked] - Selectors to ignore.
 * @returns {object | null} The winning inventory entry, or null when none qualify.
 */
export function findApiKeysNavLink(inventory, alreadyClicked = new Set()) {
    let winner = null;
    let winnerScore = -Infinity;
    for (const el of inventory) {
        const clickable = el.tag === "a" ||
            el.tag === "button" ||
            el.role === "link" ||
            el.role === "button";
        if (!clickable || el.visible === false || alreadyClicked.has(el.selector)) {
            continue;
        }
        const href = el.href ?? "";
        const labelParts = [el.visibleText, el.ariaLabel, el.title, el.labelText, el.iconLabel];
        const text = labelParts
            .filter((s) => s !== null && s !== undefined)
            .join(" ")
            .trim();
        const hrefHit = href.length > 0 && API_KEYS_HREF.test(href);
        const textHit = API_KEYS_TEXT.test(text);
        if (!(hrefHit || textHit)) {
            continue;
        }
        // A "create key" control opens a create flow rather than navigating
        // to the listing; only an anchor with a keys href is kept.
        const isKeysAnchor = el.tag === "a" && hrefHit;
        if (CREATE_KEY_PHRASE.test(text) && !isKeysAnchor) {
            continue;
        }
        const hrefPoints = hrefHit ? 4 : 0; // a real, navigable target beats a text guess
        let textPoints = 0;
        if (/\bapi\s*(?:keys?|tokens?)\b/i.test(text)) {
            textPoints = 2;
        }
        else if (textHit) {
            textPoints = 1;
        }
        const anchorPoints = el.tag === "a" ? 1 : 0; // prefer anchors over role=button
        const viewportPoints = el.inViewport === true ? 1 : 0;
        const score = hrefPoints + textPoints + anchorPoints + viewportPoints;
        // Strictly-greater keeps the earliest candidate on a tie.
        if (score > winnerScore) {
            winner = el;
            winnerScore = score;
        }
    }
    return winner;
}
421
684
  // Pick the next fallback URL to try, keyed against the origin of the
422
685
  // currently-stuck URL. The curated SERVICE_KEYS_PATHS for the run's
423
686
  // service (when its host matches the stuck origin) are tried FIRST,
424
687
  // then the generic STUCK_LOOP_FALLBACK_PATHS. Returns null when every
425
688
  // path has already been attempted. Exported for unit tests.
426
- export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service) {
427
- let parsed;
689
+ export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service, appUrl) {
690
+ let parsedCurrent;
428
691
  try {
429
- parsed = new URL(currentUrl);
692
+ parsedCurrent = new URL(currentUrl);
430
693
  }
431
694
  catch {
432
695
  return null;
433
696
  }
697
+ // Compose key-path guesses onto the APP origin, NOT the origin of the
698
+ // currently-stuck URL. After OAuth the stuck URL is the identity-provider
699
+ // subdomain (auth.lumalabs.ai, accounts.<svc>, login.<svc>, the IdP) — which
700
+ // has no settings/keys pages, so "${authOrigin}/settings/keys" 404s by
701
+ // construction. The keys live on the app host (lumalabs.ai). `appUrl` is the
702
+ // signup/app URL the bot actually navigated to (this.resolvedSignupUrl), so
703
+ // its origin is the right host to guess against. Fall back to the stuck
704
+ // origin only when no usable app URL is known.
705
+ let composeBase = parsedCurrent;
706
+ if (appUrl !== undefined) {
707
+ try {
708
+ const parsedApp = new URL(appUrl);
709
+ if ((parsedApp.protocol === "http:" || parsedApp.protocol === "https:") &&
710
+ !isGoogleSearchUrl(appUrl)) {
711
+ composeBase = parsedApp;
712
+ }
713
+ }
714
+ catch {
715
+ // keep the stuck origin
716
+ }
717
+ }
434
718
  // about:blank / data: / chrome-error pages have an opaque origin that
435
719
  // serializes to the literal string "null" — building "${origin}${path}"
436
720
  // then yields an unnavigable "null/settings/keys". Only compose
437
721
  // fallbacks against a real http(s) origin.
438
- if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
722
+ if (composeBase.protocol !== "http:" && composeBase.protocol !== "https:") {
439
723
  return null;
440
724
  }
441
- const origin = parsed.origin;
442
- // Skip a candidate when the current URL's path ALREADY matches it
443
- // (case-insensitive, trailing-slash tolerant). The planner is stuck
444
- // ON the page the candidate points to navigating to the same URL
445
- // again won't break the cycle, only a different path will.
446
- const currentPath = parsed.pathname.replace(/\/+$/, "").toLowerCase();
447
- // Compose curated per-service paths first, but only when the stuck
725
+ const origin = composeBase.origin;
726
+ // Skip a candidate when it resolves to the exact URL we're already stuck
727
+ // on (full origin+path, trailing-slash/case tolerant) — re-navigating
728
+ // there won't break the cycle. Compared on the full URL now that the
729
+ // compose origin can differ from the stuck origin.
730
+ const currentFull = `${parsedCurrent.origin}${parsedCurrent.pathname}`.replace(/\/+$/, "").toLowerCase();
731
+ // Compose curated per-service paths first, but only when the COMPOSE
448
732
  // origin's host actually belongs to the named service. The slug is
449
733
  // a substring of the host for the vendors we curate (groq →
450
734
  // console.groq.com, launchdarkly → app.launchdarkly.com, …); this
@@ -454,7 +738,7 @@ export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service) {
454
738
  const slug = service !== undefined ? serviceSlug(service) : "";
455
739
  const curated = slug !== "" &&
456
740
  SERVICE_KEYS_PATHS[slug] !== undefined &&
457
- parsed.hostname.toLowerCase().includes(slug)
741
+ composeBase.hostname.toLowerCase().includes(slug)
458
742
  ? SERVICE_KEYS_PATHS[slug]
459
743
  : [];
460
744
  // Curated paths lead; the generic list follows. De-dup so a path that
@@ -468,7 +752,7 @@ export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service) {
468
752
  const candidate = `${origin}${path}`;
469
753
  if (alreadyTried.has(candidate))
470
754
  continue;
471
- if (candidatePath === currentPath)
755
+ if (`${origin}${path}`.replace(/\/+$/, "").toLowerCase() === currentFull)
472
756
  continue;
473
757
  return candidate;
474
758
  }
@@ -2506,9 +2790,13 @@ export function extractQuotedTokenFromReason(reason, pageText) {
2506
2790
  // `.` is in the class: many tokens are dot-separated (Zerops
2507
2791
  // `LhJbaP.VeODh3ZZ…`, GitLab PATs, JWTs, Slack `xox*`); excluding it
2508
2792
  // dropped every dotted token to null and looped to run_timeout
2509
- // (MEASURED 2026-06-12: zerops). The verbatim pageText.includes guard
2510
- // below keeps a sentence's trailing period from matching.
2511
- const matches = reason.matchAll(/['"`]([A-Za-z0-9_.\-]{10,80})['"`]/g);
2793
+ // (MEASURED 2026-06-12: zerops). `+/=` are in too: some services mint
2794
+ // BASE64-encoded keys (portkey, MEASURED 2026-06-17: `tdCwXd/8kp4…` the
2795
+ // `/` truncated capture to `tdCwXd` (<10) → null → 24-round loop to
2796
+ // run_timeout despite the key being on the page). The verbatim
2797
+ // pageText.includes guard below keeps a sentence's trailing period — or a
2798
+ // stray path/URL fragment — from matching anything not actually on the page.
2799
+ const matches = reason.matchAll(/['"`]([A-Za-z0-9_.+/=\-]{10,80})['"`]/g);
2512
2800
  for (const m of matches) {
2513
2801
  const candidate = m[1];
2514
2802
  if (candidate === undefined)
@@ -2662,7 +2950,7 @@ export function extractAllLabeledTokensFromReason(reason, pageText) {
2662
2950
  // credential-shape (mixed alpha+digit, ≥16 chars, OR a known
2663
2951
  // credential prefix); (2) hard-reject a curated set of common
2664
2952
  // English status words that look label-like in extract prose.
2665
- const quotedRe = new RegExp(`\\b(${labelAltLoose})\\b\\s*[=:]\\s*['"\`]([A-Za-z0-9_.\\-]{4,80})['"\`]`, "gi");
2953
+ const quotedRe = new RegExp(`\\b(${labelAltLoose})\\b\\s*[=:]\\s*['"\`]([A-Za-z0-9_.+/=\\-]{4,80})['"\`]`, "gi");
2666
2954
  for (const m of reason.matchAll(quotedRe)) {
2667
2955
  const rawLabel = (m[1] ?? "").toLowerCase().replace(/[-\s]+/g, "_");
2668
2956
  const normalized = rawLabel.replace(/_+/g, "_");
@@ -2710,7 +2998,7 @@ export function extractAllLabeledTokensFromReason(reason, pageText) {
2710
2998
  // Same separator vocab as quoted, plus optional quotes around the
2711
2999
  // value. The credential-shape + blacklist guards run on the
2712
3000
  // captured (possibly-unquoted) value.
2713
- const proseRe = new RegExp(`\\b(${labelAltLoose})\\b\\s*(?:[=:]|\\b(?:is|are)\\b)\\s*['"\`]?([A-Za-z0-9_.\\-]{4,80})['"\`]?`, "gi");
3001
+ const proseRe = new RegExp(`\\b(${labelAltLoose})\\b\\s*(?:[=:]|\\b(?:is|are)\\b)\\s*['"\`]?([A-Za-z0-9_.+/=\\-]{4,80})['"\`]?`, "gi");
2714
3002
  for (const m of reason.matchAll(proseRe)) {
2715
3003
  const rawLabel = (m[1] ?? "").toLowerCase().replace(/[-\s]+/g, "_");
2716
3004
  const normalized = rawLabel.replace(/_+/g, "_");
@@ -3100,6 +3388,38 @@ export function pickVerificationLinkFromHtml(bodyHtml) {
3100
3388
  }
3101
3389
  return best !== null && best.score > 0 ? best.url : null;
3102
3390
  }
3391
+ // Last-resort verification-link pick: a link in the email that points at the
3392
+ // SERVICE's OWN domain. MEASURED on arize (2026-06-17): the confirm email says
3393
+ // "Click the link in the email", but its confirm link is a click-tracker the
3394
+ // keyword scorers miss, and a spurious number in the body got read as a code →
3395
+ // the bot entered a code on a page with no code field and stalled. A same-
3396
+ // registrable-domain link in a signup email is almost always the confirmation,
3397
+ // so follow it when the keyword scorers came up empty. Skips obvious non-confirm
3398
+ // links (unsubscribe/preferences/privacy/terms/social). Exported for unit tests.
3399
/**
 * Last-resort verification-link pick: the first http(s) link in the email
 * that shares the service's registrable domain.
 *
 * Hosts are compared by registrable domain (tldts `getDomain`), so
 * app.arize.com matches arize.com while two unrelated `*.co.uk` hosts do not
 * match each other. When `getDomain` yields null (e.g. a bare or unlisted
 * host) the last two labels are compared instead.
 *
 * @param {Iterable<string>} links - Raw link strings from the email; may
 *   still carry HTML-escaped "&amp;" separators.
 * @param {string | null | undefined} serviceHost - Host of the service being
 *   signed up for.
 * @returns {string | null} The normalized href of the first matching link, or
 *   null when serviceHost is missing/empty or nothing matches.
 */
export function pickServiceDomainLink(links, serviceHost) {
    // Covers null, undefined and "" alike; previously undefined threw on `.length`.
    if (typeof serviceHost !== "string" || serviceHost.length === 0) {
        return null;
    }
    const registrable = (host) => {
        const bare = host.toLowerCase().replace(/^www\./, "");
        return getDomain(bare) ?? bare.split(".").slice(-2).join(".");
    };
    const target = registrable(serviceHost);
    // Obvious non-confirmation links: unsubscribe, preferences, legal, social, help.
    const SKIP = /unsubscribe|preferences|email[_-]?settings|\bmanage\b|privacy|\bterms\b|twitter|linkedin|facebook|instagram|youtube|status\.|\/help\b|\/support\b|\/docs\b/i;
    for (const raw of links) {
        let parsed;
        try {
            parsed = new URL(raw.replace(/&amp;/g, "&"));
        }
        catch {
            continue;
        }
        if (parsed.protocol !== "https:" && parsed.protocol !== "http:") {
            continue;
        }
        if (SKIP.test(raw)) {
            continue;
        }
        if (registrable(parsed.hostname) === target) {
            return parsed.href;
        }
    }
    return null;
}
3103
3423
  // Last-resort verification-CODE extraction from an email body, for the
3104
3424
  // passwordless "we emailed you a code" flow (axiom: "Axiom sign-in
3105
3425
  // verification code") when the inbox parser's parsed_codes came back empty.
@@ -3305,6 +3625,50 @@ export function isLoadingShellText(text) {
3305
3625
  // forever, so it is not a signal.
3306
3626
  return /\bconnecting\b|\bloading\b|please wait|getting things ready|initiali[sz]ing/i.test(text);
3307
3627
  }
3628
+ // The interactive-element count at/above which a page is "hydrated by
3629
+ // definition" — a rendered dashboard/form a user can act on — so a stray
3630
+ // "loading"/"please wait" word in its (visible) text is NOT a hydration
3631
+ // shell. WHY 5: a genuine loading shell paints zero or a handful of chrome
3632
+ // affordances (a logo link, maybe a skip-link); a real authenticated surface
3633
+ // (nav + content + an "API Keys"/"Create" affordance) clears 5 trivially.
3634
+ // Field evidence: luma-ai/unify-ai/sambanova/fireworks-ai/defang carried
3635
+ // 10–95 visible interactive elements yet were flagged a shell EVERY round —
3636
+ // any threshold from ~5 up vetoes all of them while still catching the true
3637
+ // 0-to-few-element shell (northflank). Reuses the same minElements default as
3638
+ // waitForInteractiveDom (5) so the negative gate and the positive readiness
3639
+ // wait agree on what "hydrated" means.
3640
+ export const SHELL_MAX_ELEMENTS = 5;
3641
+ // The authoritative loading-shell decision: a page is a hydration shell only
3642
+ // when loading-text is present in its VISIBLE text AND it has fewer than
3643
+ // SHELL_MAX_ELEMENTS interactive elements. Splitting the two conditions kills
3644
+ // the dominant false positive two ways at once:
3645
+ // 1. visibleText (innerText) drops hidden skeleton/RSC "loading" strings a
3646
+ // raw textContent read picked up;
3647
+ // 2. the inventory veto makes the gate un-fireable on a hydrated page
3648
+ // regardless of any residual stray "loading" word.
3649
+ // Pure + exported for unit tests. The text predicate stays isLoadingShellText
3650
+ // (still used where only text is on hand); this is the call-site gate where
3651
+ // both signals are available.
3652
/**
 * Decide whether a page is a loading shell, using both available signals.
 *
 * @param {string} visibleText - The page's visible text.
 * @param {number} inventoryCount - Number of interactive elements found.
 * @returns {boolean} False whenever the count reaches SHELL_MAX_ELEMENTS;
 *   otherwise whatever isLoadingShellText reports for the text.
 */
export function isLoadingShell(visibleText, inventoryCount) {
    // The element-count veto is evaluated first, so the text predicate is
    // only consulted for pages below the threshold.
    const vetoedByInventory = inventoryCount >= SHELL_MAX_ELEMENTS;
    return !vetoedByInventory && isLoadingShellText(visibleText);
}
3657
+ // Thrown from postVerifyLoop when a post-OAuth/post-verify SPA presents a
3658
+ // genuine loading shell that never hydrates within the bounded budget (and a
3659
+ // navigate-to-root retry didn't unstick it). Surfaced as the terminal status
3660
+ // `spa_never_hydrated`. classifyFailure() (skill-schema failure-taxonomy)
3661
+ // has no entry for this kind, so it falls to the deliberate transient default
3662
+ // — a non-demoting outcome (a never-hydrating route is environmental/transient,
3663
+ // not skill rot), and no new exported skill-schema symbol is needed (avoids
3664
+ // the published-dep-skew trap). The leading token before ':' is what
3665
+ // classifyFailure keys on, so the message MUST start with the bare kind.
3666
/**
 * Error raised when a post-OAuth/post-verify SPA never hydrates.
 *
 * Instances are identified by `name === "SpaNeverHydratedError"` as well as
 * by `instanceof`.
 */
export class SpaNeverHydratedError extends Error {
    /**
     * @param {string} message - Human-readable message. Per the surrounding
     *   notes it must begin with the bare failure kind followed by ':'.
     * @param {{ cause?: unknown }} [options] - Optional standard Error
     *   options, forwarded unchanged so a throw site can attach the
     *   underlying failure as `cause`. Omitting it keeps the old behavior.
     */
    constructor(message, options) {
        super(message, options);
        this.name = "SpaNeverHydratedError";
    }
}
3308
3672
  // Transient "the session is being established RIGHT NOW" copy. MEASURED on
3309
3673
  // groq (Stytch B2B): after the OAuth callback, /authenticate shows
3310
3674
  // "Logging in…" then "Creating your organization…" for ~5-7s of async
@@ -3348,6 +3712,12 @@ export class SignupAgent {
3348
3712
  // backends_used[i] is the .name string of the LLMClient that produced
3349
3713
  // the i-th reply this run.
3350
3714
  backendsUsed = [];
3715
+ // Fix C4 — the model/provider the backend actually served on the most
3716
+ // recent LLM call, captured per round. callLLM stamps these after every
3717
+ // call; the capture sites read them when dumping a round. Undefined
3718
+ // until the first call (or when the backend doesn't report a model).
3719
+ lastResolvedModel;
3720
+ lastResolvedProvider;
3351
3721
  llmPair;
3352
3722
  // Captcha encounter state for the current run. Updated by the
3353
3723
  // pre/post-submit/re-plan captcha gates in signup(); read by the
@@ -3355,6 +3725,21 @@ export class SignupAgent {
3355
3725
  // because a "blocked" outcome is more diagnostic than an earlier
3356
3726
  // "solved" one and we always want the failure mode in the result.
3357
3727
  captchaEncounter = undefined;
3728
+ // Sticky "this run is on the email path" flag. Set when OAuth turns out to be
3729
+ // login-only (a new identity has no account — Clerk's form_identifier_not_found)
3730
+ // and we fall back to email signup. Without it, the dispatch loop re-runs the
3731
+ // OAuth-first scan after the re-route and re-clicks Google → loops forever
3732
+ // (the cartesia oauth_session_not_persisted bug). Honored by
3733
+ // resolveOAuthCandidates; reset at the start of each signup().
3734
+ committedToEmailPath = false;
3735
+ // One-shot guard for the post-OAuth-callback email fallback. When a Clerk-class
3736
+ // app completes the Google handshake but never persists a session (its callback
3737
+ // silently fails for a brand-new identity driven through sign-IN — the cartesia
3738
+ // root cause WITHOUT the explicit "no account" text), the bot recovers by
3739
+ // creating the account via email instead. This flag keeps that to one attempt so
3740
+ // a service that's genuinely OAuth-only (no email form to fall back to) fails
3741
+ // honestly rather than re-trying forever. Reset at the start of each signup().
3742
+ oauthEmailFallbackTried = false;
3358
3743
  // Invisible-captcha presence for the current run. Cloudflare Turnstile
3359
3744
  // and reCAPTCHA-v3 are score-based: a HIGH score passes silently with no
3360
3745
  // visible widget to "solve", so the visible-gate path above records
@@ -3451,6 +3836,42 @@ export class SignupAgent {
3451
3836
  const minted = await this.browser.triggerInvisibleRecaptcha();
3452
3837
  steps.push(`${label} captcha: invisible reCAPTCHA v3 — ${minted ? "minted score token via grecaptcha.execute()" : "badge present, token not minted (form may submit it itself)"}`);
3453
3838
  }
3839
+ else if (this.captchaSolver?.isAvailable() === true) {
3840
+ // INVISIBLE hCaptcha (huggingface, 2026-06-17): sitekey in the page's
3841
+ // JS config, NO visible widget, but the form REQUIRES an
3842
+ // h-captcha-response token to submit (the wired Tier 3 hCaptcha path
3843
+ // only fires for VISIBLE widgets). Solve via 2Captcha now and inject,
3844
+ // so the imminent submit carries a token instead of a silent reject.
3845
+ // HONEST CAVEAT: a sophisticated host (HF may run Enterprise hCaptcha)
3846
+ // can bind the token to the browser session and reject a solver token
3847
+ // regardless — in which case this surfaces the real wall instead of a
3848
+ // misleading "validation error".
3849
+ const hSitekey = await this.browser.extractHcaptchaSitekey();
3850
+ if (hSitekey !== null) {
3851
+ this.invisibleCaptcha = { kind: "hcaptcha", variant: "hcaptcha" };
3852
+ const pageUrl = (await this.browser.getState().catch(() => null))?.url;
3853
+ if (pageUrl !== undefined && this.captchaSolver !== undefined) {
3854
+ steps.push(`${label} captcha: invisible hCaptcha (sitekey ${hSitekey.slice(0, 10)}…) — solving via 2Captcha before submit`);
3855
+ const solveRes = await this.captchaSolver.solveHcaptcha({
3856
+ sitekey: hSitekey,
3857
+ pageUrl,
3858
+ });
3859
+ if (solveRes.kind === "ok") {
3860
+ const injected = await this.browser.injectHcaptchaToken(solveRes.token);
3861
+ steps.push(injected
3862
+ ? `${label} captcha: invisible hCaptcha solved in ${Math.round(solveRes.durationMs / 1000)}s + token injected`
3863
+ : `${label} captcha: hCaptcha token arrived but injection failed`);
3864
+ if (injected) {
3865
+ return { found: true, solved: true, blocked: false, kind: "hcaptcha" };
3866
+ }
3867
+ }
3868
+ else {
3869
+ steps.push(`${label} captcha: invisible hCaptcha 2Captcha ${solveRes.kind}` +
3870
+ ("reason" in solveRes ? `: ${solveRes.reason}` : ""));
3871
+ }
3872
+ }
3873
+ }
3874
+ }
3454
3875
  }
3455
3876
  return { found: false, solved: false, blocked: false, kind: "turnstile" };
3456
3877
  }
@@ -3650,6 +4071,18 @@ export class SignupAgent {
3650
4071
  // this the scan would re-pick OAuth and loop right back into the
3651
4072
  // same no-account bounce. One-shot equivalent of committedToEmailPath.
3652
4073
  forceFormFill = false) {
4074
+ // FORM_FILL_ENGINE (default-ON since 2026-06-15, strangler slice 3): route the
4075
+ // whole round through the pure decideFormFillStep reducer. Flipped default-on
4076
+ // after live validation showed the engine reaches the correct terminal on
4077
+ // every fillable form (ipinfo full success; cohere/deepinfra/postmark each
4078
+ // reached submit — their failures were downstream verification/extraction or
4079
+ // already-registered, NOT the form-fill phase). The inline loop below is kept
4080
+ // one cycle as the explicit opt-out fallback (FORM_FILL_ENGINE=0/off) and is
4081
+ // deleted next, once a heal pass confirms no per-service regression
4082
+ // (DESIGN-form-fill-engine.md migration step 4).
4083
+ if (!/^(0|false|off|no)$/i.test(process.env.FORM_FILL_ENGINE ?? "")) {
4084
+ return this.planExecuteViaEngine(task, fillValues, steps, forceFormFill);
4085
+ }
3653
4086
  const MAX_ERROR_REPLANS = 2;
3654
4087
  // 0.8.3-rc.1 — widened from 4 to 6 so submit_disabled re-plans
3655
4088
  // get more attempts to identify the gating control. Mailgun's
@@ -3688,10 +4121,23 @@ export class SignupAgent {
3688
4121
  // F14 — selectors the planner clicked WITHOUT advancing the page.
3689
4122
  // Each no-progress plan records its click selectors here; the next
3690
4123
  // plan that picks ONLY selectors in this set is failed as stuck
3691
- // instead of looping. Cleared on any progress (fill action). The
3692
- // Railway run that motivated F14 spun the same footer "Email" link
3693
- // 5 times before timing out; this loop now bails after 2.
4124
+ // instead of looping. Cleared on ANY real progress between two
4125
+ // clicks of the same selector: a fill/check action OR a
4126
+ // page change (inventory/url moved). The Railway run that motivated
4127
+ // F14 spun the same footer "Email" link 5 times before timing out;
4128
+ // this loop now bails after 2.
3694
4129
  let lastNoProgressClickSelectors = new Set();
4130
+ // Page-state fingerprint from the END of the previous round, used to
4131
+ // decide whether the page actually moved between rounds. A
4132
+ // "fill field → submit → (validation error) → fix field → submit
4133
+ // again" cycle is legitimate progress, NOT a loop: kinde's post-OAuth
4134
+ // register form has a globally-unique "domain" field, so the first
4135
+ // guess collides ("taken") and the bot must edit the field and
4136
+ // re-click the SAME "Next" button. Without this, re-clicking the same
4137
+ // selector after a genuine field edit (or any inventory/url change)
4138
+ // false-bailed as planner_loop even though the intervening fill was
4139
+ // real progress. (MEASURED 2026-06-13, kinde, terminal_round 3.)
4140
+ let lastRoundPageSig = null;
3695
4141
  // rc.31 — once the bot has explicitly clicked an email-flow
3696
4142
  // button (e.g. Railway's "Log in using email" two-stage chooser),
3697
4143
  // stay on the email path. Without this, the auto-OAuth-first
@@ -4057,16 +4503,40 @@ export class SignupAgent {
4057
4503
  steps.push("Form-fill planner described a logged-in product/billing page (not a signup form) — pivoting to post-verify navigation");
4058
4504
  return { kind: "already_oauth" };
4059
4505
  }
4506
+ // The page moved since the previous round if the URL changed or the
4507
+ // set of interactive selectors changed (a field gained/lost, a
4508
+ // validation message toggled an element, a wizard step advanced).
4509
+ // ANY such change means whatever the planner did last round was real
4510
+ // progress — clear the no-progress memory so a re-click of a
4511
+ // previously-"dead" selector on the now-changed page isn't judged a
4512
+ // loop. This is the unique-value-retry case (kinde domain field):
4513
+ // edit field → page re-renders → re-click "Next" is legitimate.
4514
+ const pageSig = state.url +
4515
+ "§" +
4516
+ inventory
4517
+ .map((e) => e.selector)
4518
+ .sort()
4519
+ .join("|");
4520
+ if (lastRoundPageSig !== null && pageSig !== lastRoundPageSig) {
4521
+ lastNoProgressClickSelectors = new Set();
4522
+ }
4523
+ lastRoundPageSig = pageSig;
4060
4524
  // F14 — stuck-detection: if the plan picks ONLY click selectors
4061
4525
  // we already tried in the previous round without page progress,
4062
4526
  // it's a planner loop. Fail planning_failed with the offending
4063
4527
  // selector(s) so the operator sees what stalled. Doesn't fire
4064
4528
  // when the plan adds at least one new selector (legitimate
4065
- // exploration). Doesn't fire on fill plans (forward progress).
4529
+ // exploration). Doesn't fire on fill plans (forward progress),
4530
+ // nor on a plan that ALSO edits a field this round (a fill/check
4531
+ // alongside the re-click is real progress — kinde's "tick the
4532
+ // required box + re-click Next" advances the form even though the
4533
+ // Next selector repeats).
4066
4534
  const planClickSelectors = plan.actions
4067
4535
  .filter((a) => a.kind === "click")
4068
4536
  .map((a) => a.selector);
4069
- if (planClickSelectors.length > 0 &&
4537
+ const planEditsAField = plan.actions.some((a) => a.kind === "fill" || a.kind === "check");
4538
+ if (!planEditsAField &&
4539
+ planClickSelectors.length > 0 &&
4070
4540
  lastNoProgressClickSelectors.size > 0 &&
4071
4541
  planClickSelectors.every((s) => lastNoProgressClickSelectors.has(s))) {
4072
4542
  return {
@@ -4120,6 +4590,16 @@ export class SignupAgent {
4120
4590
  // static page won't help, so a second consecutive empty plan is
4121
4591
  // a dead end. (The 0.1.12 loop spun this 4x on Axiom.)
4122
4592
  const hadFill = plan.actions.some((a) => a.kind === "fill");
4593
+ // A check is ALSO a field edit = real progress, even though (unlike
4594
+ // a fill) it doesn't promote the plan to the submit path below.
4595
+ // (The form-fill plan vocabulary is fill/check/click — `select`
4596
+ // belongs to the post-verify loop.) Treat a check as progress for
4597
+ // the no-progress tracker only: a plan that ticked a box advanced
4598
+ // the form, so its click selectors must NOT be recorded as "dead"
4599
+ // (and any prior dead record is cleared). Without this, a "click
4600
+ // Next (no advance) → tick a required box + re-click Next" cycle
4601
+ // false-bailed as a loop even though the check was progress.
4602
+ const hadFieldEdit = plan.actions.some((a) => a.kind === "fill" || a.kind === "check");
4123
4603
  if (!hadFill) {
4124
4604
  if (plan.actions.length === 0) {
4125
4605
  emptyPlans += 1;
@@ -4142,8 +4622,12 @@ export class SignupAgent {
4142
4622
  // F14 — record the click selectors that didn't advance the
4143
4623
  // page. The next plan's stuck-detection check (above) bails
4144
4624
  // if it picks the same ones again. Hint also tells the
4145
- // planner which selectors NOT to re-pick.
4146
- lastNoProgressClickSelectors = new Set(planClickSelectors);
4625
+ // planner which selectors NOT to re-pick. A plan that ALSO made
4626
+ // a field edit (fill/check) made real progress, so clear the
4627
+ // tracker instead of recording its clicks as dead.
4628
+ lastNoProgressClickSelectors = hadFieldEdit
4629
+ ? new Set()
4630
+ : new Set(planClickSelectors);
4147
4631
  const avoidHint = planClickSelectors.length > 0
4148
4632
  ? ` AVOID these selectors — they were clicked but the page did NOT advance: ${planClickSelectors.map((s) => JSON.stringify(s)).join(", ")}.`
4149
4633
  : "";
@@ -4268,8 +4752,30 @@ export class SignupAgent {
4268
4752
  // the next planner iteration handles SPA settle.
4269
4753
  await this.browser.wait(2);
4270
4754
  const postGate = await this.runCaptchaGate("Post-submit", steps);
4271
- if (postGate.blocked)
4755
+ if (postGate.blocked) {
4756
+ // A managed/invisible Turnstile (Clerk's Smart CAPTCHA) resolves
4757
+ // SERVER-SIDE: the submit can succeed — account created, verification
4758
+ // email sent — even though our client-side token poll timed out.
4759
+ // cartesia PROVED this: it emailed a verification code AFTER the bot had
4760
+ // bailed captcha_blocked. The ground truth of "did the submit go
4761
+ // through" is the INBOX, not the client token. So for a POST-submit
4762
+ // Turnstile with an inbox available, don't hard-bail: proceed to the
4763
+ // verification step and let the inbox poll arbitrate — a code arriving
4764
+ // proves the managed Turnstile passed (→ completes); no code surfaces
4765
+ // an honest verification_not_sent rather than a false captcha_blocked.
4766
+ // A genuine pre-submit gate (no inbox, or a non-Turnstile challenge)
4767
+ // still bails captcha_blocked.
4768
+ if (postGate.kind === "turnstile" && task.inbox !== undefined) {
4769
+ steps.push("Post-submit Turnstile token didn't populate — but a managed Turnstile resolves " +
4770
+ "server-side, so the submit may have gone through. Proceeding to verification; " +
4771
+ "the inbox poll arbitrates (a code = submit succeeded).");
4772
+ // Don't let the recorded block short-circuit later gates / the result.
4773
+ this.captchaEncounter = undefined;
4774
+ await this.captureSignupFormRounds(task.service, plan, inventory, fillValues);
4775
+ return { kind: "submitted" };
4776
+ }
4272
4777
  return { kind: "captcha_blocked", captchaKind: postGate.kind };
4778
+ }
4273
4779
  if (postGate.found && postGate.solved) {
4274
4780
  // Re-click submit so the populated token ships with the form.
4275
4781
  try {
@@ -4304,6 +4810,416 @@ export class SignupAgent {
4304
4810
  return { kind: "submitted" };
4305
4811
  }
4306
4812
  }
4813
+ // FORM_FILL_ENGINE path (strangler slice 3) — the same round as
4814
+ // planExecuteWithRetry, but every DECISION goes through the pure
4815
+ // decideFormFillStep reducer (form-fill.ts); this method owns only the I/O and
4816
+ // the replan-hint CONTENT. Faithful to the inline loop; reuses its helpers.
4817
+ async planExecuteViaEngine(task, fillValues, steps, forceFormFill) {
4818
+ let state = initialFormFillState(forceFormFill);
4819
+ let hint;
4820
+ // Map a reducer terminal outcome to PlanExecOutcome. needs_oauth_provider_session
4821
+ // + oauth carry provider IDs the executor holds in typed form — pass those in.
4822
+ const toPlanExec = (outcome, typed) => {
4823
+ switch (outcome.kind) {
4824
+ case "oauth":
4825
+ return { kind: "oauth", selector: typed.oauth.selector, provider: typed.oauth.provider };
4826
+ case "needs_oauth_provider_session":
4827
+ return {
4828
+ kind: "needs_oauth_provider_session",
4829
+ missingProviders: typed.missingProviders,
4830
+ haveSessions: typed.haveSessions,
4831
+ };
4832
+ default:
4833
+ return outcome;
4834
+ }
4835
+ };
4836
+ const oauthCandidates = await this.resolveOAuthCandidates(task, steps);
4837
+ for (;;) {
4838
+ await this.browser.waitForFormReady();
4839
+ const dismissed = await this.browser.dismissConsentBanner();
4840
+ if (dismissed !== null)
4841
+ steps.push(`Dismissed cookie consent: "${dismissed}"`);
4842
+ await saveDebugSnapshot(this.browser, "before-fill");
4843
+ const [browserState, inventory] = await Promise.all([
4844
+ this.browser.getState(),
4845
+ this.buildInventory(steps, oauthCandidates),
4846
+ ]);
4847
+ // ── C1 pre_plan: gather the observation, then decide ──
4848
+ const hasFillableInput = inventory.some((e) => e.tag === "input" &&
4849
+ (e.type === "email" || e.type === "text" || e.type === "password" || e.type === null) &&
4850
+ e.visible !== false);
4851
+ const wallAlias = extractVerifyWallAlias(browserState.html);
4852
+ const ourInboxDomain = task.email.slice(task.email.indexOf("@") + 1).toLowerCase();
4853
+ const aliasPollable = wallAlias === null ||
4854
+ wallAlias.slice(wallAlias.indexOf("@") + 1).toLowerCase() === ourInboxDomain;
4855
+ const oauthButtonHitRaw = findFirstOAuthButton(inventory, oauthCandidates);
4856
+ const offersOAuthSignup = oauthCandidates.length > 0 && oauthButtonHitRaw !== null;
4857
+ const verifyWall = !hasFillableInput &&
4858
+ expectsVerificationEmail(browserState.html) &&
4859
+ aliasPollable &&
4860
+ !offersOAuthSignup;
4861
+ const hasCredentialInput = inventory.some((e) => e.tag === "input" && (e.type === "email" || e.type === "password" || e.type === "tel"));
4862
+ // LAZY (parity with inline agent.ts:4823): the loading-shell check calls
4863
+ // extractText() — an I/O read — so only compute it when the OAuth-scan
4864
+ // branch will actually consult it (candidates present, NOT committed, and
4865
+ // no provider button hit yet). Computing it unconditionally would fire a
4866
+ // spurious extractText() every round and diverge from the inline path.
4867
+ const needScanShell = oauthCandidates.length > 0 && !state.committedToEmailPath && oauthButtonHitRaw === null;
4868
+ const oauthScanShell = needScanShell &&
4869
+ (inventory.length <= 1 ||
4870
+ !hasCredentialInput ||
4871
+ isLoadingShellText(await this.browser.extractText().catch(() => "")));
4872
+ const signInAdvance = findSignInAdvanceButton(inventory, oauthCandidates);
4873
+ const antiBotVendor = inventory.length < 10 ? detectAntiBotBlock(browserState.html) : null;
4874
+ const oauthOnly = isOauthOnlyChooser(inventory);
4875
+ let missingProviders = [];
4876
+ let haveSessions = [];
4877
+ if (oauthOnly) {
4878
+ const visibleProviders = detectOAuthProvidersInInventory(inventory);
4879
+ haveSessions = await this.effectiveLoggedInProviders();
4880
+ missingProviders = visibleProviders.filter((p) => !haveSessions.includes(p));
4881
+ }
4882
+ const preObs = {
4883
+ checkpoint: "pre_plan",
4884
+ hasFillableInput,
4885
+ verifyWall,
4886
+ codeGate: isVerificationCodeGate(inventory, browserState.html),
4887
+ oauthCandidatesPresent: oauthCandidates.length > 0,
4888
+ oauthButtonHit: oauthButtonHitRaw !== null
4889
+ ? { selector: oauthButtonHitRaw.button.selector, provider: oauthButtonHitRaw.provider }
4890
+ : null,
4891
+ oauthScanShell,
4892
+ alreadySignedIn: detectAlreadySignedIn({ inventory, url: browserState.url }),
4893
+ signInAdvancePresent: signInAdvance !== null,
4894
+ antiBotVendor,
4895
+ oauthOnly,
4896
+ oauthOnlyMissingProviders: missingProviders,
4897
+ oauthOnlyHaveSessions: haveSessions,
4898
+ };
4899
+ const pre = decideFormFillStep(state, preObs);
4900
+ state = pre.nextState;
4901
+ const preAct = pre.action;
4902
+ if (preAct.kind === "route_to_verification") {
4903
+ this.pendingVerificationAlias = wallAlias;
4904
+ steps.push(`Form: email-verification wall (no fields to fill${wallAlias !== null ? `, check ${wallAlias}` : ""}) — ` +
4905
+ `routing to the inbox-poll + verification-link flow.`);
4906
+ const resend = inventory.find((e) => {
4907
+ if (e.tag !== "button" && e.tag !== "a")
4908
+ return false;
4909
+ const t = `${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`.toLowerCase();
4910
+ return /resend (?:verification )?(?:email|link)|send (?:it )?again/.test(t);
4911
+ });
4912
+ if (resend !== undefined) {
4913
+ try {
4914
+ await this.browser.click(resend.selector);
4915
+ steps.push(`Form: clicked "Resend verification email" to refresh the link.`);
4916
+ await this.browser.wait(2);
4917
+ }
4918
+ catch {
4919
+ // non-fatal
4920
+ }
4921
+ }
4922
+ return { kind: "submitted" };
4923
+ }
4924
+ if (preAct.kind === "terminal") {
4925
+ if (preAct.outcome.kind === "oauth" && oauthButtonHitRaw !== null) {
4926
+ const label = OAUTH_PROVIDERS[oauthButtonHitRaw.provider].label;
4927
+ steps.push(`OAuth-first: found a ${label} sign-in affordance ` +
4928
+ `(${JSON.stringify(oauthButtonHitRaw.button.visibleText ?? oauthButtonHitRaw.button.ariaLabel ?? label)}) ` +
4929
+ `— taking the OAuth path`);
4930
+ }
4931
+ return toPlanExec(preAct.outcome, {
4932
+ oauth: oauthButtonHitRaw !== null
4933
+ ? { selector: oauthButtonHitRaw.button.selector, provider: oauthButtonHitRaw.provider }
4934
+ : undefined,
4935
+ missingProviders,
4936
+ haveSessions,
4937
+ });
4938
+ }
4939
+ if (preAct.kind === "oauth_scan_wait") {
4940
+ steps.push(`OAuth-first[engine]: no provider affordance yet — waiting for async render ` +
4941
+ `(retry ${state.oauthScanRetries}${oauthScanShell ? ", loading shell" : ""})`);
4942
+ await this.browser.wait(3);
4943
+ continue;
4944
+ }
4945
+ if (preAct.kind === "oauth_shell_reload") {
4946
+ steps.push(`OAuth-first[engine]: page stuck as a loading shell — reloading once to unstick the SPA`);
4947
+ try {
4948
+ await this.browser.goto(this.browser.currentUrl());
4949
+ await this.browser.waitForFormReady();
4950
+ }
4951
+ catch {
4952
+ // reload failed — re-loop and let the terminal handling take over
4953
+ }
4954
+ continue;
4955
+ }
4956
+ if (preAct.kind === "sign_in_advance" && signInAdvance !== null) {
4957
+ steps.push(`OAuth-first: no provider affordance, but found a generic ` +
4958
+ `sign-in affordance (${JSON.stringify(signInAdvance.visibleText ?? signInAdvance.ariaLabel ?? "")}) ` +
4959
+ `— clicking it to advance to the real login page ` +
4960
+ `(${state.signInAdvanceClicks}/${B_FF.MAX_SIGN_IN_ADVANCE_CLICKS})`);
4961
+ try {
4962
+ await this.browser.click(signInAdvance.selector);
4963
+ }
4964
+ catch (err) {
4965
+ steps.push(`OAuth-first[engine]: sign-in advance click failed (${err instanceof Error ? err.message : String(err)})`);
4966
+ }
4967
+ continue;
4968
+ }
4969
+ // preAct.kind === "run_planner" → fall through to the planner.
4970
+ steps.push("Asking Claude to plan the signup form fill...");
4971
+ let plan;
4972
+ try {
4973
+ plan = await this.planSignupForm({
4974
+ service: task.service,
4975
+ url: browserState.url,
4976
+ inventory,
4977
+ screenshot: browserState.screenshot,
4978
+ ...(hint !== undefined ? { hint } : {}),
4979
+ });
4980
+ }
4981
+ catch (err) {
4982
+ // ── C2 plan_error ──
4983
+ const reason = err instanceof Error ? err.message : String(err);
4984
+ const isUpstreamBlip = /\b50[234]\b/.test(reason) ||
4985
+ /\bupstream_(?:error|unreachable)\b/i.test(reason) ||
4986
+ /\bnetwork error\b/i.test(reason);
4987
+ const pe = decideFormFillStep(state, { checkpoint: "plan_error", isUpstreamBlip, reason });
4988
+ state = pe.nextState;
4989
+ if (pe.action.kind === "terminal")
4990
+ return toPlanExec(pe.action.outcome);
4991
+ if (pe.action.kind === "blip_retry") {
4992
+ steps.push(`⚠ planner request hit a transient upstream blip (${reason}) — retrying`);
4993
+ await this.browser.wait(2);
4994
+ continue;
4995
+ }
4996
+ // replan (selector_not_in_inventory)
4997
+ steps.push(`⚠ plan rejected (${reason}) — re-planning`);
4998
+ hint =
4999
+ "Your previous plan used a selector not in the inventory. Use ONLY selectors copied verbatim from a `selector=` field.";
5000
+ continue;
5001
+ }
5002
+ steps.push(`Plan: ${plan.actions.length} action(s), confidence=${plan.confidence}` +
5003
+ (plan.notes !== undefined ? ` — ${plan.notes}` : ""));
5004
+ // ── C3 post_plan ──
5005
+ const planClickSelectors = plan.actions
5006
+ .filter((a) => a.kind === "click")
5007
+ .map((a) => a.selector);
5008
+ const planEditsAField = plan.actions.some((a) => a.kind === "fill" || a.kind === "check");
5009
+ const pageSig = browserState.url + "§" + inventory.map((e) => e.selector).sort().join("|");
5010
+ const bySelector = new Map(inventory.map((e) => [e.selector, e]));
5011
+ const miss = await this.verifyPlan(plan, bySelector);
5012
+ const post = decideFormFillStep(state, {
5013
+ checkpoint: "post_plan",
5014
+ isDashboard: detectFormFillIsDashboard(plan),
5015
+ pageSig,
5016
+ planClickSelectors,
5017
+ planEditsAField,
5018
+ verifyMiss: miss,
5019
+ verifyMissNotCheckbox: miss !== null && miss.includes("not a checkbox"),
5020
+ });
5021
+ state = post.nextState;
5022
+ if (post.action.kind === "terminal") {
5023
+ if (post.action.outcome.kind === "planning_failed") {
5024
+ steps.push(`Form[engine]: post-plan → planning_failed (${post.action.outcome.reason})`);
5025
+ }
5026
+ return toPlanExec(post.action.outcome);
5027
+ }
5028
+ if (post.action.kind === "replan") {
5029
+ if (post.action.hintKind === "drop_the_check") {
5030
+ steps.push(`⚠ planned selectors did not verify (${miss}) — re-planning`);
5031
+ hint =
5032
+ `These selectors did not resolve correctly: ${miss}. Pick different inventory entries.` +
5033
+ " If the inventory has NO input of type=checkbox, OMIT the check" +
5034
+ " action entirely — do not substitute a link or a button. The" +
5035
+ " agreement may be implicit or pre-accepted.";
5036
+ }
5037
+ else {
5038
+ steps.push(`⚠ planned selectors did not verify (${miss}) — re-planning`);
5039
+ hint = `These selectors did not resolve correctly: ${miss}. Pick different inventory entries.`;
5040
+ }
5041
+ continue;
5042
+ }
5043
+ // post.action.kind === "execute_plan"
5044
+ await this.executePlan(plan, fillValues, steps, bySelector);
5045
+ // ── C4 post_execute ──
5046
+ const hadFill = plan.actions.some((a) => a.kind === "fill");
5047
+ const hadFieldEdit = plan.actions.some((a) => a.kind === "fill" || a.kind === "check");
5048
+ const clickedEmailAffordance = plan.actions.some((a) => a.kind === "click" && /\bemail\b/i.test(a.reason));
5049
+ const wasCommitted = state.committedToEmailPath;
5050
+ const px = decideFormFillStep(state, {
5051
+ checkpoint: "post_execute",
5052
+ clickedEmailAffordance,
5053
+ planClickSelectors,
5054
+ hadFill,
5055
+ hadFieldEdit,
5056
+ planActionCount: plan.actions.length,
5057
+ });
5058
+ state = px.nextState;
5059
+ if (!wasCommitted && state.committedToEmailPath) {
5060
+ steps.push("Committed to email-fill path — auto-OAuth-first scan suppressed for the rest of this signup");
5061
+ }
5062
+ if (px.action.kind === "terminal") {
5063
+ steps.push(`Form[engine]: post-execute → planning_failed`);
5064
+ return toPlanExec(px.action.outcome);
5065
+ }
5066
+ if (px.action.kind === "replan") {
5067
+ const avoidHint = planClickSelectors.length > 0
5068
+ ? ` AVOID these selectors — they were clicked but the page did NOT advance: ${planClickSelectors.map((s) => JSON.stringify(s)).join(", ")}.`
5069
+ : "";
5070
+ steps.push(plan.actions.length === 0
5071
+ ? "Plan found nothing to act on — re-checking once for a late render"
5072
+ : "Plan only revealed the page — re-planning the now-visible form");
5073
+ hint =
5074
+ "The previous step revealed or advanced the page. Plan the signup form that should now be visible." +
5075
+ avoidHint;
5076
+ continue;
5077
+ }
5078
+ // px.action.kind === "submit"
5079
+ const agreementBoxes = await this.browser.checkRequiredAgreementBoxes();
5080
+ if (agreementBoxes.length > 0) {
5081
+ steps.push(`Form: checked required agreement box(es): [${agreementBoxes.join(", ")}]`);
5082
+ }
5083
+ // ── C5 post_submit: gather facts incrementally (the reducer's priority-order
5084
+ // checks make an early-blocking pre-gate correct even with default tails). ──
5085
+ const preGate = await this.runCaptchaGate("Pre-submit", steps);
5086
+ let submitError = null;
5087
+ let submitDisabled = false;
5088
+ let submitTimeout = false;
5089
+ let postGateBlocked = false;
5090
+ let postGateKind = "";
5091
+ let validationFailure = false;
5092
+ if (!preGate.blocked) {
5093
+ steps.push(`Submit → ${plan.submit_selector}`);
5094
+ try {
5095
+ await this.browser.clickSubmit(plan.submit_selector);
5096
+ }
5097
+ catch (err) {
5098
+ submitError = err instanceof Error ? err.message : String(err);
5099
+ submitDisabled = submitError.startsWith("submit_disabled");
5100
+ submitTimeout = !submitDisabled && isSubmitTimeout(submitError);
5101
+ }
5102
+ if (submitError === null) {
5103
+ await this.browser.wait(2);
5104
+ const postGate = await this.runCaptchaGate("Post-submit", steps);
5105
+ postGateBlocked = postGate.blocked;
5106
+ postGateKind = postGate.kind;
5107
+ if (!postGate.blocked && postGate.found && postGate.solved) {
5108
+ try {
5109
+ await this.browser.click(plan.submit_selector);
5110
+ await this.browser.wait(3);
5111
+ }
5112
+ catch (err) {
5113
+ steps.push(`⚠ post-captcha submit retry failed: ${err instanceof Error ? err.message : String(err)}`);
5114
+ }
5115
+ }
5116
+ if (!postGate.blocked) {
5117
+ const afterText = (await this.browser.extractText()).slice(0, 4000);
5118
+ validationFailure = this.looksLikeValidationFailure(afterText);
5119
+ if (validationFailure)
5120
+ hint = `The previous submit produced validation errors. Visible page text: ${afterText.slice(0, 600)}`;
5121
+ }
5122
+ }
5123
+ }
5124
+ const ps = decideFormFillStep(state, {
5125
+ checkpoint: "post_submit",
5126
+ preGateBlocked: preGate.blocked,
5127
+ preGateKind: preGate.kind,
5128
+ submitError,
5129
+ submitDisabled,
5130
+ submitTimeout,
5131
+ postGateBlocked,
5132
+ postGateKind,
5133
+ hasInbox: task.inbox !== undefined,
5134
+ validationFailure,
5135
+ });
5136
+ state = ps.nextState;
5137
+ if (ps.action.kind === "replan") {
5138
+ if (ps.action.hintKind === "submit_disabled") {
5139
+ steps.push(`⚠ ${submitError} — re-planning to satisfy it`);
5140
+ hint = await this.buildSubmitDisabledHint(steps);
5141
+ }
5142
+ else if (ps.action.hintKind === "submit_went_stale") {
5143
+ steps.push(`⚠ submit selector went stale — the page likely advanced; re-planning`);
5144
+ hint =
5145
+ "The submit button selected last round was no longer present when " +
5146
+ "we tried to click it — an earlier action probably advanced the page. " +
5147
+ "Re-read the now-visible form and plan the next step (pick the submit " +
5148
+ "button that is actually on the current screen).";
5149
+ }
5150
+ else {
5151
+ steps.push("Post-submit validation errors — re-planning");
5152
+ // hint already set above from afterText
5153
+ }
5154
+ continue;
5155
+ }
5156
+ if (ps.action.kind === "terminal") {
5157
+ // Match the inline step trail: a genuine (non-disabled, non-timeout)
5158
+ // submit error logs "submit click failed" before failing.
5159
+ if (ps.action.outcome.kind === "submit_failed" &&
5160
+ submitError !== null &&
5161
+ !submitDisabled &&
5162
+ !submitTimeout) {
5163
+ steps.push(`⚠ submit click failed: ${submitError}`);
5164
+ }
5165
+ if (ps.action.outcome.kind === "submitted" && postGateBlocked && postGateKind === "turnstile") {
5166
+ // managed-Turnstile + inbox flip: clear the recorded block so it can't
5167
+ // short-circuit a later gate, and capture the form rounds.
5168
+ steps.push("Post-submit Turnstile token didn't populate — managed Turnstile resolves server-side; " +
5169
+ "proceeding to verification (the inbox poll arbitrates).");
5170
+ this.captchaEncounter = undefined;
5171
+ }
5172
+ if (ps.action.outcome.kind === "submitted") {
5173
+ await this.captureSignupFormRounds(task.service, plan, inventory, fillValues);
5174
+ }
5175
+ return toPlanExec(ps.action.outcome);
5176
+ }
5177
+ }
5178
+ }
5179
+ // The submit_disabled replan hint CONTENT (review Q2 — the executor owns this;
5180
+ // the reducer only emits the intent). A fresh inventory snapshot lists concrete
5181
+ // unchecked-checkbox + empty-input candidates so the planner picks one
5182
+ // immediately. Best-effort: a snapshot failure falls back to the generic prose.
5183
+ async buildSubmitDisabledHint(steps) {
5184
+ let uncheckedHint = "";
5185
+ let emptyInputHint = "";
5186
+ try {
5187
+ const snapshotInv = await this.buildInventory(steps, undefined, 60);
5188
+ const unchecked = snapshotInv.filter((e) => e.tag === "input" &&
5189
+ (e.type === "checkbox" || e.role === "checkbox") &&
5190
+ e.checked === false &&
5191
+ e.visible === true);
5192
+ if (unchecked.length > 0) {
5193
+ const lines = unchecked.slice(0, 6).map((e) => {
5194
+ const label = (e.labelText ?? e.ariaLabel ?? e.placeholder ?? e.name ?? "(no label)").toString().slice(0, 60);
5195
+ return ` - selector ${JSON.stringify(e.selector)} label=${JSON.stringify(label)}`;
5196
+ });
5197
+ uncheckedHint = `\nUnchecked checkboxes visible on the page:\n${lines.join("\n")}`;
5198
+ }
5199
+ const emptyInputs = snapshotInv.filter((e) => e.tag === "input" &&
5200
+ e.type !== "checkbox" &&
5201
+ e.type !== "radio" &&
5202
+ e.type !== "hidden" &&
5203
+ (e.value === null || e.value === "") &&
5204
+ e.visible === true);
5205
+ if (emptyInputs.length > 0) {
5206
+ const lines = emptyInputs.slice(0, 6).map((e) => {
5207
+ const label = (e.labelText ?? e.placeholder ?? e.ariaLabel ?? e.name ?? "(no label)").toString().slice(0, 60);
5208
+ return ` - selector ${JSON.stringify(e.selector)} label=${JSON.stringify(label)}`;
5209
+ });
5210
+ emptyInputHint = `\nEmpty visible inputs (any could be the unmet required field):\n${lines.join("\n")}`;
5211
+ }
5212
+ }
5213
+ catch {
5214
+ // best-effort
5215
+ }
5216
+ return ("The submit button is disabled — a required field or an agreement " +
5217
+ "was not satisfied. Issue {\"kind\":\"check\"} on an unchecked " +
5218
+ "agreement/terms checkbox, OR {\"kind\":\"fill\"} on an empty " +
5219
+ "required input. Do NOT click a link." +
5220
+ uncheckedHint +
5221
+ emptyInputHint);
5222
+ }
4307
5223
  // Emit the signup-form-fill rounds (email + password + submit) into the
4308
5224
  // capture chain. Shares this.captureChainRound with the post-verify loop
4309
5225
  // so the two phases form one contiguous 0..N chain. The captured email
@@ -4329,6 +5245,11 @@ export class SignupAgent {
4329
5245
  state,
4330
5246
  inventory,
4331
5247
  observed,
5248
+ // Fix C4 — the form-plan's backend (planSignupForm ran before
5249
+ // this synthetic preamble capture, so lastResolved* still reflect
5250
+ // it). These preamble rounds replay the one plan; one backend.
5251
+ ...(this.lastResolvedModel !== undefined ? { resolved_model: this.lastResolvedModel } : {}),
5252
+ ...(this.lastResolvedProvider !== undefined ? { resolved_provider: this.lastResolvedProvider } : {}),
4332
5253
  });
4333
5254
  this.captureChainRound += 1;
4334
5255
  };
@@ -4570,8 +5491,10 @@ export class SignupAgent {
4570
5491
  return [...new Set([...fromMarker, ...live])];
4571
5492
  }
4572
5493
  async resolveOAuthCandidates(task, steps) {
4573
- if (task.forceForm === true) {
4574
- steps.push("Force-form: OAuth-first scan suppressed — taking the email/password path");
5494
+ if (task.forceForm === true || this.committedToEmailPath) {
5495
+ steps.push(this.committedToEmailPath
5496
+ ? "Committed to email path (OAuth was login-only) — OAuth-first scan suppressed"
5497
+ : "Force-form: OAuth-first scan suppressed — taking the email/password path");
4575
5498
  return [];
4576
5499
  }
4577
5500
  const ordered = orderOAuthCandidates(task.oauthProvider, await this.effectiveLoggedInProviders());
@@ -4735,14 +5658,31 @@ export class SignupAgent {
4735
5658
  // permission scope, 5 short of the API key. Failed calls produce
4736
5659
  // no progress; charging them against the budget is wrong. Behave
4737
5660
  // like a meter: only count consumption that actually delivered.
5661
+ // Text-only planner experiment (BOT_PLANNER_TEXT_ONLY, default-off).
5662
+ // The DOM inventory is the authoritative action space — the planner
5663
+ // may only pick a selector the bot supplied, so the screenshot can
5664
+ // never expand the move set. This strips the screenshot from
5665
+ // NAVIGATION-PLANNER calls (deterministic=true) ONLY, leaving genuine
5666
+ // vision calls (2SV number-read, on-screen key extraction) untouched,
5667
+ // to measure whether the image earns its latency + token cost.
5668
+ const plannerTextOnly = args.deterministic === true &&
5669
+ /^(1|true|on)$/i.test(process.env.BOT_PLANNER_TEXT_ONLY ?? "");
5670
+ const userBlocks = plannerTextOnly
5671
+ ? args.userBlocks.filter((b) => b.kind !== "image")
5672
+ : args.userBlocks;
4738
5673
  const resp = await client.createMessage({
4739
5674
  system: args.system,
4740
- user: args.userBlocks,
5675
+ user: userBlocks,
4741
5676
  max_tokens: args.maxTokens,
4742
5677
  ...(args.temperature !== undefined ? { temperature: args.temperature } : {}),
5678
+ ...(args.deterministic === true ? { deterministic: true } : {}),
4743
5679
  });
4744
5680
  this.llmCallCount += 1;
4745
5681
  this.backendsUsed.push(resp.backend);
5682
+ // Fix C4 — remember the served model/provider so the capture sites
5683
+ // can stamp this round with what actually produced the plan.
5684
+ this.lastResolvedModel = resp.resolved_model;
5685
+ this.lastResolvedProvider = resp.resolved_provider;
4746
5686
  return resp.text;
4747
5687
  };
4748
5688
  const primaryRaw = await callOne(this.llmPair.primary);
@@ -4825,6 +5765,9 @@ export class SignupAgent {
4825
5765
  // (Google number-match etc.). Without it, the run still works —
4826
5766
  // steps are just only visible in the final result.
4827
5767
  const steps = task.stepsSink ?? [];
5768
+ // Fresh per-run: don't let a prior run's email-path commitment leak.
5769
+ this.committedToEmailPath = false;
5770
+ this.oauthEmailFallbackTried = false;
4828
5771
  // Stash the service name so the diagnostic uploader (called from
4829
5772
  // deep inside postVerifyLoop after a failed extract) can label
4830
5773
  // the snapshot without us threading task through every method.
@@ -5318,6 +6261,10 @@ export class SignupAgent {
5318
6261
  // /signup form), fill it IN PLACE — re-navigating to task.signupUrl
5319
6262
  // could bounce back to the demo. Otherwise re-navigate (the
5320
6263
  // login-only / no-account case left us on a /login page).
6264
+ // OAuth was login-only (no account for this identity). Commit to the
6265
+ // email path for the rest of the run so the dispatch loop's
6266
+ // OAuth-first scan doesn't re-click Google and loop.
6267
+ this.committedToEmailPath = true;
5321
6268
  const onSignupFormHtml = (await this.browser.getState().catch(() => null))?.html ?? "";
5322
6269
  if (classifySignupHtml(onSignupFormHtml) === "signup") {
5323
6270
  steps.push(`OAuth recovery already on a signup form ` +
@@ -5484,7 +6431,7 @@ export class SignupAgent {
5484
6431
  // created, so transactional mail is plausibly inbound and
5485
6432
  // can outlast the 45s probe; bounded below the full timeout).
5486
6433
  const verificationTimeoutSeconds = expectsEmail
5487
- ? (task.verificationTimeoutSeconds ?? 180)
6434
+ ? (task.verificationTimeoutSeconds ?? VERIFY_EMAIL_CEILING_SECONDS)
5488
6435
  : submitRejected
5489
6436
  ? VERIFICATION_PROBE_SECONDS
5490
6437
  : SUBMITTED_PROBE_FLOOR_SECONDS;
@@ -5500,36 +6447,108 @@ export class SignupAgent {
5500
6447
  // URL-keyword scorer first; if it can't see past a click-tracker
5501
6448
  // wrapper, fall back to matching the link's ANCHOR TEXT in the
5502
6449
  // HTML body (amplitude's SendGrid-wrapped "Activate account").
6450
+ // The service's own host — for the same-domain link fallback
6451
+ // below. The current page IS the service (its confirm-email
6452
+ // wall), so its URL is the most reliable source; task.signupUrl
6453
+ // backs it up.
6454
+ let serviceHost = null;
6455
+ try {
6456
+ serviceHost = new URL((await this.browser.getState()).url).hostname;
6457
+ }
6458
+ catch {
6459
+ /* fall through to signupUrl */
6460
+ }
6461
+ if (serviceHost === null && task.signupUrl !== undefined) {
6462
+ try {
6463
+ serviceHost = new URL(task.signupUrl).hostname;
6464
+ }
6465
+ catch {
6466
+ /* leave null */
6467
+ }
6468
+ }
5503
6469
  const verifyLink = this.pickVerificationLink(Array.from(email.parsed_links)) ??
5504
- pickVerificationLinkFromHtml(email.body_html ?? "");
6470
+ pickVerificationLinkFromHtml(email.body_html ?? "") ??
6471
+ pickServiceDomainLink(Array.from(email.parsed_links), serviceHost);
5505
6472
  if (verifyLink !== null) {
5506
- steps.push(`Following verification link: ${verifyLink}`);
5507
- await this.browser.goto(verifyLink);
5508
- // PERF: a 1s settle is enough for the verify landing
5509
- // page to commit cookies + render the post-verify
5510
- // dashboard. Previous 3s was over-cautious.
5511
- await this.browser.wait(1);
5512
- await saveDebugSnapshot(this.browser, "after-verify");
5513
- // Verify-link SPA bounce (MEASURED 2026-06-09: amplitude). The
5514
- // emailed link is a click-tracker that redirects to
5515
- // app.amplitude.com/signup?token=… the token IS consumed
5516
- // server-side, but the single-page app still renders the
5517
- // "check your email" wall until the client re-fetches session
5518
- // state. The post-verify loop then can't get past it. A single
5519
- // reload makes the SPA re-read the now-verified session.
5520
- // Bounded + guarded on the wall still showing, so a service
5521
- // that verified cleanly pays nothing.
5522
- try {
5523
- const afterText = await this.browser.extractText();
5524
- if (expectsVerificationEmail(afterText)) {
5525
- steps.push("Verification link landed but the page still shows the email-verify wall — reloading so the SPA re-reads the verified session.");
5526
- await this.browser.reload();
6473
+ // Firebase email-action link → verify via the REST API
6474
+ // IMMEDIATELY (clean ms-latency POST) instead of the browser
6475
+ // SPA, which (portkey, 2026-06-17) landed on "link expired".
6476
+ // This races a short oobCode TTL, issues the single-use code
6477
+ // once, AND diagnoses: an EXPIRED error here proves the code is
6478
+ // dead at receipt (mail-pipeline latency), not a browser bug.
6479
+ const fbAction = parseFirebaseEmailAction(verifyLink);
6480
+ let firebaseVerified = false;
6481
+ if (fbAction !== null) {
6482
+ const r = await applyFirebaseEmailVerification(fbAction.apiKey, fbAction.oobCode);
6483
+ firebaseVerified = r.ok;
6484
+ steps.push(r.ok
6485
+ ? `Verified the email directly via Firebase REST (${r.email ?? "account"} now verified) bypassing the browser link.`
6486
+ : `Firebase REST verify did not succeed (${r.error ?? "unknown"}) — oobCode appears dead at receipt; falling back to the browser link.`);
6487
+ }
6488
+ if (firebaseVerified) {
6489
+ // Email verified server-side → go to the app and log in
6490
+ // (two-stage aware); extraction below then reaches the key.
6491
+ try {
6492
+ await this.browser.goto(new URL(verifyLink).origin);
6493
+ await this.browser.wait(2);
6494
+ await this.loginWithCredentials(task.email, password, steps);
5527
6495
  await this.browser.wait(2);
5528
6496
  }
6497
+ catch (err) {
6498
+ steps.push(`Post-Firebase-verify login errored (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
6499
+ }
5529
6500
  }
5530
- catch {
5531
- // best-effort fall through to extraction regardless
5532
- }
6501
+ else {
6502
+ steps.push(`Following verification link: ${verifyLink}`);
6503
+ await this.browser.goto(verifyLink);
6504
+ // PERF: a 1s settle is enough for the verify landing
6505
+ // page to commit cookies + render the post-verify
6506
+ // dashboard. Previous 3s was over-cautious.
6507
+ await this.browser.wait(1);
6508
+ await saveDebugSnapshot(this.browser, "after-verify");
6509
+ // Verify-link SPA bounce (MEASURED 2026-06-09: amplitude). The
6510
+ // emailed link is a click-tracker that redirects to
6511
+ // app.amplitude.com/signup?token=… — the token IS consumed
6512
+ // server-side, but the single-page app still renders the
6513
+ // "check your email" wall until the client re-fetches session
6514
+ // state. The post-verify loop then can't get past it. A single
6515
+ // reload makes the SPA re-read the now-verified session.
6516
+ // Bounded + guarded on the wall still showing, so a service
6517
+ // that verified cleanly pays nothing.
6518
+ try {
6519
+ const afterText = await this.browser.extractText();
6520
+ if (expectsVerificationEmail(afterText)) {
6521
+ steps.push("Verification link landed but the page still shows the email-verify wall — reloading so the SPA re-reads the verified session.");
6522
+ await this.browser.reload();
6523
+ await this.browser.wait(2);
6524
+ }
6525
+ }
6526
+ catch {
6527
+ // best-effort — fall through to extraction regardless
6528
+ }
6529
+ // Expired/used single-use verification link (portkey,
6530
+ // 2026-06-17). A fresh Firebase oobCode that reads "expired" on
6531
+ // first touch was consumed upstream by a mail link-scanner —
6532
+ // which ALSO ran the verification server-side, so the email is
6533
+ // already verified. Don't fail: log in with the signup
6534
+ // credentials (the account is ready) and let the extraction +
6535
+ // post-verify loop below reach the key. Generalizes to every
6536
+ // single-use verify-link flow (Firebase et al.).
6537
+ try {
6538
+ const linkText = await this.browser.extractText();
6539
+ if (verificationLinkFailed(linkText)) {
6540
+ steps.push("Verification link reported expired/used — a single-use link is typically burned by an upstream mail scanner, which also completes verification server-side. Treating the email as verified and logging in with the signup credentials.");
6541
+ const origin = new URL(verifyLink).origin;
6542
+ await this.browser.goto(origin);
6543
+ await this.browser.wait(2);
6544
+ await this.loginWithCredentials(task.email, password, steps);
6545
+ await this.browser.wait(2);
6546
+ }
6547
+ }
6548
+ catch (err) {
6549
+ steps.push(`Post-expiry login attempt errored (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
6550
+ }
6551
+ } // end browser-link fallback (non-Firebase path)
5533
6552
  // Try extracting first — many services drop the API key
5534
6553
  // straight onto the landing page after verification.
5535
6554
  credentials = await this.extractCredentials();
@@ -5562,7 +6581,12 @@ export class SignupAgent {
5562
6581
  credentials = await this.enterEmailVerificationCode(bodyCode, task, password, steps);
5563
6582
  }
5564
6583
  else {
5565
- steps.push("Email had no usable verification link or code.");
6584
+ // Diagnostic (arize, 2026-06-17): the email arrived but
6585
+ // neither scorer found a usable link and no code parsed —
6586
+ // dump the candidate hrefs so the next run shows WHY (e.g. an
6587
+ // image-only button, or anchor text the scorer doesn't weight).
6588
+ const hrefs = email.parsed_links.slice(0, 8).join(" | ");
6589
+ steps.push(`Email had no usable verification link or code. parsed_links(${email.parsed_links.length}): ${hrefs || "(none)"}`);
5566
6590
  }
5567
6591
  }
5568
6592
  }
@@ -5596,6 +6620,43 @@ export class SignupAgent {
5596
6620
  ...this.resultTail(),
5597
6621
  };
5598
6622
  }
6623
+ // Before the generic no-credentials miss: a service that completed the
6624
+ // signup form and then dropped the account into a manual-approval gate
6625
+ // (waiting room / waitlist / pending review). Same terminal, non-demoting
6626
+ // onboarding_blocked status the OAuth path uses — there's no key to reach
6627
+ // until a human approves the account, so don't surface it as a generic
6628
+ // failure (which can wrongly chase a code bug) or punish a skill for it.
6629
+ //
6630
+ // ONLY when verification did NOT time out. A pending email-verification
6631
+ // page ("check your email", "we sent a code") can read as a review gate
6632
+ // to the classifier, but the authoritative cause there is the missing
6633
+ // mail (verification_not_sent) — anthropic mislabeled as onboarding_blocked
6634
+ // exactly this way. If we were waiting on an email that never came, that
6635
+ // is the failure; don't reinterpret it as a manual-review gate.
6636
+ const reviewGateText = verificationFailed === undefined ? await this.browser.extractText().catch(() => "") : "";
6637
+ // Closed / invite-only registration takes precedence over the review-gate
6638
+ // and the generic miss — no account can be created, so it's terminally
6639
+ // unservable (dequeue), not a fixable nav bug. Checked only when
6640
+ // verification didn't time out (same reasoning as the review gate).
6641
+ if (verificationFailed === undefined && isSignupsClosed(reviewGateText)) {
6642
+ return {
6643
+ success: false,
6644
+ error: `signups_closed: ${task.service} is not accepting new self-serve sign-ups ` +
6645
+ `(closed / invite-only registration) — no account can be created. Dequeue or sign up manually once open.`,
6646
+ steps,
6647
+ ...this.resultTail(),
6648
+ };
6649
+ }
6650
+ if (isOnboardingReviewGate(verificationFailed, reviewGateText)) {
6651
+ return {
6652
+ success: false,
6653
+ error: `onboarding_blocked: ${task.service} put the account into a manual review / ` +
6654
+ `waitlist gate after signup — no API key is obtainable until a human approves ` +
6655
+ `the account. Finish the signup manually once access is granted.`,
6656
+ steps,
6657
+ ...this.resultTail(),
6658
+ };
6659
+ }
5599
6660
  return {
5600
6661
  success: false,
5601
6662
  error: verificationFailed ?? "Could not find credentials on page or via email",
@@ -5888,6 +6949,14 @@ export class SignupAgent {
5888
6949
  // complete first.
5889
6950
  let consentAdvanceWaits = 0;
5890
6951
  const MAX_CONSENT_ADVANCE_WAITS = 3;
6952
+ // OAUTH_ENGINE (default-ON since 2026-06-15): route the CONSENT decision
6953
+ // through the pure reducer (oauth-flow.ts, eng-reviewed) instead of the inline
6954
+ // scope-gate branches below. Flipped after live validation drove a real Google
6955
+ // consent screen both ways — advance_consent (opaque-scope→blind approve, full
6956
+ // ipinfo OAuth signup succeeded) AND the abort-on-login-form safety invariant.
6957
+ // The inline scope-gate block is kept one cycle as the explicit opt-out
6958
+ // (OAUTH_ENGINE=0) and deleted next (DESIGN-oauth-consent-engine.md step 2).
6959
+ const oauthEngineOn = !/^(0|false|off|no)$/i.test(process.env.OAUTH_ENGINE ?? "");
5891
6960
  for (let i = 0; i < MAX_OAUTH_NAV; i++) {
5892
6961
  if (this.browser.oauthPageClosed()) {
5893
6962
  steps.push(`OAuth: the ${provider.label} window closed — handshake returned to the service`);
@@ -6154,6 +7223,80 @@ export class SignupAgent {
6154
7223
  return this.oauthAbort("needs_login", `the bot's ${provider.label} session is missing or expired — no consent screen was reached. ` +
6155
7224
  `Re-run \`${loginCmd}\` to re-establish it, then retry.`, steps);
6156
7225
  }
7226
+ // authState === "consent" — route through the reducer when OAUTH_ENGINE is
7227
+ // on (every path here continues or returns, so the inline block below is
7228
+ // the default-off path). Faithful to the inline ordering; the executor owns
7229
+ // the advance-success flag flip + the consentAdvanceWaits budget.
7230
+ if (oauthEngineOn) {
7231
+ const hasLoginForm = await this.oauthLoginFormPresent();
7232
+ const scopes = extractOAuthScopes(url);
7233
+ const dangerPhrases = provider.id === "google" ? scrapeGoogleScopePhrases(body) : [];
7234
+ const consentDom = scopes === null ? await this.browser.extractText().catch(() => "") : "";
7235
+ const { action } = decideOAuthStep({
7236
+ providerId: provider.id,
7237
+ consentAlreadyApproved,
7238
+ omniauthPostTried,
7239
+ allowBlindOAuthConsent: task.allowBlindOAuthConsent === true,
7240
+ allowExtraOAuthScopes: task.allowExtraOAuthScopes ?? [],
7241
+ }, {
7242
+ isChooser: false,
7243
+ authState: "consent",
7244
+ hasLoginForm,
7245
+ omniAuthPassthru: false,
7246
+ scopes,
7247
+ dangerPhrases,
7248
+ domBasicFromDom: provider.id === "google" && googleConsentIsBasicFromDom(body),
7249
+ domBasicGis: provider.id === "google" && googleGisConsentIsBasic(consentDom),
7250
+ }, { scopesAreBasic: (s) => provider.scopesAreBasic(s) });
7251
+ steps.push(`OAuth[engine]: scopes=[${scopes === null ? "<unreadable>" : scopes.join(", ")}] → ` +
7252
+ `${action.kind}${action.kind === "advance_consent" ? `:${action.mode}` : ""}`);
7253
+ // Faithful inline step trail: a readable all-basic approve logs the same
7254
+ // "scopes all basic … auto-approving" line the inline scope-gate emits.
7255
+ if (action.kind === "advance_consent" && action.mode === "approve" && scopes !== null) {
7256
+ steps.push(`OAuth: consent scopes all basic (${scopes.join(", ")}) — auto-approving`);
7257
+ }
7258
+ if (action.kind === "abort") {
7259
+ if (action.clearProviderLoggedIn)
7260
+ clearProviderLoggedIn(provider.id);
7261
+ const detail = action.reason === "needs_login"
7262
+ ? `landed on a ${provider.label} sign-in form / no session — re-run \`${loginCmd}\`, then retry. ` +
7263
+ `The bot will not type into ${provider.label}'s login form.`
7264
+ : action.unauthorizedScopes !== undefined
7265
+ ? `${provider.label} consent requests non-basic scopes: [${action.unauthorizedScopes.join(", ")}]. ` +
7266
+ `All requested: [${(scopes ?? []).join(", ")}]. Re-run provision with allow_extra_oauth_scopes set to proceed.`
7267
+ : `reached a ${provider.label} consent screen but could not safely auto-approve its scopes — approve it manually.`;
7268
+ return this.oauthAbort(action.reason, detail, steps);
7269
+ }
7270
+ // A consent observation only ever yields abort | advance_consent; this
7271
+ // narrows the union for TS (and is a defensive no-op if it ever doesn't).
7272
+ if (action.kind !== "advance_consent")
7273
+ break;
7274
+ // advance_consent — perform the advance; flip the flag only on success.
7275
+ const advanced = await this.browser.advanceOAuthConsent(provider.id);
7276
+ if (advanced) {
7277
+ consentAlreadyApproved = true;
7278
+ await this.browser.wait(3);
7279
+ continue;
7280
+ }
7281
+ if (action.onAdvanceFail === "bounded_wait") {
7282
+ if (consentAdvanceWaits < MAX_CONSENT_ADVANCE_WAITS) {
7283
+ consentAdvanceWaits += 1;
7284
+ steps.push(`OAuth[engine]: approve control not present yet — waiting for hydrate/redirect ` +
7285
+ `(${consentAdvanceWaits}/${MAX_CONSENT_ADVANCE_WAITS})`);
7286
+ await this.browser.wait(4);
7287
+ continue;
7288
+ }
7289
+ return this.oauthAbort("oauth_consent_needs_review", `blind-consent approved but no approve control on the ${provider.label} consent page ` +
7290
+ `after ${consentAdvanceWaits} waits — sign up manually.`, steps);
7291
+ }
7292
+ if (action.onAdvanceFail === "wait_nav") {
7293
+ steps.push("OAuth[engine]: post-grant page, no approve control — waiting for natural navigation");
7294
+ await this.browser.wait(3);
7295
+ continue;
7296
+ }
7297
+ // onAdvanceFail === "abort"
7298
+ return this.oauthAbort("oauth_consent_needs_review", `reached a ${provider.label} consent screen but found no approve control to click — approve it manually.`, steps);
7299
+ }
6157
7300
  // authState === "consent". Backstop the page classifier with a
6158
7301
  // live-DOM check: if the page actually carries a credential
6159
7302
  // field it is a login form (the text classifier can catch a
@@ -6348,16 +7491,36 @@ export class SignupAgent {
6348
7491
  // non-auth path here and is left alone.
6349
7492
  if (isSignupOrLoginRoute(this.browser.currentUrl()) &&
6350
7493
  !isOAuthProviderHost(this.browser.currentUrl())) {
6351
- const root = originRoot(this.browser.currentUrl());
6352
- if (root !== null) {
6353
- steps.push(`OAuth: post-auth landing is a signup/login route (${pathOf(this.browser.currentUrl())}) ` +
6354
- `navigating to the app root (${root}) so the service routes us to the dashboard.`);
6355
- try {
6356
- await this.browser.goto(root);
6357
- await this.browser.wait(2);
6358
- }
6359
- catch {
6360
- // navigation hiccup — the post-verify loop re-reads regardless.
7494
+ // Clerk callback: don't immediately navigate away. On a Clerk combined
7495
+ // sign-in/sign-up flow a new-user OAuth completes the account via a
7496
+ // client-side sign-up transfer that takes a beat AFTER the callback lands;
7497
+ // navigating to root unmounts Clerk's JS and interrupts it (the bug behind
7498
+ // the cartesia/braintrust "oauth_session_not_persisted" cluster — proven
7499
+ // not IP). We can't drive the transfer via window.Clerk (patchright's
7500
+ // isolated world hides it), so instead give Clerk's own JS time and detect
7501
+ // success via cookies (world-agnostic). If a session appears, we're signed
7502
+ // in — skip the navigate-away.
7503
+ const onClerkCallback = /sso-callback|\/sso\b/i.test(this.browser.currentUrl());
7504
+ let clerkSignedIn = false;
7505
+ if (onClerkCallback) {
7506
+ clerkSignedIn = await this.browser.waitForClerkSession(12000).catch(() => false);
7507
+ steps.push(`OAuth: Clerk callback — waited for session establish → ${clerkSignedIn ? "signed in" : "no session (likely login-only OAuth / needs email signup)"}`);
7508
+ }
7509
+ if (clerkSignedIn) {
7510
+ await this.browser.wait(2);
7511
+ }
7512
+ else {
7513
+ const root = originRoot(this.browser.currentUrl());
7514
+ if (root !== null) {
7515
+ steps.push(`OAuth: post-auth landing is a signup/login route (${pathOf(this.browser.currentUrl())}) — ` +
7516
+ `navigating to the app root (${root}) so the service routes us to the dashboard.`);
7517
+ try {
7518
+ await this.browser.goto(root);
7519
+ await this.browser.wait(2);
7520
+ }
7521
+ catch {
7522
+ // navigation hiccup — the post-verify loop re-reads regardless.
7523
+ }
6361
7524
  }
6362
7525
  }
6363
7526
  }
@@ -6530,6 +7693,9 @@ export class SignupAgent {
6530
7693
  // oauth_session_not_persisted and abort. The account simply needs
6531
7694
  // creating via email, so re-route to form-fill instead of bailing.
6532
7695
  if (detectGoogleNoAccount(gateState.url, gateText)) {
7696
+ // Commit to email for the rest of the run — OAuth is login-only here, so
7697
+ // the OAuth-first scan must not re-fire after the form-fill re-route.
7698
+ this.committedToEmailPath = true;
6533
7699
  steps.push(`OAuth: ${provider.label} sign-in succeeded but ${task.service} has no account for ` +
6534
7700
  `this identity (login-only OAuth, ${pathOf(gateState.url)}) — abandoning OAuth and ` +
6535
7701
  `falling back to email/password signup to create the account.`);
@@ -6690,6 +7856,22 @@ export class SignupAgent {
6690
7856
  // (oauth_session_not_persisted) instead of thrashing into
6691
7857
  // oauth_onboarding_failed.
6692
7858
  if (err instanceof OAuthSessionNotPersistedError) {
7859
+ // The handshake completed but the service never created a session — the
7860
+ // Clerk new-user-via-sign-in bounce, surfacing here as a stuck login page
7861
+ // (no explicit "no account" text, so detectGoogleNoAccount missed it
7862
+ // upstream). If the service also offers email signup, creating the account
7863
+ // that way is the recovery — the SAME OAUTH_FALL_BACK_TO_FORM_FILL path the
7864
+ // explicit-text case uses (runSignup re-navigates to the form and runs the
7865
+ // email path with forceFormFill, which suppresses OAuth so it can't bounce
7866
+ // back here). One-shot: an OAuth-only service with no email form then fails
7867
+ // honestly on the re-run instead of looping. Generalizes the cartesia crack
7868
+ // to the whole silent-callback Clerk cluster (openrouter/groq/northflank/…).
7869
+ if (!this.oauthEmailFallbackTried) {
7870
+ this.oauthEmailFallbackTried = true;
7871
+ steps.push(`OAuth callback never persisted a session (Clerk new-user sign-in bounce) — ` +
7872
+ `falling back to email/password signup to create the account.`);
7873
+ return OAUTH_FALL_BACK_TO_FORM_FILL;
7874
+ }
6693
7875
  return { success: false, error: err.message, steps, ...this.resultTail() };
6694
7876
  }
6695
7877
  throw err;
@@ -6720,6 +7902,19 @@ export class SignupAgent {
6720
7902
  const paywallCheckText = this.lastPostVerifyDoneReason !== null
6721
7903
  ? `${finalText}\n${this.lastPostVerifyDoneReason}`
6722
7904
  : finalText;
7905
+ // Closed / invite-only registration — no account can be created at all
7906
+ // (turbopuffer: "Sign-ups are closed"). Terminally unservable; label it
7907
+ // honestly so the operator dequeues rather than seeing a misleading
7908
+ // oauth_onboarding_failed that implies a fixable nav bug.
7909
+ if (isSignupsClosed(paywallCheckText)) {
7910
+ return {
7911
+ success: false,
7912
+ error: `signups_closed: ${task.service} is not accepting new self-serve sign-ups ` +
7913
+ `(closed / invite-only registration) — no account can be created. Dequeue or sign up manually once open.`,
7914
+ steps,
7915
+ ...this.resultTail(),
7916
+ };
7917
+ }
6723
7918
  if (isAtPaywall(paywallCheckText)) {
6724
7919
  return {
6725
7920
  success: false,
@@ -6729,6 +7924,22 @@ export class SignupAgent {
6729
7924
  ...this.resultTail(),
6730
7925
  };
6731
7926
  }
7927
+ // Service-side manual-approval gate (waiting room / waitlist / account
7928
+ // pending review). The OAuth handshake succeeded but the service won't
7929
+ // grant a key until a human approves the account — there is no key to
7930
+ // reach autonomously. Same terminal onboarding_blocked status as the
7931
+ // billing wall so it's a non-demoting human-pile outcome, not a
7932
+ // mislabeled oauth_onboarding_failed that wrongly implies a code bug.
7933
+ if (isAtAccountReviewGate(paywallCheckText)) {
7934
+ return {
7935
+ success: false,
7936
+ error: `onboarding_blocked: ${task.service} put the account into a manual review / ` +
7937
+ `waitlist gate after signup — no API key is obtainable until a human approves ` +
7938
+ `the account. Finish the signup manually once access is granted.`,
7939
+ steps,
7940
+ ...this.resultTail(),
7941
+ };
7942
+ }
6732
7943
  // rc.39 — anti-bot interstitial that survived the post-OAuth
6733
7944
  // landing. Turso's GitHub SSO callback runs a Cloudflare check
6734
7945
  // that never clears for our Chromium fingerprint; the planner's
@@ -7009,6 +8220,9 @@ ${formatInventory(input.inventory)}`,
7009
8220
  // Deterministic form-fill picks (same rationale as the post-verify
7010
8221
  // planner — D2). Removes a run-to-run flakiness source.
7011
8222
  temperature: 0,
8223
+ // Fix C — pin a single model + provider + seed on the proxy path.
8224
+ // temperature 0 alone leaves the model/provider lottery in play.
8225
+ deterministic: true,
7012
8226
  parse: (raw) => parseSignupPlan(raw, allowed),
7013
8227
  });
7014
8228
  }
@@ -7345,15 +8559,51 @@ ${formatInventory(input.inventory)}`,
7345
8559
  steps.push(`Existing-account recovery: create-key click failed (${err instanceof Error ? err.message : String(err)}).`);
7346
8560
  return null;
7347
8561
  }
7348
- // Poll for the freshly-minted key — minting is a server
7349
- // round-trip (Render/Mistral/Mailtrap render the value into a
7350
- // modal after the POST returns). Reuse the modal-reveal poll
7351
- // budget the click branch uses elsewhere (~8s), early-exiting the
7352
- // moment any tier surfaces a credential. A confirmation dialog
7353
- // ("Name your key" → Create) is common; fire the reveal pass each
7354
- // round so a modal that needs a second confirm-then-show click is
7355
- // still harvested.
7356
- const deadline = Date.now() + 8000;
8562
+ // Forensic: capture the post-click state so a "modal never minted a key"
8563
+ // failure is diagnosable — what does the create-key dialog render (name
8564
+ // field? an in-modal captcha? a disabled submit?), and is it in our
8565
+ // inventory? Off by default; production runs don't pay the snapshot.
8566
+ if (process.env.BOT_DEBUG_MINT_MODAL === "1") {
8567
+ await this.browser.wait(1.2);
8568
+ await saveDebugSnapshot(this.browser, "mint-after-create-click");
8569
+ }
8570
+ // Drive the "name your key" dialog the create-click opened, then poll for
8571
+ // the freshly-minted value (minting is a server round-trip; Render/Mistral
8572
+ // render the value into the modal after the POST returns).
8573
+ //
8574
+ // Two gates commonly hold the dialog's submit DISABLED until satisfied:
8575
+ // (1) a non-empty NAME (groq's keyName field), and
8576
+ // (2) a CAPTCHA token — groq embeds a Cloudflare Turnstile INSIDE the
8577
+ // create-key modal (cf-turnstile-response), and the submit stays
8578
+ // disabled until the widget issues a token. The captcha gate never
8579
+ // ran here (it fires during form-fill, not post-verify mint), so the
8580
+ // modal sat unsolved and every re-click just reopened it. Satisfy
8581
+ // both up front: type the name, then run the captcha gate (Tier 1
8582
+ // behavior / Tier 2 click-and-wait, polling for the token), and only
8583
+ // then start clicking submit.
8584
+ try {
8585
+ const openInv = await this.browser.extractInteractiveElements();
8586
+ const nameInput = findKeyNameInput(openInv);
8587
+ if (nameInput !== null) {
8588
+ await this.browser.type(nameInput.selector, "trusty-squire").catch(() => { });
8589
+ steps.push("Existing-account recovery: named the new key.");
8590
+ }
8591
+ }
8592
+ catch {
8593
+ // best-effort name fill
8594
+ }
8595
+ // Solve any captcha gating the modal's submit (groq's in-modal Turnstile).
8596
+ // Best-effort: a no-widget result or a solver miss just falls through to
8597
+ // the submit poll below, which still works for modals with no captcha.
8598
+ const mintGate = await this.runCaptchaGate("Mint-modal", steps);
8599
+ if (mintGate.blocked) {
8600
+ steps.push("Existing-account recovery: the create-key modal's captcha is blocking — cannot mint.");
8601
+ }
8602
+ // Poll: click the modal's affirmative submit (re-clicking is harmless —
8603
+ // it's a no-op while still disabled, and once name+token clear the gate
8604
+ // the click lands), harvesting the minted value each round. No
8605
+ // single-click guard: the gate may enable a beat after we first try.
8606
+ const deadline = Date.now() + 12000;
7357
8607
  while (Date.now() < deadline) {
7358
8608
  await this.browser.wait(0.5);
7359
8609
  const minted = await this.harvestVisibleCredentials();
@@ -7361,15 +8611,21 @@ ${formatInventory(input.inventory)}`,
7361
8611
  steps.push("Existing-account recovery: extracted the freshly-minted key.");
7362
8612
  return minted;
7363
8613
  }
7364
- // A two-step create modal: clicking the page-level "Create key"
7365
- // opened a "name + confirm" dialog. Click a now-visible confirm
7366
- // affordance once, then keep polling.
7367
8614
  try {
7368
8615
  const modalInv = await this.browser.extractInteractiveElements();
7369
- const confirmBtn = findCreateKeyAffordance(modalInv);
7370
- if (confirmBtn !== null &&
7371
- confirmBtn.selector !== createBtn.selector) {
7372
- await this.browser.click(confirmBtn.selector);
8616
+ // Prefer the modal's generic submit ("Submit"/"Create"/…) over
8617
+ // findCreateKeyAffordance: the page-level "Create API Key" button is
8618
+ // still in the background DOM, and re-clicking IT just reopens the
8619
+ // modal (the pre-fix groq failure loop). Fall back to the affordance
8620
+ // matcher for modals whose confirm DOES carry a key noun.
8621
+ let confirmBtn = findKeyModalSubmit(modalInv, createBtn.selector);
8622
+ if (confirmBtn === null) {
8623
+ const aff = findCreateKeyAffordance(modalInv);
8624
+ if (aff !== null && aff.selector !== createBtn.selector)
8625
+ confirmBtn = aff;
8626
+ }
8627
+ if (confirmBtn !== null) {
8628
+ await this.browser.click(confirmBtn.selector).catch(() => { });
7373
8629
  }
7374
8630
  }
7375
8631
  catch {
@@ -7422,7 +8678,7 @@ ${formatInventory(input.inventory)}`,
7422
8678
  catch {
7423
8679
  break;
7424
8680
  }
7425
- const fallback = pickStuckLoopFallbackUrl(currentUrl, visitedKeysUrls);
8681
+ const fallback = pickStuckLoopFallbackUrl(currentUrl, visitedKeysUrls, undefined, this.resolvedSignupUrl);
7426
8682
  if (fallback === null)
7427
8683
  break;
7428
8684
  visitedKeysUrls.add(fallback);
@@ -7461,6 +8717,144 @@ ${formatInventory(input.inventory)}`,
7461
8717
  }
7462
8718
  return null;
7463
8719
  }
8720
+ // NAV_SEARCH phase (slice 1): drive the post-verify phase with the goal-directed
8721
+ // nav-search engine (nav-search.ts) instead of the greedy planner. Adapts the
8722
+ // BrowserController to the engine's narrow port and wires the extractor, the
8723
+ // capture-chain (sequential rounds → OF#1/auto-promote parity, A2), and the
8724
+ // log. Returns the extracted credentials, or {} (+ a no_self_serve_key done
8725
+ // reason) when the dashboard's navigation has no reachable key surface.
8726
+ async runNavSearchPhase(args, oauth) {
8727
+ const hasRealKey = (c) => Object.keys(c).some((k) => !NON_CREDENTIAL_KEYS.has(k));
8728
+ // Already on a key surface at entry (bot landed there directly).
8729
+ const entry = await this.extractCredentials();
8730
+ if (hasRealKey(entry))
8731
+ return entry;
8732
+ const port = {
8733
+ currentUrl: () => this.browser.currentUrl(),
8734
+ // Visibility-respecting text (innerText) for goal assessment — extractText()
8735
+ // reads textContent, which fuses inline <script> source + display:none nodes
8736
+ // into the page text and poisons every text-based goal/onboarding signal
8737
+ // (the false-shell class). Key extraction still reads RAW text via
8738
+ // extractCredentials() downstream; this is the nav-decision surface only.
8739
+ extractText: () => this.browser.extractVisibleText(),
8740
+ extractInventory: () => this.browser.extractInteractiveElements(),
8741
+ clickSelector: (s) => this.browser.click(s),
8742
+ navigate: (u) => this.browser.goto(u),
8743
+ pressEscape: () => this.browser.pressKey("Escape"),
8744
+ settle: async () => {
8745
+ await this.browser.waitForInteractiveDom(5, 15_000).catch(() => { });
8746
+ },
8747
+ expandLatentNav: () => this.browser.expandLatentNav(),
8748
+ };
8749
+ // Cap nav-search's LLM tiebreak calls so the navigation phase can't starve
8750
+ // the greedy planner's budget when we hand off (DEFAULT-ON hybrid): the
8751
+ // per-signup circuit breaker is shared, so an unbounded tiebreak could leave
8752
+ // the form-fill handoff with no budget. Deterministic ranking is unbounded
8753
+ // (free); only the LLM tiebreak is capped. Past the cap, tiebreak returns
8754
+ // null (deterministic-only), which leads to honest exhaustion → handoff.
8755
+ let tiebreakCalls = 0;
8756
+ const MAX_NAV_TIEBREAKS = Number(process.env.NAV_SEARCH_MAX_TIEBREAKS) || 6;
8757
+ const deps = {
8758
+ extractKey: async () => {
8759
+ const c = await this.extractCredentials();
8760
+ return hasRealKey(c) ? c : null;
8761
+ },
8762
+ // Mint on a create-gated key surface: reuse the proven existing-account
8763
+ // recovery (readable → reveal → click create → drive the name+confirm
8764
+ // modal → poll the create POST → reveal masked-on-first-show). Without
8765
+ // this, nav-search only bare-clicks "Create API Key" and never submits
8766
+ // the resulting modal (groq's virgin /keys flow).
8767
+ mintKey: async () => {
8768
+ const c = await this.attemptMintNewKey(args.steps);
8769
+ return c !== null && hasRealKey(c) ? c : null;
8770
+ },
8771
+ // Capture-chain parity (A2 / OF#1): one sequential round per step, full
8772
+ // state + the real selector, so the synthesizer's chain check (no gaps)
8773
+ // still passes and auto-promote keeps minting skills.
8774
+ captureRound: async (ctx) => {
8775
+ const state = await this.browser.getState().catch(() => null);
8776
+ if (state === null)
8777
+ return;
8778
+ const observed = ctx.action === "extract"
8779
+ ? { kind: "extract", reason: "nav-search: extract on key surface" }
8780
+ : ctx.selector !== undefined
8781
+ ? { kind: "click", selector: ctx.selector, reason: `nav-search: ${ctx.action}` }
8782
+ : { kind: "done", reason: `nav-search: ${ctx.action}` };
8783
+ captureOnboardingRound({
8784
+ service: args.service,
8785
+ round: this.captureChainRound,
8786
+ oauth,
8787
+ state,
8788
+ inventory: ctx.inventory,
8789
+ observed,
8790
+ ...(this.lastResolvedModel !== undefined ? { resolved_model: this.lastResolvedModel } : {}),
8791
+ ...(this.lastResolvedProvider !== undefined
8792
+ ? { resolved_provider: this.lastResolvedProvider }
8793
+ : {}),
8794
+ });
8795
+ this.captureChainRound += 1;
8796
+ },
8797
+ // LLM tiebreak: the deterministic ranker only fires on keys-keyword text /
8798
+ // href. When keys live behind a generically-named affordance (a settings
8799
+ // tab like "Advanced"/"Security", an icon nav), nothing scores and the
8800
+ // ranker can't decide. This is the ONE place the LLM touches the loop —
8801
+ // it picks the single candidate most likely to lead to a key surface, or
8802
+ // null. Cheap (text-only, ≤80 tokens, deterministic) and bounded by the
8803
+ // per-signup LLM budget; a budget/parse failure falls through to null
8804
+ // (honest exhaustion), never throws into the loop.
8805
+ tiebreak: async (candidates) => {
8806
+ if (candidates.length === 0)
8807
+ return null;
8808
+ if (tiebreakCalls >= MAX_NAV_TIEBREAKS)
8809
+ return null; // reserve budget for the handoff
8810
+ tiebreakCalls += 1;
8811
+ const here = this.browser.currentUrl();
8812
+ const list = candidates
8813
+ .map((c, i) => `${i}. "${c.text.slice(0, 60)}"${c.href !== null ? ` (href=${c.href})` : ""}`)
8814
+ .join("\n");
8815
+ const system = `You navigate a SaaS dashboard after signup. Goal: reach the page that SHOWS or CREATES an API key (or any credential — token, secret, access key).
8816
+ You are given a numbered list of the clickable affordances on the current page. Pick the ONE most likely to lead toward an API-keys / tokens / developer-credentials surface.
8817
+ Reply with a single JSON object and nothing else: {"index": N} (N = the list number) or {"index": null} if NONE plausibly leads to API keys.
8818
+ Prefer items naming keys / tokens / API / developer / secrets; then credentials / advanced / settings / account / security. On a B2B product an API key is frequently scoped to an ORGANIZATION / WORKSPACE / PROJECT / TEAM rather than the personal account — so if no personal "API keys" surface exists, an "Organization", "Workspace", "Project", or "Team" link is a strong candidate (its settings usually hold the keys). NEVER pick log out, billing, invoices, usage, docs, pricing, a link back to the current page, or any destructive action (delete, remove, revoke, deactivate, cancel).`;
8819
+ const userBlocks = [
8820
+ { kind: "text", text: `Current URL: ${here}\n\nAffordances:\n${list}` },
8821
+ ];
8822
+ try {
8823
+ return await this.callLLM({
8824
+ system,
8825
+ userBlocks,
8826
+ maxTokens: 80,
8827
+ temperature: 0,
8828
+ deterministic: true,
8829
+ parse: (raw) => {
8830
+ const m = raw.match(/\{[\s\S]*\}/);
8831
+ if (m === null)
8832
+ return null;
8833
+ const obj = JSON.parse(m[0]);
8834
+ const idx = typeof obj === "object" && obj !== null && "index" in obj
8835
+ ? obj.index
8836
+ : null;
8837
+ if (typeof idx !== "number")
8838
+ return null;
8839
+ return candidates[idx]?.selector ?? null;
8840
+ },
8841
+ });
8842
+ }
8843
+ catch {
8844
+ return null;
8845
+ }
8846
+ },
8847
+ log: (line) => args.steps.push(line),
8848
+ maxSteps: args.maxRounds,
8849
+ };
8850
+ const result = await runNavSearch(port, deps);
8851
+ if (result.kind === "found")
8852
+ return result.credentials;
8853
+ this.lastPostVerifyDoneReason =
8854
+ "no_self_serve_key: nav-search exhausted the dashboard's navigation without " +
8855
+ "reaching an API-key surface";
8856
+ return {};
8857
+ }
7464
8858
  async postVerifyLoop(args) {
7465
8859
  let credentials = await this.extractCredentials();
7466
8860
  // 0.8.2-rc.15 — also seed DOM-proximity at loop entry. If the
@@ -7508,6 +8902,15 @@ ${formatInventory(input.inventory)}`,
7508
8902
  // the dashboard for those; a genuine callback rejection stays on login
7509
8903
  // even after reload, so this never masks a real wall.
7510
8904
  let oauthBounceReloadTried = false;
8905
+ // Consecutive rounds the post-verify page read as a genuine loading shell
8906
+ // (visible loading-text AND a sub-threshold inventory). A real SPA
8907
+ // hydrates within the bounded per-round wait, so a streak means the route
8908
+ // never paints content — burn a navigate-to-root retry, then bail
8909
+ // truthfully rather than re-running the wait every round to run_timeout.
8910
+ // Reset on any non-shell round. Mirrors the consecutiveOauthLoginPageRounds
8911
+ // / oauthBounceReloadTried escape used for the stuck-login case.
8912
+ let shellStreak = 0;
8913
+ let shellRootNavTried = false;
7511
8914
  let planFailures = 0;
7512
8915
  // 0.8.2-rc.6 — separate counter for upstream-blip retries. Doesn't
7513
8916
  // gate planFailures (so a transient 502 won't push us into the
@@ -7519,6 +8922,44 @@ ${formatInventory(input.inventory)}`,
7519
8922
  let upstreamBlipRetries = 0;
7520
8923
  const MAX_UPSTREAM_BLIP_RETRIES = 8;
7521
8924
  const oauth = args.credentials === undefined;
8925
+ // NAV_SEARCH (DEFAULT-ON as of T6): drive the post-verify phase with the
8926
+ // goal-directed nav-search engine, then HAND OFF to the greedy planner if it
8927
+ // couldn't finish. T6 (live, neon) proved the two are complementary:
8928
+ // nav-search is strong at NAVIGATION (it drove through two onboarding wizards
8929
+ // + the dashboard to the exact create-API-key modal, where the greedy planner
8930
+ // often gets lost), but it's nav-only by design — it can't fill+submit a
8931
+ // create-key form. The greedy planner is strong at form-fill but weak at
8932
+ // navigation. So: nav-search navigates to (or near) the key surface; if it
8933
+ // extracts a key, done; if not, we FALL THROUGH to the greedy loop, which
8934
+ // resumes from the current page nav-search reached and completes the local
8935
+ // form-fill + extract. The capture chain continues on the same
8936
+ // this.captureChainRound counter (read below at loop start), so it stays
8937
+ // gap-free for auto-promote.
8938
+ //
8939
+ // Default-on is SAFE because the worst case is the pre-existing behavior: if
8940
+ // nav-search reaches no key, control falls through to the same greedy loop
8941
+ // that was the default before. nav-search only changes outcomes by reaching
8942
+ // key surfaces greedy couldn't — a strict improvement in the cases it helps.
8943
+ // Its LLM tiebreak is budget-capped (MAX_NAV_TIEBREAKS) so the handoff keeps
8944
+ // form-fill budget; the same-site guard keeps it on the app. Opt OUT with
8945
+ // NAV_SEARCH=0/false/off (kept for reversibility — DESIGN A2).
8946
+ if (!/^(0|false|off|no)$/i.test(process.env.NAV_SEARCH ?? "")) {
8947
+ try {
8948
+ const navResult = await this.runNavSearchPhase(args, oauth);
8949
+ if (Object.keys(navResult).some((k) => !NON_CREDENTIAL_KEYS.has(k))) {
8950
+ return navResult;
8951
+ }
8952
+ args.steps.push("nav-search: no key via navigation alone — handing off to the planner from the current surface");
8953
+ }
8954
+ catch (err) {
8955
+ // Default-on safety: nav-search must NEVER crash a signup. Any unexpected
8956
+ // error (a browser-port method throwing, a bad selector, etc.) falls
8957
+ // through to the greedy planner — the pre-existing default behavior — so
8958
+ // the worst case of enabling nav-search is "no better than before".
8959
+ args.steps.push(`nav-search: errored (${err instanceof Error ? err.message : String(err)}) — falling back to the planner`);
8960
+ }
8961
+ // fall through to the greedy planner loop below
8962
+ }
7522
8963
  // Re-plan hint for the next round — set when an `extract` step
7523
8964
  // found no key, which means the visible key text is masked /
7524
8965
  // truncated (the S3-class trap: the planner sees a key-shaped
@@ -7637,6 +9078,9 @@ ${formatInventory(input.inventory)}`,
7637
9078
  let stuckFiresAtUrl = 0;
7638
9079
  let lastStuckFireUrl = null;
7639
9080
  const triedFallbackUrls = new Set();
9081
+ // Selectors of API-keys nav links already clicked, so the
9082
+ // click-the-real-link escalation doesn't re-click the same link.
9083
+ const clickedKeysLinks = new Set();
7640
9084
  // Premature-done guard budget. When the planner gives up (`done`)
7641
9085
  // with zero credentials captured, we navigate to an unvisited
7642
9086
  // canonical keys URL and re-plan — bounded so a service that
@@ -7872,47 +9316,98 @@ ${formatInventory(input.inventory)}`,
7872
9316
  // SPA hydration guard. A post-OAuth dashboard (northflank's
7873
9317
  // /settings/access-tokens, PostHog) can render a "Connecting"/loading
7874
9318
  // shell while its JS bundle + websocket finish — slow over a
7875
- // residential tunnel. The shell often carries a stray element or two
7876
- // (a logo link, the <noscript>), so gating on an EMPTY inventory
7877
- // misses it; the loading-shell TEXT is the authoritative "not yet
7878
- // rendered" signal. Wait while that text persists, then proceed with
7879
- // whatever's there (an honest "still a shell" beats a premature done —
7880
- // and if the SPA never hydrates, e.g. a blocked websocket, the bound
7881
- // keeps us from hanging).
9319
+ // residential tunnel. We gate on POSITIVE readiness — the instant the
9320
+ // page has SHELL_MAX_ELEMENTS visible interactive elements it is
9321
+ // hydrated by definition and we proceed rather than looping on the
9322
+ // negative "text still says loading" signal. waitForInteractiveDom
9323
+ // returns the moment that count is met (or after the budget), so a fast
9324
+ // page costs ~0 and a slow one waits exactly as long as needed. This is
9325
+ // the fix for the dominant false positive: a fully-rendered dashboard
9326
+ // whose DOM merely CONTAINS a hidden "loading…"/"please wait 30
9327
+ // seconds…" string no longer spins the wait every round to run_timeout.
7882
9328
  //
7883
9329
  // Budget = 6x3s = 18s. MEASURED: a dashboard SPA gated on a websocket
7884
9330
  // (northflank's wss://platform.northflank.com/websocket) hydrates in
7885
- // ~12-15s over the tunnel. A larger budget BACKFIRES on a page that
7886
- // will NEVER hydrate (e.g. an authed user stranded on /signup): the
7887
- // wait re-runs every round and burns the 600s run cap. The escape for
7888
- // a never-hydrating route is navigate-to-root post-OAuth, not a longer
7889
- // wait here.
9331
+ // ~12-15s over the tunnel.
7890
9332
  //
7891
9333
  // ADAPTIVE exception (MEASURED 2026-06-04, clerk): an OAuth/SSO
7892
9334
  // CALLBACK route does a token exchange that renders even slower than a
7893
9335
  // plain dashboard — clerk's `/sign-in/sso-callback` outlasts 18s and
7894
9336
  // the bot bailed at the edge with `oauth_session_not_persisted`. On a
7895
- // callback route the SPA IS making progress, so 12x3s = 36s of
7896
- // patience is warranted; everywhere else the 6-tick budget holds so a
7897
- // genuinely-stuck route still hits the navigate-to-root escape fast.
7898
- // Read the URL fresh each round (it may redirect off the callback).
7899
- const HYDRATION_TICKS = isOAuthCallbackRoute(state.url) ? 12 : 6;
7900
- for (let hydrationWait = 0; hydrationWait < HYDRATION_TICKS &&
7901
- isLoadingShellText(await this.browser.extractText().catch(() => "")); hydrationWait++) {
7902
- args.steps.push(`Post-verify round ${round}: ${pathOf(state.url)} is a loading shell ` +
7903
- `(hydration wait ${hydrationWait + 1}/${HYDRATION_TICKS}) waiting for the SPA to render`);
7904
- await this.browser.wait(3);
7905
- try {
7906
- [state, inventory] = await Promise.all([
7907
- this.browser.getState(),
7908
- this.buildInventory(args.steps, undefined, 80),
7909
- ]);
9337
+ // callback route the SPA IS making progress, so 36s of patience is
9338
+ // warranted; everywhere else the 18s budget holds so a genuinely-stuck
9339
+ // route reaches the navigate-to-root escape fast. Read the URL fresh
9340
+ // each round (it may redirect off the callback).
9341
+ const onOAuthCallback = isOAuthCallbackRoute(state.url);
9342
+ const HYDRATION_BUDGET_MS = onOAuthCallback ? 36_000 : 18_000;
9343
+ await this.browser
9344
+ .waitForInteractiveDom(SHELL_MAX_ELEMENTS, HYDRATION_BUDGET_MS)
9345
+ .catch(() => undefined);
9346
+ // Re-read after the wait — the page may have hydrated (or redirected).
9347
+ try {
9348
+ [state, inventory] = await Promise.all([
9349
+ this.browser.getState(),
9350
+ this.buildInventory(args.steps, undefined, 80),
9351
+ ]);
9352
+ }
9353
+ catch {
9354
+ // mid-navigation read — keep the prior state/inventory; the shell
9355
+ // decision below uses whatever count we have.
9356
+ }
9357
+ // Negative-side decision, now visibility- AND inventory-aware: a shell
9358
+ // requires loading-text in the VISIBLE text AND a sub-threshold
9359
+ // inventory. The OAuth-callback exclusion keeps the navigate-to-root
9360
+ // escape from firing mid-token-exchange (the callback IS making
9361
+ // progress and a navigate-away would abort the session).
9362
+ const stillShell = !onOAuthCallback &&
9363
+ isLoadingShell(await this.browser.extractVisibleText().catch(() => ""), inventory.length);
9364
+ if (stillShell) {
9365
+ shellStreak += 1;
9366
+ // On the 2nd consecutive shell round, do the navigate-to-root the
9367
+ // budgeted wait can't fix — a route stuck mid-hydration (a blocked
9368
+ // websocket, an SPA wedged on a stale path) often paints the real
9369
+ // dashboard from origin root. Once only.
9370
+ if (shellStreak >= 2 && !shellRootNavTried) {
9371
+ shellRootNavTried = true;
9372
+ const root = originRoot(state.url);
9373
+ args.steps.push(`Post-verify round ${round}: ${pathOf(state.url)} read as a loading shell for ` +
9374
+ `${shellStreak} consecutive rounds — navigating to origin root once before bailing.`);
9375
+ try {
9376
+ await this.browser.goto(root ?? state.url);
9377
+ await this.browser
9378
+ .waitForInteractiveDom(SHELL_MAX_ELEMENTS, 15_000)
9379
+ .catch(() => undefined);
9380
+ [state, inventory] = await Promise.all([
9381
+ this.browser.getState(),
9382
+ this.buildInventory(args.steps, undefined, 80),
9383
+ ]);
9384
+ }
9385
+ catch {
9386
+ // navigate/read failed — the streak check below bails on the
9387
+ // next shell read.
9388
+ }
9389
+ // Re-evaluate after the root nav. If it hydrated, fall through to
9390
+ // planning; if it's STILL a shell, bail truthfully now rather than
9391
+ // burning the rest of the round budget to run_timeout.
9392
+ const recovered = !isLoadingShell(await this.browser.extractVisibleText().catch(() => ""), inventory.length);
9393
+ if (recovered) {
9394
+ shellStreak = 0;
9395
+ }
9396
+ else {
9397
+ throw new SpaNeverHydratedError(`spa_never_hydrated: ${args.service}'s post-verify page (${pathOf(state.url)}) ` +
9398
+ `stayed a loading shell across ${shellStreak} rounds and an origin-root reload — ` +
9399
+ `the SPA never rendered an actionable surface (blocked websocket / wedged hydration). ` +
9400
+ `Not a navigation bug; retry or finish the signup manually.`);
9401
+ }
7910
9402
  }
7911
- catch {
7912
- // mid-navigation read — keep the prior state/inventory and let
7913
- // the next hydration tick (or the planner) retry.
9403
+ else {
9404
+ args.steps.push(`Post-verify round ${round}: ${pathOf(state.url)} is a loading shell ` +
9405
+ `(streak ${shellStreak}) — letting the SPA settle one more round`);
7914
9406
  }
7915
9407
  }
9408
+ else {
9409
+ shellStreak = 0;
9410
+ }
7916
9411
  // Stalled-wizard breaker. Build a content signature (URL + each
7917
9412
  // inventory element's selector + label) and judge whether the
7918
9413
  // PREVIOUS executed action changed the page. If the last few
@@ -8057,11 +9552,13 @@ ${formatInventory(input.inventory)}`,
8057
9552
  if (consecutiveOauthLoginPageRounds >= 3) {
8058
9553
  args.steps.push(`Post-verify: OAuth run still on a login page (${pathOf(state.url)}) for ` +
8059
9554
  `${consecutiveOauthLoginPageRounds} rounds (incl. a reload) — the OAuth callback never persisted; bailing.`);
9555
+ await this.browser.dumpOAuthDebug(args.service, "callback-not-persisted").catch(() => { });
8060
9556
  throw new OAuthSessionNotPersistedError(`oauth_session_not_persisted: signed in to ${args.service} via OAuth but the page ` +
8061
9557
  `still presents a login screen (${pathOf(state.url)}) after ` +
8062
- `${consecutiveOauthLoginPageRounds} rounds — the OAuth callback never established a ` +
8063
- `session (anti-bot / IP rejection of the callback). Not a navigation bug; needs ` +
8064
- `residential egress or manual signup.`);
9558
+ `${consecutiveOauthLoginPageRounds} rounds — the OAuth callback was rejected at the ` +
9559
+ `automation/fingerprint layer. NOT an IP issue (FALSIFIED 2026-06-14: a clean ` +
9560
+ `residential IP fails this callback identically — see STATE.md), so residential ` +
9561
+ `egress does NOT fix it. Needs a fingerprint/automation fix or manual signup.`);
8065
9562
  }
8066
9563
  }
8067
9564
  else {
@@ -8199,6 +9696,10 @@ ${formatInventory(input.inventory)}`,
8199
9696
  state,
8200
9697
  inventory,
8201
9698
  observed: nextStep,
9699
+ // Fix C4 — stamp the backend that produced THIS round's plan
9700
+ // (planPostVerifyStep set these via callLLM just above).
9701
+ ...(this.lastResolvedModel !== undefined ? { resolved_model: this.lastResolvedModel } : {}),
9702
+ ...(this.lastResolvedProvider !== undefined ? { resolved_provider: this.lastResolvedProvider } : {}),
8202
9703
  });
8203
9704
  capturedRound += 1;
8204
9705
  // Per-round telemetry upload (rc.11). Mirrors the disk capture
@@ -8555,7 +10056,7 @@ ${formatInventory(input.inventory)}`,
8555
10056
  hint = undefined;
8556
10057
  continue;
8557
10058
  }
8558
- const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls, args.service);
10059
+ const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls, args.service, this.resolvedSignupUrl);
8559
10060
  if (fallback !== null) {
8560
10061
  triedFallbackUrls.add(fallback);
8561
10062
  args.steps.push(`Post-verify: stuck-loop detected ${stuckFiresAtUrl}x at ${state.url} — escalating to a hardcoded API-key URL: ${fallback}`);
@@ -8670,7 +10171,30 @@ ${formatInventory(input.inventory)}`,
8670
10171
  // candidate is exhausted, `done` is honored.
8671
10172
  const capturedCredCount = Object.keys(credentials).filter((k) => !NON_CREDENTIAL_KEYS.has(k)).length;
8672
10173
  if (capturedCredCount === 0 && prematureDoneFallbacks < MAX_PREMATURE_DONE_FALLBACKS) {
8673
- const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls, args.service);
10174
+ // Prefer CLICKING a real API-keys nav link over guessing a URL.
10175
+ // The dashboard's own sidebar/menu link carries the correct href;
10176
+ // guessing /keys, /api-keys, /settings/api-keys 404s on services
10177
+ // that host keys at a non-standard path (unify-ai). Only when no
10178
+ // such link is in the DOM do we fall through to URL composition.
10179
+ const keysLink = findApiKeysNavLink(inventory, clickedKeysLinks);
10180
+ if (keysLink !== null) {
10181
+ prematureDoneFallbacks += 1;
10182
+ clickedKeysLinks.add(keysLink.selector);
10183
+ const label = (keysLink.visibleText ?? keysLink.ariaLabel ?? keysLink.href ?? keysLink.selector) || keysLink.selector;
10184
+ args.steps.push(`Post-verify: planner emitted done with no credential captured — ` +
10185
+ `clicking the in-page API-keys link "${label.slice(0, 60)}" ` +
10186
+ `(${keysLink.href ?? keysLink.selector}) before guessing a URL`);
10187
+ try {
10188
+ await this.browser.click(keysLink.selector);
10189
+ await this.browser.waitForInteractiveDom(5, 15_000);
10190
+ }
10191
+ catch (err) {
10192
+ args.steps.push(`Post-verify: API-keys link click failed (${err instanceof Error ? err.message : String(err)}) — continuing.`);
10193
+ }
10194
+ hint = undefined;
10195
+ continue;
10196
+ }
10197
+ const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls, args.service, this.resolvedSignupUrl);
8674
10198
  if (fallback !== null) {
8675
10199
  prematureDoneFallbacks += 1;
8676
10200
  triedFallbackUrls.add(fallback);
@@ -9178,6 +10702,10 @@ ${formatInventory(input.inventory)}`,
9178
10702
  state: postState,
9179
10703
  inventory: postInventory,
9180
10704
  observed: syntheticExtract,
10705
+ // Fix C4 — attribute this synthetic round to the planner call
10706
+ // that drove us here (no LLM ran for this implicit extract).
10707
+ ...(this.lastResolvedModel !== undefined ? { resolved_model: this.lastResolvedModel } : {}),
10708
+ ...(this.lastResolvedProvider !== undefined ? { resolved_provider: this.lastResolvedProvider } : {}),
9181
10709
  });
9182
10710
  capturedRound += 1;
9183
10711
  if (this.roundUploader !== undefined) {
@@ -9334,12 +10862,83 @@ ${formatInventory(input.inventory)}`,
9334
10862
  // the F3 inventory by element type. Returns false when the page
9335
10863
  // isn't a login form.
9336
10864
  async loginWithCredentials(email, password, steps) {
9337
- const inv = await this.buildInventory(steps);
9338
- const emailEl = inv.find((e) => e.tag === "input" && e.type === "email") ??
9339
- inv.find((e) => e.tag === "input" && (e.type === "text" || e.type === null));
9340
- const pwEl = inv.find((e) => e.tag === "input" && e.type === "password");
10865
+ const findLoginFields = (inventory) => {
10866
+ const emailEl = inventory.find((e) => e.tag === "input" && e.type === "email") ??
10867
+ inventory.find((e) => e.tag === "input" && (e.type === "text" || e.type === null));
10868
+ const pwEl = inventory.find((e) => e.tag === "input" && e.type === "password");
10869
+ return { emailEl, pwEl };
10870
+ };
10871
+ let inv = await this.buildInventory(steps);
10872
+ let { emailEl, pwEl } = findLoginFields(inv);
10873
+ // Poll for the form to render after a click. SPA login forms reveal on a
10874
+ // VARIABLE delay — portkey's "Continue with work email" → password form was
10875
+ // a render race where a fixed wait passed one run and missed the next
10876
+ // (stochastic login). Re-reads up to maxAttempts times ~1.2s apart, mutating
10877
+ // inv/emailEl/pwEl, returning as soon as the wanted field(s) appear. Uses a
10878
+ // throwaway steps array so polling doesn't spam the trail.
10879
+ const pollForFields = async (maxAttempts, requirePassword) => {
10880
+ for (let i = 0; i < maxAttempts; i++) {
10881
+ await this.browser.wait(1.2);
10882
+ inv = await this.buildInventory([]);
10883
+ ({ emailEl, pwEl } = findLoginFields(inv));
10884
+ const done = requirePassword
10885
+ ? pwEl !== undefined
10886
+ : pwEl !== undefined || emailEl !== undefined;
10887
+ if (done)
10888
+ return;
10889
+ }
10890
+ };
10891
+ // Two-stage email login: many login pages render only provider buttons
10892
+ // ("Continue with Google / Microsoft / work email", SSO) and reveal the
10893
+ // email+password inputs ONLY after you click the email option. portkey
10894
+ // (MEASURED 2026-06-17): /login is button-only with "Continue with work
10895
+ // email". Click that affordance — NOT Google/Microsoft/SSO — then re-read.
9341
10896
  if (emailEl === undefined || pwEl === undefined) {
9342
- steps.push("Login: no email/password fields on the page — skipped.");
10897
+ const emailButton = inv.find((e) => {
10898
+ if (e.tag !== "button" && e.type !== "submit")
10899
+ return false;
10900
+ const t = `${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`.toLowerCase();
10901
+ return (t.includes("email") &&
10902
+ /\b(continue|log ?in|sign ?in|use|with|password)\b/.test(t) &&
10903
+ !/google|microsoft|apple|github|\bsso\b|single sign/.test(t));
10904
+ });
10905
+ if (emailButton !== undefined) {
10906
+ // The continue button is frequently DISABLED until a Terms/consent
10907
+ // checkbox is ticked (portkey, MEASURED 2026-06-17: "Continue with
10908
+ // work email" stays inert until the TOS box is checked → the click
10909
+ // silently no-ops and the form never reveals). Tick required agreement
10910
+ // boxes first (skips marketing opt-ins; best-effort, never throws).
10911
+ const agreed = await this.browser.checkRequiredAgreementBoxes();
10912
+ if (agreed.length > 0) {
10913
+ steps.push(`Login: ticked terms/consent box(es) [${agreed.join(", ")}] to enable the button.`);
10914
+ await this.browser.wait(1);
10915
+ }
10916
+ steps.push(`Login: two-stage page — clicking "${(emailButton.visibleText ?? "email")
10917
+ .slice(0, 40)
10918
+ .trim()}" to reveal the email/password form.`);
10919
+ await this.browser.click(emailButton.selector);
10920
+ // Poll up to ~10s for the form to reveal (render race — see above).
10921
+ await pollForFields(8, false);
10922
+ }
10923
+ }
10924
+ // Progressive (email-first) login: some flows reveal ONLY the email field;
10925
+ // you enter it, click Continue, THEN the password field appears on the next
10926
+ // step. portkey (MEASURED 2026-06-17): "Continue with work email" → email
10927
+ // field → Continue → password. Fill the email, advance, and re-read.
10928
+ if (emailEl !== undefined && pwEl === undefined) {
10929
+ await this.browser.type(emailEl.selector, email).catch(() => undefined);
10930
+ const advance = inv.find((e) => e.type === "submit") ??
10931
+ inv.find((e) => (e.tag === "button" || e.type === "submit") &&
10932
+ /\b(continue|next|log ?in|sign ?in|submit)\b/i.test(`${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`));
10933
+ if (advance !== undefined) {
10934
+ steps.push("Login: email-first flow — submitted the email, advancing to the password step.");
10935
+ await this.browser.clickSubmit(advance.selector).catch(() => undefined);
10936
+ // Poll up to ~10s for the password step to render (render race).
10937
+ await pollForFields(8, true);
10938
+ }
10939
+ }
10940
+ if (pwEl === undefined) {
10941
+ steps.push("Login: no password field reachable — skipped.");
9343
10942
  return false;
9344
10943
  }
9345
10944
  // Login submit: a submit-typed button, else one whose text reads
@@ -9350,7 +10949,10 @@ ${formatInventory(input.inventory)}`,
9350
10949
  buttons.find((e) => /\b(log ?in|sign ?in|continue|next|submit)\b/i.test(`${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`)) ??
9351
10950
  buttons[0];
9352
10951
  try {
9353
- await this.browser.type(emailEl.selector, email);
10952
+ // The email may already have been entered on the prior step.
10953
+ if (emailEl !== undefined) {
10954
+ await this.browser.type(emailEl.selector, email);
10955
+ }
9354
10956
  await this.browser.type(pwEl.selector, password);
9355
10957
  steps.push("Login: filled the signup credentials");
9356
10958
  if (submitEl !== undefined) {
@@ -9611,6 +11213,11 @@ ${formatInventory(input.inventory)}${input.hint !== undefined ? `\n\nIMPORTANT
9611
11213
  // navigation-eval.md). The stall-detector + prior-action memory are the
9612
11214
  // escape from a deterministic loop.
9613
11215
  temperature: 0,
11216
+ // Fix C — pin a single model + provider + seed on the proxy path so
11217
+ // the same dashboard yields the same step regardless of which backend
11218
+ // OpenRouter would otherwise route to (the model/provider lottery
11219
+ // survives temperature 0).
11220
+ deterministic: true,
9614
11221
  parse: (raw) => {
9615
11222
  const step = parsePostVerifyStep(raw, allowed);
9616
11223
  // A `check` must land on a real checkbox/radio — the planner
@@ -9798,6 +11405,18 @@ ${formatInventory(input.inventory)}${input.hint !== undefined ? `\n\nIMPORTANT
9798
11405
  });
9799
11406
  }
9800
11407
  async extractCredentials() {
11408
+ // EXTRACTION_ENGINE (default-ON since 2026-06-15, strangler slice 4): route the
11409
+ // cross-pass accumulation + resolution through the pure extraction module
11410
+ // (extraction.ts). Flipped after pass-1 live validation (ipinfo → api_key) on
11411
+ // the dominant path; the truncated/clipboard path (pass 2) reuses the IDENTICAL
11412
+ // I/O as inline — only the unit-tested accumulation differs — and its
11413
+ // truncated-modal services (OpenRouter-class) are currently anti-bot-walled, so
11414
+ // its live blast radius is ~zero. The inline 5-pass body below is kept one cycle
11415
+ // as the explicit opt-out (EXTRACTION_ENGINE=0) and deleted next
11416
+ // (DESIGN-extraction-engine.md migration step 4).
11417
+ if (!/^(0|false|off|no)$/i.test(process.env.EXTRACTION_ENGINE ?? "")) {
11418
+ return this.extractCredentialsViaEngine();
11419
+ }
9801
11420
  // IMPORTANT: pull credentials from the *visible* page, not the raw
9802
11421
  // HTML. Reading from HTML matches anti-bot challenge JS (Cloudflare
9803
11422
  // Turnstile, hCaptcha) whose challenge tokens look like API keys to
@@ -9920,6 +11539,84 @@ ${formatInventory(input.inventory)}${input.hint !== undefined ? `\n\nIMPORTANT
9920
11539
  credentials.api_key = apiKey;
9921
11540
  return credentials;
9922
11541
  }
11542
+ // EXTRACTION_ENGINE path (strangler slice 4) — the same five-pass extraction as
11543
+ // extractCredentials, but the cross-pass accumulation (first full wins; first
11544
+ // truncated remembered) + final resolution go through the pure module
11545
+ // (extraction.ts). This method owns only the I/O + the per-candidate regex
11546
+ // classification. Faithful to the inline passes (incl. the subtlety that passes
11547
+ // 3 + 4 accept FULL hits only — they never record a truncated stub).
11548
+ async extractCredentialsViaEngine() {
11549
+ const curUrl = typeof this.browser.currentUrl === "function" ? this.browser.currentUrl() : "";
11550
+ if (typeof curUrl === "string" && isDocumentationUrl(curUrl))
11551
+ return {};
11552
+ let st = initialExtractionState();
11553
+ const classify = (text) => {
11554
+ const hit = extractApiKeyFromText(text);
11555
+ if (hit === null)
11556
+ return { kind: "none" };
11557
+ return isTruncatedCapture(text, hit) ? { kind: "truncated", value: hit } : { kind: "full", value: hit };
11558
+ };
11559
+ // Pass 1 — visible candidates (records truncated hits).
11560
+ for (const candidate of await this.browser.extractCredentialCandidates()) {
11561
+ st = accumulateCandidate(st, classify(candidate));
11562
+ if (hasFullHit(st))
11563
+ return resolveExtraction(st);
11564
+ }
11565
+ // Pass 1b — body text (records truncated hits).
11566
+ if (!hasFullHit(st)) {
11567
+ st = accumulateCandidate(st, classify(await this.browser.extractText()));
11568
+ if (hasFullHit(st))
11569
+ return resolveExtraction(st);
11570
+ }
11571
+ // Pass 2 — copy-button + clipboard recovery, only when a truncated stub was
11572
+ // seen. The copied value is accepted as a full hit directly (inline does the
11573
+ // same — no re-classification).
11574
+ if (!hasFullHit(st) && st.truncatedHit !== null) {
11575
+ const copied = await this.tryCopyButtonExtraction();
11576
+ if (copied !== null) {
11577
+ st = accumulateCandidate(st, { kind: "full", value: copied });
11578
+ if (hasFullHit(st))
11579
+ return resolveExtraction(st);
11580
+ }
11581
+ }
11582
+ // Pass 3 — hidden-input scan. FULL hits only (inline ignores truncated here).
11583
+ if (!hasFullHit(st)) {
11584
+ try {
11585
+ for (const value of await this.browser.extractAllInputValues()) {
11586
+ const c = classify(value);
11587
+ if (c.kind !== "full")
11588
+ continue;
11589
+ st = accumulateCandidate(st, c);
11590
+ if (hasFullHit(st))
11591
+ return resolveExtraction(st);
11592
+ }
11593
+ }
11594
+ catch {
11595
+ // non-fatal
11596
+ }
11597
+ }
11598
+ // Pass 4 — copy-button colocation. A bare UUID is accepted directly; otherwise
11599
+ // the normal extractor, FULL only (inline records no truncated here).
11600
+ if (!hasFullHit(st)) {
11601
+ try {
11602
+ const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
11603
+ for (const candidate of await this.browser.extractCredentialsNearCopyButtons()) {
11604
+ const c = UUID_RE.test(candidate)
11605
+ ? { kind: "full", value: candidate }
11606
+ : classify(candidate);
11607
+ if (c.kind !== "full")
11608
+ continue;
11609
+ st = accumulateCandidate(st, c);
11610
+ if (hasFullHit(st))
11611
+ return resolveExtraction(st);
11612
+ }
11613
+ }
11614
+ catch {
11615
+ // non-fatal
11616
+ }
11617
+ }
11618
+ return resolveExtraction(st);
11619
+ }
9923
11620
  // F10: click the page's Copy button (whose label typically reads
9924
11621
  // "Copy", "Copy key", "Copy secret") and extract the secret from
9925
11622
  // `navigator.clipboard.readText()`. Returns null on any failure —