npm - @trusty-squire/mcp - Versions diffs - 0.9.16 → 0.9.17-rc.1 - Mend

@trusty-squire/mcp 0.9.16 → 0.9.17-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/dist/bot/affordance-probe.d.ts +15 -0
package/dist/bot/affordance-probe.d.ts.map +1 -0
package/dist/bot/affordance-probe.js +63 -0
package/dist/bot/affordance-probe.js.map +1 -0
package/dist/bot/agent.d.ts +13 -1
package/dist/bot/agent.d.ts.map +1 -1
package/dist/bot/agent.js +560 -71
package/dist/bot/agent.js.map +1 -1
package/dist/bot/browser.d.ts +6 -0
package/dist/bot/browser.d.ts.map +1 -1
package/dist/bot/browser.js +151 -0
package/dist/bot/browser.js.map +1 -1
package/dist/bot/llm-client.d.ts +4 -0
package/dist/bot/llm-client.d.ts.map +1 -1
package/dist/bot/llm-client.js +14 -0
package/dist/bot/llm-client.js.map +1 -1
package/dist/bot/onboarding-capture.d.ts +4 -0
package/dist/bot/onboarding-capture.d.ts.map +1 -1
package/dist/bot/onboarding-capture.js +5 -0
package/dist/bot/onboarding-capture.js.map +1 -1
package/package.json +1 -1

package/dist/bot/agent.js CHANGED Viewed

@@ -169,6 +169,74 @@ export function isAtPaywall(text) {
     }
     return false;
 }
+// A service can complete the signup form / OAuth handshake and THEN drop the
+// account into a manual-approval gate — a waiting room, a waitlist, a
+// "request access / your account is pending approval / under review" screen —
+// instead of granting a dashboard + API key. Baseten is the field example:
+// the form submits, then a "waiting_room" / account-review screen appears and
+// no key is obtainable autonomously.
+//
+// This is NOT a captcha and NOT an anti-bot block — it's a service-side human
+// gate. Left undetected, the post-verify loop exhausts its budget and the run
+// gets mislabeled (oauth_onboarding_failed / a generic no-credentials miss),
+// which is misleading and can wrongly count toward skill demotion or send us
+// chasing a non-existent code bug. We classify it as `onboarding_blocked` —
+// the same terminal, human-pile, non-demoting status the billing wall uses —
+// so the loop routes it to the manual pile and never advances the demote
+// counter.
+//
+// Tuned for PRECISION over recall: every pattern requires explicit
+// account-review / waitlist / pending-approval phrasing. A marketing tile that
+// merely mentions "early access" as a feature must not trip it, so the verbs
+// are scoped to the gate's own phrasing (you ARE on the list / access IS
+// pending / the account IS under review).
+const ACCOUNT_REVIEW_GATE_PATTERNS = [
+    /\bwaiting\s+room\b/i, // "waiting room"
+    /\b(?:join|on|added\s+to)\s+(?:the\s+|our\s+)?waitlist\b/i, // "join / on / added to [the|our] waitlist"
+    /\byou'?re\s+on\s+the\s+(?:list|waitlist)\b/i, // "you're on the list|waitlist"; NOTE(review): only an ASCII ' (or none) matches — a typographic ’ does not
+    /\brequest\s+(?:early\s+)?access\b/i, // "request [early] access"
+    /\baccess\s+(?:is\s+)?pending\b/i, // "access [is] pending"
+    /\b(?:your\s+)?account\s+is\s+pending\b/i, // "[your] account is pending"
+    /\bpending\s+approval\b/i, // "pending approval"
+    /\baccount\s+(?:is\s+)?(?:currently\s+)?under\s+review\b/i, // "account [is] [currently] under review"
+    /\byour\s+account\s+is\s+being\s+reviewed\b/i, // "your account is being reviewed"
+    /\bwe'?ll\s+email\s+you\s+when\b/i, // "we'll email you when"; same ASCII-apostrophe caveat as "you're" above
+    /\bawaiting\s+(?:approval|access)\b/i, // "awaiting approval|access"
+];
+// Exported for unit testing — the post-signup heuristic that distinguishes a
+// service-side manual-approval gate (waiting room / waitlist / pending review)
+// from a normal dashboard, signup form, or captcha page. Pure over page text: true when ANY pattern above matches `text`.
+export function isAtAccountReviewGate(text) {
+    return ACCOUNT_REVIEW_GATE_PATTERNS.some((p) => p.test(text)); // patterns carry only the i flag (no g/y), so test() keeps no lastIndex state between calls
+}
+// Decide whether a no-credential form-fill outcome is a manual-review gate.
+// A verification timeout is the AUTHORITATIVE cause and must win: a pending
+// "check your email / we sent a code" page can read as a review gate to
+// isAtAccountReviewGate, so without this guard a verification_not_sent gets
+// mislabeled onboarding_blocked (the anthropic regression). Only when
+// verification did NOT fail is the review-gate text trusted. Pure, testable.
+export function isOnboardingReviewGate(verificationFailed, pageText) {
+    return verificationFailed === undefined && isAtAccountReviewGate(pageText); // strict ===: only `undefined` means "verification did not fail"; null or any other value yields false without inspecting pageText
+}
+// Closed / invite-only registration: the service does not accept new self-serve
+// signups at all (turbopuffer: "Sign-ups are closed"). Distinct from a review
+// gate (you signed up, awaiting approval) — here NO account can be created, so
+// the run is terminally unservable and the service should be dequeued, not
+// retried or mislabeled oauth_onboarding_failed (which implies a fixable nav
+// bug). Precision-tuned: requires explicit closed/disabled/invite-only phrasing
+// scoped to sign-up/registration, so a normal page mentioning "sign up" or an
+// "invite your team" feature doesn't trip it. Pure over page text.
+const SIGNUPS_CLOSED_PATTERNS = [
+    /\bsign[\s-]?ups?\s+(?:are|is)\s+(?:currently\s+)?(?:closed|disabled|paused|not\s+(?:open|available|being\s+accepted))\b/i, // "sign-ups are [currently] closed / disabled / paused / not open / not available / not being accepted"
+    /\b(?:we\s+are|we're)\s+not\s+(?:currently\s+)?accepting\s+(?:new\s+)?(?:sign[\s-]?ups|registrations|users|accounts)\b/i, // "we are / we're not [currently] accepting [new] sign-ups|registrations|users|accounts"; NOTE(review): "we're" needs the ASCII ' — a typographic ’ does not match
+    /\bregistration\s+(?:is\s+)?(?:currently\s+)?(?:closed|disabled)\b/i, // "registration [is] [currently] closed|disabled"
+    /\b(?:sign[\s-]?up|registration|access)\s+is\s+(?:by\s+)?invite[\s-]?only\b/i, // "sign-up | registration | access is [by] invite-only"
+    /\binvite[\s-]?only\s+(?:beta|access|signup|registration)\b/i, // "invite-only beta|access|signup|registration" (here "signup" is literal: no space/hyphen variant)
+    /\brequest\s+an\s+invite\b/i, // "request an invite"
+];
+export function isSignupsClosed(text) {
+    return SIGNUPS_CLOSED_PATTERNS.some((p) => p.test(text)); // true when ANY pattern matches; patterns carry only the i flag, so test() is stateless across calls
+}
 // S3: does this post-submit page text indicate the service genuinely
 // expects the user to confirm via email? Drives whether the bot polls the
 // full verification timeout or runs only a short probe. Exported so the
@@ -197,8 +265,9 @@ export class OAuthSessionNotPersistedError extends Error {
 // 0.8.2-rc.10 — common dashboard paths that vendors host their
 // per-account API key UI at. Ordered most-specific first so a
 // fallback navigate doesn't land short of the actual page. Returned
-// as an array of path-strings; the caller composes them onto the
-// origin of the currently-stuck URL and skips any already tried.
+// as an array of path-strings; the caller composes them onto the APP
+// origin (the signup/app URL the bot navigated to), NOT the auth/IdP
+// origin it may be stuck on post-OAuth, and skips any already tried.
 //
 // Patterns harvested from Anthropic (settings/keys), Sentry
 // (settings/account/api/auth-tokens), Neon (settings#api-keys),
@@ -418,33 +487,112 @@ export function findCreateKeyAffordance(inventory) {
     candidates.sort((a, b) => b.score - a.score);
     return candidates[0].el;
 }
+// An in-DOM nav link/affordance that points AT an API-keys / tokens page.
+// Distinct from findCreateKeyAffordance (the "create key" button): this finds
+// the LINK that navigates TO the keys page, so the bot can click the real
+// target — whose href is the correct path — instead of GUESSING a URL from a
+// fixed convention list (which 404s whenever a service hosts keys at a
+// non-standard path: unify-ai's keys aren't at /keys, /api-keys, or
+// /settings/api-keys, all of which 404). A human clicks the sidebar link; so
+// should the bot. Exported, pure (operates on the inventory shape only).
+const API_KEYS_HREF = /\/(?:api[-_]?keys?|api[-_]?tokens?|access[-_]?tokens?|auth[-_]?tokens?|secret[-_]?keys?|personal[-_]?access[-_]?tokens?|developers?|keys?|tokens?)(?:[/?#]|$)/i; // matches a whole path segment: preceded by "/" and followed by "/", "?", "#" or end of string
+const API_KEYS_TEXT = /\b(?:api|access|secret|auth|personal\s+access)\s*(?:keys?|tokens?)\b/i; // e.g. "API keys", "access token", "personal access tokens"
+export function findApiKeysNavLink(inventory, alreadyClicked = new Set()) { // alreadyClicked: Set of selectors to skip; returns the top-scoring element, or null when nothing qualifies
+    const candidates = [];
+    for (const el of inventory) {
+        const isClickable = el.tag === "a" ||
+            el.tag === "button" ||
+            el.role === "link" ||
+            el.role === "button";
+        if (!isClickable)
+            continue;
+        if (el.visible === false) // only an explicit false is skipped; an element with no visible flag is kept
+            continue;
+        if (alreadyClicked.has(el.selector))
+            continue;
+        const href = el.href ?? ""; // null/undefined href becomes ""
+        const text = [el.visibleText, el.ariaLabel, el.title, el.labelText, el.iconLabel]
+            .filter((s) => s !== null && s !== undefined)
+            .join(" ")
+            .trim();
+        // The loose href segments (keys?/tokens?/developers?) are only trusted on
+        // an actual anchor href, where they're a structured path, not free text.
+        const hrefHit = href.length > 0 && API_KEYS_HREF.test(href);
+        const textHit = API_KEYS_TEXT.test(text);
+        if (!hrefHit && !textHit)
+            continue;
+        // A "create API key" control is a different affordance (it opens a
+        // create flow / modal, it doesn't navigate to the listing). Skip it here
+        // UNLESS it's a real anchor with a keys href (then it's a nav link that
+        // merely happens to read "New API key").
+        if (CREATE_KEY_PHRASE.test(text) && !(el.tag === "a" && hrefHit))
+            continue;
+        let score = 0;
+        if (hrefHit)
+            score += 4; // a real, navigable target beats a text guess
+        if (/\bapi\s*(?:keys?|tokens?)\b/i.test(text))
+            score += 2;
+        else if (textHit)
+            score += 1;
+        if (el.tag === "a")
+            score += 1; // prefer anchors over role=button
+        if (el.inViewport === true)
+            score += 1;
+        candidates.push({ el, score });
+    }
+    if (candidates.length === 0)
+        return null;
+    candidates.sort((a, b) => b.score - a.score); // highest score first; equal scores keep inventory order (Array.prototype.sort is stable)
+    return candidates[0].el;
+}
 // Pick the next fallback URL to try, composed onto the APP origin (`appUrl`)
 // or, when no usable app URL is given, the origin of the currently-stuck URL.
 // The curated SERVICE_KEYS_PATHS for the run's service (when the compose host
 // contains the service slug) are tried FIRST, then the generic STUCK_LOOP_FALLBACK_PATHS.
 // Returns null when every path has already been attempted. Exported for unit tests.
-export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service) {
-    let parsed;
+export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service, appUrl) {
+    let parsedCurrent;
     try {
-        parsed = new URL(currentUrl);
+        parsedCurrent = new URL(currentUrl);
     }
     catch {
         return null;
     }
+    // Compose key-path guesses onto the APP origin, NOT the origin of the
+    // currently-stuck URL. After OAuth the stuck URL is the identity-provider
+    // subdomain (auth.lumalabs.ai, accounts.<svc>, login.<svc>, the IdP) — which
+    // has no settings/keys pages, so "${authOrigin}/settings/keys" 404s by
+    // construction. The keys live on the app host (lumalabs.ai). `appUrl` is the
+    // signup/app URL the bot actually navigated to (this.resolvedSignupUrl), so
+    // its origin is the right host to guess against. Fall back to the stuck
+    // origin only when no usable app URL is known.
+    let composeBase = parsedCurrent;
+    if (appUrl !== undefined) {
+        try {
+            const parsedApp = new URL(appUrl);
+            if ((parsedApp.protocol === "http:" || parsedApp.protocol === "https:") &&
+                !isGoogleSearchUrl(appUrl)) {
+                composeBase = parsedApp;
+            }
+        }
+        catch {
+            // keep the stuck origin
+        }
+    }
     // about:blank / data: / chrome-error pages have an opaque origin that
     // serializes to the literal string "null" — building "${origin}${path}"
     // then yields an unnavigable "null/settings/keys". Only compose
     // fallbacks against a real http(s) origin.
-    if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
+    if (composeBase.protocol !== "http:" && composeBase.protocol !== "https:") {
         return null;
     }
-    const origin = parsed.origin;
-    // Skip a candidate when the current URL's path ALREADY matches it
-    // (case-insensitive, trailing-slash tolerant). The planner is stuck
-    // ON the page the candidate points to — navigating to the same URL
-    // again won't break the cycle, only a different path will.
-    const currentPath = parsed.pathname.replace(/\/+$/, "").toLowerCase();
-    // Compose curated per-service paths first, but only when the stuck
+    const origin = composeBase.origin;
+    // Skip a candidate when it resolves to the exact URL we're already stuck
+    // on (full origin+path, trailing-slash/case tolerant) — re-navigating
+    // there won't break the cycle. Compared on the full URL now that the
+    // compose origin can differ from the stuck origin.
+    const currentFull = `${parsedCurrent.origin}${parsedCurrent.pathname}`.replace(/\/+$/, "").toLowerCase();
+    // Compose curated per-service paths first, but only when the COMPOSE
     // origin's host actually belongs to the named service. The slug is
     // a substring of the host for the vendors we curate (groq →
     // console.groq.com, launchdarkly → app.launchdarkly.com, …); this
@@ -454,7 +602,7 @@ export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service) {
     const slug = service !== undefined ? serviceSlug(service) : "";
     const curated = slug !== "" &&
         SERVICE_KEYS_PATHS[slug] !== undefined &&
-        parsed.hostname.toLowerCase().includes(slug)
+        composeBase.hostname.toLowerCase().includes(slug)
         ? SERVICE_KEYS_PATHS[slug]
         : [];
     // Curated paths lead; the generic list follows. De-dup so a path that
@@ -468,7 +616,7 @@ export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service) {
         const candidate = `${origin}${path}`;
         if (alreadyTried.has(candidate))
             continue;
-        if (candidatePath === currentPath)
+        if (`${origin}${path}`.replace(/\/+$/, "").toLowerCase() === currentFull)
             continue;
         return candidate;
     }
@@ -3305,6 +3453,50 @@ export function isLoadingShellText(text) {
     // forever, so it is not a signal.
     return /\bconnecting\b|\bloading\b|please wait|getting things ready|initiali[sz]ing/i.test(text);
 }
+// The interactive-element count at/above which a page is "hydrated by
+// definition" — a rendered dashboard/form a user can act on — so a stray
+// "loading"/"please wait" word in its (visible) text is NOT a hydration
+// shell. WHY 5: a genuine loading shell paints zero or a handful of chrome
+// affordances (a logo link, maybe a skip-link); a real authenticated surface
+// (nav + content + an "API Keys"/"Create" affordance) clears 5 trivially.
+// Field evidence: luma-ai/unify-ai/sambanova/fireworks-ai/defang carried
+// 10–95 visible interactive elements yet were flagged a shell EVERY round —
+// any threshold from ~5 up vetoes all of them while still catching the true
+// 0-to-few-element shell (northflank). Reuses the same minElements default as
+// waitForInteractiveDom (5) so the negative gate and the positive readiness
+// wait agree on what "hydrated" means.
+export const SHELL_MAX_ELEMENTS = 5;
+// The authoritative loading-shell decision: a page is a hydration shell only
+// when loading-text is present in its VISIBLE text AND it has fewer than
+// SHELL_MAX_ELEMENTS interactive elements. Splitting the two conditions kills
+// the dominant false positive two ways at once:
+//   1. visibleText (innerText) drops hidden skeleton/RSC "loading" strings a
+//      raw textContent read picked up;
+//   2. the inventory veto makes the gate un-fireable on a hydrated page
+//      regardless of any residual stray "loading" word.
+// Pure + exported for unit tests. The text predicate stays isLoadingShellText
+// (still used where only text is on hand); this is the call-site gate where
+// both signals are available.
+export function isLoadingShell(visibleText, inventoryCount) {
+    if (inventoryCount >= SHELL_MAX_ELEMENTS) // inclusive bound: exactly SHELL_MAX_ELEMENTS elements already counts as hydrated
+        return false;
+    return isLoadingShellText(visibleText); // below the bound, the text predicate alone decides
+}
+// Thrown from postVerifyLoop when a post-OAuth/post-verify SPA presents a
+// genuine loading shell that never hydrates within the bounded budget (and a
+// navigate-to-root retry didn't unstick it). Surfaced as the terminal status
+// `spa_never_hydrated`. classifyFailure() (skill-schema failure-taxonomy)
+// has no entry for this kind, so it falls to the deliberate transient default
+// — a non-demoting outcome (a never-hydrating route is environmental/transient,
+// not skill rot), and no new exported skill-schema symbol is needed (avoids
+// the published-dep-skew trap). The leading token before ':' is what
+// classifyFailure keys on, so the message MUST start with the bare kind.
+export class SpaNeverHydratedError extends Error {
+    constructor(message) {
+        super(message); // message is passed through unchanged — the required leading kind token is NOT validated here, callers must supply it
+        this.name = "SpaNeverHydratedError"; // replaces the inherited "Error" name
+    }
+}
 // Transient "the session is being established RIGHT NOW" copy. MEASURED on
 // groq (Stytch B2B): after the OAuth callback, /authenticate shows
 // "Logging in…" then "Creating your organization…" for ~5-7s of async
@@ -3348,6 +3540,12 @@ export class SignupAgent {
     // backends_used[i] is the .name string of the LLMClient that produced
     // the i-th reply this run.
     backendsUsed = [];
+    // Fix C4 — the model/provider the backend actually served on the most
+    // recent LLM call, captured per round. callLLM stamps these after every
+    // call; the capture sites read them when dumping a round. Undefined
+    // until the first call (or when the backend doesn't report a model).
+    lastResolvedModel;
+    lastResolvedProvider;
     llmPair;
     // Captcha encounter state for the current run. Updated by the
     // pre/post-submit/re-plan captcha gates in signup(); read by the
@@ -3355,6 +3553,13 @@ export class SignupAgent {
     // because a "blocked" outcome is more diagnostic than an earlier
     // "solved" one and we always want the failure mode in the result.
     captchaEncounter = undefined;
+    // Sticky "this run is on the email path" flag. Set when OAuth turns out to be
+    // login-only (a new identity has no account — Clerk's form_identifier_not_found)
+    // and we fall back to email signup. Without it, the dispatch loop re-runs the
+    // OAuth-first scan after the re-route and re-clicks Google → loops forever
+    // (the cartesia oauth_session_not_persisted bug). Honored by
+    // resolveOAuthCandidates; reset at the start of each signup().
+    committedToEmailPath = false;
     // Invisible-captcha presence for the current run. Cloudflare Turnstile
     // and reCAPTCHA-v3 are score-based: a HIGH score passes silently with no
     // visible widget to "solve", so the visible-gate path above records
@@ -3688,10 +3893,23 @@ export class SignupAgent {
         // F14 — selectors the planner clicked WITHOUT advancing the page.
         // Each no-progress plan records its click selectors here; the next
         // plan that picks ONLY selectors in this set is failed as stuck
-        // instead of looping. Cleared on any progress (fill action). The
-        // Railway run that motivated F14 spun the same footer "Email" link
-        // 5 times before timing out; this loop now bails after 2.
+        // instead of looping. Cleared on ANY real progress between two
+        // clicks of the same selector — a fill/check action OR a
+        // page change (inventory/url moved). The Railway run that motivated
+        // F14 spun the same footer "Email" link 5 times before timing out;
+        // this loop now bails after 2.
         let lastNoProgressClickSelectors = new Set();
+        // Page-state fingerprint from the END of the previous round, used to
+        // decide whether the page actually moved between rounds. A
+        // "fill field → submit → (validation error) → fix field → submit
+        // again" cycle is legitimate progress, NOT a loop: kinde's post-OAuth
+        // register form has a globally-unique "domain" field, so the first
+        // guess collides ("taken") and the bot must edit the field and
+        // re-click the SAME "Next" button. Without this, re-clicking the same
+        // selector after a genuine field edit (or any inventory/url change)
+        // false-bailed as planner_loop even though the intervening fill was
+        // real progress. (MEASURED 2026-06-13, kinde, terminal_round 3.)
+        let lastRoundPageSig = null;
         // rc.31 — once the bot has explicitly clicked an email-flow
         // button (e.g. Railway's "Log in using email" two-stage chooser),
         // stay on the email path. Without this, the auto-OAuth-first
@@ -4057,16 +4275,40 @@ export class SignupAgent {
                 steps.push("Form-fill planner described a logged-in product/billing page (not a signup form) — pivoting to post-verify navigation");
                 return { kind: "already_oauth" };
             }
+            // The page moved since the previous round if the URL changed or the
+            // set of interactive selectors changed (a field gained/lost, a
+            // validation message toggled an element, a wizard step advanced).
+            // ANY such change means whatever the planner did last round was real
+            // progress — clear the no-progress memory so a re-click of a
+            // previously-"dead" selector on the now-changed page isn't judged a
+            // loop. This is the unique-value-retry case (kinde domain field):
+            // edit field → page re-renders → re-click "Next" is legitimate.
+            const pageSig = state.url +
+                "§" +
+                inventory
+                    .map((e) => e.selector)
+                    .sort()
+                    .join("|");
+            if (lastRoundPageSig !== null && pageSig !== lastRoundPageSig) {
+                lastNoProgressClickSelectors = new Set();
+            }
+            lastRoundPageSig = pageSig;
             // F14 — stuck-detection: if the plan picks ONLY click selectors
             // we already tried in the previous round without page progress,
             // it's a planner loop. Fail planning_failed with the offending
             // selector(s) so the operator sees what stalled. Doesn't fire
             // when the plan adds at least one new selector (legitimate
-            // exploration). Doesn't fire on fill plans (forward progress).
+            // exploration). Doesn't fire on fill plans (forward progress),
+            // nor on a plan that ALSO edits a field this round (a fill/check
+            // alongside the re-click is real progress — kinde's "tick the
+            // required box + re-click Next" advances the form even though the
+            // Next selector repeats).
             const planClickSelectors = plan.actions
                 .filter((a) => a.kind === "click")
                 .map((a) => a.selector);
-            if (planClickSelectors.length > 0 &&
+            const planEditsAField = plan.actions.some((a) => a.kind === "fill" || a.kind === "check");
+            if (!planEditsAField &&
+                planClickSelectors.length > 0 &&
                 lastNoProgressClickSelectors.size > 0 &&
                 planClickSelectors.every((s) => lastNoProgressClickSelectors.has(s))) {
                 return {
@@ -4120,6 +4362,16 @@ export class SignupAgent {
             // static page won't help, so a second consecutive empty plan is
             // a dead end. (The 0.1.12 loop spun this 4x on Axiom.)
             const hadFill = plan.actions.some((a) => a.kind === "fill");
+            // A check is ALSO a field edit = real progress, even though (unlike
+            // a fill) it doesn't promote the plan to the submit path below.
+            // (The form-fill plan vocabulary is fill/check/click — `select`
+            // belongs to the post-verify loop.) Treat a check as progress for
+            // the no-progress tracker only: a plan that ticked a box advanced
+            // the form, so its click selectors must NOT be recorded as "dead"
+            // (and any prior dead record is cleared). Without this, a "click
+            // Next (no advance) → tick a required box + re-click Next" cycle
+            // false-bailed as a loop even though the check was progress.
+            const hadFieldEdit = plan.actions.some((a) => a.kind === "fill" || a.kind === "check");
             if (!hadFill) {
                 if (plan.actions.length === 0) {
                     emptyPlans += 1;
@@ -4142,8 +4394,12 @@ export class SignupAgent {
                 // F14 — record the click selectors that didn't advance the
                 // page. The next plan's stuck-detection check (above) bails
                 // if it picks the same ones again. Hint also tells the
-                // planner which selectors NOT to re-pick.
-                lastNoProgressClickSelectors = new Set(planClickSelectors);
+                // planner which selectors NOT to re-pick. A plan that ALSO made
+                // a field edit (fill/check) made real progress, so clear the
+                // tracker instead of recording its clicks as dead.
+                lastNoProgressClickSelectors = hadFieldEdit
+                    ? new Set()
+                    : new Set(planClickSelectors);
                 const avoidHint = planClickSelectors.length > 0
                     ? ` AVOID these selectors — they were clicked but the page did NOT advance: ${planClickSelectors.map((s) => JSON.stringify(s)).join(", ")}.`
                     : "";
@@ -4268,8 +4524,30 @@ export class SignupAgent {
             // the next planner iteration handles SPA settle.
             await this.browser.wait(2);
             const postGate = await this.runCaptchaGate("Post-submit", steps);
-            if (postGate.blocked)
+            if (postGate.blocked) {
+                // A managed/invisible Turnstile (Clerk's Smart CAPTCHA) resolves
+                // SERVER-SIDE: the submit can succeed — account created, verification
+                // email sent — even though our client-side token poll timed out.
+                // cartesia PROVED this: it emailed a verification code AFTER the bot had
+                // bailed captcha_blocked. The ground truth of "did the submit go
+                // through" is the INBOX, not the client token. So for a POST-submit
+                // Turnstile with an inbox available, don't hard-bail: proceed to the
+                // verification step and let the inbox poll arbitrate — a code arriving
+                // proves the managed Turnstile passed (→ completes); no code surfaces
+                // an honest verification_not_sent rather than a false captcha_blocked.
+                // Any other blocked post-submit gate (no inbox, or a non-Turnstile
+                // challenge) still bails captcha_blocked.
+                if (postGate.kind === "turnstile" && task.inbox !== undefined) {
+                    steps.push("Post-submit Turnstile token didn't populate — but a managed Turnstile resolves " +
+                        "server-side, so the submit may have gone through. Proceeding to verification; " +
+                        "the inbox poll arbitrates (a code = submit succeeded).");
+                    // Don't let the recorded block short-circuit later gates / the result.
+                    this.captchaEncounter = undefined;
+                    await this.captureSignupFormRounds(task.service, plan, inventory, fillValues);
+                    return { kind: "submitted" };
+                }
                 return { kind: "captcha_blocked", captchaKind: postGate.kind };
+            }
             if (postGate.found && postGate.solved) {
                 // Re-click submit so the populated token ships with the form.
                 try {
@@ -4329,6 +4607,11 @@ export class SignupAgent {
                     state,
                     inventory,
                     observed,
+                    // Fix C4 — the form-plan's backend (planSignupForm ran before
+                    // this synthetic preamble capture, so lastResolved* still reflect
+                    // it). These preamble rounds replay the one plan; one backend.
+                    ...(this.lastResolvedModel !== undefined ? { resolved_model: this.lastResolvedModel } : {}),
+                    ...(this.lastResolvedProvider !== undefined ? { resolved_provider: this.lastResolvedProvider } : {}),
                 });
                 this.captureChainRound += 1;
             };
@@ -4570,8 +4853,10 @@ export class SignupAgent {
         return [...new Set([...fromMarker, ...live])];
     }
     async resolveOAuthCandidates(task, steps) {
-        if (task.forceForm === true) {
-            steps.push("Force-form: OAuth-first scan suppressed — taking the email/password path");
+        if (task.forceForm === true || this.committedToEmailPath) {
+            steps.push(this.committedToEmailPath
+                ? "Committed to email path (OAuth was login-only) — OAuth-first scan suppressed"
+                : "Force-form: OAuth-first scan suppressed — taking the email/password path");
             return [];
         }
         const ordered = orderOAuthCandidates(task.oauthProvider, await this.effectiveLoggedInProviders());
@@ -4740,9 +5025,14 @@ export class SignupAgent {
                 user: args.userBlocks,
                 max_tokens: args.maxTokens,
                 ...(args.temperature !== undefined ? { temperature: args.temperature } : {}),
+                ...(args.deterministic === true ? { deterministic: true } : {}),
             });
             this.llmCallCount += 1;
             this.backendsUsed.push(resp.backend);
+            // Fix C4 — remember the served model/provider so the capture sites
+            // can stamp this round with what actually produced the plan.
+            this.lastResolvedModel = resp.resolved_model;
+            this.lastResolvedProvider = resp.resolved_provider;
             return resp.text;
         };
         const primaryRaw = await callOne(this.llmPair.primary);
@@ -4825,6 +5115,8 @@ export class SignupAgent {
         // (Google number-match etc.). Without it, the run still works —
         // steps are just only visible in the final result.
         const steps = task.stepsSink ?? [];
+        // Fresh per-run: don't let a prior run's email-path commitment leak.
+        this.committedToEmailPath = false;
         // Stash the service name so the diagnostic uploader (called from
         // deep inside postVerifyLoop after a failed extract) can label
         // the snapshot without us threading task through every method.
@@ -5318,6 +5610,10 @@ export class SignupAgent {
                             // /signup form), fill it IN PLACE — re-navigating to task.signupUrl
                             // could bounce back to the demo. Otherwise re-navigate (the
                             // login-only / no-account case left us on a /login page).
+                            // OAuth was login-only (no account for this identity). Commit to the
+                            // email path for the rest of the run so the dispatch loop's
+                            // OAuth-first scan doesn't re-click Google and loop.
+                            this.committedToEmailPath = true;
                             const onSignupFormHtml = (await this.browser.getState().catch(() => null))?.html ?? "";
                             if (classifySignupHtml(onSignupFormHtml) === "signup") {
                                 steps.push(`OAuth recovery already on a signup form ` +
@@ -5596,6 +5892,43 @@ export class SignupAgent {
                     ...this.resultTail(),
                 };
             }
+            // Before the generic no-credentials miss: a service that completed the
+            // signup form and then dropped the account into a manual-approval gate
+            // (waiting room / waitlist / pending review). Same terminal, non-demoting
+            // onboarding_blocked status the OAuth path uses — there's no key to reach
+            // until a human approves the account, so don't surface it as a generic
+            // failure (which can wrongly chase a code bug) or punish a skill for it.
+            //
+            // ONLY when verification did NOT time out. A pending email-verification
+            // page ("check your email", "we sent a code") can read as a review gate
+            // to the classifier, but the authoritative cause there is the missing
+            // mail (verification_not_sent) — anthropic was mislabeled as
+            // onboarding_blocked in exactly this way. If we were waiting on an email
+            // that never came, that is the failure; don't reinterpret it as a
+            // manual-review gate.
+            const reviewGateText = verificationFailed === undefined ? await this.browser.extractText().catch(() => "") : "";
+            // Closed / invite-only registration takes precedence over the review-gate
+            // and the generic miss — no account can be created, so it's terminally
+            // unservable (dequeue), not a fixable nav bug. Checked only when
+            // verification didn't time out (same reasoning as the review gate).
+            if (verificationFailed === undefined && isSignupsClosed(reviewGateText)) {
+                return {
+                    success: false,
+                    error: `signups_closed: ${task.service} is not accepting new self-serve sign-ups ` +
+                        `(closed / invite-only registration) — no account can be created. Dequeue or sign up manually once open.`,
+                    steps,
+                    ...this.resultTail(),
+                };
+            }
+            if (isOnboardingReviewGate(verificationFailed, reviewGateText)) {
+                return {
+                    success: false,
+                    error: `onboarding_blocked: ${task.service} put the account into a manual review / ` +
+                        `waitlist gate after signup — no API key is obtainable until a human approves ` +
+                        `the account. Finish the signup manually once access is granted.`,
+                    steps,
+                    ...this.resultTail(),
+                };
+            }
             return {
                 success: false,
                 error: verificationFailed ?? "Could not find credentials on page or via email",
@@ -6348,16 +6681,36 @@ export class SignupAgent {
         // non-auth path here and is left alone.
         if (isSignupOrLoginRoute(this.browser.currentUrl()) &&
             !isOAuthProviderHost(this.browser.currentUrl())) {
-            const root = originRoot(this.browser.currentUrl());
-            if (root !== null) {
-                steps.push(`OAuth: post-auth landing is a signup/login route (${pathOf(this.browser.currentUrl())}) — ` +
-                    `navigating to the app root (${root}) so the service routes us to the dashboard.`);
-                try {
-                    await this.browser.goto(root);
-                    await this.browser.wait(2);
-                }
-                catch {
-                    // navigation hiccup — the post-verify loop re-reads regardless.
+            // Clerk callback: don't immediately navigate away. On a Clerk combined
+            // sign-in/sign-up flow a new-user OAuth completes the account via a
+            // client-side sign-up transfer that takes a beat AFTER the callback lands;
+            // navigating to root unmounts Clerk's JS and interrupts it (the bug behind
+            // the cartesia/braintrust "oauth_session_not_persisted" cluster — proven
+            // not IP). We can't drive the transfer via window.Clerk (patchright's
+            // isolated world hides it), so instead give Clerk's own JS time and detect
+            // success via cookies (world-agnostic). If a session appears, we're signed
+            // in — skip the navigate-away.
+            const onClerkCallback = /sso-callback|\/sso\b/i.test(this.browser.currentUrl());
+            let clerkSignedIn = false;
+            if (onClerkCallback) {
+                clerkSignedIn = await this.browser.waitForClerkSession(12000).catch(() => false);
+                steps.push(`OAuth: Clerk callback — waited for session establish → ${clerkSignedIn ? "signed in" : "no session (likely login-only OAuth / needs email signup)"}`);
+            }
+            if (clerkSignedIn) {
+                await this.browser.wait(2);
+            }
+            else {
+                const root = originRoot(this.browser.currentUrl());
+                if (root !== null) {
+                    steps.push(`OAuth: post-auth landing is a signup/login route (${pathOf(this.browser.currentUrl())}) — ` +
+                        `navigating to the app root (${root}) so the service routes us to the dashboard.`);
+                    try {
+                        await this.browser.goto(root);
+                        await this.browser.wait(2);
+                    }
+                    catch {
+                        // navigation hiccup — the post-verify loop re-reads regardless.
+                    }
                 }
             }
         }
@@ -6530,6 +6883,9 @@ export class SignupAgent {
             // oauth_session_not_persisted and abort. The account simply needs
             // creating via email, so re-route to form-fill instead of bailing.
             if (detectGoogleNoAccount(gateState.url, gateText)) {
+                // Commit to email for the rest of the run — OAuth is login-only here, so
+                // the OAuth-first scan must not re-fire after the form-fill re-route.
+                this.committedToEmailPath = true;
                 steps.push(`OAuth: ${provider.label} sign-in succeeded but ${task.service} has no account for ` +
                     `this identity (login-only OAuth, ${pathOf(gateState.url)}) — abandoning OAuth and ` +
                     `falling back to email/password signup to create the account.`);
@@ -6720,6 +7076,19 @@ export class SignupAgent {
         const paywallCheckText = this.lastPostVerifyDoneReason !== null
             ? `${finalText}\n${this.lastPostVerifyDoneReason}`
             : finalText;
+        // Closed / invite-only registration — no account can be created at all
+        // (turbopuffer: "Sign-ups are closed"). Terminally unservable; label it
+        // honestly so the operator dequeues rather than seeing a misleading
+        // oauth_onboarding_failed that implies a fixable nav bug.
+        if (isSignupsClosed(paywallCheckText)) {
+            return {
+                success: false,
+                error: `signups_closed: ${task.service} is not accepting new self-serve sign-ups ` +
+                    `(closed / invite-only registration) — no account can be created. Dequeue or sign up manually once open.`,
+                steps,
+                ...this.resultTail(),
+            };
+        }
         if (isAtPaywall(paywallCheckText)) {
             return {
                 success: false,
@@ -6729,6 +7098,22 @@ export class SignupAgent {
                 ...this.resultTail(),
             };
         }
+        // Service-side manual-approval gate (waiting room / waitlist / account
+        // pending review). The OAuth handshake succeeded but the service won't
+        // grant a key until a human approves the account — there is no key to
+        // reach autonomously. Same terminal onboarding_blocked status as the
+        // billing wall so it's a non-demoting human-pile outcome, not a
+        // mislabeled oauth_onboarding_failed that wrongly implies a code bug.
+        if (isAtAccountReviewGate(paywallCheckText)) {
+            return {
+                success: false,
+                error: `onboarding_blocked: ${task.service} put the account into a manual review / ` +
+                    `waitlist gate after signup — no API key is obtainable until a human approves ` +
+                    `the account. Finish the signup manually once access is granted.`,
+                steps,
+                ...this.resultTail(),
+            };
+        }
         // rc.39 — anti-bot interstitial that survived the post-OAuth
         // landing. Turso's GitHub SSO callback runs a Cloudflare check
         // that never clears for our Chromium fingerprint; the planner's
@@ -7009,6 +7394,9 @@ ${formatInventory(input.inventory)}`,
             // Deterministic form-fill picks (same rationale as the post-verify
             // planner — D2). Removes a run-to-run flakiness source.
             temperature: 0,
+            // Fix C — pin a single model + provider + seed on the proxy path.
+            // temperature 0 alone leaves the model/provider lottery in play.
+            deterministic: true,
             parse: (raw) => parseSignupPlan(raw, allowed),
         });
     }
@@ -7422,7 +7810,7 @@ ${formatInventory(input.inventory)}`,
             catch {
                 break;
             }
-            const fallback = pickStuckLoopFallbackUrl(currentUrl, visitedKeysUrls);
+            const fallback = pickStuckLoopFallbackUrl(currentUrl, visitedKeysUrls, undefined, this.resolvedSignupUrl);
             if (fallback === null)
                 break;
             visitedKeysUrls.add(fallback);
@@ -7508,6 +7896,15 @@ ${formatInventory(input.inventory)}`,
         // the dashboard for those; a genuine callback rejection stays on login
         // even after reload, so this never masks a real wall.
         let oauthBounceReloadTried = false;
+        // Consecutive rounds the post-verify page read as a genuine loading shell
+        // (visible loading-text AND a sub-threshold inventory). A real SPA
+        // hydrates within the bounded per-round wait, so a streak means the route
+        // never paints content — burn a navigate-to-root retry, then bail
+        // truthfully rather than re-running the wait every round to run_timeout.
+        // Reset on any non-shell round. Mirrors the consecutiveOauthLoginPageRounds
+        // / oauthBounceReloadTried escape used for the stuck-login case.
+        let shellStreak = 0;
+        let shellRootNavTried = false;
         let planFailures = 0;
         // 0.8.2-rc.6 — separate counter for upstream-blip retries. Doesn't
         // gate planFailures (so a transient 502 won't push us into the
@@ -7637,6 +8034,9 @@ ${formatInventory(input.inventory)}`,
         let stuckFiresAtUrl = 0;
         let lastStuckFireUrl = null;
         const triedFallbackUrls = new Set();
+        // Selectors of API-keys nav links already clicked, so the
+        // click-the-real-link escalation doesn't re-click the same link.
+        const clickedKeysLinks = new Set();
         // Premature-done guard budget. When the planner gives up (`done`)
         // with zero credentials captured, we navigate to an unvisited
         // canonical keys URL and re-plan — bounded so a service that
@@ -7872,47 +8272,98 @@ ${formatInventory(input.inventory)}`,
             // SPA hydration guard. A post-OAuth dashboard (northflank's
             // /settings/access-tokens, PostHog) can render a "Connecting"/loading
             // shell while its JS bundle + websocket finish — slow over a
-            // residential tunnel. The shell often carries a stray element or two
-            // (a logo link, the <noscript>), so gating on an EMPTY inventory
-            // misses it; the loading-shell TEXT is the authoritative "not yet
-            // rendered" signal. Wait while that text persists, then proceed with
-            // whatever's there (an honest "still a shell" beats a premature done —
-            // and if the SPA never hydrates, e.g. a blocked websocket, the bound
-            // keeps us from hanging).
+            // residential tunnel. We gate on POSITIVE readiness — the instant the
+            // page has SHELL_MAX_ELEMENTS visible interactive elements it is
+            // hydrated by definition and we proceed — rather than looping on the
+            // negative "text still says loading" signal. waitForInteractiveDom
+            // returns the moment that count is met (or after the budget), so a fast
+            // page costs ~0 and a slow one waits exactly as long as needed. This is
+            // the fix for the dominant false positive: a fully-rendered dashboard
+            // whose DOM merely CONTAINS a hidden "loading…"/"please wait 30
+            // seconds…" string no longer spins the wait every round to run_timeout.
             //
             // Budget = 18s (18_000 ms). MEASURED: a dashboard SPA gated on a websocket
             // (northflank's wss://platform.northflank.com/websocket) hydrates in
-            // ~12-15s over the tunnel. A larger budget BACKFIRES on a page that
-            // will NEVER hydrate (e.g. an authed user stranded on /signup): the
-            // wait re-runs every round and burns the 600s run cap. The escape for
-            // a never-hydrating route is navigate-to-root post-OAuth, not a longer
-            // wait here.
+            // ~12-15s over the tunnel.
             //
             // ADAPTIVE exception (MEASURED 2026-06-04, clerk): an OAuth/SSO
             // CALLBACK route does a token exchange that renders even slower than a
             // plain dashboard — clerk's `/sign-in/sso-callback` outlasts 18s and
             // the bot bailed at the edge with `oauth_session_not_persisted`. On a
-            // callback route the SPA IS making progress, so 12x3s = 36s of
-            // patience is warranted; everywhere else the 6-tick budget holds so a
-            // genuinely-stuck route still hits the navigate-to-root escape fast.
-            // Read the URL fresh each round (it may redirect off the callback).
-            const HYDRATION_TICKS = isOAuthCallbackRoute(state.url) ? 12 : 6;
-            for (let hydrationWait = 0; hydrationWait < HYDRATION_TICKS &&
-                isLoadingShellText(await this.browser.extractText().catch(() => "")); hydrationWait++) {
-                args.steps.push(`Post-verify round ${round}: ${pathOf(state.url)} is a loading shell ` +
-                    `(hydration wait ${hydrationWait + 1}/${HYDRATION_TICKS}) — waiting for the SPA to render`);
-                await this.browser.wait(3);
-                try {
-                    [state, inventory] = await Promise.all([
-                        this.browser.getState(),
-                        this.buildInventory(args.steps, undefined, 80),
-                    ]);
+            // callback route the SPA IS making progress, so 36s of patience is
+            // warranted; everywhere else the 18s budget holds so a genuinely-stuck
+            // route reaches the navigate-to-root escape fast. Read the URL fresh
+            // each round (it may redirect off the callback).
+            const onOAuthCallback = isOAuthCallbackRoute(state.url);
+            const HYDRATION_BUDGET_MS = onOAuthCallback ? 36_000 : 18_000;
+            await this.browser
+                .waitForInteractiveDom(SHELL_MAX_ELEMENTS, HYDRATION_BUDGET_MS)
+                .catch(() => undefined);
+            // Re-read after the wait — the page may have hydrated (or redirected).
+            try {
+                [state, inventory] = await Promise.all([
+                    this.browser.getState(),
+                    this.buildInventory(args.steps, undefined, 80),
+                ]);
+            }
+            catch {
+                // mid-navigation read — keep the prior state/inventory; the shell
+                // decision below uses whatever count we have.
+            }
+            // Negative-side decision, now visibility- AND inventory-aware: a shell
+            // requires loading-text in the VISIBLE text AND a sub-threshold
+            // inventory. The OAuth-callback exclusion keeps the navigate-to-root
+            // escape from firing mid-token-exchange (the callback IS making
+            // progress and a navigate-away would abort the session).
+            const stillShell = !onOAuthCallback &&
+                isLoadingShell(await this.browser.extractVisibleText().catch(() => ""), inventory.length);
+            if (stillShell) {
+                shellStreak += 1;
+                // On the 2nd consecutive shell round, do the navigate-to-root the
+                // budgeted wait can't fix — a route stuck mid-hydration (a blocked
+                // websocket, an SPA wedged on a stale path) often paints the real
+                // dashboard from origin root. Once only.
+                if (shellStreak >= 2 && !shellRootNavTried) {
+                    shellRootNavTried = true;
+                    const root = originRoot(state.url);
+                    args.steps.push(`Post-verify round ${round}: ${pathOf(state.url)} read as a loading shell for ` +
+                        `${shellStreak} consecutive rounds — navigating to origin root once before bailing.`);
+                    try {
+                        await this.browser.goto(root ?? state.url);
+                        await this.browser
+                            .waitForInteractiveDom(SHELL_MAX_ELEMENTS, 15_000)
+                            .catch(() => undefined);
+                        [state, inventory] = await Promise.all([
+                            this.browser.getState(),
+                            this.buildInventory(args.steps, undefined, 80),
+                        ]);
+                    }
+                    catch {
+                        // navigate/read failed — keep the prior state/inventory; the
+                        // re-evaluation just below bails if the page still reads as a shell.
+                    }
+                    // Re-evaluate after the root nav. If it hydrated, fall through to
+                    // planning; if it's STILL a shell, bail truthfully now rather than
+                    // burning the rest of the round budget to run_timeout.
+                    const recovered = !isLoadingShell(await this.browser.extractVisibleText().catch(() => ""), inventory.length);
+                    if (recovered) {
+                        shellStreak = 0;
+                    }
+                    else {
+                        throw new SpaNeverHydratedError(`spa_never_hydrated: ${args.service}'s post-verify page (${pathOf(state.url)}) ` +
+                            `stayed a loading shell across ${shellStreak} rounds and an origin-root reload — ` +
+                            `the SPA never rendered an actionable surface (blocked websocket / wedged hydration). ` +
+                            `Not a navigation bug; retry or finish the signup manually.`);
+                    }
                 }
-                catch {
-                    // mid-navigation read — keep the prior state/inventory and let
-                    // the next hydration tick (or the planner) retry.
+                else {
+                    args.steps.push(`Post-verify round ${round}: ${pathOf(state.url)} is a loading shell ` +
+                        `(streak ${shellStreak}) — letting the SPA settle one more round`);
                 }
             }
+            else {
+                shellStreak = 0;
+            }
             // Stalled-wizard breaker. Build a content signature (URL + each
             // inventory element's selector + label) and judge whether the
             // PREVIOUS executed action changed the page. If the last few
@@ -8057,11 +8508,13 @@ ${formatInventory(input.inventory)}`,
                 if (consecutiveOauthLoginPageRounds >= 3) {
                     args.steps.push(`Post-verify: OAuth run still on a login page (${pathOf(state.url)}) for ` +
                         `${consecutiveOauthLoginPageRounds} rounds (incl. a reload) — the OAuth callback never persisted; bailing.`);
+                    await this.browser.dumpOAuthDebug(args.service, "callback-not-persisted").catch(() => { });
                     throw new OAuthSessionNotPersistedError(`oauth_session_not_persisted: signed in to ${args.service} via OAuth but the page ` +
                         `still presents a login screen (${pathOf(state.url)}) after ` +
-                        `${consecutiveOauthLoginPageRounds} rounds — the OAuth callback never established a ` +
-                        `session (anti-bot / IP rejection of the callback). Not a navigation bug; needs ` +
-                        `residential egress or manual signup.`);
+                        `${consecutiveOauthLoginPageRounds} rounds — the OAuth callback was rejected at the ` +
+                        `automation/fingerprint layer. NOT an IP issue (FALSIFIED 2026-06-14: a clean ` +
+                        `residential IP fails this callback identically — see STATE.md), so residential ` +
+                        `egress does NOT fix it. Needs a fingerprint/automation fix or manual signup.`);
                 }
             }
             else {
@@ -8199,6 +8652,10 @@ ${formatInventory(input.inventory)}`,
                 state,
                 inventory,
                 observed: nextStep,
+                // Fix C4 — stamp the backend that produced THIS round's plan
+                // (planPostVerifyStep set these via callLLM just above).
+                ...(this.lastResolvedModel !== undefined ? { resolved_model: this.lastResolvedModel } : {}),
+                ...(this.lastResolvedProvider !== undefined ? { resolved_provider: this.lastResolvedProvider } : {}),
             });
             capturedRound += 1;
             // Per-round telemetry upload (rc.11). Mirrors the disk capture
@@ -8555,7 +9012,7 @@ ${formatInventory(input.inventory)}`,
                             hint = undefined;
                             continue;
                         }
-                        const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls, args.service);
+                        const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls, args.service, this.resolvedSignupUrl);
                         if (fallback !== null) {
                             triedFallbackUrls.add(fallback);
                             args.steps.push(`Post-verify: stuck-loop detected ${stuckFiresAtUrl}x at ${state.url} — escalating to a hardcoded API-key URL: ${fallback}`);
@@ -8670,7 +9127,30 @@ ${formatInventory(input.inventory)}`,
                 // candidate is exhausted, `done` is honored.
                 const capturedCredCount = Object.keys(credentials).filter((k) => !NON_CREDENTIAL_KEYS.has(k)).length;
                 if (capturedCredCount === 0 && prematureDoneFallbacks < MAX_PREMATURE_DONE_FALLBACKS) {
-                    const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls, args.service);
+                    // Prefer CLICKING a real API-keys nav link over guessing a URL.
+                    // The dashboard's own sidebar/menu link carries the correct href;
+                    // guessing /keys, /api-keys, /settings/api-keys 404s on services
+                    // that host keys at a non-standard path (unify-ai). Only when no
+                    // such link is in the DOM do we fall through to URL composition.
+                    const keysLink = findApiKeysNavLink(inventory, clickedKeysLinks);
+                    if (keysLink !== null) {
+                        prematureDoneFallbacks += 1;
+                        clickedKeysLinks.add(keysLink.selector);
+                        const label = (keysLink.visibleText ?? keysLink.ariaLabel ?? keysLink.href ?? keysLink.selector) || keysLink.selector;
+                        args.steps.push(`Post-verify: planner emitted done with no credential captured — ` +
+                            `clicking the in-page API-keys link "${label.slice(0, 60)}" ` +
+                            `(${keysLink.href ?? keysLink.selector}) before guessing a URL`);
+                        try {
+                            await this.browser.click(keysLink.selector);
+                            await this.browser.waitForInteractiveDom(5, 15_000);
+                        }
+                        catch (err) {
+                            args.steps.push(`Post-verify: API-keys link click failed (${err instanceof Error ? err.message : String(err)}) — continuing.`);
+                        }
+                        hint = undefined;
+                        continue;
+                    }
+                    const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls, args.service, this.resolvedSignupUrl);
                     if (fallback !== null) {
                         prematureDoneFallbacks += 1;
                         triedFallbackUrls.add(fallback);
@@ -9178,6 +9658,10 @@ ${formatInventory(input.inventory)}`,
                         state: postState,
                         inventory: postInventory,
                         observed: syntheticExtract,
+                        // Fix C4 — attribute this synthetic round to the planner call
+                        // that drove us here (no LLM ran for this implicit extract).
+                        ...(this.lastResolvedModel !== undefined ? { resolved_model: this.lastResolvedModel } : {}),
+                        ...(this.lastResolvedProvider !== undefined ? { resolved_provider: this.lastResolvedProvider } : {}),
                     });
                     capturedRound += 1;
                     if (this.roundUploader !== undefined) {
@@ -9611,6 +10095,11 @@ ${formatInventory(input.inventory)}${input.hint !== undefined ? `\n\nIMPORTANT
             // navigation-eval.md). The stall-detector + prior-action memory are the
             // escape from a deterministic loop.
             temperature: 0,
+            // Fix C — pin a single model + provider + seed on the proxy path so
+            // the same dashboard yields the same step regardless of which backend
+            // OpenRouter would otherwise route to (the model/provider lottery
+            // survives temperature 0).
+            deterministic: true,
             parse: (raw) => {
                 const step = parsePostVerifyStep(raw, allowed);
                 // A `check` must land on a real checkbox/radio — the planner