@trusty-squire/mcp 0.9.16 → 0.9.17-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bot/agent.js CHANGED
@@ -169,6 +169,74 @@ export function isAtPaywall(text) {
169
169
  }
170
170
  return false;
171
171
  }
172
+ // A service can complete the signup form / OAuth handshake and THEN drop the
173
+ // account into a manual-approval gate — a waiting room, a waitlist, a
174
+ // "request access / your account is pending approval / under review" screen —
175
+ // instead of granting a dashboard + API key. Baseten is the field example:
176
+ // the form submits, then a "waiting_room" / account-review screen appears and
177
+ // no key is obtainable autonomously.
178
+ //
179
+ // This is NOT a captcha and NOT an anti-bot block — it's a service-side human
180
+ // gate. Left undetected, the post-verify loop exhausts its budget and the run
181
+ // gets mislabeled (oauth_onboarding_failed / a generic no-credentials miss),
182
+ // which is misleading and can wrongly count toward skill demotion or send us
183
+ // chasing a non-existent code bug. We classify it as `onboarding_blocked` —
184
+ // the same terminal, human-pile, non-demoting status the billing wall uses —
185
+ // so the loop routes it to the manual pile and never advances the demote
186
+ // counter.
187
+ //
188
+ // Tuned for PRECISION over recall: every pattern requires explicit
189
+ // account-review / waitlist / pending-approval phrasing. A marketing tile that
190
+ // merely mentions "early access" as a feature must not trip it, so the verbs
191
+ // are scoped to the gate's own phrasing (you ARE on the list / access IS
192
+ // pending / the account IS under review).
193
// Phrases that identify a service-side manual-approval screen (waiting room /
// waitlist / pending approval / account under review). Every pattern is
// case-insensitive and word-bounded. Contractions accept the ASCII apostrophe
// ('), the typographic apostrophe (U+2019) that rendered page copy commonly
// uses, or no apostrophe at all — otherwise "You’re on the list" would slip
// past a pattern written only for "You're".
const ACCOUNT_REVIEW_GATE_PATTERNS = [
    /\bwaiting\s+room\b/i,
    /\b(?:join|on|added\s+to)\s+(?:the\s+|our\s+)?waitlist\b/i,
    /\byou['\u2019]?re\s+on\s+the\s+(?:list|waitlist)\b/i,
    /\brequest\s+(?:early\s+)?access\b/i,
    /\baccess\s+(?:is\s+)?pending\b/i,
    /\b(?:your\s+)?account\s+is\s+pending\b/i,
    /\bpending\s+approval\b/i,
    /\baccount\s+(?:is\s+)?(?:currently\s+)?under\s+review\b/i,
    /\byour\s+account\s+is\s+being\s+reviewed\b/i,
    /\bwe['\u2019]?ll\s+email\s+you\s+when\b/i,
    /\bawaiting\s+(?:approval|access)\b/i,
];
/**
 * Post-signup heuristic: does the page text read as a manual-approval gate
 * (waiting room / waitlist / pending review)? Pure over the supplied text;
 * exported for unit testing.
 *
 * @param {string} text - Page text to classify.
 * @returns {boolean} True when at least one gate pattern matches.
 */
export function isAtAccountReviewGate(text) {
    // None of the patterns carry the g/y flag, so .test() is stateless here.
    return ACCOUNT_REVIEW_GATE_PATTERNS.some((p) => p.test(text));
}
212
+ // Decide whether a no-credential form-fill outcome is a manual-review gate.
213
+ // A verification timeout is the AUTHORITATIVE cause and must win: a pending
214
+ // "check your email / we sent a code" page can read as a review gate to
215
+ // isAtAccountReviewGate, so without this guard a verification_not_sent gets
216
+ // mislabeled onboarding_blocked (the anthropic regression). Only when
217
+ // verification did NOT fail is the review-gate text trusted. Pure, testable.
218
/**
 * Decide whether a no-credential form-fill outcome is a manual-review gate.
 * A recorded verification failure always wins: the page text is consulted
 * only when `verificationFailed` is exactly `undefined`.
 *
 * @param {string|undefined} verificationFailed - Verification failure, if any.
 * @param {string} pageText - Text of the page the run ended on.
 * @returns {boolean} True only when verification did not fail AND the text
 *   reads as an account-review gate.
 */
export function isOnboardingReviewGate(verificationFailed, pageText) {
    if (verificationFailed !== undefined) {
        // The verification failure is the authoritative cause; do not
        // reinterpret the page as a review gate.
        return false;
    }
    return isAtAccountReviewGate(pageText);
}
221
+ // Closed / invite-only registration: the service does not accept new self-serve
222
+ // signups at all (turbopuffer: "Sign-ups are closed"). Distinct from a review
223
+ // gate (you signed up, awaiting approval) — here NO account can be created, so
224
+ // the run is terminally unservable and the service should be dequeued, not
225
+ // retried or mislabeled oauth_onboarding_failed (which implies a fixable nav
226
+ // bug). Precision-tuned: requires explicit closed/disabled/invite-only phrasing
227
+ // scoped to sign-up/registration, so a normal page mentioning "sign up" or an
228
+ // "invite your team" feature doesn't trip it. Pure over page text.
229
// Phrases that say self-serve registration is closed, disabled, paused or
// invite-only. Every pattern is case-insensitive and scoped to explicit
// sign-up / registration / invite wording. The "we're" contraction accepts
// both the ASCII apostrophe and the typographic one (U+2019), which rendered
// page copy commonly uses.
const SIGNUPS_CLOSED_PATTERNS = [
    /\bsign[\s-]?ups?\s+(?:are|is)\s+(?:currently\s+)?(?:closed|disabled|paused|not\s+(?:open|available|being\s+accepted))\b/i,
    /\b(?:we\s+are|we['\u2019]re)\s+not\s+(?:currently\s+)?accepting\s+(?:new\s+)?(?:sign[\s-]?ups|registrations|users|accounts)\b/i,
    /\bregistration\s+(?:is\s+)?(?:currently\s+)?(?:closed|disabled)\b/i,
    /\b(?:sign[\s-]?up|registration|access)\s+is\s+(?:by\s+)?invite[\s-]?only\b/i,
    /\binvite[\s-]?only\s+(?:beta|access|signup|registration)\b/i,
    /\brequest\s+an\s+invite\b/i,
];
/**
 * Does the page text state that new self-serve sign-ups are not accepted
 * (closed / disabled / invite-only registration)? Pure over the supplied text.
 *
 * @param {string} text - Page text to classify.
 * @returns {boolean} True when at least one closed-signup pattern matches.
 */
export function isSignupsClosed(text) {
    // None of the patterns carry the g/y flag, so .test() is stateless here.
    return SIGNUPS_CLOSED_PATTERNS.some((p) => p.test(text));
}
172
240
  // S3: does this post-submit page text indicate the service genuinely
173
241
  // expects the user to confirm via email? Drives whether the bot polls the
174
242
  // full verification timeout or runs only a short probe. Exported so the
@@ -197,8 +265,9 @@ export class OAuthSessionNotPersistedError extends Error {
197
265
  // 0.8.2-rc.10 — common dashboard paths that vendors host their
198
266
  // per-account API key UI at. Ordered most-specific first so a
199
267
  // fallback navigate doesn't land short of the actual page. Returned
200
- // as an array of path-strings; the caller composes them onto the
201
- // origin of the currently-stuck URL and skips any already tried.
268
+ // as an array of path-strings; the caller composes them onto the APP
269
+ // origin (the signup/app URL the bot navigated to), NOT the auth/IdP
270
+ // origin it may be stuck on post-OAuth, and skips any already tried.
202
271
  //
203
272
  // Patterns harvested from Anthropic (settings/keys), Sentry
204
273
  // (settings/account/api/auth-tokens), Neon (settings#api-keys),
@@ -418,33 +487,112 @@ export function findCreateKeyAffordance(inventory) {
418
487
  candidates.sort((a, b) => b.score - a.score);
419
488
  return candidates[0].el;
420
489
  }
490
+ // An in-DOM nav link/affordance that points AT an API-keys / tokens page.
491
+ // Distinct from findCreateKeyAffordance (the "create key" button): this finds
492
+ // the LINK that navigates TO the keys page, so the bot can click the real
493
+ // target — whose href is the correct path — instead of GUESSING a URL from a
494
+ // fixed convention list (which 404s whenever a service hosts keys at a
495
+ // non-standard path: unify-ai's keys aren't at /keys, /api-keys, or
496
+ // /settings/api-keys, all of which 404). A human clicks the sidebar link; so
497
+ // should the bot. Exported, pure (operates on the inventory shape only).
498
// Path segment of an href that points at an API-keys / tokens page. The
// segment must be followed by "/", "?", "#" or the end of the string.
const API_KEYS_HREF = /\/(?:api[-_]?keys?|api[-_]?tokens?|access[-_]?tokens?|auth[-_]?tokens?|secret[-_]?keys?|personal[-_]?access[-_]?tokens?|developers?|keys?|tokens?)(?:[/?#]|$)/i;
// Visible / accessible label wording that names a keys or tokens page.
const API_KEYS_TEXT = /\b(?:api|access|secret|auth|personal\s+access)\s*(?:keys?|tokens?)\b/i;
/**
 * Find the in-DOM link/affordance that navigates TO the API-keys / tokens
 * page, as opposed to the "create key" control. Pure: reads the inventory
 * entries only.
 *
 * Scoring: +4 for a matching href, +2 when the label names "API key/token"
 * (otherwise +1 for any other label hit), +1 for an <a>, +1 when the element
 * is in the viewport. The highest score wins; ties go to the element that
 * appears first in `inventory`.
 *
 * @param {Array<object>} inventory - Interactive-element descriptors.
 * @param {Set<string>} [alreadyClicked] - Selectors to exclude.
 * @returns {object|null} The best-scoring element, or null when none qualifies.
 */
export function findApiKeysNavLink(inventory, alreadyClicked = new Set()) {
    let winner = null;
    let winnerScore = -Infinity;
    for (const el of inventory) {
        const clickable = el.tag === "a" ||
            el.tag === "button" ||
            el.role === "link" ||
            el.role === "button";
        if (!clickable || el.visible === false || alreadyClicked.has(el.selector)) {
            continue;
        }
        const href = el.href ?? "";
        const labelParts = [el.visibleText, el.ariaLabel, el.title, el.labelText, el.iconLabel];
        const text = labelParts
            .filter((part) => part !== null && part !== undefined)
            .join(" ")
            .trim();
        // An empty href is never a hit; a non-empty one is matched as a path.
        const hrefHit = href.length > 0 && API_KEYS_HREF.test(href);
        const textHit = API_KEYS_TEXT.test(text);
        if (!(hrefHit || textHit)) {
            continue;
        }
        // A "create key" control is a different affordance — skip it unless
        // it is an anchor whose href itself points at a keys page.
        const isKeysAnchor = el.tag === "a" && hrefHit;
        if (CREATE_KEY_PHRASE.test(text) && !isKeysAnchor) {
            continue;
        }
        const namesApiKey = /\bapi\s*(?:keys?|tokens?)\b/i.test(text);
        const score = (hrefHit ? 4 : 0) +
            (namesApiKey ? 2 : textHit ? 1 : 0) +
            (el.tag === "a" ? 1 : 0) +
            (el.inViewport === true ? 1 : 0);
        // Strict ">" keeps the earliest element on a tie, matching a stable
        // descending sort followed by taking the first entry.
        if (score > winnerScore) {
            winner = el;
            winnerScore = score;
        }
    }
    return winner;
}
421
548
  // Pick the next fallback URL to try, keyed against the origin of the
422
549
  // currently-stuck URL. The curated SERVICE_KEYS_PATHS for the run's
423
550
  // service (when its host matches the stuck origin) are tried FIRST,
424
551
  // then the generic STUCK_LOOP_FALLBACK_PATHS. Returns null when every
425
552
  // path has already been attempted. Exported for unit tests.
426
- export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service) {
427
- let parsed;
553
+ export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service, appUrl) {
554
+ let parsedCurrent;
428
555
  try {
429
- parsed = new URL(currentUrl);
556
+ parsedCurrent = new URL(currentUrl);
430
557
  }
431
558
  catch {
432
559
  return null;
433
560
  }
561
+ // Compose key-path guesses onto the APP origin, NOT the origin of the
562
+ // currently-stuck URL. After OAuth the stuck URL is the identity-provider
563
+ // subdomain (auth.lumalabs.ai, accounts.<svc>, login.<svc>, the IdP) — which
564
+ // has no settings/keys pages, so "${authOrigin}/settings/keys" 404s by
565
+ // construction. The keys live on the app host (lumalabs.ai). `appUrl` is the
566
+ // signup/app URL the bot actually navigated to (this.resolvedSignupUrl), so
567
+ // its origin is the right host to guess against. Fall back to the stuck
568
+ // origin only when no usable app URL is known.
569
+ let composeBase = parsedCurrent;
570
+ if (appUrl !== undefined) {
571
+ try {
572
+ const parsedApp = new URL(appUrl);
573
+ if ((parsedApp.protocol === "http:" || parsedApp.protocol === "https:") &&
574
+ !isGoogleSearchUrl(appUrl)) {
575
+ composeBase = parsedApp;
576
+ }
577
+ }
578
+ catch {
579
+ // keep the stuck origin
580
+ }
581
+ }
434
582
  // about:blank / data: / chrome-error pages have an opaque origin that
435
583
  // serializes to the literal string "null" — building "${origin}${path}"
436
584
  // then yields an unnavigable "null/settings/keys". Only compose
437
585
  // fallbacks against a real http(s) origin.
438
- if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
586
+ if (composeBase.protocol !== "http:" && composeBase.protocol !== "https:") {
439
587
  return null;
440
588
  }
441
- const origin = parsed.origin;
442
- // Skip a candidate when the current URL's path ALREADY matches it
443
- // (case-insensitive, trailing-slash tolerant). The planner is stuck
444
- // ON the page the candidate points to navigating to the same URL
445
- // again won't break the cycle, only a different path will.
446
- const currentPath = parsed.pathname.replace(/\/+$/, "").toLowerCase();
447
- // Compose curated per-service paths first, but only when the stuck
589
+ const origin = composeBase.origin;
590
+ // Skip a candidate when it resolves to the exact URL we're already stuck
591
+ // on (full origin+path, trailing-slash/case tolerant) — re-navigating
592
+ // there won't break the cycle. Compared on the full URL now that the
593
+ // compose origin can differ from the stuck origin.
594
+ const currentFull = `${parsedCurrent.origin}${parsedCurrent.pathname}`.replace(/\/+$/, "").toLowerCase();
595
+ // Compose curated per-service paths first, but only when the COMPOSE
448
596
  // origin's host actually belongs to the named service. The slug is
449
597
  // a substring of the host for the vendors we curate (groq →
450
598
  // console.groq.com, launchdarkly → app.launchdarkly.com, …); this
@@ -454,7 +602,7 @@ export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service) {
454
602
  const slug = service !== undefined ? serviceSlug(service) : "";
455
603
  const curated = slug !== "" &&
456
604
  SERVICE_KEYS_PATHS[slug] !== undefined &&
457
- parsed.hostname.toLowerCase().includes(slug)
605
+ composeBase.hostname.toLowerCase().includes(slug)
458
606
  ? SERVICE_KEYS_PATHS[slug]
459
607
  : [];
460
608
  // Curated paths lead; the generic list follows. De-dup so a path that
@@ -468,7 +616,7 @@ export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service) {
468
616
  const candidate = `${origin}${path}`;
469
617
  if (alreadyTried.has(candidate))
470
618
  continue;
471
- if (candidatePath === currentPath)
619
+ if (`${origin}${path}`.replace(/\/+$/, "").toLowerCase() === currentFull)
472
620
  continue;
473
621
  return candidate;
474
622
  }
@@ -3305,6 +3453,50 @@ export function isLoadingShellText(text) {
3305
3453
  // forever, so it is not a signal.
3306
3454
  return /\bconnecting\b|\bloading\b|please wait|getting things ready|initiali[sz]ing/i.test(text);
3307
3455
  }
3456
+ // The interactive-element count at/above which a page is "hydrated by
3457
+ // definition" — a rendered dashboard/form a user can act on — so a stray
3458
+ // "loading"/"please wait" word in its (visible) text is NOT a hydration
3459
+ // shell. WHY 5: a genuine loading shell paints zero or a handful of chrome
3460
+ // affordances (a logo link, maybe a skip-link); a real authenticated surface
3461
+ // (nav + content + an "API Keys"/"Create" affordance) clears 5 trivially.
3462
+ // Field evidence: luma-ai/unify-ai/sambanova/fireworks-ai/defang carried
3463
+ // 10–95 visible interactive elements yet were flagged a shell EVERY round —
3464
+ // any threshold from ~5 up vetoes all of them while still catching the true
3465
+ // 0-to-few-element shell (northflank). Reuses the same minElements default as
3466
+ // waitForInteractiveDom (5) so the negative gate and the positive readiness
3467
+ // wait agree on what "hydrated" means.
3468
// Interactive-element count at or above which a page is treated as hydrated,
// so loading-style wording in its text no longer marks it as a shell.
export const SHELL_MAX_ELEMENTS = 5;
/**
 * Loading-shell decision that combines two signals: the page is a hydration
 * shell only when it has fewer than SHELL_MAX_ELEMENTS interactive elements
 * AND its visible text matches isLoadingShellText. Pure; exported for tests.
 *
 * @param {string} visibleText - Visible text of the page.
 * @param {number} inventoryCount - Number of interactive elements on the page.
 * @returns {boolean} True when the page should be treated as a loading shell.
 */
export function isLoadingShell(visibleText, inventoryCount) {
    // Element-count veto: a page at or over the threshold is never a shell,
    // whatever its text says.
    const hydrated = inventoryCount >= SHELL_MAX_ELEMENTS;
    return hydrated ? false : isLoadingShellText(visibleText);
}
3485
+ // Thrown from postVerifyLoop when a post-OAuth/post-verify SPA presents a
3486
+ // genuine loading shell that never hydrates within the bounded budget (and a
3487
+ // navigate-to-root retry didn't unstick it). Surfaced as the terminal status
3488
+ // `spa_never_hydrated`. classifyFailure() (skill-schema failure-taxonomy)
3489
+ // has no entry for this kind, so it falls to the deliberate transient default
3490
+ // — a non-demoting outcome (a never-hydrating route is environmental/transient,
3491
+ // not skill rot), and no new exported skill-schema symbol is needed (avoids
3492
+ // the published-dep-skew trap). The leading token before ':' is what
3493
+ // classifyFailure keys on, so the message MUST start with the bare kind.
3494
/**
 * Error type for a loading shell that never hydrates. Behaves like a plain
 * Error apart from its `name`, which is "SpaNeverHydratedError".
 */
export class SpaNeverHydratedError extends Error {
    // Own property set right after super() returns, same as assigning
    // this.name in the constructor body.
    name = "SpaNeverHydratedError";
    /**
     * @param {string} message - Error message, passed to Error unchanged.
     */
    constructor(message) {
        super(message);
    }
}
3308
3500
  // Transient "the session is being established RIGHT NOW" copy. MEASURED on
3309
3501
  // groq (Stytch B2B): after the OAuth callback, /authenticate shows
3310
3502
  // "Logging in…" then "Creating your organization…" for ~5-7s of async
@@ -3348,6 +3540,12 @@ export class SignupAgent {
3348
3540
  // backends_used[i] is the .name string of the LLMClient that produced
3349
3541
  // the i-th reply this run.
3350
3542
  backendsUsed = [];
3543
+ // Fix C4 — the model/provider the backend actually served on the most
3544
+ // recent LLM call, captured per round. callLLM stamps these after every
3545
+ // call; the capture sites read them when dumping a round. Undefined
3546
+ // until the first call (or when the backend doesn't report a model).
3547
+ lastResolvedModel;
3548
+ lastResolvedProvider;
3351
3549
  llmPair;
3352
3550
  // Captcha encounter state for the current run. Updated by the
3353
3551
  // pre/post-submit/re-plan captcha gates in signup(); read by the
@@ -3355,6 +3553,13 @@ export class SignupAgent {
3355
3553
  // because a "blocked" outcome is more diagnostic than an earlier
3356
3554
  // "solved" one and we always want the failure mode in the result.
3357
3555
  captchaEncounter = undefined;
3556
+ // Sticky "this run is on the email path" flag. Set when OAuth turns out to be
3557
+ // login-only (a new identity has no account — Clerk's form_identifier_not_found)
3558
+ // and we fall back to email signup. Without it, the dispatch loop re-runs the
3559
+ // OAuth-first scan after the re-route and re-clicks Google → loops forever
3560
+ // (the cartesia oauth_session_not_persisted bug). Honored by
3561
+ // resolveOAuthCandidates; reset at the start of each signup().
3562
+ committedToEmailPath = false;
3358
3563
  // Invisible-captcha presence for the current run. Cloudflare Turnstile
3359
3564
  // and reCAPTCHA-v3 are score-based: a HIGH score passes silently with no
3360
3565
  // visible widget to "solve", so the visible-gate path above records
@@ -3688,10 +3893,23 @@ export class SignupAgent {
3688
3893
  // F14 — selectors the planner clicked WITHOUT advancing the page.
3689
3894
  // Each no-progress plan records its click selectors here; the next
3690
3895
  // plan that picks ONLY selectors in this set is failed as stuck
3691
- // instead of looping. Cleared on any progress (fill action). The
3692
- // Railway run that motivated F14 spun the same footer "Email" link
3693
- // 5 times before timing out; this loop now bails after 2.
3896
+ // instead of looping. Cleared on ANY real progress between two
3897
+ // clicks of the same selector — a fill/check action OR a
3898
+ // page change (inventory/url moved). The Railway run that motivated
3899
+ // F14 spun the same footer "Email" link 5 times before timing out;
3900
+ // this loop now bails after 2.
3694
3901
  let lastNoProgressClickSelectors = new Set();
3902
+ // Page-state fingerprint from the END of the previous round, used to
3903
+ // decide whether the page actually moved between rounds. A
3904
+ // "fill field → submit → (validation error) → fix field → submit
3905
+ // again" cycle is legitimate progress, NOT a loop: kinde's post-OAuth
3906
+ // register form has a globally-unique "domain" field, so the first
3907
+ // guess collides ("taken") and the bot must edit the field and
3908
+ // re-click the SAME "Next" button. Without this, re-clicking the same
3909
+ // selector after a genuine field edit (or any inventory/url change)
3910
+ // false-bailed as planner_loop even though the intervening fill was
3911
+ // real progress. (MEASURED 2026-06-13, kinde, terminal_round 3.)
3912
+ let lastRoundPageSig = null;
3695
3913
  // rc.31 — once the bot has explicitly clicked an email-flow
3696
3914
  // button (e.g. Railway's "Log in using email" two-stage chooser),
3697
3915
  // stay on the email path. Without this, the auto-OAuth-first
@@ -4057,16 +4275,40 @@ export class SignupAgent {
4057
4275
  steps.push("Form-fill planner described a logged-in product/billing page (not a signup form) — pivoting to post-verify navigation");
4058
4276
  return { kind: "already_oauth" };
4059
4277
  }
4278
+ // The page moved since the previous round if the URL changed or the
4279
+ // set of interactive selectors changed (a field gained/lost, a
4280
+ // validation message toggled an element, a wizard step advanced).
4281
+ // ANY such change means whatever the planner did last round was real
4282
+ // progress — clear the no-progress memory so a re-click of a
4283
+ // previously-"dead" selector on the now-changed page isn't judged a
4284
+ // loop. This is the unique-value-retry case (kinde domain field):
4285
+ // edit field → page re-renders → re-click "Next" is legitimate.
4286
+ const pageSig = state.url +
4287
+ "§" +
4288
+ inventory
4289
+ .map((e) => e.selector)
4290
+ .sort()
4291
+ .join("|");
4292
+ if (lastRoundPageSig !== null && pageSig !== lastRoundPageSig) {
4293
+ lastNoProgressClickSelectors = new Set();
4294
+ }
4295
+ lastRoundPageSig = pageSig;
4060
4296
  // F14 — stuck-detection: if the plan picks ONLY click selectors
4061
4297
  // we already tried in the previous round without page progress,
4062
4298
  // it's a planner loop. Fail planning_failed with the offending
4063
4299
  // selector(s) so the operator sees what stalled. Doesn't fire
4064
4300
  // when the plan adds at least one new selector (legitimate
4065
- // exploration). Doesn't fire on fill plans (forward progress).
4301
+ // exploration). Doesn't fire on fill plans (forward progress),
4302
+ // nor on a plan that ALSO edits a field this round (a fill/check
4303
+ // alongside the re-click is real progress — kinde's "tick the
4304
+ // required box + re-click Next" advances the form even though the
4305
+ // Next selector repeats).
4066
4306
  const planClickSelectors = plan.actions
4067
4307
  .filter((a) => a.kind === "click")
4068
4308
  .map((a) => a.selector);
4069
- if (planClickSelectors.length > 0 &&
4309
+ const planEditsAField = plan.actions.some((a) => a.kind === "fill" || a.kind === "check");
4310
+ if (!planEditsAField &&
4311
+ planClickSelectors.length > 0 &&
4070
4312
  lastNoProgressClickSelectors.size > 0 &&
4071
4313
  planClickSelectors.every((s) => lastNoProgressClickSelectors.has(s))) {
4072
4314
  return {
@@ -4120,6 +4362,16 @@ export class SignupAgent {
4120
4362
  // static page won't help, so a second consecutive empty plan is
4121
4363
  // a dead end. (The 0.1.12 loop spun this 4x on Axiom.)
4122
4364
  const hadFill = plan.actions.some((a) => a.kind === "fill");
4365
+ // A check is ALSO a field edit = real progress, even though (unlike
4366
+ // a fill) it doesn't promote the plan to the submit path below.
4367
+ // (The form-fill plan vocabulary is fill/check/click — `select`
4368
+ // belongs to the post-verify loop.) Treat a check as progress for
4369
+ // the no-progress tracker only: a plan that ticked a box advanced
4370
+ // the form, so its click selectors must NOT be recorded as "dead"
4371
+ // (and any prior dead record is cleared). Without this, a "click
4372
+ // Next (no advance) → tick a required box + re-click Next" cycle
4373
+ // false-bailed as a loop even though the check was progress.
4374
+ const hadFieldEdit = plan.actions.some((a) => a.kind === "fill" || a.kind === "check");
4123
4375
  if (!hadFill) {
4124
4376
  if (plan.actions.length === 0) {
4125
4377
  emptyPlans += 1;
@@ -4142,8 +4394,12 @@ export class SignupAgent {
4142
4394
  // F14 — record the click selectors that didn't advance the
4143
4395
  // page. The next plan's stuck-detection check (above) bails
4144
4396
  // if it picks the same ones again. Hint also tells the
4145
- // planner which selectors NOT to re-pick.
4146
- lastNoProgressClickSelectors = new Set(planClickSelectors);
4397
+ // planner which selectors NOT to re-pick. A plan that ALSO made
4398
+ // a field edit (fill/check) made real progress, so clear the
4399
+ // tracker instead of recording its clicks as dead.
4400
+ lastNoProgressClickSelectors = hadFieldEdit
4401
+ ? new Set()
4402
+ : new Set(planClickSelectors);
4147
4403
  const avoidHint = planClickSelectors.length > 0
4148
4404
  ? ` AVOID these selectors — they were clicked but the page did NOT advance: ${planClickSelectors.map((s) => JSON.stringify(s)).join(", ")}.`
4149
4405
  : "";
@@ -4268,8 +4524,30 @@ export class SignupAgent {
4268
4524
  // the next planner iteration handles SPA settle.
4269
4525
  await this.browser.wait(2);
4270
4526
  const postGate = await this.runCaptchaGate("Post-submit", steps);
4271
- if (postGate.blocked)
4527
+ if (postGate.blocked) {
4528
+ // A managed/invisible Turnstile (Clerk's Smart CAPTCHA) resolves
4529
+ // SERVER-SIDE: the submit can succeed — account created, verification
4530
+ // email sent — even though our client-side token poll timed out.
4531
+ // cartesia PROVED this: it emailed a verification code AFTER the bot had
4532
+ // bailed captcha_blocked. The ground truth of "did the submit go
4533
+ // through" is the INBOX, not the client token. So for a POST-submit
4534
+ // Turnstile with an inbox available, don't hard-bail: proceed to the
4535
+ // verification step and let the inbox poll arbitrate — a code arriving
4536
+ // proves the managed Turnstile passed (→ completes); no code surfaces
4537
+ // an honest verification_not_sent rather than a false captcha_blocked.
4538
+ // A genuine pre-submit gate (no inbox, or a non-Turnstile challenge)
4539
+ // still bails captcha_blocked.
4540
+ if (postGate.kind === "turnstile" && task.inbox !== undefined) {
4541
+ steps.push("Post-submit Turnstile token didn't populate — but a managed Turnstile resolves " +
4542
+ "server-side, so the submit may have gone through. Proceeding to verification; " +
4543
+ "the inbox poll arbitrates (a code = submit succeeded).");
4544
+ // Don't let the recorded block short-circuit later gates / the result.
4545
+ this.captchaEncounter = undefined;
4546
+ await this.captureSignupFormRounds(task.service, plan, inventory, fillValues);
4547
+ return { kind: "submitted" };
4548
+ }
4272
4549
  return { kind: "captcha_blocked", captchaKind: postGate.kind };
4550
+ }
4273
4551
  if (postGate.found && postGate.solved) {
4274
4552
  // Re-click submit so the populated token ships with the form.
4275
4553
  try {
@@ -4329,6 +4607,11 @@ export class SignupAgent {
4329
4607
  state,
4330
4608
  inventory,
4331
4609
  observed,
4610
+ // Fix C4 — the form-plan's backend (planSignupForm ran before
4611
+ // this synthetic preamble capture, so lastResolved* still reflect
4612
+ // it). These preamble rounds replay the one plan; one backend.
4613
+ ...(this.lastResolvedModel !== undefined ? { resolved_model: this.lastResolvedModel } : {}),
4614
+ ...(this.lastResolvedProvider !== undefined ? { resolved_provider: this.lastResolvedProvider } : {}),
4332
4615
  });
4333
4616
  this.captureChainRound += 1;
4334
4617
  };
@@ -4570,8 +4853,10 @@ export class SignupAgent {
4570
4853
  return [...new Set([...fromMarker, ...live])];
4571
4854
  }
4572
4855
  async resolveOAuthCandidates(task, steps) {
4573
- if (task.forceForm === true) {
4574
- steps.push("Force-form: OAuth-first scan suppressed — taking the email/password path");
4856
+ if (task.forceForm === true || this.committedToEmailPath) {
4857
+ steps.push(this.committedToEmailPath
4858
+ ? "Committed to email path (OAuth was login-only) — OAuth-first scan suppressed"
4859
+ : "Force-form: OAuth-first scan suppressed — taking the email/password path");
4575
4860
  return [];
4576
4861
  }
4577
4862
  const ordered = orderOAuthCandidates(task.oauthProvider, await this.effectiveLoggedInProviders());
@@ -4740,9 +5025,14 @@ export class SignupAgent {
4740
5025
  user: args.userBlocks,
4741
5026
  max_tokens: args.maxTokens,
4742
5027
  ...(args.temperature !== undefined ? { temperature: args.temperature } : {}),
5028
+ ...(args.deterministic === true ? { deterministic: true } : {}),
4743
5029
  });
4744
5030
  this.llmCallCount += 1;
4745
5031
  this.backendsUsed.push(resp.backend);
5032
+ // Fix C4 — remember the served model/provider so the capture sites
5033
+ // can stamp this round with what actually produced the plan.
5034
+ this.lastResolvedModel = resp.resolved_model;
5035
+ this.lastResolvedProvider = resp.resolved_provider;
4746
5036
  return resp.text;
4747
5037
  };
4748
5038
  const primaryRaw = await callOne(this.llmPair.primary);
@@ -4825,6 +5115,8 @@ export class SignupAgent {
4825
5115
  // (Google number-match etc.). Without it, the run still works —
4826
5116
  // steps are just only visible in the final result.
4827
5117
  const steps = task.stepsSink ?? [];
5118
+ // Fresh per-run: don't let a prior run's email-path commitment leak.
5119
+ this.committedToEmailPath = false;
4828
5120
  // Stash the service name so the diagnostic uploader (called from
4829
5121
  // deep inside postVerifyLoop after a failed extract) can label
4830
5122
  // the snapshot without us threading task through every method.
@@ -5318,6 +5610,10 @@ export class SignupAgent {
5318
5610
  // /signup form), fill it IN PLACE — re-navigating to task.signupUrl
5319
5611
  // could bounce back to the demo. Otherwise re-navigate (the
5320
5612
  // login-only / no-account case left us on a /login page).
5613
+ // OAuth was login-only (no account for this identity). Commit to the
5614
+ // email path for the rest of the run so the dispatch loop's
5615
+ // OAuth-first scan doesn't re-click Google and loop.
5616
+ this.committedToEmailPath = true;
5321
5617
  const onSignupFormHtml = (await this.browser.getState().catch(() => null))?.html ?? "";
5322
5618
  if (classifySignupHtml(onSignupFormHtml) === "signup") {
5323
5619
  steps.push(`OAuth recovery already on a signup form ` +
@@ -5596,6 +5892,43 @@ export class SignupAgent {
5596
5892
  ...this.resultTail(),
5597
5893
  };
5598
5894
  }
5895
+ // Before the generic no-credentials miss: a service that completed the
5896
+ // signup form and then dropped the account into a manual-approval gate
5897
+ // (waiting room / waitlist / pending review). Same terminal, non-demoting
5898
+ // onboarding_blocked status the OAuth path uses — there's no key to reach
5899
+ // until a human approves the account, so don't surface it as a generic
5900
+ // failure (which can wrongly chase a code bug) or punish a skill for it.
5901
+ //
5902
+ // ONLY when verification did NOT time out. A pending email-verification
5903
+ // page ("check your email", "we sent a code") can read as a review gate
5904
+ // to the classifier, but the authoritative cause there is the missing
5905
+ // mail (verification_not_sent) — anthropic mislabeled as onboarding_blocked
5906
+ // exactly this way. If we were waiting on an email that never came, that
5907
+ // is the failure; don't reinterpret it as a manual-review gate.
5908
+ const reviewGateText = verificationFailed === undefined ? await this.browser.extractText().catch(() => "") : "";
5909
+ // Closed / invite-only registration takes precedence over the review-gate
5910
+ // and the generic miss — no account can be created, so it's terminally
5911
+ // unservable (dequeue), not a fixable nav bug. Checked only when
5912
+ // verification didn't time out (same reasoning as the review gate).
5913
+ if (verificationFailed === undefined && isSignupsClosed(reviewGateText)) {
5914
+ return {
5915
+ success: false,
5916
+ error: `signups_closed: ${task.service} is not accepting new self-serve sign-ups ` +
5917
+ `(closed / invite-only registration) — no account can be created. Dequeue or sign up manually once open.`,
5918
+ steps,
5919
+ ...this.resultTail(),
5920
+ };
5921
+ }
5922
+ if (isOnboardingReviewGate(verificationFailed, reviewGateText)) {
5923
+ return {
5924
+ success: false,
5925
+ error: `onboarding_blocked: ${task.service} put the account into a manual review / ` +
5926
+ `waitlist gate after signup — no API key is obtainable until a human approves ` +
5927
+ `the account. Finish the signup manually once access is granted.`,
5928
+ steps,
5929
+ ...this.resultTail(),
5930
+ };
5931
+ }
5599
5932
  return {
5600
5933
  success: false,
5601
5934
  error: verificationFailed ?? "Could not find credentials on page or via email",
@@ -6348,16 +6681,36 @@ export class SignupAgent {
6348
6681
  // non-auth path here and is left alone.
6349
6682
  if (isSignupOrLoginRoute(this.browser.currentUrl()) &&
6350
6683
  !isOAuthProviderHost(this.browser.currentUrl())) {
6351
- const root = originRoot(this.browser.currentUrl());
6352
- if (root !== null) {
6353
- steps.push(`OAuth: post-auth landing is a signup/login route (${pathOf(this.browser.currentUrl())}) ` +
6354
- `navigating to the app root (${root}) so the service routes us to the dashboard.`);
6355
- try {
6356
- await this.browser.goto(root);
6357
- await this.browser.wait(2);
6358
- }
6359
- catch {
6360
- // navigation hiccup — the post-verify loop re-reads regardless.
6684
+ // Clerk callback: don't immediately navigate away. On a Clerk combined
6685
+ // sign-in/sign-up flow a new-user OAuth completes the account via a
6686
+ // client-side sign-up transfer that takes a beat AFTER the callback lands;
6687
+ // navigating to root unmounts Clerk's JS and interrupts it (the bug behind
6688
+ // the cartesia/braintrust "oauth_session_not_persisted" cluster — proven
6689
+ // not IP). We can't drive the transfer via window.Clerk (patchright's
6690
+ // isolated world hides it), so instead give Clerk's own JS time and detect
6691
+ // success via cookies (world-agnostic). If a session appears, we're signed
6692
+ // in — skip the navigate-away.
6693
+ const onClerkCallback = /sso-callback|\/sso\b/i.test(this.browser.currentUrl());
6694
+ let clerkSignedIn = false;
6695
+ if (onClerkCallback) {
6696
+ clerkSignedIn = await this.browser.waitForClerkSession(12000).catch(() => false);
6697
+ steps.push(`OAuth: Clerk callback — waited for session establish → ${clerkSignedIn ? "signed in" : "no session (likely login-only OAuth / needs email signup)"}`);
6698
+ }
6699
+ if (clerkSignedIn) {
6700
+ await this.browser.wait(2);
6701
+ }
6702
+ else {
6703
+ const root = originRoot(this.browser.currentUrl());
6704
+ if (root !== null) {
6705
+ steps.push(`OAuth: post-auth landing is a signup/login route (${pathOf(this.browser.currentUrl())}) — ` +
6706
+ `navigating to the app root (${root}) so the service routes us to the dashboard.`);
6707
+ try {
6708
+ await this.browser.goto(root);
6709
+ await this.browser.wait(2);
6710
+ }
6711
+ catch {
6712
+ // navigation hiccup — the post-verify loop re-reads regardless.
6713
+ }
6361
6714
  }
6362
6715
  }
6363
6716
  }
@@ -6530,6 +6883,9 @@ export class SignupAgent {
6530
6883
  // oauth_session_not_persisted and abort. The account simply needs
6531
6884
  // creating via email, so re-route to form-fill instead of bailing.
6532
6885
  if (detectGoogleNoAccount(gateState.url, gateText)) {
6886
+ // Commit to email for the rest of the run — OAuth is login-only here, so
6887
+ // the OAuth-first scan must not re-fire after the form-fill re-route.
6888
+ this.committedToEmailPath = true;
6533
6889
  steps.push(`OAuth: ${provider.label} sign-in succeeded but ${task.service} has no account for ` +
6534
6890
  `this identity (login-only OAuth, ${pathOf(gateState.url)}) — abandoning OAuth and ` +
6535
6891
  `falling back to email/password signup to create the account.`);
@@ -6720,6 +7076,19 @@ export class SignupAgent {
6720
7076
  const paywallCheckText = this.lastPostVerifyDoneReason !== null
6721
7077
  ? `${finalText}\n${this.lastPostVerifyDoneReason}`
6722
7078
  : finalText;
7079
+ // Closed / invite-only registration — no account can be created at all
7080
+ // (turbopuffer: "Sign-ups are closed"). Terminally unservable; label it
7081
+ // honestly so the operator dequeues rather than seeing a misleading
7082
+ // oauth_onboarding_failed that implies a fixable nav bug.
7083
+ if (isSignupsClosed(paywallCheckText)) {
7084
+ return {
7085
+ success: false,
7086
+ error: `signups_closed: ${task.service} is not accepting new self-serve sign-ups ` +
7087
+ `(closed / invite-only registration) — no account can be created. Dequeue or sign up manually once open.`,
7088
+ steps,
7089
+ ...this.resultTail(),
7090
+ };
7091
+ }
6723
7092
  if (isAtPaywall(paywallCheckText)) {
6724
7093
  return {
6725
7094
  success: false,
@@ -6729,6 +7098,22 @@ export class SignupAgent {
6729
7098
  ...this.resultTail(),
6730
7099
  };
6731
7100
  }
7101
+ // Service-side manual-approval gate (waiting room / waitlist / account
7102
+ // pending review). The OAuth handshake succeeded but the service won't
7103
+ // grant a key until a human approves the account — there is no key to
7104
+ // reach autonomously. Same terminal onboarding_blocked status as the
7105
+ // billing wall so it's a non-demoting human-pile outcome, not a
7106
+ // mislabeled oauth_onboarding_failed that wrongly implies a code bug.
7107
+ if (isAtAccountReviewGate(paywallCheckText)) {
7108
+ return {
7109
+ success: false,
7110
+ error: `onboarding_blocked: ${task.service} put the account into a manual review / ` +
7111
+ `waitlist gate after signup — no API key is obtainable until a human approves ` +
7112
+ `the account. Finish the signup manually once access is granted.`,
7113
+ steps,
7114
+ ...this.resultTail(),
7115
+ };
7116
+ }
6732
7117
  // rc.39 — anti-bot interstitial that survived the post-OAuth
6733
7118
  // landing. Turso's GitHub SSO callback runs a Cloudflare check
6734
7119
  // that never clears for our Chromium fingerprint; the planner's
@@ -7009,6 +7394,9 @@ ${formatInventory(input.inventory)}`,
7009
7394
  // Deterministic form-fill picks (same rationale as the post-verify
7010
7395
  // planner — D2). Removes a run-to-run flakiness source.
7011
7396
  temperature: 0,
7397
+ // Fix C — pin a single model + provider + seed on the proxy path.
7398
+ // temperature 0 alone leaves the model/provider lottery in play.
7399
+ deterministic: true,
7012
7400
  parse: (raw) => parseSignupPlan(raw, allowed),
7013
7401
  });
7014
7402
  }
@@ -7422,7 +7810,7 @@ ${formatInventory(input.inventory)}`,
7422
7810
  catch {
7423
7811
  break;
7424
7812
  }
7425
- const fallback = pickStuckLoopFallbackUrl(currentUrl, visitedKeysUrls);
7813
+ const fallback = pickStuckLoopFallbackUrl(currentUrl, visitedKeysUrls, undefined, this.resolvedSignupUrl);
7426
7814
  if (fallback === null)
7427
7815
  break;
7428
7816
  visitedKeysUrls.add(fallback);
@@ -7508,6 +7896,15 @@ ${formatInventory(input.inventory)}`,
7508
7896
  // the dashboard for those; a genuine callback rejection stays on login
7509
7897
  // even after reload, so this never masks a real wall.
7510
7898
  let oauthBounceReloadTried = false;
7899
+ // Consecutive rounds the post-verify page read as a genuine loading shell
7900
+ // (visible loading-text AND a sub-threshold inventory). A real SPA
7901
+ // hydrates within the bounded per-round wait, so a streak means the route
7902
+ // never paints content — burn a navigate-to-root retry, then bail
7903
+ // truthfully rather than re-running the wait every round to run_timeout.
7904
+ // Reset on any non-shell round. Mirrors the consecutiveOauthLoginPageRounds
7905
+ // / oauthBounceReloadTried escape used for the stuck-login case.
7906
+ let shellStreak = 0;
7907
+ let shellRootNavTried = false;
7511
7908
  let planFailures = 0;
7512
7909
  // 0.8.2-rc.6 — separate counter for upstream-blip retries. Doesn't
7513
7910
  // gate planFailures (so a transient 502 won't push us into the
@@ -7637,6 +8034,9 @@ ${formatInventory(input.inventory)}`,
7637
8034
  let stuckFiresAtUrl = 0;
7638
8035
  let lastStuckFireUrl = null;
7639
8036
  const triedFallbackUrls = new Set();
8037
+ // Selectors of API-keys nav links already clicked, so the
8038
+ // click-the-real-link escalation doesn't re-click the same link.
8039
+ const clickedKeysLinks = new Set();
7640
8040
  // Premature-done guard budget. When the planner gives up (`done`)
7641
8041
  // with zero credentials captured, we navigate to an unvisited
7642
8042
  // canonical keys URL and re-plan — bounded so a service that
@@ -7872,47 +8272,98 @@ ${formatInventory(input.inventory)}`,
7872
8272
  // SPA hydration guard. A post-OAuth dashboard (northflank's
7873
8273
  // /settings/access-tokens, PostHog) can render a "Connecting"/loading
7874
8274
  // shell while its JS bundle + websocket finish — slow over a
7875
- // residential tunnel. The shell often carries a stray element or two
7876
- // (a logo link, the <noscript>), so gating on an EMPTY inventory
7877
- // misses it; the loading-shell TEXT is the authoritative "not yet
7878
- // rendered" signal. Wait while that text persists, then proceed with
7879
- // whatever's there (an honest "still a shell" beats a premature done —
7880
- // and if the SPA never hydrates, e.g. a blocked websocket, the bound
7881
- // keeps us from hanging).
8275
+ // residential tunnel. We gate on POSITIVE readiness — the instant the
8276
+ // page has SHELL_MAX_ELEMENTS visible interactive elements it is
8277
+ // hydrated by definition and we proceed rather than looping on the
8278
+ // negative "text still says loading" signal. waitForInteractiveDom
8279
+ // returns the moment that count is met (or after the budget), so a fast
8280
+ // page costs ~0 and a slow one waits exactly as long as needed. This is
8281
+ // the fix for the dominant false positive: a fully-rendered dashboard
8282
+ // whose DOM merely CONTAINS a hidden "loading…"/"please wait 30
8283
+ // seconds…" string no longer spins the wait every round to run_timeout.
7882
8284
  //
7883
8285
  // Budget = 6x3s = 18s. MEASURED: a dashboard SPA gated on a websocket
7884
8286
  // (northflank's wss://platform.northflank.com/websocket) hydrates in
7885
- // ~12-15s over the tunnel. A larger budget BACKFIRES on a page that
7886
- // will NEVER hydrate (e.g. an authed user stranded on /signup): the
7887
- // wait re-runs every round and burns the 600s run cap. The escape for
7888
- // a never-hydrating route is navigate-to-root post-OAuth, not a longer
7889
- // wait here.
8287
+ // ~12-15s over the tunnel.
7890
8288
  //
7891
8289
  // ADAPTIVE exception (MEASURED 2026-06-04, clerk): an OAuth/SSO
7892
8290
  // CALLBACK route does a token exchange that renders even slower than a
7893
8291
  // plain dashboard — clerk's `/sign-in/sso-callback` outlasts 18s and
7894
8292
  // the bot bailed at the edge with `oauth_session_not_persisted`. On a
7895
- // callback route the SPA IS making progress, so 12x3s = 36s of
7896
- // patience is warranted; everywhere else the 6-tick budget holds so a
7897
- // genuinely-stuck route still hits the navigate-to-root escape fast.
7898
- // Read the URL fresh each round (it may redirect off the callback).
7899
- const HYDRATION_TICKS = isOAuthCallbackRoute(state.url) ? 12 : 6;
7900
- for (let hydrationWait = 0; hydrationWait < HYDRATION_TICKS &&
7901
- isLoadingShellText(await this.browser.extractText().catch(() => "")); hydrationWait++) {
7902
- args.steps.push(`Post-verify round ${round}: ${pathOf(state.url)} is a loading shell ` +
7903
- `(hydration wait ${hydrationWait + 1}/${HYDRATION_TICKS}) waiting for the SPA to render`);
7904
- await this.browser.wait(3);
7905
- try {
7906
- [state, inventory] = await Promise.all([
7907
- this.browser.getState(),
7908
- this.buildInventory(args.steps, undefined, 80),
7909
- ]);
8293
+ // callback route the SPA IS making progress, so 36s of patience is
8294
+ // warranted; everywhere else the 18s budget holds so a genuinely-stuck
8295
+ // route reaches the navigate-to-root escape fast. Read the URL fresh
8296
+ // each round (it may redirect off the callback).
8297
+ const onOAuthCallback = isOAuthCallbackRoute(state.url);
8298
+ const HYDRATION_BUDGET_MS = onOAuthCallback ? 36_000 : 18_000;
8299
+ await this.browser
8300
+ .waitForInteractiveDom(SHELL_MAX_ELEMENTS, HYDRATION_BUDGET_MS)
8301
+ .catch(() => undefined);
8302
+ // Re-read after the wait — the page may have hydrated (or redirected).
8303
+ try {
8304
+ [state, inventory] = await Promise.all([
8305
+ this.browser.getState(),
8306
+ this.buildInventory(args.steps, undefined, 80),
8307
+ ]);
8308
+ }
8309
+ catch {
8310
+ // mid-navigation read — keep the prior state/inventory; the shell
8311
+ // decision below uses whatever count we have.
8312
+ }
8313
+ // Negative-side decision, now visibility- AND inventory-aware: a shell
8314
+ // requires loading-text in the VISIBLE text AND a sub-threshold
8315
+ // inventory. The OAuth-callback exclusion keeps the navigate-to-root
8316
+ // escape from firing mid-token-exchange (the callback IS making
8317
+ // progress and a navigate-away would abort the session).
8318
+ const stillShell = !onOAuthCallback &&
8319
+ isLoadingShell(await this.browser.extractVisibleText().catch(() => ""), inventory.length);
8320
+ if (stillShell) {
8321
+ shellStreak += 1;
8322
+ // On the 2nd consecutive shell round, do the navigate-to-root the
8323
+ // budgeted wait can't fix — a route stuck mid-hydration (a blocked
8324
+ // websocket, an SPA wedged on a stale path) often paints the real
8325
+ // dashboard from origin root. Once only.
8326
+ if (shellStreak >= 2 && !shellRootNavTried) {
8327
+ shellRootNavTried = true;
8328
+ const root = originRoot(state.url);
8329
+ args.steps.push(`Post-verify round ${round}: ${pathOf(state.url)} read as a loading shell for ` +
8330
+ `${shellStreak} consecutive rounds — navigating to origin root once before bailing.`);
8331
+ try {
8332
+ await this.browser.goto(root ?? state.url);
8333
+ await this.browser
8334
+ .waitForInteractiveDom(SHELL_MAX_ELEMENTS, 15_000)
8335
+ .catch(() => undefined);
8336
+ [state, inventory] = await Promise.all([
8337
+ this.browser.getState(),
8338
+ this.buildInventory(args.steps, undefined, 80),
8339
+ ]);
8340
+ }
8341
+ catch {
8342
+ // navigate/read failed — the streak check below bails on the
8343
+ // next shell read.
8344
+ }
8345
+ // Re-evaluate after the root nav. If it hydrated, fall through to
8346
+ // planning; if it's STILL a shell, bail truthfully now rather than
8347
+ // burning the rest of the round budget to run_timeout.
8348
+ const recovered = !isLoadingShell(await this.browser.extractVisibleText().catch(() => ""), inventory.length);
8349
+ if (recovered) {
8350
+ shellStreak = 0;
8351
+ }
8352
+ else {
8353
+ throw new SpaNeverHydratedError(`spa_never_hydrated: ${args.service}'s post-verify page (${pathOf(state.url)}) ` +
8354
+ `stayed a loading shell across ${shellStreak} rounds and an origin-root reload — ` +
8355
+ `the SPA never rendered an actionable surface (blocked websocket / wedged hydration). ` +
8356
+ `Not a navigation bug; retry or finish the signup manually.`);
8357
+ }
7910
8358
  }
7911
- catch {
7912
- // mid-navigation read keep the prior state/inventory and let
7913
- // the next hydration tick (or the planner) retry.
8359
+ else {
8360
+ args.steps.push(`Post-verify round ${round}: ${pathOf(state.url)} is a loading shell ` +
8361
+ `(streak ${shellStreak}) — letting the SPA settle one more round`);
7914
8362
  }
7915
8363
  }
8364
+ else {
8365
+ shellStreak = 0;
8366
+ }
7916
8367
  // Stalled-wizard breaker. Build a content signature (URL + each
7917
8368
  // inventory element's selector + label) and judge whether the
7918
8369
  // PREVIOUS executed action changed the page. If the last few
@@ -8057,11 +8508,13 @@ ${formatInventory(input.inventory)}`,
8057
8508
  if (consecutiveOauthLoginPageRounds >= 3) {
8058
8509
  args.steps.push(`Post-verify: OAuth run still on a login page (${pathOf(state.url)}) for ` +
8059
8510
  `${consecutiveOauthLoginPageRounds} rounds (incl. a reload) — the OAuth callback never persisted; bailing.`);
8511
+ await this.browser.dumpOAuthDebug(args.service, "callback-not-persisted").catch(() => { });
8060
8512
  throw new OAuthSessionNotPersistedError(`oauth_session_not_persisted: signed in to ${args.service} via OAuth but the page ` +
8061
8513
  `still presents a login screen (${pathOf(state.url)}) after ` +
8062
- `${consecutiveOauthLoginPageRounds} rounds — the OAuth callback never established a ` +
8063
- `session (anti-bot / IP rejection of the callback). Not a navigation bug; needs ` +
8064
- `residential egress or manual signup.`);
8514
+ `${consecutiveOauthLoginPageRounds} rounds — the OAuth callback was rejected at the ` +
8515
+ `automation/fingerprint layer. NOT an IP issue (FALSIFIED 2026-06-14: a clean ` +
8516
+ `residential IP fails this callback identically — see STATE.md), so residential ` +
8517
+ `egress does NOT fix it. Needs a fingerprint/automation fix or manual signup.`);
8065
8518
  }
8066
8519
  }
8067
8520
  else {
@@ -8199,6 +8652,10 @@ ${formatInventory(input.inventory)}`,
8199
8652
  state,
8200
8653
  inventory,
8201
8654
  observed: nextStep,
8655
+ // Fix C4 — stamp the backend that produced THIS round's plan
8656
+ // (planPostVerifyStep set these via callLLM just above).
8657
+ ...(this.lastResolvedModel !== undefined ? { resolved_model: this.lastResolvedModel } : {}),
8658
+ ...(this.lastResolvedProvider !== undefined ? { resolved_provider: this.lastResolvedProvider } : {}),
8202
8659
  });
8203
8660
  capturedRound += 1;
8204
8661
  // Per-round telemetry upload (rc.11). Mirrors the disk capture
@@ -8555,7 +9012,7 @@ ${formatInventory(input.inventory)}`,
8555
9012
  hint = undefined;
8556
9013
  continue;
8557
9014
  }
8558
- const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls, args.service);
9015
+ const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls, args.service, this.resolvedSignupUrl);
8559
9016
  if (fallback !== null) {
8560
9017
  triedFallbackUrls.add(fallback);
8561
9018
  args.steps.push(`Post-verify: stuck-loop detected ${stuckFiresAtUrl}x at ${state.url} — escalating to a hardcoded API-key URL: ${fallback}`);
@@ -8670,7 +9127,30 @@ ${formatInventory(input.inventory)}`,
8670
9127
  // candidate is exhausted, `done` is honored.
8671
9128
  const capturedCredCount = Object.keys(credentials).filter((k) => !NON_CREDENTIAL_KEYS.has(k)).length;
8672
9129
  if (capturedCredCount === 0 && prematureDoneFallbacks < MAX_PREMATURE_DONE_FALLBACKS) {
8673
- const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls, args.service);
9130
+ // Prefer CLICKING a real API-keys nav link over guessing a URL.
9131
+ // The dashboard's own sidebar/menu link carries the correct href;
9132
+ // guessing /keys, /api-keys, /settings/api-keys 404s on services
9133
+ // that host keys at a non-standard path (unify-ai). Only when no
9134
+ // such link is in the DOM do we fall through to URL composition.
9135
+ const keysLink = findApiKeysNavLink(inventory, clickedKeysLinks);
9136
+ if (keysLink !== null) {
9137
+ prematureDoneFallbacks += 1;
9138
+ clickedKeysLinks.add(keysLink.selector);
9139
+ const label = (keysLink.visibleText ?? keysLink.ariaLabel ?? keysLink.href ?? keysLink.selector) || keysLink.selector;
9140
+ args.steps.push(`Post-verify: planner emitted done with no credential captured — ` +
9141
+ `clicking the in-page API-keys link "${label.slice(0, 60)}" ` +
9142
+ `(${keysLink.href ?? keysLink.selector}) before guessing a URL`);
9143
+ try {
9144
+ await this.browser.click(keysLink.selector);
9145
+ await this.browser.waitForInteractiveDom(5, 15_000);
9146
+ }
9147
+ catch (err) {
9148
+ args.steps.push(`Post-verify: API-keys link click failed (${err instanceof Error ? err.message : String(err)}) — continuing.`);
9149
+ }
9150
+ hint = undefined;
9151
+ continue;
9152
+ }
9153
+ const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls, args.service, this.resolvedSignupUrl);
8674
9154
  if (fallback !== null) {
8675
9155
  prematureDoneFallbacks += 1;
8676
9156
  triedFallbackUrls.add(fallback);
@@ -9178,6 +9658,10 @@ ${formatInventory(input.inventory)}`,
9178
9658
  state: postState,
9179
9659
  inventory: postInventory,
9180
9660
  observed: syntheticExtract,
9661
+ // Fix C4 — attribute this synthetic round to the planner call
9662
+ // that drove us here (no LLM ran for this implicit extract).
9663
+ ...(this.lastResolvedModel !== undefined ? { resolved_model: this.lastResolvedModel } : {}),
9664
+ ...(this.lastResolvedProvider !== undefined ? { resolved_provider: this.lastResolvedProvider } : {}),
9181
9665
  });
9182
9666
  capturedRound += 1;
9183
9667
  if (this.roundUploader !== undefined) {
@@ -9611,6 +10095,11 @@ ${formatInventory(input.inventory)}${input.hint !== undefined ? `\n\nIMPORTANT
9611
10095
  // navigation-eval.md). The stall-detector + prior-action memory are the
9612
10096
  // escape from a deterministic loop.
9613
10097
  temperature: 0,
10098
+ // Fix C — pin a single model + provider + seed on the proxy path so
10099
+ // the same dashboard yields the same step regardless of which backend
10100
+ // OpenRouter would otherwise route to (the model/provider lottery
10101
+ // survives temperature 0).
10102
+ deterministic: true,
9614
10103
  parse: (raw) => {
9615
10104
  const step = parsePostVerifyStep(raw, allowed);
9616
10105
  // A `check` must land on a real checkbox/radio — the planner