@trusty-squire/mcp 0.9.16 → 0.9.17-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bot/affordance-probe.d.ts +15 -0
- package/dist/bot/affordance-probe.d.ts.map +1 -0
- package/dist/bot/affordance-probe.js +63 -0
- package/dist/bot/affordance-probe.js.map +1 -0
- package/dist/bot/agent.d.ts +13 -1
- package/dist/bot/agent.d.ts.map +1 -1
- package/dist/bot/agent.js +560 -71
- package/dist/bot/agent.js.map +1 -1
- package/dist/bot/browser.d.ts +6 -0
- package/dist/bot/browser.d.ts.map +1 -1
- package/dist/bot/browser.js +151 -0
- package/dist/bot/browser.js.map +1 -1
- package/dist/bot/llm-client.d.ts +4 -0
- package/dist/bot/llm-client.d.ts.map +1 -1
- package/dist/bot/llm-client.js +14 -0
- package/dist/bot/llm-client.js.map +1 -1
- package/dist/bot/onboarding-capture.d.ts +4 -0
- package/dist/bot/onboarding-capture.d.ts.map +1 -1
- package/dist/bot/onboarding-capture.js +5 -0
- package/dist/bot/onboarding-capture.js.map +1 -1
- package/package.json +1 -1
package/dist/bot/agent.js
CHANGED
|
@@ -169,6 +169,74 @@ export function isAtPaywall(text) {
|
|
|
169
169
|
}
|
|
170
170
|
return false;
|
|
171
171
|
}
|
|
172
|
+
// A service can complete the signup form / OAuth handshake and THEN drop the
|
|
173
|
+
// account into a manual-approval gate — a waiting room, a waitlist, a
|
|
174
|
+
// "request access / your account is pending approval / under review" screen —
|
|
175
|
+
// instead of granting a dashboard + API key. Baseten is the field example:
|
|
176
|
+
// the form submits, then a "waiting_room" / account-review screen appears and
|
|
177
|
+
// no key is obtainable autonomously.
|
|
178
|
+
//
|
|
179
|
+
// This is NOT a captcha and NOT an anti-bot block — it's a service-side human
|
|
180
|
+
// gate. Left undetected, the post-verify loop exhausts its budget and the run
|
|
181
|
+
// gets mislabeled (oauth_onboarding_failed / a generic no-credentials miss),
|
|
182
|
+
// which is misleading and can wrongly count toward skill demotion or send us
|
|
183
|
+
// chasing a non-existent code bug. We classify it as `onboarding_blocked` —
|
|
184
|
+
// the same terminal, human-pile, non-demoting status the billing wall uses —
|
|
185
|
+
// so the loop routes it to the manual pile and never advances the demote
|
|
186
|
+
// counter.
|
|
187
|
+
//
|
|
188
|
+
// Tuned for PRECISION over recall: every pattern requires explicit
|
|
189
|
+
// account-review / waitlist / pending-approval phrasing. A marketing tile that
|
|
190
|
+
// merely mentions "early access" as a feature must not trip it, so the verbs
|
|
191
|
+
// are scoped to the gate's own phrasing (you ARE on the list / access IS
|
|
192
|
+
// pending / the account IS under review).
|
|
193
|
+
const ACCOUNT_REVIEW_GATE_PATTERNS = [
    /\bwaiting\s+room\b/i,
    /\b(?:join|on|added\s+to)\s+(?:the\s+|our\s+)?waitlist\b/i,
    // Contractions accept both the ASCII apostrophe (') and the typographic
    // right single quote (U+2019), since rendered page copy commonly uses the
    // latter ("You’re on the list"). The apostrophe stays optional.
    /\byou['\u2019]?re\s+on\s+the\s+(?:list|waitlist)\b/i,
    /\brequest\s+(?:early\s+)?access\b/i,
    /\baccess\s+(?:is\s+)?pending\b/i,
    /\b(?:your\s+)?account\s+is\s+pending\b/i,
    /\bpending\s+approval\b/i,
    /\baccount\s+(?:is\s+)?(?:currently\s+)?under\s+review\b/i,
    /\byour\s+account\s+is\s+being\s+reviewed\b/i,
    /\bwe['\u2019]?ll\s+email\s+you\s+when\b/i,
    /\bawaiting\s+(?:approval|access)\b/i,
];
/**
 * Post-signup heuristic that distinguishes a service-side manual-approval gate
 * (waiting room / waitlist / pending review) from a normal dashboard, signup
 * form, or captcha page. Pure over page text; exported for unit testing.
 *
 * @param {string} text - Page text to classify.
 * @returns {boolean} true when any ACCOUNT_REVIEW_GATE_PATTERNS entry matches.
 */
export function isAtAccountReviewGate(text) {
    return ACCOUNT_REVIEW_GATE_PATTERNS.some((pattern) => pattern.test(text));
}
|
|
212
|
+
// Decide whether a no-credential form-fill outcome is a manual-review gate.
|
|
213
|
+
// A verification timeout is the AUTHORITATIVE cause and must win: a pending
|
|
214
|
+
// "check your email / we sent a code" page can read as a review gate to
|
|
215
|
+
// isAtAccountReviewGate, so without this guard a verification_not_sent gets
|
|
216
|
+
// mislabeled onboarding_blocked (the anthropic regression). Only when
|
|
217
|
+
// verification did NOT fail is the review-gate text trusted. Pure, testable.
|
|
218
|
+
/**
 * Decide whether a no-credential form-fill outcome is a manual-review gate.
 *
 * A recorded verification failure is the authoritative cause and wins: the
 * page text is only consulted when `verificationFailed` is exactly undefined.
 *
 * @param {*} verificationFailed - undefined when verification did not fail.
 * @param {string} pageText - Page text handed to isAtAccountReviewGate.
 * @returns {boolean} true only when no verification failure was recorded and
 *   the page text reads as an account-review gate.
 */
export function isOnboardingReviewGate(verificationFailed, pageText) {
    if (verificationFailed !== undefined) {
        return false;
    }
    return isAtAccountReviewGate(pageText);
}
|
|
221
|
+
// Closed / invite-only registration: the service does not accept new self-serve
|
|
222
|
+
// signups at all (turbopuffer: "Sign-ups are closed"). Distinct from a review
|
|
223
|
+
// gate (you signed up, awaiting approval) — here NO account can be created, so
|
|
224
|
+
// the run is terminally unservable and the service should be dequeued, not
|
|
225
|
+
// retried or mislabeled oauth_onboarding_failed (which implies a fixable nav
|
|
226
|
+
// bug). Precision-tuned: requires explicit closed/disabled/invite-only phrasing
|
|
227
|
+
// scoped to sign-up/registration, so a normal page mentioning "sign up" or an
|
|
228
|
+
// "invite your team" feature doesn't trip it. Pure over page text.
|
|
229
|
+
const SIGNUPS_CLOSED_PATTERNS = [
    /\bsign[\s-]?ups?\s+(?:are|is)\s+(?:currently\s+)?(?:closed|disabled|paused|not\s+(?:open|available|being\s+accepted))\b/i,
    // "we're" accepts the ASCII apostrophe and the typographic one (U+2019),
    // which is what rendered marketing copy usually contains.
    /\b(?:we\s+are|we['\u2019]re)\s+not\s+(?:currently\s+)?accepting\s+(?:new\s+)?(?:sign[\s-]?ups|registrations|users|accounts)\b/i,
    // Singular and plural: "registration is closed" / "registrations are closed".
    /\bregistrations?\s+(?:(?:is|are)\s+)?(?:currently\s+)?(?:closed|disabled)\b/i,
    /\b(?:sign[\s-]?up|registration|access)\s+is\s+(?:by\s+)?invite[\s-]?only\b/i,
    /\binvite[\s-]?only\s+(?:beta|access|signup|registration)\b/i,
    /\brequest\s+an\s+invite\b/i,
];
/**
 * Detect closed / invite-only registration copy in page text.
 *
 * @param {string} text - Page text to classify.
 * @returns {boolean} true when any SIGNUPS_CLOSED_PATTERNS entry matches.
 */
export function isSignupsClosed(text) {
    return SIGNUPS_CLOSED_PATTERNS.some((pattern) => pattern.test(text));
}
|
|
172
240
|
// S3: does this post-submit page text indicate the service genuinely
|
|
173
241
|
// expects the user to confirm via email? Drives whether the bot polls the
|
|
174
242
|
// full verification timeout or runs only a short probe. Exported so the
|
|
@@ -197,8 +265,9 @@ export class OAuthSessionNotPersistedError extends Error {
|
|
|
197
265
|
// 0.8.2-rc.10 — common dashboard paths that vendors host their
|
|
198
266
|
// per-account API key UI at. Ordered most-specific first so a
|
|
199
267
|
// fallback navigate doesn't land short of the actual page. Returned
|
|
200
|
-
// as an array of path-strings; the caller composes them onto the
|
|
201
|
-
// origin
|
|
268
|
+
// as an array of path-strings; the caller composes them onto the APP
|
|
269
|
+
// origin (the signup/app URL the bot navigated to), NOT the auth/IdP
|
|
270
|
+
// origin it may be stuck on post-OAuth, and skips any already tried.
|
|
202
271
|
//
|
|
203
272
|
// Patterns harvested from Anthropic (settings/keys), Sentry
|
|
204
273
|
// (settings/account/api/auth-tokens), Neon (settings#api-keys),
|
|
@@ -418,33 +487,112 @@ export function findCreateKeyAffordance(inventory) {
|
|
|
418
487
|
candidates.sort((a, b) => b.score - a.score);
|
|
419
488
|
return candidates[0].el;
|
|
420
489
|
}
|
|
490
|
+
// An in-DOM nav link/affordance that points AT an API-keys / tokens page.
|
|
491
|
+
// Distinct from findCreateKeyAffordance (the "create key" button): this finds
|
|
492
|
+
// the LINK that navigates TO the keys page, so the bot can click the real
|
|
493
|
+
// target — whose href is the correct path — instead of GUESSING a URL from a
|
|
494
|
+
// fixed convention list (which 404s whenever a service hosts keys at a
|
|
495
|
+
// non-standard path: unify-ai's keys aren't at /keys, /api-keys, or
|
|
496
|
+
// /settings/api-keys, all of which 404). A human clicks the sidebar link; so
|
|
497
|
+
// should the bot. Exported, pure (operates on the inventory shape only).
|
|
498
|
+
const API_KEYS_HREF = /\/(?:api[-_]?keys?|api[-_]?tokens?|access[-_]?tokens?|auth[-_]?tokens?|secret[-_]?keys?|personal[-_]?access[-_]?tokens?|developers?|keys?|tokens?)(?:[/?#]|$)/i;
const API_KEYS_TEXT = /\b(?:api|access|secret|auth|personal\s+access)\s*(?:keys?|tokens?)\b/i;
/**
 * Pick the inventory element most likely to navigate to an API-keys / tokens
 * page, so the caller can click a real link instead of guessing a URL.
 *
 * Reads these fields from each inventory element: tag, role, visible,
 * selector, href, visibleText, ariaLabel, title, labelText, iconLabel,
 * inViewport.
 *
 * @param {Iterable<object>} inventory - Element descriptors to scan.
 * @param {Set<string>} [alreadyClicked] - Selectors to exclude.
 * @returns {object|null} The highest-scoring element (the earliest one wins a
 *   tie), or null when nothing qualifies.
 */
export function findApiKeysNavLink(inventory, alreadyClicked = new Set()) {
    let best = null;
    let bestScore = -Infinity;
    for (const el of inventory) {
        const clickable = ["a", "button"].includes(el.tag) ||
            ["link", "button"].includes(el.role);
        if (!clickable || el.visible === false || alreadyClicked.has(el.selector)) {
            continue;
        }
        const href = el.href ?? "";
        const label = [el.visibleText, el.ariaLabel, el.title, el.labelText, el.iconLabel]
            .filter((part) => part != null)
            .join(" ")
            .trim();
        // The loose path segments (keys?/tokens?/developers?) live only in the
        // href pattern: a structured path, never free text.
        const hrefHit = href.length > 0 && API_KEYS_HREF.test(href);
        const textHit = API_KEYS_TEXT.test(label);
        if (!(hrefHit || textHit)) {
            continue;
        }
        // A "create API key" control opens a create flow rather than the
        // listing, so it is skipped unless it is an anchor whose href itself
        // points at a keys page.
        const readsAsCreate = CREATE_KEY_PHRASE.test(label);
        const isKeysAnchor = el.tag === "a" && hrefHit;
        if (readsAsCreate && !isKeysAnchor) {
            continue;
        }
        let textPoints = 0;
        if (/\bapi\s*(?:keys?|tokens?)\b/i.test(label)) {
            textPoints = 2;
        }
        else if (textHit) {
            textPoints = 1;
        }
        const score = (hrefHit ? 4 : 0) + // a real, navigable target beats a text guess
            textPoints +
            (el.tag === "a" ? 1 : 0) + // prefer anchors over role=button
            (el.inViewport === true ? 1 : 0);
        // Strictly-greater keeps the first element among equal scores.
        if (score > bestScore) {
            best = el;
            bestScore = score;
        }
    }
    return best;
}
|
|
421
548
|
// Pick the next fallback URL to try, keyed against the origin of the
|
|
422
549
|
// currently-stuck URL. The curated SERVICE_KEYS_PATHS for the run's
|
|
423
550
|
// service (when its host matches the stuck origin) are tried FIRST,
|
|
424
551
|
// then the generic STUCK_LOOP_FALLBACK_PATHS. Returns null when every
|
|
425
552
|
// path has already been attempted. Exported for unit tests.
|
|
426
|
-
export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service) {
|
|
427
|
-
let
|
|
553
|
+
export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service, appUrl) {
|
|
554
|
+
let parsedCurrent;
|
|
428
555
|
try {
|
|
429
|
-
|
|
556
|
+
parsedCurrent = new URL(currentUrl);
|
|
430
557
|
}
|
|
431
558
|
catch {
|
|
432
559
|
return null;
|
|
433
560
|
}
|
|
561
|
+
// Compose key-path guesses onto the APP origin, NOT the origin of the
|
|
562
|
+
// currently-stuck URL. After OAuth the stuck URL is the identity-provider
|
|
563
|
+
// subdomain (auth.lumalabs.ai, accounts.<svc>, login.<svc>, the IdP) — which
|
|
564
|
+
// has no settings/keys pages, so "${authOrigin}/settings/keys" 404s by
|
|
565
|
+
// construction. The keys live on the app host (lumalabs.ai). `appUrl` is the
|
|
566
|
+
// signup/app URL the bot actually navigated to (this.resolvedSignupUrl), so
|
|
567
|
+
// its origin is the right host to guess against. Fall back to the stuck
|
|
568
|
+
// origin only when no usable app URL is known.
|
|
569
|
+
let composeBase = parsedCurrent;
|
|
570
|
+
if (appUrl !== undefined) {
|
|
571
|
+
try {
|
|
572
|
+
const parsedApp = new URL(appUrl);
|
|
573
|
+
if ((parsedApp.protocol === "http:" || parsedApp.protocol === "https:") &&
|
|
574
|
+
!isGoogleSearchUrl(appUrl)) {
|
|
575
|
+
composeBase = parsedApp;
|
|
576
|
+
}
|
|
577
|
+
}
|
|
578
|
+
catch {
|
|
579
|
+
// keep the stuck origin
|
|
580
|
+
}
|
|
581
|
+
}
|
|
434
582
|
// about:blank / data: / chrome-error pages have an opaque origin that
|
|
435
583
|
// serializes to the literal string "null" — building "${origin}${path}"
|
|
436
584
|
// then yields an unnavigable "null/settings/keys". Only compose
|
|
437
585
|
// fallbacks against a real http(s) origin.
|
|
438
|
-
if (
|
|
586
|
+
if (composeBase.protocol !== "http:" && composeBase.protocol !== "https:") {
|
|
439
587
|
return null;
|
|
440
588
|
}
|
|
441
|
-
const origin =
|
|
442
|
-
// Skip a candidate when the
|
|
443
|
-
// (
|
|
444
|
-
//
|
|
445
|
-
//
|
|
446
|
-
const
|
|
447
|
-
// Compose curated per-service paths first, but only when the
|
|
589
|
+
const origin = composeBase.origin;
|
|
590
|
+
// Skip a candidate when it resolves to the exact URL we're already stuck
|
|
591
|
+
// on (full origin+path, trailing-slash/case tolerant) — re-navigating
|
|
592
|
+
// there won't break the cycle. Compared on the full URL now that the
|
|
593
|
+
// compose origin can differ from the stuck origin.
|
|
594
|
+
const currentFull = `${parsedCurrent.origin}${parsedCurrent.pathname}`.replace(/\/+$/, "").toLowerCase();
|
|
595
|
+
// Compose curated per-service paths first, but only when the COMPOSE
|
|
448
596
|
// origin's host actually belongs to the named service. The slug is
|
|
449
597
|
// a substring of the host for the vendors we curate (groq →
|
|
450
598
|
// console.groq.com, launchdarkly → app.launchdarkly.com, …); this
|
|
@@ -454,7 +602,7 @@ export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service) {
|
|
|
454
602
|
const slug = service !== undefined ? serviceSlug(service) : "";
|
|
455
603
|
const curated = slug !== "" &&
|
|
456
604
|
SERVICE_KEYS_PATHS[slug] !== undefined &&
|
|
457
|
-
|
|
605
|
+
composeBase.hostname.toLowerCase().includes(slug)
|
|
458
606
|
? SERVICE_KEYS_PATHS[slug]
|
|
459
607
|
: [];
|
|
460
608
|
// Curated paths lead; the generic list follows. De-dup so a path that
|
|
@@ -468,7 +616,7 @@ export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service) {
|
|
|
468
616
|
const candidate = `${origin}${path}`;
|
|
469
617
|
if (alreadyTried.has(candidate))
|
|
470
618
|
continue;
|
|
471
|
-
if (
|
|
619
|
+
if (`${origin}${path}`.replace(/\/+$/, "").toLowerCase() === currentFull)
|
|
472
620
|
continue;
|
|
473
621
|
return candidate;
|
|
474
622
|
}
|
|
@@ -3305,6 +3453,50 @@ export function isLoadingShellText(text) {
|
|
|
3305
3453
|
// forever, so it is not a signal.
|
|
3306
3454
|
return /\bconnecting\b|\bloading\b|please wait|getting things ready|initiali[sz]ing/i.test(text);
|
|
3307
3455
|
}
|
|
3456
|
+
// The interactive-element count at/above which a page is "hydrated by
|
|
3457
|
+
// definition" — a rendered dashboard/form a user can act on — so a stray
|
|
3458
|
+
// "loading"/"please wait" word in its (visible) text is NOT a hydration
|
|
3459
|
+
// shell. WHY 5: a genuine loading shell paints zero or a handful of chrome
|
|
3460
|
+
// affordances (a logo link, maybe a skip-link); a real authenticated surface
|
|
3461
|
+
// (nav + content + an "API Keys"/"Create" affordance) clears 5 trivially.
|
|
3462
|
+
// Field evidence: luma-ai/unify-ai/sambanova/fireworks-ai/defang carried
|
|
3463
|
+
// 10–95 visible interactive elements yet were flagged a shell EVERY round —
|
|
3464
|
+
// any threshold from ~5 up vetoes all of them while still catching the true
|
|
3465
|
+
// 0-to-few-element shell (northflank). Reuses the same minElements default as
|
|
3466
|
+
// waitForInteractiveDom (5) so the negative gate and the positive readiness
|
|
3467
|
+
// wait agree on what "hydrated" means.
|
|
3468
|
+
export const SHELL_MAX_ELEMENTS = 5;
/**
 * The authoritative loading-shell decision: a page is a hydration shell only
 * when loading-text is present in its VISIBLE text AND it has fewer than
 * SHELL_MAX_ELEMENTS interactive elements. Splitting the two conditions kills
 * the dominant false positive two ways at once:
 *   1. visibleText (innerText) drops hidden skeleton/RSC "loading" strings a
 *      raw textContent read picked up;
 *   2. the inventory veto makes the gate un-fireable on a hydrated page
 *      regardless of any residual stray "loading" word.
 * Pure + exported for unit tests. isLoadingShellText remains the text-only
 * predicate; this is the call-site gate where both signals are available.
 *
 * @param {string} visibleText - Visible page text, passed to isLoadingShellText.
 * @param {number} inventoryCount - Number of interactive elements on the page.
 * @returns {boolean} false when inventoryCount >= SHELL_MAX_ELEMENTS,
 *   otherwise the result of isLoadingShellText(visibleText).
 */
export function isLoadingShell(visibleText, inventoryCount) {
    const hydrated = inventoryCount >= SHELL_MAX_ELEMENTS;
    return hydrated ? false : isLoadingShellText(visibleText);
}
|
|
3485
|
+
// Thrown from postVerifyLoop when a post-OAuth/post-verify SPA presents a
|
|
3486
|
+
// genuine loading shell that never hydrates within the bounded budget (and a
|
|
3487
|
+
// navigate-to-root retry didn't unstick it). Surfaced as the terminal status
|
|
3488
|
+
// `spa_never_hydrated`. classifyFailure() (skill-schema failure-taxonomy)
|
|
3489
|
+
// has no entry for this kind, so it falls to the deliberate transient default
|
|
3490
|
+
// — a non-demoting outcome (a never-hydrating route is environmental/transient,
|
|
3491
|
+
// not skill rot), and no new exported skill-schema symbol is needed (avoids
|
|
3492
|
+
// the published-dep-skew trap). The leading token before ':' is what
|
|
3493
|
+
// classifyFailure keys on, so the message MUST start with the bare kind.
|
|
3494
|
+
/**
 * Error type for a loading shell that never hydrates.
 *
 * @param {string} message - Error message.
 * @param {{cause?: unknown}} [options] - Optional standard Error options,
 *   forwarded to Error so a caller can attach the underlying failure as
 *   `cause`. Omitting it behaves exactly like the single-argument form.
 */
export class SpaNeverHydratedError extends Error {
    constructor(message, options) {
        super(message, options);
        this.name = "SpaNeverHydratedError";
    }
}
|
|
3308
3500
|
// Transient "the session is being established RIGHT NOW" copy. MEASURED on
|
|
3309
3501
|
// groq (Stytch B2B): after the OAuth callback, /authenticate shows
|
|
3310
3502
|
// "Logging in…" then "Creating your organization…" for ~5-7s of async
|
|
@@ -3348,6 +3540,12 @@ export class SignupAgent {
|
|
|
3348
3540
|
// backends_used[i] is the .name string of the LLMClient that produced
|
|
3349
3541
|
// the i-th reply this run.
|
|
3350
3542
|
backendsUsed = [];
|
|
3543
|
+
// Fix C4 — the model/provider the backend actually served on the most
|
|
3544
|
+
// recent LLM call, captured per round. callLLM stamps these after every
|
|
3545
|
+
// call; the capture sites read them when dumping a round. Undefined
|
|
3546
|
+
// until the first call (or when the backend doesn't report a model).
|
|
3547
|
+
lastResolvedModel;
|
|
3548
|
+
lastResolvedProvider;
|
|
3351
3549
|
llmPair;
|
|
3352
3550
|
// Captcha encounter state for the current run. Updated by the
|
|
3353
3551
|
// pre/post-submit/re-plan captcha gates in signup(); read by the
|
|
@@ -3355,6 +3553,13 @@ export class SignupAgent {
|
|
|
3355
3553
|
// because a "blocked" outcome is more diagnostic than an earlier
|
|
3356
3554
|
// "solved" one and we always want the failure mode in the result.
|
|
3357
3555
|
captchaEncounter = undefined;
|
|
3556
|
+
// Sticky "this run is on the email path" flag. Set when OAuth turns out to be
|
|
3557
|
+
// login-only (a new identity has no account — Clerk's form_identifier_not_found)
|
|
3558
|
+
// and we fall back to email signup. Without it, the dispatch loop re-runs the
|
|
3559
|
+
// OAuth-first scan after the re-route and re-clicks Google → loops forever
|
|
3560
|
+
// (the cartesia oauth_session_not_persisted bug). Honored by
|
|
3561
|
+
// resolveOAuthCandidates; reset at the start of each signup().
|
|
3562
|
+
committedToEmailPath = false;
|
|
3358
3563
|
// Invisible-captcha presence for the current run. Cloudflare Turnstile
|
|
3359
3564
|
// and reCAPTCHA-v3 are score-based: a HIGH score passes silently with no
|
|
3360
3565
|
// visible widget to "solve", so the visible-gate path above records
|
|
@@ -3688,10 +3893,23 @@ export class SignupAgent {
|
|
|
3688
3893
|
// F14 — selectors the planner clicked WITHOUT advancing the page.
|
|
3689
3894
|
// Each no-progress plan records its click selectors here; the next
|
|
3690
3895
|
// plan that picks ONLY selectors in this set is failed as stuck
|
|
3691
|
-
// instead of looping. Cleared on
|
|
3692
|
-
//
|
|
3693
|
-
//
|
|
3896
|
+
// instead of looping. Cleared on ANY real progress between two
|
|
3897
|
+
// clicks of the same selector — a fill/check action OR a
|
|
3898
|
+
// page change (inventory/url moved). The Railway run that motivated
|
|
3899
|
+
// F14 spun the same footer "Email" link 5 times before timing out;
|
|
3900
|
+
// this loop now bails after 2.
|
|
3694
3901
|
let lastNoProgressClickSelectors = new Set();
|
|
3902
|
+
// Page-state fingerprint from the END of the previous round, used to
|
|
3903
|
+
// decide whether the page actually moved between rounds. A
|
|
3904
|
+
// "fill field → submit → (validation error) → fix field → submit
|
|
3905
|
+
// again" cycle is legitimate progress, NOT a loop: kinde's post-OAuth
|
|
3906
|
+
// register form has a globally-unique "domain" field, so the first
|
|
3907
|
+
// guess collides ("taken") and the bot must edit the field and
|
|
3908
|
+
// re-click the SAME "Next" button. Without this, re-clicking the same
|
|
3909
|
+
// selector after a genuine field edit (or any inventory/url change)
|
|
3910
|
+
// false-bailed as planner_loop even though the intervening fill was
|
|
3911
|
+
// real progress. (MEASURED 2026-06-13, kinde, terminal_round 3.)
|
|
3912
|
+
let lastRoundPageSig = null;
|
|
3695
3913
|
// rc.31 — once the bot has explicitly clicked an email-flow
|
|
3696
3914
|
// button (e.g. Railway's "Log in using email" two-stage chooser),
|
|
3697
3915
|
// stay on the email path. Without this, the auto-OAuth-first
|
|
@@ -4057,16 +4275,40 @@ export class SignupAgent {
|
|
|
4057
4275
|
steps.push("Form-fill planner described a logged-in product/billing page (not a signup form) — pivoting to post-verify navigation");
|
|
4058
4276
|
return { kind: "already_oauth" };
|
|
4059
4277
|
}
|
|
4278
|
+
// The page moved since the previous round if the URL changed or the
|
|
4279
|
+
// set of interactive selectors changed (a field gained/lost, a
|
|
4280
|
+
// validation message toggled an element, a wizard step advanced).
|
|
4281
|
+
// ANY such change means whatever the planner did last round was real
|
|
4282
|
+
// progress — clear the no-progress memory so a re-click of a
|
|
4283
|
+
// previously-"dead" selector on the now-changed page isn't judged a
|
|
4284
|
+
// loop. This is the unique-value-retry case (kinde domain field):
|
|
4285
|
+
// edit field → page re-renders → re-click "Next" is legitimate.
|
|
4286
|
+
const pageSig = state.url +
|
|
4287
|
+
"§" +
|
|
4288
|
+
inventory
|
|
4289
|
+
.map((e) => e.selector)
|
|
4290
|
+
.sort()
|
|
4291
|
+
.join("|");
|
|
4292
|
+
if (lastRoundPageSig !== null && pageSig !== lastRoundPageSig) {
|
|
4293
|
+
lastNoProgressClickSelectors = new Set();
|
|
4294
|
+
}
|
|
4295
|
+
lastRoundPageSig = pageSig;
|
|
4060
4296
|
// F14 — stuck-detection: if the plan picks ONLY click selectors
|
|
4061
4297
|
// we already tried in the previous round without page progress,
|
|
4062
4298
|
// it's a planner loop. Fail planning_failed with the offending
|
|
4063
4299
|
// selector(s) so the operator sees what stalled. Doesn't fire
|
|
4064
4300
|
// when the plan adds at least one new selector (legitimate
|
|
4065
|
-
// exploration). Doesn't fire on fill plans (forward progress)
|
|
4301
|
+
// exploration). Doesn't fire on fill plans (forward progress),
|
|
4302
|
+
// nor on a plan that ALSO edits a field this round (a fill/check
|
|
4303
|
+
// alongside the re-click is real progress — kinde's "tick the
|
|
4304
|
+
// required box + re-click Next" advances the form even though the
|
|
4305
|
+
// Next selector repeats).
|
|
4066
4306
|
const planClickSelectors = plan.actions
|
|
4067
4307
|
.filter((a) => a.kind === "click")
|
|
4068
4308
|
.map((a) => a.selector);
|
|
4069
|
-
|
|
4309
|
+
const planEditsAField = plan.actions.some((a) => a.kind === "fill" || a.kind === "check");
|
|
4310
|
+
if (!planEditsAField &&
|
|
4311
|
+
planClickSelectors.length > 0 &&
|
|
4070
4312
|
lastNoProgressClickSelectors.size > 0 &&
|
|
4071
4313
|
planClickSelectors.every((s) => lastNoProgressClickSelectors.has(s))) {
|
|
4072
4314
|
return {
|
|
@@ -4120,6 +4362,16 @@ export class SignupAgent {
|
|
|
4120
4362
|
// static page won't help, so a second consecutive empty plan is
|
|
4121
4363
|
// a dead end. (The 0.1.12 loop spun this 4x on Axiom.)
|
|
4122
4364
|
const hadFill = plan.actions.some((a) => a.kind === "fill");
|
|
4365
|
+
// A check is ALSO a field edit = real progress, even though (unlike
|
|
4366
|
+
// a fill) it doesn't promote the plan to the submit path below.
|
|
4367
|
+
// (The form-fill plan vocabulary is fill/check/click — `select`
|
|
4368
|
+
// belongs to the post-verify loop.) Treat a check as progress for
|
|
4369
|
+
// the no-progress tracker only: a plan that ticked a box advanced
|
|
4370
|
+
// the form, so its click selectors must NOT be recorded as "dead"
|
|
4371
|
+
// (and any prior dead record is cleared). Without this, a "click
|
|
4372
|
+
// Next (no advance) → tick a required box + re-click Next" cycle
|
|
4373
|
+
// false-bailed as a loop even though the check was progress.
|
|
4374
|
+
const hadFieldEdit = plan.actions.some((a) => a.kind === "fill" || a.kind === "check");
|
|
4123
4375
|
if (!hadFill) {
|
|
4124
4376
|
if (plan.actions.length === 0) {
|
|
4125
4377
|
emptyPlans += 1;
|
|
@@ -4142,8 +4394,12 @@ export class SignupAgent {
|
|
|
4142
4394
|
// F14 — record the click selectors that didn't advance the
|
|
4143
4395
|
// page. The next plan's stuck-detection check (above) bails
|
|
4144
4396
|
// if it picks the same ones again. Hint also tells the
|
|
4145
|
-
// planner which selectors NOT to re-pick.
|
|
4146
|
-
|
|
4397
|
+
// planner which selectors NOT to re-pick. A plan that ALSO made
|
|
4398
|
+
// a field edit (fill/check) made real progress, so clear the
|
|
4399
|
+
// tracker instead of recording its clicks as dead.
|
|
4400
|
+
lastNoProgressClickSelectors = hadFieldEdit
|
|
4401
|
+
? new Set()
|
|
4402
|
+
: new Set(planClickSelectors);
|
|
4147
4403
|
const avoidHint = planClickSelectors.length > 0
|
|
4148
4404
|
? ` AVOID these selectors — they were clicked but the page did NOT advance: ${planClickSelectors.map((s) => JSON.stringify(s)).join(", ")}.`
|
|
4149
4405
|
: "";
|
|
@@ -4268,8 +4524,30 @@ export class SignupAgent {
|
|
|
4268
4524
|
// the next planner iteration handles SPA settle.
|
|
4269
4525
|
await this.browser.wait(2);
|
|
4270
4526
|
const postGate = await this.runCaptchaGate("Post-submit", steps);
|
|
4271
|
-
if (postGate.blocked)
|
|
4527
|
+
if (postGate.blocked) {
|
|
4528
|
+
// A managed/invisible Turnstile (Clerk's Smart CAPTCHA) resolves
|
|
4529
|
+
// SERVER-SIDE: the submit can succeed — account created, verification
|
|
4530
|
+
// email sent — even though our client-side token poll timed out.
|
|
4531
|
+
// cartesia PROVED this: it emailed a verification code AFTER the bot had
|
|
4532
|
+
// bailed captcha_blocked. The ground truth of "did the submit go
|
|
4533
|
+
// through" is the INBOX, not the client token. So for a POST-submit
|
|
4534
|
+
// Turnstile with an inbox available, don't hard-bail: proceed to the
|
|
4535
|
+
// verification step and let the inbox poll arbitrate — a code arriving
|
|
4536
|
+
// proves the managed Turnstile passed (→ completes); no code surfaces
|
|
4537
|
+
// an honest verification_not_sent rather than a false captcha_blocked.
|
|
4538
|
+
// A genuine pre-submit gate (no inbox, or a non-Turnstile challenge)
|
|
4539
|
+
// still bails captcha_blocked.
|
|
4540
|
+
if (postGate.kind === "turnstile" && task.inbox !== undefined) {
|
|
4541
|
+
steps.push("Post-submit Turnstile token didn't populate — but a managed Turnstile resolves " +
|
|
4542
|
+
"server-side, so the submit may have gone through. Proceeding to verification; " +
|
|
4543
|
+
"the inbox poll arbitrates (a code = submit succeeded).");
|
|
4544
|
+
// Don't let the recorded block short-circuit later gates / the result.
|
|
4545
|
+
this.captchaEncounter = undefined;
|
|
4546
|
+
await this.captureSignupFormRounds(task.service, plan, inventory, fillValues);
|
|
4547
|
+
return { kind: "submitted" };
|
|
4548
|
+
}
|
|
4272
4549
|
return { kind: "captcha_blocked", captchaKind: postGate.kind };
|
|
4550
|
+
}
|
|
4273
4551
|
if (postGate.found && postGate.solved) {
|
|
4274
4552
|
// Re-click submit so the populated token ships with the form.
|
|
4275
4553
|
try {
|
|
@@ -4329,6 +4607,11 @@ export class SignupAgent {
|
|
|
4329
4607
|
state,
|
|
4330
4608
|
inventory,
|
|
4331
4609
|
observed,
|
|
4610
|
+
// Fix C4 — the form-plan's backend (planSignupForm ran before
|
|
4611
|
+
// this synthetic preamble capture, so lastResolved* still reflect
|
|
4612
|
+
// it). These preamble rounds replay the one plan; one backend.
|
|
4613
|
+
...(this.lastResolvedModel !== undefined ? { resolved_model: this.lastResolvedModel } : {}),
|
|
4614
|
+
...(this.lastResolvedProvider !== undefined ? { resolved_provider: this.lastResolvedProvider } : {}),
|
|
4332
4615
|
});
|
|
4333
4616
|
this.captureChainRound += 1;
|
|
4334
4617
|
};
|
|
@@ -4570,8 +4853,10 @@ export class SignupAgent {
|
|
|
4570
4853
|
return [...new Set([...fromMarker, ...live])];
|
|
4571
4854
|
}
|
|
4572
4855
|
async resolveOAuthCandidates(task, steps) {
|
|
4573
|
-
if (task.forceForm === true) {
|
|
4574
|
-
steps.push(
|
|
4856
|
+
if (task.forceForm === true || this.committedToEmailPath) {
|
|
4857
|
+
steps.push(this.committedToEmailPath
|
|
4858
|
+
? "Committed to email path (OAuth was login-only) — OAuth-first scan suppressed"
|
|
4859
|
+
: "Force-form: OAuth-first scan suppressed — taking the email/password path");
|
|
4575
4860
|
return [];
|
|
4576
4861
|
}
|
|
4577
4862
|
const ordered = orderOAuthCandidates(task.oauthProvider, await this.effectiveLoggedInProviders());
|
|
@@ -4740,9 +5025,14 @@ export class SignupAgent {
|
|
|
4740
5025
|
user: args.userBlocks,
|
|
4741
5026
|
max_tokens: args.maxTokens,
|
|
4742
5027
|
...(args.temperature !== undefined ? { temperature: args.temperature } : {}),
|
|
5028
|
+
...(args.deterministic === true ? { deterministic: true } : {}),
|
|
4743
5029
|
});
|
|
4744
5030
|
this.llmCallCount += 1;
|
|
4745
5031
|
this.backendsUsed.push(resp.backend);
|
|
5032
|
+
// Fix C4 — remember the served model/provider so the capture sites
|
|
5033
|
+
// can stamp this round with what actually produced the plan.
|
|
5034
|
+
this.lastResolvedModel = resp.resolved_model;
|
|
5035
|
+
this.lastResolvedProvider = resp.resolved_provider;
|
|
4746
5036
|
return resp.text;
|
|
4747
5037
|
};
|
|
4748
5038
|
const primaryRaw = await callOne(this.llmPair.primary);
|
|
@@ -4825,6 +5115,8 @@ export class SignupAgent {
|
|
|
4825
5115
|
// (Google number-match etc.). Without it, the run still works —
|
|
4826
5116
|
// steps are just only visible in the final result.
|
|
4827
5117
|
const steps = task.stepsSink ?? [];
|
|
5118
|
+
// Fresh per-run: don't let a prior run's email-path commitment leak.
|
|
5119
|
+
this.committedToEmailPath = false;
|
|
4828
5120
|
// Stash the service name so the diagnostic uploader (called from
|
|
4829
5121
|
// deep inside postVerifyLoop after a failed extract) can label
|
|
4830
5122
|
// the snapshot without us threading task through every method.
|
|
@@ -5318,6 +5610,10 @@ export class SignupAgent {
|
|
|
5318
5610
|
// /signup form), fill it IN PLACE — re-navigating to task.signupUrl
|
|
5319
5611
|
// could bounce back to the demo. Otherwise re-navigate (the
|
|
5320
5612
|
// login-only / no-account case left us on a /login page).
|
|
5613
|
+
// OAuth was login-only (no account for this identity). Commit to the
|
|
5614
|
+
// email path for the rest of the run so the dispatch loop's
|
|
5615
|
+
// OAuth-first scan doesn't re-click Google and loop.
|
|
5616
|
+
this.committedToEmailPath = true;
|
|
5321
5617
|
const onSignupFormHtml = (await this.browser.getState().catch(() => null))?.html ?? "";
|
|
5322
5618
|
if (classifySignupHtml(onSignupFormHtml) === "signup") {
|
|
5323
5619
|
steps.push(`OAuth recovery already on a signup form ` +
|
|
@@ -5596,6 +5892,43 @@ export class SignupAgent {
|
|
|
5596
5892
|
...this.resultTail(),
|
|
5597
5893
|
};
|
|
5598
5894
|
}
|
|
5895
|
+
// Before the generic no-credentials miss: a service that completed the
|
|
5896
|
+
// signup form and then dropped the account into a manual-approval gate
|
|
5897
|
+
// (waiting room / waitlist / pending review). Same terminal, non-demoting
|
|
5898
|
+
// onboarding_blocked status the OAuth path uses — there's no key to reach
|
|
5899
|
+
// until a human approves the account, so don't surface it as a generic
|
|
5900
|
+
// failure (which can wrongly chase a code bug) or punish a skill for it.
|
|
5901
|
+
//
|
|
5902
|
+
// ONLY when verification did NOT time out. A pending email-verification
|
|
5903
|
+
// page ("check your email", "we sent a code") can read as a review gate
|
|
5904
|
+
// to the classifier, but the authoritative cause there is the missing
|
|
5905
|
+
// mail (verification_not_sent) — anthropic mislabeled as onboarding_blocked
|
|
5906
|
+
// exactly this way. If we were waiting on an email that never came, that
|
|
5907
|
+
// is the failure; don't reinterpret it as a manual-review gate.
|
|
5908
|
+
const reviewGateText = verificationFailed === undefined ? await this.browser.extractText().catch(() => "") : "";
|
|
5909
|
+
// Closed / invite-only registration takes precedence over the review-gate
|
|
5910
|
+
// and the generic miss — no account can be created, so it's terminally
|
|
5911
|
+
// unservable (dequeue), not a fixable nav bug. Checked only when
|
|
5912
|
+
// verification didn't time out (same reasoning as the review gate).
|
|
5913
|
+
if (verificationFailed === undefined && isSignupsClosed(reviewGateText)) {
|
|
5914
|
+
return {
|
|
5915
|
+
success: false,
|
|
5916
|
+
error: `signups_closed: ${task.service} is not accepting new self-serve sign-ups ` +
|
|
5917
|
+
`(closed / invite-only registration) — no account can be created. Dequeue or sign up manually once open.`,
|
|
5918
|
+
steps,
|
|
5919
|
+
...this.resultTail(),
|
|
5920
|
+
};
|
|
5921
|
+
}
|
|
5922
|
+
if (isOnboardingReviewGate(verificationFailed, reviewGateText)) {
|
|
5923
|
+
return {
|
|
5924
|
+
success: false,
|
|
5925
|
+
error: `onboarding_blocked: ${task.service} put the account into a manual review / ` +
|
|
5926
|
+
`waitlist gate after signup — no API key is obtainable until a human approves ` +
|
|
5927
|
+
`the account. Finish the signup manually once access is granted.`,
|
|
5928
|
+
steps,
|
|
5929
|
+
...this.resultTail(),
|
|
5930
|
+
};
|
|
5931
|
+
}
|
|
5599
5932
|
return {
|
|
5600
5933
|
success: false,
|
|
5601
5934
|
error: verificationFailed ?? "Could not find credentials on page or via email",
|
|
@@ -6348,16 +6681,36 @@ export class SignupAgent {
|
|
|
6348
6681
|
// non-auth path here and is left alone.
|
|
6349
6682
|
if (isSignupOrLoginRoute(this.browser.currentUrl()) &&
|
|
6350
6683
|
!isOAuthProviderHost(this.browser.currentUrl())) {
|
|
6351
|
-
|
|
6352
|
-
|
|
6353
|
-
|
|
6354
|
-
|
|
6355
|
-
|
|
6356
|
-
|
|
6357
|
-
|
|
6358
|
-
|
|
6359
|
-
|
|
6360
|
-
|
|
6684
|
+
// Clerk callback: don't immediately navigate away. On a Clerk combined
|
|
6685
|
+
// sign-in/sign-up flow a new-user OAuth completes the account via a
|
|
6686
|
+
// client-side sign-up transfer that takes a beat AFTER the callback lands;
|
|
6687
|
+
// navigating to root unmounts Clerk's JS and interrupts it (the bug behind
|
|
6688
|
+
// the cartesia/braintrust "oauth_session_not_persisted" cluster — proven
|
|
6689
|
+
// not IP). We can't drive the transfer via window.Clerk (patchright's
|
|
6690
|
+
// isolated world hides it), so instead give Clerk's own JS time and detect
|
|
6691
|
+
// success via cookies (world-agnostic). If a session appears, we're signed
|
|
6692
|
+
// in — skip the navigate-away.
|
|
6693
|
+
const onClerkCallback = /sso-callback|\/sso\b/i.test(this.browser.currentUrl());
|
|
6694
|
+
let clerkSignedIn = false;
|
|
6695
|
+
if (onClerkCallback) {
|
|
6696
|
+
clerkSignedIn = await this.browser.waitForClerkSession(12000).catch(() => false);
|
|
6697
|
+
steps.push(`OAuth: Clerk callback — waited for session establish → ${clerkSignedIn ? "signed in" : "no session (likely login-only OAuth / needs email signup)"}`);
|
|
6698
|
+
}
|
|
6699
|
+
if (clerkSignedIn) {
|
|
6700
|
+
await this.browser.wait(2);
|
|
6701
|
+
}
|
|
6702
|
+
else {
|
|
6703
|
+
const root = originRoot(this.browser.currentUrl());
|
|
6704
|
+
if (root !== null) {
|
|
6705
|
+
steps.push(`OAuth: post-auth landing is a signup/login route (${pathOf(this.browser.currentUrl())}) — ` +
|
|
6706
|
+
`navigating to the app root (${root}) so the service routes us to the dashboard.`);
|
|
6707
|
+
try {
|
|
6708
|
+
await this.browser.goto(root);
|
|
6709
|
+
await this.browser.wait(2);
|
|
6710
|
+
}
|
|
6711
|
+
catch {
|
|
6712
|
+
// navigation hiccup — the post-verify loop re-reads regardless.
|
|
6713
|
+
}
|
|
6361
6714
|
}
|
|
6362
6715
|
}
|
|
6363
6716
|
}
|
|
@@ -6530,6 +6883,9 @@ export class SignupAgent {
|
|
|
6530
6883
|
// oauth_session_not_persisted and abort. The account simply needs
|
|
6531
6884
|
// creating via email, so re-route to form-fill instead of bailing.
|
|
6532
6885
|
if (detectGoogleNoAccount(gateState.url, gateText)) {
|
|
6886
|
+
// Commit to email for the rest of the run — OAuth is login-only here, so
|
|
6887
|
+
// the OAuth-first scan must not re-fire after the form-fill re-route.
|
|
6888
|
+
this.committedToEmailPath = true;
|
|
6533
6889
|
steps.push(`OAuth: ${provider.label} sign-in succeeded but ${task.service} has no account for ` +
|
|
6534
6890
|
`this identity (login-only OAuth, ${pathOf(gateState.url)}) — abandoning OAuth and ` +
|
|
6535
6891
|
`falling back to email/password signup to create the account.`);
|
|
@@ -6720,6 +7076,19 @@ export class SignupAgent {
|
|
|
6720
7076
|
const paywallCheckText = this.lastPostVerifyDoneReason !== null
|
|
6721
7077
|
? `${finalText}\n${this.lastPostVerifyDoneReason}`
|
|
6722
7078
|
: finalText;
|
|
7079
|
+
// Closed / invite-only registration — no account can be created at all
|
|
7080
|
+
// (turbopuffer: "Sign-ups are closed"). Terminally unservable; label it
|
|
7081
|
+
// honestly so the operator dequeues rather than seeing a misleading
|
|
7082
|
+
// oauth_onboarding_failed that implies a fixable nav bug.
|
|
7083
|
+
if (isSignupsClosed(paywallCheckText)) {
|
|
7084
|
+
return {
|
|
7085
|
+
success: false,
|
|
7086
|
+
error: `signups_closed: ${task.service} is not accepting new self-serve sign-ups ` +
|
|
7087
|
+
`(closed / invite-only registration) — no account can be created. Dequeue or sign up manually once open.`,
|
|
7088
|
+
steps,
|
|
7089
|
+
...this.resultTail(),
|
|
7090
|
+
};
|
|
7091
|
+
}
|
|
6723
7092
|
if (isAtPaywall(paywallCheckText)) {
|
|
6724
7093
|
return {
|
|
6725
7094
|
success: false,
|
|
@@ -6729,6 +7098,22 @@ export class SignupAgent {
|
|
|
6729
7098
|
...this.resultTail(),
|
|
6730
7099
|
};
|
|
6731
7100
|
}
|
|
7101
|
+
// Service-side manual-approval gate (waiting room / waitlist / account
|
|
7102
|
+
// pending review). The OAuth handshake succeeded but the service won't
|
|
7103
|
+
// grant a key until a human approves the account — there is no key to
|
|
7104
|
+
// reach autonomously. Same terminal onboarding_blocked status as the
|
|
7105
|
+
// billing wall so it's a non-demoting human-pile outcome, not a
|
|
7106
|
+
// mislabeled oauth_onboarding_failed that wrongly implies a code bug.
|
|
7107
|
+
if (isAtAccountReviewGate(paywallCheckText)) {
|
|
7108
|
+
return {
|
|
7109
|
+
success: false,
|
|
7110
|
+
error: `onboarding_blocked: ${task.service} put the account into a manual review / ` +
|
|
7111
|
+
`waitlist gate after signup — no API key is obtainable until a human approves ` +
|
|
7112
|
+
`the account. Finish the signup manually once access is granted.`,
|
|
7113
|
+
steps,
|
|
7114
|
+
...this.resultTail(),
|
|
7115
|
+
};
|
|
7116
|
+
}
|
|
6732
7117
|
// rc.39 — anti-bot interstitial that survived the post-OAuth
|
|
6733
7118
|
// landing. Turso's GitHub SSO callback runs a Cloudflare check
|
|
6734
7119
|
// that never clears for our Chromium fingerprint; the planner's
|
|
@@ -7009,6 +7394,9 @@ ${formatInventory(input.inventory)}`,
|
|
|
7009
7394
|
// Deterministic form-fill picks (same rationale as the post-verify
|
|
7010
7395
|
// planner — D2). Removes a run-to-run flakiness source.
|
|
7011
7396
|
temperature: 0,
|
|
7397
|
+
// Fix C — pin a single model + provider + seed on the proxy path.
|
|
7398
|
+
// temperature 0 alone leaves the model/provider lottery in play.
|
|
7399
|
+
deterministic: true,
|
|
7012
7400
|
parse: (raw) => parseSignupPlan(raw, allowed),
|
|
7013
7401
|
});
|
|
7014
7402
|
}
|
|
@@ -7422,7 +7810,7 @@ ${formatInventory(input.inventory)}`,
|
|
|
7422
7810
|
catch {
|
|
7423
7811
|
break;
|
|
7424
7812
|
}
|
|
7425
|
-
const fallback = pickStuckLoopFallbackUrl(currentUrl, visitedKeysUrls);
|
|
7813
|
+
const fallback = pickStuckLoopFallbackUrl(currentUrl, visitedKeysUrls, undefined, this.resolvedSignupUrl);
|
|
7426
7814
|
if (fallback === null)
|
|
7427
7815
|
break;
|
|
7428
7816
|
visitedKeysUrls.add(fallback);
|
|
@@ -7508,6 +7896,15 @@ ${formatInventory(input.inventory)}`,
|
|
|
7508
7896
|
// the dashboard for those; a genuine callback rejection stays on login
|
|
7509
7897
|
// even after reload, so this never masks a real wall.
|
|
7510
7898
|
let oauthBounceReloadTried = false;
|
|
7899
|
+
// Consecutive rounds the post-verify page read as a genuine loading shell
|
|
7900
|
+
// (visible loading-text AND a sub-threshold inventory). A real SPA
|
|
7901
|
+
// hydrates within the bounded per-round wait, so a streak means the route
|
|
7902
|
+
// never paints content — burn a navigate-to-root retry, then bail
|
|
7903
|
+
// truthfully rather than re-running the wait every round to run_timeout.
|
|
7904
|
+
// Reset on any non-shell round. Mirrors the consecutiveOauthLoginPageRounds
|
|
7905
|
+
// / oauthBounceReloadTried escape used for the stuck-login case.
|
|
7906
|
+
let shellStreak = 0;
|
|
7907
|
+
let shellRootNavTried = false;
|
|
7511
7908
|
let planFailures = 0;
|
|
7512
7909
|
// 0.8.2-rc.6 — separate counter for upstream-blip retries. Doesn't
|
|
7513
7910
|
// gate planFailures (so a transient 502 won't push us into the
|
|
@@ -7637,6 +8034,9 @@ ${formatInventory(input.inventory)}`,
|
|
|
7637
8034
|
let stuckFiresAtUrl = 0;
|
|
7638
8035
|
let lastStuckFireUrl = null;
|
|
7639
8036
|
const triedFallbackUrls = new Set();
|
|
8037
|
+
// Selectors of API-keys nav links already clicked, so the
|
|
8038
|
+
// click-the-real-link escalation doesn't re-click the same link.
|
|
8039
|
+
const clickedKeysLinks = new Set();
|
|
7640
8040
|
// Premature-done guard budget. When the planner gives up (`done`)
|
|
7641
8041
|
// with zero credentials captured, we navigate to an unvisited
|
|
7642
8042
|
// canonical keys URL and re-plan — bounded so a service that
|
|
@@ -7872,47 +8272,98 @@ ${formatInventory(input.inventory)}`,
|
|
|
7872
8272
|
// SPA hydration guard. A post-OAuth dashboard (northflank's
|
|
7873
8273
|
// /settings/access-tokens, PostHog) can render a "Connecting"/loading
|
|
7874
8274
|
// shell while its JS bundle + websocket finish — slow over a
|
|
7875
|
-
// residential tunnel.
|
|
7876
|
-
//
|
|
7877
|
-
//
|
|
7878
|
-
//
|
|
7879
|
-
//
|
|
7880
|
-
// and
|
|
7881
|
-
//
|
|
8275
|
+
// residential tunnel. We gate on POSITIVE readiness — the instant the
|
|
8276
|
+
// page has SHELL_MAX_ELEMENTS visible interactive elements it is
|
|
8277
|
+
// hydrated by definition and we proceed — rather than looping on the
|
|
8278
|
+
// negative "text still says loading" signal. waitForInteractiveDom
|
|
8279
|
+
// returns the moment that count is met (or after the budget), so a fast
|
|
8280
|
+
// page costs ~0 and a slow one waits exactly as long as needed. This is
|
|
8281
|
+
// the fix for the dominant false positive: a fully-rendered dashboard
|
|
8282
|
+
// whose DOM merely CONTAINS a hidden "loading…"/"please wait 30
|
|
8283
|
+
// seconds…" string no longer spins the wait every round to run_timeout.
|
|
7882
8284
|
//
|
|
7883
8285
|
// Budget = 6x3s = 18s. MEASURED: a dashboard SPA gated on a websocket
|
|
7884
8286
|
// (northflank's wss://platform.northflank.com/websocket) hydrates in
|
|
7885
|
-
// ~12-15s over the tunnel.
|
|
7886
|
-
// will NEVER hydrate (e.g. an authed user stranded on /signup): the
|
|
7887
|
-
// wait re-runs every round and burns the 600s run cap. The escape for
|
|
7888
|
-
// a never-hydrating route is navigate-to-root post-OAuth, not a longer
|
|
7889
|
-
// wait here.
|
|
8287
|
+
// ~12-15s over the tunnel.
|
|
7890
8288
|
//
|
|
7891
8289
|
// ADAPTIVE exception (MEASURED 2026-06-04, clerk): an OAuth/SSO
|
|
7892
8290
|
// CALLBACK route does a token exchange that renders even slower than a
|
|
7893
8291
|
// plain dashboard — clerk's `/sign-in/sso-callback` outlasts 18s and
|
|
7894
8292
|
// the bot bailed at the edge with `oauth_session_not_persisted`. On a
|
|
7895
|
-
// callback route the SPA IS making progress, so
|
|
7896
|
-
//
|
|
7897
|
-
//
|
|
7898
|
-
//
|
|
7899
|
-
const
|
|
7900
|
-
|
|
7901
|
-
|
|
7902
|
-
|
|
7903
|
-
|
|
7904
|
-
|
|
7905
|
-
|
|
7906
|
-
|
|
7907
|
-
|
|
7908
|
-
|
|
7909
|
-
|
|
8293
|
+
// callback route the SPA IS making progress, so 36s of patience is
|
|
8294
|
+
// warranted; everywhere else the 18s budget holds so a genuinely-stuck
|
|
8295
|
+
// route reaches the navigate-to-root escape fast. Read the URL fresh
|
|
8296
|
+
// each round (it may redirect off the callback).
|
|
8297
|
+
const onOAuthCallback = isOAuthCallbackRoute(state.url);
|
|
8298
|
+
const HYDRATION_BUDGET_MS = onOAuthCallback ? 36_000 : 18_000;
|
|
8299
|
+
await this.browser
|
|
8300
|
+
.waitForInteractiveDom(SHELL_MAX_ELEMENTS, HYDRATION_BUDGET_MS)
|
|
8301
|
+
.catch(() => undefined);
|
|
8302
|
+
// Re-read after the wait — the page may have hydrated (or redirected).
|
|
8303
|
+
try {
|
|
8304
|
+
[state, inventory] = await Promise.all([
|
|
8305
|
+
this.browser.getState(),
|
|
8306
|
+
this.buildInventory(args.steps, undefined, 80),
|
|
8307
|
+
]);
|
|
8308
|
+
}
|
|
8309
|
+
catch {
|
|
8310
|
+
// mid-navigation read — keep the prior state/inventory; the shell
|
|
8311
|
+
// decision below uses whatever count we have.
|
|
8312
|
+
}
|
|
8313
|
+
// Negative-side decision, now visibility- AND inventory-aware: a shell
|
|
8314
|
+
// requires loading-text in the VISIBLE text AND a sub-threshold
|
|
8315
|
+
// inventory. The OAuth-callback exclusion keeps the navigate-to-root
|
|
8316
|
+
// escape from firing mid-token-exchange (the callback IS making
|
|
8317
|
+
// progress and a navigate-away would abort the session).
|
|
8318
|
+
const stillShell = !onOAuthCallback &&
|
|
8319
|
+
isLoadingShell(await this.browser.extractVisibleText().catch(() => ""), inventory.length);
|
|
8320
|
+
if (stillShell) {
|
|
8321
|
+
shellStreak += 1;
|
|
8322
|
+
// On the 2nd consecutive shell round, do the navigate-to-root the
|
|
8323
|
+
// budgeted wait can't fix — a route stuck mid-hydration (a blocked
|
|
8324
|
+
// websocket, an SPA wedged on a stale path) often paints the real
|
|
8325
|
+
// dashboard from origin root. Once only.
|
|
8326
|
+
if (shellStreak >= 2 && !shellRootNavTried) {
|
|
8327
|
+
shellRootNavTried = true;
|
|
8328
|
+
const root = originRoot(state.url);
|
|
8329
|
+
args.steps.push(`Post-verify round ${round}: ${pathOf(state.url)} read as a loading shell for ` +
|
|
8330
|
+
`${shellStreak} consecutive rounds — navigating to origin root once before bailing.`);
|
|
8331
|
+
try {
|
|
8332
|
+
await this.browser.goto(root ?? state.url);
|
|
8333
|
+
await this.browser
|
|
8334
|
+
.waitForInteractiveDom(SHELL_MAX_ELEMENTS, 15_000)
|
|
8335
|
+
.catch(() => undefined);
|
|
8336
|
+
[state, inventory] = await Promise.all([
|
|
8337
|
+
this.browser.getState(),
|
|
8338
|
+
this.buildInventory(args.steps, undefined, 80),
|
|
8339
|
+
]);
|
|
8340
|
+
}
|
|
8341
|
+
catch {
|
|
8342
|
+
// navigate/read failed — the streak check below bails on the
|
|
8343
|
+
// next shell read.
|
|
8344
|
+
}
|
|
8345
|
+
// Re-evaluate after the root nav. If it hydrated, fall through to
|
|
8346
|
+
// planning; if it's STILL a shell, bail truthfully now rather than
|
|
8347
|
+
// burning the rest of the round budget to run_timeout.
|
|
8348
|
+
const recovered = !isLoadingShell(await this.browser.extractVisibleText().catch(() => ""), inventory.length);
|
|
8349
|
+
if (recovered) {
|
|
8350
|
+
shellStreak = 0;
|
|
8351
|
+
}
|
|
8352
|
+
else {
|
|
8353
|
+
throw new SpaNeverHydratedError(`spa_never_hydrated: ${args.service}'s post-verify page (${pathOf(state.url)}) ` +
|
|
8354
|
+
`stayed a loading shell across ${shellStreak} rounds and an origin-root reload — ` +
|
|
8355
|
+
`the SPA never rendered an actionable surface (blocked websocket / wedged hydration). ` +
|
|
8356
|
+
`Not a navigation bug; retry or finish the signup manually.`);
|
|
8357
|
+
}
|
|
7910
8358
|
}
|
|
7911
|
-
|
|
7912
|
-
|
|
7913
|
-
|
|
8359
|
+
else {
|
|
8360
|
+
args.steps.push(`Post-verify round ${round}: ${pathOf(state.url)} is a loading shell ` +
|
|
8361
|
+
`(streak ${shellStreak}) — letting the SPA settle one more round`);
|
|
7914
8362
|
}
|
|
7915
8363
|
}
|
|
8364
|
+
else {
|
|
8365
|
+
shellStreak = 0;
|
|
8366
|
+
}
|
|
7916
8367
|
// Stalled-wizard breaker. Build a content signature (URL + each
|
|
7917
8368
|
// inventory element's selector + label) and judge whether the
|
|
7918
8369
|
// PREVIOUS executed action changed the page. If the last few
|
|
@@ -8057,11 +8508,13 @@ ${formatInventory(input.inventory)}`,
|
|
|
8057
8508
|
if (consecutiveOauthLoginPageRounds >= 3) {
|
|
8058
8509
|
args.steps.push(`Post-verify: OAuth run still on a login page (${pathOf(state.url)}) for ` +
|
|
8059
8510
|
`${consecutiveOauthLoginPageRounds} rounds (incl. a reload) — the OAuth callback never persisted; bailing.`);
|
|
8511
|
+
await this.browser.dumpOAuthDebug(args.service, "callback-not-persisted").catch(() => { });
|
|
8060
8512
|
throw new OAuthSessionNotPersistedError(`oauth_session_not_persisted: signed in to ${args.service} via OAuth but the page ` +
|
|
8061
8513
|
`still presents a login screen (${pathOf(state.url)}) after ` +
|
|
8062
|
-
`${consecutiveOauthLoginPageRounds} rounds — the OAuth callback
|
|
8063
|
-
`
|
|
8064
|
-
`residential
|
|
8514
|
+
`${consecutiveOauthLoginPageRounds} rounds — the OAuth callback was rejected at the ` +
|
|
8515
|
+
`automation/fingerprint layer. NOT an IP issue (FALSIFIED 2026-06-14: a clean ` +
|
|
8516
|
+
`residential IP fails this callback identically — see STATE.md), so residential ` +
|
|
8517
|
+
`egress does NOT fix it. Needs a fingerprint/automation fix or manual signup.`);
|
|
8065
8518
|
}
|
|
8066
8519
|
}
|
|
8067
8520
|
else {
|
|
@@ -8199,6 +8652,10 @@ ${formatInventory(input.inventory)}`,
|
|
|
8199
8652
|
state,
|
|
8200
8653
|
inventory,
|
|
8201
8654
|
observed: nextStep,
|
|
8655
|
+
// Fix C4 — stamp the backend that produced THIS round's plan
|
|
8656
|
+
// (planPostVerifyStep set these via callLLM just above).
|
|
8657
|
+
...(this.lastResolvedModel !== undefined ? { resolved_model: this.lastResolvedModel } : {}),
|
|
8658
|
+
...(this.lastResolvedProvider !== undefined ? { resolved_provider: this.lastResolvedProvider } : {}),
|
|
8202
8659
|
});
|
|
8203
8660
|
capturedRound += 1;
|
|
8204
8661
|
// Per-round telemetry upload (rc.11). Mirrors the disk capture
|
|
@@ -8555,7 +9012,7 @@ ${formatInventory(input.inventory)}`,
|
|
|
8555
9012
|
hint = undefined;
|
|
8556
9013
|
continue;
|
|
8557
9014
|
}
|
|
8558
|
-
const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls, args.service);
|
|
9015
|
+
const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls, args.service, this.resolvedSignupUrl);
|
|
8559
9016
|
if (fallback !== null) {
|
|
8560
9017
|
triedFallbackUrls.add(fallback);
|
|
8561
9018
|
args.steps.push(`Post-verify: stuck-loop detected ${stuckFiresAtUrl}x at ${state.url} — escalating to a hardcoded API-key URL: ${fallback}`);
|
|
@@ -8670,7 +9127,30 @@ ${formatInventory(input.inventory)}`,
|
|
|
8670
9127
|
// candidate is exhausted, `done` is honored.
|
|
8671
9128
|
const capturedCredCount = Object.keys(credentials).filter((k) => !NON_CREDENTIAL_KEYS.has(k)).length;
|
|
8672
9129
|
if (capturedCredCount === 0 && prematureDoneFallbacks < MAX_PREMATURE_DONE_FALLBACKS) {
|
|
8673
|
-
|
|
9130
|
+
// Prefer CLICKING a real API-keys nav link over guessing a URL.
|
|
9131
|
+
// The dashboard's own sidebar/menu link carries the correct href;
|
|
9132
|
+
// guessing /keys, /api-keys, /settings/api-keys 404s on services
|
|
9133
|
+
// that host keys at a non-standard path (unify-ai). Only when no
|
|
9134
|
+
// such link is in the DOM do we fall through to URL composition.
|
|
9135
|
+
const keysLink = findApiKeysNavLink(inventory, clickedKeysLinks);
|
|
9136
|
+
if (keysLink !== null) {
|
|
9137
|
+
prematureDoneFallbacks += 1;
|
|
9138
|
+
clickedKeysLinks.add(keysLink.selector);
|
|
9139
|
+
const label = (keysLink.visibleText ?? keysLink.ariaLabel ?? keysLink.href ?? keysLink.selector) || keysLink.selector;
|
|
9140
|
+
args.steps.push(`Post-verify: planner emitted done with no credential captured — ` +
|
|
9141
|
+
`clicking the in-page API-keys link "${label.slice(0, 60)}" ` +
|
|
9142
|
+
`(${keysLink.href ?? keysLink.selector}) before guessing a URL`);
|
|
9143
|
+
try {
|
|
9144
|
+
await this.browser.click(keysLink.selector);
|
|
9145
|
+
await this.browser.waitForInteractiveDom(5, 15_000);
|
|
9146
|
+
}
|
|
9147
|
+
catch (err) {
|
|
9148
|
+
args.steps.push(`Post-verify: API-keys link click failed (${err instanceof Error ? err.message : String(err)}) — continuing.`);
|
|
9149
|
+
}
|
|
9150
|
+
hint = undefined;
|
|
9151
|
+
continue;
|
|
9152
|
+
}
|
|
9153
|
+
const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls, args.service, this.resolvedSignupUrl);
|
|
8674
9154
|
if (fallback !== null) {
|
|
8675
9155
|
prematureDoneFallbacks += 1;
|
|
8676
9156
|
triedFallbackUrls.add(fallback);
|
|
@@ -9178,6 +9658,10 @@ ${formatInventory(input.inventory)}`,
|
|
|
9178
9658
|
state: postState,
|
|
9179
9659
|
inventory: postInventory,
|
|
9180
9660
|
observed: syntheticExtract,
|
|
9661
|
+
// Fix C4 — attribute this synthetic round to the planner call
|
|
9662
|
+
// that drove us here (no LLM ran for this implicit extract).
|
|
9663
|
+
...(this.lastResolvedModel !== undefined ? { resolved_model: this.lastResolvedModel } : {}),
|
|
9664
|
+
...(this.lastResolvedProvider !== undefined ? { resolved_provider: this.lastResolvedProvider } : {}),
|
|
9181
9665
|
});
|
|
9182
9666
|
capturedRound += 1;
|
|
9183
9667
|
if (this.roundUploader !== undefined) {
|
|
@@ -9611,6 +10095,11 @@ ${formatInventory(input.inventory)}${input.hint !== undefined ? `\n\nIMPORTANT
|
|
|
9611
10095
|
// navigation-eval.md). The stall-detector + prior-action memory are the
|
|
9612
10096
|
// escape from a deterministic loop.
|
|
9613
10097
|
temperature: 0,
|
|
10098
|
+
// Fix C — pin a single model + provider + seed on the proxy path so
|
|
10099
|
+
// the same dashboard yields the same step regardless of which backend
|
|
10100
|
+
// OpenRouter would otherwise route to (the model/provider lottery
|
|
10101
|
+
// survives temperature 0).
|
|
10102
|
+
deterministic: true,
|
|
9614
10103
|
parse: (raw) => {
|
|
9615
10104
|
const step = parsePostVerifyStep(raw, allowed);
|
|
9616
10105
|
// A `check` must land on a real checkbox/radio — the planner
|