@trusty-squire/mcp 0.8.2-rc.21 → 0.8.2-rc.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bot/agent.js CHANGED
@@ -93,22 +93,6 @@ const ONBOARDING_PAYWALL_PATTERNS = [
93
93
  /\b(?:plan\s+|account\s+)?payment\s+required\b/i,
94
94
  /\bcomplet(?:e|ing)\s+(?:billing|payment)\b/i,
95
95
  /\bbilling\s+setup\s+(?:is\s+)?required\b/i,
96
- // 0.8.2-rc.5 — Together.ai's post-OAuth landing surfaces a "payment
97
- // form" gate that the post-verify planner reliably describes in its
98
- // `done` reason:
99
- //
100
- // "This page shows a payment form, and it's not possible to proceed
101
- // further without inputting payment information."
102
- //
103
- // None of the rc.39 patterns covered "payment form" / "payment
104
- // information" — together fell through to `oauth_onboarding_failed`,
105
- // which is misleading (the OAuth handshake succeeded; the wall is
106
- // billing). Patterns are scoped to the "form/information requirement"
107
- // shape so a marketing tile mentioning "payment information" doesn't
108
- // false-positive.
109
- /\bpayment\s+form\b/i,
110
- /\binput(?:ting)?\s+payment\s+information\b/i,
111
- /\benter(?:ing)?\s+payment\s+information\b/i,
112
96
  ];
113
97
  // Negators that, if they appear in the ~30 characters immediately
114
98
  // before a paywall pattern match, flip its meaning from a demand
@@ -145,136 +129,6 @@ export class LLMCallBudgetExceeded extends Error {
145
129
  this.name = "LLMCallBudgetExceeded";
146
130
  }
147
131
  }
148
- // 0.8.2-rc.10 — common dashboard paths that vendors host their
149
- // per-account API key UI at. Ordered most-specific first so a
150
- // fallback navigate doesn't land short of the actual page. Returned
151
- // as an array of path-strings; the caller composes them onto the
152
- // origin of the currently-stuck URL and skips any already tried.
153
- //
154
- // Patterns harvested from Anthropic (settings/keys), Sentry
155
- // (settings/account/api/auth-tokens), Neon (settings#api-keys),
156
- // Render (account/api-keys), Postmark (account/api_tokens),
157
- // OpenRouter (keys), and a long tail of vendors converging on the
158
- // same conventions.
159
- const STUCK_LOOP_FALLBACK_PATHS = [
160
- "/settings/keys",
161
- "/settings/api-keys",
162
- "/settings/api_keys",
163
- "/settings/tokens",
164
- "/settings/api-tokens",
165
- "/settings/account/api/auth-tokens/",
166
- "/account/api-keys",
167
- "/account/api_tokens",
168
- "/account/keys",
169
- "/account/tokens",
170
- "/api-keys",
171
- "/api_keys",
172
- "/keys",
173
- "/tokens",
174
- "/auth-tokens",
175
- "/dashboard/api-keys",
176
- "/dashboard/keys",
177
- ];
178
- // 0.8.2-rc.10 — heuristic for "this account already exists on the
179
- // service and its API keys are masked, with no path to reveal them."
180
- // The test identity (methoxine@gmail.com) accumulates state across
181
- // batches; subsequent runs land on a dashboard whose API-keys page
182
- // shows only the NAMES of existing keys (the values were revealed
183
- // once at create-time and aren't recoverable). Without this
184
- // classifier those runs fall through to a generic
185
- // oauth_onboarding_failed and the harvester treats them like a
186
- // repairable bug.
187
- //
188
- // Conservative rules: must be on a URL that names an API-key page
189
- // (keys / api-keys / api-tokens / auth-tokens / api_keys), AND the
190
- // page text shows BOTH a masking glyph pattern (•••, ***, ─•) AND
191
- // an existing-key word, OR the planner's last reason explicitly
192
- // describes the same shape.
193
- const EXISTING_KEY_URL_HINT = /(?:api[-_/]keys?|api[-_/]tokens?|auth[-_/]tokens?|personal[-_/]access[-_/]tokens?|\/keys(?:\b|\/|$)|\/tokens(?:\b|\/|$)|\/settings\/keys\b|\/settings\/tokens\b|#api[-_/]keys\b|#api[-_/]tokens\b)/i;
194
- const MASKED_KEY_GLYPHS = /(?:•{3,}|\*{3,}|─•|·{3,}|•{3,}|x{6,}|[A-Za-z0-9]{2,4}[•*]{5,})/;
195
- // 0.8.2-rc.12 — widened to catch Neon's existing-key list shape
196
- // (the per-row layout has a "Key name" header + "Created <date>" +
197
- // "Last used <date|never>" — no glyph, no "existing" word, just the
198
- // columns of an API-key listing table). The conservative AND with
199
- // EXISTING_KEY_URL_HINT keeps this from misfiring on marketing copy
200
- // elsewhere on a non-keys URL.
201
- const EXISTING_KEY_WORDS = /\b(?:existing\s+(?:api\s+)?(?:key|token)|previously\s+created|created\s+by\b|api\s+keys?\s*\(\d+\)|tokens?\s*\(\d+\)|reveal|copy\s+key|key\s+name\b|last\s+used\b|created(?:\s+\w+){0,3}\s+(?:\d{1,2},?\s+)?\d{4}\b)/i;
202
- const NO_CREATE_AFFORDANCE_HINT = /\b(?:cannot\s+(?:reveal|extract|read)|values?\s+(?:is\s+)?masked|only\s+shown\s+once|cannot\s+(?:see|view|copy)\s+(?:the\s+)?(?:key|secret|value)|key\s+(?:value|secret)\s+(?:is\s+)?(?:not\s+)?(?:available|recoverable|extractable|shown))\b/i;
203
- export function detectExistingAccountNoExtract(input) {
204
- if (!EXISTING_KEY_URL_HINT.test(input.url))
205
- return false;
206
- // Planner reason naming the no-reveal shape is the strongest single
207
- // signal — the planner has SEEN the page and is describing it.
208
- if (NO_CREATE_AFFORDANCE_HINT.test(input.lastPlannerReason)) {
209
- return true;
210
- }
211
- // 0.8.2-rc.12 — three independent positive paths, ANY of which is
212
- // enough since we already gated on the URL matching an API-keys
213
- // page (which alone weeds out the marketing-tile false-positives
214
- // the conservative pre-rc.12 path was protecting against):
215
- // 1. Mask glyphs in the page (•••, asterisks, ··· — the literal
216
- // "value is hidden" decoration most vendors use) alongside an "API key(s)" mention.
217
- // 2. Two or more existing-key word patterns matched (a key
218
- // LISTING shape: "Key name" + "Last used" + "Created <date>"
219
- // is unmistakable when found on a /keys-style URL).
220
- // 3. Mask glyph PLUS any existing-key word (the original
221
- // detector — keeps the conservative behavior for vendors
222
- // whose listing UI uses different column labels).
223
- const hasMaskGlyph = MASKED_KEY_GLYPHS.test(input.pageText);
224
- // Tally up to 5 distinct existing-key signals; 2+ is enough.
225
- const existingKeyMatches = [];
226
- const allWords = input.pageText.match(new RegExp(EXISTING_KEY_WORDS, "gi"));
227
- if (allWords !== null) {
228
- const distinct = new Set();
229
- for (const m of allWords) {
230
- distinct.add(m.toLowerCase().replace(/\s+/g, " "));
231
- if (distinct.size >= 5)
232
- break;
233
- }
234
- existingKeyMatches.push(...distinct);
235
- }
236
- if (hasMaskGlyph && existingKeyMatches.length >= 1)
237
- return true;
238
- if (existingKeyMatches.length >= 2)
239
- return true;
240
- if (hasMaskGlyph && /\bAPI\s+keys?\b/i.test(input.pageText))
241
- return true;
242
- return false;
243
- }
244
- // Pick the next fallback URL to try from STUCK_LOOP_FALLBACK_PATHS
245
- // keyed against the origin of the currently-stuck URL. Returns null
246
- // when every path has already been attempted. Exported for unit tests.
247
- export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried) {
248
- let origin;
249
- try {
250
- origin = new URL(currentUrl).origin;
251
- }
252
- catch {
253
- return null;
254
- }
255
- // Skip a candidate when the current URL's path ALREADY matches it
256
- // (case-insensitive, trailing-slash tolerant). The planner is stuck
257
- // ON the page the candidate points to — navigating to the same URL
258
- // again won't break the cycle, only a different path will.
259
- const currentPath = (() => {
260
- try {
261
- return new URL(currentUrl).pathname.replace(/\/+$/, "").toLowerCase();
262
- }
263
- catch {
264
- return "";
265
- }
266
- })();
267
- for (const path of STUCK_LOOP_FALLBACK_PATHS) {
268
- const candidate = `${origin}${path}`;
269
- if (alreadyTried.has(candidate))
270
- continue;
271
- const candidatePath = path.replace(/\/+$/, "").toLowerCase();
272
- if (candidatePath === currentPath)
273
- continue;
274
- return candidate;
275
- }
276
- return null;
277
- }
278
132
  // Best-effort canonical signup URL for a service when the caller
279
133
  // didn't pass one. Most dev-SaaS targets (Resend, Postmark, Mailgun,
280
134
  // MailerSend, IPInfo, Stripe, PostHog) live at <name>.com/signup —
@@ -892,46 +746,6 @@ export function detectAlreadySignedIn(args) {
892
746
  })) {
893
747
  return true;
894
748
  }
895
- // 0.8.2-rc.5 — PostHog-class onboarding wizard. When the URL is
896
- // dashboard-y (path like /project/<id>/onboarding) and the page
897
- // shows project-picker / account-menu / onboarding-skip
898
- // affordances WITHOUT a credential input or OAuth provider button,
899
- // the user is authenticated and the wizard is interstitial. The
900
- // rc.3 overnight run for posthog landed exactly here and bailed
901
- // `oauth_required` because the inventory had only:
902
- // - "Default project" (project picker)
903
- // - "BBento" (account avatar toggle)
904
- // - "Hand off setup" (skip-onboarding affordance)
905
- //
906
- // Detect this shape via a second-tier signal set. Conservative —
907
- // we already gated on "no credential inputs" and "dashboardyPath",
908
- // so a true signup chooser (which has neither of those AND the
909
- // path is /signup or /login) cannot reach this branch.
910
- const POST_AUTH_AFFORDANCE = /^\s*(?:hand\s*off\s*setup|skip\s*(?:onboarding|setup|for\s*now)|invite\s*(?:teammates|members|your\s*team)|set\s*up\s*billing|finish\s*setup|get\s*started|continue\s*to\s*(?:dashboard|app|console))\s*$/i;
911
- // Workspace / project / org picker shape. We pattern-match
912
- // generously because PostHog's reads "Default project" but other
913
- // SaaS dashboards read "My workspace" / "Acme org" / similar. The
914
- // structural cue is "button with one of the workspace-noun words"
915
- // — see TS-1923 (PostHog rc.3 regression).
916
- const WORKSPACE_PICKER = /\b(?:workspace|workspaces|project(?:s)?|organization|organizations|team(?:s)?)\b/i;
917
- const hasPostAuthAffordance = inventory.some((e) => POST_AUTH_AFFORDANCE.test((e.visibleText ?? e.ariaLabel ?? "").trim()));
918
- if (hasPostAuthAffordance) {
919
- // Single signal — the skip-onboarding / handoff verb is strong
920
- // enough on its own. No login page ever offers "Hand off setup".
921
- return true;
922
- }
923
- // Weaker pair: a workspace-picker shape AND the page lacks a
924
- // primary call-to-action that reads as signup ("Continue with
925
- // Google", "Sign up", etc.). Used as a backstop for SPA dashboards
926
- // whose only visible buttons are picker toggles.
927
- const hasWorkspacePicker = inventory.some((e) => WORKSPACE_PICKER.test((e.visibleText ?? e.ariaLabel ?? "").trim()));
928
- const hasSignupOrOAuthAffordance = inventory.some((e) => {
929
- const t = (e.visibleText ?? e.ariaLabel ?? "").trim();
930
- return /\b(?:sign[\s-]*up|signup|continue\s+with|log\s+in\s+with|sign\s+in\s+with)\b/i.test(t);
931
- });
932
- if (hasWorkspacePicker && !hasSignupOrOAuthAffordance) {
933
- return true;
934
- }
935
749
  }
936
750
  return false;
937
751
  }
@@ -1591,43 +1405,6 @@ export function extractAllLabeledTokensFromReason(reason, pageText) {
1591
1405
  function escapeRegex(s) {
1592
1406
  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
1593
1407
  }
1594
- // Keys that the postVerifyLoop's accumulator stores for housekeeping —
1595
- // they're NOT extracted credentials and must NOT count as "we found
1596
- // something" when deciding whether an extract round succeeded.
1597
- const NON_CREDENTIAL_KEYS = new Set([
1598
- "api_key_truncated", // truncated stub from extractCredentials Pass 1
1599
- "password", // signup form metadata (email-verification path)
1600
- "email", // signup form metadata
1601
- ]);
1602
- // True iff the credentials Record holds at least one extracted value
1603
- // (api_key, username, or any labeled multi-cred field). Excludes
1604
- // metadata + truncated stubs. Used to decide "this extract round
1605
- // produced something — continue the loop / capture a synthetic extract
1606
- // round" vs "every tier missed — try the planner-quoted fallback".
1607
- export function hasAnyExtractedCredential(creds) {
1608
- for (const key of Object.keys(creds)) {
1609
- if (NON_CREDENTIAL_KEYS.has(key))
1610
- continue;
1611
- return true;
1612
- }
1613
- return false;
1614
- }
1615
- // True iff the credentials Record contains a multi-credential bundle
1616
- // — anything beyond the legacy single api_key/username pair. Used by
1617
- // the post-verify loop's early-exit so a partial multi-cred capture
1618
- // doesn't return prematurely (Cloudinary's api_key surfaces 4-5
1619
- // rounds before api_secret; the legacy exit fired the moment api_key
1620
- // was set, losing cloud_name + api_secret).
1621
- export function isMultiCredBundle(creds) {
1622
- for (const key of Object.keys(creds)) {
1623
- if (NON_CREDENTIAL_KEYS.has(key))
1624
- continue;
1625
- if (key === "api_key" || key === "username")
1626
- continue;
1627
- return true;
1628
- }
1629
- return false;
1630
- }
1631
1408
  export function extractApiKeyFromText(text) {
1632
1409
  const prefixed = [
1633
1410
  /\bre_[a-zA-Z0-9_]{20,}\b/, // Resend (key body contains underscores)
@@ -2117,27 +1894,12 @@ export class SignupAgent {
2117
1894
  // Parse/validation failure — includes a hallucinated selector
2118
1895
  // rejected by the inventory check. An error replan.
2119
1896
  const reason = err instanceof Error ? err.message : String(err);
2120
- // 0.8.2-rc.6 — mirror the post-verify upstream-blip carve-out.
2121
- // The form-fill planner is also vulnerable to sustained
2122
- // upstream-proxy degradation: the rc.3 + rc.5 batch runs
2123
- // showed openrouter / resend / sentry losing 4+ consecutive
2124
- // proxy calls in a row when the free-tier upstream was
2125
- // throttling. Don't punt to planning_failed for upstream
2126
- // weather — keep re-planning until the budget runs out at the
2127
- // top-level F2 deadline, OR a true logic failure shows up.
2128
- const isUpstreamBlip = /\b50[234]\b/.test(reason) ||
2129
- /\bupstream_(?:error|unreachable)\b/i.test(reason) ||
2130
- /\bnetwork error\b/i.test(reason);
2131
- if (!isUpstreamBlip && ++errorReplans > MAX_ERROR_REPLANS) {
1897
+ if (++errorReplans > MAX_ERROR_REPLANS) {
2132
1898
  return { kind: "planning_failed", reason: `planner output never validated: ${reason}` };
2133
1899
  }
2134
- steps.push(isUpstreamBlip
2135
- ? `⚠ planner request hit a transient upstream blip (${reason}) — retrying`
2136
- : `⚠ plan rejected (${reason}) re-planning`);
2137
- if (!isUpstreamBlip) {
2138
- hint =
2139
- "Your previous plan used a selector not in the inventory. Use ONLY selectors copied verbatim from a `selector=` field.";
2140
- }
1900
+ steps.push(`⚠ plan rejected (${reason}) — re-planning`);
1901
+ hint =
1902
+ "Your previous plan used a selector not in the inventory. Use ONLY selectors copied verbatim from a `selector=` field.";
2141
1903
  continue;
2142
1904
  }
2143
1905
  steps.push(`Plan: ${plan.actions.length} action(s), confidence=${plan.confidence}` +
@@ -2582,22 +2344,12 @@ export class SignupAgent {
2582
2344
  if (this.llmCallCount >= MAX_LLM_CALLS_PER_SIGNUP) {
2583
2345
  throw new LLMCallBudgetExceeded(MAX_LLM_CALLS_PER_SIGNUP);
2584
2346
  }
2585
- // 0.8.2-rc.8 — count the call only AFTER the upstream actually
2586
- // replied. The old code incremented before the proxy fetch which
2587
- // means a proxy 502 (caught & surfaced after the 4-attempt retry
2588
- // budget exhausts) cost the same budget unit as a real planner
2589
- // reply. The rc.7 sentry batch run hit this: 2 upstream-blip
2590
- // retries consumed 2 of the 15 calls on top of the 9 successful
2591
- // post-verify rounds — the planner ran out of budget on the 8th
2592
- // permission scope, 5 short of the API key. Failed calls produce
2593
- // no progress; charging them against the budget is wrong. Behave
2594
- // like a meter: only count consumption that actually delivered.
2347
+ this.llmCallCount += 1;
2595
2348
  const resp = await client.createMessage({
2596
2349
  system: args.system,
2597
2350
  user: args.userBlocks,
2598
2351
  max_tokens: args.maxTokens,
2599
2352
  });
2600
- this.llmCallCount += 1;
2601
2353
  this.backendsUsed.push(resp.backend);
2602
2354
  return resp.text;
2603
2355
  };
@@ -2933,34 +2685,6 @@ export class SignupAgent {
2933
2685
  ...this.resultTail(),
2934
2686
  };
2935
2687
  }
2936
- // 0.8.2-rc.10 — same sentinel-pattern routing the runOAuthFlow
2937
- // path uses. The post-verify loop sets lastPostVerifyDoneReason
2938
- // with [stuck_loop] or [existing_account_no_extract] markers
2939
- // when it bails on a planner-loop or pre-existing-key state;
2940
- // surface those distinctly rather than as the generic
2941
- // no_credentials_after_already_signed_in.
2942
- if (this.lastPostVerifyDoneReason !== null &&
2943
- this.lastPostVerifyDoneReason.startsWith("[stuck_loop]")) {
2944
- return {
2945
- success: false,
2946
- error: `planner_stuck: ${task.service}'s dashboard re-picked the same step repeatedly ` +
2947
- `with no inventory change and the bot's hardcoded API-key URL fallbacks did not ` +
2948
- `advance the page — finish the signup manually.`,
2949
- steps,
2950
- ...this.resultTail(),
2951
- };
2952
- }
2953
- if (this.lastPostVerifyDoneReason !== null &&
2954
- this.lastPostVerifyDoneReason.startsWith("[existing_account_no_extract]")) {
2955
- return {
2956
- success: false,
2957
- error: `existing_account_no_extract: ${task.service}'s dashboard shows pre-existing API ` +
2958
- `keys for this identity but the values are masked and unrecoverable — wipe the ` +
2959
- `test identity's account on ${task.service} or sign in manually and reveal the key.`,
2960
- steps,
2961
- ...this.resultTail(),
2962
- };
2963
- }
2964
2688
  return {
2965
2689
  success: false,
2966
2690
  error: "no_credentials_after_already_signed_in: bot detected an authenticated dashboard " +
@@ -3554,20 +3278,14 @@ export class SignupAgent {
3554
3278
  }
3555
3279
  }
3556
3280
  let credentials = await this.extractCredentials();
3557
- // 0.8.2-rc.15 — always enter postVerifyLoop. The legacy short-
3558
- // circuit ("only call postVerifyLoop if api_key wasn't already
3559
- // visible") returned early on multi-cred services that happen to
3560
- // land with api_key plain-visible — cloud_name + api_secret on
3561
- // Cloudinary, application_id + admin_api_key on Algolia — and the
3562
- // siblings were never extracted. postVerifyLoop's top-of-iter
3563
- // early-exit is itself multi-cred-aware (rc.13), so when there's
3564
- // nothing more to do, it returns on the first iteration.
3565
- credentials = await this.postVerifyLoop({
3566
- service: task.service,
3567
- maxRounds: task.postVerifyMaxRounds ?? 12,
3568
- steps,
3569
- ...(task.scopeHint !== undefined ? { scopeHint: task.scopeHint } : {}),
3570
- });
3281
+ if (credentials.api_key === undefined) {
3282
+ credentials = await this.postVerifyLoop({
3283
+ service: task.service,
3284
+ maxRounds: task.postVerifyMaxRounds ?? 12,
3285
+ steps,
3286
+ ...(task.scopeHint !== undefined ? { scopeHint: task.scopeHint } : {}),
3287
+ });
3288
+ }
3571
3289
  if (credentials.api_key !== undefined) {
3572
3290
  return {
3573
3291
  success: true,
@@ -3613,41 +3331,6 @@ export class SignupAgent {
3613
3331
  ...this.resultTail(),
3614
3332
  };
3615
3333
  }
3616
- // 0.8.2-rc.10 — planner stuck-loop, fallback URLs exhausted. The
3617
- // postVerifyLoop marks this with the [stuck_loop] sentinel so the
3618
- // operator sees a distinct status (it's not an "OAuth onboarding"
3619
- // failure — OAuth succeeded; the planner got stuck on the
3620
- // post-OAuth navigation).
3621
- if (this.lastPostVerifyDoneReason !== null &&
3622
- this.lastPostVerifyDoneReason.startsWith("[stuck_loop]")) {
3623
- return {
3624
- success: false,
3625
- error: `planner_stuck: ${task.service}'s post-OAuth dashboard re-picked the same step ` +
3626
- `repeatedly with no inventory change and the bot's hardcoded API-key URL fallbacks ` +
3627
- `did not advance the page — finish the signup manually.`,
3628
- steps,
3629
- ...this.resultTail(),
3630
- };
3631
- }
3632
- // 0.8.2-rc.10 — existing-account state with no extractable
3633
- // credential. The postVerifyLoop's existing-key detector
3634
- // (detectExistingAccountNoExtract) classifies a run that lands on
3635
- // an authenticated dashboard whose API-keys page surfaces only
3636
- // masked existing keys + no path to a fresh value. Surfacing this
3637
- // distinctly so the harvester can flag it (e.g. periodically wipe
3638
- // the chrome profile for the test identity) rather than treat it
3639
- // as a real bot failure.
3640
- if (this.lastPostVerifyDoneReason !== null &&
3641
- this.lastPostVerifyDoneReason.startsWith("[existing_account_no_extract]")) {
3642
- return {
3643
- success: false,
3644
- error: `existing_account_no_extract: ${task.service}'s dashboard shows pre-existing API ` +
3645
- `keys for this identity but the values are masked and unrecoverable — wipe the ` +
3646
- `test identity's account on ${task.service} or sign in manually and reveal the key.`,
3647
- steps,
3648
- ...this.resultTail(),
3649
- };
3650
- }
3651
3334
  return {
3652
3335
  success: false,
3653
3336
  error: `oauth_onboarding_failed: signed in to ${task.service} via ${provider.label} but ` +
@@ -3926,37 +3609,8 @@ ${formatInventory(input.inventory)}`,
3926
3609
  }
3927
3610
  async postVerifyLoop(args) {
3928
3611
  let credentials = await this.extractCredentials();
3929
- // 0.8.2-rc.15 — also seed DOM-proximity at loop entry. If the
3930
- // bot lands directly on the api-keys page (Cloudinary navigates
3931
- // through onboarding to the dashboard automatically, sometimes
3932
- // landing on /settings/api-keys), labeled siblings are visible
3933
- // immediately and the loop's top-of-iter check (which respects
3934
- // isMultiCredBundle) can hold the loop open for the planner to
3935
- // emit an explicit extract. Without this seed, only api_key
3936
- // would be set on entry and isMultiCredBundle would return
3937
- // false → loop exits with a partial bundle.
3938
- try {
3939
- const labeledSeed = await this.extractFromDomProximity();
3940
- for (const [k, v] of Object.entries(labeledSeed)) {
3941
- if (credentials[k] === undefined)
3942
- credentials[k] = v;
3943
- }
3944
- }
3945
- catch {
3946
- // Non-fatal — the planner's explicit extract round will run
3947
- // DOM-proximity again, this is just an opportunistic seed.
3948
- }
3949
3612
  let loginAttempts = 0;
3950
3613
  let planFailures = 0;
3951
- // 0.8.2-rc.6 — separate counter for upstream-blip retries. Doesn't
3952
- // gate planFailures (so a transient 502 won't push us into the
3953
- // terminal stop branch after 4 rounds), but is still bounded so a
3954
- // permanently-down proxy can't loop forever. Generous because each
3955
- // blip costs ~5s of network + retry-backoff and the run already
3956
- // has a 10-min top-level timeout — but tight enough that a truly
3957
- // dead upstream doesn't burn the whole maxRounds budget on noise.
3958
- let upstreamBlipRetries = 0;
3959
- const MAX_UPSTREAM_BLIP_RETRIES = 8;
3960
3614
  const oauth = args.credentials === undefined;
3961
3615
  // Re-plan hint for the next round — set when an `extract` step
3962
3616
  // found no key, which means the visible key text is masked /
@@ -4016,26 +3670,6 @@ ${formatInventory(input.inventory)}`,
4016
3670
  // navigate produced no progress. Inject a hint forcing a CLICK
4017
3671
  // on something visible in the current inventory.
4018
3672
  let prevNavigateFromUrl = null;
4019
- // 0.8.2-rc.10 — escalation for the stuck-loop detector.
4020
- //
4021
- // The existing detector injects a re-plan hint when the planner
4022
- // returns the same kind+selector twice with no inventory change,
4023
- // but the planner often ignores the "pick a different KIND" hint
4024
- // and just picks a slightly different SELECTOR for another click.
4025
- // Anthropic's batch failure (rc.8) showed 6 wasted rounds of this
4026
- // before a navigate finally broke the cycle: clicking the sidebar
4027
- // "API Keys" link on a dashboard that wasn't routing to it.
4028
- //
4029
- // Escalation strategy: after N stuck-fires within the SAME URL,
4030
- // try a hard navigate to a guessed API-keys URL (one per origin).
4031
- // If the URL has already advanced past the stuck zone, reset the
4032
- // counter. After every fallback URL is exhausted AND we're still
4033
- // stuck, mark the run [stuck_loop] so the caller surfaces the
4034
- // dedicated error code instead of the generic
4035
- // oauth_onboarding_failed.
4036
- let stuckFiresAtUrl = 0;
4037
- let lastStuckFireUrl = null;
4038
- const triedFallbackUrls = new Set();
4039
3673
  // 0.8.1 — capture chain index is independent of the planner loop
4040
3674
  // round. The loop has two early-`continue` paths (page mid-navigation
4041
3675
  // throw, planner-rejection re-plan) that increment `round` WITHOUT
@@ -4047,60 +3681,11 @@ ${formatInventory(input.inventory)}`,
4047
3681
  // contiguous 0..N-1 chain regardless of how many planner re-plans
4048
3682
  // happen mid-run.
4049
3683
  let capturedRound = 0;
4050
- // 0.8.2-rc.12 — multi-cred-aware loop exit. Track the number of
4051
- // distinct credential keys we've accumulated; if we're in a
4052
- // multi-cred bundle (cloud_name, api_secret, application_id, …)
4053
- // keep planning past the first api_key surfacing so siblings can
4054
- // accumulate. Bounded by `roundsSinceLastNewCredential` so a
4055
- // page that never produces a sibling doesn't loop forever.
4056
- let lastCredentialKeyCount = Object.keys(credentials).filter((k) => !NON_CREDENTIAL_KEYS.has(k)).length;
4057
- let roundsSinceLastNewCredential = 0;
4058
- const MAX_ROUNDS_AWAITING_MORE_CREDENTIALS = 3;
4059
- // 0.8.2-rc.16 — when the loop's pre-entry seed already had a
4060
- // credential (Cloudinary's billing/plans page exposes the api_key
4061
- // via a hidden field that extractCredentials catches), we cannot
4062
- // trust that result as authoritative for multi-cred: the bot
4063
- // hasn't navigated to a labeled api-keys page yet, so cloud_name
4064
- // + api_secret are not yet visible to extractFromDomProximity.
4065
- // Hold the loop open until the planner has issued at least one
4066
- // explicit extract step — only then has the bot affirmatively
4067
- // surveyed the labeled credentials surface.
4068
- const seedHadCredential = credentials.api_key !== undefined || credentials.username !== undefined;
4069
- let plannerExtractEmitted = false;
4070
3684
  for (let round = 0; round < args.maxRounds; round++) {
4071
- const currentCredentialKeyCount = Object.keys(credentials).filter((k) => !NON_CREDENTIAL_KEYS.has(k)).length;
4072
- if (currentCredentialKeyCount > lastCredentialKeyCount) {
4073
- roundsSinceLastNewCredential = 0;
4074
- lastCredentialKeyCount = currentCredentialKeyCount;
4075
- }
4076
- else if (lastCredentialKeyCount > 0) {
4077
- roundsSinceLastNewCredential += 1;
4078
- }
4079
- // Multi-cred services hold the loop open until either the
4080
- // planner returns `done`, the budget expires, or we've made
4081
- // no credential progress for MAX_ROUNDS_AWAITING_MORE_CREDENTIALS
4082
- // consecutive rounds. Single-cred services keep the legacy
4083
- // behavior of returning the moment api_key surfaces — EXCEPT
4084
- // when the api_key came from the pre-loop seed and the
4085
- // planner hasn't yet emitted an explicit extract step. In
4086
- // that case we let the planner run until extract fires.
4087
- const inMultiCredMode = isMultiCredBundle(credentials);
4088
- const haveOnlySeedCredentials = seedHadCredential && !plannerExtractEmitted;
4089
- if (!inMultiCredMode &&
4090
- (credentials.api_key !== undefined || credentials.username !== undefined) &&
4091
- !haveOnlySeedCredentials) {
3685
+ if (credentials.api_key !== undefined || credentials.username !== undefined) {
4092
3686
  args.steps.push(`Post-verify: credentials found on round ${round}.`);
4093
3687
  return credentials;
4094
3688
  }
4095
- if (inMultiCredMode &&
4096
- roundsSinceLastNewCredential >= MAX_ROUNDS_AWAITING_MORE_CREDENTIALS &&
4097
- (credentials.api_key !== undefined || credentials.username !== undefined)) {
4098
- const summary = Object.keys(credentials)
4099
- .filter((k) => !NON_CREDENTIAL_KEYS.has(k))
4100
- .join(", ");
4101
- args.steps.push(`Post-verify: multi-cred bundle stable for ${roundsSinceLastNewCredential} rounds — returning what we have (${summary}).`);
4102
- return credentials;
4103
- }
4104
3689
  // Settle the page first — the previous round's click may have
4105
3690
  // triggered a navigation, and reading a page mid-navigation
4106
3691
  // throws "execution context destroyed". waitForFormReady is
@@ -4148,47 +3733,17 @@ ${formatInventory(input.inventory)}`,
4148
3733
  // form-fill planner has. Bounded so a persistently broken
4149
3734
  // planner still terminates.
4150
3735
  const reason = err instanceof Error ? err.message : String(err);
4151
- // 0.8.2-rc.6 — distinguish upstream-blip from planner-logic
4152
- // failure. The proxy retry-with-backoff (rc.5) handles most
4153
- // transient 502s within a single call, but during a sustained
4154
- // upstream degradation the retry budget exhausts and surfaces
4155
- // here as a planFailure. Counting those toward the 4x stop
4156
- // threshold is wrong — they're not the planner's fault, the
4157
- // upstream is just temporarily unavailable. We allow these to
4158
- // burn a round (forward progress is impossible without a
4159
- // planner reply) but don't tick planFailures, so a transient
4160
- // blip can't push us into the terminal stop branch.
4161
- const isUpstreamBlip = /\b50[234]\b/.test(reason) ||
4162
- /\bupstream_(?:error|unreachable)\b/i.test(reason) ||
4163
- /\bnetwork error\b/i.test(reason);
4164
- if (isUpstreamBlip) {
4165
- upstreamBlipRetries += 1;
4166
- if (upstreamBlipRetries > MAX_UPSTREAM_BLIP_RETRIES) {
4167
- args.steps.push(`Post-verify round ${round}: upstream proxy degraded for ${upstreamBlipRetries} rounds — stopping (likely sustained outage).`);
4168
- break;
4169
- }
4170
- }
4171
- else {
4172
- planFailures += 1;
4173
- }
3736
+ planFailures += 1;
4174
3737
  if (planFailures > 3) {
4175
3738
  args.steps.push(`Post-verify round ${round}: planner failed ${planFailures}x (${reason}) — stopping.`);
4176
3739
  break;
4177
3740
  }
4178
- const label = isUpstreamBlip ? "transient upstream blip" : "planner output rejected";
4179
- args.steps.push(`Post-verify round ${round}: ${label} (${reason})` +
4180
- (isUpstreamBlip
4181
- ? ` — retrying (${upstreamBlipRetries}/${MAX_UPSTREAM_BLIP_RETRIES}).`
4182
- : " re-planning."));
4183
- // No re-plan hint on an upstream blip — the planner's previous
4184
- // output (if any) was fine; only the request itself failed.
4185
- if (!isUpstreamBlip) {
4186
- hint =
4187
- "Your previous step was REJECTED. A click/fill/select `selector` must be " +
4188
- "EXACTLY the value after `selector=` on one inventory line — copy only that " +
4189
- "value (it runs to the end of the line), never the leading `[n] tag …` part " +
4190
- "and never the whole line.";
4191
- }
3741
+ args.steps.push(`Post-verify round ${round}: planner output rejected (${reason}) — re-planning.`);
3742
+ hint =
3743
+ "Your previous step was REJECTED. A click/fill/select `selector` must be " +
3744
+ "EXACTLY the value after `selector=` on one inventory line copy only that " +
3745
+ "value (it runs to the end of the line), never the leading `[n] tag …` part " +
3746
+ "and never the whole line.";
4192
3747
  continue;
4193
3748
  }
4194
3749
  // rc.22 — redact tokens before pushing to the step trail.
@@ -4393,94 +3948,6 @@ ${formatInventory(input.inventory)}`,
4393
3948
  const uncheckedBoxHint = uncheckedBoxes.length > 0
4394
3949
  ? `\n\nVisible checkboxes you haven't ticked yet (often a TOS / agreement gate):\n${uncheckedBoxes.join("\n")}\n\nIssue {"kind":"check"} on any that look like agreements / required confirmations.`
4395
3950
  : "";
4396
- // 0.8.2-rc.10 — escalation. Track stuck-fires per URL so we
4397
- // can switch tactics once the gentle re-plan hint has clearly
4398
- // failed (the planner refuses to break the cycle on its own,
4399
- // see the Anthropic six-round pattern in rc.8).
4400
- if (lastStuckFireUrl === state.url) {
4401
- stuckFiresAtUrl += 1;
4402
- }
4403
- else {
4404
- stuckFiresAtUrl = 1;
4405
- lastStuckFireUrl = state.url;
4406
- }
4407
- // After two stuck fires at the same URL, escalate to a
4408
- // hardcoded /settings/keys-style navigation. Vendors almost
4409
- // always have ONE canonical path; the dashboard often gates
4410
- // it behind a sidebar link the planner can't reliably resolve
4411
- // (Anthropic, Neon, Sentry, Mistral, …). The fallback list is
4412
- // ordered most-specific first so a service whose dashboard
4413
- // root happens to share /settings with the API-keys page
4414
- // doesn't land short of the actual page.
4415
- if (stuckFiresAtUrl >= 2) {
4416
- // 0.8.2-rc.12 — when the bot is ALREADY on a URL that names
4417
- // an API-keys page (path contains /keys, /tokens, /api-keys,
4418
- // etc.) AND the page text shows masked-credential markers,
4419
- // the dashboard is genuinely showing a pre-existing key
4420
- // we can't unmask (Neon's `ts-7229` is the canonical case —
4421
- // the value was revealed once at create-time and is gone).
4422
- // Skip the fallback-URL navigate entirely (it would land
4423
- // on a 404 for vendors whose api-keys page lives at an
4424
- // org-scoped URL like `/app/<org>/settings#api-keys`) and
4425
- // classify as existing_account_no_extract directly.
4426
- try {
4427
- const stuckPageText = await this.browser
4428
- .extractText()
4429
- .catch(() => "");
4430
- if (detectExistingAccountNoExtract({
4431
- url: state.url,
4432
- pageText: stuckPageText,
4433
- lastPlannerReason: nextStep.reason,
4434
- })) {
4435
- this.lastPostVerifyDoneReason =
4436
- `[existing_account_no_extract] stuck-loop at ${state.url} on an existing API-keys page with masked credentials; ` +
4437
- `latest planner reason: ${nextStep.reason}`;
4438
- args.steps.push(`Post-verify: stuck-loop on an existing-keys page — classified as existing_account_no_extract, breaking out.`);
4439
- break;
4440
- }
4441
- }
4442
- catch {
4443
- // best-effort — fall through to the regular fallback path
4444
- // if the page-text read failed.
4445
- }
4446
- const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls);
4447
- if (fallback !== null) {
4448
- triedFallbackUrls.add(fallback);
4449
- args.steps.push(`Post-verify: stuck-loop detected ${stuckFiresAtUrl}x at ${state.url} — escalating to a hardcoded API-key URL: ${fallback}`);
4450
- try {
4451
- await this.browser.goto(fallback);
4452
- await this.browser.waitForInteractiveDom(5, 15_000);
4453
- }
4454
- catch (err) {
4455
- args.steps.push(`Post-verify: stuck-loop fallback navigate failed (${err instanceof Error ? err.message : String(err)}) — continuing.`);
4456
- }
4457
- // Reset signature tracking so the next round starts clean
4458
- // against the new URL's inventory. Don't reset
4459
- // stuckFiresAtUrl here — it's keyed by URL and the URL
4460
- // about to be observed will be different, which naturally
4461
- // resets it on the next loop entry.
4462
- prevSignature = null;
4463
- prevInventorySize = -1;
4464
- hint = undefined;
4465
- // Don't bump capturedRound — captureOnboardingRound above
4466
- // already wrote a capture for this round (the stuck-loop
4467
- // detector runs AFTER the capture, so the planner's
4468
- // observed step IS on disk). Bumping again here would
4469
- // leave a phantom gap in the chain that verifyCaptureChain
4470
- // rejects as missing_round.
4471
- continue;
4472
- }
4473
- // Every plausible fallback URL has been tried and we're
4474
- // still stuck. Mark with the [stuck_loop] sentinel so the
4475
- // caller surfaces planner_stuck instead of the generic
4476
- // oauth_onboarding_failed, then break out of the loop.
4477
- this.lastPostVerifyDoneReason =
4478
- `[stuck_loop] planner re-picked the same ${nextStep.kind} step ${stuckFiresAtUrl} times at ${state.url} with no inventory change; ` +
4479
- `hardcoded API-key URL fallbacks exhausted (tried: ${[...triedFallbackUrls].join(", ") || "none"}). ` +
4480
- `Latest planner reason: ${nextStep.reason}`;
4481
- args.steps.push(`Post-verify: stuck-loop unresolvable — breaking out with planner_stuck.`);
4482
- break;
4483
- }
4484
3951
  args.steps.push(sameSelector
4485
3952
  ? `Post-verify: no-progress detected — same ${nextStep.kind} on same selector, inventory unchanged. Re-planning instead of re-running.`
4486
3953
  : `Post-verify: no-progress detected — successive click steps with no inventory change. Forcing a non-click action.`);
@@ -4550,131 +4017,129 @@ ${formatInventory(input.inventory)}`,
4550
4017
  hint = undefined;
4551
4018
  try {
4552
4019
  if (nextStep.kind === "extract") {
4553
- // rc.16 — record that the planner has now affirmatively
4554
- // asked to extract from the current page. The top-of-iter
4555
- // early-exit consults this to distinguish "api_key came
4556
- // from a hidden field on a billing page" (don't exit) from
4557
- // "api_key came from a labeled credential row the planner
4558
- // just observed" (safe to exit on single-cred services).
4559
- plannerExtractEmitted = true;
4560
- // 0.8.2-rc.12 — multi-cred preservation + always-on Phase E.
4561
- //
4562
- // Pre-rc.12 the extract step was a tower of "if no api_key,
4563
- // try Phase E; else done." That short-circuit silently lost
4564
- // cloud_name + api_secret on Cloudinary-class services whose
4565
- // api_key is plain-visible to the legacy regex extractor —
4566
- // the legacy path filled credentials.api_key, the if-branch
4567
- // skipped Phase E entirely, and the loop's top-of-iter exit
4568
- // returned a partial bundle.
4569
- //
4570
- // New shape: run the legacy extractor, Phase E, the reveal
4571
- // pass, and DOM-proximity UNCONDITIONALLY on every extract
4572
- // round, merging each into `credentials` first-wins. A later
4573
- // pass never clobbers a value an earlier pass labeled. This
4574
- // mirrors the design doc: Phase E is the multi-cred surface;
4575
- // single-cred is just multi-cred-with-one-key.
4576
- const [pageText, inputValues] = await Promise.all([
4577
- this.browser.extractText().catch(() => ""),
4578
- this.browser.extractAllInputValues().catch(() => []),
4579
- ]);
4580
- const verifySource = pageText + "\n" + inputValues.join("\n");
4581
- // Tier 1 — legacy single-cred extractor (api_key by shape).
4582
- // Merge into the running accumulator instead of overwriting;
4583
- // a Phase E label captured on a prior round wins over a
4584
- // later legacy regex hit.
4585
- const legacy = await this.extractCredentials();
4586
- for (const [k, v] of Object.entries(legacy)) {
4587
- if (credentials[k] === undefined)
4588
- credentials[k] = v;
4589
- }
4590
- // Tier 2 — Phase E labeled-token parser over the planner's
4591
- // reason. Picks up cloud_name='dlq4xgrca' / api_key='4917…'
4592
- // / application_id='X' / admin_api_key='…' style narrative.
4593
- const labeled = extractAllLabeledTokensFromReason(nextStep.reason, verifySource);
4594
- const labeledNewKeys = Object.keys(labeled).filter((k) => credentials[k] === undefined);
4595
- if (labeledNewKeys.length > 0) {
4596
- for (const k of labeledNewKeys)
4597
- credentials[k] = labeled[k];
4598
- const summary = labeledNewKeys
4599
- .map((k) => `${k}=${labeled[k].slice(0, 4)}…${labeled[k].slice(-4)}`)
4600
- .join(", ");
4601
- args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: Phase E surfaced ${labeledNewKeys.length} labeled credential(s) (${summary})`);
4602
- }
4603
- // Tier 2.5 — reveal-then-extract when the planner explicitly
4604
- // flagged a masked credential. Fires whether or not we
4605
- // already have other credentials — Cloudinary's api_secret
4606
- // sits beside an already-visible api_key in the table.
4607
- const MASKED_HINT = /\b(?:masked|hidden|bullets?|asterisks?|••+|\*{3,}|reveal|unmask)\b/i;
4608
- if (MASKED_HINT.test(nextStep.reason)) {
4609
- try {
4610
- const revealRes = await this.browser.revealMaskedCredentials();
4611
- args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: reveal pass clicked=${revealRes.clicked} diagnostic=[${revealRes.diagnostic.join("; ")}]`);
4612
- if (revealRes.clicked > 0) {
4613
- const labeledAfter = await this.extractFromDomProximity();
4614
- const afterNewKeys = Object.keys(labeledAfter).filter((k) => credentials[k] === undefined);
4615
- if (afterNewKeys.length > 0) {
4616
- for (const k of afterNewKeys)
4617
- credentials[k] = labeledAfter[k];
4618
- args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: post-reveal DOM-proximity extracted ${afterNewKeys.length} more (${afterNewKeys.join(", ")})`);
4020
+ credentials = await this.extractCredentials();
4021
+ if (credentials.api_key === undefined) {
4022
+ // rc.28 — planner-quoted-token fallback. The regex
4023
+ // library missed (IPInfo's 14-char hex; some other
4024
+ // shape) but the planner's reason often literally
4025
+ // quotes the value. Accept it IF it's also present
4026
+ // verbatim in the visible page text — that's the
4027
+ // anti-hallucination guardrail.
4028
+ // rc.38 — verify the planner-quoted value against both
4029
+ // visible text AND every input's `value` attribute. The
4030
+ // rc.37 Upstash retest showed the bot quoting a bare UUID
4031
+ // it observed in a create-key modal whose UUID lived in
4032
+ // an <input readonly value="…"> — textContent doesn't
4033
+ // include input values, so the verbatim-in-page check
4034
+ // rejected a real credential. Concatenating input values
4035
+ // closes the gap without weakening the anti-hallucination
4036
+ // guarantee (the candidate still has to appear SOMEWHERE
4037
+ // verifiable on the page).
4038
+ const [pageText, inputValues] = await Promise.all([
4039
+ this.browser.extractText().catch(() => ""),
4040
+ this.browser.extractAllInputValues().catch(() => []),
4041
+ ]);
4042
+ const verifySource = pageText + "\n" + inputValues.join("\n");
4043
+ // Phase E multi-cred-aware extraction. Try the labeled
4044
+ // multi-credential parser FIRST. If the planner labeled
4045
+ // 2+ distinct credentials in its reason, fold them all
4046
+ // into the credentials Record. If the parser found at
4047
+ // least one new value (cloud_name, api_secret, etc.
4048
+ // anything beyond the single api_key the legacy path
4049
+ // captures), prefer this. Falls through to the single-
4050
+ // value extractQuotedTokenFromReason when no labeled
4051
+ // tokens parsed (single-cred services, ad-hoc planner
4052
+ // prose without explicit labels).
4053
+ const labeled = extractAllLabeledTokensFromReason(nextStep.reason, verifySource);
4054
+ const labeledKeys = Object.keys(labeled);
4055
+ if (labeledKeys.length >= 2 || (labeledKeys.length === 1 && labeled["api_key"] === undefined)) {
4056
+ credentials = { ...credentials, ...labeled };
4057
+ const summary = labeledKeys
4058
+ .map((k) => `${k}=${labeled[k].slice(0, 4)}…${labeled[k].slice(-4)}`)
4059
+ .join(", ");
4060
+ args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: extracted ${labeledKeys.length} labeled credential(s) ` +
4061
+ `via Phase E parser (${summary})`);
4062
+ // When the planner's reason explicitly flags a masked
4063
+ // credential ("api_secret is masked", "hidden behind
4064
+ // asterisks", "click Reveal to show"), Phase E only
4065
+ // captured the visible values — try to reveal + extract
4066
+ // the rest on the same round before continuing. Without
4067
+ // this, the loop returns success with a partial bundle
4068
+ // and never tries the reveal click.
4069
+ const MASKED_HINT = /\b(?:masked|hidden|bullets?|asterisks?|••+|\*{3,}|reveal|unmask)\b/i;
4070
+ if (MASKED_HINT.test(nextStep.reason)) {
4071
+ try {
4072
+ const revealRes = await this.browser.revealMaskedCredentials();
4073
+ args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: reveal pass clicked=${revealRes.clicked} diagnostic=[${revealRes.diagnostic.join("; ")}]`);
4074
+ if (revealRes.clicked > 0) {
4075
+ const labeledAfter = await this.extractFromDomProximity();
4076
+ const newKeys = Object.keys(labeledAfter).filter((k) => credentials[k] === undefined);
4077
+ if (newKeys.length > 0) {
4078
+ for (const k of newKeys)
4079
+ credentials[k] = labeledAfter[k];
4080
+ args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: post-reveal DOM-proximity extracted ${newKeys.length} more (${newKeys.join(", ")})`);
4081
+ }
4082
+ else {
4083
+ // Surface ALL labeled candidates we found, so
4084
+ // we can see whether the value is on-page but
4085
+ // mislabeled vs. genuinely not surfaced.
4086
+ const allLabeled = await this.browser.extractLabeledCredentialCandidates();
4087
+ const summary = allLabeled
4088
+ .filter((c) => !c.isMasked)
4089
+ .slice(0, 8)
4090
+ .map((c) => `${c.value.slice(0, 6)}…(${c.value.length}ch)/${c.label ?? "no-label"}`)
4091
+ .join(", ");
4092
+ args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: post-reveal had ${allLabeled.length} candidates; visible: ${summary}`);
4093
+ }
4094
+ }
4619
4095
  }
4620
- else {
4621
- // Diagnostic: which candidates were seen on the page?
4622
- // Helps debug "Reveal click landed but the value
4623
- // didn't appear in proximity to a known label".
4624
- const allLabeled = await this.browser.extractLabeledCredentialCandidates();
4625
- const candSummary = allLabeled
4626
- .filter((c) => !c.isMasked)
4627
- .slice(0, 8)
4628
- .map((c) => `${c.value.slice(0, 6)}…(${c.value.length}ch)/${c.label ?? "no-label"}`)
4629
- .join(", ");
4630
- args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: post-reveal had ${allLabeled.length} candidates; visible: ${candSummary}`);
4096
+ catch (err) {
4097
+ args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: reveal pass error (${err instanceof Error ? err.message : String(err)})`);
4631
4098
  }
4632
4099
  }
4100
+ consecutiveFailedExtracts = 0;
4101
+ continue;
4633
4102
  }
4634
- catch (err) {
4635
- args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: reveal pass error (${err instanceof Error ? err.message : String(err)})`);
4636
- }
4637
- }
4638
- // Tier 3 — DOM-proximity labeled extractor. Walks the
4639
- // visible DOM, pairs credential-shape strings with their
4640
- // nearest credential-label text. Catches services whose
4641
- // planner-reason narrative missed sibling labels but whose
4642
- // DOM still has them as <td>/<dt> pairs.
4643
- try {
4644
- const labeledFromDom = await this.extractFromDomProximity();
4645
- const domNewKeys = Object.keys(labeledFromDom).filter((k) => credentials[k] === undefined);
4646
- if (domNewKeys.length > 0) {
4647
- for (const k of domNewKeys)
4648
- credentials[k] = labeledFromDom[k];
4649
- const summary = domNewKeys
4650
- .map((k) => `${k}=${labeledFromDom[k].slice(0, 4)}…${labeledFromDom[k].slice(-4)}`)
4651
- .join(", ");
4652
- args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: DOM-proximity surfaced ${domNewKeys.length} more (${summary})`);
4653
- }
4654
- }
4655
- catch {
4656
- // best-effort; never abort an extract pass on DOM-proximity
4657
- // failure (page mid-navigation etc).
4658
- }
4659
- // Anything found across all tiers? hasAnyExtractedCredential
4660
- // also catches non-api_key labels (cloud_name, application_id).
4661
- if (hasAnyExtractedCredential(credentials)) {
4662
- consecutiveFailedExtracts = 0;
4663
- continue;
4664
- }
4665
- // True extract failure — every tier missed. Try the legacy
4666
- // single-value planner-quoted fallback for services whose
4667
- // planner prose just bare-quotes the value without a known
4668
- // label vocabulary (Railway UUID-only, IPInfo 14-hex).
4669
- {
4670
4103
  const quoted = extractQuotedTokenFromReason(nextStep.reason, verifySource);
4671
4104
  if (quoted !== null) {
4672
- credentials.api_key = quoted;
4105
+ credentials = { ...credentials, api_key: quoted };
4673
4106
  args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: extracted token via ` +
4674
4107
  `planner-quoted fallback (${quoted.slice(0, 4)}…${quoted.slice(-4)})`);
4675
4108
  consecutiveFailedExtracts = 0;
4676
4109
  continue;
4677
4110
  }
4111
+ // Tier 4 — DOM-proximity labeled credential extraction.
4112
+ // Run BEFORE bailing the extract. Walks the visible DOM,
4113
+ // finds credential-shape strings, pairs each with its
4114
+ // nearest credential-label text by Euclidean center
4115
+ // distance. Catches multi-cred pages where the planner
4116
+ // mentioned ONE value but the DOM shows several (the
4117
+ // planner's narrative-style extract reason missed the
4118
+ // sibling labels). Also tries to unmask hidden secrets
4119
+ // first by clicking visible Reveal/Eye/Copy buttons.
4120
+ try {
4121
+ await this.browser.revealMaskedCredentials();
4122
+ }
4123
+ catch {
4124
+ // Best-effort; never block the extract pass on a
4125
+ // reveal-click failure.
4126
+ }
4127
+ const labeledFromDom = await this.extractFromDomProximity();
4128
+ const newKeys = Object.keys(labeledFromDom).filter((k) => credentials[k] === undefined);
4129
+ if (newKeys.length > 0) {
4130
+ for (const k of newKeys)
4131
+ credentials[k] = labeledFromDom[k];
4132
+ const summary = newKeys
4133
+ .map((k) => {
4134
+ const v = labeledFromDom[k];
4135
+ return `${k}=${v.slice(0, 4)}…${v.slice(-4)}`;
4136
+ })
4137
+ .join(", ");
4138
+ args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: extracted ${newKeys.length} labeled credential(s) ` +
4139
+ `via DOM-proximity fallback (${summary})`);
4140
+ consecutiveFailedExtracts = 0;
4141
+ continue;
4142
+ }
4678
4143
  consecutiveFailedExtracts += 1;
4679
4144
  // Best-effort diagnostic upload: when extract returns
4680
4145
  // null despite the planner asserting a credential is
@@ -4739,6 +4204,9 @@ ${formatInventory(input.inventory)}`,
4739
4204
  "generate a fresh one — its full value is shown once, on creation.";
4740
4205
  }
4741
4206
  }
4207
+ else {
4208
+ consecutiveFailedExtracts = 0;
4209
+ }
4742
4210
  }
4743
4211
  else if (nextStep.kind === "click") {
4744
4212
  await this.browser.click(nextStep.selector);
@@ -4754,46 +4222,13 @@ ${formatInventory(input.inventory)}`,
4754
4222
  // services without modal-delay returns in <1s. Saves both
4755
4223
  // time (no overshoot wait) and correctness (catches the
4756
4224
  // modal-render race).
4757
- // 0.8.2-rc.12 — merge polled extract into the running
4758
- // credentials accumulator (was previously assigned to a
4759
- // throwaway `pollExtract` local). On modal-key reveal
4760
- // flows (OpenRouter, Anthropic, OpenAI) the credential
4761
- // appears only here, and the legacy assignment was lost
4762
- // unless the next round's top-of-iter re-read just
4763
- // happened to find it again — a flaky guarantee.
4764
- //
4765
- // 0.8.2-rc.15 — also poll DOM-proximity. A click that
4766
- // reveals an api_secret next to a known label (Cloudinary
4767
- // reveal click → api_secret becomes visible next to "API
4768
- // Secret" text) wouldn't surface in the legacy api_key-
4769
- // shaped regex, so a multi-cred reveal landed nothing
4770
- // unless the explicit extract round re-fired afterward.
4771
4225
  const credentialDeadline = Date.now() + 8000;
4226
+ let pollExtract = {};
4772
4227
  while (Date.now() < credentialDeadline) {
4773
4228
  await this.browser.wait(0.5);
4774
4229
  try {
4775
- const pollExtract = await this.extractCredentials();
4776
- for (const [k, v] of Object.entries(pollExtract)) {
4777
- if (credentials[k] === undefined)
4778
- credentials[k] = v;
4779
- }
4780
- try {
4781
- const pollLabeled = await this.extractFromDomProximity();
4782
- for (const [k, v] of Object.entries(pollLabeled)) {
4783
- if (credentials[k] === undefined)
4784
- credentials[k] = v;
4785
- }
4786
- }
4787
- catch {
4788
- // DOM-proximity failure is non-fatal; we'll retry
4789
- // the next tick or fall through to the next round.
4790
- }
4791
- // Early-exit when we have an api_key — most services'
4792
- // happy path completes in <1s. Multi-cred siblings
4793
- // (api_secret, cloud_name) keep accumulating across
4794
- // subsequent rounds; we don't hold the inner poll for
4795
- // them here.
4796
- if (credentials.api_key !== undefined)
4230
+ pollExtract = await this.extractCredentials();
4231
+ if (pollExtract.api_key !== undefined)
4797
4232
  break;
4798
4233
  }
4799
4234
  catch {
@@ -4878,24 +4313,9 @@ ${formatInventory(input.inventory)}`,
4878
4313
  }
4879
4314
  // Re-extract — but tolerate the page still navigating from the
4880
4315
  // step just taken; the next round settles and re-reads.
4881
- // 0.8.2-rc.12 — MERGE into the running accumulator. The pre-
4882
- // rc.12 unconditional assignment wiped multi-cred fields the
4883
- // explicit extract round just accumulated (cloud_name, api_secret,
4884
- // etc.); on the next round's top-of-iter early-exit, only the
4885
- // legacy single api_key survived.
4886
- // 0.8.2-rc.12 — count distinct credential keys before re-extract
4887
- // so the synthetic-extract trigger fires on ANY new key, not just
4888
- // the legacy api_key / username pair. A cloudinary reveal click
4889
- // can produce a fresh api_secret while api_key was already set;
4890
- // the pre-rc.12 trigger silently skipped the synthetic capture
4891
- // and the synthesizer then rejected on no_extract_step.
4892
- const credCountBefore = Object.keys(credentials).filter((k) => !NON_CREDENTIAL_KEYS.has(k)).length;
4316
+ const hadCredentialsBefore = credentials.api_key !== undefined || credentials.username !== undefined;
4893
4317
  try {
4894
- const reExtract = await this.extractCredentials();
4895
- for (const [k, v] of Object.entries(reExtract)) {
4896
- if (credentials[k] === undefined)
4897
- credentials[k] = v;
4898
- }
4318
+ credentials = await this.extractCredentials();
4899
4319
  }
4900
4320
  catch {
4901
4321
  // page mid-navigation — next round's waitForFormReady handles it
@@ -4913,8 +4333,8 @@ ${formatInventory(input.inventory)}`,
4913
4333
  // RIGHT NOW (the action just ran, the token row is now visible).
4914
4334
  // Best-effort — a capture failure must never block returning the
4915
4335
  // credential we already have.
4916
- const credCountAfter = Object.keys(credentials).filter((k) => !NON_CREDENTIAL_KEYS.has(k)).length;
4917
- const haveNewCredentials = credCountAfter > credCountBefore;
4336
+ const haveNewCredentials = !hadCredentialsBefore &&
4337
+ (credentials.api_key !== undefined || credentials.username !== undefined);
4918
4338
  if (haveNewCredentials && nextStep.kind !== "extract") {
4919
4339
  try {
4920
4340
  const [postState, postInventory] = await Promise.all([
@@ -4963,39 +4383,6 @@ ${formatInventory(input.inventory)}`,
4963
4383
  }
4964
4384
  }
4965
4385
  }
4966
- // 0.8.2-rc.10 — existing-account-no-extract classifier. Runs once
4967
- // at loop exit when no credential surfaced AND no more specific
4968
- // marker (paywall, anti-bot, stuck_loop) was already set on
4969
- // lastPostVerifyDoneReason. The test identity
4970
- // (methoxine@gmail.com) accumulates real signups across batches;
4971
- // re-running against the same vendor lands the bot on an
4972
- // authenticated dashboard whose API-keys page shows a masked
4973
- // pre-existing key it cannot reveal (most vendors only show the
4974
- // key value once at create-time). Reporting these as
4975
- // oauth_onboarding_failed is misleading — the bot did navigate
4976
- // correctly, the state is just unrecoverable for this identity.
4977
- const alreadyClassified = this.lastPostVerifyDoneReason !== null &&
4978
- this.lastPostVerifyDoneReason.startsWith("[");
4979
- if (credentials.api_key === undefined &&
4980
- credentials.username === undefined &&
4981
- !alreadyClassified) {
4982
- try {
4983
- const finalState = await this.browser.getState();
4984
- const finalText = await this.browser.extractText().catch(() => "");
4985
- if (detectExistingAccountNoExtract({
4986
- url: finalState.url,
4987
- pageText: finalText,
4988
- lastPlannerReason: this.lastPostVerifyDoneReason ?? "",
4989
- })) {
4990
- this.lastPostVerifyDoneReason =
4991
- `[existing_account_no_extract] at ${finalState.url}; latest planner reason: ${this.lastPostVerifyDoneReason ?? "(none — loop exhausted)"}`;
4992
- args.steps.push("Post-verify: classified as existing_account_no_extract — masked pre-existing key on an authenticated dashboard.");
4993
- }
4994
- }
4995
- catch {
4996
- // best-effort classifier — never block returning the (empty) credentials
4997
- }
4998
- }
4999
4386
  return credentials;
5000
4387
  }
5001
4388
  // Sign in with the credentials created during signup, so the
@@ -5095,30 +4482,17 @@ Strategy:
5095
4482
  return "extract" for a masked key, and do not return "extract" twice
5096
4483
  in a row. Instead click "Create API Key" / "New API Key" / "Generate"
5097
4484
  to make a fresh key, then extract its full value.
5098
- - **PARTIAL MULTI-CRED EXTRACT IS BETTER THAN ZERO** on a multi-
5099
- cred page where some credentials are visible and others are masked
5100
- behind a Reveal button, return {"kind":"extract"} NOW for the
5101
- visible labels (the bot's labeled extractor folds them into the
5102
- credentials bundle) AND in the same reason field flag the masked
5103
- credential so the bot's automatic reveal pass fires. Example
5104
- reason for Cloudinary: "cloud_name='dlq4xgrca' and
5105
- api_key='491741466469613' are visible in the table; api_secret is
5106
- hidden behind a Reveal button please unmask." The masked
5107
- credential's label MUST appear with one of the trigger words
5108
- (masked / hidden / reveal / unmask / bullets / asterisks) so the
5109
- reveal pass triggers. Do this BEFORE attempting any explicit
5110
- reveal click — getting the visible values into the bundle first
5111
- means a failed reveal click only loses the masked credential, not
5112
- the visible ones too.
5113
- - **REVEAL-CLICK AS A FALLBACK** — when the page has ONLY a masked
5114
- credential (no visible siblings) AND there is a VISIBLE "Show",
5115
- "Reveal", "Eye", or eye-icon button next to it, emit a CLICK on
5116
- that button. If a previous reveal click had no effect (the page's
5117
- inventory and screenshot look identical), do NOT keep retrying —
5118
- emit {"kind":"extract"} anyway: the bot's labeled extractor will
5119
- capture whatever IS visible (even if just a cloud_name with no
5120
- api_secret) and return the partial bundle to the caller, which is
5121
- more useful than five wasted rounds of clicking a dead reveal.
4485
+ - **REVEAL-CLICK BEFORE EXTRACT** when a credential is shown masked
4486
+ (•••••, asterisks, dots) AND there is a VISIBLE "Show", "Reveal",
4487
+ "Eye", or eye-icon button NEXT TO IT (typically same row in a
4488
+ credentials table Cloudinary, Twilio, Stripe all follow this
4489
+ pattern for api_secret / auth_token / secret_key), emit a CLICK
4490
+ on that show/reveal button FIRST. Do NOT return extract on the same
4491
+ round as the masked display — the masked text would be parsed as
4492
+ the value. Next round the value will be visible and your extract
4493
+ step can quote it. The bot's reveal-pass is a fallback; explicit
4494
+ clicks via the planner are more reliable because you can see the
4495
+ exact button in the screenshot.
5122
4496
  - To reach API keys, prefer a {"kind":"navigate"} straight to the
5123
4497
  service's API-keys settings URL — note these usually live under the
5124
4498
  user/ACCOUNT settings, not a project or workspace's settings.