@trusty-squire/mcp 0.8.2 → 0.8.3-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/dist/api-client.d.ts +51 -0
  2. package/dist/api-client.d.ts.map +1 -1
  3. package/dist/api-client.js +25 -0
  4. package/dist/api-client.js.map +1 -1
  5. package/dist/bot/agent.d.ts +3 -0
  6. package/dist/bot/agent.d.ts.map +1 -1
  7. package/dist/bot/agent.js +479 -24
  8. package/dist/bot/agent.js.map +1 -1
  9. package/dist/bot/browser.d.ts +1 -0
  10. package/dist/bot/browser.d.ts.map +1 -1
  11. package/dist/bot/browser.js +63 -5
  12. package/dist/bot/browser.js.map +1 -1
  13. package/dist/bot/google-login.d.ts +1 -0
  14. package/dist/bot/google-login.d.ts.map +1 -1
  15. package/dist/bot/google-login.js +21 -1
  16. package/dist/bot/google-login.js.map +1 -1
  17. package/dist/bot/inbox-client.d.ts +3 -0
  18. package/dist/bot/inbox-client.d.ts.map +1 -1
  19. package/dist/bot/inbox-client.js +112 -0
  20. package/dist/bot/inbox-client.js.map +1 -1
  21. package/dist/bot/near-text-hint.d.ts +4 -0
  22. package/dist/bot/near-text-hint.d.ts.map +1 -0
  23. package/dist/bot/near-text-hint.js +72 -0
  24. package/dist/bot/near-text-hint.js.map +1 -0
  25. package/dist/bot/promote-to-skill.d.ts.map +1 -1
  26. package/dist/bot/promote-to-skill.js +365 -49
  27. package/dist/bot/promote-to-skill.js.map +1 -1
  28. package/dist/bot/read-otp.d.ts +6 -0
  29. package/dist/bot/read-otp.d.ts.map +1 -1
  30. package/dist/bot/read-otp.js +22 -0
  31. package/dist/bot/read-otp.js.map +1 -1
  32. package/dist/bot/replay-skill.d.ts.map +1 -1
  33. package/dist/bot/replay-skill.js +99 -109
  34. package/dist/bot/replay-skill.js.map +1 -1
  35. package/dist/install/agents.d.ts +1 -0
  36. package/dist/install/agents.d.ts.map +1 -1
  37. package/dist/install/agents.js +32 -0
  38. package/dist/install/agents.js.map +1 -1
  39. package/dist/install/cli.d.ts.map +1 -1
  40. package/dist/install/cli.js +24 -2
  41. package/dist/install/cli.js.map +1 -1
  42. package/dist/server.d.ts.map +1 -1
  43. package/dist/server.js +20 -1
  44. package/dist/server.js.map +1 -1
  45. package/dist/tools/always-load.d.ts +4 -0
  46. package/dist/tools/always-load.d.ts.map +1 -0
  47. package/dist/tools/always-load.js +6 -0
  48. package/dist/tools/always-load.js.map +1 -0
  49. package/dist/tools/delete-credential.d.ts +12 -0
  50. package/dist/tools/delete-credential.d.ts.map +1 -0
  51. package/dist/tools/delete-credential.js +23 -0
  52. package/dist/tools/delete-credential.js.map +1 -0
  53. package/dist/tools/index.d.ts +10 -1
  54. package/dist/tools/index.d.ts.map +1 -1
  55. package/dist/tools/index.js +17 -1
  56. package/dist/tools/index.js.map +1 -1
  57. package/dist/tools/poll-credential-access.d.ts +12 -0
  58. package/dist/tools/poll-credential-access.d.ts.map +1 -0
  59. package/dist/tools/poll-credential-access.js +29 -0
  60. package/dist/tools/poll-credential-access.js.map +1 -0
  61. package/dist/tools/request-credential.d.ts +69 -0
  62. package/dist/tools/request-credential.d.ts.map +1 -0
  63. package/dist/tools/request-credential.js +69 -0
  64. package/dist/tools/request-credential.js.map +1 -0
  65. package/dist/tools/rotate-credential.d.ts +15 -0
  66. package/dist/tools/rotate-credential.d.ts.map +1 -0
  67. package/dist/tools/rotate-credential.js +29 -0
  68. package/dist/tools/rotate-credential.js.map +1 -0
  69. package/dist/tools/store-credential.d.ts +21 -0
  70. package/dist/tools/store-credential.d.ts.map +1 -0
  71. package/dist/tools/store-credential.js +50 -0
  72. package/dist/tools/store-credential.js.map +1 -0
  73. package/dist/tools/use-credential.d.ts +40 -0
  74. package/dist/tools/use-credential.d.ts.map +1 -0
  75. package/dist/tools/use-credential.js +61 -0
  76. package/dist/tools/use-credential.js.map +1 -0
  77. package/package.json +1 -1
package/dist/bot/agent.js CHANGED
@@ -871,7 +871,7 @@ export function detectAlreadySignedIn(args) {
871
871
  // /apps?, /deployments?, /services? — all common product-name
872
872
  // routes that almost always indicate authenticated state.
873
873
  dashboardyPath =
874
- /\/(?:new|dashboard|projects?|account|settings|workspace|home|redis|kafka|vector|cluster|databases?|instances?|apps?|deployments?|services?)(?:\/|$)/i.test(parsed.pathname) && !/\/(?:signup|sign-up|register|login|sign-in|signin)/i.test(parsed.pathname);
874
+ /\/(?:new|dashboard|projects?|account|settings|workspace|home|redis|kafka|vector|cluster|databases?|instances?|apps?|deployments?|services?|onboarding|welcome|getting-started|get-started|setup)(?:\/|$)/i.test(parsed.pathname) && !/\/(?:signup|sign-up|register|login|sign-in|signin)/i.test(parsed.pathname);
875
875
  }
876
876
  catch {
877
877
  // Malformed URL — skip URL signal.
@@ -932,6 +932,32 @@ export function detectAlreadySignedIn(args) {
932
932
  if (hasWorkspacePicker && !hasSignupOrOAuthAffordance) {
933
933
  return true;
934
934
  }
935
+ // 0.8.3-rc.1 — onboarding-wizard step shape. When the URL clearly
936
+ // names an onboarding path (/onboarding, /welcome, /getting-started,
937
+ // /setup) AND the page has a Next/Continue/Skip/Submit button AND
938
+ // does NOT have any credential input (caught above) AND does NOT
939
+ // have a Sign-up/Continue-with affordance (i.e. it's not a CHOICE
940
+ // between login and signup), the page is mid-onboarding for an
941
+ // already-authenticated user. Mixpanel hits this when a previous
942
+ // run created the account but didn't finish the multi-step
943
+ // onboarding — the bot returns and lands directly on /onboarding.
944
+ let onboardingPath = false;
945
+ try {
946
+ onboardingPath =
947
+ /\/(?:onboarding|welcome|getting-started|get-started|setup)(?:\/|$)/i.test(new URL(url).pathname);
948
+ }
949
+ catch {
950
+ // Malformed URL — skip
951
+ }
952
+ if (onboardingPath && !hasSignupOrOAuthAffordance) {
953
+ const WIZARD_STEP_BTN = /^\s*(?:next|continue|submit|skip(?:\s+for\s+now)?|finish|done)\s*$/i;
954
+ const hasWizardStepButton = inventory.some((e) => {
955
+ const t = (e.visibleText ?? e.ariaLabel ?? "").trim();
956
+ return WIZARD_STEP_BTN.test(t);
957
+ });
958
+ if (hasWizardStepButton)
959
+ return true;
960
+ }
935
961
  }
936
962
  return false;
937
963
  }
@@ -1088,8 +1114,31 @@ export function findOAuthButton(inventory, provider) {
1088
1114
  // ensured the provider name is present; the length cap MAX_OAUTH_
1089
1115
  // BUTTON_TEXT_CHARS (60) ensures it's still buttonish, not a
1090
1116
  // paragraph that happens to mention the provider.
1117
+ //
1118
+ // 0.8.3-rc.1 — reject minimal-label matches whose href points at
1119
+ // a NON-AUTH path on the provider's domain (most often a project
1120
+ // repo URL like github.com/plausible/analytics in a homepage
1121
+ // footer). Without this gate, plausible's footer "GitHub" link
1122
+ // matched, the bot clicked it, ended up on the analytics repo's
1123
+ // page, and misclassified the README content as a security
1124
+ // challenge — burning the entire OAuth budget on a false alarm.
1091
1125
  const stripped = visibleText.toLowerCase().replace(/[^a-z0-9]+/g, " ").trim();
1092
1126
  if (stripped === keyword || stripped === `with ${keyword}`) {
1127
+ // When the element is an <a> with a provider-domain href, require
1128
+ // it to look like an auth route. Repo / docs / marketing links
1129
+ // on the provider's site never start an OAuth flow.
1130
+ if (href.length > 0) {
1131
+ const providerDomain = provider === "github" ? "github.com" : "google.com";
1132
+ if (href.includes(providerDomain)) {
1133
+ // Accept only if href matches the auth pattern OR points
1134
+ // at a login/signup/sessions/oauth path.
1135
+ const looksLikeAuthPath = hrefRe.test(href) ||
1136
+ /github\.com\/(?:login|signin|sign-in|sessions|oauth\/authorize|apps\/[^/]+\/installations\/new|users\/sign_in)/i.test(href) ||
1137
+ /accounts\.google\.com\/(?:o\/oauth2|signin)/i.test(href);
1138
+ if (!looksLikeAuthPath)
1139
+ continue;
1140
+ }
1141
+ }
1093
1142
  return e;
1094
1143
  }
1095
1144
  }
@@ -1125,6 +1174,25 @@ export function isLoginLoopState(url, inventory, provider) {
1125
1174
  // checking the inverse — markers despite the login path.
1126
1175
  if (detectAlreadySignedIn({ inventory, url }))
1127
1176
  return null;
1177
+ // 0.8.3-rc.1 — Clerk-class post-OAuth supplementary-signup form.
1178
+ // Some services (Clerk's dashboard, certain Auth0 templates) handle
1179
+ // OAuth identity but ALSO require the user to fill an email +
1180
+ // password form (plus often a Cloudflare turnstile) before the
1181
+ // account is created. Post-OAuth lands on /sign-up with both the
1182
+ // OAuth buttons still visible AND credential inputs. The legacy
1183
+ // loop-detect path saw the Google button + the login-shaped URL
1184
+ // and looped OAuth indefinitely.
1185
+ //
1186
+ // When BOTH (1) an email/password input is visible AND (2) an
1187
+ // OAuth button for the provider we just used is visible, the page
1188
+ // is a hybrid form, not a loop. Return null so the caller falls
1189
+ // through to the post-verify flow — its planner can drive the
1190
+ // form-fill, the captcha gate (Cloudflare turnstile shows up as a
1191
+ // `check`-shaped checkbox in inventory), and the Continue click
1192
+ // the same way the form-fill phase does for non-OAuth signups.
1193
+ const hasCredentialInput = inventory.some((e) => e.tag === "input" && (e.type === "email" || e.type === "password"));
1194
+ if (hasCredentialInput)
1195
+ return null;
1128
1196
  return findOAuthButton(inventory, provider);
1129
1197
  }
1130
1198
  // Path-only formatter for step trail entries. Same parse semantics as
@@ -1499,6 +1567,25 @@ export function extractAllLabeledTokensFromReason(reason, pageText) {
1499
1567
  personal_api_key: "personal_api_key",
1500
1568
  app_key: "app_key",
1501
1569
  appkey: "app_key",
1570
+ // 0.8.3-rc.1 — typeform's planner uses `personal_access_token`
1571
+ // and `Personal access token` (the latter when transcribing the
1572
+ // page heading verbatim). Both alias to api_key — typeform issues
1573
+ // ONE token type, and downstream consumers expect `api_key`.
1574
+ personal_access_token: "api_key",
1575
+ personalaccesstoken: "api_key",
1576
+ // Bearer / private / write key patterns surfaced across the
1577
+ // 2026-05-29 retest. Each was quoted by the planner but not in
1578
+ // the alias set, so the labeled extractor missed them.
1579
+ bearer_token: "api_key",
1580
+ bearertoken: "api_key",
1581
+ private_key: "api_key",
1582
+ privatekey: "api_key",
1583
+ write_key: "api_key",
1584
+ writekey: "api_key",
1585
+ read_key: "api_key",
1586
+ readkey: "api_key",
1587
+ server_token: "api_key",
1588
+ servertoken: "api_key",
1502
1589
  };
1503
1590
  const out = {};
1504
1591
  // Build the label-alternation from the whitelist keys. Restricting
@@ -1509,9 +1596,10 @@ export function extractAllLabeledTokensFromReason(reason, pageText) {
1509
1596
  // regex prefers `admin_api_key` over `api_key` at the same start.
1510
1597
  const labelKeys = Object.keys(LABEL_ALIASES).sort((a, b) => b.length - a.length);
1511
1598
  const labelAlt = labelKeys.map(escapeRegex).join("|");
1512
- // Hyphen variants — the LLM sometimes emits `cloud-name` instead of
1513
- // `cloud_name`. Replace _ with [-_] inside each alternative.
1514
- const labelAltLoose = labelAlt.replace(/_/g, "[-_]");
1599
+ // Hyphen + space variants — the LLM sometimes emits `cloud-name`
1600
+ // or `Cloud name` instead of `cloud_name`. Replace _ with
1601
+ // [-_\s] inside each alternative so the regex matches all three.
1602
+ const labelAltLoose = labelAlt.replace(/_/g, "[-_\\s]");
1515
1603
  // Two patterns:
1516
1604
  //
1517
1605
  // (A) Strict QUOTED form — `label='value'` / `label="value"` /
@@ -1529,7 +1617,7 @@ export function extractAllLabeledTokensFromReason(reason, pageText) {
1529
1617
  // English status words that look label-like in extract prose.
1530
1618
  const quotedRe = new RegExp(`\\b(${labelAltLoose})\\b\\s*[=:]\\s*['"\`]([A-Za-z0-9_\\-]{4,80})['"\`]`, "gi");
1531
1619
  for (const m of reason.matchAll(quotedRe)) {
1532
- const rawLabel = (m[1] ?? "").toLowerCase().replace(/-/g, "_");
1620
+ const rawLabel = (m[1] ?? "").toLowerCase().replace(/[-\s]+/g, "_");
1533
1621
  const normalized = rawLabel.replace(/_+/g, "_");
1534
1622
  const canonical = LABEL_ALIASES[normalized];
1535
1623
  const value = m[2];
@@ -1570,7 +1658,7 @@ export function extractAllLabeledTokensFromReason(reason, pageText) {
1570
1658
  // captured (possibly-unquoted) value.
1571
1659
  const proseRe = new RegExp(`\\b(${labelAltLoose})\\b\\s*(?:[=:]|\\b(?:is|are)\\b)\\s*['"\`]?([A-Za-z0-9_\\-]{4,80})['"\`]?`, "gi");
1572
1660
  for (const m of reason.matchAll(proseRe)) {
1573
- const rawLabel = (m[1] ?? "").toLowerCase().replace(/-/g, "_");
1661
+ const rawLabel = (m[1] ?? "").toLowerCase().replace(/[-\s]+/g, "_");
1574
1662
  const normalized = rawLabel.replace(/_+/g, "_");
1575
1663
  const canonical = LABEL_ALIASES[normalized];
1576
1664
  const value = m[2];
@@ -1651,6 +1739,12 @@ export function extractApiKeyFromText(text) {
1651
1739
  // being in the input field's `value` attribute. rc.14 — surfaced
1652
1740
  // during the harvester rc.13 pass on Neon.
1653
1741
  /\bnapi_[a-zA-Z0-9]{30,80}\b/, // Neon
1742
+ // 0.8.3-rc.1 — typeform personal access tokens. Shape
1743
+ // `tfp_<alnum-with-underscore>` length 40-80. Surfaced during the
1744
+ // 2026-05-29 retest where the planner SAW the token (quoted in
1745
+ // reason) but no regex matched, so extractCredentials returned
1746
+ // null and the bot bailed `oauth_onboarding_failed`.
1747
+ /\btfp_[A-Za-z0-9_]{40,80}\b/, // Typeform
1654
1748
  // Replicate API tokens. `r8_<40-char alnum>` per their docs. Shown
1655
1749
  // in the table row after Create. The post-verify loop iterates,
1656
1750
  // adds rows, but extractCredentials returned null every round
@@ -1952,7 +2046,13 @@ export class SignupAgent {
1952
2046
  // F2 top-level deadline.
1953
2047
  async planExecuteWithRetry(task, fillValues, steps) {
1954
2048
  const MAX_ERROR_REPLANS = 2;
1955
- const MAX_PROGRESS_REPLANS = 4;
2049
+ // 0.8.3-rc.1 widened from 4 to 6 so submit_disabled re-plans
2050
+ // get more attempts to identify the gating control. Mailgun's
2051
+ // signup form has a non-standard required field whose label the
2052
+ // planner missed on the first 5 plans; the 6th attempt typically
2053
+ // surfaces the unchecked checkbox. Bounded by the 15-call LLM
2054
+ // budget so genuinely-stuck signups still terminate.
2055
+ const MAX_PROGRESS_REPLANS = 6;
1956
2056
  let errorReplans = 0;
1957
2057
  let progressReplans = 0;
1958
2058
  let emptyPlans = 0;
@@ -2272,10 +2372,58 @@ export class SignupAgent {
2272
2372
  return { kind: "submit_failed", reason };
2273
2373
  }
2274
2374
  steps.push(`⚠ ${reason} — re-planning to satisfy it`);
2375
+ // 0.8.3-rc.1 — surface concrete unchecked-checkbox candidates
2376
+ // from the current inventory so the planner picks one
2377
+ // immediately rather than re-guessing. Best-effort: a snapshot
2378
+ // failure falls back to the generic prompt that used to be
2379
+ // here.
2380
+ let uncheckedHint = "";
2381
+ let emptyInputHint = "";
2382
+ try {
2383
+ const snapshotInv = await this.buildInventory(steps, undefined, 60);
2384
+ const unchecked = snapshotInv.filter((e) => e.tag === "input" &&
2385
+ (e.type === "checkbox" || e.role === "checkbox") &&
2386
+ e.checked === false &&
2387
+ e.visible === true);
2388
+ if (unchecked.length > 0) {
2389
+ const lines = unchecked
2390
+ .slice(0, 6)
2391
+ .map((e) => {
2392
+ const label = (e.labelText ?? e.ariaLabel ?? e.placeholder ?? e.name ?? "(no label)")
2393
+ .toString()
2394
+ .slice(0, 60);
2395
+ return ` - selector ${JSON.stringify(e.selector)} label=${JSON.stringify(label)}`;
2396
+ });
2397
+ uncheckedHint = `\nUnchecked checkboxes visible on the page:\n${lines.join("\n")}`;
2398
+ }
2399
+ const emptyInputs = snapshotInv.filter((e) => e.tag === "input" &&
2400
+ e.type !== "checkbox" &&
2401
+ e.type !== "radio" &&
2402
+ e.type !== "hidden" &&
2403
+ (e.value === null || e.value === "") &&
2404
+ e.visible === true);
2405
+ if (emptyInputs.length > 0) {
2406
+ const lines = emptyInputs
2407
+ .slice(0, 6)
2408
+ .map((e) => {
2409
+ const label = (e.labelText ?? e.placeholder ?? e.ariaLabel ?? e.name ?? "(no label)")
2410
+ .toString()
2411
+ .slice(0, 60);
2412
+ return ` - selector ${JSON.stringify(e.selector)} label=${JSON.stringify(label)}`;
2413
+ });
2414
+ emptyInputHint = `\nEmpty visible inputs (any could be the unmet required field):\n${lines.join("\n")}`;
2415
+ }
2416
+ }
2417
+ catch {
2418
+ // best-effort
2419
+ }
2275
2420
  hint =
2276
2421
  "The submit button is disabled — a required field or an agreement " +
2277
- "was not satisfied. Find the agreement CHECKBOX (an input of " +
2278
- "type=checkbox, NOT a link to the terms page) and check it.";
2422
+ "was not satisfied. Issue {\"kind\":\"check\"} on an unchecked " +
2423
+ "agreement/terms checkbox, OR {\"kind\":\"fill\"} on an empty " +
2424
+ "required input. Do NOT click a link." +
2425
+ uncheckedHint +
2426
+ emptyInputHint;
2279
2427
  continue;
2280
2428
  }
2281
2429
  steps.push(`⚠ submit click failed: ${reason}`);
@@ -2558,9 +2706,13 @@ export class SignupAgent {
2558
2706
  this.googleChallengeTimeoutMs = opts.googleChallengeTimeoutMs;
2559
2707
  }
2560
2708
  }
2561
- // Default: 2 minutes — enough time for the human to unlock phone,
2562
- // open the Google app, and tap a verification number.
2563
- googleChallengeTimeoutMs = 120_000;
2709
+ // 0.8.3-rc.1 widened from 2 → 4 minutes. The 2-min window forced
2710
+ // the operator to drop everything immediately on a Telegram alert.
2711
+ // For batch-harvest runs the operator is rarely staring at the
2712
+ // phone; 4 minutes gives realistic time to switch devices, unlock,
2713
+ // open the Google app, and tap. Matches the same wait window the
2714
+ // GitHub challenge path now uses.
2715
+ googleChallengeTimeoutMs = 240_000;
2564
2716
  // Read-only view of how many calls landed on which backend. Exported
2565
2717
  // through SignupResult.llm_backends so tests and ops can verify the
2566
2718
  // dual-mode fallback is actually engaging when expected.
@@ -2917,15 +3069,23 @@ export class SignupAgent {
2917
3069
  // Uses the same post-OAuth loop runOAuthFlow uses after a
2918
3070
  // successful handshake.
2919
3071
  let credentials = await this.extractCredentials();
3072
+ const skippedPostVerify = credentials.api_key !== undefined;
2920
3073
  if (credentials.api_key === undefined) {
2921
3074
  credentials = await this.postVerifyLoop({
2922
3075
  service: task.service,
2923
- maxRounds: task.postVerifyMaxRounds ?? 12,
3076
+ maxRounds: task.postVerifyMaxRounds ?? 24,
2924
3077
  steps,
2925
3078
  ...(task.scopeHint !== undefined ? { scopeHint: task.scopeHint } : {}),
2926
3079
  });
2927
3080
  }
2928
3081
  if (credentials.api_key !== undefined) {
3082
+ // 0.8.3-rc.1 — when extractCredentials short-circuited
3083
+ // before postVerifyLoop ran, no captures were written.
3084
+ // Emit a synthetic extract round so auto-promote can
3085
+ // build a "navigate + extract" skill from this run.
3086
+ if (skippedPostVerify) {
3087
+ await this.writeFastPathSyntheticCapture(task.service, 0, true);
3088
+ }
2929
3089
  return {
2930
3090
  success: true,
2931
3091
  credentials,
@@ -3167,7 +3327,22 @@ export class SignupAgent {
3167
3327
  // to find the number to tap.
3168
3328
  await saveDebugSnapshot(this.browser, "google-challenge");
3169
3329
  if (provider.id === "google") {
3170
- const matchNum = extractGoogleNumberMatch(body);
3330
+ // Try text-based extractor first (zero LLM cost), then fall
3331
+ // back to vision when phrasing drifts. The vision path uses
3332
+ // the screenshot we just saved — if a human can read the
3333
+ // number on screen, Claude vision can too.
3334
+ let matchNum = extractGoogleNumberMatch(body);
3335
+ if (matchNum === null) {
3336
+ try {
3337
+ matchNum = await this.extractGoogleNumberViaVision();
3338
+ if (matchNum !== null) {
3339
+ steps.push(`Google: number-match extractor missed phrasing but vision LLM read "${matchNum}" from the challenge screenshot.`);
3340
+ }
3341
+ }
3342
+ catch (err) {
3343
+ steps.push(`Google: vision-fallback for number extraction threw (${err instanceof Error ? err.message : String(err)})`);
3344
+ }
3345
+ }
3171
3346
  if (matchNum !== null) {
3172
3347
  // rc.26 — surface in real-time via stderr as well as the
3173
3348
  // step trail. The step trail only renders after the run
@@ -3179,7 +3354,7 @@ export class SignupAgent {
3179
3354
  void notifyHeightenedAuth({
3180
3355
  service: task.service,
3181
3356
  digit: String(matchNum),
3182
- windowSeconds: 120,
3357
+ windowSeconds: 240,
3183
3358
  machineToken: task.machineToken,
3184
3359
  apiBase: task.apiBase,
3185
3360
  });
@@ -3189,7 +3364,7 @@ export class SignupAgent {
3189
3364
  void sendTelegramHeightenedAuth({
3190
3365
  service: task.service,
3191
3366
  digit: String(matchNum),
3192
- windowSeconds: 120,
3367
+ windowSeconds: 240,
3193
3368
  });
3194
3369
  }
3195
3370
  else {
@@ -3204,14 +3379,14 @@ export class SignupAgent {
3204
3379
  void notifyHeightenedAuth({
3205
3380
  service: task.service,
3206
3381
  digit: null,
3207
- windowSeconds: 120,
3382
+ windowSeconds: 240,
3208
3383
  machineToken: task.machineToken,
3209
3384
  apiBase: task.apiBase,
3210
3385
  });
3211
3386
  void sendTelegramHeightenedAuth({
3212
3387
  service: task.service,
3213
3388
  digit: null,
3214
- windowSeconds: 120,
3389
+ windowSeconds: 240,
3215
3390
  });
3216
3391
  }
3217
3392
  // Either way (number found or not), the user can still
@@ -3219,7 +3394,7 @@ export class SignupAgent {
3219
3394
  // tapping on their phone. Wait the full challenge window (4 minutes by default).
3220
3395
  const cleared = await this.waitForGoogleChallenge(provider, steps);
3221
3396
  if (!cleared) {
3222
- return this.oauthAbort("needs_login", `Google challenge timed out after 2 minutes. ` +
3397
+ return this.oauthAbort("needs_login", `Google challenge timed out after 4 minutes. ` +
3223
3398
  `Re-run \`${loginCmd}\`, complete the challenge in the window, then retry.`, steps);
3224
3399
  }
3225
3400
  steps.push("Google: challenge cleared — continuing OAuth");
@@ -3251,6 +3426,73 @@ export class SignupAgent {
3251
3426
  }
3252
3427
  steps.push("GitHub: skip-link click did not register — falling back to needs_login abort.");
3253
3428
  }
3429
+ // 0.8.3-rc.1 — GitHub email-link challenge auto-clear. When
3430
+ // GitHub fires "verify it's you" on a new device, the
3431
+ // canonical clear path is to click a link in an email GitHub
3432
+ // dispatches to lunchboxfortwo@gmail.com. If GMAIL_USER /
3433
+ // GMAIL_APP_PASSWORD are wired on the API, we can poll for
3434
+ // that email, extract the URL, navigate the bot's browser to
3435
+ // it, and re-enter the OAuth flow. Best-effort: failure
3436
+ // degrades to the phone-tap path below.
3437
+ if (provider.id === "github" &&
3438
+ task.machineToken !== undefined &&
3439
+ task.machineToken.length > 0) {
3440
+ steps.push("GitHub: verify-it's-you challenge — polling operator gmail for a device-confirmation link (up to 60s)");
3441
+ try {
3442
+ const { readGitHubChallengeLink } = await import("./read-otp.js");
3443
+ const linkResult = await readGitHubChallengeLink({
3444
+ machineToken: task.machineToken,
3445
+ ...(task.apiBase !== undefined ? { apiBase: task.apiBase } : {}),
3446
+ maxWaitSeconds: 60,
3447
+ });
3448
+ if (linkResult.code !== null) {
3449
+ steps.push(`GitHub: device-confirmation link found in gmail (reason=${linkResult.reason}) — navigating to it`);
3450
+ try {
3451
+ await this.browser.goto(linkResult.code);
3452
+ await this.browser.wait(3);
3453
+ steps.push("GitHub: device confirmation submitted — re-classifying for the consent flow");
3454
+ i--;
3455
+ continue;
3456
+ }
3457
+ catch (err) {
3458
+ steps.push(`GitHub: navigating to confirmation link failed (${err instanceof Error ? err.message : String(err)})`);
3459
+ }
3460
+ }
3461
+ else {
3462
+ steps.push(`GitHub: no confirmation email arrived within 60s (reason=${linkResult.reason}) — falling back to phone-tap wait`);
3463
+ }
3464
+ }
3465
+ catch (err) {
3466
+ steps.push(`GitHub: challenge-clearing import/call threw (${err instanceof Error ? err.message : String(err)})`);
3467
+ }
3468
+ // 0.8.3-rc.1 — fall back to the phone-tap path: fire
3469
+ // Telegram + heightened-auth notifications and wait 4
3470
+ // minutes for the operator to tap their phone. This is the
3471
+ // same shape Google's challenge path already uses; without
3472
+ // it the bot just times out silently with no operator
3473
+ // surface.
3474
+ console.error(`[universal-bot] GITHUB CHALLENGE: tap your phone for ${task.service} — 4 minute window`);
3475
+ steps.push(`GitHub: phone-tap challenge for ${task.service} — operator has 4 minutes to approve on a registered device`);
3476
+ void notifyHeightenedAuth({
3477
+ service: task.service,
3478
+ digit: null,
3479
+ windowSeconds: 240,
3480
+ machineToken: task.machineToken,
3481
+ apiBase: task.apiBase,
3482
+ });
3483
+ void sendTelegramHeightenedAuth({
3484
+ service: task.service,
3485
+ digit: null,
3486
+ windowSeconds: 240,
3487
+ });
3488
+ const cleared = await this.waitForGitHubChallenge(steps);
3489
+ if (cleared) {
3490
+ steps.push("GitHub: challenge cleared — re-classifying for the consent flow");
3491
+ i--;
3492
+ continue;
3493
+ }
3494
+ steps.push("GitHub: phone-tap window elapsed without clear — aborting with needs_login");
3495
+ }
3254
3496
  return this.oauthAbort("needs_login", `${provider.label} interrupted the sign-in with a security challenge ("verify it's you"). ` +
3255
3497
  `Re-run \`${loginCmd}\`, clear the challenge in the window, then retry.`, steps);
3256
3498
  }
@@ -3564,7 +3806,7 @@ export class SignupAgent {
3564
3806
  // nothing more to do, it returns on the first iteration.
3565
3807
  credentials = await this.postVerifyLoop({
3566
3808
  service: task.service,
3567
- maxRounds: task.postVerifyMaxRounds ?? 12,
3809
+ maxRounds: task.postVerifyMaxRounds ?? 24,
3568
3810
  steps,
3569
3811
  ...(task.scopeHint !== undefined ? { scopeHint: task.scopeHint } : {}),
3570
3812
  });
@@ -3691,7 +3933,104 @@ export class SignupAgent {
3691
3933
  if (state !== "challenge")
3692
3934
  return true;
3693
3935
  }
3694
- steps.push("Google: challenge wait timed out after 2 minutes");
3936
+ steps.push("Google: challenge wait timed out after 4 minutes");
3937
+ return false;
3938
+ }
3939
+ // 0.8.3-rc.1 — Claude-vision fallback for Google number-match
3940
+ // challenge extraction. Fires only when extractGoogleNumberMatch's
3941
+ // text patterns miss the current Google phrasing. Sends the most
3942
+ // recent page screenshot to the bot's LLM with a one-line prompt:
3943
+ // "what is the 2-3 digit number to tap." If a human can read the
3944
+ // screen, the vision model can too — the user's correction made
3945
+ // this obvious. Returns null on any failure (no LLM client, parse
3946
+ // error, no digits in reply).
3947
+ async extractGoogleNumberViaVision() {
3948
+ try {
3949
+ const state = await this.browser.getState();
3950
+ const screenshot = state.screenshot;
3951
+ if (screenshot === undefined || screenshot.length === 0)
3952
+ return null;
3953
+ const reply = await this.callLLM({
3954
+ system: "You read numbers from Google authentication challenge screens. " +
3955
+ "The screen shows a 2-3 digit number the user must tap on their phone " +
3956
+ "to verify identity. Reply with ONLY that number. No words, no " +
3957
+ "punctuation, no leading zero unless the number genuinely starts " +
3958
+ "with 0. If the screen does not show a tap-this-number challenge, " +
3959
+ 'reply "NONE".',
3960
+ userBlocks: [
3961
+ {
3962
+ kind: "image",
3963
+ media_type: "image/jpeg",
3964
+ data_base64: screenshot,
3965
+ },
3966
+ {
3967
+ kind: "text",
3968
+ text: "What number must the user tap on their phone?",
3969
+ },
3970
+ ],
3971
+ maxTokens: 8,
3972
+ });
3973
+ const trimmed = reply.trim();
3974
+ if (trimmed.length === 0 || /^none$/i.test(trimmed))
3975
+ return null;
3976
+ const digits = trimmed.match(/\d{1,4}/);
3977
+ if (digits === null)
3978
+ return null;
3979
+ const n = digits[0];
3980
+ if (n.length < 1 || n.length > 4)
3981
+ return null;
3982
+ return n;
3983
+ }
3984
+ catch {
3985
+ return null;
3986
+ }
3987
+ }
3988
+ // 0.8.3-rc.1 — GitHub challenge clear-poll. Mirrors
3989
+ // waitForGoogleChallenge: poll every 3s for the page to transition
3990
+ // off the verify-it's-you state. When the URL leaves the GitHub
3991
+ // session/device-verification routes AND the page text no longer
3992
+ // matches the challenge phrasing, the operator's phone-tap has
3993
+ // cleared the gate and we return true.
3994
+ async waitForGitHubChallenge(steps) {
3995
+ // Match the broader CHALLENGE_PHRASING_RE the auth-state classifier
3996
+ // uses, plus GitHub-specific URL paths. Keeping these regexes
3997
+ // aligned with the outer classifier prevents the wait from
3998
+ // returning "cleared" while the classifier still calls the state
3999
+ // "challenge" (which would cause re-entry into this branch).
4000
+ const CHALLENGE_PATH = /\/(?:sessions\/(?:two-factor|verify-?device|device)|account_verifications|users\/verify_device)/i;
4001
+ const CHALLENGE_BODY = /\b(?:device verification|security challenge|2[- ]?step|2fa|number(?: match)?(?: on (?:your |the )?(?:phone|screen|device))?|tap \d+|tap the number|confirm.{0,15}sign[- ]?in|verify it'?s you)\b/i;
4002
+ const deadlineMs = 4 * 60 * 1000;
4003
+ const deadline = Date.now() + deadlineMs;
4004
+ // Require N consecutive "not on challenge" samples before
4005
+ // declaring cleared. Brief mid-redirect blanks aren't enough —
4006
+ // the OAuth /authorize page transition is usually one second.
4007
+ const CONSECUTIVE_CLEAR_TARGET = 3;
4008
+ let consecutiveClear = 0;
4009
+ while (Date.now() < deadline) {
4010
+ await this.browser.wait(3);
4011
+ if (this.browser.oauthPageClosed())
4012
+ return true;
4013
+ const url = this.browser.currentUrl();
4014
+ let body;
4015
+ try {
4016
+ body = (await this.browser.extractText()).slice(0, 4000);
4017
+ }
4018
+ catch {
4019
+ consecutiveClear = 0;
4020
+ continue;
4021
+ }
4022
+ const stillChallenged = CHALLENGE_PATH.test(url) || CHALLENGE_BODY.test(body);
4023
+ if (stillChallenged) {
4024
+ consecutiveClear = 0;
4025
+ }
4026
+ else {
4027
+ consecutiveClear += 1;
4028
+ if (consecutiveClear >= CONSECUTIVE_CLEAR_TARGET) {
4029
+ return true;
4030
+ }
4031
+ }
4032
+ }
4033
+ steps.push("GitHub: phone-tap challenge wait timed out after 4 minutes");
3695
4034
  return false;
3696
4035
  }
3697
4036
  // Backstop for the critical guarantee (D4): true when the active
@@ -3829,21 +4168,56 @@ ${formatInventory(input.inventory)}`,
3829
4168
  this.heightenedAuthFired = true;
3830
4169
  const msg = digit !== null
3831
4170
  ? `Google challenge detected mid-post-verify: tap ${digit} on your phone — 2 minute window`
3832
- : `Google challenge detected mid-post-verify (number extractor missed it — read the planner reason): ${reason.slice(0, 200)}`;
4171
+ : `Google challenge detected mid-post-verify (number extractor missed it — vision LLM will read the screen): ${reason.slice(0, 200)}`;
3833
4172
  console.error(`[universal-bot] ${msg}`);
3834
4173
  steps.push(`Post-verify: ${msg}`);
3835
4174
  void notifyHeightenedAuth({
3836
4175
  service,
3837
4176
  digit,
3838
- windowSeconds: 120,
4177
+ windowSeconds: 240,
3839
4178
  machineToken: this.currentMachineToken,
3840
4179
  apiBase: this.currentApiBase,
3841
4180
  });
3842
4181
  void sendTelegramHeightenedAuth({
3843
4182
  service,
3844
4183
  digit,
3845
- windowSeconds: 120,
4184
+ windowSeconds: 240,
3846
4185
  });
4186
+ // 0.8.3-rc.1 — vision-LLM fallback for the mid-post-verify path.
4187
+ // When the planner's reason names a challenge but no digit, take
4188
+ // a screenshot, ask Claude vision what number is on screen, and
4189
+ // fire a SECOND Telegram with the extracted number. The first
4190
+ // notification went out immediately (so the operator knows to
4191
+ // grab their phone); this follows up with the number as soon as
4192
+ // vision returns (~2-5s).
4193
+ if (digit === null) {
4194
+ void (async () => {
4195
+ try {
4196
+ const visionDigit = await this.extractGoogleNumberViaVision();
4197
+ if (visionDigit !== null) {
4198
+ const followUp = `Google challenge mid-post-verify: vision LLM read "${visionDigit}" from the screen — tap that on your phone.`;
4199
+ console.error(`[universal-bot] ${followUp}`);
4200
+ steps.push(`Post-verify: ${followUp}`);
4201
+ void sendTelegramHeightenedAuth({
4202
+ service,
4203
+ digit: visionDigit,
4204
+ windowSeconds: 240,
4205
+ });
4206
+ void notifyHeightenedAuth({
4207
+ service,
4208
+ digit: visionDigit,
4209
+ windowSeconds: 240,
4210
+ machineToken: this.currentMachineToken,
4211
+ apiBase: this.currentApiBase,
4212
+ });
4213
+ }
4214
+ }
4215
+ catch {
4216
+ // best-effort — the original alert already fired with no
4217
+ // digit; vision is a nice-to-have follow-up.
4218
+ }
4219
+ })();
4220
+ }
3847
4221
  return true;
3848
4222
  }
3849
4223
  // stop when Claude says "done" or when we extract a credential.
@@ -4036,6 +4410,10 @@ ${formatInventory(input.inventory)}`,
4036
4410
  let stuckFiresAtUrl = 0;
4037
4411
  let lastStuckFireUrl = null;
4038
4412
  const triedFallbackUrls = new Set();
4413
+ // 0.8.3-rc.1 — per-URL set of wizard-forward escalations attempted.
4414
+ // Used so we only force-click the visible wizard-forward button once per
4415
+ // URL (the set is keyed by state.url); if it didn't unstick, fall through to URL fallbacks.
4416
+ const triedWizardForward = new Set();
4039
4417
  // 0.8.1 — capture chain index is independent of the planner loop
4040
4418
  // round. The loop has two early-`continue` paths (page mid-navigation
4041
4419
  // throw, planner-rejection re-plan) that increment `round` WITHOUT
@@ -4090,6 +4468,16 @@ ${formatInventory(input.inventory)}`,
4090
4468
  (credentials.api_key !== undefined || credentials.username !== undefined) &&
4091
4469
  !haveOnlySeedCredentials) {
4092
4470
  args.steps.push(`Post-verify: credentials found on round ${round}.`);
4471
+ // 0.8.3-rc.1 — fast-path synthetic capture. When the bot lands
4472
+ // on a page whose pre-loop extractCredentials() already found
4473
+ // the credential (the "fast path" — perplexity-class), the
4474
+ // loop returns here BEFORE any round was captured. Auto-
4475
+ // promote then sees no captures and skips synthesis, so the
4476
+ // next user has no replayable skill. Emit a single
4477
+ // synthetic-extract round so the synthesizer can produce a
4478
+ // minimal-but-correct "navigate + extract" skill. Best-effort
4479
+ // — capture failure must not block returning the credential.
4480
+ await this.writeFastPathSyntheticCapture(args.service, capturedRound, oauth);
4093
4481
  return credentials;
4094
4482
  }
4095
4483
  if (inMultiCredMode &&
@@ -4099,6 +4487,7 @@ ${formatInventory(input.inventory)}`,
4099
4487
  .filter((k) => !NON_CREDENTIAL_KEYS.has(k))
4100
4488
  .join(", ");
4101
4489
  args.steps.push(`Post-verify: multi-cred bundle stable for ${roundsSinceLastNewCredential} rounds — returning what we have (${summary}).`);
4490
+ await this.writeFastPathSyntheticCapture(args.service, capturedRound, oauth);
4102
4491
  return credentials;
4103
4492
  }
4104
4493
  // Settle the page first — the previous round's click may have
@@ -4443,6 +4832,35 @@ ${formatInventory(input.inventory)}`,
4443
4832
  // best-effort — fall through to the regular fallback path
4444
4833
  // if the page-text read failed.
4445
4834
  }
4835
+ // 0.8.3-rc.1 — wizard-forward auto-escalation. Before
4836
+ // jumping to URL fallbacks, check whether a Next/Continue/
4837
+ // Submit/Done button is visible in the inventory. The
4838
+ // planner sometimes keeps re-clicking the JUST-SELECTED
4839
+ // option ("Individual Contributor" card on Mixpanel's role
4840
+ // step) without realising it should now click Submit to
4841
+ // advance. Force-click the wizard-forward button once; if
4842
+ // it actually moves the page, the next iteration's
4843
+ // inventory-change check resets the stuck counter and the
4844
+ // bot continues normally.
4845
+ const WIZARD_FORWARD = /^\s*(?:next|continue|submit|finish|done|get\s+started)\s*$/i;
4846
+ const wizardBtn = inventory.find((e) => (e.tag === "button" || e.role === "button") &&
4847
+ WIZARD_FORWARD.test((e.visibleText ?? e.ariaLabel ?? "").trim()) &&
4848
+ e.visible === true);
4849
+ if (wizardBtn !== undefined && !triedWizardForward.has(state.url)) {
4850
+ triedWizardForward.add(state.url);
4851
+ args.steps.push(`Post-verify: stuck-loop ${stuckFiresAtUrl}x at ${state.url} — wizard-forward escalation: clicking ${JSON.stringify(wizardBtn.visibleText ?? wizardBtn.ariaLabel)}.`);
4852
+ try {
4853
+ await this.browser.click(wizardBtn.selector);
4854
+ await this.browser.wait(2);
4855
+ }
4856
+ catch (err) {
4857
+ args.steps.push(`Post-verify: wizard-forward click failed (${err instanceof Error ? err.message : String(err)}) — falling through to URL fallback.`);
4858
+ }
4859
+ prevSignature = null;
4860
+ prevInventorySize = -1;
4861
+ hint = undefined;
4862
+ continue;
4863
+ }
4446
4864
  const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls);
4447
4865
  if (fallback !== null) {
4448
4866
  triedFallbackUrls.add(fallback);
@@ -4998,6 +5416,43 @@ ${formatInventory(input.inventory)}`,
4998
5416
  }
4999
5417
  return credentials;
5000
5418
  }
5419
+ // 0.8.3-rc.1 — write a single synthetic extract round when the
5420
+ // post-verify loop is about to fast-path exit (pre-loop extraction
5421
+ // already produced credentials, so no planner-driven round runs).
5422
+ // Without this, auto-promote sees no captures from the run and
5423
+ // skips publishing a skill — perplexity's "fast path" case.
5424
+ //
5425
+ // The synthesized capture pairs the CURRENT browser state (URL +
5426
+ // inventory) with a synthetic `extract` observation. The
5427
+ // synthesizer's stage-1 step builder will prepend a `navigate` to
5428
+ // the current URL and translate the synthetic round into an
5429
+ // `extract_via_*` step, giving the registry a minimal but correct
5430
+ // replay skill ("navigate to the credential page, extract").
5431
+ //
5432
+ // Best-effort: a capture failure here must NEVER block returning
5433
+ // the credential we already have.
5434
+ async writeFastPathSyntheticCapture(service, capturedRound, oauth) {
5435
+ try {
5436
+ const [state, inventory] = await Promise.all([
5437
+ this.browser.getState(),
5438
+ this.buildInventory([], undefined, 80),
5439
+ ]);
5440
+ captureOnboardingRound({
5441
+ service,
5442
+ round: capturedRound,
5443
+ oauth,
5444
+ state,
5445
+ inventory,
5446
+ observed: {
5447
+ kind: "extract",
5448
+ reason: "fast-path synthetic extract — credentials were already on the page before any planner round ran",
5449
+ },
5450
+ });
5451
+ }
5452
+ catch {
5453
+ // best-effort
5454
+ }
5455
+ }
5001
5456
  // Sign in with the credentials created during signup, so the
5002
5457
  // post-verify flow can reach the authenticated dashboard (SendPulse:
5003
5458
  // confirming the email doesn't establish a session — the API-key