@trusty-squire/mcp 0.6.14-rc.8 → 0.6.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/dist/bot/agent.d.ts +19 -1
  2. package/dist/bot/agent.d.ts.map +1 -1
  3. package/dist/bot/agent.js +453 -50
  4. package/dist/bot/agent.js.map +1 -1
  5. package/dist/bot/browser.d.ts +8 -0
  6. package/dist/bot/browser.d.ts.map +1 -1
  7. package/dist/bot/browser.js +395 -41
  8. package/dist/bot/browser.js.map +1 -1
  9. package/dist/bot/index.d.ts +3 -2
  10. package/dist/bot/index.d.ts.map +1 -1
  11. package/dist/bot/index.js +16 -4
  12. package/dist/bot/index.js.map +1 -1
  13. package/dist/bot/oauth-providers.d.ts +2 -0
  14. package/dist/bot/oauth-providers.d.ts.map +1 -1
  15. package/dist/bot/oauth-providers.js +36 -0
  16. package/dist/bot/oauth-providers.js.map +1 -1
  17. package/dist/bot/onboarding-capture.d.ts +3 -0
  18. package/dist/bot/onboarding-capture.d.ts.map +1 -1
  19. package/dist/bot/onboarding-capture.js +70 -5
  20. package/dist/bot/onboarding-capture.js.map +1 -1
  21. package/dist/bot/promote-to-skill.d.ts +2 -1
  22. package/dist/bot/promote-to-skill.d.ts.map +1 -1
  23. package/dist/bot/promote-to-skill.js +261 -29
  24. package/dist/bot/promote-to-skill.js.map +1 -1
  25. package/dist/bot/replay-skill.d.ts +4 -0
  26. package/dist/bot/replay-skill.d.ts.map +1 -1
  27. package/dist/bot/replay-skill.js +343 -10
  28. package/dist/bot/replay-skill.js.map +1 -1
  29. package/dist/install/cli.d.ts +2 -0
  30. package/dist/install/cli.d.ts.map +1 -1
  31. package/dist/install/cli.js +48 -2
  32. package/dist/install/cli.js.map +1 -1
  33. package/dist/session.d.ts.map +1 -1
  34. package/dist/session.js +15 -5
  35. package/dist/session.js.map +1 -1
  36. package/dist/skill-cli/cli.d.ts +25 -0
  37. package/dist/skill-cli/cli.d.ts.map +1 -1
  38. package/dist/skill-cli/cli.js +558 -13
  39. package/dist/skill-cli/cli.js.map +1 -1
  40. package/dist/skill-cli/registry-http.d.ts +1 -0
  41. package/dist/skill-cli/registry-http.d.ts.map +1 -1
  42. package/dist/skill-cli/registry-http.js +3 -0
  43. package/dist/skill-cli/registry-http.js.map +1 -1
  44. package/dist/skill-cli/signing.d.ts +21 -0
  45. package/dist/skill-cli/signing.d.ts.map +1 -0
  46. package/dist/skill-cli/signing.js +71 -0
  47. package/dist/skill-cli/signing.js.map +1 -0
  48. package/dist/skill-registry-client.d.ts +2 -0
  49. package/dist/skill-registry-client.d.ts.map +1 -1
  50. package/dist/skill-registry-client.js +83 -36
  51. package/dist/skill-registry-client.js.map +1 -1
  52. package/dist/tools/provision-any.d.ts +7 -0
  53. package/dist/tools/provision-any.d.ts.map +1 -1
  54. package/dist/tools/provision-any.js +293 -45
  55. package/dist/tools/provision-any.js.map +1 -1
  56. package/package.json +2 -1
package/dist/bot/agent.js CHANGED
@@ -8,7 +8,7 @@
8
8
  // executor; the prompt is the contract. If a service breaks we tweak the
9
9
  // prompt rather than threading service-specific logic through the agent.
10
10
  import { rankAndCapInventory, scoreSignupButton } from "./browser.js";
11
- import { OAUTH_PROVIDERS, extractOAuthScopes, } from "./oauth-providers.js";
11
+ import { OAUTH_PROVIDERS, extractOAuthScopes, isGitHubDismissible2faSetup, GITHUB_DISMISSIBLE_2FA_SKIP_TEXT, } from "./oauth-providers.js";
12
12
  import { extractGoogleNumberMatch, scrapeGoogleScopePhrases } from "./google-login.js";
13
13
  import { loggedInProviders, clearProviderLoggedIn } from "./login-state.js";
14
14
  import { saveDebugSnapshot } from "./debug.js";
@@ -59,19 +59,45 @@ const VERIFICATION_PROBE_SECONDS = 45;
59
59
  // without a key and the page reads like this, the run ends
60
60
  // `onboarding_blocked` rather than grep-looping a wall it cannot
61
61
  // satisfy (the S3-class trap named in the plan's failure modes).
62
+ //
63
+ // rc.27 — patterns are regexes (not substrings) so word boundaries
64
+ // hold. `isAtPaywall` also rejects matches preceded by a negator
65
+ // ("no", "without", "doesn't require", …) so a free-plan blurb like
66
+ // "No credit card required, no hidden fees" — the exact phrase that
67
+ // false-positively halted the IPInfo run on rc.26 — no longer
68
+ // triggers a paywall verdict.
62
69
  const ONBOARDING_PAYWALL_PATTERNS = [
63
- "add a payment method",
64
- "add a credit card",
65
- "add credit card",
66
- "payment method required",
67
- "a payment method is required",
68
- "credit card required",
69
- "enter your card",
70
- "enter your payment",
71
- "enter payment details",
72
- "upgrade your plan to",
73
- "start your paid plan",
70
+ /\badd\s+a\s+payment\s+method\b/i,
71
+ /\badd\s+(?:a\s+)?credit\s+card\b/i,
72
+ /\bpayment\s+method\s+(?:is\s+)?required\b/i,
73
+ /\bcredit\s+card\s+required\b/i,
74
+ /\benter\s+your\s+card\b/i,
75
+ /\benter\s+your\s+payment\b/i,
76
+ /\benter\s+payment\s+details\b/i,
77
+ /\bupgrade\s+your\s+plan\s+to\b/i,
78
+ /\bstart\s+your\s+paid\s+plan\b/i,
74
79
  ];
80
+ // Negators that, when they end the 30-character window immediately
81
+ // before a paywall pattern match, flip its meaning from a demand
82
+ // to a marketing reassurance. "No", "without", "doesn't require",
83
+ // "don't need", "isn't".
84
+ const PAYWALL_NEGATION_PREFIX = /\b(?:no|without|doesn'?t\s+(?:need|require)|don'?t\s+(?:need|require)|isn'?t)\s+$/i;
85
+ // Exported for unit testing — the post-OAuth heuristic distinguishing
86
+ // "the dashboard is asking for a card before issuing a key" from "the
87
+ // dashboard happens to mention cards on a marketing tile".
88
+ export function isAtPaywall(text) {
89
+ for (const pattern of ONBOARDING_PAYWALL_PATTERNS) {
90
+ const m = pattern.exec(text);
91
+ if (m === null)
92
+ continue;
93
+ const start = Math.max(0, m.index - 30);
94
+ const prefix = text.slice(start, m.index);
95
+ if (PAYWALL_NEGATION_PREFIX.test(prefix))
96
+ continue;
97
+ return true;
98
+ }
99
+ return false;
100
+ }
75
101
  // S3: does this post-submit page text indicate the service genuinely
76
102
  // expects the user to confirm via email? Drives whether the bot polls the
77
103
  // full verification timeout or runs only a short probe. Exported so the
@@ -326,6 +352,39 @@ export function formatInventory(inventory) {
326
352
  ? `value="" (EMPTY — fill before submitting)`
327
353
  : `value=${JSON.stringify(e.value.slice(0, 60))}`);
328
354
  }
355
+ // <select> state. `value=""` is the React-defaulted-placeholder
356
+ // pattern (the first option's value is empty, common for
357
+ // "No workspace" / "Select…" / "Choose…" prompts). React Hook
358
+ // Form treats those fields as untouched and silently rejects
359
+ // submits — Railway's token-creation form was the canonical
360
+ // case. The planner needs the selected text and the option
361
+ // list to issue an explicit `select` step before clicking
362
+ // submit. Selectors run to end-of-line, so this annotation goes
363
+ // BEFORE the trailing `selector=`.
364
+ //
365
+ // rc.17: suppress the DEFAULTED marker for selects we've already
366
+ // selected (data-ts-touched). A successful selectOption to a
367
+ // value="" option leaves value=="" but the form-state is
368
+ // committed — without this suppression the planner would see
369
+ // DEFAULTED again next round and re-select indefinitely.
370
+ if (e.tag === "select") {
371
+ const selectedText = e.selectedOptionText ?? "";
372
+ const isDefaulted = e.value !== null && e.value !== undefined && e.value.length === 0;
373
+ const alreadyTouched = e.interactedThisRun === true;
374
+ bits.push(isDefaulted && !alreadyTouched
375
+ ? `value="" selected=${JSON.stringify(selectedText)} (DEFAULTED — pick an explicit option before submitting)`
376
+ : `value=${JSON.stringify((e.value ?? "").slice(0, 60))} selected=${JSON.stringify(selectedText)}${alreadyTouched ? " (touched — already selected by bot)" : ""}`);
377
+ if (e.selectOptions !== null && e.selectOptions !== undefined && e.selectOptions.length > 0) {
378
+ const optionTexts = e.selectOptions
379
+ .map((o) => o.text || `(value=${JSON.stringify(o.value)})`)
380
+ .filter((t) => t.length > 0)
381
+ .slice(0, 6)
382
+ .map((t) => JSON.stringify(t))
383
+ .join(", ");
384
+ if (optionTexts.length > 0)
385
+ bits.push(`options=[${optionTexts}]`);
386
+ }
387
+ }
329
388
  const label = e.labelText ?? e.ariaLabel;
330
389
  if (label !== null && label !== undefined) {
331
390
  bits.push(`label=${JSON.stringify(label)}`);
@@ -477,24 +536,80 @@ export function detectAntiBotBlock(html) {
477
536
  return "Imperva";
478
537
  return null;
479
538
  }
480
- // F17 — True when the inventory looks like an authenticated
481
- // dashboard rather than a sign-up page. Triggers when a prior OAuth
482
- // bind already linked the account and the service auto-redirects
483
- // past the sign-in widget on the next visit. Detection signals:
484
- // - At least one element whose visible text matches an
485
- // authenticated-state keyword (Sign out / Log out / Dashboard /
486
- // Projects / Settings / Profile / Account)
487
- // - No email/password input fields visible (a true sign-up page
488
- // virtually always has at least one)
489
- // Conservative — both conditions must hold.
490
- export function detectAlreadySignedIn(inventory) {
491
- const AUTH_KEYWORDS = /^\s*(?:sign out|log out|dashboard|projects|settings|profile|my account|account settings|workspaces)\s*$/i;
492
- const hasAuthMarker = inventory.some((e) => AUTH_KEYWORDS.test((e.visibleText ?? e.ariaLabel ?? "").trim()));
493
- if (!hasAuthMarker)
494
- return false;
539
+ // F17 — True when the page looks like an authenticated dashboard
540
+ // rather than a sign-up page. Triggers when a prior OAuth bind
541
+ // already linked the account and the service auto-redirects past
542
+ // the sign-in widget on the next visit.
543
+ //
544
+ // **Universal precondition**: no email/password/tel input visible.
545
+ // A true sign-up page virtually always has at least one; if any
546
+ // such input is present, we are NOT authenticated regardless of
547
+ // what other markers the page carries.
548
+ //
549
+ // **Positive signals (any one fires authentication)**:
550
+ // 1. Explicit nav keyword (Sign out / Log out / Dashboard /
551
+ // Projects / Settings / Profile / Account / Workspaces) —
552
+ // the canonical strict-match path. Works for Sentry,
553
+ // OpenRouter, Postmark, etc. — sites with a real nav bar.
554
+ // 2. Billing / trial widget visible ("$X.XX left", "N days left",
555
+ // "Trial") — these only render to authenticated users. Caught
556
+ // Railway's `/new` page where the only post-login marker was
557
+ // the "28 days or $5.00 leftTrial" button.
558
+ // 3. Dashboard-route URL (path contains /new, /dashboard,
559
+ // /project(s), /account, /settings, /workspace, /home) AND a creation
560
+ // CTA visible ("New project", "Create", "New <X>") — paired
561
+ // signal that catches sparse SPAs whose entire layout is a
562
+ // single create-form on a logged-in URL.
563
+ //
564
+ // rc.18: signals 2 and 3 added. Previously only signal 1 was
565
+ // checked; Railway's project-creation widget tripped the form-fill
566
+ // fallback (and a low-confidence LLM plan that filled "Empty
567
+ // Project" then waited for a verification email that never came).
568
+ export function detectAlreadySignedIn(args) {
569
+ const { inventory, url } = args;
570
+ // Precondition: any visible credential input → not authenticated.
495
571
  const hasCredentialInput = inventory.some((e) => e.tag === "input" &&
496
572
  (e.type === "email" || e.type === "password" || e.type === "tel"));
497
- return !hasCredentialInput;
573
+ if (hasCredentialInput)
574
+ return false;
575
+ const visibleTextOf = (e) => `${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`.trim();
576
+ // Signal 1 — strict nav-keyword match (the canonical Sentry-class case).
577
+ const AUTH_KEYWORDS = /^\s*(?:sign out|log out|dashboard|projects|settings|profile|my account|account settings|workspaces)\s*$/i;
578
+ if (inventory.some((e) => AUTH_KEYWORDS.test((e.visibleText ?? e.ariaLabel ?? "").trim()))) {
579
+ return true;
580
+ }
581
+ // Signal 2 — billing / trial widget. Patterns observed in the wild:
582
+ // "28 days or $5.00 leftTrial" (Railway, no separator)
583
+ // "Trial" (most SaaS)
584
+ // "$N left" / "N days left" / "remaining"
585
+ const BILLING = /(?:\$\d+(?:\.\d+)?\s*(?:left|remaining)|\d+\s*days?\s*(?:left|remaining|trial)|\btrial\b)/i;
586
+ if (inventory.some((e) => BILLING.test(visibleTextOf(e)))) {
587
+ return true;
588
+ }
589
+ // Signal 3 — dashboard-route URL + creation CTA visible.
590
+ // The URL gate is conservative: a path that READS as dashboard,
591
+ // not /login or /signup or /. Combined with a creation CTA
592
+ // ("New project", "Create workspace", "+ New") it pins the
593
+ // page as a post-login surface.
594
+ let dashboardyPath = false;
595
+ try {
596
+ const parsed = new URL(url);
597
+ dashboardyPath =
598
+ /\/(?:new|dashboard|projects?|account|settings|workspace|home)(?:\/|$)/i.test(parsed.pathname) && !/\/(?:signup|sign-up|register|login|sign-in|signin)/i.test(parsed.pathname);
599
+ }
600
+ catch {
601
+ // Malformed URL — skip URL signal.
602
+ }
603
+ if (dashboardyPath) {
604
+ const CREATION_CTA = /^\s*(?:\+\s*)?(?:new\s+(?:project|workspace|team|app|site|deployment|api\s*key)|create(?:\s+(?:new|a|project|workspace))?)/i;
605
+ if (inventory.some((e) => {
606
+ const t = e.visibleText ?? e.ariaLabel ?? "";
607
+ return CREATION_CTA.test(t.trim());
608
+ })) {
609
+ return true;
610
+ }
611
+ }
612
+ return false;
498
613
  }
499
614
  // True when the page has no fillable text input AND no button that
500
615
  // reads as an email-signup option — a genuinely OAuth/SSO-only
@@ -733,6 +848,33 @@ export function isTruncatedCapture(sourceText, capturedKey) {
733
848
  // help text.
734
849
  return /^\s*(?:\.{3,}|…)/.test(after);
735
850
  }
851
+ // rc.28 — when the regex library doesn't recognize the credential
852
+ // shape (e.g. IPInfo's 14-char hex token has no service-prefix and
853
+ // no nearby "API key" label, so extractApiKeyFromText returns null),
854
+ // the Claude vision planner often still quotes the value in its
855
+ // `extract` step reason — e.g. "The API token 'fd3afcbe09648c' is
856
+ // fully visible on the dashboard under 'API Access'". This pulls
857
+ // quoted credential-shaped substrings from the reason, then keeps
858
+ // only those that appear verbatim in the page text — the
859
+ // verbatim-in-DOM check is the guardrail against accepting a
860
+ // hallucinated value. Exported for unit testing.
861
+ export function extractQuotedTokenFromReason(reason, pageText) {
862
+ // Single/double/back quotes around a credential-shaped value.
863
+ // Min 10 chars filters out short UI words ("Yes", "Copy"); max 80
864
+ // is the same ceiling extractApiKeyFromText effectively uses via
865
+ // its MAX_CREDENTIAL_LENGTH counterpart. Character class matches
866
+ // what real API tokens look like: alphanumeric, underscores,
867
+ // hyphens; no spaces, no punctuation that would gather UI text.
868
+ const matches = reason.matchAll(/['"`]([A-Za-z0-9_\-]{10,80})['"`]/g);
869
+ for (const m of matches) {
870
+ const candidate = m[1];
871
+ if (candidate === undefined)
872
+ continue;
873
+ if (pageText.includes(candidate))
874
+ return candidate;
875
+ }
876
+ return null;
877
+ }
736
878
  export function extractApiKeyFromText(text) {
737
879
  const prefixed = [
738
880
  /\bre_[a-zA-Z0-9_]{20,}\b/, // Resend (key body contains underscores)
@@ -886,6 +1028,17 @@ export class SignupAgent {
886
1028
  return { found: false, solved: false, blocked: false, kind: "turnstile" };
887
1029
  }
888
1030
  steps.push(`${label} captcha (${result.kind}): ${result.solved ? "solved" : "NOT solved (timeout)"}`);
1031
+ // rc.32 — forensic snapshot after the captcha attempt. Without
1032
+ // this, the only snapshot near the captcha is the pre-fill one
1033
+ // taken BEFORE the click, so when a Turnstile fails to solve we
1034
+ // can't tell whether (a) the bot's click didn't register (widget
1035
+ // remains in initial state), (b) the click registered but
1036
+ // Cloudflare immediately rejected it (red X / re-challenge), or
1037
+ // (c) the click registered and a challenge grid rendered that
1038
+ // we can't solve. Each path takes a different fix. Solved runs
1039
+ // also save the snapshot — green-checkmark state is useful
1040
+ // forensic data for tuning the success-detection regex.
1041
+ await saveDebugSnapshot(this.browser, `captcha-after-${result.solved ? "solved" : "timeout"}`);
889
1042
  // Classify the widget for spike telemetry — a pure read, after the
890
1043
  // solve attempt so the challenge grid (if any) has had time to render.
891
1044
  const detected = await this.browser.detectCaptchaVariant();
@@ -958,6 +1111,14 @@ export class SignupAgent {
958
1111
  let emptyPlans = 0;
959
1112
  let oauthScanRetries = 0;
960
1113
  let hint;
1114
+ // rc.31 — once the bot has explicitly clicked an email-flow
1115
+ // button (e.g. Railway's "Log in using email" two-stage chooser),
1116
+ // stay on the email path. Without this, the auto-OAuth-first
1117
+ // detection on the *next* iteration sees the now-revealed
1118
+ // "Continue with Google" button and reroutes — exactly the
1119
+ // regression that produced the Security Code challenge on
1120
+ // methoxine's account during the rc.30 Railway run.
1121
+ let committedToEmailPath = false;
961
1122
  const oauthCandidates = await this.resolveOAuthCandidates(task, steps);
962
1123
  for (;;) {
963
1124
  await this.browser.waitForFormReady();
@@ -984,7 +1145,13 @@ export class SignupAgent {
984
1145
  // provider when one was requested, else every provider the profile
985
1146
  // has a session for. Absent any affordance, fall through to
986
1147
  // form-fill.
987
- if (oauthCandidates.length > 0) {
1148
+ //
1149
+ // rc.31 — skip the OAuth-first scan when we've already committed
1150
+ // to the email path on a previous round. Otherwise a two-stage
1151
+ // chooser ("Log in using email" → reveals a page with both an
1152
+ // email input AND a Google button) reroutes us back to OAuth on
1153
+ // the second round.
1154
+ if (oauthCandidates.length > 0 && !committedToEmailPath) {
988
1155
  const hit = findFirstOAuthButton(inventory, oauthCandidates);
989
1156
  if (hit !== null) {
990
1157
  const label = OAUTH_PROVIDERS[hit.provider].label;
@@ -1014,8 +1181,8 @@ export class SignupAgent {
1014
1181
  // path entirely and route to the post-OAuth navigation loop
1015
1182
  // to find the API key — same path Sentry/OpenRouter use post-
1016
1183
  // handshake.
1017
- if (detectAlreadySignedIn(inventory)) {
1018
- steps.push("Auto-OAuth: page shows dashboard markers (Sign out / Dashboard / etc.) — " +
1184
+ if (detectAlreadySignedIn({ inventory, url: state.url })) {
1185
+ steps.push("Auto-OAuth: page shows authenticated-state markers (nav keyword, billing widget, or dashboard URL + create CTA) — " +
1019
1186
  "treating as already authenticated, jumping to post-verify navigation");
1020
1187
  return { kind: "already_oauth" };
1021
1188
  }
@@ -1103,6 +1270,23 @@ export class SignupAgent {
1103
1270
  continue;
1104
1271
  }
1105
1272
  await this.executePlan(plan, fillValues, steps, bySelector);
1273
+ // rc.31 — flag the email-path commitment once we've executed a
1274
+ // click whose reason explicitly targets an "email" affordance
1275
+ // (Railway's "Log in using email", Vercel's "Continue with
1276
+ // email", etc.). Subsequent OAuth-first scans will then be
1277
+ // suppressed so we don't reroute back to Google/GitHub on the
1278
+ // revealed page (the rc.30 Railway regression: clicking the
1279
+ // email button revealed a page with BOTH an email input AND a
1280
+ // Google button; without this flag the bot picks Google and
1281
+ // triggers the Security Code challenge that methoxine can't
1282
+ // navigate). One-way flag — once we're on email, we stay.
1283
+ if (!committedToEmailPath) {
1284
+ const emailClick = plan.actions.find((a) => a.kind === "click" && /\bemail\b/i.test(a.reason));
1285
+ if (emailClick !== undefined) {
1286
+ committedToEmailPath = true;
1287
+ steps.push("Committed to email-fill path — auto-OAuth-first scan suppressed for the rest of this signup");
1288
+ }
1289
+ }
1106
1290
  // A plan with no fill actions either revealed/advanced the page
1107
1291
  // (a cookie banner, a two-stage "sign up with email" chooser) —
1108
1292
  // worth a re-plan — or found nothing actionable at all. A
@@ -1321,6 +1505,10 @@ export class SignupAgent {
1321
1505
  // diagnosed without users needing to configure debug env vars.
1322
1506
  // Wired from the MCP layer; undefined in unit-test contexts.
1323
1507
  extractFailureUploader;
1508
+ // Per-round telemetry uploader (0.6.14-rc.11). Fires on every post-
1509
+ // verify round so the registry has the full DOM + screenshot trail
1510
+ // for any stuck signup, not just the ones that fail at extract.
1511
+ roundUploader;
1324
1512
  // Set per-task in signup(). Lets the uploader know which service
1325
1513
  // was being provisioned without threading it through every call.
1326
1514
  currentService = "";
@@ -1342,6 +1530,9 @@ export class SignupAgent {
1342
1530
  if (opts.extractFailureUploader !== undefined) {
1343
1531
  this.extractFailureUploader = opts.extractFailureUploader;
1344
1532
  }
1533
+ if (opts.roundUploader !== undefined) {
1534
+ this.roundUploader = opts.roundUploader;
1535
+ }
1345
1536
  }
1346
1537
  // Read-only view of how many calls landed on which backend. Exported
1347
1538
  // through SignupResult.llm_backends so tests and ops can verify the
@@ -1815,6 +2006,27 @@ export class SignupAgent {
1815
2006
  const loginCmd = provider.id === "github"
1816
2007
  ? "npx @trusty-squire/mcp login --provider=github"
1817
2008
  : "npx @trusty-squire/mcp login";
2009
+ // rc.22 — OpenRouter (Clerk) renders a visible Cloudflare Turnstile
2010
+ // checkbox at the bottom of the same form as the OAuth buttons.
2011
+ // Clerk's Google button stops at a loading spinner if Turnstile
2012
+ // hasn't been completed — the OAuth click never redirects, the bot
2013
+ // sees URL unchanged and times out. clickSubmit handles this for
2014
+ // form-submit paths, but OAuth-first bypasses clickSubmit. Run the
2015
+ // tier-2 solver here too. Best-effort: a missing widget no-ops, a
2016
+ // failed solve still proceeds (the click may still work for some
2017
+ // services that don't gate OAuth on Turnstile).
2018
+ try {
2019
+ const captcha = await this.browser.solveVisibleCaptcha(20_000);
2020
+ if (captcha.found) {
2021
+ steps.push(captcha.solved
2022
+ ? `OAuth: ticked the visible ${captcha.kind ?? "captcha"} checkbox before clicking the ${provider.label} affordance`
2023
+ : `OAuth: visible ${captcha.kind ?? "captcha"} present but did not solve in 20s — clicking the ${provider.label} affordance anyway`);
2024
+ }
2025
+ }
2026
+ catch (err) {
2027
+ // Solver is best-effort; never block OAuth on its failure.
2028
+ steps.push(`OAuth: visible-captcha precheck failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
2029
+ }
1818
2030
  steps.push(`OAuth: clicking the ${provider.label} sign-in affordance`);
1819
2031
  await this.browser.startOAuth(oauthSelector);
1820
2032
  await this.browser.wait(3);
@@ -1848,26 +2060,71 @@ export class SignupAgent {
1848
2060
  if (authState === "not_provider")
1849
2061
  break; // flow left the provider — back on the service
1850
2062
  if (authState === "challenge") {
1851
- // Google's number-match challenge ("Tap N on your phone") is
1852
- // resolvable by the user without re-running the login flow —
1853
- // surface the number and wait for them to complete it.
2063
+ // rc.26 — always capture forensic state at the moment the
2064
+ // challenge is detected. Before this, snapshots fired only at
2065
+ // before-fill / oauth-after-click / oauth-post-consent — none
2066
+ // covered the challenge page itself. When
2067
+ // extractGoogleNumberMatch's patterns don't match the current
2068
+ // Google phrasing, this is the only artifact the user can read
2069
+ // to find the number to tap.
2070
+ await saveDebugSnapshot(this.browser, "google-challenge");
1854
2071
  if (provider.id === "google") {
1855
2072
  const matchNum = extractGoogleNumberMatch(body);
1856
2073
  if (matchNum !== null) {
2074
+ // rc.26 — surface in real-time via stderr as well as the
2075
+ // step trail. The step trail only renders after the run
2076
+ // ends; stderr lands in the harvester output immediately,
2077
+ // inside the 2-minute window the user has to react.
2078
+ console.error(`[universal-bot] GOOGLE NUMBER-MATCH: tap "${matchNum}" on your phone — 2 minute window`);
1857
2079
  steps.push(`Google: match the number ${matchNum} on your phone — ` +
1858
2080
  `open the Google app on your phone and tap ${matchNum}`);
1859
- const cleared = await this.waitForGoogleChallenge(provider, steps);
1860
- if (!cleared) {
1861
- return this.oauthAbort("needs_login", `Google number-match challenge timed out after 2 minutes. ` +
1862
- `Re-run \`${loginCmd}\`, complete the challenge in the window, then retry.`, steps);
1863
- }
1864
- steps.push("Google: challenge cleared — continuing OAuth");
1865
- // Re-classify on the next iteration without burning the
1866
- // OAuth-navigation budget (which assumes continuous
1867
- // browser progress, not a 2-minute human pause).
2081
+ }
2082
+ else {
2083
+ // Extractor missed the number — Google phrasing has
2084
+ // drifted again. Surface a banner so the user knows to
2085
+ // check the just-saved snapshot before the 2-minute wait
2086
+ // expires.
2087
+ console.error(`[universal-bot] GOOGLE CHALLENGE detected (number-match phrasing not recognized) ` +
2088
+ `read the most recent google-challenge.png in the debug dir to find the number — 2 minute window`);
2089
+ steps.push("Google: challenge detected, number-match extractor missed it. " +
2090
+ "See the latest google-challenge snapshot in the debug dir to read the number.");
2091
+ }
2092
+ // Either way (number found or not), the user can still
2093
+ // clear the challenge in the bot's browser window or by
2094
+ // tapping on their phone. Wait the full 2 minutes.
2095
+ const cleared = await this.waitForGoogleChallenge(provider, steps);
2096
+ if (!cleared) {
2097
+ return this.oauthAbort("needs_login", `Google challenge timed out after 2 minutes. ` +
2098
+ `Re-run \`${loginCmd}\`, complete the challenge in the window, then retry.`, steps);
2099
+ }
2100
+ steps.push("Google: challenge cleared — continuing OAuth");
2101
+ // Re-classify on the next iteration without burning the
2102
+ // OAuth-navigation budget (which assumes continuous
2103
+ // browser progress, not a 2-minute human pause).
2104
+ i--;
2105
+ continue;
2106
+ }
2107
+ // rc.34 — GitHub 2FA sanity-check page is dismissible. When
2108
+ // the user recently (re)configured 2FA, GitHub injects a
2109
+ // "Verify your two-factor authentication (2FA) settings"
2110
+ // overlay on the OAuth /authorize URL with a literal
2111
+ // "skip 2FA verification at this moment" link. That's a
2112
+ // non-blocking nag, not a real challenge — clicking skip
2113
+ // returns the user to the OAuth handshake. Detect + auto-
2114
+ // click before aborting.
2115
+ if (provider.id === "github" && isGitHubDismissible2faSetup(body)) {
2116
+ steps.push("GitHub: 2FA sanity-check overlay detected (post-setup nag, not a real challenge). " +
2117
+ "Clicking 'skip 2FA verification at this moment' to defer.");
2118
+ const clicked = await this.browser.clickLinkByText(GITHUB_DISMISSIBLE_2FA_SKIP_TEXT);
2119
+ if (clicked) {
2120
+ // Give GitHub a moment to navigate back to the consent flow.
2121
+ await this.browser.wait(2);
2122
+ // Re-classify on the next iteration; the URL + body should
2123
+ // now be the actual OAuth /authorize consent page.
1868
2124
  i--;
1869
2125
  continue;
1870
2126
  }
2127
+ steps.push("GitHub: skip-link click did not register — falling back to needs_login abort.");
1871
2128
  }
1872
2129
  return this.oauthAbort("needs_login", `${provider.label} interrupted the sign-in with a security challenge ("verify it's you"). ` +
1873
2130
  `Re-run \`${loginCmd}\`, clear the challenge in the window, then retry.`, steps);
@@ -2006,8 +2263,8 @@ export class SignupAgent {
2006
2263
  }
2007
2264
  // No API key. Distinguish a billing/card wall (onboarding_blocked)
2008
2265
  // from a generic navigation miss — never grep-loop a paid wall.
2009
- const finalText = (await this.browser.extractText().catch(() => "")).toLowerCase();
2010
- if (ONBOARDING_PAYWALL_PATTERNS.some((p) => finalText.includes(p))) {
2266
+ const finalText = await this.browser.extractText().catch(() => "");
2267
+ if (isAtPaywall(finalText)) {
2011
2268
  return {
2012
2269
  success: false,
2013
2270
  error: `onboarding_blocked: ${task.service}'s API key sits behind a billing or ` +
@@ -2283,10 +2540,12 @@ ${formatInventory(input.inventory)}`,
2283
2540
  continue;
2284
2541
  }
2285
2542
  args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: ${nextStep.kind} — ${nextStep.reason}`);
2286
- // Dev-only (env-gated): dump this round's real page state +
2287
- // inventory into the E1 eval-corpus format, so onboarding
2288
- // adapters can be iterated offline without re-running the
2289
- // rate-limited OAuth handshake.
2543
+ // Dump this round's real page state + inventory in the E1
2544
+ // eval-corpus format so onboarding adapters can be iterated
2545
+ // offline without re-running the rate-limited OAuth handshake.
2546
+ // Default-on as of 0.6.14-rc.11 — writes to
2547
+ // ~/.trusty-squire/corpus/onboarding/ unless an env override
2548
+ // points elsewhere or disables it.
2290
2549
  captureOnboardingRound({
2291
2550
  service: args.service,
2292
2551
  round,
@@ -2295,6 +2554,34 @@ ${formatInventory(input.inventory)}`,
2295
2554
  inventory,
2296
2555
  observed: nextStep,
2297
2556
  });
2557
+ // Per-round telemetry upload (rc.11). Mirrors the disk capture
2558
+ // but ships to the registry so debugging works from any host —
2559
+ // the bot may be running in Goose or a sibling agent that
2560
+ // doesn't share a filesystem with whoever's diagnosing the run.
2561
+ // Fire-and-forget; failures must never abort the loop.
2562
+ if (this.roundUploader !== undefined) {
2563
+ const observedReason = "reason" in nextStep ? nextStep.reason : "";
2564
+ void (async () => {
2565
+ try {
2566
+ await this.roundUploader({
2567
+ service: args.service,
2568
+ round,
2569
+ kind: nextStep.kind,
2570
+ url: state.url,
2571
+ title: state.title,
2572
+ inventory_count: inventory.length,
2573
+ observed_reason: observedReason,
2574
+ html: state.html,
2575
+ ...(state.screenshot !== undefined && state.screenshot.length > 0
2576
+ ? { screenshot_jpeg_base64: state.screenshot }
2577
+ : {}),
2578
+ });
2579
+ }
2580
+ catch {
2581
+ // best-effort — telemetry upload is diagnostic, never load-bearing
2582
+ }
2583
+ })();
2584
+ }
2298
2585
  // Stuck-loop detector. Re-planning steps (done/extract/login/
2299
2586
  // wait/navigate) are exempt: extract is its own progress signal,
2300
2587
  // navigate intentionally changes the URL not the current DOM,
@@ -2334,6 +2621,40 @@ ${formatInventory(input.inventory)}`,
2334
2621
  const emptyInputHint = emptyInputs.length > 0
2335
2622
  ? `\n\nVisible empty inputs on this page (any of these is a likely required field):\n${emptyInputs.join("\n")}\n\nIssue {"kind":"fill"} on one of them with a sensible value.`
2336
2623
  : "";
2624
+ // Defaulted <select>s — value="" means the first <option>
2625
+ // (typically "Select…", "No workspace", "Choose…") is still
2626
+ // showing. React Hook Form treats those as untouched and
2627
+ // silently rejects submits. The Railway token-create form
2628
+ // was the canonical case: the Workspace dropdown's "No
2629
+ // workspace" placeholder was visually selected, but its
2630
+ // value="" left React state undefined, so Create did
2631
+ // nothing. Surface them explicitly so the planner emits a
2632
+ // select step before another click.
2633
+ const defaultedSelects = inventory
2634
+ .filter((e) => e.tag === "select" &&
2635
+ e.value !== null &&
2636
+ e.value !== undefined &&
2637
+ e.value.length === 0 &&
2638
+ e.selectOptions !== null &&
2639
+ e.selectOptions !== undefined &&
2640
+ e.selectOptions.length > 1 &&
2641
+ // rc.17 — skip selects we've already touched; their
2642
+ // form state is committed even though the visible
2643
+ // value="" still trips the DEFAULTED heuristic.
2644
+ e.interactedThisRun !== true)
2645
+ .slice(0, 5)
2646
+ .map((e) => {
2647
+ const label = e.labelText ?? e.ariaLabel ?? e.name ?? e.placeholder ?? "(no label)";
2648
+ // Show the first option with a non-empty value and text as
2649
+ // the suggested pick — the obvious target when the planner
2650
+ // doesn't have a domain reason to prefer a specific one.
2651
+ const realOptions = (e.selectOptions ?? []).filter((o) => o.value.length > 0 && o.text.length > 0);
2652
+ const firstReal = realOptions[0]?.text ?? "(none)";
2653
+ return ` - ${JSON.stringify(label)} → selector=${e.selector} (first real option: ${JSON.stringify(firstReal)})`;
2654
+ });
2655
+ const defaultedSelectHint = defaultedSelects.length > 0
2656
+ ? `\n\nVisible DEFAULTED dropdowns on this page (value="" — React form-state likely treats these as UNTOUCHED, which silently fails submit):\n${defaultedSelects.join("\n")}\n\nIssue {"kind":"select", "option_text":"…"} to commit a choice. Even if the default visible label ("No workspace", "None") is what you want, you MUST emit the select step to register it with the form's state.`
2657
+ : "";
2337
2658
  args.steps.push(sameSelector
2338
2659
  ? `Post-verify: no-progress detected — same ${nextStep.kind} on same selector, inventory unchanged. Re-planning instead of re-running.`
2339
2660
  : `Post-verify: no-progress detected — successive click steps with no inventory change. Forcing a non-click action.`);
@@ -2345,7 +2666,8 @@ ${formatInventory(input.inventory)}`,
2345
2666
  `DIFFERENT KIND: {"kind":"fill"} on any empty text input, {"kind":"check"} on ` +
2346
2667
  `any unticked checkbox, {"kind":"select"} on any unselected dropdown, or ` +
2347
2668
  `{"kind":"done"} if there is genuinely nothing to do.` +
2348
- emptyInputHint;
2669
+ emptyInputHint +
2670
+ defaultedSelectHint;
2349
2671
  prevSignature = signature;
2350
2672
  prevInventorySize = inventory.length;
2351
2673
  continue;
@@ -2367,6 +2689,23 @@ ${formatInventory(input.inventory)}`,
2367
2689
  if (nextStep.kind === "extract") {
2368
2690
  credentials = await this.extractCredentials();
2369
2691
  if (credentials.api_key === undefined) {
2692
+ // rc.28 — planner-quoted-token fallback. The regex
2693
+ // library missed the token (e.g. IPInfo's 14-char hex, or
2694
+ // some other unrecognized shape), but the planner's reason
2695
+ // often quotes the value literally. Accept it ONLY IF it
2696
+ // also appears verbatim in the visible page text — that's
2697
+ // the anti-hallucination guardrail.
2698
+ const pageText = await this.browser
2699
+ .extractText()
2700
+ .catch(() => "");
2701
+ const quoted = extractQuotedTokenFromReason(nextStep.reason, pageText);
2702
+ if (quoted !== null) {
2703
+ credentials = { ...credentials, api_key: quoted };
2704
+ args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: extracted token via ` +
2705
+ `planner-quoted fallback (${quoted.slice(0, 4)}…${quoted.slice(-4)})`);
2706
+ consecutiveFailedExtracts = 0;
2707
+ continue;
2708
+ }
2370
2709
  consecutiveFailedExtracts += 1;
2371
2710
  // Best-effort diagnostic upload: when extract returns
2372
2711
  // null despite the planner asserting a credential is
@@ -2508,12 +2847,74 @@ ${formatInventory(input.inventory)}`,
2508
2847
  }
2509
2848
  // Re-extract — but tolerate the page still navigating from the
2510
2849
  // step just taken; the next round settles and re-reads.
2850
+ const hadCredentialsBefore = credentials.api_key !== undefined || credentials.username !== undefined;
2511
2851
  try {
2512
2852
  credentials = await this.extractCredentials();
2513
2853
  }
2514
2854
  catch {
2515
2855
  // page mid-navigation — next round's waitForFormReady handles it
2516
2856
  }
2857
+ // rc.16 — synthetic extract round capture. When the implicit
2858
+ // extractCredentials() above pulls a credential out of the page
2859
+ // *without* the planner ever having picked an `extract` step,
2860
+ // the for-loop's early-return at the next iteration's top fires
2861
+ // before any further capture is written. The chain that
2862
+ // auto-promote sees then has no `observed.kind === "extract"`
2863
+ // round, so promoteToSkill rejects with no_extract_step. Fix:
2864
+ // when an implicit extract just succeeded and the planner's
2865
+ // chosen step this round wasn't already `extract`, write a
2866
+ // synthetic extract round with fresh state+inventory captured
2867
+ // RIGHT NOW (the action just ran, the token row is now visible).
2868
+ // Best-effort — a capture failure must never block returning the
2869
+ // credential we already have.
2870
+ const haveNewCredentials = !hadCredentialsBefore &&
2871
+ (credentials.api_key !== undefined || credentials.username !== undefined);
2872
+ if (haveNewCredentials && nextStep.kind !== "extract") {
2873
+ try {
2874
+ const [postState, postInventory] = await Promise.all([
2875
+ this.browser.getState(),
2876
+ this.buildInventory(args.steps, undefined, 80),
2877
+ ]);
2878
+ const syntheticExtract = {
2879
+ kind: "extract",
2880
+ reason: `implicit extract after ${nextStep.kind} — credentials surfaced on the page`,
2881
+ };
2882
+ captureOnboardingRound({
2883
+ service: args.service,
2884
+ round: round + 1,
2885
+ oauth,
2886
+ state: postState,
2887
+ inventory: postInventory,
2888
+ observed: syntheticExtract,
2889
+ });
2890
+ if (this.roundUploader !== undefined) {
2891
+ void (async () => {
2892
+ try {
2893
+ await this.roundUploader({
2894
+ service: args.service,
2895
+ round: round + 1,
2896
+ kind: syntheticExtract.kind,
2897
+ url: postState.url,
2898
+ title: postState.title,
2899
+ inventory_count: postInventory.length,
2900
+ observed_reason: syntheticExtract.reason,
2901
+ html: postState.html,
2902
+ ...(postState.screenshot !== undefined && postState.screenshot.length > 0
2903
+ ? { screenshot_jpeg_base64: postState.screenshot }
2904
+ : {}),
2905
+ });
2906
+ }
2907
+ catch {
2908
+ // best-effort
2909
+ }
2910
+ })();
2911
+ }
2912
+ }
2913
+ catch {
2914
+ // best-effort — synthetic capture is auto-promote plumbing,
2915
+ // never load-bearing for the parent signup
2916
+ }
2917
+ }
2517
2918
  }
2518
2919
  return credentials;
2519
2920
  }
@@ -2609,7 +3010,9 @@ ${loginGuidance}
2609
3010
  - If a "Create"/"Continue" button is disabled, look for a required terms-of-service / agreement checkbox and tick it with {"kind":"check"} — use the checkbox's own inventory selector (an entry with type=checkbox), NOT the adjacent "Terms of Service" link. A "click" on a styled checkbox often fails to flip it; use "check".
2610
3011
  - If an Accept / Agree / Continue button is DISABLED and the page shows a ToS / agreement modal (a long scrollable block of legal text, often inside a dialog), AND there is no agreement checkbox in the inventory to tick, return {"kind":"scroll"}. Some services (Railway is the canonical case) only enable the Accept button after the user scrolls the modal body to the bottom. The bot auto-detects the scrollable container — you do NOT need a selector. Do NOT use "click" to try to scroll; "click" does not scroll, it lands a click and returns. After scrolling, the next round should re-read the page and click the now-enabled Accept button (which will appear in the inventory).
2611
3012
  - Prefer the simplest credential path: a project- or organization-level API token / auth token usually needs only a name. A "personal token" with a grid of per-scope permission dropdowns is more work — choose it only if no simpler token type is offered.
3013
+ - **Token names must be unique within the account.** Many services (Railway is the canonical case) silently reject submits whose name collides with an existing token — the click registers, the button takes focus, but no token is created and no error toast is shown. Before filling a token-name input, READ the visible existing-tokens list on the page (names like "mykey", "mytoken123", any others). For the name you fill, prefer a fresh unique name like \`ts-<random>\` or \`agent-<short-suffix>\`; NEVER reuse a name that appears in the existing list — including names with sequential suffixes like \`mykey2\`, \`mykey3\` if the un-suffixed name is also present (assume the user has been iterating). If you cannot see the existing-tokens list (it scrolled off, the page hides it), pick a name with high entropy (8+ random alphanumeric chars).
2612
3014
  - On a token-creation form whose permission/scope dropdowns default to "No Access" / "None", you MUST set permissions BEFORE clicking the create button.
3015
+ - **Defaulted dropdowns (value="") gate submit, even when the visible label looks fine.** An inventory line marked \`(DEFAULTED — pick an explicit option before submitting)\` means a \`<select>\` is showing its first option visually but its underlying value is empty. React-form-state libraries (React Hook Form, Formik) treat those as UNTOUCHED and reject submits silently — the click on the submit button visually focuses it but no submission occurs. Issue \`{"kind":"select", "option_text":"…"}\` to commit a choice BEFORE clicking submit, even if the existing visible label ("No workspace", "None", "Select…") is the option you want. The Railway token-create form was the canonical case: typing the name and clicking Create did nothing for six rounds because the Workspace dropdown was never explicitly selected.
2613
3016
  - **PERMISSION SCOPE — default is MAXIMUM.** ${input.scopeHint !== undefined
2614
3017
  ? `The user provided a scope hint: "${input.scopeHint}". Pick option_text values aligned with this on each permission dropdown.`
2615
3018
  : `No scope hint was provided. Default to the HIGHEST available permission level on EVERY permission dropdown (Admin > Write > Read > anything lower). Most agent use-cases need write access; a read-only token will fail downstream when the agent tries to push data. Set "Admin" if offered; "Write" otherwise. Explicitly use option_text to specify — do NOT rely on first-option behavior, which often picks Read.`}