@trusty-squire/mcp 0.5.9 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/README.md +128 -68
  2. package/dist/api-client.d.ts +1 -0
  3. package/dist/api-client.d.ts.map +1 -1
  4. package/dist/api-client.js +27 -0
  5. package/dist/api-client.js.map +1 -1
  6. package/dist/bot/agent.d.ts +8 -0
  7. package/dist/bot/agent.d.ts.map +1 -1
  8. package/dist/bot/agent.js +504 -58
  9. package/dist/bot/agent.js.map +1 -1
  10. package/dist/bot/browser.d.ts +11 -1
  11. package/dist/bot/browser.d.ts.map +1 -1
  12. package/dist/bot/browser.js +365 -20
  13. package/dist/bot/browser.js.map +1 -1
  14. package/dist/bot/debug.d.ts.map +1 -1
  15. package/dist/bot/debug.js +19 -8
  16. package/dist/bot/debug.js.map +1 -1
  17. package/dist/bot/google-login.d.ts +4 -0
  18. package/dist/bot/google-login.d.ts.map +1 -1
  19. package/dist/bot/google-login.js +86 -7
  20. package/dist/bot/google-login.js.map +1 -1
  21. package/dist/bot/index.d.ts +3 -0
  22. package/dist/bot/index.d.ts.map +1 -1
  23. package/dist/bot/index.js +3 -0
  24. package/dist/bot/index.js.map +1 -1
  25. package/dist/bot/login-state.d.ts +1 -0
  26. package/dist/bot/login-state.d.ts.map +1 -1
  27. package/dist/bot/login-state.js +14 -0
  28. package/dist/bot/login-state.js.map +1 -1
  29. package/dist/bot/xvfb.d.ts +10 -0
  30. package/dist/bot/xvfb.d.ts.map +1 -0
  31. package/dist/bot/xvfb.js +75 -0
  32. package/dist/bot/xvfb.js.map +1 -0
  33. package/dist/install/agents.d.ts.map +1 -1
  34. package/dist/install/agents.js +37 -4
  35. package/dist/install/agents.js.map +1 -1
  36. package/dist/install/cli.d.ts +1 -0
  37. package/dist/install/cli.d.ts.map +1 -1
  38. package/dist/install/cli.js +148 -33
  39. package/dist/install/cli.js.map +1 -1
  40. package/dist/tools/provision-any.d.ts +23 -0
  41. package/dist/tools/provision-any.d.ts.map +1 -1
  42. package/dist/tools/provision-any.js +140 -13
  43. package/dist/tools/provision-any.js.map +1 -1
  44. package/package.json +1 -1
package/dist/bot/agent.js CHANGED
@@ -9,7 +9,8 @@
9
9
  // prompt rather than threading service-specific logic through the agent.
10
10
  import { rankAndCapInventory, scoreSignupButton } from "./browser.js";
11
11
  import { OAUTH_PROVIDERS, extractOAuthScopes, } from "./oauth-providers.js";
12
- import { loggedInProviders } from "./login-state.js";
12
+ import { extractGoogleNumberMatch, scrapeGoogleScopePhrases } from "./google-login.js";
13
+ import { loggedInProviders, clearProviderLoggedIn } from "./login-state.js";
13
14
  import { saveDebugSnapshot } from "./debug.js";
14
15
  import { captureOnboardingRound } from "./onboarding-capture.js";
15
16
  import { wasRecentlyPrewarmed, recordPrewarmSuccess } from "./prewarm-cache.js";
@@ -86,14 +87,46 @@ export class LLMCallBudgetExceeded extends Error {
86
87
  }
87
88
  // Best-effort canonical signup URL for a service when the caller
88
89
  // didn't pass one. Most dev-SaaS targets (Resend, Postmark, Mailgun,
89
- // MailerSend, IPInfo, Stripe, PostHog) live at <name>.com/signup; the
90
- // few that don't (services with hyphens, non-.com TLDs, or non-canonical
91
- // paths) get auto-recovered by looksLikeSignupPage's fallback to the
92
- // Google-search path. Normalization: strip everything that isn't a
93
- // letter/digit, lowercase. Exported for unit testing.
90
+ // MailerSend, IPInfo, Stripe, PostHog) live at <name>.com/signup, so
91
+ // the .com default catches them. The exceptions (services on .io,
92
+ // .ai, .dev) live in KNOWN_DOMAINS so a Sentry signup doesn't waste
93
+ // the long Google-search fallback path looking for sentry.com (which
94
+ // redirects weirdly to sentry.io and breaks looksLikeSignupPage).
95
+ // Anything still wrong falls through to the search-and-find path.
96
+ // Exported for unit testing.
97
+ // Either a hostname (default path: /signup) or a full URL (when the
98
+ // service's signup lives on a subdomain or uses a non-standard path —
99
+ // e.g. Cloudflare's dash.cloudflare.com/sign-up).
100
/**
 * Signup locations for services that do not live at `https://<slug>.com/signup`.
 *
 * Keys are normalized service slugs (lowercase, letters and digits only — the
 * same normalization `guessSignupUrl` applies). Each value is either:
 *   - a bare hostname, to which `/signup` is appended, or
 *   - a full `http(s)://` URL, returned verbatim (for signups that sit on a
 *     subdomain or a non-standard path).
 */
const KNOWN_DOMAINS = {
    sentry: "sentry.io",
    openrouter: "openrouter.ai",
    mistral: "mistral.ai",
    anthropic: "anthropic.com",
    mailtrap: "mailtrap.io",
    axiom: "axiom.co",
    loops: "loops.so",
    e2b: "e2b.dev",
    railway: "railway.app",
    supabase: "supabase.com",
    replicate: "replicate.com",
    modal: "modal.com",
    // PostHog uses posthog.com but the dashboard lives at us.posthog.com /
    // eu.posthog.com — signup is on the marketing site, .com is right.
    posthog: "posthog.com",
    // Cloudflare's marketing site has no signup form — it CTAs into the
    // dashboard. Skip the redirect chase and land on the real form.
    cloudflare: "https://dash.cloudflare.com/sign-up",
    // Vercel: pinned as a full URL. Note this is the same URL the default
    // `<slug>.com/signup` rule would produce; the entry only makes the
    // choice explicit.
    vercel: "https://vercel.com/signup",
};

/**
 * Best-effort canonical signup URL for a service when the caller did not
 * supply one. Exported for unit testing.
 *
 * @param {string} service - Human-readable service name (e.g. "Sentry").
 * @returns {string} A full `KNOWN_DOMAINS` URL verbatim, or
 *   `https://<known host>/signup`, or `https://<slug>.com/signup` as the
 *   default when the service is not listed.
 */
export function guessSignupUrl(service) {
    const slug = service.toLowerCase().replace(/[^a-z0-9]/g, "");
    // Own-property check: a plain `KNOWN_DOMAINS[slug]` also walks the
    // prototype chain, so a service called "constructor" would resolve to
    // the `Object` function and produce a garbage URL.
    const entry = Object.prototype.hasOwnProperty.call(KNOWN_DOMAINS, slug)
        ? KNOWN_DOMAINS[slug]
        : undefined;
    if (entry !== undefined && /^https?:\/\//i.test(entry)) {
        return entry;
    }
    const host = entry ?? `${slug}.com`;
    return `https://${host}/signup`;
}
98
131
  // True when the URL is a Google search results page — used to gate
99
132
  // the prewarm + the post-load "did we land somewhere useful?" check.
@@ -282,6 +315,26 @@ export function formatInventory(inventory) {
282
315
  })
283
316
  .join("\n");
284
317
  }
318
/**
 * Recognize a full-page anti-bot interstitial that is still up.
 *
 * Matching runs against the lower-cased raw HTML, so both user-visible copy
 * ("just a moment", "performing security verification") and markup markers
 * (`cf-challenge`, `dd-captcha`, ...) count as signals. Vendors are checked in
 * a fixed order and the first hit wins. Exported for unit testing.
 *
 * @param {string} html - Page HTML. Anything that is not a non-empty string
 *   is treated as "no interstitial".
 * @returns {"Cloudflare"|"Sucuri"|"DataDome"|"Imperva"|null} Vendor name for
 *   the status message, or null when no known interstitial is recognized.
 */
export function detectAntiBotBlock(html) {
    // A missing page body must not throw out of a detection helper.
    if (typeof html !== "string" || html.length === 0) {
        return null;
    }
    const text = html.toLowerCase();
    // Cloudflare "Just a moment..." / Turnstile pre-clear page: the literal
    // text plus the cf-* class names.
    if (/just a moment|cf-(?:challenge|browser-verification|turnstile)|performing security verification/.test(text)) {
        return "Cloudflare";
    }
    // "sucuri website firewall" already contains "sucuri"; one check suffices.
    if (text.includes("sucuri")) {
        return "Sucuri";
    }
    if (/datadome|dd-captcha/.test(text)) {
        return "DataDome";
    }
    if (/incapsula|imperva/.test(text)) {
        return "Imperva";
    }
    return null;
}
285
338
  // True when the page has no fillable text input AND no button that
286
339
  // reads as an email-signup option — a genuinely OAuth/SSO-only
287
340
  // service with no form to automate (F3 Issue 4).
@@ -401,7 +454,18 @@ export function parsePostVerifyStep(raw, allowedSelectors) {
401
454
  case "select": {
402
455
  const selector = requireString(obj, "selector", "post-verify select step");
403
456
  checkSelector(selector, "post-verify select step");
404
- return { kind: "select", selector, reason };
457
+ // F11: `option_text` is optional — when present, the executor
458
+ // picks the option whose visible text contains it (case-
459
+ // insensitive substring). When absent, picks the first option.
460
+ const optionText = obj["option_text"];
461
+ return {
462
+ kind: "select",
463
+ selector,
464
+ reason,
465
+ ...(typeof optionText === "string" && optionText.length > 0
466
+ ? { option_text: optionText }
467
+ : {}),
468
+ };
405
469
  }
406
470
  case "check": {
407
471
  const selector = requireString(obj, "selector", "post-verify check step");
@@ -476,6 +540,25 @@ const EMBEDDED_KEY_PREFIXES = [
476
540
  //
477
541
  // Exported for unit testing — the regex tuning here is the load-
478
542
  // bearing logic and deserves direct coverage.
543
/**
 * True when the first occurrence of `capturedKey` in `sourceText` is followed
 * by a truncation marker (`...` or the Unicode ellipsis `…`).
 *
 * That marker signals the visible display masked the full secret: the
 * extraction regex captured everything up to but not including the marker,
 * so the value LOOKS valid but is short. Used by the extract-via-Copy-button
 * recovery path so a truncated value is not stored as a working key.
 *
 * Only the first occurrence of the key is examined, and only the 10
 * characters after it.
 *
 * @param {string} sourceText - Text the key was extracted from.
 * @param {string} capturedKey - The extracted key candidate.
 * @returns {boolean} true when a truncation marker directly follows the key
 *   (optionally after whitespace); false otherwise, including when the key
 *   is empty, absent from the text, or either argument is not a string.
 */
export function isTruncatedCapture(sourceText, capturedKey) {
    if (typeof sourceText !== "string" || typeof capturedKey !== "string") {
        return false;
    }
    // An empty needle "matches" at index 0, which would report any text that
    // merely starts with "..." as a truncated key.
    if (capturedKey.length === 0) {
        return false;
    }
    const idx = sourceText.indexOf(capturedKey);
    if (idx < 0) {
        return false;
    }
    const end = idx + capturedKey.length;
    const after = sourceText.slice(end, end + 10);
    // Whitespace OK between key and ellipsis (some modals render as
    // "sk-or-v1-xxxx ..."). Three OR MORE dots; two dots are ordinary
    // punctuation and would false-positive on e.g. "key value.." in
    // help text.
    return /^\s*(?:\.{3,}|…)/.test(after);
}
479
562
  export function extractApiKeyFromText(text) {
480
563
  const prefixed = [
481
564
  /\bre_[a-zA-Z0-9_]{20,}\b/, // Resend (key body contains underscores)
@@ -491,6 +574,15 @@ export function extractApiKeyFromText(text) {
491
574
  /\bSG\.[a-zA-Z0-9_\-]{20,}\.[a-zA-Z0-9_\-]{20,}\b/, // SendGrid
492
575
  /\brnd_[a-zA-Z0-9]{20,}\b/, // Render
493
576
  /\bsntry[su]_[A-Za-z0-9_=\-]{20,}/, // Sentry org/user auth token
577
+ // OpenRouter, Anthropic, OpenAI — these are the dominant
578
+ // OAuth-completed-then-copy-needed services. Specific-prefix
579
+ // patterns first so a labeled-pattern fallback isn't load-
580
+ // bearing for them. Putting `sk-or-v1-` before `sk-` so it wins
581
+ // when both could match (cosmetic; both capture the same value).
582
+ /\bsk-or-v1-[a-zA-Z0-9_-]{20,}/, // OpenRouter (sk-or-v1-…)
583
+ /\bsk-ant-[a-zA-Z0-9_-]{20,}/, // Anthropic (sk-ant-…)
584
+ /\bsk-proj-[a-zA-Z0-9_-]{20,}/, // OpenAI project key
585
+ /\bsk-[a-zA-Z0-9]{40,}/, // OpenAI legacy (`sk-` + ~48 chars, no dashes)
494
586
  ];
495
587
  for (const pattern of prefixed) {
496
588
  const match = text.match(pattern);
@@ -723,6 +815,21 @@ export class SignupAgent {
723
815
  steps.push("OAuth-first: no usable provider affordance on the page — " +
724
816
  "falling back to form-fill");
725
817
  }
818
+ // Anti-bot interstitial that didn't clear (Cloudflare/Sucuri/
819
+ // DataDome "Just a moment..." pages that BrowserController has
820
+ // already attempted to wait + reload through). Detect by page
821
+ // text — the inventory will be tiny because the interstitial
822
+ // intentionally has 0 interactive elements. Surface as its own
823
+ // status, not as oauth_required: the latter implies "service is
824
+ // OAuth-only", which is wrong for Cloudflare et al.
825
+ if (inventory.length < 10) {
826
+ const block = detectAntiBotBlock(state.html);
827
+ if (block !== null) {
828
+ steps.push(`Anti-bot block: ${block} interstitial would not clear after retries — ` +
829
+ `the bot's fingerprint/IP did not pass ${block}'s server-side risk score`);
830
+ return { kind: "anti_bot_blocked", vendor: block };
831
+ }
832
+ }
726
833
  // OAuth-only: no fillable input AND no button that reads as an
727
834
  // email-signup option — nothing to automate (Issue 4).
728
835
  if (isOauthOnlyChooser(inventory)) {
@@ -874,6 +981,33 @@ export class SignupAgent {
874
981
  const { inventory, buttonsDropped } = rankAndCapInventory(raw, buttonCap, oauthProviders);
875
982
  steps.push(`Inventory: ${inventory.length} element(s)` +
876
983
  (buttonsDropped > 0 ? ` (${buttonsDropped} low-ranked button(s) dropped)` : ""));
984
+ // Diagnostic: a suspiciously tiny inventory usually means the page
985
+ // either didn't finish rendering OR an anti-bot interstitial (CF
986
+ // Turnstile, "Just a moment...", reCAPTCHA wall) is up. Surface the
987
+ // page state into the step trail so the failure is debuggable from
988
+ // outside the bot host.
989
+ // Threshold tuned 0.6.1: a Railway signup at /signup landed with
990
+ // 6 elements (no OAuth chooser yet — likely a CTA-only landing
991
+ // page before the real signup form renders). 5 was too narrow.
992
+ if (inventory.length < 10 && raw.length < 10) {
993
+ try {
994
+ const state = await this.browser.getState();
995
+ const text = state.html
996
+ .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "")
997
+ .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "")
998
+ .replace(/<[^>]+>/g, " ")
999
+ .replace(/\s+/g, " ")
1000
+ .trim()
1001
+ .slice(0, 240);
1002
+ const antiBot = /just a moment|verify you are human|attention required|cloudflare|cf-challenge|cf-turnstile|recaptcha|are you a robot/i.test(state.html);
1003
+ steps.push(`Inventory diagnostic: title=${JSON.stringify(state.title.slice(0, 80))} ` +
1004
+ `url=${state.url.slice(0, 120)} text=${JSON.stringify(text)}` +
1005
+ (antiBot ? " ⚠ anti-bot interstitial detected" : ""));
1006
+ }
1007
+ catch {
1008
+ // best-effort diagnostic; never abort on its failure
1009
+ }
1010
+ }
877
1011
  return inventory;
878
1012
  }
879
1013
  // Which OAuth providers may this signup take? An explicit
@@ -1060,7 +1194,11 @@ export class SignupAgent {
1060
1194
  // call hung. Override the 10-minute default with
1061
1195
  // UNIVERSAL_BOT_RUN_TIMEOUT_MS.
1062
1196
  async signup(task) {
1063
- const steps = [];
1197
+ // task.stepsSink lets a caller (provision-any) share the live step
1198
+ // trail so check_provision_status can surface mid-run prompts
1199
+ // (Google number-match etc.). Without it, the run still works —
1200
+ // steps are just only visible in the final result.
1201
+ const steps = task.stepsSink ?? [];
1064
1202
  const rawTimeout = Number(process.env.UNIVERSAL_BOT_RUN_TIMEOUT_MS);
1065
1203
  const timeoutMs = Number.isFinite(rawTimeout) && rawTimeout > 0 ? rawTimeout : 600_000;
1066
1204
  let timer;
@@ -1093,6 +1231,13 @@ export class SignupAgent {
1093
1231
  const password = task.generatePassword();
1094
1232
  const displayName = "Trusty Squire Bot";
1095
1233
  const username = `tsbot${Date.now().toString().slice(-7)}`;
1234
+ // F13 diagnostic: which Chrome launch mode start() chose, and
1235
+ // whether egress went through the configured proxy. Lets us tell
1236
+ // from outside the box whether the bot actually got an X display
1237
+ // surface AND whether the residential-proxy path engaged.
1238
+ steps.push(`Browser: launched mode=${this.browser.launchMode} ` +
1239
+ `proxy=${this.browser.proxied ?? "direct"} ` +
1240
+ `channel=${this.browser.channel ?? "bundled-chromium"}`);
1096
1241
  try {
1097
1242
  // Step 1: Navigate to signup page
1098
1243
  //
@@ -1145,7 +1290,7 @@ export class SignupAgent {
1145
1290
  }
1146
1291
  if (signupUrl !== guessed || isGoogleSearchUrl(signupUrl)) {
1147
1292
  steps.push("Searching for signup page...");
1148
- const found = await this.findSignupLink();
1293
+ const found = await this.findSignupLink(task.service);
1149
1294
  if (found !== null) {
1150
1295
  // Now that we know the real signup origin, prewarm it before
1151
1296
  // the deep navigation. Same rationale as above.
@@ -1198,6 +1343,16 @@ export class SignupAgent {
1198
1343
  steps,
1199
1344
  ...this.resultTail(),
1200
1345
  };
1346
+ case "anti_bot_blocked":
1347
+ return {
1348
+ success: false,
1349
+ error: `anti_bot_blocked: ${task.service}'s ${outcome.vendor} anti-bot interstitial would ` +
1350
+ `not clear — the bot's IP/fingerprint did not pass ${outcome.vendor}'s server-side ` +
1351
+ `risk score. This is a soft block (no challenge to solve); the user should sign up ` +
1352
+ `manually.`,
1353
+ steps,
1354
+ ...this.resultTail(),
1355
+ };
1201
1356
  case "oauth":
1202
1357
  // T6/T7 — OAuth-first path. runOAuthFlow drives the consent
1203
1358
  // handshake and post-OAuth onboarding to its own terminal
@@ -1266,6 +1421,7 @@ export class SignupAgent {
1266
1421
  credentials: { email: task.email, password },
1267
1422
  maxRounds,
1268
1423
  steps,
1424
+ ...(task.scopeHint !== undefined ? { scopeHint: task.scopeHint } : {}),
1269
1425
  });
1270
1426
  }
1271
1427
  }
@@ -1341,6 +1497,12 @@ export class SignupAgent {
1341
1497
  // Bounded consent walk — handles account-chooser → consent as two
1342
1498
  // steps without ever spinning. Each iteration re-reads the page.
1343
1499
  const MAX_OAUTH_NAV = 6;
1500
+ // True once a clean scope-grant consent has already been
1501
+ // auto-approved on this flow. Subsequent unreadable-scope consent
1502
+ // pages (post-grant confirmation, account chooser routed through
1503
+ // /consent, etc.) get the soft-advance path instead of an abort —
1504
+ // because the scope-grant decision was already made and validated.
1505
+ let consentAlreadyApproved = false;
1344
1506
  for (let i = 0; i < MAX_OAUTH_NAV; i++) {
1345
1507
  if (this.browser.oauthPageClosed()) {
1346
1508
  steps.push(`OAuth: the ${provider.label} window closed — handshake returned to the service`);
@@ -1357,14 +1519,42 @@ export class SignupAgent {
1357
1519
  continue;
1358
1520
  }
1359
1521
  const authState = provider.classifyAuthState(url, body);
1360
- steps.push(`OAuth: ${provider.label} auth state = ${authState}`);
1522
+ steps.push(`OAuth: ${provider.label} auth state = ${authState} (url=${url.slice(0, 120)})`);
1361
1523
  if (authState === "not_provider")
1362
1524
  break; // flow left the provider — back on the service
1363
1525
  if (authState === "challenge") {
1526
+ // Google's number-match challenge ("Tap N on your phone") is
1527
+ // resolvable by the user without re-running the login flow —
1528
+ // surface the number and wait for them to complete it.
1529
+ if (provider.id === "google") {
1530
+ const matchNum = extractGoogleNumberMatch(body);
1531
+ if (matchNum !== null) {
1532
+ steps.push(`Google: match the number ${matchNum} on your phone — ` +
1533
+ `open the Google app on your phone and tap ${matchNum}`);
1534
+ const cleared = await this.waitForGoogleChallenge(provider, steps);
1535
+ if (!cleared) {
1536
+ return this.oauthAbort("needs_login", `Google number-match challenge timed out after 2 minutes. ` +
1537
+ `Re-run \`${loginCmd}\`, complete the challenge in the window, then retry.`, steps);
1538
+ }
1539
+ steps.push("Google: challenge cleared — continuing OAuth");
1540
+ // Re-classify on the next iteration without burning the
1541
+ // OAuth-navigation budget (which assumes continuous
1542
+ // browser progress, not a 2-minute human pause).
1543
+ i--;
1544
+ continue;
1545
+ }
1546
+ }
1364
1547
  return this.oauthAbort("needs_login", `${provider.label} interrupted the sign-in with a security challenge ("verify it's you"). ` +
1365
1548
  `Re-run \`${loginCmd}\`, clear the challenge in the window, then retry.`, steps);
1366
1549
  }
1367
1550
  if (authState === "needs_login") {
1551
+ // Drop the provider from the logged-in marker so the NEXT
1552
+ // signup doesn't optimistically re-take the OAuth path and
1553
+ // fail the same way — it'll fall back to form-fill until the
1554
+ // user runs `mcp login` to re-establish a usable session.
1555
+ clearProviderLoggedIn(provider.id);
1556
+ steps.push(`OAuth: cleared ${provider.label} from logged-in providers — ` +
1557
+ `future signups will form-fill until \`${loginCmd}\` runs`);
1368
1558
  return this.oauthAbort("needs_login", `the bot's ${provider.label} session is missing or expired — no consent screen was reached. ` +
1369
1559
  `Re-run \`${loginCmd}\` to re-establish it, then retry.`, steps);
1370
1560
  }
@@ -1374,20 +1564,64 @@ export class SignupAgent {
1374
1564
  // login page that says "to continue to <app>"). Hand back —
1375
1565
  // never type into it.
1376
1566
  if (await this.oauthLoginFormPresent()) {
1567
+ clearProviderLoggedIn(provider.id);
1377
1568
  return this.oauthAbort("needs_login", `landed on a ${provider.label} sign-in form — the session is missing or expired. ` +
1378
1569
  `Re-run \`${loginCmd}\`, then retry. The bot will not type into ${provider.label}'s login form.`, steps);
1379
1570
  }
1380
1571
  // Genuine consent screen / account chooser — scope-gate it (T7).
1381
1572
  const scopes = extractOAuthScopes(url);
1573
+ // Always surface the parsed scopes so the user / debug logs see
1574
+ // exactly what tripped the gate (or what was allowed through).
1575
+ steps.push(`OAuth: parsed consent scopes = [${scopes === null ? "<unreadable>" : scopes.join(", ")}]`);
1382
1576
  if (scopes === null) {
1577
+ // Defense-in-depth: scrape the page DOM for known scope-grant
1578
+ // verb phrases ("See your", "Manage your contacts", "Send email
1579
+ // on your behalf", etc.). A real scope-grant consent always
1580
+ // lists each scope visually with one of these patterns. An
1581
+ // intermediate page (account chooser, post-grant confirmation,
1582
+ // safety review) does not.
1583
+ const dangerPhrases = provider.id === "google" ? scrapeGoogleScopePhrases(body) : [];
1584
+ if (dangerPhrases.length > 0) {
1585
+ return this.oauthAbort("oauth_consent_needs_review", `${provider.label} consent page (URL unparseable) lists scope-grant phrases: ` +
1586
+ `[${dangerPhrases.join(" | ")}]. Pausing for manual review.`, steps);
1587
+ }
1588
+ if (consentAlreadyApproved) {
1589
+ // We already validated and auto-approved a scope-grant
1590
+ // consent earlier in this flow. This second consent-classed
1591
+ // page has no parseable scopes AND no visible scope-grant
1592
+ // verb phrases — it's a post-grant confirmation / safety
1593
+ // review / account chooser routed through /consent. Soft
1594
+ // advance: try the approve control, and if it isn't there
1595
+ // the loop will re-classify on the next iteration.
1596
+ steps.push("OAuth: post-grant consent page (no parseable scopes, no scope phrases) — advancing");
1597
+ const advanced = await this.browser.advanceOAuthConsent(provider.id);
1598
+ if (!advanced) {
1599
+ steps.push("OAuth: no approve control on the post-grant page — waiting for natural navigation");
1600
+ }
1601
+ await this.browser.wait(3);
1602
+ continue;
1603
+ }
1383
1604
  return this.oauthAbort("oauth_consent_needs_review", `reached a ${provider.label} consent screen but could not read its requested scopes ` +
1384
1605
  `from the URL — pausing for manual review rather than approving blind.`, steps);
1385
1606
  }
1386
- if (!provider.scopesAreBasic(scopes)) {
1387
- return this.oauthAbort("oauth_consent_needs_review", `the consent screen requests scopes beyond basic identity (${scopes.join(", ")}). ` +
1388
- `Approve it manually the bot only auto-approves basic-identity scopes.`, steps);
1607
+ const extraAllowed = new Set(task.allowExtraOAuthScopes ?? []);
1608
+ const nonBasic = scopes.filter((s) => !provider.scopesAreBasic([s]));
1609
+ const unauthorized = nonBasic.filter((s) => !extraAllowed.has(s));
1610
+ if (unauthorized.length > 0) {
1611
+ // Encode requested scopes into the error so the MCP tool layer
1612
+ // can extract them and show the user what to approve.
1613
+ return this.oauthAbort("oauth_consent_needs_review", `${provider.label} consent requests non-basic scopes: [${unauthorized.join(", ")}]. ` +
1614
+ `All requested scopes: [${scopes.join(", ")}]. ` +
1615
+ `To proceed, re-run provision_any_service with allow_extra_oauth_scopes set to ` +
1616
+ `the scopes the user has explicitly approved.`, steps);
1617
+ }
1618
+ if (nonBasic.length > 0) {
1619
+ steps.push(`OAuth: user pre-approved extra scopes [${nonBasic.join(", ")}] — auto-approving`);
1389
1620
  }
1390
- steps.push(`OAuth: consent scopes all basic (${scopes.join(", ")}) — auto-approving`);
1621
+ else {
1622
+ steps.push(`OAuth: consent scopes all basic (${scopes.join(", ")}) — auto-approving`);
1623
+ }
1624
+ consentAlreadyApproved = true;
1391
1625
  const advanced = await this.browser.advanceOAuthConsent(provider.id);
1392
1626
  if (!advanced) {
1393
1627
  return this.oauthAbort("oauth_consent_needs_review", `reached a ${provider.label} consent screen but found no approve control to click — ` +
@@ -1407,6 +1641,7 @@ export class SignupAgent {
1407
1641
  service: task.service,
1408
1642
  maxRounds: task.postVerifyMaxRounds ?? 12,
1409
1643
  steps,
1644
+ ...(task.scopeHint !== undefined ? { scopeHint: task.scopeHint } : {}),
1410
1645
  });
1411
1646
  }
1412
1647
  if (credentials.api_key !== undefined) {
@@ -1449,6 +1684,32 @@ export class SignupAgent {
1449
1684
  ...this.resultTail(),
1450
1685
  };
1451
1686
  }
1687
+ // Poll the provider page until the challenge clears (the user
1688
+ // completed it on their phone) or 2 minutes elapse. Returns true on
1689
+ // resolution, false on timeout. The 2-minute cap is enough time to
1690
+ // unlock a phone, open the Google app, and tap a number; longer
1691
+ // would mask a stuck/abandoned flow.
1692
+ async waitForGoogleChallenge(provider, steps) {
1693
+ const deadline = Date.now() + 120_000;
1694
+ while (Date.now() < deadline) {
1695
+ await this.browser.wait(3);
1696
+ if (this.browser.oauthPageClosed())
1697
+ return true;
1698
+ const url = this.browser.currentUrl();
1699
+ let body;
1700
+ try {
1701
+ body = (await this.browser.extractText()).slice(0, 4000);
1702
+ }
1703
+ catch {
1704
+ continue;
1705
+ }
1706
+ const state = provider.classifyAuthState(url, body);
1707
+ if (state !== "challenge")
1708
+ return true;
1709
+ }
1710
+ steps.push("Google: challenge wait timed out after 2 minutes");
1711
+ return false;
1712
+ }
1452
1713
  // Backstop for the critical guarantee (D4): true when the active
1453
1714
  // provider page carries a credential-entry field — an expired/missing
1454
1715
  // session dropped the bot on a login form. A genuine consent screen
@@ -1622,6 +1883,7 @@ ${formatInventory(input.inventory)}`,
1622
1883
  oauth,
1623
1884
  inventory,
1624
1885
  ...(hint !== undefined ? { hint } : {}),
1886
+ ...(args.scopeHint !== undefined ? { scopeHint: args.scopeHint } : {}),
1625
1887
  });
1626
1888
  }
1627
1889
  catch (err) {
@@ -1683,7 +1945,7 @@ ${formatInventory(input.inventory)}`,
1683
1945
  await this.browser.type(nextStep.selector, nextStep.value);
1684
1946
  }
1685
1947
  else if (nextStep.kind === "select") {
1686
- await this.browser.selectOption(nextStep.selector);
1948
+ await this.browser.selectOption(nextStep.selector, nextStep.option_text);
1687
1949
  await this.browser.wait(1);
1688
1950
  }
1689
1951
  else if (nextStep.kind === "check") {
@@ -1789,7 +2051,7 @@ Schema:
1789
2051
  {"kind":"login","reason":"the page is a login form / we were signed out"}
1790
2052
  {"kind":"click","selector":"<a selector= copied verbatim from the inventory>","reason":"e.g. open the API keys page"}
1791
2053
  {"kind":"fill","selector":"<a selector= from the inventory>","value":"value","reason":"unusual — only for a required project-name etc."}
1792
- {"kind":"select","selector":"<a selector= from the inventory, tag=select>","reason":"pick an option for a dropdown — region, role, country"}
2054
+ {"kind":"select","selector":"<a selector= from the inventory>","option_text":"<visible label of the option to pick — optional>","reason":"pick an option for a dropdown — region, role, country, or a permission/scope on a token form"}
1793
2055
  {"kind":"check","selector":"<a selector= from the inventory, type=checkbox>","reason":"tick a terms-of-service / agreement checkbox"}
1794
2056
  {"kind":"navigate","url":"https://...","reason":"e.g. go directly to /settings/api-keys"}
1795
2057
  {"kind":"wait","seconds":N,"reason":"page is still loading"}
@@ -1816,11 +2078,16 @@ Strategy:
1816
2078
  ${loginGuidance}
1817
2079
  - If we're on a "verify your phone" / "verify email" wall, return done (we can't solve those).
1818
2080
  - If the page wants the user to create a project/key before showing it, fill the minimum and click create.
1819
- - For a required dropdown (an inventory entry with tag=select region, role, country), use {"kind":"select"} — a "click" cannot pick a <select> option, so do not click it repeatedly.
2081
+ - For ANY dropdown native (tag=select) OR a custom combobox (role=combobox / aria-haspopup=listbox, common on modern React apps like Sentry / Stripe / Vercel) use {"kind":"select"}. "click" on a combobox trigger opens it but does not pick an option; do not click it repeatedly.
2082
+ - When you need a SPECIFIC option from the dropdown — e.g. "Project: Read" on Sentry's permissions picker, or a specific region — include "option_text" with the visible label. The executor matches it case-insensitively as a substring. Omit "option_text" when any option is fine (a placeholder country picker).
1820
2083
  - A post-OAuth onboarding form (organization name, region, terms) is normal — fill/select/check its fields and click Continue to advance toward the dashboard; do not return "done" just because it is a form.
1821
2084
  - If a "Create"/"Continue" button is disabled, look for a required terms-of-service / agreement checkbox and tick it with {"kind":"check"} — use the checkbox's own inventory selector (an entry with type=checkbox), NOT the adjacent "Terms of Service" link. A "click" on a styled checkbox often fails to flip it; use "check".
1822
2085
  - Prefer the simplest credential path: a project- or organization-level API token / auth token usually needs only a name. A "personal token" with a grid of per-scope permission dropdowns is more work — choose it only if no simpler token type is offered.
1823
- - On a token-creation form whose permission/scope dropdowns default to "No Access" / "None", you MUST use a select step to set a non-default permission on at least one dropdown BEFORE clicking the create button — creating with all-default permissions does nothing. Do not click the create button repeatedly; set a permission first.
2086
+ - On a token-creation form whose permission/scope dropdowns default to "No Access" / "None", you MUST set permissions BEFORE clicking the create button.
2087
+ - **PERMISSION SCOPE — default is MAXIMUM.** ${input.scopeHint !== undefined
2088
+ ? `The user provided a scope hint: "${input.scopeHint}". Pick option_text values aligned with this on each permission dropdown.`
2089
+ : `No scope hint was provided. Default to the HIGHEST available permission level on EVERY permission dropdown (Admin > Write > Read > anything lower). Most agent use-cases need write access; a read-only token will fail downstream when the agent tries to push data. Set "Admin" if offered; "Write" otherwise. Explicitly use option_text to specify — do NOT rely on first-option behavior, which often picks Read.`}
2090
+ - On a form with MULTIPLE permission rows (Sentry: Project, Team, Member, Issue, Event, Release, Organization), set EACH ONE before clicking Create. One step per turn — return to this turn-by-turn until every row is set.
1824
2091
  - Round ${input.round + 1} of ${input.maxRounds}. Prefer "done" if you're not making progress.`;
1825
2092
  const userBlocks = [
1826
2093
  { kind: "image", media_type: "image/png", data_base64: input.state.screenshot },
@@ -1865,63 +2132,137 @@ ${formatInventory(input.inventory)}${input.hint !== undefined ? `\n\nIMPORTANT
1865
2132
  },
1866
2133
  });
1867
2134
  }
1868
- async findSignupLink() {
2135
+ // Pick a signup link out of the current page's HTML. Used as the
2136
+ // fallback after a Google-search navigation.
2137
+ //
2138
+ // The naive version (regex /href="[^"]*signup[^"]*"/) failed badly
2139
+ // on Google search results: it matched URLs like
2140
+ // accounts.google.com/SignOutOptions?continue=...search?q=Sentry%20signup
2141
+ // — Google's own nav, whose ?continue= query param leaks the
2142
+ // original search query (with "signup" in it) and gets matched.
2143
+ // The bot then navigated to a Google sign-out page and gave up.
2144
+ //
2145
+ // This version:
2146
+ // - parses each href as a real URL
2147
+ // - rejects google.com / accounts.google.com / support.google.com
2148
+ // and other Google nav infra (we're ON a google search page, so
2149
+ // any google.com href is search-nav, not the service)
2150
+ // - matches against host+path only — never query params
2151
+ // - scores candidates: hosts that contain the service name win
2152
+ // over generic matches. Means "sentry.io/signup" beats
2153
+ // "github.com/sentry/sentry/blob/...signup..." (the github
2154
+ // source-code result that mentions signup in a path).
2155
+ // - returns the highest-scoring candidate, or null.
2156
+ async findSignupLink(serviceName) {
1869
2157
  const html = (await this.browser.getState()).html;
1870
- const re = /href="([^"]*(?:signup|register|sign-up|create-account|join)[^"]*)"/gi;
2158
+ const serviceSlug = serviceName?.toLowerCase().replace(/[^a-z0-9]/g, "") ?? "";
2159
+ const candidates = [];
2160
+ const hrefRe = /href="([^"]+)"/g;
1871
2161
  let m;
1872
- while ((m = re.exec(html)) !== null) {
1873
- const href = m[1];
1874
- if (href === undefined)
2162
+ while ((m = hrefRe.exec(html)) !== null) {
2163
+ const raw = m[1];
2164
+ if (raw === undefined)
2165
+ continue;
2166
+ let url;
2167
+ try {
2168
+ url = new URL(raw.startsWith("//") ? `https:${raw}` : raw);
2169
+ }
2170
+ catch {
2171
+ continue;
2172
+ }
2173
+ if (url.protocol !== "https:" && url.protocol !== "http:")
2174
+ continue;
2175
+ // Reject Google's own navigation infrastructure — that's what
2176
+ // tripped the naive regex on the Sentry run.
2177
+ if (/(?:^|\.)google\.com$/.test(url.hostname))
2178
+ continue;
2179
+ if (/(?:^|\.)googleusercontent\.com$/.test(url.hostname))
1875
2180
  continue;
1876
- if (href.includes("signin") || href.includes("login"))
2181
+ if (/(?:^|\.)gstatic\.com$/.test(url.hostname))
1877
2182
  continue;
1878
- if (href.startsWith("http"))
1879
- return href;
1880
- if (href.startsWith("//"))
1881
- return `https:${href}`;
2183
+ // Match against host+path ONLY. Query params can carry the
2184
+ // original search query text and would re-introduce the
2185
+ // junk-link bug.
2186
+ const hostPath = (url.hostname + url.pathname).toLowerCase();
2187
+ if (!/(?:^|\.|\/)(?:signup|register|sign-up|create-account|join)\b/.test(hostPath)) {
2188
+ continue;
2189
+ }
2190
+ // Negative: signin/login/logout in host+path.
2191
+ if (/(?:^|\/)(?:signin|login|logout|sign-in|log-in)\b/.test(hostPath))
2192
+ continue;
2193
+ // Score: a host containing the service slug is a strong match.
2194
+ // Without a slug to compare against, every match scores 1.
2195
+ const hostLower = url.hostname.toLowerCase();
2196
+ const score = serviceSlug.length > 0 && hostLower.includes(serviceSlug) ? 10 : 1;
2197
+ candidates.push({ url: url.toString(), score });
1882
2198
  }
1883
- return null;
2199
+ candidates.sort((a, b) => b.score - a.score);
2200
+ return candidates[0]?.url ?? null;
1884
2201
  }
1885
2202
  // Heuristic: does the currently-loaded page LOOK like a real signup
1886
2203
  // page? Used to decide whether the guessed canonical URL
1887
2204
  // (<service>.com/signup) worked or we need to fall back to a Google
1888
- // search. "Looks like a signup page" = the inventory has at least
1889
- // one text/email input OR a Google/GitHub OAuth button, AND the
1890
- // page title / heading don't shout 404. Deliberately permissive —
1891
- // a marketing page with an embedded email-capture form would pass,
1892
- // which is fine because the form-filler will then realize there's
1893
- // no submit-button-that-takes-an-email-to-a-real-signup and replan.
2205
+ // search.
2206
+ //
2207
+ // Three signals, in order:
2208
+ // 1. URL-path shortcut: if the page's pathname matches
2209
+ // /signup|register|sign-up|create-account|join/, trust it
2210
+ // we navigated to a signup-shaped URL and the redirect chain
2211
+ // kept us on one. Catches Sentry-style cross-TLD redirects
2212
+ // (sentry.com → sentry.io/signup) where the inventory looks
2213
+ // different from a typical signup page but the URL is correct.
2214
+ // 2. 404 guard: drop pages whose title shouts 404 / not found.
2215
+ // 3. Content check: inventory has at least one text/email input
2216
+ // OR a button whose text mentions Google/GitHub (broad on
2217
+ // purpose — a "Continue with Google" / "Login with Google" /
2218
+ // icon-only Google button all count when the bot has a
2219
+ // provider session).
1894
2220
  async looksLikeSignupPage() {
1895
2221
  const state = await this.browser.getState();
1896
- // Cheap 404-ish guard before we go to the trouble of building an
1897
- // inventory. Catches the most common "wrong guess" outcome.
2222
+ // 1. URL-path shortcut. If we navigated to a signup-shaped path
2223
+ // and the browser kept us on one, that's a strong signal —
2224
+ // redirect chains often preserve the path across TLD changes.
2225
+ try {
2226
+ const path = new URL(state.url).pathname.toLowerCase();
2227
+ if (/(?:^|\/)(?:signup|register|sign-up|create-account|join)\b/.test(path)) {
2228
+ return true;
2229
+ }
2230
+ }
2231
+ catch {
2232
+ // Malformed state.url — skip the shortcut, fall through.
2233
+ }
2234
+ // 2. 404 guard.
1898
2235
  const titleLower = (state.title ?? "").toLowerCase();
1899
2236
  if (titleLower.includes("404") ||
1900
2237
  titleLower.includes("not found") ||
1901
2238
  titleLower.includes("page not found")) {
1902
2239
  return false;
1903
2240
  }
2241
+ // 3. Inventory check.
1904
2242
  let inventory;
1905
2243
  try {
1906
2244
  inventory = await this.browser.extractInteractiveElements();
1907
2245
  }
1908
2246
  catch {
1909
- // Inventory failure means we can't tell — assume it worked and
1910
- // let the downstream planExecuteWithRetry give the verdict.
1911
2247
  return true;
1912
2248
  }
1913
2249
  const hasInput = inventory.some((e) => e.tag === "input" &&
1914
2250
  (e.type === "email" || e.type === "text" || e.type === null || e.type === undefined));
1915
2251
  if (hasInput)
1916
2252
  return true;
1917
- // Sometimes the form is gated behind a "Sign in with Google /
1918
- // GitHub" button (Resend, Vercel, etc.). Those count as a usable
1919
- // signup page when the bot has a provider session.
1920
- const hasOAuthButton = inventory.some((e) => {
2253
+ // Broad OAuth-button detection: any element whose visible text or
2254
+ // aria-label mentions "google" or "github" as a word. Covers
2255
+ // "Continue with Google", "Login with Google", "Use Google",
2256
+ // "Sign in with GitHub", and icon-only buttons with
2257
+ // aria-label="Google" — all common on OAuth-only signup pages.
2258
+ // False positives (e.g. a "Google Tag Manager" footer link)
2259
+ // are unlikely on a real signup view and harmless: the worst
2260
+ // case is we trust this page and the downstream planner gives
2261
+ // up cleanly later.
2262
+ return inventory.some((e) => {
1921
2263
  const text = `${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`.toLowerCase();
1922
- return /(?:^|\s)(?:sign(?:\s|-)?in|sign(?:\s|-)?up|continue)\s+with\s+(?:google|github)/i.test(text);
2264
+ return /\b(?:google|github)\b/.test(text);
1923
2265
  });
1924
- return hasOAuthButton;
1925
2266
  }
1926
2267
  async extractCredentials() {
1927
2268
  // IMPORTANT: pull credentials from the *visible* page, not the raw
@@ -1929,27 +2270,132 @@ ${formatInventory(input.inventory)}${input.hint !== undefined ? `\n\nIMPORTANT
1929
2270
  // Turnstile, hCaptcha) whose challenge tokens look like API keys to
1930
2271
  // a naive regex.
1931
2272
  //
1932
- // Two visible surfaces, in priority order:
1933
- // 1. Discrete credential candidates — copy-input values and each
1934
- // element's own direct text. A key is read whole here, un-glued
1935
- // from adjacent buttons; captcha tokens (hidden inputs) are
1936
- // excluded by the browser.
1937
- // 2. The whole visible body text fallback for a key shown as
1938
- // plain prose, accepting that body concatenation can glue
1939
- // neighbours (the extractApiKeyFromText guards catch the worst).
2273
+ // Three-pass extraction, in priority order:
2274
+ // 1. Visible candidates — input values + each element's direct
2275
+ // text (a key read whole, un-glued from adjacent buttons), then the whole visible body text as a fallback.
2276
+ // 2. F10: when pass 1 hits a TRUNCATED display (modal shows
2277
+ // "sk-or-v1-1687…" with the full secret only on the
2278
+ // clipboard via the Copy button), click the Copy button and
2279
+ // re-extract from `navigator.clipboard.readText()`. This is
2280
+ // the OpenRouter / Anthropic / OpenAI / Stripe modal
2281
+ // pattern — pass 1 would otherwise persist a truncated stub.
2282
+ // 3. F10 fallback: walk hidden inputs. Some modals stash the
2283
+ // full secret in a `display:none` <input> the masked display
2284
+ // reads from.
1940
2285
  const credentials = {};
1941
2286
  let apiKey = null;
2287
+ let truncatedHit = null;
1942
2288
  for (const candidate of await this.browser.extractCredentialCandidates()) {
1943
- apiKey = extractApiKeyFromText(candidate);
1944
- if (apiKey !== null)
1945
- break;
2289
+ const hit = extractApiKeyFromText(candidate);
2290
+ if (hit === null)
2291
+ continue;
2292
+ if (isTruncatedCapture(candidate, hit)) {
2293
+ // Remember the truncated value but keep scanning — a later
2294
+ // candidate may produce a full one (e.g. a hidden input on
2295
+ // the same page).
2296
+ truncatedHit = truncatedHit ?? hit;
2297
+ continue;
2298
+ }
2299
+ apiKey = hit;
2300
+ break;
2301
+ }
2302
+ if (apiKey === null) {
2303
+ const bodyText = await this.browser.extractText();
2304
+ const hit = extractApiKeyFromText(bodyText);
2305
+ if (hit !== null) {
2306
+ if (isTruncatedCapture(bodyText, hit)) {
2307
+ truncatedHit = truncatedHit ?? hit;
2308
+ }
2309
+ else {
2310
+ apiKey = hit;
2311
+ }
2312
+ }
2313
+ }
2314
+ // Pass 2 — Copy-button + clipboard recovery.
2315
+ if (apiKey === null && truncatedHit !== null) {
2316
+ apiKey = await this.tryCopyButtonExtraction();
1946
2317
  }
2318
+ // Pass 3 — hidden-input scan. Cheap to always try as a last
2319
+ // resort, whether or not we saw a truncated hit; a service that
2320
+ // stashes the key in a hidden input may not display it at all.
1947
2321
  if (apiKey === null) {
1948
- apiKey = extractApiKeyFromText(await this.browser.extractText());
2322
+ try {
2323
+ for (const value of await this.browser.extractAllInputValues()) {
2324
+ const hit = extractApiKeyFromText(value);
2325
+ if (hit !== null && !isTruncatedCapture(value, hit)) {
2326
+ apiKey = hit;
2327
+ break;
2328
+ }
2329
+ }
2330
+ }
2331
+ catch {
2332
+ // Hidden-input scan failures are non-fatal; we just stay
2333
+ // with whatever we had (or null).
2334
+ }
2335
+ }
2336
+ // Last resort: if every path returned a truncated value, persist
2337
+ // it with a `_truncated` suffix so the host agent can surface the
2338
+ // partial result to the user (better than reporting "no key
2339
+ // found" when the bot demonstrably reached the modal).
2340
+ if (apiKey === null && truncatedHit !== null) {
2341
+ credentials.api_key_truncated = truncatedHit;
2342
+ return credentials;
1949
2343
  }
1950
2344
  if (apiKey !== null)
1951
2345
  credentials.api_key = apiKey;
1952
2346
  return credentials;
1953
2347
  }
2348
+ // F10: click the page's Copy button (whose label typically reads
2349
+ // "Copy", "Copy key", "Copy secret") and extract the secret from
2350
+ // `navigator.clipboard.readText()`. Returns null on any failure —
2351
+ // the caller has its own fallback paths.
2352
+ async tryCopyButtonExtraction() {
2353
+ let copyBtnSelector = null;
2354
+ try {
2355
+ const inventory = await this.browser.extractInteractiveElements();
2356
+ const copyBtn = inventory.find((e) => {
2357
+ const text = `${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`.trim();
2358
+ // "Copy" alone, "Copy key", "Copy API key", "Copy secret",
2359
+ // "Copy token". Only the bare-"copy" form is anchored at the start, so a "Don't copy this" tooltip
2360
+ // doesn't match; the "copy <noun>" form matches anywhere in the label. Case-insensitive.
2361
+ return /^\s*copy(?:\b|\s|$)|copy\s+(?:api\s*key|secret|token|key)\b/i.test(text);
2362
+ });
2363
+ if (copyBtn === undefined)
2364
+ return null;
2365
+ copyBtnSelector = copyBtn.selector;
2366
+ }
2367
+ catch {
2368
+ return null;
2369
+ }
2370
+ try {
2371
+ await this.browser.click(copyBtnSelector);
2372
+ // Brief wait — the Copy button's onclick is usually a sync
2373
+ // navigator.clipboard.writeText, but some modals run an async
2374
+ // serialize step (e.g. format-the-key into "Bearer <key>"
2375
+ // first). 1s covers both with no real cost.
2376
+ await this.browser.wait(1);
2377
+ }
2378
+ catch {
2379
+ return null;
2380
+ }
2381
+ let clipboardText;
2382
+ try {
2383
+ clipboardText = await this.browser.readClipboard();
2384
+ }
2385
+ catch {
2386
+ return null;
2387
+ }
2388
+ if (clipboardText.trim().length === 0)
2389
+ return null;
2390
+ const fromClipboard = extractApiKeyFromText(clipboardText);
2391
+ if (fromClipboard === null)
2392
+ return null;
2393
+ // Sanity: don't accept a clipboard hit that is ITSELF truncated
2394
+ // (some Copy buttons copy the masked display rather than the
2395
+ // real value — defensive against that surprising case).
2396
+ if (isTruncatedCapture(clipboardText, fromClipboard))
2397
+ return null;
2398
+ return fromClipboard;
2399
+ }
1954
2400
  }
1955
2401
  //# sourceMappingURL=agent.js.map