@trusty-squire/mcp 0.5.9 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +128 -68
  2. package/dist/api-client.d.ts +1 -0
  3. package/dist/api-client.d.ts.map +1 -1
  4. package/dist/api-client.js +27 -0
  5. package/dist/api-client.js.map +1 -1
  6. package/dist/bot/agent.d.ts +8 -0
  7. package/dist/bot/agent.d.ts.map +1 -1
  8. package/dist/bot/agent.js +492 -57
  9. package/dist/bot/agent.js.map +1 -1
  10. package/dist/bot/browser.d.ts +11 -1
  11. package/dist/bot/browser.d.ts.map +1 -1
  12. package/dist/bot/browser.js +365 -20
  13. package/dist/bot/browser.js.map +1 -1
  14. package/dist/bot/debug.d.ts.map +1 -1
  15. package/dist/bot/debug.js +19 -8
  16. package/dist/bot/debug.js.map +1 -1
  17. package/dist/bot/google-login.d.ts +4 -0
  18. package/dist/bot/google-login.d.ts.map +1 -1
  19. package/dist/bot/google-login.js +86 -7
  20. package/dist/bot/google-login.js.map +1 -1
  21. package/dist/bot/index.d.ts +3 -0
  22. package/dist/bot/index.d.ts.map +1 -1
  23. package/dist/bot/index.js +3 -0
  24. package/dist/bot/index.js.map +1 -1
  25. package/dist/bot/xvfb.d.ts +10 -0
  26. package/dist/bot/xvfb.d.ts.map +1 -0
  27. package/dist/bot/xvfb.js +75 -0
  28. package/dist/bot/xvfb.js.map +1 -0
  29. package/dist/install/agents.d.ts.map +1 -1
  30. package/dist/install/agents.js +37 -4
  31. package/dist/install/agents.js.map +1 -1
  32. package/dist/install/cli.d.ts +1 -0
  33. package/dist/install/cli.d.ts.map +1 -1
  34. package/dist/install/cli.js +148 -33
  35. package/dist/install/cli.js.map +1 -1
  36. package/dist/tools/provision-any.d.ts +23 -0
  37. package/dist/tools/provision-any.d.ts.map +1 -1
  38. package/dist/tools/provision-any.js +135 -9
  39. package/dist/tools/provision-any.js.map +1 -1
  40. package/package.json +1 -1
package/dist/bot/agent.js CHANGED
@@ -9,6 +9,7 @@
9
9
  // prompt rather than threading service-specific logic through the agent.
10
10
  import { rankAndCapInventory, scoreSignupButton } from "./browser.js";
11
11
  import { OAUTH_PROVIDERS, extractOAuthScopes, } from "./oauth-providers.js";
12
+ import { extractGoogleNumberMatch, scrapeGoogleScopePhrases } from "./google-login.js";
12
13
  import { loggedInProviders } from "./login-state.js";
13
14
  import { saveDebugSnapshot } from "./debug.js";
14
15
  import { captureOnboardingRound } from "./onboarding-capture.js";
@@ -86,14 +87,46 @@ export class LLMCallBudgetExceeded extends Error {
86
87
  }
87
88
  // Best-effort canonical signup URL for a service when the caller
88
89
  // didn't pass one. Most dev-SaaS targets (Resend, Postmark, Mailgun,
89
- // MailerSend, IPInfo, Stripe, PostHog) live at <name>.com/signup; the
90
- // few that don't (services with hyphens, non-.com TLDs, or non-canonical
91
- // paths) get auto-recovered by looksLikeSignupPage's fallback to the
92
- // Google-search path. Normalization: strip everything that isn't a
93
- // letter/digit, lowercase. Exported for unit testing.
90
+ // MailerSend, IPInfo, Stripe, PostHog) live at <name>.com/signup —
91
+ // the .com default catches them. The exceptions — services on .io,
92
+ // .ai, .dev — live in KNOWN_DOMAINS so a Sentry signup doesn't waste
93
+ // the long Google-search fallback path looking for sentry.com (which
94
+ // redirects weirdly to sentry.io and breaks looksLikeSignupPage).
95
+ // Anything still wrong falls through to the search-and-find path.
96
+ // Exported for unit testing.
97
+ // Either a hostname (default path: /signup) or a full URL (when the
98
+ // service's signup lives on a subdomain or uses a non-standard path —
99
+ // e.g. Cloudflare's dash.cloudflare.com/sign-up).
100
// Signup-location overrides keyed by normalized service slug (lowercase,
// letters and digits only). A value is either a bare hostname, to which
// guessSignupUrl appends "/signup", or a full http(s) URL that is
// returned verbatim.
const KNOWN_DOMAINS = {
    sentry: "sentry.io",
    openrouter: "openrouter.ai",
    mistral: "mistral.ai",
    anthropic: "anthropic.com",
    mailtrap: "mailtrap.io",
    axiom: "axiom.co",
    loops: "loops.so",
    e2b: "e2b.dev",
    railway: "railway.app",
    supabase: "supabase.com",
    replicate: "replicate.com",
    modal: "modal.com",
    // PostHog uses posthog.com but the dashboard lives at us.posthog.com /
    // eu.posthog.com — signup is on the marketing site, .com is right.
    posthog: "posthog.com",
    // Cloudflare's marketing site has no signup form — it CTAs into the
    // dashboard. Skip the redirect chase and land on the real form.
    cloudflare: "https://dash.cloudflare.com/sign-up",
    // Vercel: marketing /signup redirects through OAuth provider tiles
    // but the actual email form sits on the dashboard.
    vercel: "https://vercel.com/signup",
};
/**
 * Best-effort signup URL for a service when the caller did not supply one.
 *
 * The service name is normalized to a slug (lowercased, every character
 * outside a-z / 0-9 removed). A KNOWN_DOMAINS entry for that slug wins:
 * a full http(s) URL is returned as-is, a bare hostname gets "/signup"
 * appended. Without an entry the guess is `https://<slug>.com/signup`.
 *
 * @param {string} service - Human-readable service name, e.g. "Sentry".
 * @returns {string} The guessed signup URL.
 */
export function guessSignupUrl(service) {
    const slug = service.toLowerCase().replace(/[^a-z0-9]/g, "");
    // Own-property lookup only: a plain `KNOWN_DOMAINS[slug]` also returns
    // inherited members, so a service named "constructor" would resolve to
    // the Object function and produce a garbage hostname.
    const entry = Object.prototype.hasOwnProperty.call(KNOWN_DOMAINS, slug)
        ? KNOWN_DOMAINS[slug]
        : undefined;
    if (entry !== undefined && /^https?:\/\//i.test(entry)) {
        return entry;
    }
    const host = entry ?? `${slug}.com`;
    return `https://${host}/signup`;
}
98
131
  // True when the URL is a Google search results page — used to gate
99
132
  // the prewarm + the post-load "did we land somewhere useful?" check.
@@ -282,6 +315,26 @@ export function formatInventory(inventory) {
282
315
  })
283
316
  .join("\n");
284
317
  }
318
/**
 * Recognize a full-page anti-bot interstitial in a page's HTML.
 *
 * Each vendor is identified by a case-insensitive search over the raw
 * HTML for its user-visible copy and/or well-known markup markers
 * (e.g. Cloudflare's "Just a moment" text and cf-* class names).
 * Vendors are checked in a fixed order and the first match wins.
 * Exported for unit testing.
 *
 * @param {string} html - Raw page HTML.
 * @returns {string|null} Vendor name for the status message
 *   ("Cloudflare", "Sucuri", "DataDome", "Imperva"), or null when no
 *   signature matches or `html` is not a string.
 */
export function detectAntiBotBlock(html) {
    // A missing/failed page read is "nothing detected", not a crash.
    if (typeof html !== "string") {
        return null;
    }
    // Order matters: it preserves the precedence Cloudflare > Sucuri >
    // DataDome > Imperva when a page happens to mention several vendors.
    const signatures = [
        // Cloudflare "Just a moment..." / Turnstile pre-clear page: the
        // literal text plus the cf-* class names.
        ["Cloudflare", /just a moment|cf-(challenge|browser-verification|turnstile)|performing security verification/i],
        ["Sucuri", /sucuri/i],
        ["DataDome", /datadome|dd-captcha/i],
        ["Imperva", /incapsula|imperva/i],
    ];
    for (const [vendor, pattern] of signatures) {
        if (pattern.test(html)) {
            return vendor;
        }
    }
    return null;
}
285
338
  // True when the page has no fillable text input AND no button that
286
339
  // reads as an email-signup option — a genuinely OAuth/SSO-only
287
340
  // service with no form to automate (F3 Issue 4).
@@ -401,7 +454,18 @@ export function parsePostVerifyStep(raw, allowedSelectors) {
401
454
  case "select": {
402
455
  const selector = requireString(obj, "selector", "post-verify select step");
403
456
  checkSelector(selector, "post-verify select step");
404
- return { kind: "select", selector, reason };
457
+ // F11: `option_text` is optional — when present, the executor
458
+ // picks the option whose visible text contains it (case-
459
+ // insensitive substring). When absent, picks the first option.
460
+ const optionText = obj["option_text"];
461
+ return {
462
+ kind: "select",
463
+ selector,
464
+ reason,
465
+ ...(typeof optionText === "string" && optionText.length > 0
466
+ ? { option_text: optionText }
467
+ : {}),
468
+ };
405
469
  }
406
470
  case "check": {
407
471
  const selector = requireString(obj, "selector", "post-verify check step");
@@ -476,6 +540,25 @@ const EMBEDDED_KEY_PREFIXES = [
476
540
  //
477
541
  // Exported for unit testing — the regex tuning here is the load-
478
542
  // bearing logic and deserves direct coverage.
543
/**
 * True when the first occurrence of `capturedKey` in `sourceText` is
 * followed by a truncation marker — three or more dots, or the Unicode
 * ellipsis `…` — optionally preceded by whitespace.
 *
 * That marker is the signal that the visible display masked the full
 * secret: the key regex captured everything up to but not including the
 * marker, so the value LOOKS valid but is short. Used by F10's
 * extract-via-Copy-button recovery path; without this check the bot
 * accepts the truncated value, stores it, and the user discovers the
 * failure only when their next API call returns 401.
 *
 * @param {string} sourceText - Text the key was extracted from.
 * @param {string} capturedKey - The extracted key candidate.
 * @returns {boolean} true if the capture looks truncated; false when
 *   it does not, when the key is absent from the text, or when either
 *   argument is not a usable string.
 */
export function isTruncatedCapture(sourceText, capturedKey) {
    // An empty key "occurs" at index 0 of every string, which would turn
    // any text that merely starts with "..." into a false positive.
    if (typeof sourceText !== "string" ||
        typeof capturedKey !== "string" ||
        capturedKey.length === 0) {
        return false;
    }
    const idx = sourceText.indexOf(capturedKey);
    if (idx < 0) {
        return false;
    }
    // Only the 10 characters right after the key are inspected.
    const keyEnd = idx + capturedKey.length;
    const after = sourceText.slice(keyEnd, keyEnd + 10);
    // Whitespace OK between key and ellipsis (some modals render as
    // "sk-or-v1-xxxx ..."). Three OR MORE dots; two dots are ordinary
    // punctuation and would false-positive on e.g. "key value.." in
    // help text.
    return /^\s*(?:\.{3,}|…)/.test(after);
}
479
562
  export function extractApiKeyFromText(text) {
480
563
  const prefixed = [
481
564
  /\bre_[a-zA-Z0-9_]{20,}\b/, // Resend (key body contains underscores)
@@ -491,6 +574,15 @@ export function extractApiKeyFromText(text) {
491
574
  /\bSG\.[a-zA-Z0-9_\-]{20,}\.[a-zA-Z0-9_\-]{20,}\b/, // SendGrid
492
575
  /\brnd_[a-zA-Z0-9]{20,}\b/, // Render
493
576
  /\bsntry[su]_[A-Za-z0-9_=\-]{20,}/, // Sentry org/user auth token
577
+ // OpenRouter, Anthropic, OpenAI — these are the dominant
578
+ // OAuth-completed-then-copy-needed services. Specific-prefix
579
+ // patterns first so a labeled-pattern fallback isn't load-
580
+ // bearing for them. Putting `sk-or-v1-` before `sk-` so it wins
581
+ // when both could match (cosmetic; both capture the same value).
582
+ /\bsk-or-v1-[a-zA-Z0-9_-]{20,}/, // OpenRouter (sk-or-v1-…)
583
+ /\bsk-ant-[a-zA-Z0-9_-]{20,}/, // Anthropic (sk-ant-…)
584
+ /\bsk-proj-[a-zA-Z0-9_-]{20,}/, // OpenAI project key
585
+ /\bsk-[a-zA-Z0-9]{40,}/, // OpenAI legacy (`sk-` + ~48 chars, no dashes)
494
586
  ];
495
587
  for (const pattern of prefixed) {
496
588
  const match = text.match(pattern);
@@ -723,6 +815,21 @@ export class SignupAgent {
723
815
  steps.push("OAuth-first: no usable provider affordance on the page — " +
724
816
  "falling back to form-fill");
725
817
  }
818
+ // Anti-bot interstitial that didn't clear (Cloudflare/Sucuri/
819
+ // DataDome "Just a moment..." pages that BrowserController has
820
+ // already attempted to wait + reload through). Detect by page
821
+ // text — the inventory will be tiny because the interstitial
822
+ // intentionally has 0 interactive elements. Surface as its own
823
+ // status, not as oauth_required: the latter implies "service is
824
+ // OAuth-only", which is wrong for Cloudflare et al.
825
+ if (inventory.length < 5) {
826
+ const block = detectAntiBotBlock(state.html);
827
+ if (block !== null) {
828
+ steps.push(`Anti-bot block: ${block} interstitial would not clear after retries — ` +
829
+ `the bot's fingerprint/IP did not pass ${block}'s server-side risk score`);
830
+ return { kind: "anti_bot_blocked", vendor: block };
831
+ }
832
+ }
726
833
  // OAuth-only: no fillable input AND no button that reads as an
727
834
  // email-signup option — nothing to automate (Issue 4).
728
835
  if (isOauthOnlyChooser(inventory)) {
@@ -874,6 +981,30 @@ export class SignupAgent {
874
981
  const { inventory, buttonsDropped } = rankAndCapInventory(raw, buttonCap, oauthProviders);
875
982
  steps.push(`Inventory: ${inventory.length} element(s)` +
876
983
  (buttonsDropped > 0 ? ` (${buttonsDropped} low-ranked button(s) dropped)` : ""));
984
+ // Diagnostic: a suspiciously tiny inventory usually means the page
985
+ // either didn't finish rendering OR an anti-bot interstitial (CF
986
+ // Turnstile, "Just a moment...", reCAPTCHA wall) is up. Surface the
987
+ // page state into the step trail so the failure is debuggable from
988
+ // outside the bot host.
989
+ if (inventory.length < 5 && raw.length < 5) {
990
+ try {
991
+ const state = await this.browser.getState();
992
+ const text = state.html
993
+ .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, "")
994
+ .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, "")
995
+ .replace(/<[^>]+>/g, " ")
996
+ .replace(/\s+/g, " ")
997
+ .trim()
998
+ .slice(0, 240);
999
+ const antiBot = /just a moment|verify you are human|attention required|cloudflare|cf-challenge|cf-turnstile|recaptcha|are you a robot/i.test(state.html);
1000
+ steps.push(`Inventory diagnostic: title=${JSON.stringify(state.title.slice(0, 80))} ` +
1001
+ `url=${state.url.slice(0, 120)} text=${JSON.stringify(text)}` +
1002
+ (antiBot ? " ⚠ anti-bot interstitial detected" : ""));
1003
+ }
1004
+ catch {
1005
+ // best-effort diagnostic; never abort on its failure
1006
+ }
1007
+ }
877
1008
  return inventory;
878
1009
  }
879
1010
  // Which OAuth providers may this signup take? An explicit
@@ -1060,7 +1191,11 @@ export class SignupAgent {
1060
1191
  // call hung. Override the 10-minute default with
1061
1192
  // UNIVERSAL_BOT_RUN_TIMEOUT_MS.
1062
1193
  async signup(task) {
1063
- const steps = [];
1194
+ // task.stepsSink lets a caller (provision-any) share the live step
1195
+ // trail so check_provision_status can surface mid-run prompts
1196
+ // (Google number-match etc.). Without it, the run still works —
1197
+ // steps are just only visible in the final result.
1198
+ const steps = task.stepsSink ?? [];
1064
1199
  const rawTimeout = Number(process.env.UNIVERSAL_BOT_RUN_TIMEOUT_MS);
1065
1200
  const timeoutMs = Number.isFinite(rawTimeout) && rawTimeout > 0 ? rawTimeout : 600_000;
1066
1201
  let timer;
@@ -1093,6 +1228,13 @@ export class SignupAgent {
1093
1228
  const password = task.generatePassword();
1094
1229
  const displayName = "Trusty Squire Bot";
1095
1230
  const username = `tsbot${Date.now().toString().slice(-7)}`;
1231
+ // F13 diagnostic: which Chrome launch mode start() chose, and
1232
+ // whether egress went through the configured proxy. Lets us tell
1233
+ // from outside the box whether the bot actually got an X display
1234
+ // surface AND whether the residential-proxy path engaged.
1235
+ steps.push(`Browser: launched mode=${this.browser.launchMode} ` +
1236
+ `proxy=${this.browser.proxied ?? "direct"} ` +
1237
+ `channel=${this.browser.channel ?? "bundled-chromium"}`);
1096
1238
  try {
1097
1239
  // Step 1: Navigate to signup page
1098
1240
  //
@@ -1145,7 +1287,7 @@ export class SignupAgent {
1145
1287
  }
1146
1288
  if (signupUrl !== guessed || isGoogleSearchUrl(signupUrl)) {
1147
1289
  steps.push("Searching for signup page...");
1148
- const found = await this.findSignupLink();
1290
+ const found = await this.findSignupLink(task.service);
1149
1291
  if (found !== null) {
1150
1292
  // Now that we know the real signup origin, prewarm it before
1151
1293
  // the deep navigation. Same rationale as above.
@@ -1198,6 +1340,16 @@ export class SignupAgent {
1198
1340
  steps,
1199
1341
  ...this.resultTail(),
1200
1342
  };
1343
+ case "anti_bot_blocked":
1344
+ return {
1345
+ success: false,
1346
+ error: `anti_bot_blocked: ${task.service}'s ${outcome.vendor} anti-bot interstitial would ` +
1347
+ `not clear — the bot's IP/fingerprint did not pass ${outcome.vendor}'s server-side ` +
1348
+ `risk score. This is a soft block (no challenge to solve); the user should sign up ` +
1349
+ `manually.`,
1350
+ steps,
1351
+ ...this.resultTail(),
1352
+ };
1201
1353
  case "oauth":
1202
1354
  // T6/T7 — OAuth-first path. runOAuthFlow drives the consent
1203
1355
  // handshake and post-OAuth onboarding to its own terminal
@@ -1266,6 +1418,7 @@ export class SignupAgent {
1266
1418
  credentials: { email: task.email, password },
1267
1419
  maxRounds,
1268
1420
  steps,
1421
+ ...(task.scopeHint !== undefined ? { scopeHint: task.scopeHint } : {}),
1269
1422
  });
1270
1423
  }
1271
1424
  }
@@ -1341,6 +1494,12 @@ export class SignupAgent {
1341
1494
  // Bounded consent walk — handles account-chooser → consent as two
1342
1495
  // steps without ever spinning. Each iteration re-reads the page.
1343
1496
  const MAX_OAUTH_NAV = 6;
1497
+ // True once a clean scope-grant consent has already been
1498
+ // auto-approved on this flow. Subsequent unreadable-scope consent
1499
+ // pages (post-grant confirmation, account chooser routed through
1500
+ // /consent, etc.) get the soft-advance path instead of an abort —
1501
+ // because the scope-grant decision was already made and validated.
1502
+ let consentAlreadyApproved = false;
1344
1503
  for (let i = 0; i < MAX_OAUTH_NAV; i++) {
1345
1504
  if (this.browser.oauthPageClosed()) {
1346
1505
  steps.push(`OAuth: the ${provider.label} window closed — handshake returned to the service`);
@@ -1357,10 +1516,31 @@ export class SignupAgent {
1357
1516
  continue;
1358
1517
  }
1359
1518
  const authState = provider.classifyAuthState(url, body);
1360
- steps.push(`OAuth: ${provider.label} auth state = ${authState}`);
1519
+ steps.push(`OAuth: ${provider.label} auth state = ${authState} (url=${url.slice(0, 120)})`);
1361
1520
  if (authState === "not_provider")
1362
1521
  break; // flow left the provider — back on the service
1363
1522
  if (authState === "challenge") {
1523
+ // Google's number-match challenge ("Tap N on your phone") is
1524
+ // resolvable by the user without re-running the login flow —
1525
+ // surface the number and wait for them to complete it.
1526
+ if (provider.id === "google") {
1527
+ const matchNum = extractGoogleNumberMatch(body);
1528
+ if (matchNum !== null) {
1529
+ steps.push(`Google: match the number ${matchNum} on your phone — ` +
1530
+ `open the Google app on your phone and tap ${matchNum}`);
1531
+ const cleared = await this.waitForGoogleChallenge(provider, steps);
1532
+ if (!cleared) {
1533
+ return this.oauthAbort("needs_login", `Google number-match challenge timed out after 2 minutes. ` +
1534
+ `Re-run \`${loginCmd}\`, complete the challenge in the window, then retry.`, steps);
1535
+ }
1536
+ steps.push("Google: challenge cleared — continuing OAuth");
1537
+ // Re-classify on the next iteration without burning the
1538
+ // OAuth-navigation budget (which assumes continuous
1539
+ // browser progress, not a 2-minute human pause).
1540
+ i--;
1541
+ continue;
1542
+ }
1543
+ }
1364
1544
  return this.oauthAbort("needs_login", `${provider.label} interrupted the sign-in with a security challenge ("verify it's you"). ` +
1365
1545
  `Re-run \`${loginCmd}\`, clear the challenge in the window, then retry.`, steps);
1366
1546
  }
@@ -1379,15 +1559,58 @@ export class SignupAgent {
1379
1559
  }
1380
1560
  // Genuine consent screen / account chooser — scope-gate it (T7).
1381
1561
  const scopes = extractOAuthScopes(url);
1562
+ // Always surface the parsed scopes so the user / debug logs see
1563
+ // exactly what tripped the gate (or what was allowed through).
1564
+ steps.push(`OAuth: parsed consent scopes = [${scopes === null ? "<unreadable>" : scopes.join(", ")}]`);
1382
1565
  if (scopes === null) {
1566
+ // Defense-in-depth: scrape the page DOM for known scope-grant
1567
+ // verb phrases ("See your", "Manage your contacts", "Send email
1568
+ // on your behalf", etc.). A real scope-grant consent always
1569
+ // lists each scope visually with one of these patterns. An
1570
+ // intermediate page (account chooser, post-grant confirmation,
1571
+ // safety review) does not.
1572
+ const dangerPhrases = provider.id === "google" ? scrapeGoogleScopePhrases(body) : [];
1573
+ if (dangerPhrases.length > 0) {
1574
+ return this.oauthAbort("oauth_consent_needs_review", `${provider.label} consent page (URL unparseable) lists scope-grant phrases: ` +
1575
+ `[${dangerPhrases.join(" | ")}]. Pausing for manual review.`, steps);
1576
+ }
1577
+ if (consentAlreadyApproved) {
1578
+ // We already validated and auto-approved a scope-grant
1579
+ // consent earlier in this flow. This second consent-classed
1580
+ // page has no parseable scopes AND no visible scope-grant
1581
+ // verb phrases — it's a post-grant confirmation / safety
1582
+ // review / account chooser routed through /consent. Soft
1583
+ // advance: try the approve control, and if it isn't there
1584
+ // the loop will re-classify on the next iteration.
1585
+ steps.push("OAuth: post-grant consent page (no parseable scopes, no scope phrases) — advancing");
1586
+ const advanced = await this.browser.advanceOAuthConsent(provider.id);
1587
+ if (!advanced) {
1588
+ steps.push("OAuth: no approve control on the post-grant page — waiting for natural navigation");
1589
+ }
1590
+ await this.browser.wait(3);
1591
+ continue;
1592
+ }
1383
1593
  return this.oauthAbort("oauth_consent_needs_review", `reached a ${provider.label} consent screen but could not read its requested scopes ` +
1384
1594
  `from the URL — pausing for manual review rather than approving blind.`, steps);
1385
1595
  }
1386
- if (!provider.scopesAreBasic(scopes)) {
1387
- return this.oauthAbort("oauth_consent_needs_review", `the consent screen requests scopes beyond basic identity (${scopes.join(", ")}). ` +
1388
- `Approve it manually the bot only auto-approves basic-identity scopes.`, steps);
1596
+ const extraAllowed = new Set(task.allowExtraOAuthScopes ?? []);
1597
+ const nonBasic = scopes.filter((s) => !provider.scopesAreBasic([s]));
1598
+ const unauthorized = nonBasic.filter((s) => !extraAllowed.has(s));
1599
+ if (unauthorized.length > 0) {
1600
+ // Encode requested scopes into the error so the MCP tool layer
1601
+ // can extract them and show the user what to approve.
1602
+ return this.oauthAbort("oauth_consent_needs_review", `${provider.label} consent requests non-basic scopes: [${unauthorized.join(", ")}]. ` +
1603
+ `All requested scopes: [${scopes.join(", ")}]. ` +
1604
+ `To proceed, re-run provision_any_service with allow_extra_oauth_scopes set to ` +
1605
+ `the scopes the user has explicitly approved.`, steps);
1606
+ }
1607
+ if (nonBasic.length > 0) {
1608
+ steps.push(`OAuth: user pre-approved extra scopes [${nonBasic.join(", ")}] — auto-approving`);
1389
1609
  }
1390
- steps.push(`OAuth: consent scopes all basic (${scopes.join(", ")}) — auto-approving`);
1610
+ else {
1611
+ steps.push(`OAuth: consent scopes all basic (${scopes.join(", ")}) — auto-approving`);
1612
+ }
1613
+ consentAlreadyApproved = true;
1391
1614
  const advanced = await this.browser.advanceOAuthConsent(provider.id);
1392
1615
  if (!advanced) {
1393
1616
  return this.oauthAbort("oauth_consent_needs_review", `reached a ${provider.label} consent screen but found no approve control to click — ` +
@@ -1407,6 +1630,7 @@ export class SignupAgent {
1407
1630
  service: task.service,
1408
1631
  maxRounds: task.postVerifyMaxRounds ?? 12,
1409
1632
  steps,
1633
+ ...(task.scopeHint !== undefined ? { scopeHint: task.scopeHint } : {}),
1410
1634
  });
1411
1635
  }
1412
1636
  if (credentials.api_key !== undefined) {
@@ -1449,6 +1673,32 @@ export class SignupAgent {
1449
1673
  ...this.resultTail(),
1450
1674
  };
1451
1675
  }
1676
+ // Poll the provider page until the challenge clears (the user
1677
+ // completed it on their phone) or 2 minutes elapse. Returns true on
1678
+ // resolution, false on timeout. The 2-minute cap is enough time to
1679
+ // unlock a phone, open the Google app, and tap a number; longer
1680
+ // would mask a stuck/abandoned flow.
1681
+ async waitForGoogleChallenge(provider, steps) {
1682
+ const deadline = Date.now() + 120_000;
1683
+ while (Date.now() < deadline) {
1684
+ await this.browser.wait(3);
1685
+ if (this.browser.oauthPageClosed())
1686
+ return true;
1687
+ const url = this.browser.currentUrl();
1688
+ let body;
1689
+ try {
1690
+ body = (await this.browser.extractText()).slice(0, 4000);
1691
+ }
1692
+ catch {
1693
+ continue;
1694
+ }
1695
+ const state = provider.classifyAuthState(url, body);
1696
+ if (state !== "challenge")
1697
+ return true;
1698
+ }
1699
+ steps.push("Google: challenge wait timed out after 2 minutes");
1700
+ return false;
1701
+ }
1452
1702
  // Backstop for the critical guarantee (D4): true when the active
1453
1703
  // provider page carries a credential-entry field — an expired/missing
1454
1704
  // session dropped the bot on a login form. A genuine consent screen
@@ -1622,6 +1872,7 @@ ${formatInventory(input.inventory)}`,
1622
1872
  oauth,
1623
1873
  inventory,
1624
1874
  ...(hint !== undefined ? { hint } : {}),
1875
+ ...(args.scopeHint !== undefined ? { scopeHint: args.scopeHint } : {}),
1625
1876
  });
1626
1877
  }
1627
1878
  catch (err) {
@@ -1683,7 +1934,7 @@ ${formatInventory(input.inventory)}`,
1683
1934
  await this.browser.type(nextStep.selector, nextStep.value);
1684
1935
  }
1685
1936
  else if (nextStep.kind === "select") {
1686
- await this.browser.selectOption(nextStep.selector);
1937
+ await this.browser.selectOption(nextStep.selector, nextStep.option_text);
1687
1938
  await this.browser.wait(1);
1688
1939
  }
1689
1940
  else if (nextStep.kind === "check") {
@@ -1789,7 +2040,7 @@ Schema:
1789
2040
  {"kind":"login","reason":"the page is a login form / we were signed out"}
1790
2041
  {"kind":"click","selector":"<a selector= copied verbatim from the inventory>","reason":"e.g. open the API keys page"}
1791
2042
  {"kind":"fill","selector":"<a selector= from the inventory>","value":"value","reason":"unusual — only for a required project-name etc."}
1792
- {"kind":"select","selector":"<a selector= from the inventory, tag=select>","reason":"pick an option for a dropdown — region, role, country"}
2043
+ {"kind":"select","selector":"<a selector= from the inventory>","option_text":"<visible label of the option to pick — optional>","reason":"pick an option for a dropdown — region, role, country, or a permission/scope on a token form"}
1793
2044
  {"kind":"check","selector":"<a selector= from the inventory, type=checkbox>","reason":"tick a terms-of-service / agreement checkbox"}
1794
2045
  {"kind":"navigate","url":"https://...","reason":"e.g. go directly to /settings/api-keys"}
1795
2046
  {"kind":"wait","seconds":N,"reason":"page is still loading"}
@@ -1816,11 +2067,16 @@ Strategy:
1816
2067
  ${loginGuidance}
1817
2068
  - If we're on a "verify your phone" / "verify email" wall, return done (we can't solve those).
1818
2069
  - If the page wants the user to create a project/key before showing it, fill the minimum and click create.
1819
- - For a required dropdown (an inventory entry with tag=select region, role, country), use {"kind":"select"} — a "click" cannot pick a <select> option, so do not click it repeatedly.
2070
+ - For ANY dropdown native (tag=select) OR a custom combobox (role=combobox / aria-haspopup=listbox, common on modern React apps like Sentry / Stripe / Vercel) use {"kind":"select"}. "click" on a combobox trigger opens it but does not pick an option; do not click it repeatedly.
2071
+ - When you need a SPECIFIC option from the dropdown — e.g. "Project: Read" on Sentry's permissions picker, or a specific region — include "option_text" with the visible label. The executor matches it case-insensitively as a substring. Omit "option_text" when any option is fine (a placeholder country picker).
1820
2072
  - A post-OAuth onboarding form (organization name, region, terms) is normal — fill/select/check its fields and click Continue to advance toward the dashboard; do not return "done" just because it is a form.
1821
2073
  - If a "Create"/"Continue" button is disabled, look for a required terms-of-service / agreement checkbox and tick it with {"kind":"check"} — use the checkbox's own inventory selector (an entry with type=checkbox), NOT the adjacent "Terms of Service" link. A "click" on a styled checkbox often fails to flip it; use "check".
1822
2074
  - Prefer the simplest credential path: a project- or organization-level API token / auth token usually needs only a name. A "personal token" with a grid of per-scope permission dropdowns is more work — choose it only if no simpler token type is offered.
1823
- - On a token-creation form whose permission/scope dropdowns default to "No Access" / "None", you MUST use a select step to set a non-default permission on at least one dropdown BEFORE clicking the create button — creating with all-default permissions does nothing. Do not click the create button repeatedly; set a permission first.
2075
+ - On a token-creation form whose permission/scope dropdowns default to "No Access" / "None", you MUST set permissions BEFORE clicking the create button.
2076
+ - **PERMISSION SCOPE — default is MAXIMUM.** ${input.scopeHint !== undefined
2077
+ ? `The user provided a scope hint: "${input.scopeHint}". Pick option_text values aligned with this on each permission dropdown.`
2078
+ : `No scope hint was provided. Default to the HIGHEST available permission level on EVERY permission dropdown (Admin > Write > Read > anything lower). Most agent use-cases need write access; a read-only token will fail downstream when the agent tries to push data. Set "Admin" if offered; "Write" otherwise. Explicitly use option_text to specify — do NOT rely on first-option behavior, which often picks Read.`}
2079
+ - On a form with MULTIPLE permission rows (Sentry: Project, Team, Member, Issue, Event, Release, Organization), set EACH ONE before clicking Create. One step per turn — return to this turn-by-turn until every row is set.
1824
2080
  - Round ${input.round + 1} of ${input.maxRounds}. Prefer "done" if you're not making progress.`;
1825
2081
  const userBlocks = [
1826
2082
  { kind: "image", media_type: "image/png", data_base64: input.state.screenshot },
@@ -1865,63 +2121,137 @@ ${formatInventory(input.inventory)}${input.hint !== undefined ? `\n\nIMPORTANT
1865
2121
  },
1866
2122
  });
1867
2123
  }
1868
- async findSignupLink() {
2124
+ // Pick a signup link out of the current page's HTML. Used as the
2125
+ // fallback after a Google-search navigation.
2126
+ //
2127
+ // The naive version (regex /href="[^"]*signup[^"]*"/) failed badly
2128
+ // on Google search results: it matched URLs like
2129
+ // accounts.google.com/SignOutOptions?continue=...search?q=Sentry%20signup
2130
+ // — Google's own nav, whose ?continue= query param leaks the
2131
+ // original search query (with "signup" in it) and gets matched.
2132
+ // The bot then navigated to a Google sign-out page and gave up.
2133
+ //
2134
+ // This version:
2135
+ // - parses each href as a real URL
2136
+ // - rejects google.com / accounts.google.com / support.google.com
2137
+ // and other Google nav infra (we're ON a google search page, so
2138
+ // any google.com href is search-nav, not the service)
2139
+ // - matches against host+path only — never query params
2140
+ // - scores candidates: hosts that contain the service name win
2141
+ // over generic matches. Means "sentry.io/signup" beats
2142
+ // "github.com/sentry/sentry/blob/...signup..." (the github
2143
+ // source-code result that mentions signup in a path).
2144
+ // - returns the highest-scoring candidate, or null.
2145
+ async findSignupLink(serviceName) {
1869
2146
  const html = (await this.browser.getState()).html;
1870
- const re = /href="([^"]*(?:signup|register|sign-up|create-account|join)[^"]*)"/gi;
2147
+ const serviceSlug = serviceName?.toLowerCase().replace(/[^a-z0-9]/g, "") ?? "";
2148
+ const candidates = [];
2149
+ const hrefRe = /href="([^"]+)"/g;
1871
2150
  let m;
1872
- while ((m = re.exec(html)) !== null) {
1873
- const href = m[1];
1874
- if (href === undefined)
2151
+ while ((m = hrefRe.exec(html)) !== null) {
2152
+ const raw = m[1];
2153
+ if (raw === undefined)
2154
+ continue;
2155
+ let url;
2156
+ try {
2157
+ url = new URL(raw.startsWith("//") ? `https:${raw}` : raw);
2158
+ }
2159
+ catch {
2160
+ continue;
2161
+ }
2162
+ if (url.protocol !== "https:" && url.protocol !== "http:")
2163
+ continue;
2164
+ // Reject Google's own navigation infrastructure — that's what
2165
+ // tripped the naive regex on the Sentry run.
2166
+ if (/(?:^|\.)google\.com$/.test(url.hostname))
2167
+ continue;
2168
+ if (/(?:^|\.)googleusercontent\.com$/.test(url.hostname))
1875
2169
  continue;
1876
- if (href.includes("signin") || href.includes("login"))
2170
+ if (/(?:^|\.)gstatic\.com$/.test(url.hostname))
1877
2171
  continue;
1878
- if (href.startsWith("http"))
1879
- return href;
1880
- if (href.startsWith("//"))
1881
- return `https:${href}`;
2172
+ // Match against host+path ONLY. Query params can carry the
2173
+ // original search query text and would re-introduce the
2174
+ // junk-link bug.
2175
+ const hostPath = (url.hostname + url.pathname).toLowerCase();
2176
+ if (!/(?:^|\.|\/)(?:signup|register|sign-up|create-account|join)\b/.test(hostPath)) {
2177
+ continue;
2178
+ }
2179
+ // Negative: signin/login/logout in host+path.
2180
+ if (/(?:^|\/)(?:signin|login|logout|sign-in|log-in)\b/.test(hostPath))
2181
+ continue;
2182
+ // Score: a host containing the service slug is a strong match.
2183
+ // Without a slug to compare against, every match scores 1.
2184
+ const hostLower = url.hostname.toLowerCase();
2185
+ const score = serviceSlug.length > 0 && hostLower.includes(serviceSlug) ? 10 : 1;
2186
+ candidates.push({ url: url.toString(), score });
1882
2187
  }
1883
- return null;
2188
+ candidates.sort((a, b) => b.score - a.score);
2189
+ return candidates[0]?.url ?? null;
1884
2190
  }
1885
2191
  // Heuristic: does the currently-loaded page LOOK like a real signup
1886
2192
  // page? Used to decide whether the guessed canonical URL
1887
2193
  // (<service>.com/signup) worked or we need to fall back to a Google
1888
- // search. "Looks like a signup page" = the inventory has at least
1889
- // one text/email input OR a Google/GitHub OAuth button, AND the
1890
- // page title / heading don't shout 404. Deliberately permissive —
1891
- // a marketing page with an embedded email-capture form would pass,
1892
- // which is fine because the form-filler will then realize there's
1893
- // no submit-button-that-takes-an-email-to-a-real-signup and replan.
2194
+ // search.
2195
+ //
2196
+ // Three signals, in order:
2197
+ // 1. URL-path shortcut: if the page's pathname matches
2198
+ // /signup|register|sign-up|create-account|join/, trust it —
2199
+ // we navigated to a signup-shaped URL and the redirect chain
2200
+ // kept us on one. Catches Sentry-style cross-TLD redirects
2201
+ // (sentry.com → sentry.io/signup) where the inventory looks
2202
+ // different from a typical signup page but the URL is correct.
2203
+ // 2. 404 guard: drop pages whose title shouts 404 / not found.
2204
+ // 3. Content check: inventory has at least one text/email input
2205
+ // OR any element whose text / aria-label mentions Google/GitHub (broad on
2206
+ // purpose — a "Continue with Google" / "Login with Google" /
2207
+ // icon-only Google button all count when the bot has a
2208
+ // provider session).
1894
2209
  async looksLikeSignupPage() {
1895
2210
  const state = await this.browser.getState();
1896
- // Cheap 404-ish guard before we go to the trouble of building an
1897
- // inventory. Catches the most common "wrong guess" outcome.
2211
+ // 1. URL-path shortcut. If we navigated to a signup-shaped path
2212
+ // and the browser kept us on one, that's a strong signal —
2213
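+ // redirect chains often preserve the path across TLD changes.
+ // NOTE(review): this returns before the 404 guard below, so a
+ // "not found" page served at a signup-shaped path is accepted —
+ // confirm that ordering is intended.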
2214
+ try {
2215
+ const path = new URL(state.url).pathname.toLowerCase();
2216
+ if (/(?:^|\/)(?:signup|register|sign-up|create-account|join)\b/.test(path)) {
2217
+ return true;
2218
+ }
2219
+ }
2220
+ catch {
2221
+ // Malformed state.url — skip the shortcut, fall through.
2222
+ }
2223
+ // 2. 404 guard.
1898
2224
  const titleLower = (state.title ?? "").toLowerCase();
1899
2225
  if (titleLower.includes("404") ||
1900
2226
  titleLower.includes("not found") ||
1901
2227
  titleLower.includes("page not found")) {
1902
2228
  return false;
1903
2229
  }
2230
+ // 3. Inventory check.
1904
2231
  let inventory;
1905
2232
  try {
1906
2233
  inventory = await this.browser.extractInteractiveElements();
1907
2234
  }
1908
2235
  catch {
1909
- // Inventory failure means we can't tell — assume it worked and
1910
- // let the downstream planExecuteWithRetry give the verdict.
1911
2236
  return true;
1912
2237
  }
1913
2238
  const hasInput = inventory.some((e) => e.tag === "input" &&
1914
2239
  (e.type === "email" || e.type === "text" || e.type === null || e.type === undefined));
1915
2240
  if (hasInput)
1916
2241
  return true;
1917
- // Sometimes the form is gated behind a "Sign in with Google /
1918
- // GitHub" button (Resend, Vercel, etc.). Those count as a usable
1919
- // signup page when the bot has a provider session.
1920
- const hasOAuthButton = inventory.some((e) => {
2242
+ // Broad OAuth-button detection: any element whose visible text or
2243
+ // aria-label mentions "google" or "github" as a word. Covers
2244
+ // "Continue with Google", "Login with Google", "Use Google",
2245
+ // "Sign in with GitHub", and icon-only buttons with
2246
+ // aria-label="Google" — all common on OAuth-only signup pages.
2247
+ // False positives (e.g. a "Google Tag Manager" footer link)
2248
+ // are unlikely on a real signup view and harmless: the worst
2249
+ // case is we trust this page and the downstream planner gives
2250
+ // up cleanly later.
2251
+ return inventory.some((e) => {
1921
2252
  const text = `${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`.toLowerCase();
1922
- return /(?:^|\s)(?:sign(?:\s|-)?in|sign(?:\s|-)?up|continue)\s+with\s+(?:google|github)/i.test(text);
2253
+ return /\b(?:google|github)\b/.test(text);
1923
2254
  });
1924
- return hasOAuthButton;
1925
2255
  }
1926
2256
  async extractCredentials() {
1927
2257
  // IMPORTANT: pull credentials from the *visible* page, not the raw
@@ -1929,27 +2259,132 @@ ${formatInventory(input.inventory)}${input.hint !== undefined ? `\n\nIMPORTANT
1929
2259
  // Turnstile, hCaptcha) whose challenge tokens look like API keys to
1930
2260
  // a naive regex.
1931
2261
  //
1932
- // Two visible surfaces, in priority order:
1933
- // 1. Discrete credential candidates — copy-input values and each
1934
- // element's own direct text. A key is read whole here, un-glued
1935
- // from adjacent buttons; captcha tokens (hidden inputs) are
1936
- // excluded by the browser.
1937
- // 2. The whole visible body text fallback for a key shown as
1938
- // plain prose, accepting that body concatenation can glue
1939
- // neighbours (the extractApiKeyFromText guards catch the worst).
2262
+ // Three-pass extraction, in priority order:
2263
+ // 1. Visible candidates — input values + each element's direct
2264
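+ // text. A key read whole, un-glued from adjacent buttons;
+ // if none yields a full key, the whole visible body text
+ // (extractText) is scanned as a fallback.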
2265
+ // 2. F10: when pass 1 hits a TRUNCATED display (modal shows
2266
+ // "sk-or-v1-1687…" with the full secret only on the
2267
+ // clipboard via the Copy button), click the Copy button and
2268
+ // re-extract from `navigator.clipboard.readText()`. This is
2269
+ // the OpenRouter / Anthropic / OpenAI / Stripe modal
2270
+ // pattern — pass 1 would otherwise persist a truncated stub.
2271
+ // 3. F10 fallback: walk hidden inputs. Some modals stash the
2272
+ // full secret in a `display:none` <input> the masked display
2273
+ // reads from.
1940
2274
  const credentials = {};
1941
2275
  let apiKey = null;
2276
+ let truncatedHit = null;
1942
2277
  for (const candidate of await this.browser.extractCredentialCandidates()) {
1943
- apiKey = extractApiKeyFromText(candidate);
1944
- if (apiKey !== null)
1945
- break;
2278
+ const hit = extractApiKeyFromText(candidate);
2279
+ if (hit === null)
2280
+ continue;
2281
+ if (isTruncatedCapture(candidate, hit)) {
2282
+ // Remember the truncated value but keep scanning — a later
2283
+ // candidate may produce a full one (e.g. a hidden input on
2284
+ // the same page).
2285
+ truncatedHit = truncatedHit ?? hit;
2286
+ continue;
2287
+ }
2288
+ apiKey = hit;
2289
+ break;
2290
+ }
2291
+ if (apiKey === null) {
2292
+ const bodyText = await this.browser.extractText();
2293
+ const hit = extractApiKeyFromText(bodyText);
2294
+ if (hit !== null) {
2295
+ if (isTruncatedCapture(bodyText, hit)) {
2296
+ truncatedHit = truncatedHit ?? hit;
2297
+ }
2298
+ else {
2299
+ apiKey = hit;
2300
+ }
2301
+ }
2302
+ }
2303
+ // Pass 2 — Copy-button + clipboard recovery.
2304
+ if (apiKey === null && truncatedHit !== null) {
2305
+ apiKey = await this.tryCopyButtonExtraction();
1946
2306
  }
2307
+ // Pass 3 — hidden-input scan. Cheap to always try as a last
2308
+ // resort, whether or not we saw a truncated hit; a service that
2309
+ // stashes the key in a hidden input may not display it at all.
1947
2310
  if (apiKey === null) {
1948
- apiKey = extractApiKeyFromText(await this.browser.extractText());
2311
+ try {
2312
+ for (const value of await this.browser.extractAllInputValues()) {
2313
+ const hit = extractApiKeyFromText(value);
2314
+ if (hit !== null && !isTruncatedCapture(value, hit)) {
2315
+ apiKey = hit;
2316
+ break;
2317
+ }
2318
+ }
2319
+ }
2320
+ catch {
2321
+ // Hidden-input scan failures are non-fatal; we just stay
2322
+ // with whatever we had (or null).
2323
+ }
2324
+ }
2325
+ // Last resort: if every path returned a truncated value, persist
2326
+ // it under `api_key_truncated` (not `api_key`) so the host agent can surface the
2327
+ // partial result to the user (better than reporting "no key
2328
+ // found" when the bot demonstrably reached the modal).
2329
+ if (apiKey === null && truncatedHit !== null) {
2330
+ credentials.api_key_truncated = truncatedHit;
2331
+ return credentials;
1949
2332
  }
1950
2333
  if (apiKey !== null)
1951
2334
  credentials.api_key = apiKey;
1952
2335
  return credentials;
1953
2336
  }
2337
+ // F10: click the page's Copy button (whose label typically reads
2338
+ // "Copy", "Copy key", "Copy secret") and extract the secret from
2339
+ // `navigator.clipboard.readText()`. Returns null on any failure —
2340
+ // the caller has its own fallback paths.
2341
+ async tryCopyButtonExtraction() {
2342
+ let copyBtnSelector = null;
2343
+ try {
2344
+ const inventory = await this.browser.extractInteractiveElements();
2345
+ const copyBtn = inventory.find((e) => {
2346
+ const text = `${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`.trim();
2347
+ // "Copy" alone, "Copy key", "Copy API key", "Copy secret",
2348
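+ // "Copy token". The bare "copy" form is anchored to the start so a
2349
+ // "Don't copy this" tooltip doesn't match; the "copy <noun>" form
+ // (api key / secret / token / key) matches anywhere. Case-insensitive.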
2350
+ return /^\s*copy(?:\b|\s|$)|copy\s+(?:api\s*key|secret|token|key)\b/i.test(text);
2351
+ });
2352
+ if (copyBtn === undefined)
2353
+ return null;
2354
+ copyBtnSelector = copyBtn.selector;
2355
+ }
2356
+ catch {
2357
+ return null;
2358
+ }
2359
+ try {
2360
+ await this.browser.click(copyBtnSelector);
2361
+ // Brief wait — the Copy button's onclick is usually a sync
2362
+ // navigator.clipboard.writeText, but some modals run an async
2363
+ // serialize step (e.g. format-the-key into "Bearer <key>"
2364
+ // first). 1s covers both with no real cost.
2365
+ await this.browser.wait(1);
2366
+ }
2367
+ catch {
2368
+ return null;
2369
+ }
2370
+ let clipboardText;
2371
+ try {
2372
+ clipboardText = await this.browser.readClipboard();
2373
+ }
2374
+ catch {
2375
+ return null;
2376
+ }
2377
+ if (clipboardText.trim().length === 0)
2378
+ return null;
2379
+ const fromClipboard = extractApiKeyFromText(clipboardText);
2380
+ if (fromClipboard === null)
2381
+ return null;
2382
+ // Sanity: don't accept a clipboard hit that is ITSELF truncated
2383
+ // (some Copy buttons copy the masked display rather than the
2384
+ // real value — defensive against that surprising case).
2385
+ if (isTruncatedCapture(clipboardText, fromClipboard))
2386
+ return null;
2387
+ return fromClipboard;
2388
+ }
1954
2389
  }
1955
2390
  //# sourceMappingURL=agent.js.map