@trusty-squire/mcp 0.1.18 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/dist/bin.js +2 -2
  2. package/dist/bin.js.map +1 -1
  3. package/dist/bot/agent.d.ts +15 -1
  4. package/dist/bot/agent.d.ts.map +1 -1
  5. package/dist/bot/agent.js +531 -52
  6. package/dist/bot/agent.js.map +1 -1
  7. package/dist/bot/browser.d.ts +15 -3
  8. package/dist/bot/browser.d.ts.map +1 -1
  9. package/dist/bot/browser.js +281 -56
  10. package/dist/bot/browser.js.map +1 -1
  11. package/dist/bot/google-login.d.ts +18 -0
  12. package/dist/bot/google-login.d.ts.map +1 -0
  13. package/dist/bot/google-login.js +379 -0
  14. package/dist/bot/google-login.js.map +1 -0
  15. package/dist/bot/index.d.ts +5 -0
  16. package/dist/bot/index.d.ts.map +1 -1
  17. package/dist/bot/index.js +14 -0
  18. package/dist/bot/index.js.map +1 -1
  19. package/dist/bot/llm-client.d.ts.map +1 -1
  20. package/dist/bot/llm-client.js +19 -12
  21. package/dist/bot/llm-client.js.map +1 -1
  22. package/dist/bot/oauth-lock.d.ts +2 -0
  23. package/dist/bot/oauth-lock.d.ts.map +1 -0
  24. package/dist/bot/oauth-lock.js +28 -0
  25. package/dist/bot/oauth-lock.js.map +1 -0
  26. package/dist/bot/oauth-providers.d.ts +16 -0
  27. package/dist/bot/oauth-providers.d.ts.map +1 -0
  28. package/dist/bot/oauth-providers.js +100 -0
  29. package/dist/bot/oauth-providers.js.map +1 -0
  30. package/dist/bot/onboarding-capture.d.ts +17 -0
  31. package/dist/bot/onboarding-capture.d.ts.map +1 -0
  32. package/dist/bot/onboarding-capture.js +52 -0
  33. package/dist/bot/onboarding-capture.js.map +1 -0
  34. package/dist/bot/profile.d.ts +2 -0
  35. package/dist/bot/profile.d.ts.map +1 -0
  36. package/dist/bot/profile.js +11 -0
  37. package/dist/bot/profile.js.map +1 -0
  38. package/dist/install/cli.d.ts +4 -1
  39. package/dist/install/cli.d.ts.map +1 -1
  40. package/dist/install/cli.js +41 -1
  41. package/dist/install/cli.js.map +1 -1
  42. package/dist/tools/provision-any.d.ts +15 -0
  43. package/dist/tools/provision-any.d.ts.map +1 -1
  44. package/dist/tools/provision-any.js +66 -2
  45. package/dist/tools/provision-any.js.map +1 -1
  46. package/package.json +3 -1
package/dist/bot/agent.js CHANGED
@@ -8,7 +8,9 @@
8
8
  // executor; the prompt is the contract. If a service breaks we tweak the
9
9
  // prompt rather than threading service-specific logic through the agent.
10
10
  import { rankAndCapInventory, scoreSignupButton } from "./browser.js";
11
+ import { OAUTH_PROVIDERS, extractOAuthScopes, } from "./oauth-providers.js";
11
12
  import { saveDebugSnapshot } from "./debug.js";
13
+ import { captureOnboardingRound } from "./onboarding-capture.js";
12
14
  import { wasRecentlyPrewarmed, recordPrewarmSuccess } from "./prewarm-cache.js";
13
15
  import { pickLLMPair, } from "./llm-client.js";
14
16
  // Hard cap on LLM calls per signup. A signup that runs away to 20+ calls
@@ -47,6 +49,24 @@ const VERIFICATION_EXPECTED_PATTERNS = [
47
49
  // their email. Legitimate verification mail almost always lands inside a
48
50
  // minute; this catches the fast case without 300s of dead air.
49
51
  const VERIFICATION_PROBE_SECONDS = 45;
52
+ // T7: page text that means the post-OAuth API key sits behind a
53
+ // billing / payment-method wall. When the OAuth onboarding loop ends
54
+ // without a key and the page reads like this, the run ends
55
+ // `onboarding_blocked` rather than grep-looping a wall it cannot
56
+ // satisfy (the S3-class trap named in the plan's failure modes).
57
+ const ONBOARDING_PAYWALL_PATTERNS = [
58
+ "add a payment method",
59
+ "add a credit card",
60
+ "add credit card",
61
+ "payment method required",
62
+ "a payment method is required",
63
+ "credit card required",
64
+ "enter your card",
65
+ "enter your payment",
66
+ "enter payment details",
67
+ "upgrade your plan to",
68
+ "start your paid plan",
69
+ ];
50
70
  // S3: does this post-submit page text indicate the service genuinely
51
71
  // expects the user to confirm via email? Drives whether the bot polls the
52
72
  // full verification timeout or runs only a short probe. Exported so the
@@ -253,12 +273,72 @@ export function isOauthOnlyChooser(inventory) {
253
273
  const hasEmailOption = inventory.some((e) => scoreSignupButton(`${e.visibleText ?? ""} ${e.ariaLabel ?? ""} ${e.labelText ?? ""}`) > 0);
254
274
  return !hasEmailOption;
255
275
  }
256
- export function parsePostVerifyStep(raw) {
276
+ // Find a "Sign in with <provider>" affordance in the page inventory —
277
+ // the entry point for the OAuth-first path (T6/T13). Three signals, in
278
+ // confidence order — derived from a live sweep where the text-only
279
+ // heuristic missed real buttons:
280
+ // 1. href — an <a> whose link routes through the provider's OAuth
281
+ // endpoint (/identity/login/google, /auth/github/callback, …).
282
+ // Unambiguous: a marketing link to policies.google.com does not.
283
+ // 2. iconLabel — an icon-only button with no text at all, named only
284
+ // by a descendant <img alt="Google"> / <svg><title> (Mistral).
285
+ // 3. text + an auth verb — "Continue with Google", "Sign up with
286
+ // GitHub". The auth verb is what keeps a bare "Google" nav link
287
+ // or "Google's Privacy Policy" out.
288
+ // Returns null when the page has no such affordance — the planner then
289
+ // falls back to form-fill. Exported for unit testing.
290
+ export function findOAuthButton(inventory, provider) {
291
+ const keyword = OAUTH_PROVIDERS[provider].buttonKeyword;
292
+ const keywordRe = new RegExp(`\\b${keyword}\\b`);
293
+ const hrefRe = new RegExp(`(?:login|signin|sign-in|auth|oauth|connect|sso)[/_-]*${keyword}` +
294
+ `|${keyword}[/_-]*(?:login|signin|auth|oauth|connect)`, "i");
295
+ for (const e of inventory) {
296
+ const isButtonish = e.tag === "button" ||
297
+ e.tag === "a" ||
298
+ e.role === "button" ||
299
+ e.type === "submit" ||
300
+ e.type === "button";
301
+ if (!isButtonish)
302
+ continue;
303
+ // 1. An <a> whose href routes through the provider's OAuth endpoint.
304
+ const href = (e.href ?? "").toLowerCase();
305
+ if (href.length > 0 && hrefRe.test(href))
306
+ return e;
307
+ // 2. Icon-only button — named only by a descendant img/svg.
308
+ if (keywordRe.test((e.iconLabel ?? "").toLowerCase()))
309
+ return e;
310
+ // 3. Visible text / accessible label naming the provider + an
311
+ // auth verb. The auth verb requirement rejects nav and policy
312
+ // links that merely mention the provider.
313
+ const text = `${e.visibleText ?? ""} ${e.ariaLabel ?? ""} ${e.labelText ?? ""}`
314
+ .toLowerCase()
315
+ .replace(/\s+/g, " ")
316
+ .trim();
317
+ if (!keywordRe.test(text))
318
+ continue;
319
+ if (/\b(sign|signup|signin|continue|log ?in|connect|auth)\b/.test(text)) {
320
+ return e;
321
+ }
322
+ }
323
+ return null;
324
+ }
325
+ // Parse a post-verify step. When `allowedSelectors` is supplied, a
326
+ // `click`/`fill` selector that is not in the page inventory is a
327
+ // parse-time rejection — the same DOM-grounding F3 gave the signup
328
+ // planner (parseSignupPlan). It stops the post-OAuth onboarding
329
+ // planner from inventing CSS selectors that never resolve, which was
330
+ // the dominant onboarding-navigation failure mode.
331
+ export function parsePostVerifyStep(raw, allowedSelectors) {
257
332
  const obj = extractJsonObject(raw);
258
333
  const kind = obj["kind"];
259
334
  // `reason` is required by the schema but advisory; default it so a
260
335
  // model omitting it doesn't trip a retry on an otherwise-valid step.
261
336
  const reason = typeof obj["reason"] === "string" ? obj["reason"] : "";
337
+ const checkSelector = (selector, context) => {
338
+ if (allowedSelectors !== undefined && !allowedSelectors.has(selector)) {
339
+ throw new Error(`${context}: selector ${JSON.stringify(selector)} is not in the page inventory`);
340
+ }
341
+ };
262
342
  switch (kind) {
263
343
  case "done":
264
344
  return { kind: "done", reason };
@@ -266,19 +346,31 @@ export function parsePostVerifyStep(raw) {
266
346
  return { kind: "extract", reason };
267
347
  case "login":
268
348
  return { kind: "login", reason };
269
- case "click":
270
- return {
271
- kind: "click",
272
- selector: requireString(obj, "selector", "post-verify click step"),
273
- reason,
274
- };
275
- case "fill":
349
+ case "click": {
350
+ const selector = requireString(obj, "selector", "post-verify click step");
351
+ checkSelector(selector, "post-verify click step");
352
+ return { kind: "click", selector, reason };
353
+ }
354
+ case "fill": {
355
+ const selector = requireString(obj, "selector", "post-verify fill step");
356
+ checkSelector(selector, "post-verify fill step");
276
357
  return {
277
358
  kind: "fill",
278
- selector: requireString(obj, "selector", "post-verify fill step"),
359
+ selector,
279
360
  value: requireString(obj, "value", "post-verify fill step"),
280
361
  reason,
281
362
  };
363
+ }
364
+ case "select": {
365
+ const selector = requireString(obj, "selector", "post-verify select step");
366
+ checkSelector(selector, "post-verify select step");
367
+ return { kind: "select", selector, reason };
368
+ }
369
+ case "check": {
370
+ const selector = requireString(obj, "selector", "post-verify check step");
371
+ checkSelector(selector, "post-verify check step");
372
+ return { kind: "check", selector, reason };
373
+ }
282
374
  case "navigate":
283
375
  return {
284
376
  kind: "navigate",
@@ -315,6 +407,22 @@ const CAPTCHA_TOKEN_MARKERS = [
315
407
  "g-recaptcha-response",
316
408
  "h-captcha-response",
317
409
  ];
410
+ // Distinctive service key prefixes. If a *labeled* match's value
411
+ // embeds one of these NOT at its start, the regex straddled glued UI
412
+ // text on a dense dashboard (e.g. Render's API-keys list rendered as
413
+ // "...Name bot-key Menu Key rnd_xxxx" with no separators) — the real
414
+ // key starts at the prefix, so the labeled match is contaminated and
415
+ // must be rejected. A clean labeled key either starts with its prefix
416
+ // (then the prefixed patterns above already caught it) or carries no
417
+ // known prefix at all.
418
+ const EMBEDDED_KEY_PREFIXES = [
419
+ "rnd_",
420
+ "phc_",
421
+ "sk_live_",
422
+ "sk_test_",
423
+ "pk_live_",
424
+ "pk_test_",
425
+ ];
318
426
  // Pull an API key out of the *visible* page text.
319
427
  //
320
428
  // Two strategies, in priority order:
@@ -333,25 +441,36 @@ const CAPTCHA_TOKEN_MARKERS = [
333
441
  // bearing logic and deserves direct coverage.
334
442
  export function extractApiKeyFromText(text) {
335
443
  const prefixed = [
336
- /\bre_[a-zA-Z0-9]{20,}\b/, // Resend
444
+ /\bre_[a-zA-Z0-9_]{20,}\b/, // Resend (key body contains underscores)
337
445
  /\bsk_(?:live|test)_[a-zA-Z0-9]{20,}\b/, // Stripe secret
338
- /\bpk_(?:live|test)_[a-zA-Z0-9]{20,}\b/, // Stripe public
446
+ // NOTE: client-embedded PUBLIC keys are deliberately NOT matched —
447
+ // Stripe publishable (pk_live_/pk_test_) and PostHog project
448
+ // (phc_) keys ship in the client-side JS of every site that uses
449
+ // those vendors, so finding one on a page means "this service
450
+ // embeds Stripe/PostHog", not "here is the user's credential".
451
+ // Each produced a false success on Mistral (its billing pk_live_,
452
+ // then its analytics phc_, surfaced as the api_key).
339
453
  /\bkey-[a-f0-9]{32}\b/, // Mailgun
340
- /\bphc_[a-zA-Z0-9]{32,}\b/, // PostHog
341
454
  /\bSG\.[a-zA-Z0-9_\-]{20,}\.[a-zA-Z0-9_\-]{20,}\b/, // SendGrid
455
+ /\brnd_[a-zA-Z0-9]{20,}\b/, // Render
456
+ /\bsntry[su]_[A-Za-z0-9_=\-]{20,}/, // Sentry org/user auth token
342
457
  ];
343
458
  for (const pattern of prefixed) {
344
459
  const match = text.match(pattern);
345
460
  if (match !== null)
346
461
  return match[0];
347
462
  }
348
- // Labeled patterns. The gap between label and value is
349
- // `[ \t]*[:=]?[ \t]*` — only spaces/tabs, never a newline — so the
350
- // value must be adjacent to its label. The value charset excludes
351
- // the captcha-token shape implicitly via the length ceiling, and we
352
- // re-check markers explicitly below for the dot-bearing bearer case.
463
+ // Labeled patterns. The label and value MUST be separated by a real
464
+ // separator — a colon/equals, or whitespace — `(?:[ \t]*[:=][ \t]*|[ \t]+)`,
465
+ // never a newline. A MANDATORY separator is what keeps the regex from
466
+ // latching the label onto glued dashboard nav text: a sidebar
467
+ // rendering "API Keys" "Webhooks" "Settings" as adjacent links
468
+ // concatenates in textContent to "API KeysWebhooksSettings…", and an
469
+ // optional-gap regex would capture "sWebhooksSettings…" as the key
470
+ // (Resend false-positive). Requiring `:`/`=`/space means "API Key"
471
+ // followed immediately by a letter does not match.
353
472
  const labeled = [
354
- /(?:api[_\s-]?key|access[_\s-]?token|secret[_\s-]?key)[ \t]*[:=]?[ \t]*([a-zA-Z0-9_\-]{20,})/i,
473
+ /(?:api[_\s-]?key|access[_\s-]?token|secret[_\s-]?key)(?:[ \t]*[:=][ \t]*|[ \t]+)([a-zA-Z0-9_\-]{20,})/i,
355
474
  /\b[Bb]earer[ \t]+([a-zA-Z0-9_\-.]{30,})/,
356
475
  ];
357
476
  for (const pattern of labeled) {
@@ -366,6 +485,10 @@ export function extractApiKeyFromText(text) {
366
485
  const lower = candidate.toLowerCase();
367
486
  if (CAPTCHA_TOKEN_MARKERS.some((marker) => lower.includes(marker)))
368
487
  continue;
488
+ // Contaminated: the labeled match straddled glued dashboard text
489
+ // onto a real key (the key prefix sits mid-candidate, not at 0).
490
+ if (EMBEDDED_KEY_PREFIXES.some((p) => lower.indexOf(p) > 0))
491
+ continue;
369
492
  return candidate;
370
493
  }
371
494
  return null;
@@ -522,12 +645,41 @@ export class SignupAgent {
522
645
  let errorReplans = 0;
523
646
  let progressReplans = 0;
524
647
  let emptyPlans = 0;
648
+ let oauthScanRetries = 0;
525
649
  let hint;
650
+ const oauthProvider = task.oauthProvider;
526
651
  for (;;) {
527
652
  await this.browser.waitForFormReady();
528
653
  await saveDebugSnapshot(this.browser, "before-fill");
529
654
  const state = await this.browser.getState();
530
- const inventory = await this.buildInventory(steps);
655
+ const inventory = await this.buildInventory(steps, oauthProvider);
656
+ // T6/T13 — OAuth-first: when an OAuth signup is requested and the
657
+ // page carries a "Sign in with <provider>" affordance, the OAuth
658
+ // button unconditionally outranks any form field (a rule, not
659
+ // score arithmetic — spec refinement). Hand off to the OAuth
660
+ // consent flow. Absent the affordance, fall through to form-fill.
661
+ if (oauthProvider !== undefined) {
662
+ const oauthButton = findOAuthButton(inventory, oauthProvider);
663
+ const label = OAUTH_PROVIDERS[oauthProvider].label;
664
+ if (oauthButton !== null) {
665
+ steps.push(`OAuth-first: found a ${label} sign-in affordance ` +
666
+ `(${JSON.stringify(oauthButton.visibleText ?? oauthButton.ariaLabel ?? label)}) ` +
667
+ `— taking the OAuth path`);
668
+ return { kind: "oauth", selector: oauthButton.selector };
669
+ }
670
+ // SSO buttons frequently load async — Mistral renders its
671
+ // icon-only provider buttons after the email form. Re-extract
672
+ // a couple of times before giving up on the OAuth path.
673
+ if (oauthScanRetries < 2) {
674
+ oauthScanRetries += 1;
675
+ steps.push(`OAuth-first: no ${label} affordance yet — waiting for an async ` +
676
+ `render (retry ${oauthScanRetries}/2)`);
677
+ await this.browser.wait(3);
678
+ continue;
679
+ }
680
+ steps.push(`OAuth-first requested but no ${label} affordance on the page — ` +
681
+ `falling back to form-fill`);
682
+ }
531
683
  // OAuth-only: no fillable input AND no button that reads as an
532
684
  // email-signup option — nothing to automate (Issue 4).
533
685
  if (isOauthOnlyChooser(inventory)) {
@@ -668,9 +820,16 @@ export class SignupAgent {
668
820
  }
669
821
  }
670
822
  // Extract + rank the page's interactive elements (F3 T1/T2).
671
- async buildInventory(steps) {
823
+ // `oauthProvider` keeps that provider's OAuth affordance from being
824
+ // ranked out of the capped inventory when an OAuth-first signup is
825
+ // requested (T6/T13). `buttonCap` widens for the post-OAuth
826
+ // onboarding loop: a dashboard carries far more nav links than a
827
+ // signup form, and they do not score as signup buttons, so the
828
+ // default cap would drop the "API Keys"/"Settings" links the
829
+ // onboarding planner must reach.
830
+ async buildInventory(steps, oauthProvider, buttonCap = 25) {
672
831
  const raw = await this.browser.extractInteractiveElements();
673
- const { inventory, buttonsDropped } = rankAndCapInventory(raw);
832
+ const { inventory, buttonsDropped } = rankAndCapInventory(raw, buttonCap, oauthProvider);
674
833
  steps.push(`Inventory: ${inventory.length} element(s)` +
675
834
  (buttonsDropped > 0 ? ` (${buttonsDropped} low-ranked button(s) dropped)` : ""));
676
835
  return inventory;
@@ -954,6 +1113,11 @@ export class SignupAgent {
954
1113
  steps,
955
1114
  ...this.resultTail(),
956
1115
  };
1116
+ case "oauth":
1117
+ // T6/T7 — OAuth-first path. runOAuthFlow drives the consent
1118
+ // handshake and post-OAuth onboarding to its own terminal
1119
+ // SignupResult; there is no form submit / email verification.
1120
+ return await this.runOAuthFlow(task, outcome.selector, steps);
957
1121
  case "submitted":
958
1122
  break;
959
1123
  }
@@ -993,8 +1157,7 @@ export class SignupAgent {
993
1157
  const maxRounds = task.postVerifyMaxRounds ?? 6;
994
1158
  credentials = await this.postVerifyLoop({
995
1159
  service: task.service,
996
- email: task.email,
997
- password,
1160
+ credentials: { email: task.email, password },
998
1161
  maxRounds,
999
1162
  steps,
1000
1163
  });
@@ -1047,6 +1210,151 @@ export class SignupAgent {
1047
1210
  };
1048
1211
  }
1049
1212
  }
1213
+ // ------------ OAuth-first signup (T6/T7/T13) ------------
1214
+ // Drive an OAuth signup (Google or GitHub) to a terminal
1215
+ // SignupResult. Entered from runSignup when planExecuteWithRetry
1216
+ // found the provider's affordance (T6). Steps: click the button →
1217
+ // walk the consent screens → scope-gate them → drive post-OAuth
1218
+ // onboarding to the API key.
1219
+ //
1220
+ // THE CRITICAL GUARANTEE (D4 / eng-review critical gap): if the flow
1221
+ // lands on the provider's credential form (expired/missing session)
1222
+ // or a security challenge, it hands back `needs_login` and NEVER
1223
+ // types into that form. Driving the provider's login is exactly what
1224
+ // trips its automation detection — and there is no password to give.
1225
+ async runOAuthFlow(task, oauthSelector, steps) {
1226
+ const provider = OAUTH_PROVIDERS[task.oauthProvider ?? "google"];
1227
+ const loginCmd = provider.id === "github"
1228
+ ? "npx @trusty-squire/mcp login --provider=github"
1229
+ : "npx @trusty-squire/mcp login";
1230
+ steps.push(`OAuth: clicking the ${provider.label} sign-in affordance`);
1231
+ await this.browser.startOAuth(oauthSelector);
1232
+ await this.browser.wait(3);
1233
+ await saveDebugSnapshot(this.browser, "oauth-after-click");
1234
+ // Bounded consent walk — handles account-chooser → consent as two
1235
+ // steps without ever spinning. Each iteration re-reads the page.
1236
+ const MAX_OAUTH_NAV = 6;
1237
+ for (let i = 0; i < MAX_OAUTH_NAV; i++) {
1238
+ if (this.browser.oauthPageClosed()) {
1239
+ steps.push(`OAuth: the ${provider.label} window closed — handshake returned to the service`);
1240
+ break;
1241
+ }
1242
+ const url = this.browser.currentUrl();
1243
+ let body;
1244
+ try {
1245
+ body = (await this.browser.extractText()).slice(0, 4000);
1246
+ }
1247
+ catch {
1248
+ // The page is navigating between provider screens — re-read.
1249
+ await this.browser.wait(1);
1250
+ continue;
1251
+ }
1252
+ const authState = provider.classifyAuthState(url, body);
1253
+ steps.push(`OAuth: ${provider.label} auth state = ${authState}`);
1254
+ if (authState === "not_provider")
1255
+ break; // flow left the provider — back on the service
1256
+ if (authState === "challenge") {
1257
+ return this.oauthAbort("needs_login", `${provider.label} interrupted the sign-in with a security challenge ("verify it's you"). ` +
1258
+ `Re-run \`${loginCmd}\`, clear the challenge in the window, then retry.`, steps);
1259
+ }
1260
+ if (authState === "needs_login") {
1261
+ return this.oauthAbort("needs_login", `the bot's ${provider.label} session is missing or expired — no consent screen was reached. ` +
1262
+ `Re-run \`${loginCmd}\` to re-establish it, then retry.`, steps);
1263
+ }
1264
+ // authState === "consent". Backstop the page classifier with a
1265
+ // live-DOM check: if the page actually carries a credential
1266
+ // field it is a login form (the text classifier can mistake a
1267
+ // login page that says "to continue to <app>" for consent). Hand back —
1268
+ // never type into it.
1269
+ if (await this.oauthLoginFormPresent()) {
1270
+ return this.oauthAbort("needs_login", `landed on a ${provider.label} sign-in form — the session is missing or expired. ` +
1271
+ `Re-run \`${loginCmd}\`, then retry. The bot will not type into ${provider.label}'s login form.`, steps);
1272
+ }
1273
+ // Genuine consent screen / account chooser — scope-gate it (T7).
1274
+ const scopes = extractOAuthScopes(url);
1275
+ if (scopes === null) {
1276
+ return this.oauthAbort("oauth_consent_needs_review", `reached a ${provider.label} consent screen but could not read its requested scopes ` +
1277
+ `from the URL — pausing for manual review rather than approving blind.`, steps);
1278
+ }
1279
+ if (!provider.scopesAreBasic(scopes)) {
1280
+ return this.oauthAbort("oauth_consent_needs_review", `the consent screen requests scopes beyond basic identity (${scopes.join(", ")}). ` +
1281
+ `Approve it manually — the bot only auto-approves basic-identity scopes.`, steps);
1282
+ }
1283
+ steps.push(`OAuth: consent scopes all basic (${scopes.join(", ")}) — auto-approving`);
1284
+ const advanced = await this.browser.advanceOAuthConsent(provider.id);
1285
+ if (!advanced) {
1286
+ return this.oauthAbort("oauth_consent_needs_review", `reached a ${provider.label} consent screen but found no approve control to click — ` +
1287
+ `approve it manually.`, steps);
1288
+ }
1289
+ await this.browser.wait(3);
1290
+ }
1291
+ // Handshake done — restore the product page (popup flow is a no-op
1292
+ // for same-tab redirects) and drive post-OAuth onboarding.
1293
+ await this.browser.settleAfterOAuth();
1294
+ await this.browser.wait(2);
1295
+ await saveDebugSnapshot(this.browser, "oauth-post-consent");
1296
+ steps.push(`OAuth: signed in via ${provider.label} — driving post-OAuth onboarding to the API key`);
1297
+ let credentials = await this.extractCredentials();
1298
+ if (credentials.api_key === undefined) {
1299
+ credentials = await this.postVerifyLoop({
1300
+ service: task.service,
1301
+ maxRounds: task.postVerifyMaxRounds ?? 12,
1302
+ steps,
1303
+ });
1304
+ }
1305
+ if (credentials.api_key !== undefined) {
1306
+ return {
1307
+ success: true,
1308
+ credentials: { ...credentials },
1309
+ steps,
1310
+ ...this.resultTail(),
1311
+ };
1312
+ }
1313
+ // No API key. Distinguish a billing/card wall (onboarding_blocked)
1314
+ // from a generic navigation miss — never grep-loop a paid wall.
1315
+ const finalText = (await this.browser.extractText().catch(() => "")).toLowerCase();
1316
+ if (ONBOARDING_PAYWALL_PATTERNS.some((p) => finalText.includes(p))) {
1317
+ return {
1318
+ success: false,
1319
+ error: `onboarding_blocked: ${task.service}'s API key sits behind a billing or ` +
1320
+ `payment-method wall the bot will not cross — finish the signup manually.`,
1321
+ steps,
1322
+ ...this.resultTail(),
1323
+ };
1324
+ }
1325
+ return {
1326
+ success: false,
1327
+ error: `oauth_onboarding_failed: signed in to ${task.service} via ${provider.label} but ` +
1328
+ `could not reach an API key through post-OAuth onboarding.`,
1329
+ steps,
1330
+ ...this.resultTail(),
1331
+ };
1332
+ }
1333
+ // Build a terminal SignupResult for an aborted OAuth run. `prefix`
1334
+ // is the error tag provision-any.ts maps to a tool status
1335
+ // (needs_login, oauth_consent_needs_review).
1336
+ oauthAbort(prefix, detail, steps) {
1337
+ steps.push(`OAuth aborted (${prefix}): ${detail}`);
1338
+ return {
1339
+ success: false,
1340
+ error: `${prefix}: ${detail}`,
1341
+ steps,
1342
+ ...this.resultTail(),
1343
+ };
1344
+ }
1345
+ // Backstop for the critical guarantee (D4): true when the active
1346
+ // provider page carries a credential-entry field — an expired/missing
1347
+ // session dropped the bot on a login form. A genuine consent screen
1348
+ // or account chooser has buttons/tiles only, no text inputs.
1349
+ async oauthLoginFormPresent() {
1350
+ const inv = await this.browser.extractInteractiveElements();
1351
+ return inv.some((e) => e.tag === "input" &&
1352
+ (e.type === "email" ||
1353
+ e.type === "password" ||
1354
+ e.type === "text" ||
1355
+ e.type === "tel" ||
1356
+ e.type === null));
1357
+ }
1050
1358
  // ------------ Claude planner ------------
1051
1359
  async planSignupForm(input) {
1052
1360
  const systemPrompt = `You plan how to fill a web signup form.
@@ -1147,40 +1455,118 @@ ${formatInventory(input.inventory)}`,
1147
1455
  }
1148
1456
  throw lastErr ?? new Error("verification email did not arrive in time");
1149
1457
  }
1150
- // After verification, drive the browser toward the API key. Each round
1151
- // asks Claude what to do next given the current page; we stop when
1152
- // Claude says "done" or when we extract a credential. Bounded by
1153
- // maxRounds so a confused agent can't burn the whole context window.
1458
+ // Drive the browser toward the API key after the account exists —
1459
+ // used by BOTH the email-verification path and the OAuth path (T9).
1460
+ // Each round asks Claude what to do next given the current page; we
1461
+ // stop when Claude says "done" or when we extract a credential.
1462
+ // Bounded by maxRounds so a confused agent can't burn the context.
1463
+ //
1464
+ // T9 — decoupled from email+password. `credentials` is present only
1465
+ // on the email-verification path, where the loop may need to sign in
1466
+ // with the just-created account (SendPulse). On the OAuth path it is
1467
+ // absent: there is no password, and the Google session already
1468
+ // authenticated the user — a `login` step is then a no-op.
1154
1469
  async postVerifyLoop(args) {
1155
1470
  let credentials = await this.extractCredentials();
1156
1471
  let loginAttempts = 0;
1472
+ let planFailures = 0;
1473
+ const oauth = args.credentials === undefined;
1474
+ // Re-plan hint for the next round — set when an `extract` step
1475
+ // found no key, which means the visible key text is masked /
1476
+ // truncated (the S3-class trap: the planner sees a key-shaped
1477
+ // string and keeps asking to extract it forever), or when the
1478
+ // planner's last step was rejected.
1479
+ let hint;
1157
1480
  for (let round = 0; round < args.maxRounds; round++) {
1158
1481
  if (credentials.api_key !== undefined || credentials.username !== undefined) {
1159
1482
  args.steps.push(`Post-verify: credentials found on round ${round}.`);
1160
1483
  return credentials;
1161
1484
  }
1162
- const state = await this.browser.getState();
1485
+ // Settle the page first — the previous round's click may have
1486
+ // triggered a navigation, and reading a page mid-navigation
1487
+ // throws "execution context destroyed". waitForFormReady is
1488
+ // best-effort (swallows its own timeouts).
1489
+ await this.browser.waitForFormReady();
1490
+ // DOM-grounded inventory so the planner picks verified selectors
1491
+ // instead of inventing CSS that never resolves. A dashboard has
1492
+ // far more nav links than a signup form, so the button cap is
1493
+ // widened (the "API Keys"/"Settings" links must survive ranking).
1494
+ // Reading state can still race a navigation — a transient throw
1495
+ // burns the round rather than crashing the whole run.
1496
+ let state;
1497
+ let inventory;
1498
+ try {
1499
+ state = await this.browser.getState();
1500
+ inventory = await this.buildInventory(args.steps, undefined, 80);
1501
+ }
1502
+ catch (err) {
1503
+ args.steps.push(`Post-verify round ${round}: page was mid-navigation ` +
1504
+ `(${err instanceof Error ? err.message : String(err)}) — retrying`);
1505
+ await this.browser.wait(2);
1506
+ continue;
1507
+ }
1163
1508
  let nextStep;
1164
1509
  try {
1165
1510
  nextStep = await this.planPostVerifyStep({
1166
1511
  service: args.service,
1167
- email: args.email,
1168
- password: args.password,
1169
1512
  round,
1170
1513
  maxRounds: args.maxRounds,
1171
1514
  state,
1515
+ oauth,
1516
+ inventory,
1517
+ ...(hint !== undefined ? { hint } : {}),
1172
1518
  });
1173
1519
  }
1174
1520
  catch (err) {
1175
- args.steps.push(`Post-verify round ${round}: planner failed (${err instanceof Error ? err.message : String(err)}). Stopping.`);
1176
- break;
1521
+ // The planner's output did not validate — most often a
1522
+ // selector not in the inventory (the model copied the whole
1523
+ // line, not just the `selector=` value). Re-plan with a hint
1524
+ // rather than abandon the run, the same resilience the
1525
+ // form-fill planner has. Bounded so a persistently broken
1526
+ // planner still terminates.
1527
+ const reason = err instanceof Error ? err.message : String(err);
1528
+ planFailures += 1;
1529
+ if (planFailures > 3) {
1530
+ args.steps.push(`Post-verify round ${round}: planner failed ${planFailures}x (${reason}) — stopping.`);
1531
+ break;
1532
+ }
1533
+ args.steps.push(`Post-verify round ${round}: planner output rejected (${reason}) — re-planning.`);
1534
+ hint =
1535
+ "Your previous step was REJECTED. A click/fill/select `selector` must be " +
1536
+ "EXACTLY the value after `selector=` on one inventory line — copy only that " +
1537
+ "value (it runs to the end of the line), never the leading `[n] tag …` part " +
1538
+ "and never the whole line.";
1539
+ continue;
1177
1540
  }
1178
1541
  args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: ${nextStep.kind} — ${nextStep.reason}`);
1542
+ // Dev-only (env-gated): dump this round's real page state +
1543
+ // inventory into the E1 eval-corpus format, so onboarding
1544
+ // adapters can be iterated offline without re-running the
1545
+ // rate-limited OAuth handshake.
1546
+ captureOnboardingRound({
1547
+ service: args.service,
1548
+ round,
1549
+ oauth,
1550
+ state,
1551
+ inventory,
1552
+ observed: nextStep,
1553
+ });
1179
1554
  if (nextStep.kind === "done")
1180
1555
  break;
1556
+ hint = undefined;
1181
1557
  try {
1182
1558
  if (nextStep.kind === "extract") {
1183
1559
  credentials = await this.extractCredentials();
1560
+ if (credentials.api_key === undefined) {
1561
+ // The planner saw a key-shaped string but extraction got
1562
+ // nothing — the on-page key is masked/truncated. Steer the
1563
+ // next round off `extract` and toward creating a fresh key.
1564
+ hint =
1565
+ "Your last 'extract' found NO key — the key text on the page is " +
1566
+ "masked or truncated (e.g. shows '...' or dots). A masked existing " +
1567
+ "key cannot be extracted. Click 'Create API Key' / 'New API Key' to " +
1568
+ "generate a fresh one — its full value is shown once, on creation.";
1569
+ }
1184
1570
  }
1185
1571
  else if (nextStep.kind === "click") {
1186
1572
  await this.browser.click(nextStep.selector);
@@ -1189,6 +1575,16 @@ ${formatInventory(input.inventory)}`,
1189
1575
  else if (nextStep.kind === "fill") {
1190
1576
  await this.browser.type(nextStep.selector, nextStep.value);
1191
1577
  }
1578
+ else if (nextStep.kind === "select") {
1579
+ await this.browser.selectOption(nextStep.selector);
1580
+ await this.browser.wait(1);
1581
+ }
1582
+ else if (nextStep.kind === "check") {
1583
+ // browser.check force-ticks + scrolls into view + verifies —
1584
+ // a styled TOS checkbox a plain click can't flip.
1585
+ await this.browser.check(nextStep.selector);
1586
+ await this.browser.wait(1);
1587
+ }
1192
1588
  else if (nextStep.kind === "navigate") {
1193
1589
  await this.browser.goto(nextStep.url);
1194
1590
  await this.browser.wait(3);
@@ -1197,19 +1593,34 @@ ${formatInventory(input.inventory)}`,
1197
1593
  await this.browser.wait(Math.min(nextStep.seconds, 15));
1198
1594
  }
1199
1595
  else if (nextStep.kind === "login") {
1200
- if (loginAttempts >= 2) {
1596
+ if (args.credentials === undefined) {
1597
+ // OAuth run — no password to give, and the Google session
1598
+ // already authenticated us. Treat `login` as a no-op note.
1599
+ args.steps.push("Post-verify: planner asked to log in, but this is an OAuth run — " +
1600
+ "already authenticated via Google; skipping.");
1601
+ }
1602
+ else if (loginAttempts >= 2) {
1201
1603
  args.steps.push("Post-verify: already attempted login twice — stopping.");
1202
1604
  break;
1203
1605
  }
1204
- loginAttempts += 1;
1205
- await this.loginWithCredentials(args.email, args.password, args.steps);
1606
+ else {
1607
+ loginAttempts += 1;
1608
+ await this.loginWithCredentials(args.credentials.email, args.credentials.password, args.steps);
1609
+ }
1206
1610
  }
1207
1611
  }
1208
1612
  catch (err) {
1209
1613
  args.steps.push(`Post-verify action failed (${nextStep.kind}): ${err instanceof Error ? err.message : String(err)}`);
1210
1614
  // Don't bail — Claude may recover on the next round.
1211
1615
  }
1212
- credentials = await this.extractCredentials();
1616
+ // Re-extract but tolerate the page still navigating from the
1617
+ // step just taken; the next round settles and re-reads.
1618
+ try {
1619
+ credentials = await this.extractCredentials();
1620
+ }
1621
+ catch {
1622
+ // page mid-navigation — next round's waitForFormReady handles it
1623
+ }
1213
1624
  }
1214
1625
  return credentials;
1215
1626
  }
@@ -1253,27 +1664,56 @@ ${formatInventory(input.inventory)}`,
1253
1664
  }
1254
1665
  async planPostVerifyStep(input) {
1255
1666
  const visibleText = (input.state.html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim()).slice(0, 2500);
1256
- const systemPrompt = `You are driving a headless browser after a SaaS signup verification.
1667
+ const loginGuidance = input.oauth
1668
+ ? `- You are ALREADY signed in via Google — NEVER return {"kind":"login"}. If the page looks like a login wall, return {"kind":"navigate"} to the service's dashboard, or {"kind":"done"}.`
1669
+ : `- If the page is a LOGIN form, or says you must sign in, or you've been signed out, return {"kind":"login"} — the bot signs in with the signup email + password. Do NOT return done on a login wall.`;
1670
+ const systemPrompt = `You are driving a headless browser after a SaaS signup ${input.oauth ? "via Google OAuth" : "verification"}.
1257
1671
  Your goal: surface the user's API key (or any credential — token, secret, app id) so it can be extracted from the page.
1258
1672
 
1259
1673
  You may issue ONE step per turn. Reply with a single JSON object, no prose.
1260
1674
 
1675
+ You are given a screenshot and an INVENTORY of the page's interactive
1676
+ elements — each line ends with a precise \`selector=\` the bot has
1677
+ verified resolves.
1678
+
1261
1679
  Schema:
1262
1680
  {"kind":"done","reason":"why we should stop"}
1263
1681
  {"kind":"extract","reason":"the API key is now visible on this page"}
1264
1682
  {"kind":"login","reason":"the page is a login form / we were signed out"}
1265
- {"kind":"click","selector":"CSS","reason":"e.g. dismiss onboarding modal / open API keys page"}
1266
- {"kind":"fill","selector":"CSS","value":"value","reason":"unusual — only for required project-name etc."}
1267
- {"kind":"navigate","url":"https://...","reason":"e.g. go directly to /settings/api"}
1683
+ {"kind":"click","selector":"<a selector= copied verbatim from the inventory>","reason":"e.g. open the API keys page"}
1684
+ {"kind":"fill","selector":"<a selector= from the inventory>","value":"value","reason":"unusual — only for a required project-name etc."}
1685
+ {"kind":"select","selector":"<a selector= from the inventory, tag=select>","reason":"pick an option for a dropdown — region, role, country"}
1686
+ {"kind":"check","selector":"<a selector= from the inventory, type=checkbox>","reason":"tick a terms-of-service / agreement checkbox"}
1687
+ {"kind":"navigate","url":"https://...","reason":"e.g. go directly to /settings/api-keys"}
1268
1688
  {"kind":"wait","seconds":N,"reason":"page is still loading"}
1269
1689
 
1690
+ - CRITICAL: every "selector" in a click/fill step MUST be copied
1691
+ verbatim from a \`selector=\` field in the inventory below. Never
1692
+ invent or guess a selector — one not in the inventory is rejected.
1693
+ - If the element you want is NOT in the inventory, use {"kind":"navigate"}
1694
+ to a likely settings URL instead of guessing a selector.
1695
+
1270
1696
  Strategy:
1271
- - If the API key text is visible, return {"kind":"extract"}.
1272
- - If there's a dashboard menu link like "API Keys" / "Tokens" / "Developer", click it.
1273
- - If there's an onboarding modal blocking, dismiss it.
1274
- - If the page is a LOGIN form, or says you must sign in, or you've been signed out, return {"kind":"login"} — the bot signs in with the signup email + password. Do NOT return done on a login wall.
1697
+ - If a FULL, untruncated API key is visible, return {"kind":"extract"}.
1698
+ - A key shown masked or truncated (with "...", dots, or "") is NOT
1699
+ extractable its full value is shown only once, at creation. Do NOT
1700
+ return "extract" for a masked key, and do not return "extract" twice
1701
+ in a row. Instead click "Create API Key" / "New API Key" / "Generate"
1702
+ to make a fresh key, then extract its full value.
1703
+ - To reach API keys, prefer a {"kind":"navigate"} straight to the
1704
+ service's API-keys settings URL — note these usually live under the
1705
+ user/ACCOUNT settings, not a project or workspace's settings.
1706
+ - Otherwise click a dashboard menu link like "API Keys" / "Tokens" /
1707
+ "Developer" / "Settings" — using its inventory selector.
1708
+ - If there's an onboarding modal or a "Skip" link blocking, dismiss it.
1709
+ ${loginGuidance}
1275
1710
  - If we're on a "verify your phone" / "verify email" wall, return done (we can't solve those).
1276
- - If the page wants the user to create a project before showing keys, fill the minimum and click create.
1711
+ - If the page wants the user to create a project/key before showing it, fill the minimum and click create.
1712
+ - For a required dropdown (an inventory entry with tag=select — region, role, country), use {"kind":"select"} — a "click" cannot pick a <select> option, so do not click it repeatedly.
1713
+ - A post-OAuth onboarding form (organization name, region, terms) is normal — fill/select/check its fields and click Continue to advance toward the dashboard; do not return "done" just because it is a form.
1714
+ - If a "Create"/"Continue" button is disabled, look for a required terms-of-service / agreement checkbox and tick it with {"kind":"check"} — use the checkbox's own inventory selector (an entry with type=checkbox), NOT the adjacent "Terms of Service" link. A "click" on a styled checkbox often fails to flip it; use "check".
1715
+ - Prefer the simplest credential path: a project- or organization-level API token / auth token usually needs only a name. A "personal token" with a grid of per-scope permission dropdowns is more work — choose it only if no simpler token type is offered.
1716
+ - On a token-creation form whose permission/scope dropdowns default to "No Access" / "None", you MUST use a select step to set a non-default permission on at least one dropdown BEFORE clicking the create button — creating with all-default permissions does nothing. Do not click the create button repeatedly; set a permission first.
1277
1717
  - Round ${input.round + 1} of ${input.maxRounds}. Prefer "done" if you're not making progress.`;
1278
1718
  const userBlocks = [
1279
1719
  { kind: "image", media_type: "image/png", data_base64: input.state.screenshot },
@@ -1285,14 +1725,37 @@ Title: ${input.state.title}
1285
1725
  Round: ${input.round + 1}/${input.maxRounds}
1286
1726
 
1287
1727
  Visible text (truncated):
1288
- ${visibleText}`,
1728
+ ${visibleText}
1729
+
1730
+ Interactive element inventory:
1731
+ ${formatInventory(input.inventory)}${input.hint !== undefined ? `\n\nIMPORTANT — ${input.hint}` : ""}`,
1289
1732
  },
1290
1733
  ];
1734
+ // The planner may only pick click/fill selectors the bot supplied.
1735
+ const allowed = new Set(input.inventory.map((e) => e.selector));
1291
1736
  return this.callLLM({
1292
1737
  system: systemPrompt,
1293
1738
  userBlocks,
1294
1739
  maxTokens: 500,
1295
- parse: parsePostVerifyStep,
1740
+ parse: (raw) => {
1741
+ const step = parsePostVerifyStep(raw, allowed);
1742
+ // A `check` must land on a real checkbox/radio — the planner
1743
+ // otherwise picks the adjacent "Terms of Service" *link*, which
1744
+ // page.check() cannot tick. Reject it so the round re-plans.
1745
+ if (step.kind === "check") {
1746
+ const el = input.inventory.find((e) => e.selector === step.selector);
1747
+ const checkable = el !== undefined &&
1748
+ ((el.tag === "input" &&
1749
+ (el.type === "checkbox" || el.type === "radio")) ||
1750
+ el.role === "checkbox" ||
1751
+ el.role === "radio");
1752
+ if (!checkable) {
1753
+ throw new Error(`post-verify check step: ${JSON.stringify(step.selector)} is not a ` +
1754
+ `checkbox — pick the actual agreement checkbox, not its label or link`);
1755
+ }
1756
+ }
1757
+ return step;
1758
+ },
1296
1759
  });
1297
1760
  }
1298
1761
  async findSignupLink() {
@@ -1313,13 +1776,29 @@ ${visibleText}`,
1313
1776
  return null;
1314
1777
  }
1315
1778
  async extractCredentials() {
1316
- // IMPORTANT: pull credentials from the *visible* page text, not the raw
1779
+ // IMPORTANT: pull credentials from the *visible* page, not the raw
1317
1780
  // HTML. Reading from HTML matches anti-bot challenge JS (Cloudflare
1318
- // Turnstile, hCaptcha) whose challenge tokens look like API keys to a
1319
- // naive regex.
1320
- const text = await this.browser.extractText();
1781
+ // Turnstile, hCaptcha) whose challenge tokens look like API keys to
1782
+ // a naive regex.
1783
+ //
1784
+ // Two visible surfaces, in priority order:
1785
+ // 1. Discrete credential candidates — copy-input values and each
1786
+ // element's own direct text. A key is read whole here, un-glued
1787
+ // from adjacent buttons; captcha tokens (hidden inputs) are
1788
+ // excluded by the browser.
1789
+ // 2. The whole visible body text — fallback for a key shown as
1790
+ // plain prose, accepting that body concatenation can glue
1791
+ // neighbours (the extractApiKeyFromText guards catch the worst).
1321
1792
  const credentials = {};
1322
- const apiKey = extractApiKeyFromText(text);
1793
+ let apiKey = null;
1794
+ for (const candidate of await this.browser.extractCredentialCandidates()) {
1795
+ apiKey = extractApiKeyFromText(candidate);
1796
+ if (apiKey !== null)
1797
+ break;
1798
+ }
1799
+ if (apiKey === null) {
1800
+ apiKey = extractApiKeyFromText(await this.browser.extractText());
1801
+ }
1323
1802
  if (apiKey !== null)
1324
1803
  credentials.api_key = apiKey;
1325
1804
  return credentials;