@trusty-squire/mcp 0.1.17 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/dist/bin.js +2 -2
  2. package/dist/bin.js.map +1 -1
  3. package/dist/bot/agent.d.ts +7 -1
  4. package/dist/bot/agent.d.ts.map +1 -1
  5. package/dist/bot/agent.js +396 -39
  6. package/dist/bot/agent.js.map +1 -1
  7. package/dist/bot/browser.d.ts +15 -3
  8. package/dist/bot/browser.d.ts.map +1 -1
  9. package/dist/bot/browser.js +273 -56
  10. package/dist/bot/browser.js.map +1 -1
  11. package/dist/bot/google-login.d.ts +18 -0
  12. package/dist/bot/google-login.d.ts.map +1 -0
  13. package/dist/bot/google-login.js +379 -0
  14. package/dist/bot/google-login.js.map +1 -0
  15. package/dist/bot/index.d.ts +5 -0
  16. package/dist/bot/index.d.ts.map +1 -1
  17. package/dist/bot/index.js +14 -0
  18. package/dist/bot/index.js.map +1 -1
  19. package/dist/bot/llm-client.d.ts.map +1 -1
  20. package/dist/bot/llm-client.js +19 -12
  21. package/dist/bot/llm-client.js.map +1 -1
  22. package/dist/bot/oauth-lock.d.ts +2 -0
  23. package/dist/bot/oauth-lock.d.ts.map +1 -0
  24. package/dist/bot/oauth-lock.js +28 -0
  25. package/dist/bot/oauth-lock.js.map +1 -0
  26. package/dist/bot/oauth-providers.d.ts +16 -0
  27. package/dist/bot/oauth-providers.d.ts.map +1 -0
  28. package/dist/bot/oauth-providers.js +100 -0
  29. package/dist/bot/oauth-providers.js.map +1 -0
  30. package/dist/bot/oauth-thin-slice.d.ts +2 -0
  31. package/dist/bot/oauth-thin-slice.d.ts.map +1 -0
  32. package/dist/bot/oauth-thin-slice.js +203 -0
  33. package/dist/bot/oauth-thin-slice.js.map +1 -0
  34. package/dist/bot/profile.d.ts +2 -0
  35. package/dist/bot/profile.d.ts.map +1 -0
  36. package/dist/bot/profile.js +11 -0
  37. package/dist/bot/profile.js.map +1 -0
  38. package/dist/install/cli.d.ts +4 -1
  39. package/dist/install/cli.d.ts.map +1 -1
  40. package/dist/install/cli.js +41 -1
  41. package/dist/install/cli.js.map +1 -1
  42. package/dist/server.d.ts.map +1 -1
  43. package/dist/server.js +3 -2
  44. package/dist/server.js.map +1 -1
  45. package/dist/tools/index.d.ts +2 -1
  46. package/dist/tools/index.d.ts.map +1 -1
  47. package/dist/tools/index.js +3 -2
  48. package/dist/tools/index.js.map +1 -1
  49. package/dist/tools/provision-any.d.ts +50 -46
  50. package/dist/tools/provision-any.d.ts.map +1 -1
  51. package/dist/tools/provision-any.js +266 -107
  52. package/dist/tools/provision-any.js.map +1 -1
  53. package/package.json +3 -1
package/dist/bot/agent.js CHANGED
@@ -8,6 +8,7 @@
8
8
  // executor; the prompt is the contract. If a service breaks we tweak the
9
9
  // prompt rather than threading service-specific logic through the agent.
10
10
  import { rankAndCapInventory, scoreSignupButton } from "./browser.js";
11
+ import { OAUTH_PROVIDERS, extractOAuthScopes, } from "./oauth-providers.js";
11
12
  import { saveDebugSnapshot } from "./debug.js";
12
13
  import { wasRecentlyPrewarmed, recordPrewarmSuccess } from "./prewarm-cache.js";
13
14
  import { pickLLMPair, } from "./llm-client.js";
@@ -47,6 +48,24 @@ const VERIFICATION_EXPECTED_PATTERNS = [
47
48
  // their email. Legitimate verification mail almost always lands inside a
48
49
  // minute; this catches the fast case without 300s of dead air.
49
50
  const VERIFICATION_PROBE_SECONDS = 45;
51
+ // T7: page text that means the post-OAuth API key sits behind a
52
+ // billing / payment-method wall. When the OAuth onboarding loop ends
53
+ // without a key and the page reads like this, the run ends
54
+ // `onboarding_blocked` rather than grep-looping a wall it cannot
55
+ // satisfy (the S3-class trap named in the plan's failure modes).
56
+ const ONBOARDING_PAYWALL_PATTERNS = [
57
+ "add a payment method",
58
+ "add a credit card",
59
+ "add credit card",
60
+ "payment method required",
61
+ "a payment method is required",
62
+ "credit card required",
63
+ "enter your card",
64
+ "enter your payment",
65
+ "enter payment details",
66
+ "upgrade your plan to",
67
+ "start your paid plan",
68
+ ];
50
69
  // S3: does this post-submit page text indicate the service genuinely
51
70
  // expects the user to confirm via email? Drives whether the bot polls the
52
71
  // full verification timeout or runs only a short probe. Exported so the
@@ -253,12 +272,72 @@ export function isOauthOnlyChooser(inventory) {
253
272
  const hasEmailOption = inventory.some((e) => scoreSignupButton(`${e.visibleText ?? ""} ${e.ariaLabel ?? ""} ${e.labelText ?? ""}`) > 0);
254
273
  return !hasEmailOption;
255
274
  }
256
- export function parsePostVerifyStep(raw) {
275
+ // Find a "Sign in with <provider>" affordance in the page inventory —
276
+ // the entry point for the OAuth-first path (T6/T13). Three signals, in
277
+ // confidence order — derived from a live sweep where the text-only
278
+ // heuristic missed real buttons:
279
+ // 1. href — an <a> whose link routes through the provider's OAuth
280
+ // endpoint (/identity/login/google, /auth/github/callback, …).
281
+ // Unambiguous: a marketing link to policies.google.com does not.
282
+ // 2. iconLabel — an icon-only button with no text at all, named only
283
+ // by a descendant <img alt="Google"> / <svg><title> (Mistral).
284
+ // 3. text + an auth verb — "Continue with Google", "Sign up with
285
+ // GitHub". The auth verb is what keeps a bare "Google" nav link
286
+ // or "Google's Privacy Policy" out.
287
+ // Returns null when the page has no such affordance — the planner then
288
+ // falls back to form-fill. Exported for unit testing.
289
+ export function findOAuthButton(inventory, provider) {
290
+ const keyword = OAUTH_PROVIDERS[provider].buttonKeyword;
291
+ const keywordRe = new RegExp(`\\b${keyword}\\b`);
292
+ const hrefRe = new RegExp(`(?:login|signin|sign-in|auth|oauth|connect|sso)[/_-]*${keyword}` +
293
+ `|${keyword}[/_-]*(?:login|signin|auth|oauth|connect)`, "i");
294
+ for (const e of inventory) {
295
+ const isButtonish = e.tag === "button" ||
296
+ e.tag === "a" ||
297
+ e.role === "button" ||
298
+ e.type === "submit" ||
299
+ e.type === "button";
300
+ if (!isButtonish)
301
+ continue;
302
+ // 1. An <a> whose href routes through the provider's OAuth endpoint.
303
+ const href = (e.href ?? "").toLowerCase();
304
+ if (href.length > 0 && hrefRe.test(href))
305
+ return e;
306
+ // 2. Icon-only button — named only by a descendant img/svg.
307
+ if (keywordRe.test((e.iconLabel ?? "").toLowerCase()))
308
+ return e;
309
+ // 3. Visible text / accessible label naming the provider + an
310
+ // auth verb. The auth verb requirement rejects nav and policy
311
+ // links that merely mention the provider.
312
+ const text = `${e.visibleText ?? ""} ${e.ariaLabel ?? ""} ${e.labelText ?? ""}`
313
+ .toLowerCase()
314
+ .replace(/\s+/g, " ")
315
+ .trim();
316
+ if (!keywordRe.test(text))
317
+ continue;
318
+ if (/\b(sign|signup|signin|continue|log ?in|connect|auth)\b/.test(text)) {
319
+ return e;
320
+ }
321
+ }
322
+ return null;
323
+ }
324
+ // Parse a post-verify step. When `allowedSelectors` is supplied, a
325
+ // `click`/`fill` selector that is not in the page inventory is a
326
+ // parse-time rejection — the same DOM-grounding F3 gave the signup
327
+ // planner (parseSignupPlan). It stops the post-OAuth onboarding
328
+ // planner from inventing CSS selectors that never resolve, which was
329
+ // the dominant onboarding-navigation failure mode.
330
+ export function parsePostVerifyStep(raw, allowedSelectors) {
257
331
  const obj = extractJsonObject(raw);
258
332
  const kind = obj["kind"];
259
333
  // `reason` is required by the schema but advisory; default it so a
260
334
  // model omitting it doesn't trip a retry on an otherwise-valid step.
261
335
  const reason = typeof obj["reason"] === "string" ? obj["reason"] : "";
336
+ const checkSelector = (selector, context) => {
337
+ if (allowedSelectors !== undefined && !allowedSelectors.has(selector)) {
338
+ throw new Error(`${context}: selector ${JSON.stringify(selector)} is not in the page inventory`);
339
+ }
340
+ };
262
341
  switch (kind) {
263
342
  case "done":
264
343
  return { kind: "done", reason };
@@ -266,19 +345,21 @@ export function parsePostVerifyStep(raw) {
266
345
  return { kind: "extract", reason };
267
346
  case "login":
268
347
  return { kind: "login", reason };
269
- case "click":
270
- return {
271
- kind: "click",
272
- selector: requireString(obj, "selector", "post-verify click step"),
273
- reason,
274
- };
275
- case "fill":
348
+ case "click": {
349
+ const selector = requireString(obj, "selector", "post-verify click step");
350
+ checkSelector(selector, "post-verify click step");
351
+ return { kind: "click", selector, reason };
352
+ }
353
+ case "fill": {
354
+ const selector = requireString(obj, "selector", "post-verify fill step");
355
+ checkSelector(selector, "post-verify fill step");
276
356
  return {
277
357
  kind: "fill",
278
- selector: requireString(obj, "selector", "post-verify fill step"),
358
+ selector,
279
359
  value: requireString(obj, "value", "post-verify fill step"),
280
360
  reason,
281
361
  };
362
+ }
282
363
  case "navigate":
283
364
  return {
284
365
  kind: "navigate",
@@ -315,6 +396,22 @@ const CAPTCHA_TOKEN_MARKERS = [
315
396
  "g-recaptcha-response",
316
397
  "h-captcha-response",
317
398
  ];
399
+ // Distinctive service key prefixes. If a *labeled* match's value
400
+ // embeds one of these NOT at its start, the regex straddled glued UI
401
+ // text on a dense dashboard (e.g. Render's API-keys list rendered as
402
+ // "...Name bot-key Menu Key rnd_xxxx" with no separators) — the real
403
+ // key starts at the prefix, so the labeled match is contaminated and
404
+ // must be rejected. A clean labeled key either starts with its prefix
405
+ // (then the prefixed patterns above already caught it) or carries no
406
+ // known prefix at all.
407
+ const EMBEDDED_KEY_PREFIXES = [
408
+ "rnd_",
409
+ "phc_",
410
+ "sk_live_",
411
+ "sk_test_",
412
+ "pk_live_",
413
+ "pk_test_",
414
+ ];
318
415
  // Pull an API key out of the *visible* page text.
319
416
  //
320
417
  // Two strategies, in priority order:
@@ -339,6 +436,7 @@ export function extractApiKeyFromText(text) {
339
436
  /\bkey-[a-f0-9]{32}\b/, // Mailgun
340
437
  /\bphc_[a-zA-Z0-9]{32,}\b/, // PostHog
341
438
  /\bSG\.[a-zA-Z0-9_\-]{20,}\.[a-zA-Z0-9_\-]{20,}\b/, // SendGrid
439
+ /\brnd_[a-zA-Z0-9]{20,}\b/, // Render
342
440
  ];
343
441
  for (const pattern of prefixed) {
344
442
  const match = text.match(pattern);
@@ -366,6 +464,10 @@ export function extractApiKeyFromText(text) {
366
464
  const lower = candidate.toLowerCase();
367
465
  if (CAPTCHA_TOKEN_MARKERS.some((marker) => lower.includes(marker)))
368
466
  continue;
467
+ // Contaminated: the labeled match straddled glued dashboard text
468
+ // onto a real key (the key prefix sits mid-candidate, not at 0).
469
+ if (EMBEDDED_KEY_PREFIXES.some((p) => lower.indexOf(p) > 0))
470
+ continue;
369
471
  return candidate;
370
472
  }
371
473
  return null;
@@ -523,11 +625,29 @@ export class SignupAgent {
523
625
  let progressReplans = 0;
524
626
  let emptyPlans = 0;
525
627
  let hint;
628
+ const oauthProvider = task.oauthProvider;
526
629
  for (;;) {
527
630
  await this.browser.waitForFormReady();
528
631
  await saveDebugSnapshot(this.browser, "before-fill");
529
632
  const state = await this.browser.getState();
530
- const inventory = await this.buildInventory(steps);
633
+ const inventory = await this.buildInventory(steps, oauthProvider);
634
+ // T6/T13 — OAuth-first: when an OAuth signup is requested and the
635
+ // page carries a "Sign in with <provider>" affordance, the OAuth
636
+ // button unconditionally outranks any form field (a rule, not
637
+ // score arithmetic — spec refinement). Hand off to the OAuth
638
+ // consent flow. Absent the affordance, fall through to form-fill.
639
+ if (oauthProvider !== undefined) {
640
+ const oauthButton = findOAuthButton(inventory, oauthProvider);
641
+ const label = OAUTH_PROVIDERS[oauthProvider].label;
642
+ if (oauthButton !== null) {
643
+ steps.push(`OAuth-first: found a ${label} sign-in affordance ` +
644
+ `(${JSON.stringify(oauthButton.visibleText ?? oauthButton.ariaLabel ?? label)}) ` +
645
+ `— taking the OAuth path`);
646
+ return { kind: "oauth", selector: oauthButton.selector };
647
+ }
648
+ steps.push(`OAuth-first requested but no ${label} affordance on the page — ` +
649
+ `falling back to form-fill`);
650
+ }
531
651
  // OAuth-only: no fillable input AND no button that reads as an
532
652
  // email-signup option — nothing to automate (Issue 4).
533
653
  if (isOauthOnlyChooser(inventory)) {
@@ -668,9 +788,16 @@ export class SignupAgent {
668
788
  }
669
789
  }
670
790
  // Extract + rank the page's interactive elements (F3 T1/T2).
671
- async buildInventory(steps) {
791
+ // `oauthProvider` keeps that provider's OAuth affordance from being
792
+ // ranked out of the capped inventory when an OAuth-first signup is
793
+ // requested (T6/T13). `buttonCap` widens for the post-OAuth
794
+ // onboarding loop: a dashboard carries far more nav links than a
795
+ // signup form, and they do not score as signup buttons, so the
796
+ // default cap would drop the "API Keys"/"Settings" links the
797
+ // onboarding planner must reach.
798
+ async buildInventory(steps, oauthProvider, buttonCap = 25) {
672
799
  const raw = await this.browser.extractInteractiveElements();
673
- const { inventory, buttonsDropped } = rankAndCapInventory(raw);
800
+ const { inventory, buttonsDropped } = rankAndCapInventory(raw, buttonCap, oauthProvider);
674
801
  steps.push(`Inventory: ${inventory.length} element(s)` +
675
802
  (buttonsDropped > 0 ? ` (${buttonsDropped} low-ranked button(s) dropped)` : ""));
676
803
  return inventory;
@@ -954,6 +1081,11 @@ export class SignupAgent {
954
1081
  steps,
955
1082
  ...this.resultTail(),
956
1083
  };
1084
+ case "oauth":
1085
+ // T6/T7 — OAuth-first path. runOAuthFlow drives the consent
1086
+ // handshake and post-OAuth onboarding to its own terminal
1087
+ // SignupResult; there is no form submit / email verification.
1088
+ return await this.runOAuthFlow(task, outcome.selector, steps);
957
1089
  case "submitted":
958
1090
  break;
959
1091
  }
@@ -993,8 +1125,7 @@ export class SignupAgent {
993
1125
  const maxRounds = task.postVerifyMaxRounds ?? 6;
994
1126
  credentials = await this.postVerifyLoop({
995
1127
  service: task.service,
996
- email: task.email,
997
- password,
1128
+ credentials: { email: task.email, password },
998
1129
  maxRounds,
999
1130
  steps,
1000
1131
  });
@@ -1047,6 +1178,151 @@ export class SignupAgent {
1047
1178
  };
1048
1179
  }
1049
1180
  }
1181
+ // ------------ OAuth-first signup (T6/T7/T13) ------------
1182
+ // Drive an OAuth signup (Google or GitHub) to a terminal
1183
+ // SignupResult. Entered from runSignup when planExecuteWithRetry
1184
+ // found the provider's affordance (T6). Steps: click the button →
1185
+ // walk the consent screens → scope-gate them → drive post-OAuth
1186
+ // onboarding to the API key.
1187
+ //
1188
+ // THE CRITICAL GUARANTEE (D4 / eng-review critical gap): if the flow
1189
+ // lands on the provider's credential form (expired/missing session)
1190
+ // or a security challenge, it hands back `needs_login` and NEVER
1191
+ // types into that form. Driving the provider's login is exactly what
1192
+ // trips its automation detection — and there is no password to give.
1193
+ async runOAuthFlow(task, oauthSelector, steps) {
1194
+ const provider = OAUTH_PROVIDERS[task.oauthProvider ?? "google"];
1195
+ const loginCmd = provider.id === "github"
1196
+ ? "npx @trusty-squire/mcp login --provider=github"
1197
+ : "npx @trusty-squire/mcp login";
1198
+ steps.push(`OAuth: clicking the ${provider.label} sign-in affordance`);
1199
+ await this.browser.startOAuth(oauthSelector);
1200
+ await this.browser.wait(3);
1201
+ await saveDebugSnapshot(this.browser, "oauth-after-click");
1202
+ // Bounded consent walk — handles account-chooser → consent as two
1203
+ // steps without ever spinning. Each iteration re-reads the page.
1204
+ const MAX_OAUTH_NAV = 6;
1205
+ for (let i = 0; i < MAX_OAUTH_NAV; i++) {
1206
+ if (this.browser.oauthPageClosed()) {
1207
+ steps.push(`OAuth: the ${provider.label} window closed — handshake returned to the service`);
1208
+ break;
1209
+ }
1210
+ const url = this.browser.currentUrl();
1211
+ let body;
1212
+ try {
1213
+ body = (await this.browser.extractText()).slice(0, 4000);
1214
+ }
1215
+ catch {
1216
+ // The page is navigating between provider screens — re-read.
1217
+ await this.browser.wait(1);
1218
+ continue;
1219
+ }
1220
+ const authState = provider.classifyAuthState(url, body);
1221
+ steps.push(`OAuth: ${provider.label} auth state = ${authState}`);
1222
+ if (authState === "not_provider")
1223
+ break; // flow left the provider — back on the service
1224
+ if (authState === "challenge") {
1225
+ return this.oauthAbort("needs_login", `${provider.label} interrupted the sign-in with a security challenge ("verify it's you"). ` +
1226
+ `Re-run \`${loginCmd}\`, clear the challenge in the window, then retry.`, steps);
1227
+ }
1228
+ if (authState === "needs_login") {
1229
+ return this.oauthAbort("needs_login", `the bot's ${provider.label} session is missing or expired — no consent screen was reached. ` +
1230
+ `Re-run \`${loginCmd}\` to re-establish it, then retry.`, steps);
1231
+ }
1232
+ // authState === "consent". Backstop the page classifier with a
1233
+ // live-DOM check: if the page actually carries a credential
1234
+ // field it is a login form (the text classifier can catch a
1235
+ // login page that says "to continue to <app>"). Hand back —
1236
+ // never type into it.
1237
+ if (await this.oauthLoginFormPresent()) {
1238
+ return this.oauthAbort("needs_login", `landed on a ${provider.label} sign-in form — the session is missing or expired. ` +
1239
+ `Re-run \`${loginCmd}\`, then retry. The bot will not type into ${provider.label}'s login form.`, steps);
1240
+ }
1241
+ // Genuine consent screen / account chooser — scope-gate it (T7).
1242
+ const scopes = extractOAuthScopes(url);
1243
+ if (scopes === null) {
1244
+ return this.oauthAbort("oauth_consent_needs_review", `reached a ${provider.label} consent screen but could not read its requested scopes ` +
1245
+ `from the URL — pausing for manual review rather than approving blind.`, steps);
1246
+ }
1247
+ if (!provider.scopesAreBasic(scopes)) {
1248
+ return this.oauthAbort("oauth_consent_needs_review", `the consent screen requests scopes beyond basic identity (${scopes.join(", ")}). ` +
1249
+ `Approve it manually — the bot only auto-approves basic-identity scopes.`, steps);
1250
+ }
1251
+ steps.push(`OAuth: consent scopes all basic (${scopes.join(", ")}) — auto-approving`);
1252
+ const advanced = await this.browser.advanceOAuthConsent(provider.id);
1253
+ if (!advanced) {
1254
+ return this.oauthAbort("oauth_consent_needs_review", `reached a ${provider.label} consent screen but found no approve control to click — ` +
1255
+ `approve it manually.`, steps);
1256
+ }
1257
+ await this.browser.wait(3);
1258
+ }
1259
+ // Handshake done — restore the product page (popup flow is a no-op
1260
+ // for same-tab redirects) and drive post-OAuth onboarding.
1261
+ await this.browser.settleAfterOAuth();
1262
+ await this.browser.wait(2);
1263
+ await saveDebugSnapshot(this.browser, "oauth-post-consent");
1264
+ steps.push(`OAuth: signed in via ${provider.label} — driving post-OAuth onboarding to the API key`);
1265
+ let credentials = await this.extractCredentials();
1266
+ if (credentials.api_key === undefined) {
1267
+ credentials = await this.postVerifyLoop({
1268
+ service: task.service,
1269
+ maxRounds: task.postVerifyMaxRounds ?? 8,
1270
+ steps,
1271
+ });
1272
+ }
1273
+ if (credentials.api_key !== undefined) {
1274
+ return {
1275
+ success: true,
1276
+ credentials: { ...credentials },
1277
+ steps,
1278
+ ...this.resultTail(),
1279
+ };
1280
+ }
1281
+ // No API key. Distinguish a billing/card wall (onboarding_blocked)
1282
+ // from a generic navigation miss — never grep-loop a paid wall.
1283
+ const finalText = (await this.browser.extractText().catch(() => "")).toLowerCase();
1284
+ if (ONBOARDING_PAYWALL_PATTERNS.some((p) => finalText.includes(p))) {
1285
+ return {
1286
+ success: false,
1287
+ error: `onboarding_blocked: ${task.service}'s API key sits behind a billing or ` +
1288
+ `payment-method wall the bot will not cross — finish the signup manually.`,
1289
+ steps,
1290
+ ...this.resultTail(),
1291
+ };
1292
+ }
1293
+ return {
1294
+ success: false,
1295
+ error: `oauth_onboarding_failed: signed in to ${task.service} via ${provider.label} but ` +
1296
+ `could not reach an API key through post-OAuth onboarding.`,
1297
+ steps,
1298
+ ...this.resultTail(),
1299
+ };
1300
+ }
1301
+ // Build a terminal SignupResult for an aborted OAuth run. `prefix`
1302
+ // is the error tag provision-any.ts maps to a tool status
1303
+ // (needs_login, oauth_consent_needs_review).
1304
+ oauthAbort(prefix, detail, steps) {
1305
+ steps.push(`OAuth aborted (${prefix}): ${detail}`);
1306
+ return {
1307
+ success: false,
1308
+ error: `${prefix}: ${detail}`,
1309
+ steps,
1310
+ ...this.resultTail(),
1311
+ };
1312
+ }
1313
+ // Backstop for the critical guarantee (D4): true when the active
1314
+ // provider page carries a credential-entry field — an expired/missing
1315
+ // session dropped the bot on a login form. A genuine consent screen
1316
+ // or account chooser has buttons/tiles only, no text inputs.
1317
+ async oauthLoginFormPresent() {
1318
+ const inv = await this.browser.extractInteractiveElements();
1319
+ return inv.some((e) => e.tag === "input" &&
1320
+ (e.type === "email" ||
1321
+ e.type === "password" ||
1322
+ e.type === "text" ||
1323
+ e.type === "tel" ||
1324
+ e.type === null));
1325
+ }
1050
1326
  // ------------ Claude planner ------------
1051
1327
  async planSignupForm(input) {
1052
1328
  const systemPrompt = `You plan how to fill a web signup form.
@@ -1147,28 +1423,47 @@ ${formatInventory(input.inventory)}`,
1147
1423
  }
1148
1424
  throw lastErr ?? new Error("verification email did not arrive in time");
1149
1425
  }
1150
- // After verification, drive the browser toward the API key. Each round
1151
- // asks Claude what to do next given the current page; we stop when
1152
- // Claude says "done" or when we extract a credential. Bounded by
1153
- // maxRounds so a confused agent can't burn the whole context window.
1426
+ // Drive the browser toward the API key after the account exists —
1427
+ // used by BOTH the email-verification path and the OAuth path (T9).
1428
+ // Each round asks Claude what to do next given the current page; we
1429
+ // stop when Claude says "done" or when we extract a credential.
1430
+ // Bounded by maxRounds so a confused agent can't burn the context.
1431
+ //
1432
+ // T9 — decoupled from email+password. `credentials` is present only
1433
+ // on the email-verification path, where the loop may need to sign in
1434
+ // with the just-created account (SendPulse). On the OAuth path it is
1435
+ // absent: there is no password, and the Google session already
1436
+ // authenticated the user — a `login` step is then a no-op.
1154
1437
  async postVerifyLoop(args) {
1155
1438
  let credentials = await this.extractCredentials();
1156
1439
  let loginAttempts = 0;
1440
+ const oauth = args.credentials === undefined;
1441
+ // Re-plan hint for the next round — set when an `extract` step
1442
+ // found no key, which means the visible key text is masked /
1443
+ // truncated (the S3-class trap: the planner sees a key-shaped
1444
+ // string and keeps asking to extract it forever).
1445
+ let hint;
1157
1446
  for (let round = 0; round < args.maxRounds; round++) {
1158
1447
  if (credentials.api_key !== undefined || credentials.username !== undefined) {
1159
1448
  args.steps.push(`Post-verify: credentials found on round ${round}.`);
1160
1449
  return credentials;
1161
1450
  }
1162
1451
  const state = await this.browser.getState();
1452
+ // DOM-grounded inventory so the planner picks verified selectors
1453
+ // instead of inventing CSS that never resolves. A dashboard has
1454
+ // far more nav links than a signup form, so the button cap is
1455
+ // widened (the "API Keys"/"Settings" links must survive ranking).
1456
+ const inventory = await this.buildInventory(args.steps, undefined, 80);
1163
1457
  let nextStep;
1164
1458
  try {
1165
1459
  nextStep = await this.planPostVerifyStep({
1166
1460
  service: args.service,
1167
- email: args.email,
1168
- password: args.password,
1169
1461
  round,
1170
1462
  maxRounds: args.maxRounds,
1171
1463
  state,
1464
+ oauth,
1465
+ inventory,
1466
+ ...(hint !== undefined ? { hint } : {}),
1172
1467
  });
1173
1468
  }
1174
1469
  catch (err) {
@@ -1178,9 +1473,20 @@ ${formatInventory(input.inventory)}`,
1178
1473
  args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: ${nextStep.kind} — ${nextStep.reason}`);
1179
1474
  if (nextStep.kind === "done")
1180
1475
  break;
1476
+ hint = undefined;
1181
1477
  try {
1182
1478
  if (nextStep.kind === "extract") {
1183
1479
  credentials = await this.extractCredentials();
1480
+ if (credentials.api_key === undefined) {
1481
+ // The planner saw a key-shaped string but extraction got
1482
+ // nothing — the on-page key is masked/truncated. Steer the
1483
+ // next round off `extract` and toward creating a fresh key.
1484
+ hint =
1485
+ "Your last 'extract' found NO key — the key text on the page is " +
1486
+ "masked or truncated (e.g. shows '...' or dots). A masked existing " +
1487
+ "key cannot be extracted. Click 'Create API Key' / 'New API Key' to " +
1488
+ "generate a fresh one — its full value is shown once, on creation.";
1489
+ }
1184
1490
  }
1185
1491
  else if (nextStep.kind === "click") {
1186
1492
  await this.browser.click(nextStep.selector);
@@ -1197,12 +1503,20 @@ ${formatInventory(input.inventory)}`,
1197
1503
  await this.browser.wait(Math.min(nextStep.seconds, 15));
1198
1504
  }
1199
1505
  else if (nextStep.kind === "login") {
1200
- if (loginAttempts >= 2) {
1506
+ if (args.credentials === undefined) {
1507
+ // OAuth run — no password to give, and the Google session
1508
+ // already authenticated us. Treat `login` as a no-op note.
1509
+ args.steps.push("Post-verify: planner asked to log in, but this is an OAuth run — " +
1510
+ "already authenticated via Google; skipping.");
1511
+ }
1512
+ else if (loginAttempts >= 2) {
1201
1513
  args.steps.push("Post-verify: already attempted login twice — stopping.");
1202
1514
  break;
1203
1515
  }
1204
- loginAttempts += 1;
1205
- await this.loginWithCredentials(args.email, args.password, args.steps);
1516
+ else {
1517
+ loginAttempts += 1;
1518
+ await this.loginWithCredentials(args.credentials.email, args.credentials.password, args.steps);
1519
+ }
1206
1520
  }
1207
1521
  }
1208
1522
  catch (err) {
@@ -1253,27 +1567,49 @@ ${formatInventory(input.inventory)}`,
1253
1567
  }
1254
1568
  async planPostVerifyStep(input) {
1255
1569
  const visibleText = (input.state.html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim()).slice(0, 2500);
1256
- const systemPrompt = `You are driving a headless browser after a SaaS signup verification.
1570
+ const loginGuidance = input.oauth
1571
+ ? `- You are ALREADY signed in via Google — NEVER return {"kind":"login"}. If the page looks like a login wall, return {"kind":"navigate"} to the service's dashboard, or {"kind":"done"}.`
1572
+ : `- If the page is a LOGIN form, or says you must sign in, or you've been signed out, return {"kind":"login"} — the bot signs in with the signup email + password. Do NOT return done on a login wall.`;
1573
+ const systemPrompt = `You are driving a headless browser after a SaaS signup ${input.oauth ? "via Google OAuth" : "verification"}.
1257
1574
  Your goal: surface the user's API key (or any credential — token, secret, app id) so it can be extracted from the page.
1258
1575
 
1259
1576
  You may issue ONE step per turn. Reply with a single JSON object, no prose.
1260
1577
 
1578
+ You are given a screenshot and an INVENTORY of the page's interactive
1579
+ elements — each line ends with a precise \`selector=\` the bot has
1580
+ verified resolves.
1581
+
1261
1582
  Schema:
1262
1583
  {"kind":"done","reason":"why we should stop"}
1263
1584
  {"kind":"extract","reason":"the API key is now visible on this page"}
1264
1585
  {"kind":"login","reason":"the page is a login form / we were signed out"}
1265
- {"kind":"click","selector":"CSS","reason":"e.g. dismiss onboarding modal / open API keys page"}
1266
- {"kind":"fill","selector":"CSS","value":"value","reason":"unusual — only for required project-name etc."}
1267
- {"kind":"navigate","url":"https://...","reason":"e.g. go directly to /settings/api"}
1586
+ {"kind":"click","selector":"<a selector= copied verbatim from the inventory>","reason":"e.g. open the API keys page"}
1587
+ {"kind":"fill","selector":"<a selector= from the inventory>","value":"value","reason":"unusual — only for a required project-name etc."}
1588
+ {"kind":"navigate","url":"https://...","reason":"e.g. go directly to /settings/api-keys"}
1268
1589
  {"kind":"wait","seconds":N,"reason":"page is still loading"}
1269
1590
 
1591
+ - CRITICAL: every "selector" in a click/fill step MUST be copied
1592
+ verbatim from a \`selector=\` field in the inventory below. Never
1593
+ invent or guess a selector — one not in the inventory is rejected.
1594
+ - If the element you want is NOT in the inventory, use {"kind":"navigate"}
1595
+ to a likely settings URL instead of guessing a selector.
1596
+
1270
1597
  Strategy:
1271
- - If the API key text is visible, return {"kind":"extract"}.
1272
- - If there's a dashboard menu link like "API Keys" / "Tokens" / "Developer", click it.
1273
- - If there's an onboarding modal blocking, dismiss it.
1274
- - If the page is a LOGIN form, or says you must sign in, or you've been signed out, return {"kind":"login"} — the bot signs in with the signup email + password. Do NOT return done on a login wall.
1598
+ - If a FULL, untruncated API key is visible, return {"kind":"extract"}.
1599
+ - A key shown masked or truncated (with "...", dots, or "") is NOT
1600
+ extractable its full value is shown only once, at creation. Do NOT
1601
+ return "extract" for a masked key, and do not return "extract" twice
1602
+ in a row. Instead click "Create API Key" / "New API Key" / "Generate"
1603
+ to make a fresh key, then extract its full value.
1604
+ - To reach API keys, prefer a {"kind":"navigate"} straight to the
1605
+ service's API-keys settings URL — note these usually live under the
1606
+ user/ACCOUNT settings, not a project or workspace's settings.
1607
+ - Otherwise click a dashboard menu link like "API Keys" / "Tokens" /
1608
+ "Developer" / "Settings" — using its inventory selector.
1609
+ - If there's an onboarding modal or a "Skip" link blocking, dismiss it.
1610
+ ${loginGuidance}
1275
1611
  - If we're on a "verify your phone" / "verify email" wall, return done (we can't solve those).
1276
- - If the page wants the user to create a project before showing keys, fill the minimum and click create.
1612
+ - If the page wants the user to create a project/key before showing it, fill the minimum and click create.
1277
1613
  - Round ${input.round + 1} of ${input.maxRounds}. Prefer "done" if you're not making progress.`;
1278
1614
  const userBlocks = [
1279
1615
  { kind: "image", media_type: "image/png", data_base64: input.state.screenshot },
@@ -1285,14 +1621,19 @@ Title: ${input.state.title}
1285
1621
  Round: ${input.round + 1}/${input.maxRounds}
1286
1622
 
1287
1623
  Visible text (truncated):
1288
- ${visibleText}`,
1624
+ ${visibleText}
1625
+
1626
+ Interactive element inventory:
1627
+ ${formatInventory(input.inventory)}${input.hint !== undefined ? `\n\nIMPORTANT — ${input.hint}` : ""}`,
1289
1628
  },
1290
1629
  ];
1630
+ // The planner may only pick click/fill selectors the bot supplied.
1631
+ const allowed = new Set(input.inventory.map((e) => e.selector));
1291
1632
  return this.callLLM({
1292
1633
  system: systemPrompt,
1293
1634
  userBlocks,
1294
1635
  maxTokens: 500,
1295
- parse: parsePostVerifyStep,
1636
+ parse: (raw) => parsePostVerifyStep(raw, allowed),
1296
1637
  });
1297
1638
  }
1298
1639
  async findSignupLink() {
@@ -1313,13 +1654,29 @@ ${visibleText}`,
1313
1654
  return null;
1314
1655
  }
1315
1656
  async extractCredentials() {
1316
- // IMPORTANT: pull credentials from the *visible* page text, not the raw
1657
+ // IMPORTANT: pull credentials from the *visible* page, not the raw
1317
1658
  // HTML. Reading from HTML matches anti-bot challenge JS (Cloudflare
1318
- // Turnstile, hCaptcha) whose challenge tokens look like API keys to a
1319
- // naive regex.
1320
- const text = await this.browser.extractText();
1659
+ // Turnstile, hCaptcha) whose challenge tokens look like API keys to
1660
+ // a naive regex.
1661
+ //
1662
+ // Two visible surfaces, in priority order:
1663
+ // 1. Discrete credential candidates — copy-input values and each
1664
+ // element's own direct text. A key is read whole here, un-glued
1665
+ // from adjacent buttons; captcha tokens (hidden inputs) are
1666
+ // excluded by the browser.
1667
+ // 2. The whole visible body text — fallback for a key shown as
1668
+ // plain prose, accepting that body concatenation can glue
1669
+ // neighbours (the extractApiKeyFromText guards catch the worst).
1321
1670
  const credentials = {};
1322
- const apiKey = extractApiKeyFromText(text);
1671
+ let apiKey = null;
1672
+ for (const candidate of await this.browser.extractCredentialCandidates()) {
1673
+ apiKey = extractApiKeyFromText(candidate);
1674
+ if (apiKey !== null)
1675
+ break;
1676
+ }
1677
+ if (apiKey === null) {
1678
+ apiKey = extractApiKeyFromText(await this.browser.extractText());
1679
+ }
1323
1680
  if (apiKey !== null)
1324
1681
  credentials.api_key = apiKey;
1325
1682
  return credentials;