@trusty-squire/mcp 0.8.15 → 0.8.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bot/agent.d.ts +42 -3
- package/dist/bot/agent.d.ts.map +1 -1
- package/dist/bot/agent.js +2423 -272
- package/dist/bot/agent.js.map +1 -1
- package/dist/bot/browser.d.ts +31 -3
- package/dist/bot/browser.d.ts.map +1 -1
- package/dist/bot/browser.js +872 -113
- package/dist/bot/browser.js.map +1 -1
- package/dist/bot/captcha-solver-2captcha.d.ts +12 -0
- package/dist/bot/captcha-solver-2captcha.d.ts.map +1 -1
- package/dist/bot/captcha-solver-2captcha.js +28 -5
- package/dist/bot/captcha-solver-2captcha.js.map +1 -1
- package/dist/bot/google-login.d.ts.map +1 -1
- package/dist/bot/google-login.js +39 -0
- package/dist/bot/google-login.js.map +1 -1
- package/dist/bot/index.d.ts +1 -1
- package/dist/bot/index.d.ts.map +1 -1
- package/dist/bot/oauth-providers.d.ts.map +1 -1
- package/dist/bot/oauth-providers.js +13 -3
- package/dist/bot/oauth-providers.js.map +1 -1
- package/dist/bot/promote-to-skill.d.ts +2 -1
- package/dist/bot/promote-to-skill.d.ts.map +1 -1
- package/dist/bot/promote-to-skill.js +26 -0
- package/dist/bot/promote-to-skill.js.map +1 -1
- package/dist/bot/replay-skill.d.ts.map +1 -1
- package/dist/bot/replay-skill.js +237 -32
- package/dist/bot/replay-skill.js.map +1 -1
- package/dist/bot/xvfb.d.ts.map +1 -1
- package/dist/bot/xvfb.js +8 -3
- package/dist/bot/xvfb.js.map +1 -1
- package/dist/install/cli.d.ts +5 -0
- package/dist/install/cli.d.ts.map +1 -1
- package/dist/install/cli.js +33 -8
- package/dist/install/cli.js.map +1 -1
- package/dist/tools/signup-telemetry.d.ts +2 -2
- package/dist/tools/signup-telemetry.d.ts.map +1 -1
- package/dist/tools/signup-telemetry.js.map +1 -1
- package/package.json +2 -1
package/dist/bot/agent.js
CHANGED
|
@@ -15,7 +15,7 @@ import { sendTelegramHeightenedAuth } from "./telegram-notify.js";
|
|
|
15
15
|
import { TwoCaptchaSolver } from "./captcha-solver-2captcha.js";
|
|
16
16
|
import { redactCredentials } from "./redact.js";
|
|
17
17
|
import { readOperatorOtp, fromDomainFromUrl } from "./read-otp.js";
|
|
18
|
-
import { loggedInProviders, clearProviderLoggedIn } from "./login-state.js";
|
|
18
|
+
import { loggedInProviders, clearProviderLoggedIn, markProviderLoggedIn, } from "./login-state.js";
|
|
19
19
|
import { saveDebugSnapshot } from "./debug.js";
|
|
20
20
|
import { captureOnboardingRound } from "./onboarding-capture.js";
|
|
21
21
|
import { wasRecentlyPrewarmed, recordPrewarmSuccess } from "./prewarm-cache.js";
|
|
@@ -55,10 +55,48 @@ const VERIFICATION_EXPECTED_PATTERNS = [
|
|
|
55
55
|
"almost there",
|
|
56
56
|
"one more step",
|
|
57
57
|
];
|
|
58
|
-
// Short probe when the post-submit page never
|
|
59
|
-
//
|
|
58
|
+
// Short probe when, even after a settle, the post-submit page still never
|
|
59
|
+
// prompted the user to check their email AND no account-created signal
|
|
60
|
+
// appeared. Legitimate verification mail almost always lands inside a
|
|
60
61
|
// minute; this catches the fast case without 300s of dead air.
|
|
61
62
|
const VERIFICATION_PROBE_SECONDS = 45;
|
|
63
|
+
// Settle window before the SECOND post-submit page read. SPA signups
|
|
64
|
+
// (Postmark, ElevenLabs, Browserbase, Grafana Cloud, …) swap in their
|
|
65
|
+
// "check your email" confirmation screen a beat AFTER submit. Reading the
|
|
66
|
+
// DOM the instant extraction fails races that render and mislabels the
|
|
67
|
+
// run as "no email expected", collapsing the poll to the 45s probe and
|
|
68
|
+
// abandoning mail that was, in fact, on its way.
|
|
69
|
+
const SUBMIT_SETTLE_SECONDS = 3;
|
|
70
|
+
// Poll floor once the form CLEANLY SUBMITTED but the page text stayed
|
|
71
|
+
// inconclusive about an email prompt (no "check your email", but also no
|
|
72
|
+
// hard error/rejection). A clean submit means an account was created, so
|
|
73
|
+
// real verification mail is plausibly inbound; transactional senders on a
|
|
74
|
+
// fresh send (Postmark, SendGrid) routinely take longer than the 45s
|
|
75
|
+
// probe. Polling 120s here — rather than bailing at 45s — is the
|
|
76
|
+
// difference between catching that mail and a false `verification_not_sent`.
|
|
77
|
+
// Still bounded so a genuinely-silent service doesn't hold the run for the
|
|
78
|
+
// full 180s expected-email timeout.
|
|
79
|
+
const SUBMITTED_PROBE_FLOOR_SECONDS = 120;
|
|
80
|
+
// Post-submit page text that means the submit was REJECTED, not accepted —
|
|
81
|
+
// no account was created, so no verification mail is coming and even the
|
|
82
|
+
// 45s probe is wasted. Lets the bot bail immediately instead of polling.
|
|
83
|
+
// Kept conservative: only unambiguous rejection phrasings.
|
|
84
|
+
const SUBMIT_REJECTED_PATTERNS = [
    /\balready\s+(?:registered|exists|in\s+use|taken)\b/i,
    // "You already have an account" is a rejection, but the interrogative
    // "Already have an account? Sign in" is the login link printed on nearly
    // every signup page — matching it would flag healthy submits as rejected,
    // so the question-mark form is excluded.
    /\balready\s+have\s+an\s+account\b(?!\s*\?)/i,
    /\b(?:email|account|username)\s+(?:is\s+)?already\s+(?:registered|taken|in\s+use)\b/i,
    /\bthat\s+email\s+is\s+already\b/i,
    /\ban?\s+account\s+(?:with\s+(?:this|that)\s+email\s+)?already\s+exists\b/i,
    /\bplease\s+(?:try\s+again|correct\s+the\s+errors?)\b/i,
    /\bthis\s+field\s+is\s+required\b/i,
    /\b(?:email|password)\s+cannot\s+be\s+empty\b/i,
    /\binvalid\s+(?:email|password)\b/i,
];
/**
 * True when the post-submit page reads like a rejected submit (account not
 * created), so the bot should not poll for a verification email that will
 * never arrive. Exported for unit testing.
 *
 * @param {string} pageText - Text of the page shown after the submit click.
 * @returns {boolean} Whether any SUBMIT_REJECTED_PATTERNS entry matches.
 */
export function submitWasRejected(pageText) {
    return SUBMIT_REJECTED_PATTERNS.some((p) => p.test(pageText));
}
|
|
62
100
|
// T7: page text that means the post-OAuth API key sits behind a
|
|
63
101
|
// billing / payment-method wall. When the OAuth onboarding loop ends
|
|
64
102
|
// without a key and the page reads like this, the run ends
|
|
@@ -174,18 +212,75 @@ const STUCK_LOOP_FALLBACK_PATHS = [
|
|
|
174
212
|
"/settings/tokens",
|
|
175
213
|
"/settings/api-tokens",
|
|
176
214
|
"/settings/account/api/auth-tokens/",
|
|
215
|
+
// 0.8.3-rc.2 — added after the post-OAuth onboarding drain
|
|
216
|
+
// (amplitude/groq/launchdarkly/modal/weaviate/…). These conventions
|
|
217
|
+
// were absent from the generic list and each cost a service its
|
|
218
|
+
// whole post-verify budget: LaunchDarkly hosts keys at
|
|
219
|
+
// /settings/authorization, a cohort of consoles use a bare
|
|
220
|
+
// /settings/access-tokens or /settings/api, and several put the
|
|
221
|
+
// developer surface at /settings/developers. They sit AFTER the
|
|
222
|
+
// historic, more-common paths so we don't regress an existing hit.
|
|
223
|
+
"/settings/authorization",
|
|
224
|
+
"/settings/access-tokens",
|
|
225
|
+
"/settings/developers",
|
|
226
|
+
"/settings/developer",
|
|
227
|
+
"/settings/api",
|
|
177
228
|
"/account/api-keys",
|
|
178
229
|
"/account/api_tokens",
|
|
230
|
+
"/account/api-tokens",
|
|
179
231
|
"/account/keys",
|
|
180
232
|
"/account/tokens",
|
|
233
|
+
"/account/access-tokens",
|
|
181
234
|
"/api-keys",
|
|
182
235
|
"/api_keys",
|
|
236
|
+
"/api-tokens",
|
|
183
237
|
"/keys",
|
|
184
238
|
"/tokens",
|
|
239
|
+
"/access-tokens",
|
|
185
240
|
"/auth-tokens",
|
|
241
|
+
"/developers",
|
|
186
242
|
"/dashboard/api-keys",
|
|
187
243
|
"/dashboard/keys",
|
|
188
244
|
];
|
|
245
|
+
// 0.8.3-rc.2 — curated per-service API-key paths, consulted BEFORE the
|
|
246
|
+
// generic STUCK_LOOP_FALLBACK_PATHS list when the stuck origin belongs
|
|
247
|
+
// to one of these vendors. The generic conventions can't reach these:
|
|
248
|
+
// the path is either non-obvious (LaunchDarkly's /settings/authorization),
|
|
249
|
+
// or the vendor splits the convention differently than the majority
|
|
250
|
+
// (groq keys live at a bare /keys but the planner kept guessing
|
|
251
|
+
// /settings/api-keys, which 404s). Keyed by service SLUG (lowercased,
|
|
252
|
+
// alphanumerics only) so it survives the inbox-alias slug the bot is
|
|
253
|
+
// invoked with, and additionally matched against the stuck URL's host so
|
|
254
|
+
// a curated path is only ever composed onto the vendor it was harvested
|
|
255
|
+
// from. Each entry is ordered most-specific-first.
|
|
256
|
+
//
|
|
257
|
+
// This is deliberately a SMALL map of paths the generic heuristic
|
|
258
|
+
// provably can't derive — not a 12-service URL table. Most services in
|
|
259
|
+
// the onboarding-drain cohort are fixed by the widened generic list
|
|
260
|
+
// above; only the ones whose real path is genuinely un-guessable land
|
|
261
|
+
// here.
|
|
262
|
+
// Built on a null prototype: the map is indexed by an arbitrary,
// caller-derived slug, so inherited Object.prototype members (e.g.
// "constructor") must not read as curated entries.
const SERVICE_KEYS_PATHS = Object.assign(Object.create(null), {
    // console.groq.com/keys — the planner kept trying /settings/api-keys
    // (404). The bare /keys IS in the generic list but lands deep in the
    // order; pin it first for groq so the first escalation hits.
    groq: ["/keys", "/settings/keys"],
    // app.launchdarkly.com/settings/authorization — LD's access-token UI.
    // /settings/api-keys and /settings/generated-credentials both 404.
    launchdarkly: ["/settings/authorization"],
    // Weaviate keys are issued per-cluster, but the org-level admin page
    // is the closest reachable surface; account-scoped guesses all 404.
    weaviate: ["/account", "/settings/api-keys"],
    // northflank hosts user API keys under the account menu, not a
    // top-level /settings/keys.
    northflank: ["/account/api", "/settings/api"],
});
|
|
277
|
+
// Normalize a service name to the slug used as a SERVICE_KEYS_PATHS key:
|
|
278
|
+
// lowercased, alphanumerics only. Mirrors guessSignupUrl's slug rule so
|
|
279
|
+
// the inbox-alias-derived service string (e.g. "Groq Cloud") resolves to
|
|
280
|
+
// the same key the map is authored under. Exported for unit testing.
|
|
281
|
+
/**
 * Normalize a service name to its slug: lowercased, with every character
 * outside [a-z0-9] removed (so "Groq Cloud" becomes "groqcloud").
 * Exported for unit testing.
 *
 * @param {string} service - Human-readable service name.
 * @returns {string} The slug; "" when `service` is null or undefined.
 */
export function serviceSlug(service) {
    // A missing service name has no slug; return "" instead of throwing on
    // `.toLowerCase()` so callers can treat it as "no curated entry".
    if (service === null || service === undefined) {
        return "";
    }
    return String(service).toLowerCase().replace(/[^a-z0-9]/g, "");
}
|
|
189
284
|
// 0.8.2-rc.10 — heuristic for "this account already exists on the
|
|
190
285
|
// service and its API keys are masked, with no path to reveal them."
|
|
191
286
|
// The test identity (methoxine@gmail.com) accumulates state across
|
|
@@ -252,34 +347,120 @@ export function detectExistingAccountNoExtract(input) {
|
|
|
252
347
|
return true;
|
|
253
348
|
return false;
|
|
254
349
|
}
|
|
255
|
-
//
|
|
256
|
-
//
|
|
257
|
-
//
|
|
258
|
-
|
|
259
|
-
|
|
350
|
+
// A "mint a fresh key" affordance — a button/link that creates a new
|
|
351
|
+
// API key/token. The label vocabulary is deliberately broad ("create",
|
|
352
|
+
// "generate", "new", "add" paired with a key/token noun) but must be
|
|
353
|
+
// paired with a credential noun so a bare "New project" / "Add member"
|
|
354
|
+
// button on a dashboard isn't mistaken for a key-minting control.
|
|
355
|
+
//
|
|
356
|
+
// Word-boundary-anchored to avoid matching "recreate" / "regenerate
|
|
357
|
+
// password" style false friends — though "regenerate" + a key noun IS
|
|
358
|
+
// a valid mint affordance (rotating a key produces a fresh value), so
|
|
359
|
+
// it's included explicitly.
|
|
360
|
+
const CREATE_KEY_VERB = /\b(?:create|generate|regenerate|new|add|issue|mint)\b/i;
const CREATE_KEY_NOUN = /\b(?:api[\s_-]*keys?|secret[\s_-]*keys?|access[\s_-]*tokens?|personal[\s_-]*access[\s_-]*tokens?|api[\s_-]*tokens?|auth[\s_-]*tokens?|tokens?|keys?|credentials?)\b/i;
// A standalone phrase that is unambiguously a key-minting control even
// without the verb+noun co-occurrence test (some buttons read just
// "New API key" with the verb folded into "new"). Kept separate so the
// generic verb/noun pairing can stay strict.
const CREATE_KEY_PHRASE = /\b(?:create|generate|new|add|issue|mint)\s+(?:a\s+)?(?:new\s+)?(?:api|secret|access|auth|personal\s+access)?\s*(?:keys?|tokens?|credentials?)\b/i;
/**
 * Scan an element inventory for the single best "create new key / generate
 * API key / new token" affordance. Pure — it only reads fields of the
 * inventory entries. Exported for unit tests.
 *
 * Scoring: +4 for a full CREATE_KEY_PHRASE match, +2 for an explicit
 * "api key" / "api token" noun, +1 when `inViewport === true`. The highest
 * score wins; on a tie the earliest inventory entry is kept.
 *
 * @param {Array<object>} inventory - Entries with `tag`, `role`, `visible`,
 *   `inViewport` and the label fields `visibleText`, `ariaLabel`, `title`,
 *   `labelText`, `iconLabel`.
 * @returns {object|null} The best-scoring matching entry, or null when none
 *   qualifies.
 */
export function findCreateKeyAffordance(inventory) {
    const explicitApiNoun = /\bapi[\s_-]*keys?\b|\bapi[\s_-]*tokens?\b/i;
    let best = null;
    let bestScore = -1;
    for (const el of inventory ?? []) {
        // Only buttons / links / role=button|link are mint controls; an
        // <input> (a text field named "key") is never the create action.
        // The role comparison is case-insensitive so role="BUTTON" counts.
        const role = typeof el.role === "string" ? el.role.toLowerCase() : "";
        const isClickable = el.tag === "button" ||
            el.tag === "a" ||
            role === "button" ||
            role === "link";
        if (!isClickable) {
            continue;
        }
        // Skip elements explicitly reported hidden; entries that omit
        // `visible` are treated as visible.
        if (el.visible === false) {
            continue;
        }
        const haystack = [el.visibleText, el.ariaLabel, el.title, el.labelText, el.iconLabel]
            .filter((s) => s !== null && s !== undefined)
            .join(" ")
            .trim();
        if (haystack.length === 0) {
            continue;
        }
        const phraseHit = CREATE_KEY_PHRASE.test(haystack);
        const verbNounHit = CREATE_KEY_VERB.test(haystack) && CREATE_KEY_NOUN.test(haystack);
        if (!phraseHit && !verbNounHit) {
            continue;
        }
        let score = 0;
        if (phraseHit) {
            score += 4;
        }
        if (explicitApiNoun.test(haystack)) {
            score += 2;
        }
        if (el.inViewport === true) {
            score += 1;
        }
        // Strict ">" keeps the earliest entry on ties — the same winner a
        // stable descending sort would produce.
        if (score > bestScore) {
            best = el;
            bestScore = score;
        }
    }
    return best;
}
|
|
421
|
+
// Pick the next fallback URL to try, keyed against the origin of the
|
|
422
|
+
// currently-stuck URL. The curated SERVICE_KEYS_PATHS for the run's
|
|
423
|
+
// service (when its host matches the stuck origin) are tried FIRST,
|
|
424
|
+
// then the generic STUCK_LOOP_FALLBACK_PATHS. Returns null when every
|
|
425
|
+
// path has already been attempted. Exported for unit tests.
|
|
426
|
+
export function pickStuckLoopFallbackUrl(currentUrl, alreadyTried, service) {
|
|
427
|
+
let parsed;
|
|
260
428
|
try {
|
|
261
|
-
|
|
429
|
+
parsed = new URL(currentUrl);
|
|
262
430
|
}
|
|
263
431
|
catch {
|
|
264
432
|
return null;
|
|
265
433
|
}
|
|
434
|
+
const origin = parsed.origin;
|
|
266
435
|
// Skip a candidate when the current URL's path ALREADY matches it
|
|
267
436
|
// (case-insensitive, trailing-slash tolerant). The planner is stuck
|
|
268
437
|
// ON the page the candidate points to — navigating to the same URL
|
|
269
438
|
// again won't break the cycle, only a different path will.
|
|
270
|
-
const currentPath = (()
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
439
|
+
const currentPath = parsed.pathname.replace(/\/+$/, "").toLowerCase();
|
|
440
|
+
// Compose curated per-service paths first, but only when the stuck
|
|
441
|
+
// origin's host actually belongs to the named service. The slug is
|
|
442
|
+
// a substring of the host for the vendors we curate (groq →
|
|
443
|
+
// console.groq.com, launchdarkly → app.launchdarkly.com, …); this
|
|
444
|
+
// host gate stops a curated path from being composed onto an
|
|
445
|
+
// unrelated origin the bot wandered onto (e.g. an OAuth provider or
|
|
446
|
+
// a redirect to a marketing domain).
|
|
447
|
+
const slug = service !== undefined ? serviceSlug(service) : "";
|
|
448
|
+
const curated = slug !== "" &&
|
|
449
|
+
SERVICE_KEYS_PATHS[slug] !== undefined &&
|
|
450
|
+
parsed.hostname.toLowerCase().includes(slug)
|
|
451
|
+
? SERVICE_KEYS_PATHS[slug]
|
|
452
|
+
: [];
|
|
453
|
+
// Curated paths lead; the generic list follows. De-dup so a path that
|
|
454
|
+
// appears in both (groq's /keys, /settings/keys) isn't offered twice.
|
|
455
|
+
const seen = new Set();
|
|
456
|
+
for (const path of [...curated, ...STUCK_LOOP_FALLBACK_PATHS]) {
|
|
457
|
+
const candidatePath = path.replace(/\/+$/, "").toLowerCase();
|
|
458
|
+
if (seen.has(candidatePath))
|
|
459
|
+
continue;
|
|
460
|
+
seen.add(candidatePath);
|
|
279
461
|
const candidate = `${origin}${path}`;
|
|
280
462
|
if (alreadyTried.has(candidate))
|
|
281
463
|
continue;
|
|
282
|
-
const candidatePath = path.replace(/\/+$/, "").toLowerCase();
|
|
283
464
|
if (candidatePath === currentPath)
|
|
284
465
|
continue;
|
|
285
466
|
return candidate;
|
|
@@ -389,6 +570,72 @@ export function isGoogleSearchUrl(url) {
|
|
|
389
570
|
return false;
|
|
390
571
|
}
|
|
391
572
|
}
|
|
573
|
+
// Google's NEWER consent screen (URL form
|
|
574
|
+
// `accounts.google.com/signin/oauth/id?...&part=<opaque-token>`) hides
|
|
575
|
+
// the requested scopes behind the opaque `part=` token — there is no
|
|
576
|
+
// `scope=` query param to read, so extractOAuthScopes() returns null.
|
|
577
|
+
// The only remaining signal is the visible DOM: the consent page lists
|
|
578
|
+
// each requested item as a templated phrase. These pattern sets let us
|
|
579
|
+
// classify that DOM as basic-only vs. reaching beyond identity.
|
|
580
|
+
//
|
|
581
|
+
// BASIC = the openid/email/profile family — the exact thing the
|
|
582
|
+
// URL-readable happy path (scopesAreBasic → auto-approve) already
|
|
583
|
+
// approves without a human. We require a positive basic signal so an
|
|
584
|
+
// empty/ambiguous DOM never counts as basic.
|
|
585
|
+
const GOOGLE_BASIC_CONSENT_PHRASES = [
|
|
586
|
+
// "See your primary Google Account email address"
|
|
587
|
+
/see\s+your\s+primary\s+google\s+account\s+email\s+address/i,
|
|
588
|
+
// generic email-address grant wording
|
|
589
|
+
/\byour\s+(?:primary\s+)?(?:google\s+account\s+)?email\s+address\b/i,
|
|
590
|
+
// "See your personal info, including any personal info you've made
|
|
591
|
+
// publicly available" / "See your public profile"
|
|
592
|
+
/see\s+your\s+personal\s+info/i,
|
|
593
|
+
/your\s+public\s+profile/i,
|
|
594
|
+
// "Associate you with your personal info on Google"
|
|
595
|
+
/associate\s+you\s+with\s+your\s+personal\s+info/i,
|
|
596
|
+
];
|
|
597
|
+
// Sensitive (non-basic) scope-grant wording. Any hit means the consent
|
|
598
|
+
// reaches beyond identity — never auto-approve. Kept broad on purpose:
|
|
599
|
+
// a false "non-basic" only costs a manual review, but a missed one
|
|
600
|
+
// would auto-approve a sensitive grant.
|
|
601
|
+
const GOOGLE_NON_BASIC_CONSENT_PHRASES = [
|
|
602
|
+
/\bcontacts?\b/i,
|
|
603
|
+
/\bcalendars?\b/i,
|
|
604
|
+
/\b(?:google\s+)?drive\b/i,
|
|
605
|
+
/\byour\s+files?\b/i,
|
|
606
|
+
/\bgmail\b/i,
|
|
607
|
+
/send\s+(?:email|mail|messages)/i,
|
|
608
|
+
/\bspreadsheets?\b/i,
|
|
609
|
+
/\bsheets\b/i,
|
|
610
|
+
/\bphotos\b/i,
|
|
611
|
+
/\byoutube\b/i,
|
|
612
|
+
/\bon\s+your\s+behalf\b/i,
|
|
613
|
+
/\bmanage\s+your\b/i,
|
|
614
|
+
/\bedit\s+your\b/i,
|
|
615
|
+
/\bdelete\s+your\b/i,
|
|
616
|
+
/see\s+and\s+download\s+your/i,
|
|
617
|
+
];
|
|
618
|
+
// "basic" = the consent DOM lists ONLY openid/email/profile-family
|
|
619
|
+
// grants. See the block comment above for WHY this exists (Google hides
|
|
620
|
+
// scopes behind `part=` in the new consent URL; the visible phrases are
|
|
621
|
+
// the only signal, and a basic-only consent is what the URL-readable
|
|
622
|
+
// path auto-approves anyway). Returns false on ambiguous/empty so the
|
|
623
|
+
// caller keeps its conservative oauth_consent_needs_review abort —
|
|
624
|
+
// this gate only RECOVERS the basic-only case, never widens approval.
|
|
625
|
+
// Exported for unit testing.
|
|
626
|
+
/**
 * Decide from a Google consent page's visible text whether it lists ONLY
 * basic (openid/email/profile-family) grants. Exported for unit testing.
 *
 * Returns false unless there is a positive basic signal and no sensitive
 * signal, so an empty or ambiguous page is never treated as basic.
 *
 * @param {string} bodyText - Visible text of the consent page.
 * @returns {boolean} True only when a GOOGLE_BASIC_CONSENT_PHRASES entry
 *   matches and neither scrapeGoogleScopePhrases nor
 *   GOOGLE_NON_BASIC_CONSENT_PHRASES reports a hit.
 */
export function googleConsentIsBasicFromDom(bodyText) {
    // No readable text means no positive basic signal: take the
    // conservative answer rather than handing a non-string to the scrapers.
    if (typeof bodyText !== "string") {
        return false;
    }
    // First backstop: the existing scraper; any phrase it returns means
    // this is not a basic-only consent.
    if (scrapeGoogleScopePhrases(bodyText).length > 0) {
        return false;
    }
    // Second backstop: the broad non-basic wording list.
    if (GOOGLE_NON_BASIC_CONSENT_PHRASES.some((p) => p.test(bodyText))) {
        return false;
    }
    // Require recognizable basic-grant wording before answering true.
    return GOOGLE_BASIC_CONSENT_PHRASES.some((p) => p.test(bodyText));
}
|
|
392
639
|
// The set of value_kinds the planner is allowed to emit. Kept as a
|
|
393
640
|
// runtime array so validation and the exhaustive `valueFor` switch
|
|
394
641
|
// share one source of truth.
|
|
@@ -581,6 +828,21 @@ export function parseSignupPlan(raw, allowedSelectors) {
|
|
|
581
828
|
? { actions, submit_selector: submitSelector, confidence, notes }
|
|
582
829
|
: { actions, submit_selector: submitSelector, confidence };
|
|
583
830
|
}
|
|
831
|
+
// True when a clickSubmit failure is a Playwright visibility/attach
|
|
832
|
+
// timeout rather than a genuine hard error. A timeout means the submit
|
|
833
|
+
// selector resolved at plan-time but was gone by click-time — almost
|
|
834
|
+
// always because an earlier action in the same plan advanced a
|
|
835
|
+
// multi-step SPA (Paddle's "Continue" → next screen), so the right
|
|
836
|
+
// recovery is a re-plan against the new page, not a run-ending
|
|
837
|
+
// submit_failed. Matches Playwright's `locator.waitFor`/`waitForSelector`
|
|
838
|
+
// timeout text; deliberately does NOT match `submit_disabled` (handled
|
|
839
|
+
// separately) or other click errors (genuine failures). Exported for
|
|
840
|
+
// unit testing.
|
|
841
|
+
/**
 * True when a submit-click failure reason reads like a wait/visibility
 * timeout ("Timeout <n>ms exceeded" together with "waitFor"/"waiting for")
 * rather than another kind of click error. Reasons starting with
 * "submit_disabled" are never treated as timeouts. Exported for unit testing.
 *
 * @param {string} reason - Failure reason text from the submit click.
 * @returns {boolean} Whether the reason matches the timeout shape.
 */
export function isSubmitTimeout(reason) {
    // A missing or non-string reason cannot be classified as a timeout.
    if (typeof reason !== "string") {
        return false;
    }
    if (reason.startsWith("submit_disabled")) {
        return false;
    }
    return /Timeout \d+ms exceeded/i.test(reason) && /waitfor|waiting for/i.test(reason);
}
|
|
584
846
|
// Render the element inventory as a compact text block for the
|
|
585
847
|
// planner — one line per element, ending with the verified
|
|
586
848
|
// `selector=` the planner must copy verbatim (F3 T3).
|
|
@@ -821,6 +1083,369 @@ export function hostMatchesServiceDomain(hostname, serviceSlug) {
|
|
|
821
1083
|
const normalized = firstLabel.replace(/[^a-z0-9]/g, "");
|
|
822
1084
|
return normalized === serviceSlug;
|
|
823
1085
|
}
|
|
1086
|
+
// Strip HTML tags + decode the handful of entities that show up in the
|
|
1087
|
+
// copy we key on, then lowercase. We classify on the VISIBLE COPY because
|
|
1088
|
+
// that's the only thing that reliably distinguishes a signup form from a
|
|
1089
|
+
// login form — both have an <input type="password"> and an email field,
|
|
1090
|
+
// so structure alone is ambiguous (the exact bug looksLikeSignupPage
|
|
1091
|
+
// can't see past). The decoded entities matter: "Create&nbsp;account" or
|
|
1092
|
+
// a "Don&#39;t have an account?" link would otherwise hide the
|
|
1093
|
+
// discriminating phrase behind an entity.
|
|
1094
|
+
/**
 * Reduce an HTML string to lowercase text: drop <script>/<style> blocks and
 * all remaining tags, decode a small set of entities (&nbsp;, &#39;/&#x27;,
 * &apos;, &quot;, &amp;), and collapse whitespace runs to single spaces.
 *
 * @param {string} html - Raw HTML.
 * @returns {string} Lowercased text. Leading/trailing spaces left behind by
 *   removed tags are collapsed but not trimmed.
 */
function stripHtmlToText(html) {
    return html
        .replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, " ")
        .replace(/<style\b[^>]*>[\s\S]*?<\/style>/gi, " ")
        .replace(/<[^>]+>/g, " ")
        .replace(/&nbsp;/gi, " ")
        .replace(/&#(?:39|x27);/gi, "'")
        .replace(/&apos;/gi, "'")
        .replace(/&quot;/gi, '"')
        // "&amp;" is decoded LAST: decoding it earlier would turn an escaped
        // entity such as "&amp;#39;" into "&#39;" and then decode it a
        // second time.
        .replace(/&amp;/gi, "&")
        .replace(/\s+/g, " ")
        .toLowerCase();
}
|
|
1107
|
+
// Classify a fetched page as a signup form, a login form, or neither.
|
|
1108
|
+
//
|
|
1109
|
+
// WHY this exists: looksLikeSignupPage() answers "does this page have a
|
|
1110
|
+
// form?" — which a LOGIN page also satisfies (email + password + a
|
|
1111
|
+
// "Continue with Google" button). The discriminator is the COPY, not the
|
|
1112
|
+
// structure: a real email-signup form carries create-account CTA text
|
|
1113
|
+
// ("create account", "sign up", "get started", "register"); a login form
|
|
1114
|
+
// carries "sign in" / "log in" / "welcome back" and lacks the create CTA.
|
|
1115
|
+
// This is the heart of the stale-URL fix — a curated /signup that
|
|
1116
|
+
// silently serves the login SPA classifies as "login" here, which lets
|
|
1117
|
+
// the resolver reject it and probe for the real signup path.
|
|
1118
|
+
/**
 * Classify a fetched page as a signup form, a login form, or neither, using
 * its visible copy plus the presence of a password field.
 *
 * Decision order:
 *   1. title contains "404" or "not found"               -> "other"
 *   2. password field + signup CTA, no login-dominant heading -> "signup"
 *   3. login-dominant heading + password field           -> "login"
 *   4. login copy with no signup CTA                     -> "login"
 *   5. anything else                                     -> "other"
 *
 * @param {string} html - Raw page HTML.
 * @param {string} [title] - Page title; a missing title is treated as "".
 * @returns {"signup"|"login"|"other"} The classification.
 */
export function classifySignupHtml(html, title) {
    const text = stripHtmlToText(html);
    const titleLower = (title ?? "").toLowerCase();
    // 404 / error shell wins regardless of stray form copy. ("page not
    // found" needs no separate check: it already contains "not found".)
    if (titleLower.includes("404") || titleLower.includes("not found")) {
        return "other";
    }
    // Password detection runs on the RAW html because attribute values live
    // inside the tags the text stripper removes. Either type=password or a
    // name/id of "password" counts.
    const hasPassword = /type\s*=\s*["']?password["']?/i.test(html) ||
        /(?:name|id)\s*=\s*["']?password["']?/i.test(html);
    // Create-account CTA copy — the signup discriminator.
    const hasSignupCta = /\bcreate (?:an )?account\b/.test(text) ||
        /\bcreate your account\b/.test(text) ||
        /\bsign[\s-]?up\b/.test(text) ||
        /\bget started\b/.test(text) ||
        /\bregister\b/.test(text);
    // Generic login copy — present on any sign-in form.
    const hasLoginCopy = /\bsign in\b/.test(text) ||
        /\blog[\s-]?in\b/.test(text) ||
        /\bwelcome back\b/.test(text);
    // Login-dominant headings: the PRIMARY form is login even when a
    // "Sign up" link is also present; used to veto a false "signup" read.
    const loginDominant = /\bsign in to your account\b/.test(text) ||
        /\bwelcome back\b/.test(text) ||
        /\blog[\s-]?in to\b/.test(text);
    if (hasPassword && hasSignupCta && !loginDominant) {
        return "signup";
    }
    if (loginDominant && hasPassword) {
        return "login";
    }
    if (hasLoginCopy && !hasSignupCta) {
        return "login";
    }
    return "other";
}
|
|
1171
|
+
// Pull the email address an email-verification wall names ("check your
|
|
1172
|
+
// <addr> inbox", "we sent a link to <addr>"). Returns the first email-shaped
|
|
1173
|
+
// token, or null. Used to poll the RIGHT alias when the wall was reached
|
|
1174
|
+
// without a fresh submit (a pending account may carry an alias from a prior
|
|
1175
|
+
// run, not task.email). Exported for unit tests.
|
|
1176
|
+
/**
 * Return the first email-shaped token in `text` that does not end in a
 * static-asset file extension, or null when there is none. Exported for
 * unit tests.
 *
 * @param {string} text - Page text or raw HTML to scan.
 * @returns {string|null} The first qualifying address, or null (also for
 *   non-string input).
 */
export function extractVerifyWallAlias(text) {
    if (typeof text !== "string") {
        return null;
    }
    const emailShaped = /[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}/gi;
    // Email-SHAPED asset references (e.g. a script src like
    // "amplitude-analytics-browser@2.42.4-fe68beca4b18.js") also match the
    // pattern above; anything ending in a file extension is skipped.
    const assetSuffix = /\.(?:js|mjs|css|map|png|jpe?g|svg|gif|ico|woff2?|ttf|webp)$/i;
    for (const [addr] of text.matchAll(emailShaped)) {
        if (!assetSuffix.test(addr)) {
            return addr;
        }
    }
    return null;
}
|
|
1192
|
+
// Pure: does this post-submit page look like a CONTINUATION step of the same
|
|
1193
|
+
// signup (a dedicated "Create your password" page — amplitude's step 2 — is the
|
|
1194
|
+
// canonical case) rather than a dashboard, a credentials page, or a
|
|
1195
|
+
// verify-your-email screen? Conservative on purpose: requires a VISIBLE, EMPTY
|
|
1196
|
+
// password input the bot still needs to fill AND a create/continue-style submit
|
|
1197
|
+
// control, and the page must NOT read as a verify-your-email screen or a login
|
|
1198
|
+
// form (a "sign in" page also has a password field, but re-filling it with the
|
|
1199
|
+
// run's generated password would just fail). Exported for unit tests.
|
|
1200
|
+
/**
 * Pure check: does this post-submit page look like a continuation step of
 * the same signup (for example a dedicated "create your password" page)?
 * Exported for unit tests.
 *
 * Returns true only when ALL of the following hold:
 *   - expectsVerificationEmail(html) is false,
 *   - classifySignupHtml(html) is not "login",
 *   - the inventory has an <input type="password"> that is not marked
 *     hidden and whose value is empty or missing,
 *   - the inventory has a button / type=submit control whose visible text
 *     or aria-label contains a create/continue-style word.
 *
 * @param {string} html - Raw HTML of the post-submit page.
 * @param {Array<object>} inventory - Entries with `tag`, `type`, `visible`,
 *   `value`, `visibleText`, `ariaLabel`.
 * @returns {boolean} Whether the page reads as a signup continuation step.
 */
export function isContinuationFormStep(html, inventory) {
    // A verify-your-email page is finished by the inbox poll, not re-filled.
    if (expectsVerificationEmail(html)) {
        return false;
    }
    // A login page must not be mistaken for a signup continuation.
    if (classifySignupHtml(html) === "login") {
        return false;
    }
    const hasEmptyPassword = inventory.some((e) => e.tag === "input" &&
        e.type === "password" &&
        e.visible !== false &&
        (e.value ?? "") === "");
    if (!hasEmptyPassword) {
        return false;
    }
    // Labels are lowercased before testing, so the pattern needs no "i" flag.
    const submitLabel = /\b(?:create|continue|sign[\s-]?up|next|submit|finish|get started|done)\b/;
    return inventory.some((e) => {
        if (e.tag !== "button" && e.type !== "submit") {
            return false;
        }
        const label = `${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`.toLowerCase();
        return submitLabel.test(label);
    });
}
|
|
1220
|
+
// Find the in-page "create an account" affordance on a LOGIN page that also
// advertises signup ("Don't have an account? Sign up for free" — the
// amplitude case). After Google OAuth, such a service has signed the identity
// in but has no account/org for it, and expects the in-page signup CTA to be
// clicked to create one. We surface that element so the post-OAuth recovery
// can click it and re-route into the email/password signup path, instead of
// re-triggering OAuth in a loop.
//
// `inventory` is the list of page elements; each entry is read for `tag`,
// `role`, `visibleText` and `ariaLabel`. Returns the chosen element, or null
// when no signup affordance is present (so callers fall through to the
// existing re-OAuth path).
//
// Selection rule: the first matching <a>/<button> wins; a matching
// role="button" element is only used when no real link/button matches.
export function findSignupCtaElement(inventory) {
    // Signup intent: "sign up" / "sign up for free" / "create (an) account" /
    // "register" / "get started". Word-bounded so "signup" matches but
    // "designup" doesn't.
    const signupIntent = /\b(?:sign[\s-]?up(?:\s+for\s+free)?|create\s+(?:an?\s+)?account|register|get\s+started)\b/i;
    // OAuth affordances — clicking these re-triggers the OAuth handshake, the
    // exact loop we're trying to escape. Besides the "continue with" /
    // "sign in with" / "log in with" verbs, any label that ends up naming a
    // provider ("Sign up with Google", "Get started with GitHub") is an OAuth
    // button too, even though it also reads as signup. "Sign up with email"
    // names no provider and stays eligible.
    const oauthAffordance = /continue with|sign in with|log ?in with|\bwith\s+(?:google|github|gitlab|microsoft|apple)\b/i;
    // First role="button" match, kept only as a fallback.
    let roleButtonFallback = null;
    for (const el of inventory) {
        // Only clickable affordances — an <a>, a <button>, or anything with an
        // explicit button role. A bare <div> label isn't reliably clickable.
        const isNativeControl = el.tag === "a" || el.tag === "button";
        if (!isNativeControl && (el.role ?? "").toLowerCase() !== "button") {
            continue;
        }
        const label = `${el.visibleText ?? ""} ${el.ariaLabel ?? ""}`.trim();
        if (label === "") {
            continue;
        }
        if (oauthAffordance.test(label)) {
            continue;
        }
        // Must read as a signup affordance. A pure "Sign in" / "Log in"
        // button never passes this test, so no separate login guard is needed.
        if (!signupIntent.test(label)) {
            continue;
        }
        // A real link/button is the canonical signup CTA: take the first one.
        if (isNativeControl) {
            return el;
        }
        if (roleButtonFallback === null) {
            roleButtonFallback = el;
        }
    }
    return roleButtonFallback;
}
|
|
1284
|
+
// Detects a read-only DEMO / sandbox page that a service drops new users
// into after OAuth (amplitude: app.amplitude.com/analytics/demo) instead of a
// real account. Such a page has no API key; a real org needs the page's
// "Create a free account" CTA.
//
// Deliberately conservative — true only when either:
//   * the URL path contains a segment that is exactly "demo", or
//   * the body text carries explicit demo copy ("you are currently in the …
//     demo", "this is a demo", "viewing the demo", "demo account", …).
// A URL that fails to parse simply skips the path check. Exported for unit
// tests.
export function isSandboxDemoState(url, bodyText) {
    let lowerPath = null;
    try {
        lowerPath = new URL(url).pathname.toLowerCase();
    }
    catch {
        // Not a parseable URL — only the text check below applies.
        lowerPath = null;
    }
    const hasDemoSegment = lowerPath !== null && /(?:^|\/)demo(?:\/|$)/.test(lowerPath);
    if (hasDemoSegment) {
        return true;
    }
    const demoCopy = /you are currently in the .{0,30}demo|this is (?:a|the) .{0,20}demo|viewing (?:the )?demo|demo (?:account|environment|workspace)\b/i;
    return demoCopy.test(bodyText);
}
|
|
1301
|
+
// Find the "Create a free account" CTA that escapes a demo/sandbox into the
// real signup. Kept separate from the login-page signup-CTA lookup because the
// demo phrasing puts "free" between the article and "account", and because
// "… for free" variants ("Sign up for free", "Get started for free") count
// here.
//
// `inventory` entries are read for `tag`, `role`, `visibleText` and
// `ariaLabel`. Only clickable elements are considered: <a>, <button>, or an
// element whose role is "button" (compared case-insensitively). Returns the
// first match in inventory order, or null when there is none. Exported for
// unit tests.
export function findCreateAccountCta(inventory) {
    // `an?` so both "Create a free account" and "Create an account" match;
    // the article and "free" are each optional.
    const createAccountCopy = /create\s+(?:an?\s+)?(?:free\s+)?account|sign\s*up\s+for\s+free|get\s+started\s+for\s+free/i;
    for (const e of inventory) {
        const isClickable = e.tag === "a" ||
            e.tag === "button" ||
            (e.role ?? "").toLowerCase() === "button";
        if (!isClickable) {
            continue;
        }
        const text = `${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`.trim();
        if (createAccountCopy.test(text)) {
            return e;
        }
    }
    return null;
}
|
|
1317
|
+
// Well-known signup paths to probe, highest priority first. The list is kept
// short and ordered deliberately: the goal is the FIRST real signup form, and
// every extra guess costs a round-trip over a residential tunnel.
// "/auth/signup" ranks second because it catches the plunk case
// (app.useplunk.com/auth/signup 308 → next-app.useplunk.com/auth/signup).
const CONVENTIONAL_SIGNUP_PATHS = [
    "/signup", "/auth/signup", "/sign-up", "/register",
    "/users/sign_up", "/account/signup", "/join",
];
|
|
1331
|
+
// Leading-label rewrites applied to the hint host. Dashboards tend to live
// behind app. / www. / console. / dashboard., while the signup form often
// lives on auth. (the bare apex is added separately by the candidate
// builder). Each entry is [pattern matching the leading label, replacement];
// swapping just that label widens the probe without fanning out blindly
// across arbitrary subdomains.
const SIGNUP_HOST_PREFIX_SWAPS = [
    [/^app\./, "auth."], [/^www\./, "auth."],
    [/^console\./, "auth."], [/^dashboard\./, "auth."],
];
|
|
1341
|
+
// Produce the ordered, duplicate-free list of https:// URLs to probe for a
// signup form. `hint` is a parsed URL; only its hostname is used.
//
// Hosts, in order: the hint host itself, each prefix-swapped variant whose
// pattern matches it, then the registered domain reported by getDomain (when
// it returns non-null). URLs are emitted path-major — "/signup" on every
// host, then "/auth/signup" on every host, and so on. Off-domain redirects
// are not filtered here; that is left to the resolver's domain-safety check.
function buildSignupCandidates(hint) {
    const { hostname } = hint;
    const hostSet = new Set([hostname]);
    SIGNUP_HOST_PREFIX_SWAPS
        .filter(([prefixPattern]) => prefixPattern.test(hostname))
        .forEach(([prefixPattern, replacement]) => {
        hostSet.add(hostname.replace(prefixPattern, replacement));
    });
    const apex = getDomain(hostname);
    if (apex !== null) {
        hostSet.add(apex);
    }
    // A Set keeps first-insertion order, so it both de-dupes and preserves
    // the path-major ordering.
    const orderedUrls = new Set();
    for (const signupPath of CONVENTIONAL_SIGNUP_PATHS) {
        for (const host of hostSet) {
            orderedUrls.add(`https://${host}${signupPath}`);
        }
    }
    return [...orderedUrls];
}
|
|
1370
|
+
// Tier A of the signup-URL resolver: the HTTP fast-path.
//
//   hintUrl     — starting URL (curated YAML or a guess).
//   serviceSlug — service identifier, used only as a secondary allowance in
//                 the off-domain check.
//   fetchText   — injected, redirect-following fetcher; resolves to null on
//                 failure or to an object with `finalUrl` and `bodyText`.
//                 Injected so this is unit-testable with a fake; in production
//                 it is bound to BrowserController.fetchText, which egresses
//                 through the same residential proxy + cookie jar as the real
//                 navigation.
//   log         — optional sink for "[signup-url] …" progress notes.
//
// Resolves to a URL that actually serves a signup FORM, or to null when the
// HTTP probe can't find one (the caller then escalates to the landing-page
// CTA or the Google-search fallback).
export async function resolveSignupUrlByProbe(hintUrl, serviceSlug, fetchText, log) {
    const note = (message) => log?.(message);
    let hint;
    try {
        hint = new URL(hintUrl);
    }
    catch {
        note(`[signup-url] hint ${hintUrl} is not a URL — skipping HTTP probe`);
        return null;
    }
    // Step 1 — the hint itself, followed through redirects. A 308 chain
    // (plunk's app. → next-app.) resolves here for free.
    const hintRes = await fetchText(hintUrl);
    const hintIsSignup = hintRes !== null && classifySignupHtml(hintRes.bodyText) === "signup";
    if (hintIsSignup) {
        const wasRedirected = hintRes.finalUrl !== hintUrl;
        note(wasRedirected
            ? `[signup-url] hint ${hintUrl} redirected to signup ${hintRes.finalUrl}`
            : `[signup-url] hint ${hintUrl} is already a signup form`);
        return hintRes.finalUrl;
    }
    const missDetail = hintRes === null
        ? " (fetch failed)"
        : ` (${classifySignupHtml(hintRes.bodyText)})`;
    note(`[signup-url] hint ${hintUrl} did not classify as signup` + missDetail);
    // Step 2 — conventional paths. The hint's registered domain (eTLD+1) is
    // the trusted anchor: a candidate is in-bounds when its final URL stays on
    // that same registered domain. The service slug frequently isn't the
    // domain label (plunk's site is useplunk.com), so the slug match is only a
    // secondary allowance, for a curated hint pointing at a canonical site on
    // a different registered domain.
    const hintDomain = getDomain(hint.hostname.toLowerCase());
    for (const candidate of buildSignupCandidates(hint)) {
        if (candidate === hintUrl) {
            continue; // already tried as the hint
        }
        const res = await fetchText(candidate);
        if (res === null || classifySignupHtml(res.bodyText) !== "signup") {
            continue;
        }
        let finalHost;
        try {
            finalHost = new URL(res.finalUrl).hostname;
        }
        catch {
            continue;
        }
        const finalDomain = getDomain(finalHost.toLowerCase());
        const staysOnHintDomain = hintDomain !== null && finalDomain !== null && finalDomain === hintDomain;
        // Guards against a path that redirects to a third party (e.g. a
        // generic SSO portal on a different registered domain).
        if (staysOnHintDomain || hostMatchesServiceDomain(finalHost, serviceSlug)) {
            note(`[signup-url] resolved via probe: ${candidate} → ${res.finalUrl}`);
            return res.finalUrl;
        }
        note(`[signup-url] candidate ${candidate} → ${res.finalUrl} rejected: ` +
            `off-domain (hint domain ${hintDomain ?? "?"})`);
    }
    note(`[signup-url] no conventional signup path resolved for ${hintUrl}`);
    return null;
}
|
|
824
1449
|
// BUG-3 GUARD — diagnostic flag for the Inventory snapshot. Stricter
|
|
825
1450
|
// than detectAntiBotBlock (no "cf-turnstile" / "recaptcha" raw-HTML
|
|
826
1451
|
// matches) because the previous regex false-positive matched legitimate
|
|
@@ -887,6 +1512,39 @@ export function detectAlreadySignedIn(args) {
|
|
|
887
1512
|
(e.type === "email" || e.type === "password" || e.type === "tel"));
|
|
888
1513
|
if (hasCredentialInput)
|
|
889
1514
|
return false;
|
|
1515
|
+
// Signal 0 — a strong post-login URL path. An onboarding /
|
|
1516
|
+
// getting-started / welcome route is only reachable AFTER you're
|
|
1517
|
+
// authenticated (you cannot see a "you're all set, next steps" wizard
|
|
1518
|
+
// without a session), so the URL alone is conclusive here — unlike the
|
|
1519
|
+
// weaker dashboard paths in Signal 3, no paired creation-CTA is needed.
|
|
1520
|
+
// last9 lands the bot on /v2/organizations/<slug>/getting-started with
|
|
1521
|
+
// its Google session already active; its buttons ("Choose your region",
|
|
1522
|
+
// "You're all set! Next steps", "Upgrade Plan") matched none of the CTA
|
|
1523
|
+
// vocabularies below, so it used to bail `oauth_required` — claiming
|
|
1524
|
+
// "only OAuth/SSO signup, no email/password form" while the bot was in
|
|
1525
|
+
// fact fully signed in. The precondition above already ruled out a
|
|
1526
|
+
// signup chooser (no credential input).
|
|
1527
|
+
// ...UNLESS the page still presents a signup/OAuth chooser (a
|
|
1528
|
+
// "Continue with Google" button or a bare "Sign up"/"Log in"). Some
|
|
1529
|
+
// services route the login chooser through an /onboarding-style URL; if
|
|
1530
|
+
// a provider button is visible, the bot must OAuth via it, not treat the
|
|
1531
|
+
// page as already-authenticated. (PostHog TS-1923.)
|
|
1532
|
+
const hasSignupAffordance = inventory.some((e) => {
|
|
1533
|
+
const t = `${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`
|
|
1534
|
+
.toLowerCase()
|
|
1535
|
+
.replace(/\s+/g, " ")
|
|
1536
|
+
.trim();
|
|
1537
|
+
return (/\b(?:continue with|sign ?up with|sign ?in with|log ?in with|with (?:google|github|gitlab|microsoft|apple))\b/.test(t) || /^(?:sign ?up|sign ?in|log ?in|create (?:an )?account)$/.test(t));
|
|
1538
|
+
});
|
|
1539
|
+
try {
|
|
1540
|
+
if (!hasSignupAffordance &&
|
|
1541
|
+
/\/(?:getting-started|get-started|onboarding|welcome)(?:\/|$)/i.test(new URL(url).pathname)) {
|
|
1542
|
+
return true;
|
|
1543
|
+
}
|
|
1544
|
+
}
|
|
1545
|
+
catch {
|
|
1546
|
+
// malformed URL — fall through to the other signals
|
|
1547
|
+
}
|
|
890
1548
|
const visibleTextOf = (e) => `${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`.trim();
|
|
891
1549
|
// Signal 1 — strict nav-keyword match (the canonical Sentry-class case).
|
|
892
1550
|
const AUTH_KEYWORDS = /^\s*(?:sign out|log out|dashboard|projects|settings|profile|my account|account settings|workspaces)\s*$/i;
|
|
@@ -1130,13 +1788,25 @@ export function findOAuthButton(inventory, provider) {
|
|
|
1130
1788
|
const href = (e.href ?? "").toLowerCase();
|
|
1131
1789
|
if (href.length > 0 && hrefRe.test(href))
|
|
1132
1790
|
return e;
|
|
1133
|
-
// 2. Icon-only button — named only by a descendant img/svg.
|
|
1134
|
-
//
|
|
1135
|
-
//
|
|
1136
|
-
//
|
|
1137
|
-
//
|
|
1138
|
-
|
|
1139
|
-
|
|
1791
|
+
// 2. Icon-only (logo) button — named only by a descendant img/svg.
|
|
1792
|
+
// Truly-empty visibleText is the clean case. But a logo button whose
|
|
1793
|
+
// <svg> carries a <title>GitHub</title> LEAKS that title into
|
|
1794
|
+
// textContent (northflank renders "GitHubGitHub" — doubled, which
|
|
1795
|
+
// also defeats the \bgithub\b match in path 3), so it isn't strictly
|
|
1796
|
+
// empty. Treat it as icon-only too WHEN its visible text is nothing
|
|
1797
|
+
// but the provider name (any number of times): strip every keyword
|
|
1798
|
+
// occurrence and require no residue. A nav link like "GitHub's
|
|
1799
|
+
// Privacy Policy" leaves residue and is correctly rejected. The
|
|
1800
|
+
// iconLabel must still independently name the provider, so a stray
|
|
1801
|
+
// one-word label can't false-positive.
|
|
1802
|
+
const kw = keyword.toLowerCase();
|
|
1803
|
+
const residue = visibleText
|
|
1804
|
+
.toLowerCase()
|
|
1805
|
+
.split(kw)
|
|
1806
|
+
.join("")
|
|
1807
|
+
.replace(/[\s·|/–-]+/g, "");
|
|
1808
|
+
const isLogoOnly = visibleText.length === 0 || residue.length === 0;
|
|
1809
|
+
if (isLogoOnly && keywordRe.test((e.iconLabel ?? "").toLowerCase())) {
|
|
1140
1810
|
return e;
|
|
1141
1811
|
}
|
|
1142
1812
|
// 3. Visible text / accessible label naming the provider + an
|
|
@@ -1148,7 +1818,16 @@ export function findOAuthButton(inventory, provider) {
|
|
|
1148
1818
|
.trim();
|
|
1149
1819
|
if (!keywordRe.test(text))
|
|
1150
1820
|
continue;
|
|
1151
|
-
|
|
1821
|
+
// "with <provider>" is the OAuth-button idiom and is accepted
|
|
1822
|
+
// directly — it survives an SVG accessible name glued to the verb.
|
|
1823
|
+
// elevenlabs renders its button text as "GoogleSign up with Google",
|
|
1824
|
+
// which fuses "sign" into "googlesign" so the bare \bsign\b check
|
|
1825
|
+
// misses, but "with google" still matches. (A blanket camelCase split
|
|
1826
|
+
// can't be used to un-glue it — it would mangle the provider name
|
|
1827
|
+
// itself, e.g. "GitHub" → "Git Hub".)
|
|
1828
|
+
const withProviderRe = new RegExp(`\\bwith ${keyword}\\b`);
|
|
1829
|
+
if (/\b(sign|signup|signin|continue|log ?in|connect|auth)\b/.test(text) ||
|
|
1830
|
+
withProviderRe.test(text)) {
|
|
1152
1831
|
return e;
|
|
1153
1832
|
}
|
|
1154
1833
|
// rc.39 — minimal-label OAuth buttons. Some auth UIs render the
|
|
@@ -1228,15 +1907,24 @@ export function isLoginLoopState(url, inventory, provider) {
|
|
|
1228
1907
|
// loop-detect path saw the Google button + the login-shaped URL
|
|
1229
1908
|
// and looped OAuth indefinitely.
|
|
1230
1909
|
//
|
|
1231
|
-
// When
|
|
1232
|
-
//
|
|
1233
|
-
//
|
|
1234
|
-
//
|
|
1235
|
-
//
|
|
1236
|
-
//
|
|
1237
|
-
//
|
|
1238
|
-
|
|
1239
|
-
|
|
1910
|
+
// When a PASSWORD input is visible alongside (2) an OAuth button for
|
|
1911
|
+
// the provider we just used, the page is a genuine hybrid
|
|
1912
|
+
// credential-creation form (Clerk/Auth0: email + password [+ turnstile]),
|
|
1913
|
+
// not a loop. Return null so the caller falls through to the
|
|
1914
|
+
// post-verify flow — its planner drives the form-fill, the captcha
|
|
1915
|
+
// gate, and the Continue click the same way the form-fill phase does.
|
|
1916
|
+
//
|
|
1917
|
+
// A BARE EMAIL field does NOT count: it's the near-universal "or
|
|
1918
|
+
// continue with email" magic-link/OTP alternative that sits next to
|
|
1919
|
+
// the OAuth buttons on an ordinary login page (groq's /authenticate,
|
|
1920
|
+
// northflank's /login, …). Treating that as a hybrid form suppressed
|
|
1921
|
+
// the login-loop OAuth retry these services REQUIRE — they finalize
|
|
1922
|
+
// the Stytch/WorkOS session only on a second OAuth click — and
|
|
1923
|
+
// stranded them at oauth_session_not_persisted. The email-OTP case
|
|
1924
|
+
// that genuinely needs the planner is caught separately downstream
|
|
1925
|
+
// (detectEmailOtpGate), so narrowing to password here is safe.
|
|
1926
|
+
const hasPasswordInput = inventory.some((e) => e.tag === "input" && e.type === "password");
|
|
1927
|
+
if (hasPasswordInput)
|
|
1240
1928
|
return null;
|
|
1241
1929
|
return findOAuthButton(inventory, provider);
|
|
1242
1930
|
}
|
|
@@ -1308,6 +1996,47 @@ export function detectSsoRestriction(pageText) {
|
|
|
1308
1996
|
// "Single Sign-On is required", "SSO organization membership".
|
|
1309
1997
|
return /(?:managed\s+via\s+(?:sso|single\s+sign-?on)|sso[\s-]?managed|sso\s+organization|single\s+sign-?on\s+is\s+required|enforced\s+by\s+(?:sso|saml))/.test(lower);
|
|
1310
1998
|
}
|
|
1999
|
+
// Google-OAuth-is-LOGIN-ONLY (plunk class). Some services accept Google only
// to log an EXISTING account in; they do NOT auto-provision a new account for
// a first-time Google identity. The OAuth handshake completes, then the
// service bounces back to its login page with an explicit "no account"
// message — e.g. plunk lands on
// `…/auth/login?message=No%20account%20found%20for%20this%20Google%20account`.
//
// WHY a dedicated detector: this state otherwise trips
// detectManualLoginFallback (it IS a /login form) and aborts as
// `oauth_session_not_persisted` — misleading, because nothing dropped the
// session; the account simply was never created. The correct recovery is to
// abandon OAuth and create the account via the email/password form. Caller
// re-routes to form-fill on a true return.
//
// `url` is the current page URL (an unparseable URL contributes no query
// text); `bodyText` is the page text (null/undefined is treated as empty).
// Returns true when the decoded query string or the body matches one of the
// no-account / must-sign-up phrases below.
export function detectGoogleNoAccount(url, bodyText) {
    // Decode the query string (where plunk parks its message). "+" is the
    // form-encoding for a space, so it is converted before percent-decoding.
    // A malformed escape ("…=100%") makes decodeURIComponent throw; in that
    // case keep the raw query rather than discarding it, so keys such as
    // `google-auth-error` are still seen.
    let query = "";
    try {
        const rawSearch = new URL(url).search;
        try {
            query = decodeURIComponent(rawSearch.replace(/\+/g, " "));
        }
        catch {
            query = rawSearch;
        }
    }
    catch {
        query = "";
    }
    // Both halves lowercased for case-insensitive matching.
    const haystack = `${query.toLowerCase()}\n${(bodyText ?? "").toLowerCase()}`;
    // MEASURED 2026-06-04 (clerk): after Google OAuth, clerk bounces to its
    // sign-in showing "The External Account was not found" — Google signed in
    // but no clerk account exists for this identity (same class as plunk's
    // "No account found"). The "…not found" / "couldn't find an account" /
    // "no such account" variants catch clerk's wording; each of those requires
    // the word "account", so a bare 404 "Page not found" does NOT trip this.
    // The remaining alternatives ("sign up first", "sign up to continue",
    // "create an account", "register first", a `google-auth-error` query key)
    // match without it.
    // NOTE(review): "create an account" also reads like routine login-page
    // copy — confirm callers only consult this right after an OAuth bounce.
    const noAccountPhrase = /no account found|external account was not found|account (?:was )?not found|no (?:such )?account (?:found|exists)|account (?:doesn['’]?t|does not) exist|couldn['’]?t find (?:an|your) account|no account associated|sign up (?:first|to continue)|create an account|[?&]google-auth-error|register first/;
    return noAccountPhrase.test(haystack);
}
|
|
1311
2040
|
// (d) Stuck-on-Google-OAuth-screens (Upstash class). After
|
|
1312
2041
|
// settleAfterOAuth the URL is STILL on accounts.google.com — the
|
|
1313
2042
|
// handshake didn't redirect through to the service. Most common
|
|
@@ -1327,6 +2056,25 @@ export function detectStuckOnGoogleOAuth(url) {
|
|
|
1327
2056
|
return false;
|
|
1328
2057
|
}
|
|
1329
2058
|
}
|
|
2059
|
+
// Reports whether `url` points at an OAuth/SSO CALLBACK route — the redirect
// target where the SPA exchanges the provider code for a session. MEASURED
// 2026-06-04: clerk's `/sign-in/sso-callback` does a token exchange that
// renders even slower than its already-slow dashboard (~15s over the
// residential proxy). On a callback route the SPA IS making progress, so the
// post-verify hydration loop grants it a larger budget; every other route
// keeps the smaller budget (a never-hydrating page must not burn the run cap).
//
// Only the pathname is inspected (case-insensitively); query and hash are
// ignored. Anything that does not parse as a URL yields false.
export function isOAuthCallbackRoute(url) {
    let parsed;
    try {
        parsed = new URL(url);
    }
    catch {
        return false;
    }
    const callbackPath = /\/sso-callback|\/oauth\/callback|\/auth\/callback|\/callback(?:\/|$)|\/login\/callback/i;
    return callbackPath.test(parsed.pathname);
}
|
|
1330
2078
|
// Scan the inventory for the first OAuth affordance among `providers`,
|
|
1331
2079
|
// in order — the auto-prefer decision passes every provider the
|
|
1332
2080
|
// profile has a session for. Returns the matched provider + element.
|
|
@@ -1338,21 +2086,74 @@ export function findFirstOAuthButton(inventory, providers) {
|
|
|
1338
2086
|
}
|
|
1339
2087
|
return null;
|
|
1340
2088
|
}
|
|
2089
|
+
// Some pages hide the real login UI behind a generic "Sign In to Continue"
// interstitial that renders NO provider affordance yet — Qdrant's
// session-expiry flow redirects to /logout?aerr=expired whose only element is
// a "Sign In to Continue" button, with the Google button one click deeper.
// The OAuth-first scan finds no provider affordance there and used to bail
// `oauth_required` without ever advancing. This helper picks a generic
// sign-in-ish control to CLICK so the next scan can see the provider buttons.
//
// The label must match SIGN_IN_ADVANCE_RE: "sign in" / "log in" (space or
// hyphen optional), "continue with email", "continue to sign…/log…", or
// "get started". A bare "Continue", "Go to Dashboard" or "Join Workspace"
// does not match.
// NOTE(review): earlier documentation listed "Get started" among the labels
// that must NOT match, but the pattern includes it — confirm which is
// intended.
//
// The caller bounds the number of click-throughs. Returns null when a
// provider button is already present or when nothing matches. Exported for
// unit testing.
const SIGN_IN_ADVANCE_RE = /\b(?:sign[\s-]?in|log[\s-]?in|continue with email|continue to (?:sign|log)|get started)\b/i;
export function findSignInAdvanceButton(inventory, providers) {
    // With a provider affordance already on the page the caller would have
    // taken the OAuth path, so a page with both (e.g. a "Sign in" header link
    // + "Continue with Google") never routes through this click-through.
    const providerAlreadyOffered = findFirstOAuthButton(inventory, providers) !== null;
    if (providerAlreadyOffered) {
        return null;
    }
    const advanceControl = inventory.find((e) => {
        const looksClickable = [e.tag === "button", e.tag === "a", e.role === "button", e.type === "submit", e.type === "button"].includes(true);
        if (!looksClickable) {
            return false;
        }
        const label = `${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`
            .replace(/\s+/g, " ")
            .trim();
        // Sanity cap: a real sign-in button is short. A long string is a
        // paragraph / card that merely happens to contain "sign in".
        const hasButtonSizedLabel = label.length > 0 && label.length <= MAX_OAUTH_BUTTON_TEXT_CHARS;
        return hasButtonSizedLabel && SIGN_IN_ADVANCE_RE.test(label);
    });
    return advanceControl ?? null;
}
|
|
1341
2131
|
// Order the OAuth providers the bot may use for a signup, given the
|
|
1342
2132
|
// service's yaml pin (if any) and the providers the persistent profile
|
|
1343
2133
|
// actually has a session for. `findFirstOAuthButton` walks this list in
|
|
1344
2134
|
// order and uses the first provider the PAGE offers, so order = preference.
|
|
1345
2135
|
//
|
|
1346
|
-
//
|
|
1347
|
-
//
|
|
1348
|
-
//
|
|
1349
|
-
//
|
|
1350
|
-
//
|
|
1351
|
-
//
|
|
1352
|
-
//
|
|
1353
|
-
//
|
|
2136
|
+
// RULE 1 — respect an explicit pin when its session is warm. The operator
|
|
2137
|
+
// pins a provider for a reason the bot can't see from the page: e.g.
|
|
2138
|
+
// northflank surfaces Google only as on-demand One-Tap (a FedCM widget the
|
|
2139
|
+
// redirect flow can't drive) while its GitHub button is a clean redirect, so
|
|
2140
|
+
// the service is pinned github. Leading with the warm pin honors that, with
|
|
2141
|
+
// the OTHER warm provider kept as a fallback for pages that only render it.
|
|
2142
|
+
// (This became safe once `login` was fixed to establish the session through
|
|
2143
|
+
// the bot's egress proxy — a warm GitHub session no longer dies on an IP
|
|
2144
|
+
// jump, so it doesn't hit the /authorize 2FA wall the way a stale one did.)
|
|
2145
|
+
//
|
|
2146
|
+
// RULE 2 — with NO pin, Google leads when present: empirically its OAuth
|
|
2147
|
+
// blocks less hard than a cold GitHub flow.
|
|
1354
2148
|
export function orderOAuthCandidates(pinned, loggedIn) {
|
|
1355
2149
|
if (pinned !== undefined) {
|
|
2150
|
+
if (loggedIn.includes(pinned)) {
|
|
2151
|
+
const others = loggedIn
|
|
2152
|
+
.filter((p) => p !== pinned)
|
|
2153
|
+
.sort((a, b) => (a === "google" ? -1 : b === "google" ? 1 : 0));
|
|
2154
|
+
return [pinned, ...others];
|
|
2155
|
+
}
|
|
2156
|
+
// Pin's session isn't warm — fall back to whatever IS (Google preferred).
|
|
1356
2157
|
if (pinned !== "google" && loggedIn.includes("google"))
|
|
1357
2158
|
return ["google", pinned];
|
|
1358
2159
|
return [pinned];
|
|
@@ -1915,6 +2716,56 @@ export function extractApiKeyFromText(text) {
|
|
|
1915
2716
|
}
|
|
1916
2717
|
return null;
|
|
1917
2718
|
}
|
|
2719
|
+
// Password-manager / autofill UI words that show up as short tokens on
// credential pages. A render API-keys page ships a "Save to 1Password" /
// "1Password" autofill button next to the real `rnd_…` key; LastPass,
// Bitwarden and Dashlane do the same. These strings are alphanumeric, often
// carry a digit ("1Password"), and sit EARLIER in DOM order than the
// credential — so the validator-blind candidate-scan tiers (replay-skill.ts)
// used to return them as the "credential", and the downstream length
// validator then rejected them (the 0DTW2V66 render skill:
// `got="1Password" length 9 below min 32`). They are never credentials;
// rejecting them at the candidate layer lets the scan move on to the real key.
// Entries are lowercase and compared against the whole (trimmed, lowercased)
// candidate.
const CREDENTIAL_NOISE_TOKENS = [
    "1password", "lastpass", "bitwarden", "dashlane",
    "keepass", "keeper", "nordpass",
    "proton pass", "protonpass",
    "autofill", "passwords",
];
// Leading verbs of multi-word UI affordances ("Save to 1Password", "Copy to
// clipboard", "Add to vault"). The scan tiers tokenize on whitespace, so such
// a phrase rarely survives as one candidate — but extractText()/innerText
// passes glue it together, so the leading verbs are guarded as well.
const CREDENTIAL_NOISE_PREFIXES = ["save to ", "copy to ", "add to ", "store in "];
// Returns true when `candidate` is a password-manager / autofill UI
// affordance rather than a real credential value: after trimming and
// lowercasing it either equals one of CREDENTIAL_NOISE_TOKENS or starts with
// one of CREDENTIAL_NOISE_PREFIXES. Empty / whitespace-only input is not
// noise. Used by the replay engine's raw-candidate scan tiers. Exported for
// unit testing.
export function isCredentialNoiseCandidate(candidate) {
    const normalized = candidate.trim().toLowerCase();
    if (normalized === "") {
        return false;
    }
    const isKnownToken = CREDENTIAL_NOISE_TOKENS.some((token) => token === normalized);
    return isKnownToken || CREDENTIAL_NOISE_PREFIXES.some((prefix) => normalized.startsWith(prefix));
}
|
|
1918
2769
|
// Choose which link in a verification email to click. Scores each URL
|
|
1919
2770
|
// by keyword and picks the best — but only if it scored positive.
|
|
1920
2771
|
//
|
|
@@ -1940,12 +2791,174 @@ export function pickVerificationLink(links) {
|
|
|
1940
2791
|
const top = scored[0];
|
|
1941
2792
|
return top !== undefined && top.score > 0 ? top.url : null;
|
|
1942
2793
|
}
|
|
2794
|
+
// Pick a verification link by its ANCHOR TEXT in the email HTML — the fallback
// when pickVerificationLink (which scores the URL) fails because the link is
// wrapped in a click-tracker that hides the keyword behind a redirect. MEASURED
// on amplitude (2026-06-04): its "Activate account" link is a
// u…ct.sendgrid.net/ls/click?upn=… URL (no "activate" in the URL), so the
// URL scorer returned null and the bot fell to a false-positive "code" (the
// year "2025"). The anchor TEXT still reads "Activate account".
//
// Each <a href> with an http(s) target is scored on its visible text:
//   +10  contains verify / confirm / activate as a whole word
//   +5   a full call-to-action phrase ("verify your email", "activate account", …)
//   +3   softer onboarding copy ("get started", "finish setting up")
//   -10  footer / housekeeping links (unsubscribe, preferences, privacy, …)
// The first anchor with the highest strictly-positive score wins.
//
// @param {string} bodyHtml  Raw HTML body of the email.
// @returns {string|null}    The decoded href of the best anchor, or null when
//                           no anchor scores above zero.
// Pure + exported for unit tests.
export function pickVerificationLinkFromHtml(bodyHtml) {
    // Accept both double- and single-quoted href values; group 1 or 2 holds
    // the URL, group 3 the anchor's inner HTML.
    const anchorRe = /<a\b[^>]*href\s*=\s*(?:"([^"]+)"|'([^']+)')[^>]*>([\s\S]*?)<\/a>/gi;
    let best = null;
    for (const m of bodyHtml.matchAll(anchorRe)) {
        // Attribute values are entity-encoded in HTML: a tracker URL's query
        // separators arrive as "&amp;" and must be decoded back to "&" or the
        // navigated URL carries broken parameters.
        const href = (m[1] ?? m[2] ?? "").replace(/&(?:amp|#38|#x26);/gi, "&");
        if (!/^https?:\/\//i.test(href)) {
            continue;
        }
        // Flatten the anchor's inner HTML to plain lower-case text: drop
        // nested tags, blank out named and numeric entities (&nbsp;, &#160;),
        // collapse whitespace.
        const text = (m[3] ?? "")
            .replace(/<[^>]+>/g, " ")
            .replace(/&(?:#\d+|#x[0-9a-f]+|[a-z][a-z0-9]*);/gi, " ")
            .replace(/\s+/g, " ")
            .trim()
            .toLowerCase();
        let score = 0;
        if (/\b(?:verify|confirm|activate)\b/.test(text)) {
            score += 10;
        }
        if (/verify (?:your )?email|confirm (?:your )?email|activate (?:your )?account|complete (?:your )?sign[\s-]?up/.test(text)) {
            score += 5;
        }
        if (/get started|finish setting up/.test(text)) {
            score += 3;
        }
        if (/unsubscribe|preferences|manage|view (?:in|this) (?:browser|email)|privacy|terms/.test(text)) {
            score -= 10;
        }
        // Strict ">" keeps the earliest anchor on ties and never adopts a
        // non-positive score.
        if (score > (best?.score ?? 0)) {
            best = { url: href, score };
        }
    }
    return best !== null && best.score > 0 ? best.url : null;
}
|
|
1943
2832
|
// Structural discriminator between the two LLM handle shapes: an LLMPair
// carries a `primary` member (itself an LLMClient), whereas a bare LLMClient
// exposes `createMessage`. The shapes are mutually exclusive, so checking
// for a non-null object under `primary` is sufficient.
function isLLMPair(x) {
    if (!("primary" in x)) {
        return false;
    }
    const { primary } = x;
    return primary !== null && typeof primary === "object";
}
|
|
2838
|
+
// Stall detector for the action loop. Reports true when each of the last
// `threshold` executed ACTIONS (click / select / check / fill — the steps
// that are meant to mutate the page) left the page content UNCHANGED.
//
// This is the fingerprint of a broken onboarding wizard that keeps
// re-presenting itself whatever the bot clicks (the axiom case, measured
// 2026-06-03): the planner reacts correctly to a visibly-unfilled form, the
// click never registers, and without this check the run burns all 24 rounds
// plus the LLM budget re-clicking the same card.
//
// Navigates / waits / extracts are deliberately not counted — they
// legitimately leave the current DOM alone (navigate changes the URL, wait
// pauses) — so any such effect inside the window means "not stalled".
//
// @param effects    Effect records; this function reads `kind`,
//                   `pageUnchanged` and the optional `selector` of each.
// @param threshold  Size of the trailing window to inspect (default 3).
// @returns {boolean}
// Pure + exported for unit tests.
export function isStalledOnActions(effects, threshold = 3) {
    if (effects.length < threshold) {
        return false;
    }
    const mutatingKinds = new Set(["click", "select", "check", "fill"]);
    const uniqueSelectors = new Set();
    let selectorSeen = false;
    for (const effect of effects.slice(-threshold)) {
        // Any non-action, or any action that did change the page, breaks
        // the stall pattern immediately.
        if (!mutatingKinds.has(effect.kind) || !effect.pageUnchanged) {
            return false;
        }
        if (effect.selector !== undefined) {
            selectorSeen = true;
        }
        uniqueSelectors.add(effect.selector ?? "");
    }
    // Older callers record no selectors at all: keep the original
    // kind+unchanged verdict for them so existing tests/paths don't regress.
    if (!selectorSeen) {
        return true;
    }
    // A genuine stall RE-acts on the SAME element. Acting on DISTINCT
    // selectors is PROGRESS through a multi-field wizard (role, then company
    // size, then plan — none changes the inventory, yet each is a different
    // choice). Only a REPEATED selector counts as stalled.
    return uniqueSelectors.size < threshold;
}
|
|
2871
|
+
// Does this URL look like a login / authentication screen? The check is
// service-agnostic — it inspects the path, not per-service hosts — and is
// used to detect a non-persisting OAuth session: after a successful OAuth an
// authenticated bot lands on a dashboard, not on a login page.
//
// Returns true when a path segment is exactly one of login / signin /
// sign-in / authenticate / sso, or when the URL carries a
// `google-auth-error` query parameter. An unparseable URL yields false.
// Pure + exported for tests.
export function isLoginPageUrl(url) {
    let pathname;
    try {
        pathname = new URL(url).pathname.toLowerCase();
    }
    catch {
        return false;
    }
    const hasLoginSegment = /(?:^|\/)(?:login|signin|sign-in|authenticate|sso)(?:\/|$)/.test(pathname);
    if (hasLoginSegment) {
        return true;
    }
    // Some providers keep the path stable but flag the failed auth in the
    // query (amplitude: /login?google-auth-error=…).
    const hasAuthErrorFlag = /[?&]google-auth-error\b/i.test(url);
    return hasAuthErrorFlag;
}
|
|
2890
|
+
// Recognise a pre-account route (signup OR login OR register) — the family
// of paths an AUTHENTICATED user has no business sitting on. This is broader
// than isLoginPageUrl, which is tuned for the OAuth-callback-loop detector
// and deliberately excludes /signup. Used for the post-OAuth dead-route
// escape.
//
// Only the URL's path is examined (case-insensitively, whole segments). An
// unparseable URL yields false. Exported for unit tests.
export function isSignupOrLoginRoute(url) {
    let pathname;
    try {
        pathname = new URL(url).pathname;
    }
    catch {
        return false;
    }
    const preAccountSegment = /(?:^|\/)(?:login|signin|sign-in|sign[_-]?up|signup|register|authenticate|sso)(?:\/|$)/;
    return preAccountSegment.test(pathname.toLowerCase());
}
|
|
2904
|
+
// The scheme://host root of a URL (no path/query) — the place a service
// redirects an authenticated user to their dashboard.
//
// @param {string} url  Any absolute URL.
// @returns {string|null}  "<origin>/" for a URL with a real origin, or null
//   when the URL is malformed OR has an opaque origin. Schemes such as
//   about:, data:, mailto: and file: serialize their origin as the literal
//   string "null"; returning "null/" for those would hand callers a bogus
//   navigation target, so they are reported as "no root" instead.
// Exported for unit tests.
export function originRoot(url) {
    let origin;
    try {
        origin = new URL(url).origin;
    }
    catch {
        return null;
    }
    // URL.origin is the string "null" for opaque origins — not a usable root.
    if (origin === "null") {
        return null;
    }
    return `${origin}/`;
}
|
|
2915
|
+
// Detects the transient "still rendering" shell of a modern SPA. Such a
// dashboard often paints a "Connecting…" / "Loading…" placeholder for a beat
// while its JS bundle and websocket finish — especially over a high-latency
// residential tunnel — and during that window the page has ZERO interactive
// elements. northflank's /settings/access-tokens lands on exactly this shell
// post-OAuth; the post-verify planner reads the empty inventory and concludes
// {"kind":"done","no elements"} ~2s in, abandoning a page that was about to
// render the token UI. Recognising the shell lets the caller wait for
// hydration instead of giving up.
//
// Callers match this ONLY alongside an empty inventory, so the narrow
// phrasing here won't swallow a real dashboard that merely contains the word
// "loading". Exported for unit tests.
export function isLoadingShellText(text) {
    // Veto first: the Google account chooser ("Choose an account to continue
    // to <App>") carries a stray "Loading" label yet is an ACTIONABLE page —
    // the clerk post-verify loop must click the account card rather than idle
    // through the hydration-wait ticks.
    const isAccountChooser = /choose an account/i.test(text);
    if (isAccountChooser) {
        return false;
    }
    // Only transient copy belongs in this pattern. The <noscript> fallback
    // ("This application cannot function without JavaScript…") was once
    // matched here by mistake: it is PERMANENT in the DOM even on JS-enabled
    // pages, so it made northflank read as a perpetual loading shell and the
    // hydration waits never exited. It is not a signal.
    const transientCopy = /\bconnecting\b|\bloading\b|please wait|getting things ready|initiali[sz]ing/i;
    return transientCopy.test(text);
}
|
|
2943
|
+
// Recognises copy that means "the session is being established RIGHT NOW".
// MEASURED on groq (Stytch B2B): after the OAuth callback, /authenticate
// shows "Logging in…" then "Creating your organization…" for ~5-7s of async
// discovery + org-creation + session calls before redirecting to the
// dashboard. Interrupting that window — navigating away, or worse,
// re-clicking the OAuth button — ABORTS the org creation and the session
// never finalizes, which is exactly how the bot was failing groq.
//
// While this returns true the bot must WAIT, never act. The phrasing
// generalizes to any async-session auth (Stytch / WorkOS / Auth0 org
// provisioning). Matching is case-insensitive substring search.
// Exported for unit tests.
export function isAuthProcessingText(text) {
    const sessionInFlightCopy = /logging in|signing in|creating your organization|creating your account|setting up your account|authenticating|finishing (?:sign|log)|redirecting you|one moment/i;
    return sessionInFlightCopy.test(text);
}
|
|
2955
|
+
// Sentinel returned by runOAuthFlow when the OAuth path is a dead end
|
|
2956
|
+
// that the email/password form-fill path can still recover (Google
|
|
2957
|
+
// login-only services that never created an account — see
|
|
2958
|
+
// detectGoogleNoAccount). runSignup catches it and re-runs the form-fill
|
|
2959
|
+
// path with OAuth-first suppressed. A unique const so it can't collide
|
|
2960
|
+
// with any SignupResult.error string.
|
|
2961
|
+
const OAUTH_FALL_BACK_TO_FORM_FILL = "__fall_back_to_form_fill__";
|
|
1949
2962
|
export class SignupAgent {
|
|
1950
2963
|
browser;
|
|
1951
2964
|
// Per-run counter so a single SignupAgent (which lives one run) can't
|
|
@@ -2038,7 +3051,13 @@ export class SignupAgent {
|
|
|
2038
3051
|
}
|
|
2039
3052
|
else if (detected.variant === "recaptcha_v3") {
|
|
2040
3053
|
this.invisibleCaptcha = { kind: "recaptcha", variant: "recaptcha_v3" };
|
|
2041
|
-
|
|
3054
|
+
// Invisible reCAPTCHA scores in the background, but its token is only
|
|
3055
|
+
// minted when grecaptcha.execute() runs — and a form like amplitude's
|
|
3056
|
+
// REQUIRES that token to submit. Mint it now (passes on our ~1.0
|
|
3057
|
+
// score) so the imminent submit carries a valid g-recaptcha-response,
|
|
3058
|
+
// instead of submitting with an empty token and silently no-op'ing.
|
|
3059
|
+
const minted = await this.browser.triggerInvisibleRecaptcha();
|
|
3060
|
+
steps.push(`${label} captcha: invisible reCAPTCHA v3 — ${minted ? "minted score token via grecaptcha.execute()" : "badge present, token not minted (form may submit it itself)"}`);
|
|
2042
3061
|
}
|
|
2043
3062
|
}
|
|
2044
3063
|
return { found: false, solved: false, blocked: false, kind: "turnstile" };
|
|
@@ -2056,7 +3075,15 @@ export class SignupAgent {
|
|
|
2056
3075
|
result.kind === "recaptcha" &&
|
|
2057
3076
|
this.captchaSolver?.isAvailable() === true) {
|
|
2058
3077
|
const sitekey = await this.browser.extractRecaptchaSitekey();
|
|
2059
|
-
if (sitekey
|
|
3078
|
+
if (sitekey === null) {
|
|
3079
|
+
// result.kind said "recaptcha" but no key with the reCAPTCHA `6L`
|
|
3080
|
+
// format is on the page — almost always an hCaptcha/Turnstile
|
|
3081
|
+
// widget misbucketed by the host-input heuristic. 2Captcha's
|
|
3082
|
+
// reCAPTCHA endpoint would reject the wrong-provider key
|
|
3083
|
+
// (ERROR_WRONG_GOOGLEKEY); skip it and surface the real shape.
|
|
3084
|
+
steps.push(`${label} captcha: no genuine reCAPTCHA sitekey on page (widget is likely hCaptcha/Turnstile) — skipping 2Captcha`);
|
|
3085
|
+
}
|
|
3086
|
+
else {
|
|
2060
3087
|
const pageUrl = (await this.browser.getState().catch(() => null))?.url;
|
|
2061
3088
|
if (pageUrl !== undefined) {
|
|
2062
3089
|
steps.push(`${label} captcha: Tier 3 — submitting sitekey to 2Captcha (${sitekey.slice(0, 10)}…)`);
|
|
@@ -2082,6 +3109,38 @@ export class SignupAgent {
|
|
|
2082
3109
|
}
|
|
2083
3110
|
}
|
|
2084
3111
|
}
|
|
3112
|
+
// Tier 3 for hCaptcha (plausible). Distinct provider, distinct
|
|
3113
|
+
// 2Captcha method (method=hcaptcha) + a UUID sitekey the reCAPTCHA
|
|
3114
|
+
// `6L` guard rejects — so it needs its own extractor, solver call,
|
|
3115
|
+
// and h-captcha-response injector. Same structure as reCAPTCHA Tier 3.
|
|
3116
|
+
if (!result.solved &&
|
|
3117
|
+
result.kind === "hcaptcha" &&
|
|
3118
|
+
this.captchaSolver?.isAvailable() === true) {
|
|
3119
|
+
const sitekey = await this.browser.extractHcaptchaSitekey();
|
|
3120
|
+
const pageUrl = (await this.browser.getState().catch(() => null))?.url;
|
|
3121
|
+
if (sitekey !== null && pageUrl !== undefined) {
|
|
3122
|
+
steps.push(`${label} captcha: Tier 3 — submitting hCaptcha sitekey to 2Captcha (${sitekey.slice(0, 10)}…)`);
|
|
3123
|
+
const solveRes = await this.captchaSolver.solveHcaptcha({ sitekey, pageUrl });
|
|
3124
|
+
if (solveRes.kind === "ok") {
|
|
3125
|
+
const injected = await this.browser.injectHcaptchaToken(solveRes.token);
|
|
3126
|
+
if (injected) {
|
|
3127
|
+
steps.push(`${label} captcha: Tier 3 hCaptcha solved in ${Math.round(solveRes.durationMs / 1000)}s via 2Captcha`);
|
|
3128
|
+
result = { ...result, solved: true };
|
|
3129
|
+
}
|
|
3130
|
+
else {
|
|
3131
|
+
steps.push(`${label} captcha: Tier 3 hCaptcha token arrived but page injection failed — captcha stays blocked`);
|
|
3132
|
+
}
|
|
3133
|
+
}
|
|
3134
|
+
else {
|
|
3135
|
+
steps.push(`${label} captcha: Tier 3 hCaptcha ${solveRes.kind}` +
|
|
3136
|
+
("reason" in solveRes ? `: ${solveRes.reason}` : "") +
|
|
3137
|
+
("durationMs" in solveRes ? ` (${Math.round(solveRes.durationMs / 1000)}s)` : ""));
|
|
3138
|
+
}
|
|
3139
|
+
}
|
|
3140
|
+
else if (sitekey === null) {
|
|
3141
|
+
steps.push(`${label} captcha: hCaptcha widget detected but no sitekey found — cannot Tier-3 solve`);
|
|
3142
|
+
}
|
|
3143
|
+
}
|
|
2085
3144
|
// rc.32 — forensic snapshot after the captcha attempt. Without
|
|
2086
3145
|
// this, the only snapshot near the captcha is the pre-fill one
|
|
2087
3146
|
// taken BEFORE the click, so when a Turnstile fails to solve we
|
|
@@ -2157,7 +3216,14 @@ export class SignupAgent {
|
|
|
2157
3216
|
// click or a post-submit validation error ("the page advanced")
|
|
2158
3217
|
// gets more headroom. All bounded by the 15-call LLM breaker + the
|
|
2159
3218
|
// F2 top-level deadline.
|
|
2160
|
-
async planExecuteWithRetry(task, fillValues, steps
|
|
3219
|
+
async planExecuteWithRetry(task, fillValues, steps,
|
|
3220
|
+
// When true, suppress the OAuth-first scan entirely and go straight
|
|
3221
|
+
// to form-fill. Set by the re-route after the OAuth path discovered
|
|
3222
|
+
// the Google identity has no account (detectGoogleNoAccount) — the
|
|
3223
|
+
// page still carries a "Continue with Google" button, so without
|
|
3224
|
+
// this the scan would re-pick OAuth and loop right back into the
|
|
3225
|
+
// same no-account bounce. One-shot equivalent of committedToEmailPath.
|
|
3226
|
+
forceFormFill = false) {
|
|
2161
3227
|
const MAX_ERROR_REPLANS = 2;
|
|
2162
3228
|
// 0.8.3-rc.1 — widened from 4 to 6 so submit_disabled re-plans
|
|
2163
3229
|
// get more attempts to identify the gating control. Mailgun's
|
|
@@ -2170,6 +3236,12 @@ export class SignupAgent {
|
|
|
2170
3236
|
let progressReplans = 0;
|
|
2171
3237
|
let emptyPlans = 0;
|
|
2172
3238
|
let oauthScanRetries = 0;
|
|
3239
|
+
// Bounded click-throughs of a generic "Sign In to Continue"
|
|
3240
|
+
// interstitial that gates the provider buttons (Qdrant). Capped so
|
|
3241
|
+
// an SSO-only page that keeps re-showing a sign-in button (or a
|
|
3242
|
+
// redirect loop) still terminates at oauth_required.
|
|
3243
|
+
let signInAdvanceClicks = 0;
|
|
3244
|
+
const MAX_SIGN_IN_ADVANCE_CLICKS = 2;
|
|
2173
3245
|
let hint;
|
|
2174
3246
|
// F14 — selectors the planner clicked WITHOUT advancing the page.
|
|
2175
3247
|
// Each no-progress plan records its click selectors here; the next
|
|
@@ -2185,7 +3257,7 @@ export class SignupAgent {
|
|
|
2185
3257
|
// "Continue with Google" button and reroutes — exactly the
|
|
2186
3258
|
// regression that produced the Security Code challenge on
|
|
2187
3259
|
// methoxine's account during the rc.30 Railway run.
|
|
2188
|
-
let committedToEmailPath =
|
|
3260
|
+
let committedToEmailPath = forceFormFill;
|
|
2189
3261
|
const oauthCandidates = await this.resolveOAuthCandidates(task, steps);
|
|
2190
3262
|
for (;;) {
|
|
2191
3263
|
await this.browser.waitForFormReady();
|
|
@@ -2205,6 +3277,50 @@ export class SignupAgent {
|
|
|
2205
3277
|
this.browser.getState(),
|
|
2206
3278
|
this.buildInventory(steps, oauthCandidates),
|
|
2207
3279
|
]);
|
|
3280
|
+
// Email-verification WALL reached without a fresh submit — e.g. OAuth
|
|
3281
|
+
// landed on a pending account's "Verify your email — check <addr>" page.
|
|
3282
|
+
// A real signup form still has fields to fill; a wall has only
|
|
3283
|
+
// Open-Gmail / Resend / Return buttons, on which the form-fill planner
|
|
3284
|
+
// stalls. Route to the post-submit inbox-poll + verification-link flow
|
|
3285
|
+
// instead, polling the alias the wall names (which may differ from
|
|
3286
|
+
// task.email when a prior run created the pending account).
|
|
3287
|
+
{
|
|
3288
|
+
// Use the already-fetched state.html (don't call extractText() again —
|
|
3289
|
+
// an extra read would shift queue-backed test mocks and isn't needed:
|
|
3290
|
+
// the verification copy is in the rendered HTML).
|
|
3291
|
+
const wallText = state.html;
|
|
3292
|
+
const hasFillableInput = inventory.some((e) => e.tag === "input" &&
|
|
3293
|
+
(e.type === "email" ||
|
|
3294
|
+
e.type === "text" ||
|
|
3295
|
+
e.type === "password" ||
|
|
3296
|
+
e.type === null) &&
|
|
3297
|
+
e.visible !== false);
|
|
3298
|
+
if (!hasFillableInput && expectsVerificationEmail(wallText)) {
|
|
3299
|
+
const alias = extractVerifyWallAlias(wallText);
|
|
3300
|
+
this.pendingVerificationAlias = alias;
|
|
3301
|
+
steps.push(`Form: email-verification wall (no fields to fill${alias !== null ? `, check ${alias}` : ""}) — ` +
|
|
3302
|
+
`routing to the inbox-poll + verification-link flow.`);
|
|
3303
|
+
// The named link may be stale (a pending account from a prior run);
|
|
3304
|
+
// click "Resend verification email" if present to refresh it.
|
|
3305
|
+
const resend = inventory.find((e) => {
|
|
3306
|
+
if (e.tag !== "button" && e.tag !== "a")
|
|
3307
|
+
return false;
|
|
3308
|
+
const t = `${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`.toLowerCase();
|
|
3309
|
+
return /resend (?:verification )?(?:email|link)|send (?:it )?again/.test(t);
|
|
3310
|
+
});
|
|
3311
|
+
if (resend !== undefined) {
|
|
3312
|
+
try {
|
|
3313
|
+
await this.browser.click(resend.selector);
|
|
3314
|
+
steps.push(`Form: clicked "Resend verification email" to refresh the link.`);
|
|
3315
|
+
await this.browser.wait(2);
|
|
3316
|
+
}
|
|
3317
|
+
catch {
|
|
3318
|
+
// non-fatal — poll for whatever's already in the inbox
|
|
3319
|
+
}
|
|
3320
|
+
}
|
|
3321
|
+
return { kind: "submitted" };
|
|
3322
|
+
}
|
|
3323
|
+
}
|
|
2208
3324
|
// OAuth-first (T6/T13 + auto-prefer): when the page carries a
|
|
2209
3325
|
// "Sign in with <provider>" affordance for a provider the bot can
|
|
2210
3326
|
// use, that button unconditionally outranks any form field — hand
|
|
@@ -2233,11 +3349,20 @@ export class SignupAgent {
|
|
|
2233
3349
|
}
|
|
2234
3350
|
// SSO buttons frequently load async — Mistral renders its
|
|
2235
3351
|
// icon-only provider buttons after the email form. Re-extract
|
|
2236
|
-
// a couple of times before giving up on the OAuth path.
|
|
2237
|
-
|
|
3352
|
+
// a couple of times before giving up on the OAuth path. On a
|
|
3353
|
+
// websocket-gated SPA (northflank) the WHOLE page — provider
|
|
3354
|
+
// buttons included — renders only after a ~15s hydration, so a
|
|
3355
|
+
// "Connecting"/loading shell warrants far more patience than the
|
|
3356
|
+
// default 2 retries (otherwise the bot gives up at ~6s and wrongly
|
|
3357
|
+
// falls back to the email-signup path before the GitHub button
|
|
3358
|
+
// even exists).
|
|
3359
|
+
const oauthScanShell = isLoadingShellText(await this.browser.extractText().catch(() => ""));
|
|
3360
|
+
const maxOauthScanRetries = oauthScanShell ? 8 : 2;
|
|
3361
|
+
if (oauthScanRetries < maxOauthScanRetries) {
|
|
2238
3362
|
oauthScanRetries += 1;
|
|
2239
3363
|
steps.push(`OAuth-first: no provider affordance yet — waiting for an ` +
|
|
2240
|
-
`async render (retry ${oauthScanRetries}
|
|
3364
|
+
`async render (retry ${oauthScanRetries}/${maxOauthScanRetries}` +
|
|
3365
|
+
`${oauthScanShell ? ", page still a loading shell" : ""})`);
|
|
2241
3366
|
await this.browser.wait(3);
|
|
2242
3367
|
continue;
|
|
2243
3368
|
}
|
|
@@ -2253,6 +3378,36 @@ export class SignupAgent {
|
|
|
2253
3378
|
"treating as already authenticated, jumping to post-verify navigation");
|
|
2254
3379
|
return { kind: "already_oauth" };
|
|
2255
3380
|
}
|
|
3381
|
+
// The provider buttons can sit one click behind a generic
|
|
3382
|
+
// "Sign In to Continue" interstitial (Qdrant's session-expiry
|
|
3383
|
+
// /logout?aerr=expired redirect renders only that button). The
|
|
3384
|
+
// OAuth scan above found nothing because the real login UI
|
|
3385
|
+
// hasn't been reached yet. Click the sign-in-ish affordance to
|
|
3386
|
+
// advance, reset the async-render retries, and re-scan — bounded
|
|
3387
|
+
// so a page that just keeps showing a sign-in button (genuine
|
|
3388
|
+
// SSO-only / a redirect loop) still terminates at oauth_required
|
|
3389
|
+
// rather than clicking forever.
|
|
3390
|
+
if (signInAdvanceClicks < MAX_SIGN_IN_ADVANCE_CLICKS) {
|
|
3391
|
+
const advance = findSignInAdvanceButton(inventory, oauthCandidates);
|
|
3392
|
+
if (advance !== null) {
|
|
3393
|
+
signInAdvanceClicks += 1;
|
|
3394
|
+
steps.push(`OAuth-first: no provider affordance, but found a generic ` +
|
|
3395
|
+
`sign-in affordance (${JSON.stringify(advance.visibleText ?? advance.ariaLabel ?? "")}) ` +
|
|
3396
|
+
`— clicking it to advance to the real login page ` +
|
|
3397
|
+
`(${signInAdvanceClicks}/${MAX_SIGN_IN_ADVANCE_CLICKS})`);
|
|
3398
|
+
try {
|
|
3399
|
+
await this.browser.click(advance.selector);
|
|
3400
|
+
}
|
|
3401
|
+
catch (err) {
|
|
3402
|
+
steps.push(`OAuth-first: sign-in advance click failed (${err instanceof Error ? err.message : String(err)}) ` +
|
|
3403
|
+
`— falling through to form-fill`);
|
|
3404
|
+
}
|
|
3405
|
+
// Reset the async-render budget for the now-advanced page so
|
|
3406
|
+
// its provider buttons get the same couple of render retries.
|
|
3407
|
+
oauthScanRetries = 0;
|
|
3408
|
+
continue;
|
|
3409
|
+
}
|
|
3410
|
+
}
|
|
2256
3411
|
steps.push("OAuth-first: no usable provider affordance on the page — " +
|
|
2257
3412
|
"falling back to form-fill");
|
|
2258
3413
|
// Dump visible buttons/links so we can see what the OAuth-
|
|
@@ -2297,7 +3452,7 @@ export class SignupAgent {
|
|
|
2297
3452
|
// providers, the situation is recoverable — surface the
|
|
2298
3453
|
// specific provider to seed.
|
|
2299
3454
|
const visibleProviders = detectOAuthProvidersInInventory(inventory);
|
|
2300
|
-
const haveSessions =
|
|
3455
|
+
const haveSessions = await this.effectiveLoggedInProviders();
|
|
2301
3456
|
const missingProviders = visibleProviders.filter((p) => !haveSessions.includes(p));
|
|
2302
3457
|
if (missingProviders.length > 0 &&
|
|
2303
3458
|
// Only surface needs_oauth_provider_session when the user
|
|
@@ -2467,6 +3622,17 @@ export class SignupAgent {
|
|
|
2467
3622
|
// stuck-tracker so a legitimate later click isn't false-positive
|
|
2468
3623
|
// rejected.
|
|
2469
3624
|
lastNoProgressClickSelectors = new Set();
|
|
3625
|
+
// Deterministic agreement-checkbox guard — runs BEFORE the captcha
|
|
3626
|
+
// gate + submit so the form is fully satisfied at submit time. The
|
|
3627
|
+
// LLM planner sometimes skips a required TOS box (amplitude: it
|
|
3628
|
+
// read the box as one of the adjacent card-radios), and when the
|
|
3629
|
+
// service doesn't disable submit for an unchecked box, the click
|
|
3630
|
+
// silently no-ops. This ticks terms/privacy/consent boxes while
|
|
3631
|
+
// never touching marketing opt-ins. Best-effort: never throws.
|
|
3632
|
+
const agreementBoxes = await this.browser.checkRequiredAgreementBoxes();
|
|
3633
|
+
if (agreementBoxes.length > 0) {
|
|
3634
|
+
steps.push(`Form: checked required agreement box(es): [${agreementBoxes.join(", ")}]`);
|
|
3635
|
+
}
|
|
2470
3636
|
// Captcha gate + submit.
|
|
2471
3637
|
const preGate = await this.runCaptchaGate("Pre-submit", steps);
|
|
2472
3638
|
if (preGate.blocked)
|
|
@@ -2539,6 +3705,27 @@ export class SignupAgent {
|
|
|
2539
3705
|
emptyInputHint;
|
|
2540
3706
|
continue;
|
|
2541
3707
|
}
|
|
3708
|
+
// A submit-selector visibility timeout means the element the
|
|
3709
|
+
// planner picked is no longer on the page — typically because an
|
|
3710
|
+
// earlier action in THIS plan advanced a multi-step SPA (Paddle's
|
|
3711
|
+
// signup: a "Continue" click navigates to a new screen, so the
|
|
3712
|
+
// submit selector that resolved at plan-time has vanished by the
|
|
3713
|
+
// time clickSubmit polls for it). The page DID move forward, so
|
|
3714
|
+
// re-plan against the new state rather than failing the whole run
|
|
3715
|
+
// on a stale selector. Bounded by the same progressReplans
|
|
3716
|
+
// headroom as the disabled-submit / post-validation re-plans.
|
|
3717
|
+
if (isSubmitTimeout(reason)) {
|
|
3718
|
+
if (++progressReplans > MAX_PROGRESS_REPLANS) {
|
|
3719
|
+
return { kind: "submit_failed", reason };
|
|
3720
|
+
}
|
|
3721
|
+
steps.push(`⚠ submit selector went stale (${reason.split("\n")[0]}) — the page likely advanced; re-planning`);
|
|
3722
|
+
hint =
|
|
3723
|
+
"The submit button selected last round was no longer present when " +
|
|
3724
|
+
"we tried to click it — an earlier action probably advanced the page. " +
|
|
3725
|
+
"Re-read the now-visible form and plan the next step (pick the submit " +
|
|
3726
|
+
"button that is actually on the current screen).";
|
|
3727
|
+
continue;
|
|
3728
|
+
}
|
|
2542
3729
|
steps.push(`⚠ submit click failed: ${reason}`);
|
|
2543
3730
|
return { kind: "submit_failed", reason };
|
|
2544
3731
|
}
|
|
@@ -2687,12 +3874,33 @@ export class SignupAgent {
|
|
|
2687
3874
|
return false;
|
|
2688
3875
|
}
|
|
2689
3876
|
}
|
|
3877
|
+
// Which OAuth providers can the bot actually use right now — the UNION of
|
|
3878
|
+
// the logged-in-providers.json marker (a memo) and a LIVE read of the
|
|
3879
|
+
// browser's cookie jar. The cookie jar is ground truth, so a warm session
|
|
3880
|
+
// is never invisible just because the marker drifted (the GitHub-skipped-
|
|
3881
|
+
// for-Google bug). Self-heals the marker for any live session it was
|
|
3882
|
+
// missing. Falls back to the marker alone if the cookie read fails.
|
|
3883
|
+
async effectiveLoggedInProviders() {
|
|
3884
|
+
const fromMarker = loggedInProviders();
|
|
3885
|
+
let live = [];
|
|
3886
|
+
try {
|
|
3887
|
+
live = await this.browser.detectSessionProviders();
|
|
3888
|
+
}
|
|
3889
|
+
catch {
|
|
3890
|
+
live = [];
|
|
3891
|
+
}
|
|
3892
|
+
for (const p of live) {
|
|
3893
|
+
if (!fromMarker.includes(p))
|
|
3894
|
+
markProviderLoggedIn(p);
|
|
3895
|
+
}
|
|
3896
|
+
return [...new Set([...fromMarker, ...live])];
|
|
3897
|
+
}
|
|
2690
3898
|
async resolveOAuthCandidates(task, steps) {
|
|
2691
3899
|
if (task.forceForm === true) {
|
|
2692
3900
|
steps.push("Force-form: OAuth-first scan suppressed — taking the email/password path");
|
|
2693
3901
|
return [];
|
|
2694
3902
|
}
|
|
2695
|
-
const ordered = orderOAuthCandidates(task.oauthProvider,
|
|
3903
|
+
const ordered = orderOAuthCandidates(task.oauthProvider, await this.effectiveLoggedInProviders());
|
|
2696
3904
|
if (ordered.length === 0)
|
|
2697
3905
|
return [];
|
|
2698
3906
|
const pinNote = task.oauthProvider !== undefined &&
|
|
@@ -2761,6 +3969,11 @@ export class SignupAgent {
|
|
|
2761
3969
|
// it. Cleared once the loop emits a step that targets the OTP
|
|
2762
3970
|
// input, so the hint doesn't echo into later unrelated rounds.
|
|
2763
3971
|
pendingOtpCode = null;
|
|
3972
|
+
// Set when planExecuteWithRetry routes an email-verification WALL (reached
|
|
3973
|
+
// without a fresh submit — e.g. OAuth landed on a pending account's "Verify
|
|
3974
|
+
// your email — check <addr>" page) into the post-submit email flow. The poll
|
|
3975
|
+
// targets this alias (the one the wall names) instead of task.email.
|
|
3976
|
+
pendingVerificationAlias = null;
|
|
2764
3977
|
// rc.39 — when postVerifyLoop exits because the planner returned
|
|
2765
3978
|
// `done`, capture the planner's stated reason so the caller can
|
|
2766
3979
|
// factor it into paywall classification. Koyeb (and similar)
|
|
@@ -3014,6 +4227,30 @@ export class SignupAgent {
|
|
|
3014
4227
|
: {}),
|
|
3015
4228
|
}));
|
|
3016
4229
|
let signupUrl = guessed;
|
|
4230
|
+
// Tier A — HTTP fast-path signup-URL resolver. Before committing to
|
|
4231
|
+
// the (~6-minute) navigation, probe the candidate over the SAME
|
|
4232
|
+
// proxy via the context request API and confirm it actually serves a
|
|
4233
|
+
// signup FORM (not a login SPA / 404). Curated signup_urls go stale
|
|
4234
|
+
// (plunk's app.useplunk.com/signup now 404s and silently serves the
|
|
4235
|
+
// login page; the real form moved to next-app.useplunk.com/auth/
|
|
4236
|
+
// signup). The probe follows redirects + tries conventional paths
|
|
4237
|
+
// and adopts a better URL when it finds one. Non-Google URLs only —
|
|
4238
|
+
// a Google-search URL is the explicit fallback path, not a hint.
|
|
4239
|
+
if (!isGoogleSearchUrl(signupUrl)) {
|
|
4240
|
+
const serviceSlug = task.service.toLowerCase().replace(/[^a-z0-9]/g, "");
|
|
4241
|
+
const resolved = await resolveSignupUrlByProbe(signupUrl, serviceSlug, (u) => this.browser.fetchText(u), (m) => steps.push(m));
|
|
4242
|
+
if (resolved !== null && resolved !== signupUrl) {
|
|
4243
|
+
steps.push(`[signup-url] resolved ${signupUrl} → ${resolved}`);
|
|
4244
|
+
// A curated URL that the resolver had to move is a stale-YAML
|
|
4245
|
+
// signal worth surfacing in telemetry (curated URLs are
|
|
4246
|
+
// supposed to be the trusted, hand-verified path).
|
|
4247
|
+
if (task.signupUrl !== undefined) {
|
|
4248
|
+
steps.push(`⚠ curated signup_url for ${task.service} looks stale ` +
|
|
4249
|
+
`(${signupUrl}); using ${resolved}`);
|
|
4250
|
+
}
|
|
4251
|
+
signupUrl = resolved;
|
|
4252
|
+
}
|
|
4253
|
+
}
|
|
3017
4254
|
// Prewarm the target origin before hitting the (often-strict) signup
|
|
3018
4255
|
// page. Two things this buys us:
|
|
3019
4256
|
// 1. First-party cookies on the root domain. Cloudflare's
|
|
@@ -3036,56 +4273,122 @@ export class SignupAgent {
|
|
|
3036
4273
|
// PERF: goto() awaits domcontentloaded; the subsequent
|
|
3037
4274
|
// waitForFormReady in planExecuteWithRetry handles SPA settle.
|
|
3038
4275
|
// No need for a blind 2s dwell here.
|
|
3039
|
-
//
|
|
3040
|
-
//
|
|
3041
|
-
//
|
|
3042
|
-
//
|
|
3043
|
-
// the bot recover from a wrong canonical guess (e.g. a service
|
|
3044
|
-
// that uses /register or a non-`.com` TLD).
|
|
4276
|
+
// After load: does the rendered page look like a signup form?
|
|
4277
|
+
// looksLikeSignupPage() can't tell signup from login (both have
|
|
4278
|
+
// email+password), so we ALSO classify the rendered HTML's copy via
|
|
4279
|
+
// classifySignupHtml — that's what distinguishes the two.
|
|
3045
4280
|
//
|
|
3046
|
-
// A curated task.signupUrl is trusted
|
|
3047
|
-
//
|
|
3048
|
-
//
|
|
3049
|
-
//
|
|
3050
|
-
//
|
|
3051
|
-
//
|
|
3052
|
-
|
|
3053
|
-
|
|
3054
|
-
|
|
3055
|
-
|
|
3056
|
-
|
|
3057
|
-
|
|
3058
|
-
|
|
3059
|
-
}
|
|
3060
|
-
|
|
3061
|
-
|
|
3062
|
-
const
|
|
3063
|
-
if (
|
|
3064
|
-
|
|
3065
|
-
|
|
3066
|
-
await this.runPrewarm(found, steps);
|
|
3067
|
-
steps.push(`Found signup link: ${found}`);
|
|
3068
|
-
await this.browser.goto(found);
|
|
3069
|
-
// PERF: planner loop's waitForFormReady is next; no dwell.
|
|
4281
|
+
// A curated task.signupUrl is no longer trusted blindly: it can land
|
|
4282
|
+
// on a login page (a stale path the SPA reroutes to /login). We
|
|
4283
|
+
// trigger recovery for BOTH guessed and curated URLs — but
|
|
4284
|
+
// conservatively for curated ones, to avoid regressing a good
|
|
4285
|
+
// curated URL: recover ONLY when the copy classifies as "login" or
|
|
4286
|
+
// "other" AND looksLikeSignupPage also disagrees. The structural
|
|
4287
|
+
// check is the backstop for an OAuth-only signup page ("Continue
|
|
4288
|
+
// with Google", no email/password copy) that classifySignupHtml
|
|
4289
|
+
// would otherwise read as "other". (A promoted-skill URL is replay-
|
|
4290
|
+
// verified and a guessed URL that's wrong is recovered here too.)
|
|
4291
|
+
let needsRecovery = false;
|
|
4292
|
+
if (task.signupUrl === undefined) {
|
|
4293
|
+
needsRecovery = !(await this.looksLikeSignupPage());
|
|
4294
|
+
}
|
|
4295
|
+
else {
|
|
4296
|
+
const rendered = (await this.browser.getState()).html;
|
|
4297
|
+
const klass = classifySignupHtml(rendered);
|
|
4298
|
+
if (klass !== "signup" && !(await this.looksLikeSignupPage())) {
|
|
4299
|
+
needsRecovery = true;
|
|
4300
|
+
steps.push(`curated signup_url for ${task.service} rendered as "${klass}", not a signup form — attempting recovery`);
|
|
3070
4301
|
}
|
|
3071
|
-
|
|
3072
|
-
|
|
3073
|
-
|
|
3074
|
-
|
|
3075
|
-
|
|
3076
|
-
|
|
3077
|
-
|
|
3078
|
-
|
|
3079
|
-
|
|
3080
|
-
|
|
3081
|
-
|
|
3082
|
-
|
|
3083
|
-
|
|
3084
|
-
|
|
3085
|
-
|
|
3086
|
-
|
|
3087
|
-
|
|
4302
|
+
}
|
|
4303
|
+
if (needsRecovery) {
|
|
4304
|
+
if (task.signupUrl === undefined) {
|
|
4305
|
+
steps.push(`${signupUrl} didn't look like a signup page — attempting recovery`);
|
|
4306
|
+
}
|
|
4307
|
+
// Tier B — landing-page CTA self-heal. Before the heavyweight
|
|
4308
|
+
// Google-search path, navigate to the site root and click the
|
|
4309
|
+
// highest-scored signup CTA (same scorer the planner uses). This
|
|
4310
|
+
// catches static-host SPAs that serve a 200 empty shell for every
|
|
4311
|
+
// path (so the HTTP probe can't tell signup from login) but DO
|
|
4312
|
+
// render a real "Sign up" CTA once the JS hydrates on the root.
|
|
4313
|
+
const root = originRoot(signupUrl);
|
|
4314
|
+
let recovered = false;
|
|
4315
|
+
if (root !== null) {
|
|
4316
|
+
steps.push(`[signup-url] Tier B: landing-page CTA at ${root}`);
|
|
4317
|
+
try {
|
|
4318
|
+
await this.runPrewarm(root, steps);
|
|
4319
|
+
await this.browser.goto(root);
|
|
4320
|
+
const inventory = await this.browser.extractInteractiveElements();
|
|
4321
|
+
// Score every interactive element's text; pick the best
|
|
4322
|
+
// signup CTA. Providers are driven negative by scoreSignupButton
|
|
4323
|
+
// (we want the email-signup affordance, not an OAuth button).
|
|
4324
|
+
let best = null;
|
|
4325
|
+
for (const el of inventory) {
|
|
4326
|
+
const label = el.visibleText ?? el.ariaLabel ?? el.iconLabel ?? el.title ?? "";
|
|
4327
|
+
if (label.trim().length === 0)
|
|
4328
|
+
continue;
|
|
4329
|
+
const score = scoreSignupButton(label, ["google", "github"]);
|
|
4330
|
+
if (best === null || score > best.score)
|
|
4331
|
+
best = { el, score };
|
|
4332
|
+
}
|
|
4333
|
+
if (best !== null && best.score > 0) {
|
|
4334
|
+
steps.push(`[signup-url] Tier B clicking CTA "${(best.el.visibleText ?? best.el.ariaLabel ?? "").slice(0, 40)}" (score ${best.score})`);
|
|
4335
|
+
await this.browser.click(best.el.selector);
|
|
4336
|
+
const landed = (await this.browser.getState()).html;
|
|
4337
|
+
if (classifySignupHtml(landed) === "signup") {
|
|
4338
|
+
const url = this.browser.currentUrl();
|
|
4339
|
+
steps.push(`[signup-url] Tier B recovered signup page: ${url}`);
|
|
4340
|
+
signupUrl = url;
|
|
4341
|
+
recovered = true;
|
|
4342
|
+
}
|
|
4343
|
+
else {
|
|
4344
|
+
steps.push(`[signup-url] Tier B click did not reach a signup form — falling through to search`);
|
|
4345
|
+
}
|
|
4346
|
+
}
|
|
4347
|
+
else {
|
|
4348
|
+
steps.push(`[signup-url] Tier B found no scoring signup CTA on ${root}`);
|
|
4349
|
+
}
|
|
3088
4350
|
}
|
|
4351
|
+
catch (err) {
|
|
4352
|
+
steps.push(`[signup-url] Tier B failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
|
|
4353
|
+
}
|
|
4354
|
+
}
|
|
4355
|
+
// Final fallback — the existing Google-search + findSignupLink
|
|
4356
|
+
// path, unchanged. Only when Tier B didn't recover.
|
|
4357
|
+
if (!recovered) {
|
|
4358
|
+
const fallbackSearch = `https://www.google.com/search?q=${encodeURIComponent(`${task.service} signup`)}`;
|
|
4359
|
+
await this.browser.goto(fallbackSearch);
|
|
4360
|
+
// PERF: domcontentloaded from goto() + findSignupLink reads
|
|
4361
|
+
// the DOM itself — no blind dwell needed.
|
|
4362
|
+
signupUrl = fallbackSearch;
|
|
4363
|
+
}
|
|
4364
|
+
}
|
|
4365
|
+
if (isGoogleSearchUrl(signupUrl)) {
|
|
4366
|
+
steps.push("Searching for signup page...");
|
|
4367
|
+
const found = await this.findSignupLink(task.service);
|
|
4368
|
+
if (found !== null) {
|
|
4369
|
+
// Now that we know the real signup origin, prewarm it before
|
|
4370
|
+
// the deep navigation. Same rationale as above.
|
|
4371
|
+
await this.runPrewarm(found, steps);
|
|
4372
|
+
steps.push(`Found signup link: ${found}`);
|
|
4373
|
+
await this.browser.goto(found);
|
|
4374
|
+
// PERF: planner loop's waitForFormReady is next; no dwell.
|
|
4375
|
+
}
|
|
4376
|
+
else {
|
|
4377
|
+
// BUG-1 GUARD: findSignupLink filters off-domain candidates
|
|
4378
|
+
// (registered-domain match against the service slug). If
|
|
4379
|
+
// nothing remained AND we'd been sent here from a Google
|
|
4380
|
+
// fallback, the bot is sitting on a SERP with no usable
|
|
4381
|
+
// destination — abort rather than let the form-fill planner
|
|
4382
|
+
// happily fill the Google search box.
|
|
4383
|
+
return {
|
|
4384
|
+
success: false,
|
|
4385
|
+
error: `no_signup_link: searched for ${task.service}'s signup page and ` +
|
|
4386
|
+
`found no on-domain candidates. The service likely doesn't have ` +
|
|
4387
|
+
`a public self-serve signup, or the bot's domain guard rejected ` +
|
|
4388
|
+
`every match. Sign up manually.`,
|
|
4389
|
+
steps,
|
|
4390
|
+
...this.resultTail(),
|
|
4391
|
+
};
|
|
3089
4392
|
}
|
|
3090
4393
|
}
|
|
3091
4394
|
// Steps 2-5: plan the form, fill it, submit — via the
|
|
@@ -3101,147 +4404,223 @@ export class SignupAgent {
|
|
|
3101
4404
|
// `literal` has no fixed value — resolved per-action.
|
|
3102
4405
|
literal: "",
|
|
3103
4406
|
};
|
|
3104
|
-
|
|
3105
|
-
|
|
3106
|
-
|
|
3107
|
-
|
|
3108
|
-
|
|
3109
|
-
|
|
3110
|
-
|
|
3111
|
-
|
|
3112
|
-
|
|
3113
|
-
|
|
3114
|
-
|
|
3115
|
-
|
|
3116
|
-
|
|
3117
|
-
|
|
3118
|
-
|
|
3119
|
-
|
|
3120
|
-
|
|
3121
|
-
|
|
3122
|
-
|
|
3123
|
-
|
|
3124
|
-
steps,
|
|
3125
|
-
...this.resultTail(),
|
|
3126
|
-
};
|
|
3127
|
-
case "oauth_required":
|
|
3128
|
-
return {
|
|
3129
|
-
success: false,
|
|
3130
|
-
error: `oauth_required: ${task.service} offers only OAuth/SSO signup — there is no email/password form to automate.`,
|
|
3131
|
-
steps,
|
|
3132
|
-
...this.resultTail(),
|
|
3133
|
-
};
|
|
3134
|
-
case "needs_oauth_provider_session": {
|
|
3135
|
-
// rc.33-task — actionable: name the missing provider so
|
|
3136
|
-
// the user runs the right `mcp login` command. When more
|
|
3137
|
-
// than one provider is missing, the message lists them and
|
|
3138
|
-
// recommends any single one (operator picks).
|
|
3139
|
-
const missing = outcome.missingProviders;
|
|
3140
|
-
const have = outcome.haveSessions;
|
|
3141
|
-
const firstMissing = missing[0];
|
|
3142
|
-
const missingLabel = missing
|
|
3143
|
-
.map((p) => OAUTH_PROVIDERS[p].label)
|
|
3144
|
-
.join(" / ");
|
|
3145
|
-
const haveLabel = have.length > 0
|
|
3146
|
-
? have.map((p) => OAUTH_PROVIDERS[p].label).join(", ")
|
|
3147
|
-
: "(none)";
|
|
3148
|
-
return {
|
|
3149
|
-
success: false,
|
|
3150
|
-
error: `needs_oauth_provider_session: ${task.service} offers ${missingLabel} OAuth ` +
|
|
3151
|
-
`but the bot's chrome profile has no ${missingLabel} session ` +
|
|
3152
|
-
`(currently signed in to: ${haveLabel}). ` +
|
|
3153
|
-
`Run \`npx @trusty-squire/mcp login --provider=${firstMissing}\` ` +
|
|
3154
|
-
`to seed the session, then retry.`,
|
|
3155
|
-
steps,
|
|
3156
|
-
...this.resultTail(),
|
|
3157
|
-
};
|
|
3158
|
-
}
|
|
3159
|
-
case "anti_bot_blocked":
|
|
3160
|
-
return {
|
|
3161
|
-
success: false,
|
|
3162
|
-
error: `anti_bot_blocked: ${task.service}'s ${outcome.vendor} anti-bot interstitial would ` +
|
|
3163
|
-
`not clear — the bot's IP/fingerprint did not pass ${outcome.vendor}'s server-side ` +
|
|
3164
|
-
`risk score. This is a soft block (no challenge to solve); the user should sign up ` +
|
|
3165
|
-
`manually.`,
|
|
3166
|
-
steps,
|
|
3167
|
-
...this.resultTail(),
|
|
3168
|
-
};
|
|
3169
|
-
case "oauth":
|
|
3170
|
-
// T6/T7 — OAuth-first path. runOAuthFlow drives the consent
|
|
3171
|
-
// handshake and post-OAuth onboarding to its own terminal
|
|
3172
|
-
// SignupResult; there is no form submit / email verification.
|
|
3173
|
-
return await this.runOAuthFlow(task, outcome.selector, outcome.provider, steps);
|
|
3174
|
-
case "already_oauth": {
|
|
3175
|
-
// F17 — page rendered an authenticated dashboard (a
|
|
3176
|
-
// previous OAuth bind already linked the account). Skip
|
|
3177
|
-
// consent + form-fill, navigate straight to the API key.
|
|
3178
|
-
// Uses the same post-OAuth loop runOAuthFlow uses after a
|
|
3179
|
-
// successful handshake.
|
|
3180
|
-
let credentials = await this.extractCredentials();
|
|
3181
|
-
const skippedPostVerify = credentials.api_key !== undefined;
|
|
3182
|
-
if (credentials.api_key === undefined) {
|
|
3183
|
-
credentials = await this.postVerifyLoop({
|
|
3184
|
-
service: task.service,
|
|
3185
|
-
maxRounds: task.postVerifyMaxRounds ?? 24,
|
|
4407
|
+
// `outcome` is re-computed when the OAuth path signals a form-fill
|
|
4408
|
+
// fall-back (Google login-only / no-account, e.g. plunk): the
|
|
4409
|
+
// `case "oauth"` handler re-runs planExecuteWithRetry with OAuth-
|
|
4410
|
+
// first suppressed and loops back through this same switch, so
|
|
4411
|
+
// every terminal case (submitted, planning_failed, …) stays in one
|
|
4412
|
+
// place. Bounded to a single re-route so a service that keeps
|
|
4413
|
+
// bouncing can't spin here.
|
|
4414
|
+
let outcome = await this.planExecuteWithRetry(task, fillValues, steps);
|
|
4415
|
+
let oauthFallbackUsed = false;
|
|
4416
|
+
// Multi-step signup guard (amplitude: email/name step → a dedicated
|
|
4417
|
+
// "Create your password" step). Bounds how many continuation form steps
|
|
4418
|
+
// we'll fill after the first submit before treating the signup as done.
|
|
4419
|
+
let multiStepRounds = 0;
|
|
4420
|
+
const MAX_MULTI_STEP_ROUNDS = 3;
|
|
4421
|
+
dispatch: for (;;) {
|
|
4422
|
+
switch (outcome.kind) {
|
|
4423
|
+
case "captcha_blocked":
|
|
4424
|
+
return {
|
|
4425
|
+
success: false,
|
|
4426
|
+
error: `captcha_blocked: ${outcome.captchaKind} challenge did not resolve. The site flagged this session.`,
|
|
3186
4427
|
steps,
|
|
3187
|
-
...(
|
|
3188
|
-
|
|
3189
|
-
|
|
3190
|
-
|
|
3191
|
-
|
|
3192
|
-
|
|
3193
|
-
|
|
3194
|
-
|
|
3195
|
-
|
|
3196
|
-
|
|
3197
|
-
|
|
3198
|
-
|
|
3199
|
-
|
|
4428
|
+
...this.resultTail(),
|
|
4429
|
+
};
|
|
4430
|
+
case "submit_failed":
|
|
4431
|
+
return {
|
|
4432
|
+
success: false,
|
|
4433
|
+
error: `submit_failed: could not click the signup button — ${outcome.reason}`,
|
|
4434
|
+
steps,
|
|
4435
|
+
...this.resultTail(),
|
|
4436
|
+
};
|
|
4437
|
+
case "planning_failed":
|
|
4438
|
+
return {
|
|
4439
|
+
success: false,
|
|
4440
|
+
error: `planning_failed: ${outcome.reason}`,
|
|
4441
|
+
steps,
|
|
4442
|
+
...this.resultTail(),
|
|
4443
|
+
};
|
|
4444
|
+
case "oauth_required":
|
|
4445
|
+
return {
|
|
4446
|
+
success: false,
|
|
4447
|
+
error: `oauth_required: ${task.service} offers only OAuth/SSO signup — there is no email/password form to automate.`,
|
|
4448
|
+
steps,
|
|
4449
|
+
...this.resultTail(),
|
|
4450
|
+
};
|
|
4451
|
+
case "needs_oauth_provider_session": {
|
|
4452
|
+
// rc.33-task — actionable: name the missing provider so
|
|
4453
|
+
// the user runs the right `mcp login` command. When more
|
|
4454
|
+
// than one provider is missing, the message lists them and
|
|
4455
|
+
// recommends any single one (operator picks).
|
|
4456
|
+
const missing = outcome.missingProviders;
|
|
4457
|
+
const have = outcome.haveSessions;
|
|
4458
|
+
const firstMissing = missing[0];
|
|
4459
|
+
const missingLabel = missing
|
|
4460
|
+
.map((p) => OAUTH_PROVIDERS[p].label)
|
|
4461
|
+
.join(" / ");
|
|
4462
|
+
const haveLabel = have.length > 0
|
|
4463
|
+
? have.map((p) => OAUTH_PROVIDERS[p].label).join(", ")
|
|
4464
|
+
: "(none)";
|
|
3200
4465
|
return {
|
|
3201
|
-
success:
|
|
3202
|
-
|
|
4466
|
+
success: false,
|
|
4467
|
+
error: `needs_oauth_provider_session: ${task.service} offers ${missingLabel} OAuth ` +
|
|
4468
|
+
`but the bot's chrome profile has no ${missingLabel} session ` +
|
|
4469
|
+
`(currently signed in to: ${haveLabel}). ` +
|
|
4470
|
+
`Run \`npx @trusty-squire/mcp login --provider=${firstMissing}\` ` +
|
|
4471
|
+
`to seed the session, then retry.`,
|
|
3203
4472
|
steps,
|
|
3204
4473
|
...this.resultTail(),
|
|
3205
4474
|
};
|
|
3206
4475
|
}
|
|
3207
|
-
|
|
3208
|
-
// path uses. The post-verify loop sets lastPostVerifyDoneReason
|
|
3209
|
-
// with [stuck_loop] or [existing_account_no_extract] markers
|
|
3210
|
-
// when it bails on a planner-loop or pre-existing-key state;
|
|
3211
|
-
// surface those distinctly rather than as the generic
|
|
3212
|
-
// no_credentials_after_already_signed_in.
|
|
3213
|
-
if (this.lastPostVerifyDoneReason !== null &&
|
|
3214
|
-
this.lastPostVerifyDoneReason.startsWith("[stuck_loop]")) {
|
|
4476
|
+
case "anti_bot_blocked":
|
|
3215
4477
|
return {
|
|
3216
4478
|
success: false,
|
|
3217
|
-
error: `
|
|
3218
|
-
`
|
|
3219
|
-
`
|
|
4479
|
+
error: `anti_bot_blocked: ${task.service}'s ${outcome.vendor} anti-bot interstitial would ` +
|
|
4480
|
+
`not clear — the bot's IP/fingerprint did not pass ${outcome.vendor}'s server-side ` +
|
|
4481
|
+
`risk score. This is a soft block (no challenge to solve); the user should sign up ` +
|
|
4482
|
+
`manually.`,
|
|
3220
4483
|
steps,
|
|
3221
4484
|
...this.resultTail(),
|
|
3222
4485
|
};
|
|
4486
|
+
case "oauth": {
|
|
4487
|
+
// T6/T7 — OAuth-first path. runOAuthFlow drives the consent
|
|
4488
|
+
// handshake and post-OAuth onboarding to its own terminal
|
|
4489
|
+
// SignupResult; there is no form submit / email verification.
|
|
4490
|
+
const oauthResult = await this.runOAuthFlow(task, outcome.selector, outcome.provider, steps);
|
|
4491
|
+
// Google login-only / no-account (plunk): OAuth is a dead end
|
|
4492
|
+
// but the email/password form can still create the account.
|
|
4493
|
+
// Re-run the form-fill path ONCE with OAuth-first suppressed
|
|
4494
|
+
// (forceFormFill) — re-navigate to the signup form first since
|
|
4495
|
+
// the OAuth flow left us on the service's /login page — then
|
|
4496
|
+
// loop back through this switch to dispatch the new outcome.
|
|
4497
|
+
if (oauthResult === OAUTH_FALL_BACK_TO_FORM_FILL) {
|
|
4498
|
+
if (oauthFallbackUsed) {
|
|
4499
|
+
// Already fell back once and OAuth came up again — refuse
|
|
4500
|
+
// to ping-pong. Surface the dead end honestly.
|
|
4501
|
+
return {
|
|
4502
|
+
success: false,
|
|
4503
|
+
error: `oauth_required: ${task.service}'s OAuth is login-only (no account for this ` +
|
|
4504
|
+
`identity) and the email/password fall-back did not complete a signup.`,
|
|
4505
|
+
steps,
|
|
4506
|
+
...this.resultTail(),
|
|
4507
|
+
};
|
|
4508
|
+
}
|
|
4509
|
+
oauthFallbackUsed = true;
|
|
4510
|
+
// If the OAuth recovery already left us ON a signup form (the
|
|
4511
|
+
// amplitude demo-escape clicked "Create a free account" → the real
|
|
4512
|
+
// /signup form), fill it IN PLACE — re-navigating to task.signupUrl
|
|
4513
|
+
// could bounce back to the demo. Otherwise re-navigate (the
|
|
4514
|
+
// login-only / no-account case left us on a /login page).
|
|
4515
|
+
const onSignupFormHtml = (await this.browser.getState().catch(() => null))?.html ?? "";
|
|
4516
|
+
if (classifySignupHtml(onSignupFormHtml) === "signup") {
|
|
4517
|
+
steps.push(`OAuth recovery already on a signup form ` +
|
|
4518
|
+
`(${pathOf(this.browser.currentUrl())}) — filling in place.`);
|
|
4519
|
+
}
|
|
4520
|
+
else {
|
|
4521
|
+
const formUrl = task.signupUrl ?? this.browser.currentUrl();
|
|
4522
|
+
steps.push(`Re-routing to email/password signup at ${formUrl} after OAuth no-account.`);
|
|
4523
|
+
await this.browser.goto(formUrl);
|
|
4524
|
+
}
|
|
4525
|
+
outcome = await this.planExecuteWithRetry(task, fillValues, steps,
|
|
4526
|
+
/* forceFormFill */ true);
|
|
4527
|
+
continue dispatch;
|
|
4528
|
+
}
|
|
4529
|
+
return oauthResult;
|
|
3223
4530
|
}
|
|
3224
|
-
|
|
3225
|
-
|
|
4531
|
+
case "already_oauth": {
|
|
4532
|
+
// F17 — page rendered an authenticated dashboard (a
|
|
4533
|
+
// previous OAuth bind already linked the account). Skip
|
|
4534
|
+
// consent + form-fill, navigate straight to the API key.
|
|
4535
|
+
// Uses the same post-OAuth loop runOAuthFlow uses after a
|
|
4536
|
+
// successful handshake.
|
|
4537
|
+
let credentials = await this.extractCredentials();
|
|
4538
|
+
const skippedPostVerify = credentials.api_key !== undefined;
|
|
4539
|
+
if (credentials.api_key === undefined) {
|
|
4540
|
+
credentials = await this.postVerifyLoop({
|
|
4541
|
+
service: task.service,
|
|
4542
|
+
maxRounds: task.postVerifyMaxRounds ?? 24,
|
|
4543
|
+
steps,
|
|
4544
|
+
...(task.scopeHint !== undefined ? { scopeHint: task.scopeHint } : {}),
|
|
4545
|
+
...(task.machineToken !== undefined ? { machineToken: task.machineToken } : {}),
|
|
4546
|
+
...(task.apiBase !== undefined ? { apiBase: task.apiBase } : {}),
|
|
4547
|
+
});
|
|
4548
|
+
}
|
|
4549
|
+
if (credentials.api_key !== undefined) {
|
|
4550
|
+
// 0.8.3-rc.1 — when extractCredentials short-circuited
|
|
4551
|
+
// before postVerifyLoop ran, no captures were written.
|
|
4552
|
+
// Emit a synthetic extract round so auto-promote can
|
|
4553
|
+
// build a "navigate + extract" skill from this run.
|
|
4554
|
+
if (skippedPostVerify) {
|
|
4555
|
+
await this.writeFastPathSyntheticCapture(task.service, 0, true);
|
|
4556
|
+
}
|
|
4557
|
+
return {
|
|
4558
|
+
success: true,
|
|
4559
|
+
credentials,
|
|
4560
|
+
steps,
|
|
4561
|
+
...this.resultTail(),
|
|
4562
|
+
};
|
|
4563
|
+
}
|
|
4564
|
+
// 0.8.2-rc.10 — same sentinel-pattern routing the runOAuthFlow
|
|
4565
|
+
// path uses. The post-verify loop sets lastPostVerifyDoneReason
|
|
4566
|
+
// with [stuck_loop] or [existing_account_no_extract] markers
|
|
4567
|
+
// when it bails on a planner-loop or pre-existing-key state;
|
|
4568
|
+
// surface those distinctly rather than as the generic
|
|
4569
|
+
// no_credentials_after_already_signed_in.
|
|
4570
|
+
if (this.lastPostVerifyDoneReason !== null &&
|
|
4571
|
+
this.lastPostVerifyDoneReason.startsWith("[stuck_loop]")) {
|
|
4572
|
+
return {
|
|
4573
|
+
success: false,
|
|
4574
|
+
error: `planner_stuck: ${task.service}'s dashboard re-picked the same step repeatedly ` +
|
|
4575
|
+
`with no inventory change and the bot's hardcoded API-key URL fallbacks did not ` +
|
|
4576
|
+
`advance the page — finish the signup manually.`,
|
|
4577
|
+
steps,
|
|
4578
|
+
...this.resultTail(),
|
|
4579
|
+
};
|
|
4580
|
+
}
|
|
4581
|
+
if (this.lastPostVerifyDoneReason !== null &&
|
|
4582
|
+
this.lastPostVerifyDoneReason.startsWith("[existing_account_no_extract]")) {
|
|
4583
|
+
return {
|
|
4584
|
+
success: false,
|
|
4585
|
+
error: `existing_account_no_extract: ${task.service}'s dashboard shows pre-existing API ` +
|
|
4586
|
+
`keys for this identity but the values are masked and unrecoverable — wipe the ` +
|
|
4587
|
+
`test identity's account on ${task.service} or sign in manually and reveal the key.`,
|
|
4588
|
+
steps,
|
|
4589
|
+
...this.resultTail(),
|
|
4590
|
+
};
|
|
4591
|
+
}
|
|
3226
4592
|
return {
|
|
3227
4593
|
success: false,
|
|
3228
|
-
error:
|
|
3229
|
-
|
|
3230
|
-
`test identity's account on ${task.service} or sign in manually and reveal the key.`,
|
|
4594
|
+
error: "no_credentials_after_already_signed_in: bot detected an authenticated dashboard " +
|
|
4595
|
+
"but post-OAuth navigation did not surface an API key. Sign in manually and generate the token.",
|
|
3231
4596
|
steps,
|
|
3232
4597
|
...this.resultTail(),
|
|
3233
4598
|
};
|
|
3234
4599
|
}
|
|
3235
|
-
|
|
3236
|
-
|
|
3237
|
-
|
|
3238
|
-
|
|
3239
|
-
|
|
3240
|
-
|
|
3241
|
-
|
|
4600
|
+
case "submitted": {
|
|
4601
|
+
// Multi-step signup: a clean submit can land on ANOTHER form step
|
|
4602
|
+
// (amplitude: a dedicated "Create your password" page) rather than
|
|
4603
|
+
// the dashboard or a verify-email screen. Detect a continuation
|
|
4604
|
+
// form step and run the fill-submit phase again on it, bounded,
|
|
4605
|
+
// before treating the submit as done — otherwise the post-submit
|
|
4606
|
+
// logic below polls the inbox for a verification email the
|
|
4607
|
+
// half-finished signup never triggers. Conservative (a visible
|
|
4608
|
+
// empty password input + a submit control, NOT a login or
|
|
4609
|
+
// check-your-email page), so a genuine email-verification flow
|
|
4610
|
+
// isn't mistaken for a form step.
|
|
4611
|
+
if (multiStepRounds < MAX_MULTI_STEP_ROUNDS) {
|
|
4612
|
+
const stepLabel = await this.detectContinuationFormStep();
|
|
4613
|
+
if (stepLabel !== null) {
|
|
4614
|
+
multiStepRounds += 1;
|
|
4615
|
+
steps.push(`Post-submit: continuation form step detected (${stepLabel}) — ` +
|
|
4616
|
+
`filling + submitting (step ${multiStepRounds + 1}).`);
|
|
4617
|
+
outcome = await this.planExecuteWithRetry(task, fillValues, steps);
|
|
4618
|
+
continue dispatch;
|
|
4619
|
+
}
|
|
4620
|
+
}
|
|
4621
|
+
break dispatch;
|
|
4622
|
+
}
|
|
3242
4623
|
}
|
|
3243
|
-
case "submitted":
|
|
3244
|
-
break;
|
|
3245
4624
|
}
|
|
3246
4625
|
await saveDebugSnapshot(this.browser, "after-submit");
|
|
3247
4626
|
// Step 6: Extract creds from page.
|
|
@@ -3250,10 +4629,25 @@ export class SignupAgent {
|
|
|
3250
4629
|
// Step 7: Email verification + post-verification navigation.
|
|
3251
4630
|
let verificationFailed;
|
|
3252
4631
|
if (credentials.api_key === undefined && credentials.username === undefined) {
|
|
3253
|
-
// S3: read the post-submit page
|
|
3254
|
-
//
|
|
3255
|
-
//
|
|
3256
|
-
|
|
4632
|
+
// S3: read the post-submit page to decide both the no-inbox bail
|
|
4633
|
+
// (M2) and, when an inbox exists, the poll duration. The page is
|
|
4634
|
+
// read up to TWICE: once immediately, then — only if the first
|
|
4635
|
+
// read was inconclusive — again after a short settle, because SPA
|
|
4636
|
+
// signups render the "check your email" screen a beat after submit
|
|
4637
|
+
// and sampling once races that render (the bug behind the
|
|
4638
|
+
// Postmark/ElevenLabs/Browserbase/Grafana false `verification_not_sent`).
|
|
4639
|
+
let postSubmitText = await this.browser.extractText();
|
|
4640
|
+
let expectsEmail = expectsVerificationEmail(postSubmitText);
|
|
4641
|
+
if (!expectsEmail && !submitWasRejected(postSubmitText)) {
|
|
4642
|
+
await this.browser.wait(SUBMIT_SETTLE_SECONDS);
|
|
4643
|
+
postSubmitText = await this.browser.extractText();
|
|
4644
|
+
expectsEmail = expectsVerificationEmail(postSubmitText);
|
|
4645
|
+
}
|
|
4646
|
+
// A clean submit that did NOT visibly reject created an account —
|
|
4647
|
+
// verification mail is plausibly inbound even without a "check
|
|
4648
|
+
// your email" screen. Distinguish that from a rejected submit
|
|
4649
|
+
// (already-registered, validation error) where no mail is coming.
|
|
4650
|
+
const submitRejected = submitWasRejected(postSubmitText);
|
|
3257
4651
|
if (task.inbox === undefined) {
|
|
3258
4652
|
// M2/S3: no inbox to receive a verification email (the SES
|
|
3259
4653
|
// inbound pipeline is mothballed — TODOS M1). If the page is
|
|
@@ -3273,20 +4667,34 @@ export class SignupAgent {
|
|
|
3273
4667
|
}
|
|
3274
4668
|
}
|
|
3275
4669
|
else {
|
|
3276
|
-
// S3: don't blind-wait
|
|
3277
|
-
//
|
|
3278
|
-
//
|
|
4670
|
+
// S3: don't blind-wait, but don't under-poll a clean submit
|
|
4671
|
+
// either. Three cases:
|
|
4672
|
+
// - page says "check your email" → full timeout (mail is
|
|
4673
|
+
// definitely coming).
|
|
4674
|
+
// - submit visibly rejected → 45s probe (no account was
|
|
4675
|
+
// created; no mail is coming).
|
|
4676
|
+
// - inconclusive but submit clean → 120s floor (an account was
|
|
4677
|
+
// created, so transactional mail is plausibly inbound and
|
|
4678
|
+
// can outlast the 45s probe; bounded below the full timeout).
|
|
3279
4679
|
const verificationTimeoutSeconds = expectsEmail
|
|
3280
4680
|
? (task.verificationTimeoutSeconds ?? 180)
|
|
3281
|
-
:
|
|
4681
|
+
: submitRejected
|
|
4682
|
+
? VERIFICATION_PROBE_SECONDS
|
|
4683
|
+
: SUBMITTED_PROBE_FLOOR_SECONDS;
|
|
3282
4684
|
steps.push(expectsEmail
|
|
3283
4685
|
? `Post-submit page asks to check email — polling inbox (up to ${verificationTimeoutSeconds}s)...`
|
|
3284
|
-
:
|
|
4686
|
+
: submitRejected
|
|
4687
|
+
? `Post-submit page shows a rejected submit — short ${verificationTimeoutSeconds}s probe (S3: no account created, no verification email expected)...`
|
|
4688
|
+
: `Post-submit page is inconclusive but submit was clean — polling inbox up to ${verificationTimeoutSeconds}s (S3: an account may have been created and mail can lag)...`);
|
|
3285
4689
|
try {
|
|
3286
|
-
const email = await this.waitForVerificationEmail(task.inbox, task.email, verificationTimeoutSeconds);
|
|
4690
|
+
const email = await this.waitForVerificationEmail(task.inbox, this.pendingVerificationAlias ?? task.email, verificationTimeoutSeconds);
|
|
3287
4691
|
steps.push(`Received: "${email.subject}" from ${email.from_address}`);
|
|
3288
|
-
if (email.parsed_links.length > 0) {
|
|
3289
|
-
|
|
4692
|
+
if (email.parsed_links.length > 0 || (email.body_html ?? "") !== "") {
|
|
4693
|
+
// URL-keyword scorer first; if it can't see past a click-tracker
|
|
4694
|
+
// wrapper, fall back to matching the link's ANCHOR TEXT in the
|
|
4695
|
+
// HTML body (amplitude's SendGrid-wrapped "Activate account").
|
|
4696
|
+
const verifyLink = this.pickVerificationLink(Array.from(email.parsed_links)) ??
|
|
4697
|
+
pickVerificationLinkFromHtml(email.body_html ?? "");
|
|
3290
4698
|
if (verifyLink !== null) {
|
|
3291
4699
|
steps.push(`Following verification link: ${verifyLink}`);
|
|
3292
4700
|
await this.browser.goto(verifyLink);
|
|
@@ -3312,12 +4720,22 @@ export class SignupAgent {
|
|
|
3312
4720
|
});
|
|
3313
4721
|
}
|
|
3314
4722
|
}
|
|
4723
|
+
else if (email.parsed_codes.length > 0) {
|
|
4724
|
+
credentials = await this.enterEmailVerificationCode(email.parsed_codes[0] ?? "", task, password, steps);
|
|
4725
|
+
}
|
|
3315
4726
|
else {
|
|
3316
4727
|
steps.push("Email had no usable verification link.");
|
|
3317
4728
|
}
|
|
3318
4729
|
}
|
|
4730
|
+
else if (email.parsed_codes.length > 0) {
|
|
4731
|
+
// No links at all, but the email carries a numeric code
|
|
4732
|
+
// (plausible: "Enter 4011 to verify your email address").
|
|
4733
|
+
// The signup page transitioned to a code-input step after
|
|
4734
|
+
// submit — type the code in rather than waiting for a link.
|
|
4735
|
+
credentials = await this.enterEmailVerificationCode(email.parsed_codes[0] ?? "", task, password, steps);
|
|
4736
|
+
}
|
|
3319
4737
|
else {
|
|
3320
|
-
steps.push("Email had no parsed links — skipping verification
|
|
4738
|
+
steps.push("Email had no parsed links or codes — skipping verification.");
|
|
3321
4739
|
}
|
|
3322
4740
|
}
|
|
3323
4741
|
catch (err) {
|
|
@@ -3325,7 +4743,9 @@ export class SignupAgent {
|
|
|
3325
4743
|
steps.push(`Inbox poll failed: ${detail}`);
|
|
3326
4744
|
verificationFailed = expectsEmail
|
|
3327
4745
|
? `verification_not_sent: form submitted and the page asked to check email, but none arrived in ${verificationTimeoutSeconds}s — the service likely withheld it (anti-abuse) or requires manual signup`
|
|
3328
|
-
:
|
|
4746
|
+
: submitRejected
|
|
4747
|
+
? `verification_not_sent: form submitted but the page reported a rejected submit (already-registered / validation error) and no mail arrived in the ${verificationTimeoutSeconds}s probe — no account was created`
|
|
4748
|
+
: `verification_not_sent: form submitted cleanly but no "check your email" prompt appeared and none arrived in ${verificationTimeoutSeconds}s — the service likely withheld it (fresh-domain anti-abuse) or verifies by another channel (SMS / authenticator)`;
|
|
3329
4749
|
}
|
|
3330
4750
|
}
|
|
3331
4751
|
}
|
|
@@ -3396,10 +4816,46 @@ export class SignupAgent {
|
|
|
3396
4816
|
// services that don't gate OAuth on Turnstile).
|
|
3397
4817
|
try {
|
|
3398
4818
|
const captcha = await this.browser.solveVisibleCaptcha(20_000);
|
|
3399
|
-
if (captcha.found) {
|
|
3400
|
-
steps.push(captcha.
|
|
3401
|
-
|
|
3402
|
-
|
|
4819
|
+
if (captcha.found && captcha.solved) {
|
|
4820
|
+
steps.push(`OAuth: ticked the visible ${captcha.kind} checkbox before clicking the ${provider.label} affordance`);
|
|
4821
|
+
}
|
|
4822
|
+
else if (captcha.found && !captcha.solved) {
|
|
4823
|
+
// Tier-2 click-and-wait timed out. For reCAPTCHA v2 this is the
|
|
4824
|
+
// SAME state the form-submit gate (runCaptchaGate) recovers from
|
|
4825
|
+
// by escalating to the third-party solver — mirror that path here
|
|
4826
|
+
// so OAuth-first flows aren't left clicking a Google button that
|
|
4827
|
+
// the service keeps gated behind an unsolved checkbox (replit,
|
|
4828
|
+
// uploadcare). Turnstile is deliberately NOT escalated: Cloudflare
|
|
4829
|
+
// scores at the IP layer, so a solver-issued token is rejected
|
|
4830
|
+
// anyway and only burns the 2Captcha balance.
|
|
4831
|
+
let solvedViaTier3 = false;
|
|
4832
|
+
if (captcha.kind === "recaptcha" && this.captchaSolver?.isAvailable() === true) {
|
|
4833
|
+
const sitekey = await this.browser.extractRecaptchaSitekey();
|
|
4834
|
+
const pageUrl = (await this.browser.getState().catch(() => null))?.url;
|
|
4835
|
+
if (sitekey !== null && pageUrl !== undefined) {
|
|
4836
|
+
steps.push(`OAuth: Tier 3 — submitting reCAPTCHA sitekey to 2Captcha (${sitekey.slice(0, 10)}…)`);
|
|
4837
|
+
const solveRes = await this.captchaSolver.solveRecaptchaV2({ sitekey, pageUrl });
|
|
4838
|
+
if (solveRes.kind === "ok") {
|
|
4839
|
+
const injected = await this.browser.injectRecaptchaToken(solveRes.token);
|
|
4840
|
+
if (injected) {
|
|
4841
|
+
solvedViaTier3 = true;
|
|
4842
|
+
steps.push(`OAuth: Tier 3 solved the reCAPTCHA in ${Math.round(solveRes.durationMs / 1000)}s via 2Captcha — clicking the ${provider.label} affordance`);
|
|
4843
|
+
}
|
|
4844
|
+
else {
|
|
4845
|
+
steps.push(`OAuth: Tier 3 token arrived but page injection failed — clicking the ${provider.label} affordance anyway`);
|
|
4846
|
+
}
|
|
4847
|
+
}
|
|
4848
|
+
else {
|
|
4849
|
+
steps.push(`OAuth: Tier 3 ${solveRes.kind}` +
|
|
4850
|
+
("reason" in solveRes ? `: ${solveRes.reason}` : "") +
|
|
4851
|
+
("durationMs" in solveRes ? ` (${Math.round(solveRes.durationMs / 1000)}s)` : "") +
|
|
4852
|
+
` — clicking the ${provider.label} affordance anyway`);
|
|
4853
|
+
}
|
|
4854
|
+
}
|
|
4855
|
+
}
|
|
4856
|
+
if (!solvedViaTier3) {
|
|
4857
|
+
steps.push(`OAuth: visible ${captcha.kind} present but did not solve in 20s — clicking the ${provider.label} affordance anyway`);
|
|
4858
|
+
}
|
|
3403
4859
|
}
|
|
3404
4860
|
}
|
|
3405
4861
|
catch (err) {
|
|
@@ -3407,7 +4863,21 @@ export class SignupAgent {
|
|
|
3407
4863
|
steps.push(`OAuth: visible-captcha precheck failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
|
|
3408
4864
|
}
|
|
3409
4865
|
steps.push(`OAuth: clicking the ${provider.label} sign-in affordance`);
|
|
3410
|
-
|
|
4866
|
+
// Google Identity Services (GSI) / FedCM does NOT redirect — clicking the
|
|
4867
|
+
// widget raises a browser-native FedCM dialog or a popup and returns a
|
|
4868
|
+
// JWT to a JS callback. The classic startOAuth waits for a provider
|
|
4869
|
+
// redirect that never comes, so it falsely concludes "signed in" and the
|
|
4870
|
+
// session never persists (northflank). Detect GSI and drive it over CDP.
|
|
4871
|
+
let gsiHandled = false;
|
|
4872
|
+
if (provider.id === "google" && (await this.browser.hasGoogleGsiAffordance())) {
|
|
4873
|
+
const gsi = await this.browser.tryGoogleGsiLogin(oauthSelector);
|
|
4874
|
+
gsiHandled = true;
|
|
4875
|
+
steps.push(`OAuth: Google Identity Services / FedCM widget — resolved via ${gsi.via}` +
|
|
4876
|
+
(gsi.ok ? "" : " (no FedCM dialog or popup appeared — the widget may need a different trigger)"));
|
|
4877
|
+
}
|
|
4878
|
+
if (!gsiHandled) {
|
|
4879
|
+
await this.browser.startOAuth(oauthSelector);
|
|
4880
|
+
}
|
|
3411
4881
|
await this.browser.wait(3);
|
|
3412
4882
|
await saveDebugSnapshot(this.browser, "oauth-after-click");
|
|
3413
4883
|
// Bounded consent walk — handles account-chooser → consent as two
|
|
@@ -3434,6 +4904,21 @@ export class SignupAgent {
|
|
|
3434
4904
|
await this.browser.wait(1);
|
|
3435
4905
|
continue;
|
|
3436
4906
|
}
|
|
4907
|
+
// Google "Choose an account" chooser. Its "…to continue to <app>" copy
|
|
4908
|
+
// matches the consent classifier, but it is an account PICKER — it needs
|
|
4909
|
+
// a card CLICK, not a scope approve. Google shows it before the real
|
|
4910
|
+
// consent right after a fresh relogin (the first OAuth re-confirms the
|
|
4911
|
+
// account). Without handling it here the bot tries to approve, stalls,
|
|
4912
|
+
// and the page flips to needs_login → abort (every Google service fails
|
|
4913
|
+
// until an OAuth is done once). Click the account card and re-read; the
|
|
4914
|
+
// next pass lands on the real consent screen (or back at the service).
|
|
4915
|
+
if (provider.id === "google" &&
|
|
4916
|
+
/\/(?:accountchooser|chooseaccount|oauthchooseaccount)/i.test(url)) {
|
|
4917
|
+
const clicked = await this.tryClickGoogleChooserCard();
|
|
4918
|
+
steps.push(`OAuth: Google account chooser — ${clicked ? "clicked the account card" : "no clickable account card found"}`);
|
|
4919
|
+
await this.browser.wait(2);
|
|
4920
|
+
continue;
|
|
4921
|
+
}
|
|
3437
4922
|
const authState = provider.classifyAuthState(url, body);
|
|
3438
4923
|
steps.push(`OAuth: ${provider.label} auth state = ${authState} (url=${url.slice(0, 120)})`);
|
|
3439
4924
|
if (authState === "not_provider")
|
|
@@ -3668,6 +5153,30 @@ export class SignupAgent {
|
|
|
3668
5153
|
return this.oauthAbort("oauth_consent_needs_review", `${provider.label} consent page (URL unparseable) lists scope-grant phrases: ` +
|
|
3669
5154
|
`[${dangerPhrases.join(" | ")}]. Pausing for manual review.`, steps);
|
|
3670
5155
|
}
|
|
5156
|
+
// Google's newer consent URL hides the scope= param behind an
|
|
5157
|
+
// opaque `part=` token, so extractOAuthScopes() returned null
|
|
5158
|
+
// even on an entirely-basic email/profile consent (measured on
|
|
5159
|
+
// uploadcare). The visible DOM is the only remaining signal: if
|
|
5160
|
+
// it lists ONLY openid/email/profile-family grants (and the
|
|
5161
|
+
// danger scraper above already cleared it), this is exactly the
|
|
5162
|
+
// consent the URL-readable happy path auto-approves — so recover
|
|
5163
|
+
// it here instead of blocking. Anything ambiguous falls through
|
|
5164
|
+
// to the conservative abort below. Mirror the basic-scopes happy
|
|
5165
|
+
// path: set consentAlreadyApproved, advance, handle !advanced.
|
|
5166
|
+
if (provider.id === "google" &&
|
|
5167
|
+
!consentAlreadyApproved &&
|
|
5168
|
+
googleConsentIsBasicFromDom(body)) {
|
|
5169
|
+
steps.push("OAuth: consent scopes unreadable from URL but DOM lists only " +
|
|
5170
|
+
"basic email/profile scopes — auto-approving");
|
|
5171
|
+
consentAlreadyApproved = true;
|
|
5172
|
+
const advanced = await this.browser.advanceOAuthConsent(provider.id);
|
|
5173
|
+
if (!advanced) {
|
|
5174
|
+
return this.oauthAbort("oauth_consent_needs_review", `basic-only consent read from the ${provider.label} DOM but no ` +
|
|
5175
|
+
`approve control found on the consent page — approve it manually.`, steps);
|
|
5176
|
+
}
|
|
5177
|
+
await this.browser.wait(3);
|
|
5178
|
+
continue;
|
|
5179
|
+
}
|
|
3671
5180
|
// F16 — order matters here. The post-grant intermediate page
|
|
3672
5181
|
// (after blind-consent approved on iter 1) is also classified
|
|
3673
5182
|
// as "consent" with unreadable scopes. If we check the blind-
|
|
@@ -3743,8 +5252,93 @@ export class SignupAgent {
|
|
|
3743
5252
|
// for same-tab redirects) and drive post-OAuth onboarding.
|
|
3744
5253
|
await this.browser.settleAfterOAuth();
|
|
3745
5254
|
await this.browser.wait(2);
|
|
5255
|
+
// Token-exchange settle. Stytch/WorkOS-style services (groq) bounce the
|
|
5256
|
+
// OAuth back to a callback page (/authenticate?token=…) and complete an
|
|
5257
|
+
// ASYNC token→session exchange there, THEN redirect to the dashboard.
|
|
5258
|
+
// With a warm Google session the round-trip is near-instant, so the bot
|
|
5259
|
+
// arrives at the callback while the exchange is still in flight — and
|
|
5260
|
+
// acting now (the rc.20 second-click retry, or post-verify navigation)
|
|
5261
|
+
// interrupts it, stranding the run on the login page. Give a callback-
|
|
5262
|
+
// shaped URL a chance to redirect itself away before we touch anything.
|
|
5263
|
+
{
|
|
5264
|
+
// Wait while EITHER the URL is still callback/login-shaped OR the page
|
|
5265
|
+
// shows async-session processing copy ("Creating your organization…").
|
|
5266
|
+
// Budget 24s — MEASURED: Stytch B2B's discovery+org-creation+session
|
|
5267
|
+
// chain takes ~5-7s but varies, and the bot's own page reads add jitter;
|
|
5268
|
+
// a short URL-only wait exits mid-provisioning and the rc.20 retry then
|
|
5269
|
+
// re-clicks OAuth and aborts it. Re-read text each tick.
|
|
5270
|
+
let settled = false;
|
|
5271
|
+
for (let i = 0; i < 12; i++) {
|
|
5272
|
+
const url = this.browser.currentUrl();
|
|
5273
|
+
const text = await this.browser.extractText().catch(() => "");
|
|
5274
|
+
if (!isLoginPageUrl(url) && !isAuthProcessingText(text)) {
|
|
5275
|
+
settled = true;
|
|
5276
|
+
break;
|
|
5277
|
+
}
|
|
5278
|
+
if (i === 0 && isAuthProcessingText(text)) {
|
|
5279
|
+
steps.push("OAuth: session is provisioning (auth-processing screen) — holding, not touching the page.");
|
|
5280
|
+
}
|
|
5281
|
+
await this.browser.wait(2);
|
|
5282
|
+
}
|
|
5283
|
+
const settledUrl = this.browser.currentUrl();
|
|
5284
|
+
steps.push(`OAuth: waited for the callback to settle — now at ${pathOf(settledUrl)}` +
|
|
5285
|
+
(settled ? " (redirected to the app)" : " (still login/processing-shaped)"));
|
|
5286
|
+
}
|
|
5287
|
+
// Dead-route escape. The OAuth often returns to the SIGNUP url it
|
|
5288
|
+
// started from (northflank: app.northflank.com/signup). For an account
|
|
5289
|
+
// that now EXISTS, /signup (and /login, /register…) is a dead route the
|
|
5290
|
+
// SPA can't render — it hangs on a "Connecting" shell forever and the
|
|
5291
|
+
// post-verify planner reads it as "signed out." Navigating to the app
|
|
5292
|
+
// ORIGIN ROOT lets the service redirect an authenticated user to its
|
|
5293
|
+
// real dashboard. Generalizes: a service already on its dashboard has a
|
|
5294
|
+
// non-auth path here and is left alone.
|
|
5295
|
+
if (isSignupOrLoginRoute(this.browser.currentUrl())) {
|
|
5296
|
+
const root = originRoot(this.browser.currentUrl());
|
|
5297
|
+
if (root !== null) {
|
|
5298
|
+
steps.push(`OAuth: post-auth landing is a signup/login route (${pathOf(this.browser.currentUrl())}) — ` +
|
|
5299
|
+
`navigating to the app root (${root}) so the service routes us to the dashboard.`);
|
|
5300
|
+
try {
|
|
5301
|
+
await this.browser.goto(root);
|
|
5302
|
+
await this.browser.wait(2);
|
|
5303
|
+
}
|
|
5304
|
+
catch {
|
|
5305
|
+
// navigation hiccup — the post-verify loop re-reads regardless.
|
|
5306
|
+
}
|
|
5307
|
+
}
|
|
5308
|
+
}
|
|
3746
5309
|
await saveDebugSnapshot(this.browser, "oauth-post-consent");
|
|
3747
5310
|
steps.push(`OAuth: signed in via ${provider.label} — driving post-OAuth onboarding to the API key`);
|
|
5311
|
+
// amplitude class — OAuth drops the bot into the service's READ-ONLY DEMO
|
|
5312
|
+
// sandbox (app.amplitude.com/analytics/demo) instead of a real account: it
|
|
5313
|
+
// has NO API key, and the only route to a real org is the prominent
|
|
5314
|
+
// "Create a free account" CTA, which opens the real /signup form. Detect
|
|
5315
|
+
// the demo state and click that CTA, then re-route to form-fill (the
|
|
5316
|
+
// email/name/password form the bot now completes, multi-step password
|
|
5317
|
+
// included). MEASURED 2026-06-04: without this the post-verify loop hunts
|
|
5318
|
+
// the demo for a key that isn't there → oauth_onboarding_failed.
|
|
5319
|
+
{
|
|
5320
|
+
await this.browser.wait(2); // let the post-OAuth redirect settle onto the demo
|
|
5321
|
+
const demoState = await this.browser.getState();
|
|
5322
|
+
const demoText = await this.browser.extractText().catch(() => "");
|
|
5323
|
+
if (isSandboxDemoState(demoState.url, demoText)) {
|
|
5324
|
+
const cta = findCreateAccountCta(await this.browser.extractInteractiveElements());
|
|
5325
|
+
if (cta !== null) {
|
|
5326
|
+
steps.push(`OAuth: landed in ${task.service}'s read-only demo sandbox ` +
|
|
5327
|
+
`(${pathOf(demoState.url)}) — clicking ` +
|
|
5328
|
+
`"${(cta.visibleText ?? "Create a free account").trim()}" to escape into ` +
|
|
5329
|
+
`the real signup form.`);
|
|
5330
|
+
try {
|
|
5331
|
+
await this.browser.click(cta.selector);
|
|
5332
|
+
await this.browser.wait(2);
|
|
5333
|
+
}
|
|
5334
|
+
catch (err) {
|
|
5335
|
+
steps.push(`OAuth: demo-escape click threw (${err instanceof Error ? err.message : String(err)}) — ` +
|
|
5336
|
+
`falling back to form-fill anyway.`);
|
|
5337
|
+
}
|
|
5338
|
+
return OAUTH_FALL_BACK_TO_FORM_FILL;
|
|
5339
|
+
}
|
|
5340
|
+
}
|
|
5341
|
+
}
|
|
3748
5342
|
// rc.20 — login-loop detection. Services like Groq complete the
|
|
3749
5343
|
// Google OAuth handshake server-side but redirect back to a
|
|
3750
5344
|
// login-looking page (/authenticate) where the user has to click
|
|
@@ -3766,6 +5360,43 @@ export class SignupAgent {
|
|
|
3766
5360
|
const postOAuthState = await this.browser.getState();
|
|
3767
5361
|
const postOAuthInv = await this.buildInventory(steps, [provider.id]);
|
|
3768
5362
|
const loopBtn = isLoginLoopState(postOAuthState.url, postOAuthInv, provider.id);
|
|
5363
|
+
// amplitude class — post-OAuth we're STUCK on a login page (the provider
|
|
5364
|
+
// button is still present, or the URL is a login route) that carries an
|
|
5365
|
+
// in-page SIGNUP CTA. Google signed in fine, but the service has no
|
|
5366
|
+
// account/org for this identity and expects us to CREATE one via the
|
|
5367
|
+
// page's "Don't have an account? Sign up for free" link. The naive
|
|
5368
|
+
// loopBtn path below would re-trigger OAuth and loop until
|
|
5369
|
+
// oauth_loop_detected. Instead: click the signup CTA and re-route into
|
|
5370
|
+
// the email/password signup path (same sentinel the detectGoogleNoAccount
|
|
5371
|
+
// gate uses ~40 lines below). CONSERVATIVE: only fires in the STUCK state
|
|
5372
|
+
// (loopBtn or a login URL) and only when the page is NOT already a signup
|
|
5373
|
+
// form, so a dashboard that successfully landed but carries a stray
|
|
5374
|
+
// signup link is untouched, and a service that legitimately needs a
|
|
5375
|
+
// second OAuth click (no signup CTA) falls through. NOTE: gate on
|
|
5376
|
+
// classify !== "signup", NOT === "login": amplitude's Org-Login SSO page
|
|
5377
|
+
// has no password field, so classifySignupHtml returns "other".
|
|
5378
|
+
if ((loopBtn !== null || isLoginPageUrl(postOAuthState.url)) &&
|
|
5379
|
+
classifySignupHtml(postOAuthState.html) !== "signup") {
|
|
5380
|
+
const signupCta = findSignupCtaElement(postOAuthInv);
|
|
5381
|
+
if (signupCta !== null) {
|
|
5382
|
+
const ctaText = (signupCta.visibleText ??
|
|
5383
|
+
signupCta.ariaLabel ??
|
|
5384
|
+
"sign up").trim();
|
|
5385
|
+
steps.push(`Post-OAuth: ${task.service} shows a login page with a signup CTA ("${ctaText}") — ` +
|
|
5386
|
+
`${provider.label} identity has no account; clicking signup to create one.`);
|
|
5387
|
+
try {
|
|
5388
|
+
await this.browser.click(signupCta.selector);
|
|
5389
|
+
await this.browser.wait(2);
|
|
5390
|
+
}
|
|
5391
|
+
catch (err) {
|
|
5392
|
+
steps.push(`Post-OAuth: clicking the signup CTA threw (${err instanceof Error ? err.message : String(err)}) — ` +
|
|
5393
|
+
`falling back to form-fill anyway.`);
|
|
5394
|
+
}
|
|
5395
|
+
// Re-route into the email/password signup path: runSignup catches
|
|
5396
|
+
// this sentinel and re-runs form-fill on the now-signup page.
|
|
5397
|
+
return OAUTH_FALL_BACK_TO_FORM_FILL;
|
|
5398
|
+
}
|
|
5399
|
+
}
|
|
3769
5400
|
if (loopBtn !== null) {
|
|
3770
5401
|
steps.push(`Post-OAuth: landed on a login-like page (${pathOf(postOAuthState.url)}) ` +
|
|
3771
5402
|
`with a ${provider.label} sign-in button still visible — service requires a ` +
|
|
@@ -3807,6 +5438,20 @@ export class SignupAgent {
|
|
|
3807
5438
|
const gateState = await this.browser.getState();
|
|
3808
5439
|
const gateText = await this.browser.extractText().catch(() => "");
|
|
3809
5440
|
const gateInv = postOAuthInv;
|
|
5441
|
+
// (a0) Google-login-only / no-account (plunk class). OAuth
|
|
5442
|
+
// completed but the service bounced back saying this Google
|
|
5443
|
+
// identity has no account (e.g. plunk's
|
|
5444
|
+
// /auth/login?message=No%20account%20found…). MUST run before the
|
|
5445
|
+
// manual-login-fallback gate below — this page IS a /login form, so
|
|
5446
|
+
// detectManualLoginFallback would otherwise swallow it as
|
|
5447
|
+
// oauth_session_not_persisted and abort. The account simply needs
|
|
5448
|
+
// creating via email, so re-route to form-fill instead of bailing.
|
|
5449
|
+
if (detectGoogleNoAccount(gateState.url, gateText)) {
|
|
5450
|
+
steps.push(`OAuth: ${provider.label} sign-in succeeded but ${task.service} has no account for ` +
|
|
5451
|
+
`this identity (login-only OAuth, ${pathOf(gateState.url)}) — abandoning OAuth and ` +
|
|
5452
|
+
`falling back to email/password signup to create the account.`);
|
|
5453
|
+
return OAUTH_FALL_BACK_TO_FORM_FILL;
|
|
5454
|
+
}
|
|
3810
5455
|
// (a) Manual-login fallback (DigitalOcean, Hyperbolic). Service
|
|
3811
5456
|
// dropped the OAuth session and rendered a /login form with
|
|
3812
5457
|
// email + password inputs. Bot can't manually log in.
|
|
@@ -4269,7 +5914,12 @@ ${formatInventory(input.inventory)}`,
|
|
|
4269
5914
|
// email rather than "verify", and that broader matcher catches both.
|
|
4270
5915
|
async waitForVerificationEmail(inbox, alias, totalSeconds) {
|
|
4271
5916
|
const deadline = Date.now() + totalSeconds * 1000;
|
|
4272
|
-
|
|
5917
|
+
// `verif` (not `verify`) so the matcher also catches "verification" —
|
|
5918
|
+
// "verification" does NOT contain the substring "verify" (…ifi… vs
|
|
5919
|
+
// …ify), which silently dropped plausible's "4011 is your Plausible
|
|
5920
|
+
// email verification code" and timed the whole signup out. `code` /
|
|
5921
|
+
// `one[- ]?time` / `otp` catch code-based verification subjects too.
|
|
5922
|
+
const pattern = /verif|confirm|welcome|activate|complete|finish|set\s*up|\bcode\b|one[\s-]?time|\botp\b|sign[\s-]?up/i;
|
|
4273
5923
|
let lastErr = null;
|
|
4274
5924
|
while (Date.now() < deadline) {
|
|
4275
5925
|
const remainingSeconds = Math.max(1, Math.floor((deadline - Date.now()) / 1000));
|
|
@@ -4293,6 +5943,36 @@ ${formatInventory(input.inventory)}`,
|
|
|
4293
5943
|
}
|
|
4294
5944
|
throw lastErr ?? new Error("verification email did not arrive in time");
|
|
4295
5945
|
}
|
|
5946
|
+
// Code-based email verification (plausible: "Enter 4011 to verify your
|
|
5947
|
+
// email address"). The signup email carried a numeric code and no
|
|
5948
|
+
// clickable link, and the page transitioned to a code-input step after
|
|
5949
|
+
// submit. Seed the post-verify planner with the code so it fills the
|
|
5950
|
+
// input + clicks Verify, then drives on to the API key. Generalizes to
|
|
5951
|
+
// every service that verifies by emailed code rather than link.
|
|
5952
|
+
async enterEmailVerificationCode(code, task, password, steps) {
|
|
5953
|
+
if (code.length === 0) {
|
|
5954
|
+
steps.push("Verification email exposed a code field but it was empty — skipping.");
|
|
5955
|
+
return {};
|
|
5956
|
+
}
|
|
5957
|
+
steps.push(`Email carries a verification CODE (${code}) and no link — entering it on the page.`);
|
|
5958
|
+
// The post-submit "enter code" view may still be hydrating.
|
|
5959
|
+
await this.browser.waitForFormReady();
|
|
5960
|
+
const hint = `Email verification code retrieved: "${code}". The current page has a ` +
|
|
5961
|
+
`verification-code / OTP input (placeholder like "Code" / "Verification code", ` +
|
|
5962
|
+
`or several single-digit boxes — fill the FIRST and the browser auto-distributes). ` +
|
|
5963
|
+
`Issue {"kind":"fill","selector":"…","value":"${code}"} on it, then NEXT round click ` +
|
|
5964
|
+
`the Verify / Confirm / Continue / Submit button.`;
|
|
5965
|
+
return this.postVerifyLoop({
|
|
5966
|
+
service: task.service,
|
|
5967
|
+
credentials: { email: task.email, password },
|
|
5968
|
+
maxRounds: task.postVerifyMaxRounds ?? 6,
|
|
5969
|
+
steps,
|
|
5970
|
+
initialHint: hint,
|
|
5971
|
+
...(task.scopeHint !== undefined ? { scopeHint: task.scopeHint } : {}),
|
|
5972
|
+
...(task.machineToken !== undefined ? { machineToken: task.machineToken } : {}),
|
|
5973
|
+
...(task.apiBase !== undefined ? { apiBase: task.apiBase } : {}),
|
|
5974
|
+
});
|
|
5975
|
+
}
|
|
4296
5976
|
// Drive the browser toward the API key after the account exists —
|
|
4297
5977
|
// used by BOTH the email-verification path and the OAuth path (T9).
|
|
4298
5978
|
// Each round asks Claude what to do next given the current page; we
|
|
@@ -4445,6 +6125,206 @@ ${formatInventory(input.inventory)}`,
|
|
|
4445
6125
|
}
|
|
4446
6126
|
return out;
|
|
4447
6127
|
}
|
|
6128
|
+
// Run every visible-credential extraction tier the post-verify loop
|
|
6129
|
+
// uses (legacy regex/clipboard/hidden-input + DOM-proximity labeled),
|
|
6130
|
+
// merging first-wins into a single bundle. Used by attemptMintNewKey
|
|
6131
|
+
// so the freshly-minted key — which may render as a modal value, a
|
|
6132
|
+
// copy-button-only token, or a labeled table row — is caught by
|
|
6133
|
+
// whichever tier fits the vendor's reveal UI.
|
|
6134
|
+
async harvestVisibleCredentials() {
|
|
6135
|
+
const out = {};
|
|
6136
|
+
try {
|
|
6137
|
+
const legacy = await this.extractCredentials();
|
|
6138
|
+
for (const [k, v] of Object.entries(legacy)) {
|
|
6139
|
+
if (out[k] === undefined)
|
|
6140
|
+
out[k] = v;
|
|
6141
|
+
}
|
|
6142
|
+
}
|
|
6143
|
+
catch {
|
|
6144
|
+
// best-effort — fall through to DOM-proximity
|
|
6145
|
+
}
|
|
6146
|
+
try {
|
|
6147
|
+
const labeled = await this.extractFromDomProximity();
|
|
6148
|
+
for (const [k, v] of Object.entries(labeled)) {
|
|
6149
|
+
if (out[k] === undefined)
|
|
6150
|
+
out[k] = v;
|
|
6151
|
+
}
|
|
6152
|
+
}
|
|
6153
|
+
catch {
|
|
6154
|
+
// best-effort
|
|
6155
|
+
}
|
|
6156
|
+
return out;
|
|
6157
|
+
}
|
|
6158
|
+
// SUCCEED on an already-signed-in / existing-account dashboard.
|
|
6159
|
+
//
|
|
6160
|
+
// When the bot lands on an authenticated dashboard whose API-keys
|
|
6161
|
+
// page shows only NAMES of pre-existing keys (values are shown once
|
|
6162
|
+
// at create-time and are unrecoverable), the historic behavior was to
|
|
6163
|
+
// bail with existing_account_no_extract / no_credentials_after_already
|
|
6164
|
+
// _signed_in. That's a usable outcome the bot threw away: a new key is
|
|
6165
|
+
// a perfectly valid credential.
|
|
6166
|
+
//
|
|
6167
|
+
// This routine, given the current (existing-account) state:
|
|
6168
|
+
// 1. Tries to extract a READABLE key right where we are — some
|
|
6169
|
+
// dashboards do show a usable value (a default key on a freshly
|
|
6170
|
+
// reused account, or a reveal affordance the loop hadn't fired).
|
|
6171
|
+
// 2. If the current page isn't a keys page, walks the same
|
|
6172
|
+
// hardcoded keys-path fallbacks the stuck-loop escalation uses,
|
|
6173
|
+
// re-trying the readable-key extract at each.
|
|
6174
|
+
// 3. On a keys page with no readable key, finds a "Create new key /
|
|
6175
|
+
// Generate API key / New token" affordance and CLICKS it, then
|
|
6176
|
+
// harvests the freshly-minted value (modal reveal + copy-button +
|
|
6177
|
+
// clipboard + labeled-row, via harvestVisibleCredentials, after a
|
|
6178
|
+
// short poll for the server round-trip that mints the key).
|
|
6179
|
+
//
|
|
6180
|
+
// Returns the minted/extracted credentials on success, or null when
|
|
6181
|
+
// there is genuinely no way to produce a key for this identity (no
|
|
6182
|
+
// create affordance anywhere — e.g. key creation is paywalled). The
|
|
6183
|
+
// caller then falls through to the honest existing_account bail.
|
|
6184
|
+
//
|
|
6185
|
+
// Best-effort throughout: any browser error degrades to "couldn't
|
|
6186
|
+
// mint" (null) rather than throwing — the existing classifier remains
|
|
6187
|
+
// the safety net.
|
|
6188
|
+
async attemptMintNewKey(steps) {
|
|
6189
|
+
// The set of keys-page URLs we've navigated, so the fallback walk
|
|
6190
|
+
// doesn't revisit one and the create-affordance search doesn't
|
|
6191
|
+
// re-click on a page already shown to lack one.
|
|
6192
|
+
const visitedKeysUrls = new Set();
|
|
6193
|
+
// Attempt readable-extract → create-and-extract on whatever page is
|
|
6194
|
+
// currently loaded. Returns credentials on success, null otherwise.
|
|
6195
|
+
const tryHere = async () => {
|
|
6196
|
+
// (a) A readable key already on the page (or behind a reveal
|
|
6197
|
+
// affordance the post-verify loop hadn't clicked yet).
|
|
6198
|
+
let creds = await this.harvestVisibleCredentials();
|
|
6199
|
+
if (hasAnyExtractedCredential(creds)) {
|
|
6200
|
+
steps.push("Existing-account recovery: a readable key was already present on the keys page — extracted it.");
|
|
6201
|
+
return creds;
|
|
6202
|
+
}
|
|
6203
|
+
// (b) Reveal pass — a masked-but-revealable existing key.
|
|
6204
|
+
try {
|
|
6205
|
+
const revealRes = await this.browser.revealMaskedCredentials();
|
|
6206
|
+
if (revealRes.clicked > 0) {
|
|
6207
|
+
await this.browser.wait(1);
|
|
6208
|
+
creds = await this.harvestVisibleCredentials();
|
|
6209
|
+
if (hasAnyExtractedCredential(creds)) {
|
|
6210
|
+
steps.push(`Existing-account recovery: revealed a masked existing key (clicked ${revealRes.clicked}) and extracted it.`);
|
|
6211
|
+
return creds;
|
|
6212
|
+
}
|
|
6213
|
+
}
|
|
6214
|
+
}
|
|
6215
|
+
catch {
|
|
6216
|
+
// best-effort reveal
|
|
6217
|
+
}
|
|
6218
|
+
// (c) Mint a fresh key. Find + click a create affordance, then
|
|
6219
|
+
// harvest the newly-shown value.
|
|
6220
|
+
const inventory = await this.buildInventory(steps, undefined, 80);
|
|
6221
|
+
const createBtn = findCreateKeyAffordance(inventory);
|
|
6222
|
+
if (createBtn === null)
|
|
6223
|
+
return null;
|
|
6224
|
+
const label = (createBtn.visibleText ??
|
|
6225
|
+
createBtn.ariaLabel ??
|
|
6226
|
+
createBtn.title ??
|
|
6227
|
+
"create key").trim();
|
|
6228
|
+
steps.push(`Existing-account recovery: no readable key — clicking a key-minting affordance ${JSON.stringify(label.slice(0, 40))}.`);
|
|
6229
|
+
try {
|
|
6230
|
+
await this.browser.click(createBtn.selector);
|
|
6231
|
+
}
|
|
6232
|
+
catch (err) {
|
|
6233
|
+
steps.push(`Existing-account recovery: create-key click failed (${err instanceof Error ? err.message : String(err)}).`);
|
|
6234
|
+
return null;
|
|
6235
|
+
}
|
|
6236
|
+
// Poll for the freshly-minted key — minting is a server
|
|
6237
|
+
// round-trip (Render/Mistral/Mailtrap render the value into a
|
|
6238
|
+
// modal after the POST returns). Reuse the modal-reveal poll
|
|
6239
|
+
// budget the click branch uses elsewhere (~8s), early-exiting the
|
|
6240
|
+
// moment any tier surfaces a credential. A confirmation dialog
|
|
6241
|
+
// ("Name your key" → Create) is common; fire the reveal pass each
|
|
6242
|
+
// round so a modal that needs a second confirm-then-show click is
|
|
6243
|
+
// still harvested.
|
|
6244
|
+
const deadline = Date.now() + 8000;
|
|
6245
|
+
while (Date.now() < deadline) {
|
|
6246
|
+
await this.browser.wait(0.5);
|
|
6247
|
+
const minted = await this.harvestVisibleCredentials();
|
|
6248
|
+
if (hasAnyExtractedCredential(minted)) {
|
|
6249
|
+
steps.push("Existing-account recovery: extracted the freshly-minted key.");
|
|
6250
|
+
return minted;
|
|
6251
|
+
}
|
|
6252
|
+
// A two-step create modal: clicking the page-level "Create key"
|
|
6253
|
+
// opened a "name + confirm" dialog. Click a now-visible confirm
|
|
6254
|
+
// affordance once, then keep polling.
|
|
6255
|
+
try {
|
|
6256
|
+
const modalInv = await this.browser.extractInteractiveElements();
|
|
6257
|
+
const confirmBtn = findCreateKeyAffordance(modalInv);
|
|
6258
|
+
if (confirmBtn !== null &&
|
|
6259
|
+
confirmBtn.selector !== createBtn.selector) {
|
|
6260
|
+
await this.browser.click(confirmBtn.selector);
|
|
6261
|
+
}
|
|
6262
|
+
}
|
|
6263
|
+
catch {
|
|
6264
|
+
// best-effort confirm
|
|
6265
|
+
}
|
|
6266
|
+
}
|
|
6267
|
+
// Minted but the value didn't surface — try one last reveal +
|
|
6268
|
+
// harvest (some vendors render the new key masked with a Show
|
|
6269
|
+
// toggle even on first display).
|
|
6270
|
+
try {
|
|
6271
|
+
const revealRes = await this.browser.revealMaskedCredentials();
|
|
6272
|
+
if (revealRes.clicked > 0) {
|
|
6273
|
+
await this.browser.wait(1);
|
|
6274
|
+
const afterReveal = await this.harvestVisibleCredentials();
|
|
6275
|
+
if (hasAnyExtractedCredential(afterReveal)) {
|
|
6276
|
+
steps.push("Existing-account recovery: revealed and extracted the freshly-minted key.");
|
|
6277
|
+
return afterReveal;
|
|
6278
|
+
}
|
|
6279
|
+
}
|
|
6280
|
+
}
|
|
6281
|
+
catch {
|
|
6282
|
+
// best-effort
|
|
6283
|
+
}
|
|
6284
|
+
return null;
|
|
6285
|
+
};
|
|
6286
|
+
// Step 1 — try on the current page first.
|
|
6287
|
+
try {
|
|
6288
|
+
const state = await this.browser.getState();
|
|
6289
|
+
visitedKeysUrls.add(state.url);
|
|
6290
|
+
if (EXISTING_KEY_URL_HINT.test(state.url)) {
|
|
6291
|
+
const here = await tryHere();
|
|
6292
|
+
if (here !== null)
|
|
6293
|
+
return here;
|
|
6294
|
+
}
|
|
6295
|
+
}
|
|
6296
|
+
catch {
|
|
6297
|
+
// best-effort — fall through to the fallback walk
|
|
6298
|
+
}
|
|
6299
|
+
// Step 2 — walk the hardcoded keys-path fallbacks. Even if Step 1
|
|
6300
|
+
// ran (current page WAS a keys page but had no affordance), a
|
|
6301
|
+
// different keys URL on the same origin may carry the create
|
|
6302
|
+
// control (org-scoped vs account-scoped keys pages).
|
|
6303
|
+
for (let i = 0; i < STUCK_LOOP_FALLBACK_PATHS.length; i++) {
|
|
6304
|
+
let currentUrl;
|
|
6305
|
+
try {
|
|
6306
|
+
currentUrl = (await this.browser.getState()).url;
|
|
6307
|
+
}
|
|
6308
|
+
catch {
|
|
6309
|
+
break;
|
|
6310
|
+
}
|
|
6311
|
+
const fallback = pickStuckLoopFallbackUrl(currentUrl, visitedKeysUrls);
|
|
6312
|
+
if (fallback === null)
|
|
6313
|
+
break;
|
|
6314
|
+
visitedKeysUrls.add(fallback);
|
|
6315
|
+
try {
|
|
6316
|
+
await this.browser.goto(fallback);
|
|
6317
|
+
await this.browser.waitForInteractiveDom(5, 15_000);
|
|
6318
|
+
}
|
|
6319
|
+
catch {
|
|
6320
|
+
continue;
|
|
6321
|
+
}
|
|
6322
|
+
const here = await tryHere();
|
|
6323
|
+
if (here !== null)
|
|
6324
|
+
return here;
|
|
6325
|
+
}
|
|
6326
|
+
return null;
|
|
6327
|
+
}
|
|
4448
6328
|
async postVerifyLoop(args) {
|
|
4449
6329
|
let credentials = await this.extractCredentials();
|
|
4450
6330
|
// 0.8.2-rc.15 — also seed DOM-proximity at loop entry. If the
|
|
@@ -4475,6 +6355,16 @@ ${formatInventory(input.inventory)}`,
|
|
|
4475
6355
|
// so the loop can bail with oauth_session_not_persisted instead of
|
|
4476
6356
|
// thrashing maxRounds and mislabeling it oauth_onboarding_failed.
|
|
4477
6357
|
let oauthLoginRequests = 0;
|
|
6358
|
+
// Consecutive rounds on an OAuth run where the page is STILL a login /
|
|
6359
|
+
// authenticate screen. The planner usually doesn't return {"kind":
|
|
6360
|
+
// "login"} here — it keeps CLICKING "Sign in with Google" (groq,
|
|
6361
|
+
// northflank, amplitude), so the oauthLoginRequests counter above
|
|
6362
|
+
// never trips. But the structural fact is decisive and service-
|
|
6363
|
+
// agnostic: after OAuth, an authenticated bot is on a dashboard, not a
|
|
6364
|
+
// login page. N consecutive login-page rounds ⇒ the callback never
|
|
6365
|
+
// persisted (anti-bot/IP rejection) ⇒ oauth_session_not_persisted, not
|
|
6366
|
+
// a navigation bug. Generalizes without per-service URLs.
|
|
6367
|
+
let consecutiveOauthLoginPageRounds = 0;
|
|
4478
6368
|
let planFailures = 0;
|
|
4479
6369
|
// 0.8.2-rc.6 — separate counter for upstream-blip retries. Doesn't
|
|
4480
6370
|
// gate planFailures (so a transient 502 won't push us into the
|
|
@@ -4491,7 +6381,7 @@ ${formatInventory(input.inventory)}`,
|
|
|
4491
6381
|
// truncated (the S3-class trap: the planner sees a key-shaped
|
|
4492
6382
|
// string and keeps asking to extract it forever), or when the
|
|
4493
6383
|
// planner's last step was rejected.
|
|
4494
|
-
let hint;
|
|
6384
|
+
let hint = args.initialHint;
|
|
4495
6385
|
// rc.27 — when the email_otp gate handler retrieved a code from
|
|
4496
6386
|
// the operator's gmail, seed the FIRST round's hint with the
|
|
4497
6387
|
// code + explicit fill+submit instructions. Cleared after one
|
|
@@ -4544,6 +6434,14 @@ ${formatInventory(input.inventory)}`,
|
|
|
4544
6434
|
// navigate produced no progress. Inject a hint forcing a CLICK
|
|
4545
6435
|
// on something visible in the current inventory.
|
|
4546
6436
|
let prevNavigateFromUrl = null;
|
|
6437
|
+
// Stalled-wizard breaker. Tracks a content signature of the page +
|
|
6438
|
+
// the effect of each executed action, so we can detect an onboarding
|
|
6439
|
+
// wizard that re-presents itself (clicks don't register) and break
|
|
6440
|
+
// out instead of burning every round on it. See isStalledOnActions.
|
|
6441
|
+
let prevContentSig = null;
|
|
6442
|
+
let lastActionKind = null;
|
|
6443
|
+
let lastActionSelector = null;
|
|
6444
|
+
const actionEffects = [];
|
|
4547
6445
|
// 0.8.2-rc.10 — escalation for the stuck-loop detector.
|
|
4548
6446
|
//
|
|
4549
6447
|
// The existing detector injects a re-plan hint when the planner
|
|
@@ -4611,6 +6509,10 @@ ${formatInventory(input.inventory)}`,
|
|
|
4611
6509
|
// Gate URLs we've already polled the operator's gmail for, so a
|
|
4612
6510
|
// multi-round wait on the same email-OTP page doesn't re-poll.
|
|
4613
6511
|
const otpPolledUrls = new Set();
|
|
6512
|
+
// Running summary of the steps the planner has taken, fed back into
|
|
6513
|
+
// each planPostVerifyStep call so the (stateless) planner stops
|
|
6514
|
+
// re-doing completed onboarding steps and re-navigating dead URLs.
|
|
6515
|
+
const priorActions = [];
|
|
4614
6516
|
for (let round = 0; round < args.maxRounds; round++) {
|
|
4615
6517
|
const currentCredentialKeyCount = Object.keys(credentials).filter((k) => !NON_CREDENTIAL_KEYS.has(k)).length;
|
|
4616
6518
|
if (currentCredentialKeyCount > lastCredentialKeyCount) {
|
|
@@ -4682,6 +6584,125 @@ ${formatInventory(input.inventory)}`,
|
|
|
4682
6584
|
await this.browser.wait(2);
|
|
4683
6585
|
continue;
|
|
4684
6586
|
}
|
|
6587
|
+
// clerk class — Google account chooser inside the post-verify loop.
|
|
6588
|
+
// The planner re-clicked "Sign in with Google", which opened
|
|
6589
|
+
// accounts.google.com's chooser (.../accountchooser?...). That page
|
|
6590
|
+
// carries a stray "Loading" label (so the hydration guard below would
|
|
6591
|
+
// burn all its ticks idling) and tryClickGoogleChooserCard is only
|
|
6592
|
+
// wired into runOAuthFlow — so nothing here clicks the account card.
|
|
6593
|
+
// Detect the chooser by URL or its "Choose an account" copy, click
|
|
6594
|
+
// the card to continue OAuth, then skip the rest of this round's
|
|
6595
|
+
// planning (the next round re-reads the post-chooser page).
|
|
6596
|
+
const chooserText = await this.browser.extractText().catch(() => "");
|
|
6597
|
+
if (/accounts\.google\.com\/.*(accountchooser|chooseaccount|oauthchooseaccount)/i.test(state.url) ||
|
|
6598
|
+
/choose an account/i.test(chooserText)) {
|
|
6599
|
+
await this.tryClickGoogleChooserCard();
|
|
6600
|
+
args.steps.push(`Post-verify round ${round}: Google account chooser — clicked the account card to continue OAuth`);
|
|
6601
|
+
await this.browser.wait(2);
|
|
6602
|
+
try {
|
|
6603
|
+
[state, inventory] = await Promise.all([
|
|
6604
|
+
this.browser.getState(),
|
|
6605
|
+
this.buildInventory(args.steps, undefined, 80),
|
|
6606
|
+
]);
|
|
6607
|
+
}
|
|
6608
|
+
catch {
|
|
6609
|
+
// mid-navigation read after the card click — the next round
|
|
6610
|
+
// re-reads, so just fall through to it.
|
|
6611
|
+
}
|
|
6612
|
+
continue;
|
|
6613
|
+
}
|
|
6614
|
+
// SPA hydration guard. A post-OAuth dashboard (northflank's
|
|
6615
|
+
// /settings/access-tokens, PostHog) can render a "Connecting"/loading
|
|
6616
|
+
// shell while its JS bundle + websocket finish — slow over a
|
|
6617
|
+
// residential tunnel. The shell often carries a stray element or two
|
|
6618
|
+
// (a logo link, the <noscript>), so gating on an EMPTY inventory
|
|
6619
|
+
// misses it; the loading-shell TEXT is the authoritative "not yet
|
|
6620
|
+
// rendered" signal. Wait while that text persists, then proceed with
|
|
6621
|
+
// whatever's there (an honest "still a shell" beats a premature done —
|
|
6622
|
+
// and if the SPA never hydrates, e.g. a blocked websocket, the bound
|
|
6623
|
+
// keeps us from hanging).
|
|
6624
|
+
//
|
|
6625
|
+
// Budget = 6x3s = 18s. MEASURED: a dashboard SPA gated on a websocket
|
|
6626
|
+
// (northflank's wss://platform.northflank.com/websocket) hydrates in
|
|
6627
|
+
// ~12-15s over the tunnel. A larger budget BACKFIRES on a page that
|
|
6628
|
+
// will NEVER hydrate (e.g. an authed user stranded on /signup): the
|
|
6629
|
+
// wait re-runs every round and burns the 600s run cap. The escape for
|
|
6630
|
+
// a never-hydrating route is navigate-to-root post-OAuth, not a longer
|
|
6631
|
+
// wait here.
|
|
6632
|
+
//
|
|
6633
|
+
// ADAPTIVE exception (MEASURED 2026-06-04, clerk): an OAuth/SSO
|
|
6634
|
+
// CALLBACK route does a token exchange that renders even slower than a
|
|
6635
|
+
// plain dashboard — clerk's `/sign-in/sso-callback` outlasts 18s and
|
|
6636
|
+
// the bot bailed at the edge with `oauth_session_not_persisted`. On a
|
|
6637
|
+
// callback route the SPA IS making progress, so 12x3s = 36s of
|
|
6638
|
+
// patience is warranted; everywhere else the 6-tick budget holds so a
|
|
6639
|
+
// genuinely-stuck route still hits the navigate-to-root escape fast.
|
|
6640
|
+
// Read the URL fresh each round (it may redirect off the callback).
|
|
6641
|
+
const HYDRATION_TICKS = isOAuthCallbackRoute(state.url) ? 12 : 6;
|
|
6642
|
+
for (let hydrationWait = 0; hydrationWait < HYDRATION_TICKS &&
|
|
6643
|
+
isLoadingShellText(await this.browser.extractText().catch(() => "")); hydrationWait++) {
|
|
6644
|
+
args.steps.push(`Post-verify round ${round}: ${pathOf(state.url)} is a loading shell ` +
|
|
6645
|
+
`(hydration wait ${hydrationWait + 1}/${HYDRATION_TICKS}) — waiting for the SPA to render`);
|
|
6646
|
+
await this.browser.wait(3);
|
|
6647
|
+
try {
|
|
6648
|
+
[state, inventory] = await Promise.all([
|
|
6649
|
+
this.browser.getState(),
|
|
6650
|
+
this.buildInventory(args.steps, undefined, 80),
|
|
6651
|
+
]);
|
|
6652
|
+
}
|
|
6653
|
+
catch {
|
|
6654
|
+
// mid-navigation read — keep the prior state/inventory and let
|
|
6655
|
+
// the next hydration tick (or the planner) retry.
|
|
6656
|
+
}
|
|
6657
|
+
}
|
|
6658
|
+
// Stalled-wizard breaker. Build a content signature (URL + each
|
|
6659
|
+
// inventory element's selector + label) and judge whether the
|
|
6660
|
+
// PREVIOUS executed action changed the page. If the last few
|
|
6661
|
+
// page-mutating actions all left the page identical, a wizard is
|
|
6662
|
+
// re-presenting itself and clicking it does nothing — stop here so
|
|
6663
|
+
// we don't waste the remaining rounds + LLM budget. (axiom: 4×
|
|
6664
|
+
// role-card re-clicks that never advanced.)
|
|
6665
|
+
const contentSig = (state.url +
|
|
6666
|
+
"§" +
|
|
6667
|
+
inventory
|
|
6668
|
+
.map((e) => `${e.selector}·${(e.visibleText ?? e.ariaLabel ?? "").slice(0, 24)}`)
|
|
6669
|
+
.join("|")).slice(0, 4000);
|
|
6670
|
+
const pageUnchanged = prevContentSig !== null && contentSig === prevContentSig;
|
|
6671
|
+
if (lastActionKind !== null) {
|
|
6672
|
+
actionEffects.push({ kind: lastActionKind, pageUnchanged, selector: lastActionSelector });
|
|
6673
|
+
}
|
|
6674
|
+
prevContentSig = contentSig;
|
|
6675
|
+
if (isStalledOnActions(actionEffects)) {
|
|
6676
|
+
args.steps.push(`Post-verify: STALLED — the last 3 page-mutating actions left the page ` +
|
|
6677
|
+
`identical (${state.url}). An onboarding wizard is re-presenting itself ` +
|
|
6678
|
+
`(clicks not registering); giving up instead of burning the round budget.`);
|
|
6679
|
+
break;
|
|
6680
|
+
}
|
|
6681
|
+
// Non-persisting-OAuth detector (A5, broadened). On an OAuth run the
|
|
6682
|
+
// bot has ALREADY authenticated before this loop, so landing on a
|
|
6683
|
+
// login page means the callback was rejected. The planner usually
|
|
6684
|
+
// keeps clicking "Sign in with Google" rather than returning a
|
|
6685
|
+
// {"kind":"login"} step, so the oauthLoginRequests counter misses
|
|
6686
|
+
// it — track the structural fact (consecutive login-page rounds)
|
|
6687
|
+
// instead. Generalizes across services (groq/northflank/amplitude)
|
|
6688
|
+
// without per-service URLs; reclassifies these off the misleading
|
|
6689
|
+
// oauth_onboarding_failed label into the truthful (and unwinnable-
|
|
6690
|
+
// without-residential-egress) oauth_session_not_persisted wall.
|
|
6691
|
+
if (args.credentials === undefined && isLoginPageUrl(state.url)) {
|
|
6692
|
+
consecutiveOauthLoginPageRounds += 1;
|
|
6693
|
+
if (consecutiveOauthLoginPageRounds >= 3) {
|
|
6694
|
+
args.steps.push(`Post-verify: OAuth run still on a login page (${pathOf(state.url)}) for ` +
|
|
6695
|
+
`${consecutiveOauthLoginPageRounds} rounds — the OAuth callback never persisted; bailing.`);
|
|
6696
|
+
throw new OAuthSessionNotPersistedError(`oauth_session_not_persisted: signed in to ${args.service} via OAuth but the page ` +
|
|
6697
|
+
`still presents a login screen (${pathOf(state.url)}) after ` +
|
|
6698
|
+
`${consecutiveOauthLoginPageRounds} rounds — the OAuth callback never established a ` +
|
|
6699
|
+
`session (anti-bot / IP rejection of the callback). Not a navigation bug; needs ` +
|
|
6700
|
+
`residential egress or manual signup.`);
|
|
6701
|
+
}
|
|
6702
|
+
}
|
|
6703
|
+
else {
|
|
6704
|
+
consecutiveOauthLoginPageRounds = 0;
|
|
6705
|
+
}
|
|
4685
6706
|
// Email-OTP gate that surfaced AFTER OAuth (the pre-OAuth signup
|
|
4686
6707
|
// gate never saw it, so pendingOtpCode is unset). Convex's
|
|
4687
6708
|
// radar-challenge sends a 6-digit code to the operator's Google
|
|
@@ -4729,6 +6750,7 @@ ${formatInventory(input.inventory)}`,
|
|
|
4729
6750
|
inventory,
|
|
4730
6751
|
...(hint !== undefined ? { hint } : {}),
|
|
4731
6752
|
...(args.scopeHint !== undefined ? { scopeHint: args.scopeHint } : {}),
|
|
6753
|
+
...(priorActions.length > 0 ? { priorActions: priorActions.slice(-10) } : {}),
|
|
4732
6754
|
});
|
|
4733
6755
|
}
|
|
4734
6756
|
catch (err) {
|
|
@@ -4789,6 +6811,17 @@ ${formatInventory(input.inventory)}`,
|
|
|
4789
6811
|
// GitHub issue, leaking the credential. Redactor patterns mirror
|
|
4790
6812
|
// tools/archived-harvester/redact.mjs — defense in depth.
|
|
4791
6813
|
args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: ${nextStep.kind} — ${redactCredentials(nextStep.reason)}`);
|
|
6814
|
+
// Feed this action back into the next round's planner context so it
|
|
6815
|
+
// doesn't loop. Concise: where we were, what we did, why.
|
|
6816
|
+
{
|
|
6817
|
+
const where = state.url.replace(/^https?:\/\//, "").slice(0, 40);
|
|
6818
|
+
const target = "selector" in nextStep && nextStep.selector !== undefined
|
|
6819
|
+
? ` ${nextStep.selector.slice(0, 24)}`
|
|
6820
|
+
: "url" in nextStep && nextStep.url !== undefined
|
|
6821
|
+
? ` →${nextStep.url.replace(/^https?:\/\//, "").slice(0, 36)}`
|
|
6822
|
+
: "";
|
|
6823
|
+
priorActions.push(`@${where} ${nextStep.kind}${target}: ${redactCredentials(nextStep.reason).slice(0, 60)}`);
|
|
6824
|
+
}
|
|
4792
6825
|
// Dump this round's real page state + inventory in the E1
|
|
4793
6826
|
// eval-corpus format so onboarding adapters can be iterated
|
|
4794
6827
|
// offline without re-running the rate-limited OAuth handshake.
|
|
@@ -5087,7 +7120,7 @@ ${formatInventory(input.inventory)}`,
|
|
|
5087
7120
|
hint = undefined;
|
|
5088
7121
|
continue;
|
|
5089
7122
|
}
|
|
5090
|
-
const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls);
|
|
7123
|
+
const fallback = pickStuckLoopFallbackUrl(state.url, triedFallbackUrls, args.service);
|
|
5091
7124
|
if (fallback !== null) {
|
|
5092
7125
|
triedFallbackUrls.add(fallback);
|
|
5093
7126
|
args.steps.push(`Post-verify: stuck-loop detected ${stuckFiresAtUrl}x at ${state.url} — escalating to a hardcoded API-key URL: ${fallback}`);
|
|
@@ -5154,6 +7187,14 @@ ${formatInventory(input.inventory)}`,
|
|
|
5154
7187
|
prevSignature = null;
|
|
5155
7188
|
prevInventorySize = inventory.length;
|
|
5156
7189
|
}
|
|
7190
|
+
// Record the kind of the step we're ABOUT to execute (all re-plan
|
|
7191
|
+
// `continue` guards are behind us here) so next round can judge
|
|
7192
|
+
// whether it changed the page — the stalled-wizard breaker above.
|
|
7193
|
+
lastActionKind = nextStep.kind;
|
|
7194
|
+
lastActionSelector =
|
|
7195
|
+
"selector" in nextStep && typeof nextStep.selector === "string"
|
|
7196
|
+
? nextStep.selector
|
|
7197
|
+
: null;
|
|
5157
7198
|
if (nextStep.kind === "done") {
|
|
5158
7199
|
// When the planner bails because it encountered Google's
|
|
5159
7200
|
// device-verification challenge mid-post-verify (Algolia +
|
|
@@ -5633,24 +7674,69 @@ ${formatInventory(input.inventory)}`,
|
|
|
5633
7674
|
// correctly, the state is just unrecoverable for this identity.
|
|
5634
7675
|
const alreadyClassified = this.lastPostVerifyDoneReason !== null &&
|
|
5635
7676
|
this.lastPostVerifyDoneReason.startsWith("[");
|
|
5636
|
-
|
|
5637
|
-
|
|
5638
|
-
|
|
7677
|
+
const noCredentialYet = credentials.api_key === undefined && credentials.username === undefined;
|
|
7678
|
+
// Distinct from a generic prior classification: ONLY the existing-
|
|
7679
|
+
// account path is recoverable by minting. A [stuck_loop] / [paywall]
|
|
7680
|
+
// / [anti_bot] marker is a different failure the mint flow can't fix,
|
|
7681
|
+
// so leave those alone.
|
|
7682
|
+
const alreadyExistingAccount = this.lastPostVerifyDoneReason !== null &&
|
|
7683
|
+
this.lastPostVerifyDoneReason.startsWith("[existing_account_no_extract]");
|
|
7684
|
+
// The mint flow is appropriate for the existing-account /
|
|
7685
|
+
// already-signed-in category ONLY. A [stuck_loop] / [paywall] /
|
|
7686
|
+
// [anti_bot] marker is a different, non-recoverable failure — leave
|
|
7687
|
+
// those alone. An UNclassified exit reaching here IS the
|
|
7688
|
+
// already-signed-in case (postVerifyLoop only runs on an
|
|
7689
|
+
// authenticated dashboard — `already_oauth` / post-OAuth), so it's
|
|
7690
|
+
// eligible too; the mint flow self-gates by requiring a real keys
|
|
7691
|
+
// page + a create affordance before it acts.
|
|
7692
|
+
const mintEligible = noCredentialYet && (!alreadyClassified || alreadyExistingAccount);
|
|
7693
|
+
if (mintEligible) {
|
|
7694
|
+
// SUCCEED-EVEN-WHEN-ACCOUNT-EXISTS: before bailing, navigate to
|
|
7695
|
+
// the keys page and either extract a readable key or mint a fresh
|
|
7696
|
+
// one. A new key is a valid outcome. attemptMintNewKey returns
|
|
7697
|
+
// null when there is genuinely no create affordance anywhere (key
|
|
7698
|
+
// creation paywalled / no keys page) — then we fall through to the
|
|
7699
|
+
// honest bail.
|
|
7700
|
+
let minted = null;
|
|
5639
7701
|
try {
|
|
5640
|
-
|
|
5641
|
-
const finalText = await this.browser.extractText().catch(() => "");
|
|
5642
|
-
if (detectExistingAccountNoExtract({
|
|
5643
|
-
url: finalState.url,
|
|
5644
|
-
pageText: finalText,
|
|
5645
|
-
lastPlannerReason: this.lastPostVerifyDoneReason ?? "",
|
|
5646
|
-
})) {
|
|
5647
|
-
this.lastPostVerifyDoneReason =
|
|
5648
|
-
`[existing_account_no_extract] at ${finalState.url}; latest planner reason: ${this.lastPostVerifyDoneReason ?? "(none — loop exhausted)"}`;
|
|
5649
|
-
args.steps.push("Post-verify: classified as existing_account_no_extract — masked pre-existing key on an authenticated dashboard.");
|
|
5650
|
-
}
|
|
7702
|
+
minted = await this.attemptMintNewKey(args.steps);
|
|
5651
7703
|
}
|
|
5652
7704
|
catch {
|
|
5653
|
-
// best-effort
|
|
7705
|
+
// best-effort — degrade to the existing classifier below
|
|
7706
|
+
}
|
|
7707
|
+
if (minted !== null && hasAnyExtractedCredential(minted)) {
|
|
7708
|
+
for (const [k, v] of Object.entries(minted)) {
|
|
7709
|
+
if (credentials[k] === undefined)
|
|
7710
|
+
credentials[k] = v;
|
|
7711
|
+
}
|
|
7712
|
+
// Clear any existing-account sentinel — we recovered.
|
|
7713
|
+
if (alreadyExistingAccount)
|
|
7714
|
+
this.lastPostVerifyDoneReason = null;
|
|
7715
|
+
args.steps.push("Post-verify: existing-account / already-signed-in dashboard recovered — minted/extracted a usable key instead of bailing.");
|
|
7716
|
+
return credentials;
|
|
7717
|
+
}
|
|
7718
|
+
// Mint failed. If the masked-pre-existing-key shape is detectable
|
|
7719
|
+
// AND not already flagged by the stuck-loop early-exit, mark the
|
|
7720
|
+
// honest existing_account_no_extract bail so the caller surfaces
|
|
7721
|
+
// the precise status rather than the generic
|
|
7722
|
+
// no_credentials_after_already_signed_in.
|
|
7723
|
+
if (!alreadyExistingAccount) {
|
|
7724
|
+
try {
|
|
7725
|
+
const finalState = await this.browser.getState();
|
|
7726
|
+
const finalText = await this.browser.extractText().catch(() => "");
|
|
7727
|
+
if (detectExistingAccountNoExtract({
|
|
7728
|
+
url: finalState.url,
|
|
7729
|
+
pageText: finalText,
|
|
7730
|
+
lastPlannerReason: this.lastPostVerifyDoneReason ?? "",
|
|
7731
|
+
})) {
|
|
7732
|
+
this.lastPostVerifyDoneReason =
|
|
7733
|
+
`[existing_account_no_extract] at ${finalState.url}; latest planner reason: ${this.lastPostVerifyDoneReason ?? "(none — loop exhausted)"}`;
|
|
7734
|
+
args.steps.push("Post-verify: classified as existing_account_no_extract — pre-existing keys are masked and no key-minting affordance was found.");
|
|
7735
|
+
}
|
|
7736
|
+
}
|
|
7737
|
+
catch {
|
|
7738
|
+
// best-effort classifier — never block returning credentials
|
|
7739
|
+
}
|
|
5654
7740
|
}
|
|
5655
7741
|
}
|
|
5656
7742
|
return credentials;
|
|
@@ -5761,7 +7847,23 @@ Schema:
|
|
|
5761
7847
|
invent or guess a selector — one not in the inventory is rejected.
|
|
5762
7848
|
- If the element you want is NOT in the inventory, use {"kind":"navigate"}
|
|
5763
7849
|
to a likely settings URL instead of guessing a selector.
|
|
5764
|
-
|
|
7850
|
+
${input.priorActions !== undefined && input.priorActions.length > 0
|
|
7851
|
+
? `
|
|
7852
|
+
STEPS ALREADY TAKEN this session (most recent last). You plan ONE step
|
|
7853
|
+
at a time and do not otherwise remember earlier rounds — use this list
|
|
7854
|
+
so you do NOT loop:
|
|
7855
|
+
${input.priorActions.map((a, i) => ` ${i + 1}. ${a}`).join("\n")}
|
|
7856
|
+
- Do NOT repeat a completed onboarding-wizard step. If you already
|
|
7857
|
+
selected a role / company-size / use-case or accepted the terms, that
|
|
7858
|
+
step is DONE — move forward, never back to it.
|
|
7859
|
+
- Do NOT re-issue a {"kind":"navigate"} to a URL that already appears
|
|
7860
|
+
above and did not advance you. If a settings URL errored or bounced
|
|
7861
|
+
you back, try a DIFFERENT path or click a dashboard link instead.
|
|
7862
|
+
- If the last 3+ steps above are the same kind on the same URL with no
|
|
7863
|
+
progress, you are stuck — try a genuinely different action or return
|
|
7864
|
+
{"kind":"done"}.
|
|
7865
|
+
`
|
|
7866
|
+
: ""}
|
|
5765
7867
|
Strategy:
|
|
5766
7868
|
- If a FULL, untruncated API key is visible, return {"kind":"extract"}.
|
|
5767
7869
|
- **MULTI-CREDENTIAL SERVICES** — when the page shows TWO OR MORE
|
|
@@ -5773,10 +7875,13 @@ Strategy:
|
|
|
5773
7875
|
labels EVERY visible credential in the format
|
|
5774
7876
|
\`<canonical_label>='<value>'\` (use SINGLE quotes around values).
|
|
5775
7877
|
The bot's labeled-extractor will pull EACH labeled value into the
|
|
5776
|
-
credentials object. Example
|
|
5777
|
-
|
|
5778
|
-
|
|
5779
|
-
a
|
|
7878
|
+
credentials object. Example SHAPE (the bracketed parts are
|
|
7879
|
+
PLACEHOLDERS — you MUST substitute the REAL values visible on the
|
|
7880
|
+
CURRENT page; NEVER emit these literal bracket strings or any example
|
|
7881
|
+
values, and never name a service that is not the one you are on):
|
|
7882
|
+
"The API Keys page shows cloud_name='<real cloud_name from this page>'
|
|
7883
|
+
and api_key='<real api_key from this page>' in the table; api_secret
|
|
7884
|
+
is hidden behind a Reveal button."
|
|
5780
7885
|
Use the standard canonical labels: api_key, api_secret, secret_key,
|
|
5781
7886
|
publishable_key, access_token, client_id, client_secret, cloud_name,
|
|
5782
7887
|
application_id, admin_api_key, search_api_key, account_sid,
|
|
@@ -5794,10 +7899,11 @@ Strategy:
|
|
|
5794
7899
|
behind a Reveal button, return {"kind":"extract"} NOW for the
|
|
5795
7900
|
visible labels (the bot's labeled extractor folds them into the
|
|
5796
7901
|
credentials bundle) AND in the same reason field flag the masked
|
|
5797
|
-
credential so the bot's automatic reveal pass fires. Example
|
|
5798
|
-
|
|
5799
|
-
|
|
5800
|
-
|
|
7902
|
+
credential so the bot's automatic reveal pass fires. Example SHAPE
|
|
7903
|
+
(substitute the REAL values from the current page — the bracketed
|
|
7904
|
+
parts are placeholders, never emit them literally): "cloud_name='<real
|
|
7905
|
+
value>' and api_key='<real value>' are visible in the table;
|
|
7906
|
+
api_secret is hidden behind a Reveal button — please unmask." The masked
|
|
5801
7907
|
credential's label MUST appear with one of the trigger words
|
|
5802
7908
|
(masked / hidden / reveal / unmask / bullets / asterisks) so the
|
|
5803
7909
|
reveal pass triggers. Do this BEFORE attempting any explicit
|
|
@@ -5813,9 +7919,17 @@ Strategy:
|
|
|
5813
7919
|
capture whatever IS visible (even if just a cloud_name with no
|
|
5814
7920
|
api_secret) and return the partial bundle to the caller, which is
|
|
5815
7921
|
more useful than five wasted rounds of clicking a dead reveal.
|
|
5816
|
-
- To reach API keys,
|
|
5817
|
-
|
|
5818
|
-
|
|
7922
|
+
- To reach API keys, PREFER clicking a visible "API Keys" / "Tokens" /
|
|
7923
|
+
"Developer" / "Settings" link in the INVENTORY (a verified selector) — that
|
|
7924
|
+
always lands on the real page. Only use {"kind":"navigate"} to a GUESSED
|
|
7925
|
+
settings URL when NO such link is in the inventory, and NEVER guess the same
|
|
7926
|
+
URL twice. These pages usually live under user/ACCOUNT settings, not a
|
|
7927
|
+
project or workspace's settings.
|
|
7928
|
+
- **404 RECOVERY.** If the page is a 404 / "not found" / "page doesn't exist"
|
|
7929
|
+
/ "we couldn't find" (a guessed URL missed), do NOT retry it or guess
|
|
7930
|
+
another URL. {"kind":"navigate"} to the service's app ROOT/dashboard (the
|
|
7931
|
+
bare origin, e.g. https://app.<service>.com/) and find the API-keys link in
|
|
7932
|
+
the nav from there.
|
|
5819
7933
|
- **EXCEPT** when the page has a very small inventory (5 or fewer elements)
|
|
5820
7934
|
and one of them is an onboarding CTA — patterns like "Get started",
|
|
5821
7935
|
"Continue", "Activate", "Enable API", "Start free trial", "Set up".
|
|
@@ -5839,7 +7953,21 @@ Strategy:
|
|
|
5839
7953
|
"done" while a card-radio cluster is still visible.
|
|
5840
7954
|
${loginGuidance}
|
|
5841
7955
|
- If we're on a "verify your phone" / "verify email" wall, return done (we can't solve those).
|
|
5842
|
-
-
|
|
7956
|
+
- **EMPTY DASHBOARD — create the first resource.** Many services do NOT expose
|
|
7957
|
+
an API key until you create your first organization / project / cluster /
|
|
7958
|
+
database / service / workspace. If the dashboard shows NO existing resources
|
|
7959
|
+
(an empty state, "Create your first…", "No projects/clusters yet", "Get
|
|
7960
|
+
started by creating…", or just a lone "Create"/"New <resource>"/"+ New" CTA
|
|
7961
|
+
and nothing else useful), CLICK that CTA, then on the following rounds fill
|
|
7962
|
+
the minimal required fields (use a generated name like ts-<random> for
|
|
7963
|
+
name/slug fields, pick the first/free option for plans/regions) and confirm.
|
|
7964
|
+
The API-keys / tokens page appears only AFTER a resource exists. Do NOT
|
|
7965
|
+
return {"kind":"done"} or {"kind":"login"} on an empty dashboard while a
|
|
7966
|
+
create-resource CTA is visible — that is the path forward, not a dead end.
|
|
7967
|
+
- **Pre-filled fields are DONE — advance, don't re-touch.** If a required
|
|
7968
|
+
onboarding field (first name, company, email) is ALREADY populated, or a
|
|
7969
|
+
required selectable is ALREADY selected, do NOT re-fill/re-select it — click
|
|
7970
|
+
Continue / Next / Submit to move forward. Re-filling a satisfied field loops.
|
|
5843
7971
|
- For ANY dropdown — native (tag=select) OR a custom combobox (role=combobox / aria-haspopup=listbox, common on modern React apps like Sentry / Stripe / Vercel) — use {"kind":"select"}. "click" on a combobox trigger opens it but does not pick an option; do not click it repeatedly.
|
|
5844
7972
|
- When you need a SPECIFIC option from the dropdown — e.g. "Project: Read" on Sentry's permissions picker, or a specific region — include "option_text" with the visible label. The executor matches it case-insensitively as a substring. Omit "option_text" when any option is fine (a placeholder country picker).
|
|
5845
7973
|
- A post-OAuth onboarding form (organization name, region, terms) is normal — fill/select/check its fields and click Continue to advance toward the dashboard; do not return "done" just because it is a form.
|
|
@@ -5992,6 +8120,29 @@ ${formatInventory(input.inventory)}${input.hint !== undefined ? `\n\nIMPORTANT
|
|
|
5992
8120
|
// purpose — a "Continue with Google" / "Login with Google" /
|
|
5993
8121
|
// icon-only Google button all count when the bot has a
|
|
5994
8122
|
// provider session).
|
|
8123
|
+
// After a form submit, is the page a CONTINUATION step of the SAME signup
|
|
8124
|
+
// (amplitude's dedicated "Create your password" page is the canonical case)
|
|
8125
|
+
// rather than a dashboard, a credentials page, or a verify-your-email
|
|
8126
|
+
// screen? Returns a short label for the step trail, or null. Reused
|
|
8127
|
+
// fillValues already carry the password, so re-running planExecuteWithRetry
|
|
8128
|
+
// fills it. See isContinuationFormStep for the (conservative) signals.
|
|
8129
|
+
async detectContinuationFormStep() {
|
|
8130
|
+
let html = "";
|
|
8131
|
+
let url = "";
|
|
8132
|
+
let inventory;
|
|
8133
|
+
try {
|
|
8134
|
+
const state = await this.browser.getState();
|
|
8135
|
+
html = state.html;
|
|
8136
|
+
url = state.url;
|
|
8137
|
+
inventory = await this.browser.extractInteractiveElements();
|
|
8138
|
+
}
|
|
8139
|
+
catch {
|
|
8140
|
+
return null;
|
|
8141
|
+
}
|
|
8142
|
+
return isContinuationFormStep(html, inventory)
|
|
8143
|
+
? `password step at ${pathOf(url)}`
|
|
8144
|
+
: null;
|
|
8145
|
+
}
|
|
5995
8146
|
async looksLikeSignupPage() {
|
|
5996
8147
|
const state = await this.browser.getState();
|
|
5997
8148
|
// 1. URL-path shortcut. If we navigated to a signup-shaped path
|