@trusty-squire/mcp 0.8.16 → 0.8.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bot/agent.d.ts +33 -2
- package/dist/bot/agent.d.ts.map +1 -1
- package/dist/bot/agent.js +1747 -213
- package/dist/bot/agent.js.map +1 -1
- package/dist/bot/browser.d.ts +29 -1
- package/dist/bot/browser.d.ts.map +1 -1
- package/dist/bot/browser.js +796 -48
- package/dist/bot/browser.js.map +1 -1
- package/dist/bot/captcha-solver-2captcha.d.ts +12 -0
- package/dist/bot/captcha-solver-2captcha.d.ts.map +1 -1
- package/dist/bot/captcha-solver-2captcha.js +28 -5
- package/dist/bot/captcha-solver-2captcha.js.map +1 -1
- package/dist/bot/google-login.d.ts.map +1 -1
- package/dist/bot/google-login.js +39 -0
- package/dist/bot/google-login.js.map +1 -1
- package/dist/bot/index.d.ts +1 -1
- package/dist/bot/index.d.ts.map +1 -1
- package/dist/bot/oauth-providers.d.ts.map +1 -1
- package/dist/bot/oauth-providers.js +13 -3
- package/dist/bot/oauth-providers.js.map +1 -1
- package/dist/tools/signup-telemetry.d.ts +2 -2
- package/dist/tools/signup-telemetry.d.ts.map +1 -1
- package/dist/tools/signup-telemetry.js.map +1 -1
- package/package.json +1 -1
package/dist/bot/agent.js
CHANGED
|
@@ -15,7 +15,7 @@ import { sendTelegramHeightenedAuth } from "./telegram-notify.js";
|
|
|
15
15
|
import { TwoCaptchaSolver } from "./captcha-solver-2captcha.js";
|
|
16
16
|
import { redactCredentials } from "./redact.js";
|
|
17
17
|
import { readOperatorOtp, fromDomainFromUrl } from "./read-otp.js";
|
|
18
|
-
import { loggedInProviders, clearProviderLoggedIn } from "./login-state.js";
|
|
18
|
+
import { loggedInProviders, clearProviderLoggedIn, markProviderLoggedIn, } from "./login-state.js";
|
|
19
19
|
import { saveDebugSnapshot } from "./debug.js";
|
|
20
20
|
import { captureOnboardingRound } from "./onboarding-capture.js";
|
|
21
21
|
import { wasRecentlyPrewarmed, recordPrewarmSuccess } from "./prewarm-cache.js";
|
|
@@ -570,6 +570,72 @@ export function isGoogleSearchUrl(url) {
|
|
|
570
570
|
return false;
|
|
571
571
|
}
|
|
572
572
|
}
|
|
573
|
+
// Google's NEWER consent screen (URL form
|
|
574
|
+
// `accounts.google.com/signin/oauth/id?...&part=<opaque-token>`) hides
|
|
575
|
+
// the requested scopes behind the opaque `part=` token — there is no
|
|
576
|
+
// `scope=` query param to read, so extractOAuthScopes() returns null.
|
|
577
|
+
// The only remaining signal is the visible DOM: the consent page lists
|
|
578
|
+
// each requested item as a templated phrase. These pattern sets let us
|
|
579
|
+
// classify that DOM as basic-only vs. reaching beyond identity.
|
|
580
|
+
//
|
|
581
|
+
// BASIC = the openid/email/profile family — the exact thing the
|
|
582
|
+
// URL-readable happy path (scopesAreBasic → auto-approve) already
|
|
583
|
+
// approves without a human. We require a positive basic signal so an
|
|
584
|
+
// empty/ambiguous DOM never counts as basic.
|
|
585
|
+
// Visible consent-page wording that denotes an identity-only grant
// (the email / profile family). Each entry is a case-insensitive pattern
// tolerant of arbitrary whitespace between words.
const GOOGLE_BASIC_CONSENT_PHRASES = [
    /see\s+your\s+primary\s+google\s+account\s+email\s+address/i, // full email-address sentence
    /\byour\s+(?:primary\s+)?(?:google\s+account\s+)?email\s+address\b/i, // shorter email-address variants
    /see\s+your\s+personal\s+info/i, // "See your personal info, including ..."
    /your\s+public\s+profile/i, // "See your public profile"
    /associate\s+you\s+with\s+your\s+personal\s+info/i, // "Associate you with your personal info on Google"
];
|
|
597
|
+
// Sensitive (non-basic) scope-grant wording. Any hit means the consent
|
|
598
|
+
// reaches beyond identity — never auto-approve. Kept broad on purpose:
|
|
599
|
+
// a false "non-basic" only costs a manual review, but a missed one
|
|
600
|
+
// would auto-approve a sensitive grant.
|
|
601
|
+
// Wording that indicates a grant reaching past identity. Deliberately
// broad: a spurious hit only forces a manual review, whereas a miss could
// let a sensitive grant through.
const GOOGLE_NON_BASIC_CONSENT_PHRASES = [
    // Product / data-surface names.
    /\bcontacts?\b/i,
    /\bcalendars?\b/i,
    /\b(?:google\s+)?drive\b/i,
    /\byour\s+files?\b/i,
    /\bgmail\b/i,
    /send\s+(?:email|mail|messages)/i,
    /\bspreadsheets?\b/i,
    /\bsheets\b/i,
    /\bphotos\b/i,
    /\byoutube\b/i,
    // Action verbs that imply write or bulk-read access.
    /\bon\s+your\s+behalf\b/i,
    /\bmanage\s+your\b/i,
    /\bedit\s+your\b/i,
    /\bdelete\s+your\b/i,
    /see\s+and\s+download\s+your/i,
];
|
|
618
|
+
// "basic" = the consent DOM lists ONLY openid/email/profile-family
|
|
619
|
+
// grants. See the block comment above for WHY this exists (Google hides
|
|
620
|
+
// scopes behind `part=` in the new consent URL; the visible phrases are
|
|
621
|
+
// the only signal, and a basic-only consent is what the URL-readable
|
|
622
|
+
// path auto-approves anyway). Returns false on ambiguous/empty so the
|
|
623
|
+
// caller keeps its conservative oauth_consent_needs_review abort —
|
|
624
|
+
// this gate only RECOVERS the basic-only case, never widens approval.
|
|
625
|
+
// Exported for unit testing.
|
|
626
|
+
/**
 * Decide from the consent page's visible text whether it requests only
 * identity-family grants.
 *
 * @param {string} bodyText - Visible text of the consent page.
 * @returns {boolean} true only when no sensitive wording is found AND at
 *   least one basic phrase is present; false otherwise, including for
 *   empty or unrecognised text.
 */
export function googleConsentIsBasicFromDom(bodyText) {
    const matchesAny = (patterns) => patterns.some((pattern) => pattern.test(bodyText));
    // First backstop: the existing scope-phrase scraper (defined elsewhere
    // in this module). Any phrase it reports disqualifies the page.
    const flaggedByScraper = scrapeGoogleScopePhrases(bodyText).length > 0;
    if (flaggedByScraper || matchesAny(GOOGLE_NON_BASIC_CONSENT_PHRASES)) {
        return false;
    }
    // A positive basic signal is mandatory, so text with no recognisable
    // grant wording is never treated as basic.
    return matchesAny(GOOGLE_BASIC_CONSENT_PHRASES);
}
|
|
573
639
|
// The set of value_kinds the planner is allowed to emit. Kept as a
|
|
574
640
|
// runtime array so validation and the exhaustive `valueFor` switch
|
|
575
641
|
// share one source of truth.
|
|
@@ -1017,6 +1083,369 @@ export function hostMatchesServiceDomain(hostname, serviceSlug) {
|
|
|
1017
1083
|
const normalized = firstLabel.replace(/[^a-z0-9]/g, "");
|
|
1018
1084
|
return normalized === serviceSlug;
|
|
1019
1085
|
}
|
|
1086
|
+
// Strip HTML tags + decode the handful of entities that show up in the
|
|
1087
|
+
// copy we key on, then lowercase. We classify on the VISIBLE COPY because
|
|
1088
|
+
// that's the only thing that reliably distinguishes a signup form from a
|
|
1089
|
+
// login form — both have an <input type="password"> and an email field,
|
|
1090
|
+
// so structure alone is ambiguous (the exact bug looksLikeSignupPage
|
|
1091
|
+
// can't see past). The decoded entities matter: "Create&nbsp;account" or
|
|
1092
|
+
// a "Don&#39;t have an account?" link would otherwise hide the
|
|
1093
|
+
// discriminating phrase behind an entity.
|
|
1094
|
+
/**
 * Reduce an HTML document to lowercase text suitable for phrase matching.
 *
 * Steps, in order: drop <script> and <style> elements together with their
 * contents, replace every remaining tag with a space, decode the few
 * character entities that occur inside the phrases callers match on,
 * collapse whitespace runs to a single space, and lowercase the result.
 * The result is not trimmed.
 *
 * @param {string} html - Raw HTML source.
 * @returns {string} Lowercased text with tags removed.
 */
function stripHtmlToText(html) {
    return html
        .replace(/<script\b[^>]*>[\s\S]*?<\/script>/gi, " ")
        .replace(/<style\b[^>]*>[\s\S]*?<\/style>/gi, " ")
        .replace(/<[^>]+>/g, " ")
        // Entities are matched in their ENCODED form; without this step
        // "Create&nbsp;account" or "Don&#39;t have an account?" would never
        // match the plain-text phrase patterns.
        .replace(/&nbsp;/gi, " ")
        .replace(/&#39;|&#x27;|&apos;/gi, "'")
        .replace(/&quot;/gi, '"')
        // Decode "&amp;" last so escaped text such as "&amp;quot;" is not
        // decoded twice.
        .replace(/&amp;/gi, "&")
        .replace(/\s+/g, " ")
        .toLowerCase();
}
|
|
1107
|
+
// Classify a fetched page as a signup form, a login form, or neither.
|
|
1108
|
+
//
|
|
1109
|
+
// WHY this exists: looksLikeSignupPage() answers "does this page have a
|
|
1110
|
+
// form?" — which a LOGIN page also satisfies (email + password + a
|
|
1111
|
+
// "Continue with Google" button). The discriminator is the COPY, not the
|
|
1112
|
+
// structure: a real email-signup form carries create-account CTA text
|
|
1113
|
+
// ("create account", "sign up", "get started", "register"); a login form
|
|
1114
|
+
// carries "sign in" / "log in" / "welcome back" and lacks the create CTA.
|
|
1115
|
+
// This is the heart of the stale-URL fix — a curated /signup that
|
|
1116
|
+
// silently serves the login SPA classifies as "login" here, which lets
|
|
1117
|
+
// the resolver reject it and probe for the real signup path.
|
|
1118
|
+
/**
 * Classify a fetched page as a signup form, a login form, or neither,
 * based on its visible copy plus the presence of a password field.
 *
 * @param {string} html - Raw HTML of the page.
 * @param {string} [title] - Page title, if known; a missing title is
 *   treated as empty.
 * @returns {"signup" | "login" | "other"}
 */
export function classifySignupHtml(html, title) {
    const copy = stripHtmlToText(html);
    const normalizedTitle = (title ?? "").toLowerCase();
    const matchesAny = (patterns, subject) => patterns.some((pattern) => pattern.test(subject));

    // An error-shell title overrides any form copy found in the body.
    const errorTitleMarkers = ["404", "not found", "page not found"];
    if (errorTitleMarkers.some((marker) => normalizedTitle.includes(marker))) {
        return "other";
    }

    // Checked against the RAW html: attribute values live inside the tags
    // that the text stripper removes. A name/id of "password" is accepted
    // as well as an explicit type=password.
    const hasPassword = matchesAny([
        /type\s*=\s*["']?password["']?/i,
        /(?:name|id)\s*=\s*["']?password["']?/i,
    ], html);

    // Account-creation call-to-action wording.
    const hasSignupCta = matchesAny([
        /\bcreate (?:an )?account\b/,
        /\bcreate your account\b/,
        /\bsign[\s-]?up\b/,
        /\bget started\b/,
        /\bregister\b/,
    ], copy);

    // Generic sign-in wording.
    const hasLoginCopy = matchesAny([
        /\bsign in\b/,
        /\blog[\s-]?in\b/,
        /\bwelcome back\b/,
    ], copy);

    // Headings that mark the PRIMARY form as login even when a signup link
    // is also present on the page.
    const loginDominant = matchesAny([
        /\bsign in to your account\b/,
        /\bwelcome back\b/,
        /\blog[\s-]?in to\b/,
    ], copy);

    if (hasPassword) {
        if (hasSignupCta && !loginDominant) {
            return "signup";
        }
        if (loginDominant) {
            return "login";
        }
    }
    // Login wording with no account-creation wording anywhere.
    if (hasLoginCopy && !hasSignupCta) {
        return "login";
    }
    return "other";
}
|
|
1171
|
+
// Pull the email address an email-verification wall names ("check your
|
|
1172
|
+
// <addr> inbox", "we sent a link to <addr>"). Returns the first email-shaped
|
|
1173
|
+
// token, or null. Used to poll the RIGHT alias when the wall was reached
|
|
1174
|
+
// without a fresh submit (a pending account may carry an alias from a prior
|
|
1175
|
+
// run, not task.email). Exported for unit tests.
|
|
1176
|
+
/**
 * Return the first email-shaped token in `text` that does not end in a
 * static-asset file extension.
 *
 * @param {string} text - Page text or HTML to scan.
 * @returns {string | null} The matched address exactly as written, or
 *   null when none qualifies.
 */
export function extractVerifyWallAlias(text) {
    const emailShaped = /[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}/gi;
    // Versioned asset names such as "pkg@1.2.3.js" are email-shaped too;
    // a trailing file extension identifies and excludes them.
    const assetSuffix = /\.(?:js|mjs|css|map|png|jpe?g|svg|gif|ico|woff2?|ttf|webp)$/i;
    for (let hit = emailShaped.exec(text); hit !== null; hit = emailShaped.exec(text)) {
        const [candidate] = hit;
        if (!assetSuffix.test(candidate)) {
            return candidate;
        }
    }
    return null;
}
|
|
1192
|
+
// Pure: does this post-submit page look like a CONTINUATION step of the same
|
|
1193
|
+
// signup (a dedicated "Create your password" page — amplitude's step 2 — is the
|
|
1194
|
+
// canonical case) rather than a dashboard, a credentials page, or a
|
|
1195
|
+
// verify-your-email screen? Conservative on purpose: requires a VISIBLE, EMPTY
|
|
1196
|
+
// password input the bot still needs to fill AND a create/continue-style submit
|
|
1197
|
+
// control, and the page must NOT read as a verify-your-email screen or a login
|
|
1198
|
+
// form (a "sign in" page also has a password field, but re-filling it with the
|
|
1199
|
+
// run's generated password would just fail). Exported for unit tests.
|
|
1200
|
+
/**
 * Decide whether a post-submit page is a further step of the same signup
 * that still needs a password filled in.
 *
 * @param {string} html - Raw HTML of the current page.
 * @param {Array<object>} inventory - Element records; this function reads
 *   `tag`, `type`, `visible`, `value`, `visibleText` and `ariaLabel`.
 * @returns {boolean} true when the page is neither a verify-your-email
 *   screen nor a login form, contains a visible empty password input, and
 *   contains a button/submit control with create/continue-style wording.
 */
export function isContinuationFormStep(html, inventory) {
    // Verify-email pages are completed by the inbox poll; login pages must
    // not be re-filled with the generated password.
    if (expectsVerificationEmail(html)) {
        return false;
    }
    if (classifySignupHtml(html) === "login") {
        return false;
    }

    const isUnfilledPassword = (el) => el.tag === "input" &&
        el.type === "password" &&
        el.visible !== false &&
        (el.value ?? "") === "";
    if (!inventory.some(isUnfilledPassword)) {
        return false;
    }

    const advanceWording = /\b(?:create|continue|sign[\s-]?up|next|submit|finish|get started|done)\b/;
    return inventory.some((el) => {
        const isSubmitControl = el.tag === "button" || el.type === "submit";
        if (!isSubmitControl) {
            return false;
        }
        const label = `${el.visibleText ?? ""} ${el.ariaLabel ?? ""}`.toLowerCase();
        return advanceWording.test(label);
    });
}
|
|
1220
|
+
// Find the in-page "create an account" affordance on a LOGIN page that
|
|
1221
|
+
// also advertises signup ("Don't have an account? Sign up for free" —
|
|
1222
|
+
// the amplitude case). After Google OAuth, such a service has signed the
|
|
1223
|
+
// identity in but has no account/org for it, and expects the in-page
|
|
1224
|
+
// signup CTA to be clicked to create one. We surface that element so the
|
|
1225
|
+
// post-OAuth recovery can click it and re-route into the email/password
|
|
1226
|
+
// signup path, instead of re-triggering OAuth in a loop.
|
|
1227
|
+
//
|
|
1228
|
+
// A login page carries BOTH a "Sign in" submit button AND a "Sign up"
|
|
1229
|
+
// link — we want the latter. Returns null when no signup affordance is
|
|
1230
|
+
// present (so callers fall through to the existing re-OAuth path).
|
|
1231
|
+
/**
 * Locate the in-page "create an account" affordance on a page that also
 * offers sign-in and OAuth buttons.
 *
 * An element qualifies when it is clickable (an <a>, a <button>, or any
 * element whose role is "button"), its label (visible text plus aria-label)
 * reads as signup intent, and the label is NOT an OAuth affordance.
 * The first qualifying <a>/<button> wins; a role="button" element is
 * returned only when no anchor or button qualifies.
 *
 * @param {Array<object>} inventory - Element records; this function reads
 *   `tag`, `role`, `visibleText` and `ariaLabel`.
 * @returns {object | null} The chosen element record, or null when the page
 *   has no signup affordance.
 */
export function findSignupCtaElement(inventory) {
    // "sign up" / "sign up for free" / "create (an) account" / "register" /
    // "get started", word-bounded. A label that is sign-in only ("Sign in",
    // "Log in") can never match, so no separate login-only filter is needed.
    const signupIntent = /\b(?:sign[\s-]?up(?:\s+for\s+free)?|create\s+(?:an?\s+)?account|register|get\s+started)\b/i;
    // OAuth buttons restart the provider handshake, so they are excluded.
    // Besides the "continue/sign in/log in with" idioms, any "... with
    // <provider>" label is rejected: "Sign up with Google" and "Register
    // with GitHub" satisfy signupIntent but are still OAuth buttons.
    // "Sign up with email" names no provider and stays eligible.
    const oauthAffordance = /continue with|sign in with|log ?in with|\bwith\s+(?:google|github|gitlab|microsoft|apple)\b/i;

    let roleButtonFallback = null;
    for (const el of inventory) {
        const isNativeControl = el.tag === "a" || el.tag === "button";
        const hasButtonRole = (el.role ?? "").toLowerCase() === "button";
        if (!isNativeControl && !hasButtonRole) {
            continue;
        }
        const label = `${el.visibleText ?? ""} ${el.ariaLabel ?? ""}`.trim();
        if (label === "" || oauthAffordance.test(label) || !signupIntent.test(label)) {
            continue;
        }
        if (isNativeControl) {
            // The first real link/button is final; nothing later outranks it.
            return el;
        }
        if (roleButtonFallback === null) {
            roleButtonFallback = el;
        }
    }
    return roleButtonFallback;
}
|
|
1284
|
+
// True when a post-OAuth page is a read-only DEMO / sandbox the service drops
|
|
1285
|
+
// new users into (amplitude: app.amplitude.com/analytics/demo) rather than a
|
|
1286
|
+
// real account — there is no API key here, and a real org needs the page's
|
|
1287
|
+
// "Create a free account" CTA. Conservative: a `/demo` URL segment OR explicit
|
|
1288
|
+
// demo copy ("you are currently in the … demo" / "this is a demo"). Exported
|
|
1289
|
+
// for unit tests.
|
|
1290
|
+
/**
 * Report whether a page looks like a demo / sandbox view rather than a
 * real account.
 *
 * @param {string} url - Current page URL; an unparseable value is tolerated
 *   and simply skips the path check.
 * @param {string} bodyText - Visible page text.
 * @returns {boolean} true when the URL path contains a whole `demo`
 *   segment, or the text carries explicit demo wording.
 */
export function isSandboxDemoState(url, bodyText) {
    let pathname = null;
    try {
        pathname = new URL(url).pathname.toLowerCase();
    }
    catch {
        // Unparseable URL: rely on the text check alone.
        pathname = null;
    }
    // Whole-segment match, so "/demonstration" does not count.
    if (pathname !== null && /(?:^|\/)demo(?:\/|$)/.test(pathname)) {
        return true;
    }
    const demoWording = /you are currently in the .{0,30}demo|this is (?:a|the) .{0,20}demo|viewing (?:the )?demo|demo (?:account|environment|workspace)\b/i;
    return demoWording.test(bodyText);
}
|
|
1301
|
+
// Find the "Create a free account" CTA that escapes a demo/sandbox into the
|
|
1302
|
+
// real signup. Distinct from findSignupCtaElement because the demo phrasing
|
|
1303
|
+
// ("Create a free account") has "free" between "a" and "account", which that
|
|
1304
|
+
// helper's tighter regex doesn't match. Clickable tags only. Exported for
|
|
1305
|
+
// unit tests.
|
|
1306
|
+
/**
 * Find the first clickable element whose label offers account creation in
 * the "free account" phrasing used on demo/sandbox pages.
 *
 * Matches "create (a) (free) account", "sign up for free" (also written
 * "sign-up" or "signup") and "get started for free", case-insensitively,
 * in the element's visible text or aria-label.
 *
 * @param {Array<object>} inventory - Element records; this function reads
 *   `tag`, `role`, `visibleText` and `ariaLabel`.
 * @returns {object | null} The first matching <a>, <button> or
 *   role="button" element, or null when there is none.
 */
export function findCreateAccountCta(inventory) {
    const createAccountWording = /create\s+(?:a\s+)?(?:free\s+)?account|sign[\s-]*up\s+for\s+free|get\s+started\s+for\s+free/i;
    for (const el of inventory) {
        // The role is compared case-insensitively, the same way
        // findSignupCtaElement treats it, so role="Button" is not skipped.
        const isClickable = el.tag === "a" ||
            el.tag === "button" ||
            (el.role ?? "").toLowerCase() === "button";
        if (!isClickable) {
            continue;
        }
        const label = `${el.visibleText ?? ""} ${el.ariaLabel ?? ""}`.trim();
        if (createAccountWording.test(label)) {
            return el;
        }
    }
    return null;
}
|
|
1317
|
+
// Conventional signup paths to probe, in priority order. Small + ordered
|
|
1318
|
+
// on purpose — we want the FIRST real signup form, not a fan-out across
|
|
1319
|
+
// dozens of guesses that each cost a round-trip over a residential
|
|
1320
|
+
// tunnel. "/auth/signup" sits high because it catches the plunk case
|
|
1321
|
+
// (app.useplunk.com/auth/signup 308 → next-app.useplunk.com/auth/signup).
|
|
1322
|
+
// Well-known signup paths, highest priority first. The order is
// significant: candidates are generated in this order.
const CONVENTIONAL_SIGNUP_PATHS = [
    "/signup",
    "/auth/signup",
    "/sign-up",
    "/register",
    "/users/sign_up",
    "/account/signup",
    "/join",
];
|
|
1331
|
+
// Host-prefix swaps: dashboards live behind app./console./dashboard./www.,
|
|
1332
|
+
// but the signup form often lives on auth. or the bare apex. Swapping the
|
|
1333
|
+
// leading label widens the probe to those hosts without fanning out
|
|
1334
|
+
// blindly across arbitrary subdomains.
|
|
1335
|
+
// [pattern, replacement] pairs applied to a hostname's leading label.
// Every listed prefix is rewritten to "auth.".
const SIGNUP_HOST_PREFIX_SWAPS = [
    [/^app\./, "auth."],
    [/^www\./, "auth."],
    [/^console\./, "auth."],
    [/^dashboard\./, "auth."],
];
|
|
1341
|
+
// Build the ordered, de-duped candidate URL set for the probe: every
|
|
1342
|
+
// conventional path across (the hint host, the prefix-swapped hosts, and
|
|
1343
|
+
// the bare eTLD+1). The resolver's final domain-safety check guards
|
|
1344
|
+
// against a candidate that ends up redirecting off-domain.
|
|
1345
|
+
/**
 * Build the ordered, de-duplicated list of signup URLs to probe.
 *
 * Hosts considered, in order: the hint's own hostname, each prefix-swapped
 * variant that applies to it, and the value `getDomain` returns for it
 * (skipped when that is null). Every conventional path is combined with
 * every host, path-major: one path across all hosts before the next path.
 * All candidates use the https scheme.
 *
 * @param {URL} hint - Parsed hint URL; only `hostname` is read.
 * @returns {string[]} Candidate URLs in probe order.
 */
function buildSignupCandidates(hint) {
    const baseHost = hint.hostname;
    const swappedHosts = SIGNUP_HOST_PREFIX_SWAPS
        .filter(([pattern]) => pattern.test(baseHost))
        .map(([pattern, replacement]) => baseHost.replace(pattern, replacement));
    // A Set keeps first-insertion order and drops repeated hosts.
    const hosts = new Set([baseHost, ...swappedHosts]);
    const registered = getDomain(baseHost);
    if (registered !== null) {
        hosts.add(registered);
    }
    const urls = CONVENTIONAL_SIGNUP_PATHS.flatMap((path) => [...hosts].map((host) => `https://${host}${path}`));
    return [...new Set(urls)];
}
|
|
1370
|
+
// Tier A of the signup-URL resolver — the HTTP fast-path. Given a hint URL
|
|
1371
|
+
// (curated YAML or a guess) and an injectable redirect-following fetcher,
|
|
1372
|
+
// return a URL that actually serves a signup FORM, or null if the HTTP
|
|
1373
|
+
// probe can't resolve one (the caller then escalates to the landing-page
|
|
1374
|
+
// CTA or the Google-search fallback).
|
|
1375
|
+
//
|
|
1376
|
+
// `fetchText` is injected so this is unit-testable with a fake — in
|
|
1377
|
+
// production it's bound to BrowserController.fetchText, which egresses
|
|
1378
|
+
// through the same residential proxy + cookie jar as the real navigation,
|
|
1379
|
+
// so a redirect/HTML read here is representative of what the browser would
|
|
1380
|
+
// land on. Pure-ish: no browser, no globals beyond the PSL helper.
|
|
1381
|
+
/**
 * HTTP probe stage of signup-URL resolution: find a URL whose fetched HTML
 * classifies as a signup form.
 *
 * The hint URL is fetched first and, if it classifies as "signup", the
 * URL it finally resolved to is returned without further checks.
 * Otherwise each conventional candidate is fetched in order, one at a
 * time; the first that classifies as "signup" AND whose final host either
 * shares the hint's registered domain or matches the service slug wins.
 *
 * @param {string} hintUrl - Starting URL.
 * @param {string} serviceSlug - Slug passed to hostMatchesServiceDomain.
 * @param {(url: string) => Promise<{bodyText: string, finalUrl: string} | null>} fetchText -
 *   Fetcher; this function reads `bodyText` and `finalUrl` from its result
 *   and treats null as a failed fetch.
 * @param {(message: string) => void} [log] - Optional progress logger.
 * @returns {Promise<string | null>} The resolved signup URL, or null when
 *   the hint is not a URL or nothing resolves.
 */
export async function resolveSignupUrlByProbe(hintUrl, serviceSlug, fetchText, log) {
    const note = (message) => log?.(message);
    // Hostname of a URL string, or null when it does not parse.
    const hostnameOf = (rawUrl) => {
        try {
            return new URL(rawUrl).hostname;
        }
        catch {
            return null;
        }
    };

    let hint;
    try {
        hint = new URL(hintUrl);
    }
    catch {
        note(`[signup-url] hint ${hintUrl} is not a URL — skipping HTTP probe`);
        return null;
    }

    // Fast path: the hint itself, as returned by the fetcher.
    const hintRes = await fetchText(hintUrl);
    const hintKind = hintRes === null ? null : classifySignupHtml(hintRes.bodyText);
    if (hintKind === "signup") {
        const redirected = hintRes.finalUrl !== hintUrl;
        note(redirected
            ? `[signup-url] hint ${hintUrl} redirected to signup ${hintRes.finalUrl}`
            : `[signup-url] hint ${hintUrl} is already a signup form`);
        return hintRes.finalUrl;
    }
    note(`[signup-url] hint ${hintUrl} did not classify as signup` +
        (hintRes === null ? " (fetch failed)" : ` (${hintKind})`));

    // The hint's registered domain is the anchor against which candidate
    // destinations are compared; a slug match is a secondary allowance.
    const hintDomain = getDomain(hint.hostname.toLowerCase());

    // Candidates are fetched sequentially because the first hit ends the search.
    for (const candidate of buildSignupCandidates(hint)) {
        if (candidate === hintUrl) {
            continue; // already tried as the hint
        }
        const res = await fetchText(candidate);
        if (res === null || classifySignupHtml(res.bodyText) !== "signup") {
            continue;
        }
        const finalHost = hostnameOf(res.finalUrl);
        if (finalHost === null) {
            continue;
        }
        const finalDomain = getDomain(finalHost.toLowerCase());
        const sameRegisteredDomain = hintDomain !== null && finalDomain !== null && finalDomain === hintDomain;
        if (sameRegisteredDomain || hostMatchesServiceDomain(finalHost, serviceSlug)) {
            note(`[signup-url] resolved via probe: ${candidate} → ${res.finalUrl}`);
            return res.finalUrl;
        }
        note(`[signup-url] candidate ${candidate} → ${res.finalUrl} rejected: ` +
            `off-domain (hint domain ${hintDomain ?? "?"})`);
    }
    note(`[signup-url] no conventional signup path resolved for ${hintUrl}`);
    return null;
}
|
|
1020
1449
|
// BUG-3 GUARD — diagnostic flag for the Inventory snapshot. Stricter
|
|
1021
1450
|
// than detectAntiBotBlock (no "cf-turnstile" / "recaptcha" raw-HTML
|
|
1022
1451
|
// matches) because the previous regex false-positive matched legitimate
|
|
@@ -1083,6 +1512,39 @@ export function detectAlreadySignedIn(args) {
|
|
|
1083
1512
|
(e.type === "email" || e.type === "password" || e.type === "tel"));
|
|
1084
1513
|
if (hasCredentialInput)
|
|
1085
1514
|
return false;
|
|
1515
|
+
// Signal 0 — a strong post-login URL path. An onboarding /
|
|
1516
|
+
// getting-started / welcome route is only reachable AFTER you're
|
|
1517
|
+
// authenticated (you cannot see a "you're all set, next steps" wizard
|
|
1518
|
+
// without a session), so the URL alone is conclusive here — unlike the
|
|
1519
|
+
// weaker dashboard paths in Signal 3, no paired creation-CTA is needed.
|
|
1520
|
+
// last9 lands the bot on /v2/organizations/<slug>/getting-started with
|
|
1521
|
+
// its Google session already active; its buttons ("Choose your region",
|
|
1522
|
+
// "You're all set! Next steps", "Upgrade Plan") matched none of the CTA
|
|
1523
|
+
// vocabularies below, so it used to bail `oauth_required` — claiming
|
|
1524
|
+
// "only OAuth/SSO signup, no email/password form" while the bot was in
|
|
1525
|
+
// fact fully signed in. The precondition above already ruled out a
|
|
1526
|
+
// signup chooser (no credential input).
|
|
1527
|
+
// ...UNLESS the page still presents a signup/OAuth chooser (a
|
|
1528
|
+
// "Continue with Google" button or a bare "Sign up"/"Log in"). Some
|
|
1529
|
+
// services route the login chooser through an /onboarding-style URL; if
|
|
1530
|
+
// a provider button is visible, the bot must OAuth via it, not treat the
|
|
1531
|
+
// page as already-authenticated. (PostHog TS-1923.)
|
|
1532
|
+
const hasSignupAffordance = inventory.some((e) => {
|
|
1533
|
+
const t = `${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`
|
|
1534
|
+
.toLowerCase()
|
|
1535
|
+
.replace(/\s+/g, " ")
|
|
1536
|
+
.trim();
|
|
1537
|
+
return (/\b(?:continue with|sign ?up with|sign ?in with|log ?in with|with (?:google|github|gitlab|microsoft|apple))\b/.test(t) || /^(?:sign ?up|sign ?in|log ?in|create (?:an )?account)$/.test(t));
|
|
1538
|
+
});
|
|
1539
|
+
try {
|
|
1540
|
+
if (!hasSignupAffordance &&
|
|
1541
|
+
/\/(?:getting-started|get-started|onboarding|welcome)(?:\/|$)/i.test(new URL(url).pathname)) {
|
|
1542
|
+
return true;
|
|
1543
|
+
}
|
|
1544
|
+
}
|
|
1545
|
+
catch {
|
|
1546
|
+
// malformed URL — fall through to the other signals
|
|
1547
|
+
}
|
|
1086
1548
|
const visibleTextOf = (e) => `${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`.trim();
|
|
1087
1549
|
// Signal 1 — strict nav-keyword match (the canonical Sentry-class case).
|
|
1088
1550
|
const AUTH_KEYWORDS = /^\s*(?:sign out|log out|dashboard|projects|settings|profile|my account|account settings|workspaces)\s*$/i;
|
|
@@ -1326,13 +1788,25 @@ export function findOAuthButton(inventory, provider) {
|
|
|
1326
1788
|
const href = (e.href ?? "").toLowerCase();
|
|
1327
1789
|
if (href.length > 0 && hrefRe.test(href))
|
|
1328
1790
|
return e;
|
|
1329
|
-
// 2. Icon-only button — named only by a descendant img/svg.
|
|
1330
|
-
//
|
|
1331
|
-
//
|
|
1332
|
-
//
|
|
1333
|
-
//
|
|
1334
|
-
|
|
1335
|
-
|
|
1791
|
+
// 2. Icon-only (logo) button — named only by a descendant img/svg.
|
|
1792
|
+
// Truly-empty visibleText is the clean case. But a logo button whose
|
|
1793
|
+
// <svg> carries a <title>GitHub</title> LEAKS that title into
|
|
1794
|
+
// textContent (northflank renders "GitHubGitHub" — doubled, which
|
|
1795
|
+
// also defeats the \bgithub\b match in path 3), so it isn't strictly
|
|
1796
|
+
// empty. Treat it as icon-only too WHEN its visible text is nothing
|
|
1797
|
+
// but the provider name (any number of times): strip every keyword
|
|
1798
|
+
// occurrence and require no residue. A nav link like "GitHub's
|
|
1799
|
+
// Privacy Policy" leaves residue and is correctly rejected. The
|
|
1800
|
+
// iconLabel must still independently name the provider, so a stray
|
|
1801
|
+
// one-word label can't false-positive.
|
|
1802
|
+
const kw = keyword.toLowerCase();
|
|
1803
|
+
const residue = visibleText
|
|
1804
|
+
.toLowerCase()
|
|
1805
|
+
.split(kw)
|
|
1806
|
+
.join("")
|
|
1807
|
+
.replace(/[\s·|/–-]+/g, "");
|
|
1808
|
+
const isLogoOnly = visibleText.length === 0 || residue.length === 0;
|
|
1809
|
+
if (isLogoOnly && keywordRe.test((e.iconLabel ?? "").toLowerCase())) {
|
|
1336
1810
|
return e;
|
|
1337
1811
|
}
|
|
1338
1812
|
// 3. Visible text / accessible label naming the provider + an
|
|
@@ -1344,7 +1818,16 @@ export function findOAuthButton(inventory, provider) {
|
|
|
1344
1818
|
.trim();
|
|
1345
1819
|
if (!keywordRe.test(text))
|
|
1346
1820
|
continue;
|
|
1347
|
-
|
|
1821
|
+
// "with <provider>" is the OAuth-button idiom and is accepted
|
|
1822
|
+
// directly — it survives an SVG accessible name glued to the verb.
|
|
1823
|
+
// elevenlabs renders its button text as "GoogleSign up with Google",
|
|
1824
|
+
// which fuses "sign" into "googlesign" so the bare \bsign\b check
|
|
1825
|
+
// misses, but "with google" still matches. (A blanket camelCase split
|
|
1826
|
+
// can't be used to un-glue it — it would mangle the provider name
|
|
1827
|
+
// itself, e.g. "GitHub" → "Git Hub".)
|
|
1828
|
+
const withProviderRe = new RegExp(`\\bwith ${keyword}\\b`);
|
|
1829
|
+
if (/\b(sign|signup|signin|continue|log ?in|connect|auth)\b/.test(text) ||
|
|
1830
|
+
withProviderRe.test(text)) {
|
|
1348
1831
|
return e;
|
|
1349
1832
|
}
|
|
1350
1833
|
// rc.39 — minimal-label OAuth buttons. Some auth UIs render the
|
|
@@ -1424,15 +1907,24 @@ export function isLoginLoopState(url, inventory, provider) {
|
|
|
1424
1907
|
// loop-detect path saw the Google button + the login-shaped URL
|
|
1425
1908
|
// and looped OAuth indefinitely.
|
|
1426
1909
|
//
|
|
1427
|
-
// When
|
|
1428
|
-
//
|
|
1429
|
-
//
|
|
1430
|
-
//
|
|
1431
|
-
//
|
|
1432
|
-
//
|
|
1433
|
-
//
|
|
1434
|
-
|
|
1435
|
-
|
|
1910
|
+
// When a PASSWORD input is visible alongside (2) an OAuth button for
|
|
1911
|
+
// the provider we just used, the page is a genuine hybrid
|
|
1912
|
+
// credential-creation form (Clerk/Auth0: email + password [+ turnstile]),
|
|
1913
|
+
// not a loop. Return null so the caller falls through to the
|
|
1914
|
+
// post-verify flow — its planner drives the form-fill, the captcha
|
|
1915
|
+
// gate, and the Continue click the same way the form-fill phase does.
|
|
1916
|
+
//
|
|
1917
|
+
// A BARE EMAIL field does NOT count: it's the near-universal "or
|
|
1918
|
+
// continue with email" magic-link/OTP alternative that sits next to
|
|
1919
|
+
// the OAuth buttons on an ordinary login page (groq's /authenticate,
|
|
1920
|
+
// northflank's /login, …). Treating that as a hybrid form suppressed
|
|
1921
|
+
// the login-loop OAuth retry these services REQUIRE — they finalize
|
|
1922
|
+
// the Stytch/WorkOS session only on a second OAuth click — and
|
|
1923
|
+
// stranded them at oauth_session_not_persisted. The email-OTP case
|
|
1924
|
+
// that genuinely needs the planner is caught separately downstream
|
|
1925
|
+
// (detectEmailOtpGate), so narrowing to password here is safe.
|
|
1926
|
+
const hasPasswordInput = inventory.some((e) => e.tag === "input" && e.type === "password");
|
|
1927
|
+
if (hasPasswordInput)
|
|
1436
1928
|
return null;
|
|
1437
1929
|
return findOAuthButton(inventory, provider);
|
|
1438
1930
|
}
|
|
@@ -1504,6 +1996,47 @@ export function detectSsoRestriction(pageText) {
|
|
|
1504
1996
|
// "Single Sign-On is required", "SSO organization membership".
|
|
1505
1997
|
return /(?:managed\s+via\s+(?:sso|single\s+sign-?on)|sso[\s-]?managed|sso\s+organization|single\s+sign-?on\s+is\s+required|enforced\s+by\s+(?:sso|saml))/.test(lower);
|
|
1506
1998
|
}
|
|
1999
|
+
// Google-OAuth-is-LOGIN-ONLY (plunk class). Some services accept Google
|
|
2000
|
+
// only to log an EXISTING account in; they do NOT auto-provision a new
|
|
2001
|
+
// account for a first-time Google identity. The OAuth handshake
|
|
2002
|
+
// completes, then the service bounces back to its login page with an
|
|
2003
|
+
// explicit "no account" message — e.g. plunk lands on
|
|
2004
|
+
// `…/auth/login?message=No%20account%20found%20for%20this%20Google%20account`.
|
|
2005
|
+
//
|
|
2006
|
+
// WHY a dedicated detector: this state otherwise trips
|
|
2007
|
+
// detectManualLoginFallback (it IS a /login form) and aborts as
|
|
2008
|
+
// `oauth_session_not_persisted` — misleading, because nothing dropped
|
|
2009
|
+
// the session; the account simply was never created. The correct
|
|
2010
|
+
// recovery is to abandon OAuth and create the account via the
|
|
2011
|
+
// email/password form. Caller re-routes to form-fill on a true return.
|
|
2012
|
+
//
|
|
2013
|
+
// Conservative by design: matches the URL query AND body text against
|
|
2014
|
+
// CLEAR no-account / must-sign-up phrasing. A normal consent page or a
|
|
2015
|
+
// post-login dashboard (which never carries these phrases) must NOT
|
|
2016
|
+
// match, or we'd wrongly abandon a working OAuth session.
|
|
2017
|
+
/**
 * Detects the "provider login succeeded but no account exists" bounce: the
 * service returns to its login page with an explicit no-account message in
 * the URL query and/or the page body.
 *
 * @param {string} url - Current page URL; only its query string is inspected.
 *   A value that does not parse as a URL contributes nothing to the match.
 * @param {string} bodyText - Visible page text; a nullish value is treated as "".
 * @returns {boolean} True when the query or the body carries one of the
 *   no-account / must-sign-up phrases.
 */
export function detectGoogleNoAccount(url, bodyText) {
    // Decode one "&"-separated segment at a time so a single malformed
    // percent escape cannot discard an otherwise readable message elsewhere
    // in the query string.
    const decodeSegment = (segment) => {
        try {
            return decodeURIComponent(segment);
        }
        catch {
            return segment;
        }
    };
    let query = "";
    try {
        // Form-encoded query strings spell a space as "+", which
        // decodeURIComponent leaves untouched. Normalise it first so
        // "?message=No+account+found" reads the same as the %20 spelling.
        // A literal plus arrives as %2B and is decoded afterwards.
        const rawQuery = new URL(url).search.replace(/\+/g, " ");
        query = rawQuery.split("&").map(decodeSegment).join("&").toLowerCase();
    }
    catch {
        // Not a parseable URL — match on the body text alone.
        query = "";
    }
    const haystack = `${query}\n${(bodyText ?? "").toLowerCase()}`;
    // Every prose alternative names an account or a sign-up/registration
    // step, so a bare "Page not found" does not match. The
    // "[?&]google-auth-error" alternative matches a query flag instead.
    const noAccountPhrase = /no account found|external account was not found|account (?:was )?not found|no (?:such )?account (?:found|exists)|account (?:doesn['’]?t|does not) exist|couldn['’]?t find (?:an|your) account|no account associated|sign up (?:first|to continue)|create an account|[?&]google-auth-error|register first/;
    return noAccountPhrase.test(haystack);
}
|
|
1507
2040
|
// (d) Stuck-on-Google-OAuth-screens (Upstash class). After
|
|
1508
2041
|
// settleAfterOAuth the URL is STILL on accounts.google.com — the
|
|
1509
2042
|
// handshake didn't redirect through to the service. Most common
|
|
@@ -1523,6 +2056,25 @@ export function detectStuckOnGoogleOAuth(url) {
|
|
|
1523
2056
|
return false;
|
|
1524
2057
|
}
|
|
1525
2058
|
}
|
|
2059
|
+
// Is the current URL an OAuth/SSO CALLBACK route — the redirect target
|
|
2060
|
+
// where the SPA exchanges the provider code for a session? MEASURED
|
|
2061
|
+
// 2026-06-04: clerk's `/sign-in/sso-callback` does a token exchange that
|
|
2062
|
+
// renders even slower than its already-slow dashboard (~15s over the
|
|
2063
|
+
// residential proxy). On a callback route the SPA IS making progress, so
|
|
2064
|
+
// the post-verify hydration loop grants it a larger budget; on every
|
|
2065
|
+
// other route the smaller budget holds (a never-hydrating page must not
|
|
2066
|
+
// burn the run cap). Matches on the pathname only (PSL-safe via URL parse,
|
|
2067
|
+
// try/catch → false for non-URLs).
|
|
2068
|
+
/**
 * Reports whether a URL's pathname looks like an OAuth/SSO callback route.
 *
 * @param {string} url - Absolute URL to classify; only the pathname is tested.
 * @returns {boolean} True for callback-shaped paths, false otherwise and for
 *   any value that does not parse as a URL.
 */
export function isOAuthCallbackRoute(url) {
    const callbackPathRe = /\/sso-callback|\/oauth\/callback|\/auth\/callback|\/callback(?:\/|$)|\/login\/callback/i;
    try {
        const { pathname } = new URL(url);
        return callbackPathRe.test(pathname);
    }
    catch {
        // Unparseable input is never a callback route.
        return false;
    }
}
|
|
1526
2078
|
// Scan the inventory for the first OAuth affordance among `providers`,
|
|
1527
2079
|
// in order — the auto-prefer decision passes every provider the
|
|
1528
2080
|
// profile has a session for. Returns the matched provider + element.
|
|
@@ -1581,16 +2133,27 @@ export function findSignInAdvanceButton(inventory, providers) {
|
|
|
1581
2133
|
// actually has a session for. `findFirstOAuthButton` walks this list in
|
|
1582
2134
|
// order and uses the first provider the PAGE offers, so order = preference.
|
|
1583
2135
|
//
|
|
1584
|
-
//
|
|
1585
|
-
//
|
|
1586
|
-
//
|
|
1587
|
-
//
|
|
1588
|
-
//
|
|
1589
|
-
//
|
|
1590
|
-
//
|
|
1591
|
-
//
|
|
2136
|
+
// RULE 1 — respect an explicit pin when its session is warm. The operator
|
|
2137
|
+
// pins a provider for a reason the bot can't see from the page: e.g.
|
|
2138
|
+
// northflank surfaces Google only as on-demand One-Tap (a FedCM widget the
|
|
2139
|
+
// redirect flow can't drive) while its GitHub button is a clean redirect, so
|
|
2140
|
+
// the service is pinned github. Leading with the warm pin honors that, with
|
|
2141
|
+
// the OTHER warm provider kept as a fallback for pages that only render it.
|
|
2142
|
+
// (This became safe once `login` was fixed to establish the session through
|
|
2143
|
+
// the bot's egress proxy — a warm GitHub session no longer dies on an IP
|
|
2144
|
+
// jump, so it doesn't hit the /authorize 2FA wall the way a stale one did.)
|
|
2145
|
+
//
|
|
2146
|
+
// RULE 2 — with NO pin, Google leads when present: empirically its OAuth
|
|
2147
|
+
// blocks less hard than a cold GitHub flow.
|
|
1592
2148
|
export function orderOAuthCandidates(pinned, loggedIn) {
|
|
1593
2149
|
if (pinned !== undefined) {
|
|
2150
|
+
if (loggedIn.includes(pinned)) {
|
|
2151
|
+
const others = loggedIn
|
|
2152
|
+
.filter((p) => p !== pinned)
|
|
2153
|
+
.sort((a, b) => (a === "google" ? -1 : b === "google" ? 1 : 0));
|
|
2154
|
+
return [pinned, ...others];
|
|
2155
|
+
}
|
|
2156
|
+
// Pin's session isn't warm — fall back to whatever IS (Google preferred).
|
|
1594
2157
|
if (pinned !== "google" && loggedIn.includes("google"))
|
|
1595
2158
|
return ["google", pinned];
|
|
1596
2159
|
return [pinned];
|
|
@@ -2228,12 +2791,174 @@ export function pickVerificationLink(links) {
|
|
|
2228
2791
|
const top = scored[0];
|
|
2229
2792
|
return top !== undefined && top.score > 0 ? top.url : null;
|
|
2230
2793
|
}
|
|
2794
|
+
// Pick a verification link by its ANCHOR TEXT in the email HTML — the fallback
|
|
2795
|
+
// when pickVerificationLink (which scores the URL) fails because the link is
|
|
2796
|
+
// wrapped in a click-tracker that hides the keyword behind a redirect. MEASURED
|
|
2797
|
+
// on amplitude (2026-06-04): its "Activate account" link is a
|
|
2798
|
+
// u…ct.sendgrid.net/ls/click?upn=… URL (no "activate" in the URL), so the
|
|
2799
|
+
// URL scorer returned null and the bot fell to a false-positive "code" (the
|
|
2800
|
+
// year "2025"). The anchor TEXT still reads "Activate account". Pure + exported
|
|
2801
|
+
// for unit tests.
|
|
2802
|
+
/**
 * Picks the most verification-looking link from an email's HTML by scoring
 * each anchor's visible TEXT rather than its URL, so links wrapped in an
 * opaque click-tracker redirect can still be found.
 *
 * @param {string} bodyHtml - Raw HTML of the email body.
 * @returns {string|null} The href of the highest-scoring http(s) anchor, or
 *   null when no anchor scores above zero.
 */
export function pickVerificationLinkFromHtml(bodyHtml) {
    // Accept both double- and single-quoted href attribute values.
    const anchorRe = /<a\b[^>]*href=(?:"([^"]+)"|'([^']+)')[^>]*>([\s\S]*?)<\/a>/gi;
    let best = null;
    for (const m of bodyHtml.matchAll(anchorRe)) {
        // Attribute values are HTML-escaped: "&amp;" must become "&" or the
        // query-string separators of the returned URL are broken.
        const href = (m[1] ?? m[2] ?? "").replace(/&amp;/gi, "&");
        if (!/^https?:\/\//i.test(href))
            continue;
        // Reduce the anchor's inner HTML to lowercase plain text: drop nested
        // tags and named entities, then collapse whitespace.
        const text = (m[3] ?? "")
            .replace(/<[^>]+>/g, " ")
            .replace(/&[a-z]+;/gi, " ")
            .replace(/\s+/g, " ")
            .trim()
            .toLowerCase();
        let score = 0;
        if (/\b(?:verify|confirm|activate)\b/.test(text))
            score += 10;
        if (/verify (?:your )?email|confirm (?:your )?email|activate (?:your )?account|complete (?:your )?sign[\s-]?up/.test(text)) {
            score += 5;
        }
        if (/get started|finish setting up/.test(text))
            score += 3;
        // Footer / housekeeping links are pushed below the acceptance bar.
        if (/unsubscribe|preferences|manage|view (?:in|this) (?:browser|email)|privacy|terms/.test(text)) {
            score -= 10;
        }
        if (score > (best?.score ?? 0))
            best = { url: href, score };
    }
    return best !== null && best.score > 0 ? best.url : null;
}
|
|
2231
2832
|
// Discriminates LLMPair from LLMClient. LLMPair has `primary` (an
|
|
2232
2833
|
// LLMClient); LLMClient has `createMessage`. They're mutually exclusive
|
|
2233
2834
|
// shapes so a structural check is reliable.
|
|
2234
2835
|
/**
 * Structural check: true when `x` has a `primary` property holding a
 * non-null object.
 *
 * @param {object} x - Candidate value; must be an object, since the `in`
 *   operator throws on primitives.
 * @returns {boolean}
 */
function isLLMPair(x) {
    if (!("primary" in x))
        return false;
    const candidate = x.primary;
    return candidate !== null && typeof candidate === "object";
}
|
|
2838
|
+
// True when the last `threshold` executed ACTIONS (click/select/check/
|
|
2839
|
+
// fill — steps meant to mutate the page) each left the page content
|
|
2840
|
+
// UNCHANGED. That is the signature of a broken onboarding wizard that
|
|
2841
|
+
// re-presents itself no matter what the bot clicks (the axiom case,
|
|
2842
|
+
// measured 2026-06-03): the planner keeps correctly reacting to a
|
|
2843
|
+
// visibly-unfilled form, but the click never registers, so without this
|
|
2844
|
+
// the run burns all 24 rounds + LLM budget re-clicking the same card.
|
|
2845
|
+
// Navigates / waits / extracts are excluded — they legitimately don't
|
|
2846
|
+
// change the current DOM (navigate changes URL, wait pauses). Pure +
|
|
2847
|
+
// exported for unit tests.
|
|
2848
|
+
/**
 * Decides whether the most recent `threshold` effects amount to a stall:
 * every one is a page-mutating action (click/select/check/fill) that left
 * the page unchanged, and at least one selector repeats among them.
 *
 * @param {Array<{kind: string, pageUnchanged: boolean, selector?: string}>} effects
 *   Executed step effects, oldest first.
 * @param {number} [threshold=3] - How many trailing effects to inspect.
 * @returns {boolean} True when the trailing window is a stall.
 */
export function isStalledOnActions(effects, threshold = 3) {
    if (effects.length < threshold)
        return false;
    const mutatingKinds = ["click", "select", "check", "fill"];
    const tail = effects.slice(-threshold);
    for (const effect of tail) {
        // Any non-mutating step, or any step that did change the page,
        // breaks the stall pattern.
        if (!mutatingKinds.includes(effect.kind) || !effect.pageUnchanged) {
            return false;
        }
    }
    const seenSelectors = new Set();
    let sawRecordedSelector = false;
    for (const effect of tail) {
        if (effect.selector !== undefined)
            sawRecordedSelector = true;
        seenSelectors.add(effect.selector ?? "");
    }
    // No selectors recorded at all: kind + unchanged alone decides.
    if (!sawRecordedSelector)
        return true;
    // All-distinct selectors mean the steps targeted different elements;
    // only a repeated selector counts as a stall.
    return seenSelectors.size < threshold;
}
|
|
2871
|
+
// True when a URL reads as a login / authentication screen. Service-
|
|
2872
|
+
// agnostic (path-based, no per-service hosts) — used to detect a
|
|
2873
|
+
// non-persisting OAuth session: after a successful OAuth, an
|
|
2874
|
+
// authenticated bot lands on a dashboard, not a login page. Pure +
|
|
2875
|
+
// exported for tests.
|
|
2876
|
+
/**
 * Reports whether a URL reads as a login / authentication screen, judged by
 * a login-like path segment or a `google-auth-error` query flag.
 *
 * @param {string} url - Absolute URL to classify.
 * @returns {boolean} False for anything that does not parse as a URL.
 */
export function isLoginPageUrl(url) {
    let pathname;
    try {
        pathname = new URL(url).pathname.toLowerCase();
    }
    catch {
        return false;
    }
    const loginSegmentRe = /(?:^|\/)(?:login|signin|sign-in|authenticate|sso)(?:\/|$)/;
    // The path may not be login-shaped while the query still flags a failed
    // auth, so the raw URL is checked for that flag as a second signal.
    return loginSegmentRe.test(pathname) || /[?&]google-auth-error\b/i.test(url);
}
|
|
2890
|
+
// A pre-account route (signup OR login OR register) — the set of paths an
|
|
2891
|
+
// AUTHENTICATED user has no business sitting on. Broader than
|
|
2892
|
+
// isLoginPageUrl (which is tuned for the OAuth-callback-loop detector and
|
|
2893
|
+
// deliberately excludes /signup). Used for the post-OAuth dead-route
|
|
2894
|
+
// escape. Exported for unit tests.
|
|
2895
|
+
/**
 * Reports whether a URL's path contains a signup, login or register segment.
 *
 * @param {string} url - Absolute URL to classify; only the pathname is tested.
 * @returns {boolean} False for anything that does not parse as a URL.
 */
export function isSignupOrLoginRoute(url) {
    const preAccountSegmentRe = /(?:^|\/)(?:login|signin|sign-in|sign[_-]?up|signup|register|authenticate|sso)(?:\/|$)/;
    let pathname;
    try {
        pathname = new URL(url).pathname.toLowerCase();
    }
    catch {
        return false;
    }
    return preAccountSegmentRe.test(pathname);
}
|
|
2904
|
+
// The scheme://host root of a URL (no path/query) — the place a service
|
|
2905
|
+
// redirects an authenticated user to their dashboard. Null on a malformed
|
|
2906
|
+
// URL. Exported for unit tests.
|
|
2907
|
+
/**
 * Returns the `scheme://host[:port]/` root of a URL, without path or query.
 *
 * @param {string} url - Absolute URL.
 * @returns {string|null} The origin followed by "/", or null when the input
 *   does not parse as a URL or has no usable origin.
 */
export function originRoot(url) {
    try {
        const { origin } = new URL(url);
        // URLs with an opaque origin (about:blank, data:, file:, …) serialise
        // their origin as the literal string "null". Without this guard the
        // result would be "null/", which is not a navigable root.
        if (origin === "null")
            return null;
        return `${origin}/`;
    }
    catch {
        return null;
    }
}
|
|
2915
|
+
// A modern SPA dashboard often paints a "Connecting…" / "Loading…" shell
|
|
2916
|
+
// (plus the static <noscript> "enable JavaScript" fallback) for a beat
|
|
2917
|
+
// while its JS bundle and websocket finish — especially over a
|
|
2918
|
+
// high-latency residential tunnel. During that window the page has ZERO
|
|
2919
|
+
// interactive elements. northflank's /settings/access-tokens lands on
|
|
2920
|
+
// exactly this shell post-OAuth; the post-verify planner reads the empty
|
|
2921
|
+
// inventory and concludes {"kind":"done","no elements"} ~2s in, abandoning
|
|
2922
|
+
// a page that was about to render the token UI. Detect the shell so the
|
|
2923
|
+
// caller can wait for hydration instead of giving up. Matched ONLY
|
|
2924
|
+
// alongside an empty inventory, so the narrow phrasing here won't swallow
|
|
2925
|
+
// a real dashboard that merely contains the word "loading". Exported for
|
|
2926
|
+
// unit tests.
|
|
2927
|
+
/**
 * Reports whether page text reads as a transient "still rendering" shell.
 *
 * @param {string} text - Visible page text.
 * @returns {boolean} True when the text carries connecting/loading/please-wait
 *   style copy and is not an account-chooser page.
 */
export function isLoadingShellText(text) {
    // An account chooser ("Choose an account …") is actionable even when it
    // also shows a "Loading" label, so it vetoes the shell reading.
    const accountChooserRe = /choose an account/i;
    // Only transient copy is matched here; permanent fallback text such as a
    // <noscript> notice is deliberately not part of the pattern.
    const transientCopyRe = /\bconnecting\b|\bloading\b|please wait|getting things ready|initiali[sz]ing/i;
    return !accountChooserRe.test(text) && transientCopyRe.test(text);
}
|
|
2943
|
+
// Transient "the session is being established RIGHT NOW" copy. MEASURED on
|
|
2944
|
+
// groq (Stytch B2B): after the OAuth callback, /authenticate shows
|
|
2945
|
+
// "Logging in…" then "Creating your organization…" for ~5-7s of async
|
|
2946
|
+
// discovery+org-creation+session calls before redirecting to the dashboard.
|
|
2947
|
+
// Interrupting that window (navigating away, or — worse — re-clicking the
|
|
2948
|
+
// OAuth button) ABORTS the org creation and the session never finalizes,
|
|
2949
|
+
// which is exactly how the bot was failing groq. When this text is present
|
|
2950
|
+
// the bot must WAIT, never act. Generalizes to any async-session auth
|
|
2951
|
+
// (Stytch / WorkOS / Auth0 org provisioning). Exported for unit tests.
|
|
2952
|
+
/**
 * Reports whether page text says a session is currently being established
 * ("Logging in…", "Creating your organization…", and similar).
 *
 * @param {string} text - Visible page text.
 * @returns {boolean} True when any in-progress auth phrase is present.
 */
export function isAuthProcessingText(text) {
    const inProgressRe = /logging in|signing in|creating your organization|creating your account|setting up your account|authenticating|finishing (?:sign|log)|redirecting you|one moment/i;
    return inProgressRe.test(text);
}
|
|
2955
|
+
// Sentinel returned by runOAuthFlow when the OAuth path is a dead end
|
|
2956
|
+
// that the email/password form-fill path can still recover (Google
|
|
2957
|
+
// login-only services that never created an account — see
|
|
2958
|
+
// detectGoogleNoAccount). runSignup catches it and re-runs the form-fill
|
|
2959
|
+
// path with OAuth-first suppressed. A unique const so it can't collide
|
|
2960
|
+
// with any SignupResult.error string.
|
|
2961
|
+
// Module-private sentinel string (not exported). The double-underscore
// wrapping keeps it visually distinct from ordinary error strings.
const OAUTH_FALL_BACK_TO_FORM_FILL = "__fall_back_to_form_fill__";
|
|
2237
2962
|
export class SignupAgent {
|
|
2238
2963
|
browser;
|
|
2239
2964
|
// Per-run counter so a single SignupAgent (which lives one run) can't
|
|
@@ -2326,7 +3051,13 @@ export class SignupAgent {
|
|
|
2326
3051
|
}
|
|
2327
3052
|
else if (detected.variant === "recaptcha_v3") {
|
|
2328
3053
|
this.invisibleCaptcha = { kind: "recaptcha", variant: "recaptcha_v3" };
|
|
2329
|
-
|
|
3054
|
+
// Invisible reCAPTCHA scores in the background, but its token is only
|
|
3055
|
+
// minted when grecaptcha.execute() runs — and a form like amplitude's
|
|
3056
|
+
// REQUIRES that token to submit. Mint it now (passes on our ~1.0
|
|
3057
|
+
// score) so the imminent submit carries a valid g-recaptcha-response,
|
|
3058
|
+
// instead of submitting with an empty token and silently no-op'ing.
|
|
3059
|
+
const minted = await this.browser.triggerInvisibleRecaptcha();
|
|
3060
|
+
steps.push(`${label} captcha: invisible reCAPTCHA v3 — ${minted ? "minted score token via grecaptcha.execute()" : "badge present, token not minted (form may submit it itself)"}`);
|
|
2330
3061
|
}
|
|
2331
3062
|
}
|
|
2332
3063
|
return { found: false, solved: false, blocked: false, kind: "turnstile" };
|
|
@@ -2344,7 +3075,15 @@ export class SignupAgent {
|
|
|
2344
3075
|
result.kind === "recaptcha" &&
|
|
2345
3076
|
this.captchaSolver?.isAvailable() === true) {
|
|
2346
3077
|
const sitekey = await this.browser.extractRecaptchaSitekey();
|
|
2347
|
-
if (sitekey
|
|
3078
|
+
if (sitekey === null) {
|
|
3079
|
+
// result.kind said "recaptcha" but no key with the reCAPTCHA `6L`
|
|
3080
|
+
// format is on the page — almost always an hCaptcha/Turnstile
|
|
3081
|
+
// widget misbucketed by the host-input heuristic. 2Captcha's
|
|
3082
|
+
// reCAPTCHA endpoint would reject the wrong-provider key
|
|
3083
|
+
// (ERROR_WRONG_GOOGLEKEY); skip it and surface the real shape.
|
|
3084
|
+
steps.push(`${label} captcha: no genuine reCAPTCHA sitekey on page (widget is likely hCaptcha/Turnstile) — skipping 2Captcha`);
|
|
3085
|
+
}
|
|
3086
|
+
else {
|
|
2348
3087
|
const pageUrl = (await this.browser.getState().catch(() => null))?.url;
|
|
2349
3088
|
if (pageUrl !== undefined) {
|
|
2350
3089
|
steps.push(`${label} captcha: Tier 3 — submitting sitekey to 2Captcha (${sitekey.slice(0, 10)}…)`);
|
|
@@ -2370,6 +3109,38 @@ export class SignupAgent {
|
|
|
2370
3109
|
}
|
|
2371
3110
|
}
|
|
2372
3111
|
}
|
|
3112
|
+
// Tier 3 for hCaptcha (plausible). Distinct provider, distinct
|
|
3113
|
+
// 2Captcha method (method=hcaptcha) + a UUID sitekey the reCAPTCHA
|
|
3114
|
+
// `6L` guard rejects — so it needs its own extractor, solver call,
|
|
3115
|
+
// and h-captcha-response injector. Same structure as reCAPTCHA Tier 3.
|
|
3116
|
+
if (!result.solved &&
|
|
3117
|
+
result.kind === "hcaptcha" &&
|
|
3118
|
+
this.captchaSolver?.isAvailable() === true) {
|
|
3119
|
+
const sitekey = await this.browser.extractHcaptchaSitekey();
|
|
3120
|
+
const pageUrl = (await this.browser.getState().catch(() => null))?.url;
|
|
3121
|
+
if (sitekey !== null && pageUrl !== undefined) {
|
|
3122
|
+
steps.push(`${label} captcha: Tier 3 — submitting hCaptcha sitekey to 2Captcha (${sitekey.slice(0, 10)}…)`);
|
|
3123
|
+
const solveRes = await this.captchaSolver.solveHcaptcha({ sitekey, pageUrl });
|
|
3124
|
+
if (solveRes.kind === "ok") {
|
|
3125
|
+
const injected = await this.browser.injectHcaptchaToken(solveRes.token);
|
|
3126
|
+
if (injected) {
|
|
3127
|
+
steps.push(`${label} captcha: Tier 3 hCaptcha solved in ${Math.round(solveRes.durationMs / 1000)}s via 2Captcha`);
|
|
3128
|
+
result = { ...result, solved: true };
|
|
3129
|
+
}
|
|
3130
|
+
else {
|
|
3131
|
+
steps.push(`${label} captcha: Tier 3 hCaptcha token arrived but page injection failed — captcha stays blocked`);
|
|
3132
|
+
}
|
|
3133
|
+
}
|
|
3134
|
+
else {
|
|
3135
|
+
steps.push(`${label} captcha: Tier 3 hCaptcha ${solveRes.kind}` +
|
|
3136
|
+
("reason" in solveRes ? `: ${solveRes.reason}` : "") +
|
|
3137
|
+
("durationMs" in solveRes ? ` (${Math.round(solveRes.durationMs / 1000)}s)` : ""));
|
|
3138
|
+
}
|
|
3139
|
+
}
|
|
3140
|
+
else if (sitekey === null) {
|
|
3141
|
+
steps.push(`${label} captcha: hCaptcha widget detected but no sitekey found — cannot Tier-3 solve`);
|
|
3142
|
+
}
|
|
3143
|
+
}
|
|
2373
3144
|
// rc.32 — forensic snapshot after the captcha attempt. Without
|
|
2374
3145
|
// this, the only snapshot near the captcha is the pre-fill one
|
|
2375
3146
|
// taken BEFORE the click, so when a Turnstile fails to solve we
|
|
@@ -2445,7 +3216,14 @@ export class SignupAgent {
|
|
|
2445
3216
|
// click or a post-submit validation error ("the page advanced")
|
|
2446
3217
|
// gets more headroom. All bounded by the 15-call LLM breaker + the
|
|
2447
3218
|
// F2 top-level deadline.
|
|
2448
|
-
async planExecuteWithRetry(task, fillValues, steps
|
|
3219
|
+
async planExecuteWithRetry(task, fillValues, steps,
|
|
3220
|
+
// When true, suppress the OAuth-first scan entirely and go straight
|
|
3221
|
+
// to form-fill. Set by the re-route after the OAuth path discovered
|
|
3222
|
+
// the Google identity has no account (detectGoogleNoAccount) — the
|
|
3223
|
+
// page still carries a "Continue with Google" button, so without
|
|
3224
|
+
// this the scan would re-pick OAuth and loop right back into the
|
|
3225
|
+
// same no-account bounce. One-shot equivalent of committedToEmailPath.
|
|
3226
|
+
forceFormFill = false) {
|
|
2449
3227
|
const MAX_ERROR_REPLANS = 2;
|
|
2450
3228
|
// 0.8.3-rc.1 — widened from 4 to 6 so submit_disabled re-plans
|
|
2451
3229
|
// get more attempts to identify the gating control. Mailgun's
|
|
@@ -2479,7 +3257,7 @@ export class SignupAgent {
|
|
|
2479
3257
|
// "Continue with Google" button and reroutes — exactly the
|
|
2480
3258
|
// regression that produced the Security Code challenge on
|
|
2481
3259
|
// methoxine's account during the rc.30 Railway run.
|
|
2482
|
-
let committedToEmailPath =
|
|
3260
|
+
let committedToEmailPath = forceFormFill;
|
|
2483
3261
|
const oauthCandidates = await this.resolveOAuthCandidates(task, steps);
|
|
2484
3262
|
for (;;) {
|
|
2485
3263
|
await this.browser.waitForFormReady();
|
|
@@ -2499,6 +3277,50 @@ export class SignupAgent {
|
|
|
2499
3277
|
this.browser.getState(),
|
|
2500
3278
|
this.buildInventory(steps, oauthCandidates),
|
|
2501
3279
|
]);
|
|
3280
|
+
// Email-verification WALL reached without a fresh submit — e.g. OAuth
|
|
3281
|
+
// landed on a pending account's "Verify your email — check <addr>" page.
|
|
3282
|
+
// A real signup form still has fields to fill; a wall has only
|
|
3283
|
+
// Open-Gmail / Resend / Return buttons, on which the form-fill planner
|
|
3284
|
+
// stalls. Route to the post-submit inbox-poll + verification-link flow
|
|
3285
|
+
// instead, polling the alias the wall names (which may differ from
|
|
3286
|
+
// task.email when a prior run created the pending account).
|
|
3287
|
+
{
|
|
3288
|
+
// Use the already-fetched state.html (don't call extractText() again —
|
|
3289
|
+
// an extra read would shift queue-backed test mocks and isn't needed:
|
|
3290
|
+
// the verification copy is in the rendered HTML).
|
|
3291
|
+
const wallText = state.html;
|
|
3292
|
+
const hasFillableInput = inventory.some((e) => e.tag === "input" &&
|
|
3293
|
+
(e.type === "email" ||
|
|
3294
|
+
e.type === "text" ||
|
|
3295
|
+
e.type === "password" ||
|
|
3296
|
+
e.type === null) &&
|
|
3297
|
+
e.visible !== false);
|
|
3298
|
+
if (!hasFillableInput && expectsVerificationEmail(wallText)) {
|
|
3299
|
+
const alias = extractVerifyWallAlias(wallText);
|
|
3300
|
+
this.pendingVerificationAlias = alias;
|
|
3301
|
+
steps.push(`Form: email-verification wall (no fields to fill${alias !== null ? `, check ${alias}` : ""}) — ` +
|
|
3302
|
+
`routing to the inbox-poll + verification-link flow.`);
|
|
3303
|
+
// The named link may be stale (a pending account from a prior run);
|
|
3304
|
+
// click "Resend verification email" if present to refresh it.
|
|
3305
|
+
const resend = inventory.find((e) => {
|
|
3306
|
+
if (e.tag !== "button" && e.tag !== "a")
|
|
3307
|
+
return false;
|
|
3308
|
+
const t = `${e.visibleText ?? ""} ${e.ariaLabel ?? ""}`.toLowerCase();
|
|
3309
|
+
return /resend (?:verification )?(?:email|link)|send (?:it )?again/.test(t);
|
|
3310
|
+
});
|
|
3311
|
+
if (resend !== undefined) {
|
|
3312
|
+
try {
|
|
3313
|
+
await this.browser.click(resend.selector);
|
|
3314
|
+
steps.push(`Form: clicked "Resend verification email" to refresh the link.`);
|
|
3315
|
+
await this.browser.wait(2);
|
|
3316
|
+
}
|
|
3317
|
+
catch {
|
|
3318
|
+
// non-fatal — poll for whatever's already in the inbox
|
|
3319
|
+
}
|
|
3320
|
+
}
|
|
3321
|
+
return { kind: "submitted" };
|
|
3322
|
+
}
|
|
3323
|
+
}
|
|
2502
3324
|
// OAuth-first (T6/T13 + auto-prefer): when the page carries a
|
|
2503
3325
|
// "Sign in with <provider>" affordance for a provider the bot can
|
|
2504
3326
|
// use, that button unconditionally outranks any form field — hand
|
|
@@ -2527,11 +3349,20 @@ export class SignupAgent {
|
|
|
2527
3349
|
}
|
|
2528
3350
|
// SSO buttons frequently load async — Mistral renders its
|
|
2529
3351
|
// icon-only provider buttons after the email form. Re-extract
|
|
2530
|
-
// a couple of times before giving up on the OAuth path.
|
|
2531
|
-
|
|
3352
|
+
// a couple of times before giving up on the OAuth path. On a
|
|
3353
|
+
// websocket-gated SPA (northflank) the WHOLE page — provider
|
|
3354
|
+
// buttons included — renders only after a ~15s hydration, so a
|
|
3355
|
+
// "Connecting"/loading shell warrants far more patience than the
|
|
3356
|
+
// default 2 retries (otherwise the bot gives up at ~6s and wrongly
|
|
3357
|
+
// falls back to the email-signup path before the GitHub button
|
|
3358
|
+
// even exists).
|
|
3359
|
+
const oauthScanShell = isLoadingShellText(await this.browser.extractText().catch(() => ""));
|
|
3360
|
+
const maxOauthScanRetries = oauthScanShell ? 8 : 2;
|
|
3361
|
+
if (oauthScanRetries < maxOauthScanRetries) {
|
|
2532
3362
|
oauthScanRetries += 1;
|
|
2533
3363
|
steps.push(`OAuth-first: no provider affordance yet — waiting for an ` +
|
|
2534
|
-
`async render (retry ${oauthScanRetries}
|
|
3364
|
+
`async render (retry ${oauthScanRetries}/${maxOauthScanRetries}` +
|
|
3365
|
+
`${oauthScanShell ? ", page still a loading shell" : ""})`);
|
|
2535
3366
|
await this.browser.wait(3);
|
|
2536
3367
|
continue;
|
|
2537
3368
|
}
|
|
@@ -2621,7 +3452,7 @@ export class SignupAgent {
|
|
|
2621
3452
|
// providers, the situation is recoverable — surface the
|
|
2622
3453
|
// specific provider to seed.
|
|
2623
3454
|
const visibleProviders = detectOAuthProvidersInInventory(inventory);
|
|
2624
|
-
const haveSessions =
|
|
3455
|
+
const haveSessions = await this.effectiveLoggedInProviders();
|
|
2625
3456
|
const missingProviders = visibleProviders.filter((p) => !haveSessions.includes(p));
|
|
2626
3457
|
if (missingProviders.length > 0 &&
|
|
2627
3458
|
// Only surface needs_oauth_provider_session when the user
|
|
@@ -2791,6 +3622,17 @@ export class SignupAgent {
|
|
|
2791
3622
|
// stuck-tracker so a legitimate later click isn't false-positive
|
|
2792
3623
|
// rejected.
|
|
2793
3624
|
lastNoProgressClickSelectors = new Set();
|
|
3625
|
+
// Deterministic agreement-checkbox guard — runs BEFORE the captcha
|
|
3626
|
+
// gate + submit so the form is fully satisfied at submit time. The
|
|
3627
|
+
// LLM planner sometimes skips a required TOS box (amplitude: it
|
|
3628
|
+
// read the box as one of the adjacent card-radios), and when the
|
|
3629
|
+
// service doesn't disable submit for an unchecked box, the click
|
|
3630
|
+
// silently no-ops. This ticks terms/privacy/consent boxes while
|
|
3631
|
+
// never touching marketing opt-ins. Best-effort: never throws.
|
|
3632
|
+
const agreementBoxes = await this.browser.checkRequiredAgreementBoxes();
|
|
3633
|
+
if (agreementBoxes.length > 0) {
|
|
3634
|
+
steps.push(`Form: checked required agreement box(es): [${agreementBoxes.join(", ")}]`);
|
|
3635
|
+
}
|
|
2794
3636
|
// Captcha gate + submit.
|
|
2795
3637
|
const preGate = await this.runCaptchaGate("Pre-submit", steps);
|
|
2796
3638
|
if (preGate.blocked)
|
|
@@ -3032,12 +3874,33 @@ export class SignupAgent {
|
|
|
3032
3874
|
return false;
|
|
3033
3875
|
}
|
|
3034
3876
|
}
|
|
3877
|
+
// Which OAuth providers can the bot actually use right now — the UNION of
|
|
3878
|
+
// the logged-in-providers.json marker (a memo) and a LIVE read of the
|
|
3879
|
+
// browser's cookie jar. The cookie jar is ground truth, so a warm session
|
|
3880
|
+
// is never invisible just because the marker drifted (the GitHub-skipped-
|
|
3881
|
+
// for-Google bug). Self-heals the marker for any live session it was
|
|
3882
|
+
// missing. Falls back to the marker alone if the cookie read fails.
|
|
3883
|
+
async effectiveLoggedInProviders() {
|
|
3884
|
+
const fromMarker = loggedInProviders();
|
|
3885
|
+
let live = [];
|
|
3886
|
+
try {
|
|
3887
|
+
live = await this.browser.detectSessionProviders();
|
|
3888
|
+
}
|
|
3889
|
+
catch {
|
|
3890
|
+
live = [];
|
|
3891
|
+
}
|
|
3892
|
+
for (const p of live) {
|
|
3893
|
+
if (!fromMarker.includes(p))
|
|
3894
|
+
markProviderLoggedIn(p);
|
|
3895
|
+
}
|
|
3896
|
+
return [...new Set([...fromMarker, ...live])];
|
|
3897
|
+
}
|
|
3035
3898
|
async resolveOAuthCandidates(task, steps) {
|
|
3036
3899
|
if (task.forceForm === true) {
|
|
3037
3900
|
steps.push("Force-form: OAuth-first scan suppressed — taking the email/password path");
|
|
3038
3901
|
return [];
|
|
3039
3902
|
}
|
|
3040
|
-
const ordered = orderOAuthCandidates(task.oauthProvider,
|
|
3903
|
+
const ordered = orderOAuthCandidates(task.oauthProvider, await this.effectiveLoggedInProviders());
|
|
3041
3904
|
if (ordered.length === 0)
|
|
3042
3905
|
return [];
|
|
3043
3906
|
const pinNote = task.oauthProvider !== undefined &&
|
|
@@ -3106,6 +3969,11 @@ export class SignupAgent {
|
|
|
3106
3969
|
// it. Cleared once the loop emits a step that targets the OTP
|
|
3107
3970
|
// input, so the hint doesn't echo into later unrelated rounds.
|
|
3108
3971
|
pendingOtpCode = null;
|
|
3972
|
+
// Set when planExecuteWithRetry routes an email-verification WALL (reached
|
|
3973
|
+
// without a fresh submit — e.g. OAuth landed on a pending account's "Verify
|
|
3974
|
+
// your email — check <addr>" page) into the post-submit email flow. The poll
|
|
3975
|
+
// targets this alias (the one the wall names) instead of task.email.
|
|
3976
|
+
pendingVerificationAlias = null;
|
|
3109
3977
|
// rc.39 — when postVerifyLoop exits because the planner returned
|
|
3110
3978
|
// `done`, capture the planner's stated reason so the caller can
|
|
3111
3979
|
// factor it into paywall classification. Koyeb (and similar)
|
|
@@ -3359,6 +4227,30 @@ export class SignupAgent {
|
|
|
3359
4227
|
: {}),
|
|
3360
4228
|
}));
|
|
3361
4229
|
let signupUrl = guessed;
|
|
4230
|
+
// Tier A — HTTP fast-path signup-URL resolver. Before committing to
|
|
4231
|
+
// the (~6-minute) navigation, probe the candidate over the SAME
|
|
4232
|
+
// proxy via the context request API and confirm it actually serves a
|
|
4233
|
+
// signup FORM (not a login SPA / 404). Curated signup_urls go stale
|
|
4234
|
+
// (plunk's app.useplunk.com/signup now 404s and silently serves the
|
|
4235
|
+
// login page; the real form moved to next-app.useplunk.com/auth/
|
|
4236
|
+
// signup). The probe follows redirects + tries conventional paths
|
|
4237
|
+
// and adopts a better URL when it finds one. Non-Google URLs only —
|
|
4238
|
+
// a Google-search URL is the explicit fallback path, not a hint.
|
|
4239
|
+
if (!isGoogleSearchUrl(signupUrl)) {
|
|
4240
|
+
const serviceSlug = task.service.toLowerCase().replace(/[^a-z0-9]/g, "");
|
|
4241
|
+
const resolved = await resolveSignupUrlByProbe(signupUrl, serviceSlug, (u) => this.browser.fetchText(u), (m) => steps.push(m));
|
|
4242
|
+
if (resolved !== null && resolved !== signupUrl) {
|
|
4243
|
+
steps.push(`[signup-url] resolved ${signupUrl} → ${resolved}`);
|
|
4244
|
+
// A curated URL that the resolver had to move is a stale-YAML
|
|
4245
|
+
// signal worth surfacing in telemetry (curated URLs are
|
|
4246
|
+
// supposed to be the trusted, hand-verified path).
|
|
4247
|
+
if (task.signupUrl !== undefined) {
|
|
4248
|
+
steps.push(`⚠ curated signup_url for ${task.service} looks stale ` +
|
|
4249
|
+
`(${signupUrl}); using ${resolved}`);
|
|
4250
|
+
}
|
|
4251
|
+
signupUrl = resolved;
|
|
4252
|
+
}
|
|
4253
|
+
}
|
|
3362
4254
|
// Prewarm the target origin before hitting the (often-strict) signup
|
|
3363
4255
|
// page. Two things this buys us:
|
|
3364
4256
|
// 1. First-party cookies on the root domain. Cloudflare's
|
|
@@ -3381,28 +4273,96 @@ export class SignupAgent {
|
|
|
3381
4273
|
// PERF: goto() awaits domcontentloaded; the subsequent
|
|
3382
4274
|
// waitForFormReady in planExecuteWithRetry handles SPA settle.
|
|
3383
4275
|
// No need for a blind 2s dwell here.
|
|
3384
|
-
//
|
|
3385
|
-
//
|
|
3386
|
-
//
|
|
3387
|
-
//
|
|
3388
|
-
// the bot recover from a wrong canonical guess (e.g. a service
|
|
3389
|
-
// that uses /register or a non-`.com` TLD).
|
|
4276
|
+
// After load: does the rendered page look like a signup form?
|
|
4277
|
+
// looksLikeSignupPage() can't tell signup from login (both have
|
|
4278
|
+
// email+password), so we ALSO classify the rendered HTML's copy via
|
|
4279
|
+
// classifySignupHtml — that's what distinguishes the two.
|
|
3390
4280
|
//
|
|
3391
|
-
// A curated task.signupUrl is trusted
|
|
3392
|
-
//
|
|
3393
|
-
//
|
|
3394
|
-
//
|
|
3395
|
-
//
|
|
3396
|
-
//
|
|
3397
|
-
|
|
3398
|
-
|
|
3399
|
-
|
|
3400
|
-
|
|
3401
|
-
|
|
3402
|
-
|
|
3403
|
-
|
|
3404
|
-
}
|
|
3405
|
-
|
|
4281
|
+
// A curated task.signupUrl is no longer trusted blindly: it can land
|
|
4282
|
+
// on a login page (a stale path the SPA reroutes to /login). We
|
|
4283
|
+
// trigger recovery for BOTH guessed and curated URLs — but
|
|
4284
|
+
// conservatively for curated ones, to avoid regressing a good
|
|
4285
|
+
// curated URL: recover ONLY when the copy classifies as "login" or
|
|
4286
|
+
// "other" AND looksLikeSignupPage also disagrees. The structural
|
|
4287
|
+
// check is the backstop for an OAuth-only signup page ("Continue
|
|
4288
|
+
// with Google", no email/password copy) that classifySignupHtml
|
|
4289
|
+
// would otherwise read as "other". (A promoted-skill URL is replay-
|
|
4290
|
+
// verified and a guessed URL that's wrong is recovered here too.)
|
|
4291
|
+
let needsRecovery = false;
|
|
4292
|
+
if (task.signupUrl === undefined) {
|
|
4293
|
+
needsRecovery = !(await this.looksLikeSignupPage());
|
|
4294
|
+
}
|
|
4295
|
+
else {
|
|
4296
|
+
const rendered = (await this.browser.getState()).html;
|
|
4297
|
+
const klass = classifySignupHtml(rendered);
|
|
4298
|
+
if (klass !== "signup" && !(await this.looksLikeSignupPage())) {
|
|
4299
|
+
needsRecovery = true;
|
|
4300
|
+
steps.push(`curated signup_url for ${task.service} rendered as "${klass}", not a signup form — attempting recovery`);
|
|
4301
|
+
}
|
|
4302
|
+
}
|
|
4303
|
+
if (needsRecovery) {
|
|
4304
|
+
if (task.signupUrl === undefined) {
|
|
4305
|
+
steps.push(`${signupUrl} didn't look like a signup page — attempting recovery`);
|
|
4306
|
+
}
|
|
4307
|
+
// Tier B — landing-page CTA self-heal. Before the heavyweight
|
|
4308
|
+
// Google-search path, navigate to the site root and click the
|
|
4309
|
+
// highest-scored signup CTA (same scorer the planner uses). This
|
|
4310
|
+
// catches static-host SPAs that serve a 200 empty shell for every
|
|
4311
|
+
// path (so the HTTP probe can't tell signup from login) but DO
|
|
4312
|
+
// render a real "Sign up" CTA once the JS hydrates on the root.
|
|
4313
|
+
const root = originRoot(signupUrl);
|
|
4314
|
+
let recovered = false;
|
|
4315
|
+
if (root !== null) {
|
|
4316
|
+
steps.push(`[signup-url] Tier B: landing-page CTA at ${root}`);
|
|
4317
|
+
try {
|
|
4318
|
+
await this.runPrewarm(root, steps);
|
|
4319
|
+
await this.browser.goto(root);
|
|
4320
|
+
const inventory = await this.browser.extractInteractiveElements();
|
|
4321
|
+
// Score every interactive element's text; pick the best
|
|
4322
|
+
// signup CTA. Providers are driven negative by scoreSignupButton
|
|
4323
|
+
// (we want the email-signup affordance, not an OAuth button).
|
|
4324
|
+
let best = null;
|
|
4325
|
+
for (const el of inventory) {
|
|
4326
|
+
const label = el.visibleText ?? el.ariaLabel ?? el.iconLabel ?? el.title ?? "";
|
|
4327
|
+
if (label.trim().length === 0)
|
|
4328
|
+
continue;
|
|
4329
|
+
const score = scoreSignupButton(label, ["google", "github"]);
|
|
4330
|
+
if (best === null || score > best.score)
|
|
4331
|
+
best = { el, score };
|
|
4332
|
+
}
|
|
4333
|
+
if (best !== null && best.score > 0) {
|
|
4334
|
+
steps.push(`[signup-url] Tier B clicking CTA "${(best.el.visibleText ?? best.el.ariaLabel ?? "").slice(0, 40)}" (score ${best.score})`);
|
|
4335
|
+
await this.browser.click(best.el.selector);
|
|
4336
|
+
const landed = (await this.browser.getState()).html;
|
|
4337
|
+
if (classifySignupHtml(landed) === "signup") {
|
|
4338
|
+
const url = this.browser.currentUrl();
|
|
4339
|
+
steps.push(`[signup-url] Tier B recovered signup page: ${url}`);
|
|
4340
|
+
signupUrl = url;
|
|
4341
|
+
recovered = true;
|
|
4342
|
+
}
|
|
4343
|
+
else {
|
|
4344
|
+
steps.push(`[signup-url] Tier B click did not reach a signup form — falling through to search`);
|
|
4345
|
+
}
|
|
4346
|
+
}
|
|
4347
|
+
else {
|
|
4348
|
+
steps.push(`[signup-url] Tier B found no scoring signup CTA on ${root}`);
|
|
4349
|
+
}
|
|
4350
|
+
}
|
|
4351
|
+
catch (err) {
|
|
4352
|
+
steps.push(`[signup-url] Tier B failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
|
|
4353
|
+
}
|
|
4354
|
+
}
|
|
4355
|
+
// Final fallback — the existing Google-search + findSignupLink
|
|
4356
|
+
// path, unchanged. Only when Tier B didn't recover.
|
|
4357
|
+
if (!recovered) {
|
|
4358
|
+
const fallbackSearch = `https://www.google.com/search?q=${encodeURIComponent(`${task.service} signup`)}`;
|
|
4359
|
+
await this.browser.goto(fallbackSearch);
|
|
4360
|
+
// PERF: domcontentloaded from goto() + findSignupLink reads
|
|
4361
|
+
// the DOM itself — no blind dwell needed.
|
|
4362
|
+
signupUrl = fallbackSearch;
|
|
4363
|
+
}
|
|
4364
|
+
}
|
|
4365
|
+
if (isGoogleSearchUrl(signupUrl)) {
|
|
3406
4366
|
steps.push("Searching for signup page...");
|
|
3407
4367
|
const found = await this.findSignupLink(task.service);
|
|
3408
4368
|
if (found !== null) {
|
|
@@ -3420,17 +4380,15 @@ export class SignupAgent {
|
|
|
3420
4380
|
// fallback, the bot is sitting on a SERP with no usable
|
|
3421
4381
|
// destination — abort rather than let the form-fill planner
|
|
3422
4382
|
// happily fill the Google search box.
|
|
3423
|
-
|
|
3424
|
-
|
|
3425
|
-
|
|
3426
|
-
|
|
3427
|
-
|
|
3428
|
-
|
|
3429
|
-
|
|
3430
|
-
|
|
3431
|
-
|
|
3432
|
-
};
|
|
3433
|
-
}
|
|
4383
|
+
return {
|
|
4384
|
+
success: false,
|
|
4385
|
+
error: `no_signup_link: searched for ${task.service}'s signup page and ` +
|
|
4386
|
+
`found no on-domain candidates. The service likely doesn't have ` +
|
|
4387
|
+
`a public self-serve signup, or the bot's domain guard rejected ` +
|
|
4388
|
+
`every match. Sign up manually.`,
|
|
4389
|
+
steps,
|
|
4390
|
+
...this.resultTail(),
|
|
4391
|
+
};
|
|
3434
4392
|
}
|
|
3435
4393
|
}
|
|
3436
4394
|
// Steps 2-5: plan the form, fill it, submit — via the
|
|
@@ -3446,147 +4404,223 @@ export class SignupAgent {
|
|
|
3446
4404
|
// `literal` has no fixed value — resolved per-action.
|
|
3447
4405
|
literal: "",
|
|
3448
4406
|
};
|
|
3449
|
-
|
|
3450
|
-
|
|
3451
|
-
|
|
3452
|
-
|
|
3453
|
-
|
|
3454
|
-
|
|
3455
|
-
|
|
3456
|
-
|
|
3457
|
-
|
|
3458
|
-
|
|
3459
|
-
|
|
3460
|
-
|
|
3461
|
-
|
|
3462
|
-
|
|
3463
|
-
|
|
3464
|
-
|
|
3465
|
-
|
|
3466
|
-
|
|
3467
|
-
|
|
3468
|
-
|
|
3469
|
-
steps,
|
|
3470
|
-
...this.resultTail(),
|
|
3471
|
-
};
|
|
3472
|
-
case "oauth_required":
|
|
3473
|
-
return {
|
|
3474
|
-
success: false,
|
|
3475
|
-
error: `oauth_required: ${task.service} offers only OAuth/SSO signup — there is no email/password form to automate.`,
|
|
3476
|
-
steps,
|
|
3477
|
-
...this.resultTail(),
|
|
3478
|
-
};
|
|
3479
|
-
case "needs_oauth_provider_session": {
|
|
3480
|
-
// rc.33-task — actionable: name the missing provider so
|
|
3481
|
-
// the user runs the right `mcp login` command. When more
|
|
3482
|
-
// than one provider is missing, the message lists them and
|
|
3483
|
-
// recommends any single one (operator picks).
|
|
3484
|
-
const missing = outcome.missingProviders;
|
|
3485
|
-
const have = outcome.haveSessions;
|
|
3486
|
-
const firstMissing = missing[0];
|
|
3487
|
-
const missingLabel = missing
|
|
3488
|
-
.map((p) => OAUTH_PROVIDERS[p].label)
|
|
3489
|
-
.join(" / ");
|
|
3490
|
-
const haveLabel = have.length > 0
|
|
3491
|
-
? have.map((p) => OAUTH_PROVIDERS[p].label).join(", ")
|
|
3492
|
-
: "(none)";
|
|
3493
|
-
return {
|
|
3494
|
-
success: false,
|
|
3495
|
-
error: `needs_oauth_provider_session: ${task.service} offers ${missingLabel} OAuth ` +
|
|
3496
|
-
`but the bot's chrome profile has no ${missingLabel} session ` +
|
|
3497
|
-
`(currently signed in to: ${haveLabel}). ` +
|
|
3498
|
-
`Run \`npx @trusty-squire/mcp login --provider=${firstMissing}\` ` +
|
|
3499
|
-
`to seed the session, then retry.`,
|
|
3500
|
-
steps,
|
|
3501
|
-
...this.resultTail(),
|
|
3502
|
-
};
|
|
3503
|
-
}
|
|
3504
|
-
case "anti_bot_blocked":
|
|
3505
|
-
return {
|
|
3506
|
-
success: false,
|
|
3507
|
-
error: `anti_bot_blocked: ${task.service}'s ${outcome.vendor} anti-bot interstitial would ` +
|
|
3508
|
-
`not clear — the bot's IP/fingerprint did not pass ${outcome.vendor}'s server-side ` +
|
|
3509
|
-
`risk score. This is a soft block (no challenge to solve); the user should sign up ` +
|
|
3510
|
-
`manually.`,
|
|
3511
|
-
steps,
|
|
3512
|
-
...this.resultTail(),
|
|
3513
|
-
};
|
|
3514
|
-
case "oauth":
|
|
3515
|
-
// T6/T7 — OAuth-first path. runOAuthFlow drives the consent
|
|
3516
|
-
// handshake and post-OAuth onboarding to its own terminal
|
|
3517
|
-
// SignupResult; there is no form submit / email verification.
|
|
3518
|
-
return await this.runOAuthFlow(task, outcome.selector, outcome.provider, steps);
|
|
3519
|
-
case "already_oauth": {
|
|
3520
|
-
// F17 — page rendered an authenticated dashboard (a
|
|
3521
|
-
// previous OAuth bind already linked the account). Skip
|
|
3522
|
-
// consent + form-fill, navigate straight to the API key.
|
|
3523
|
-
// Uses the same post-OAuth loop runOAuthFlow uses after a
|
|
3524
|
-
// successful handshake.
|
|
3525
|
-
let credentials = await this.extractCredentials();
|
|
3526
|
-
const skippedPostVerify = credentials.api_key !== undefined;
|
|
3527
|
-
if (credentials.api_key === undefined) {
|
|
3528
|
-
credentials = await this.postVerifyLoop({
|
|
3529
|
-
service: task.service,
|
|
3530
|
-
maxRounds: task.postVerifyMaxRounds ?? 24,
|
|
4407
|
+
// `outcome` is re-computed when the OAuth path signals a form-fill
|
|
4408
|
+
// fall-back (Google login-only / no-account, e.g. plunk): the
|
|
4409
|
+
// `case "oauth"` handler re-runs planExecuteWithRetry with OAuth-
|
|
4410
|
+
// first suppressed and loops back through this same switch, so
|
|
4411
|
+
// every terminal case (submitted, planning_failed, …) stays in one
|
|
4412
|
+
// place. Bounded to a single re-route so a service that keeps
|
|
4413
|
+
// bouncing can't spin here.
|
|
4414
|
+
let outcome = await this.planExecuteWithRetry(task, fillValues, steps);
|
|
4415
|
+
let oauthFallbackUsed = false;
|
|
4416
|
+
// Multi-step signup guard (amplitude: email/name step → a dedicated
|
|
4417
|
+
// "Create your password" step). Bounds how many continuation form steps
|
|
4418
|
+
// we'll fill after the first submit before treating the signup as done.
|
|
4419
|
+
let multiStepRounds = 0;
|
|
4420
|
+
const MAX_MULTI_STEP_ROUNDS = 3;
|
|
4421
|
+
dispatch: for (;;) {
|
|
4422
|
+
switch (outcome.kind) {
|
|
4423
|
+
case "captcha_blocked":
|
|
4424
|
+
return {
|
|
4425
|
+
success: false,
|
|
4426
|
+
error: `captcha_blocked: ${outcome.captchaKind} challenge did not resolve. The site flagged this session.`,
|
|
3531
4427
|
steps,
|
|
3532
|
-
...(
|
|
3533
|
-
|
|
3534
|
-
|
|
3535
|
-
});
|
|
3536
|
-
}
|
|
3537
|
-
if (credentials.api_key !== undefined) {
|
|
3538
|
-
// 0.8.3-rc.1 — when extractCredentials short-circuited
|
|
3539
|
-
// before postVerifyLoop ran, no captures were written.
|
|
3540
|
-
// Emit a synthetic extract round so auto-promote can
|
|
3541
|
-
// build a "navigate + extract" skill from this run.
|
|
3542
|
-
if (skippedPostVerify) {
|
|
3543
|
-
await this.writeFastPathSyntheticCapture(task.service, 0, true);
|
|
3544
|
-
}
|
|
4428
|
+
...this.resultTail(),
|
|
4429
|
+
};
|
|
4430
|
+
case "submit_failed":
|
|
3545
4431
|
return {
|
|
3546
|
-
success:
|
|
3547
|
-
|
|
4432
|
+
success: false,
|
|
4433
|
+
error: `submit_failed: could not click the signup button — ${outcome.reason}`,
|
|
4434
|
+
steps,
|
|
4435
|
+
...this.resultTail(),
|
|
4436
|
+
};
|
|
4437
|
+
case "planning_failed":
|
|
4438
|
+
return {
|
|
4439
|
+
success: false,
|
|
4440
|
+
error: `planning_failed: ${outcome.reason}`,
|
|
4441
|
+
steps,
|
|
4442
|
+
...this.resultTail(),
|
|
4443
|
+
};
|
|
4444
|
+
case "oauth_required":
|
|
4445
|
+
return {
|
|
4446
|
+
success: false,
|
|
4447
|
+
error: `oauth_required: ${task.service} offers only OAuth/SSO signup — there is no email/password form to automate.`,
|
|
4448
|
+
steps,
|
|
4449
|
+
...this.resultTail(),
|
|
4450
|
+
};
|
|
4451
|
+
case "needs_oauth_provider_session": {
|
|
4452
|
+
// rc.33-task — actionable: name the missing provider so
|
|
4453
|
+
// the user runs the right `mcp login` command. When more
|
|
4454
|
+
// than one provider is missing, the message lists them and
|
|
4455
|
+
// recommends any single one (operator picks).
|
|
4456
|
+
const missing = outcome.missingProviders;
|
|
4457
|
+
const have = outcome.haveSessions;
|
|
4458
|
+
const firstMissing = missing[0];
|
|
4459
|
+
const missingLabel = missing
|
|
4460
|
+
.map((p) => OAUTH_PROVIDERS[p].label)
|
|
4461
|
+
.join(" / ");
|
|
4462
|
+
const haveLabel = have.length > 0
|
|
4463
|
+
? have.map((p) => OAUTH_PROVIDERS[p].label).join(", ")
|
|
4464
|
+
: "(none)";
|
|
4465
|
+
return {
|
|
4466
|
+
success: false,
|
|
4467
|
+
error: `needs_oauth_provider_session: ${task.service} offers ${missingLabel} OAuth ` +
|
|
4468
|
+
`but the bot's chrome profile has no ${missingLabel} session ` +
|
|
4469
|
+
`(currently signed in to: ${haveLabel}). ` +
|
|
4470
|
+
`Run \`npx @trusty-squire/mcp login --provider=${firstMissing}\` ` +
|
|
4471
|
+
`to seed the session, then retry.`,
|
|
3548
4472
|
steps,
|
|
3549
4473
|
...this.resultTail(),
|
|
3550
4474
|
};
|
|
3551
4475
|
}
|
|
3552
|
-
|
|
3553
|
-
// path uses. The post-verify loop sets lastPostVerifyDoneReason
|
|
3554
|
-
// with [stuck_loop] or [existing_account_no_extract] markers
|
|
3555
|
-
// when it bails on a planner-loop or pre-existing-key state;
|
|
3556
|
-
// surface those distinctly rather than as the generic
|
|
3557
|
-
// no_credentials_after_already_signed_in.
|
|
3558
|
-
if (this.lastPostVerifyDoneReason !== null &&
|
|
3559
|
-
this.lastPostVerifyDoneReason.startsWith("[stuck_loop]")) {
|
|
4476
|
+
case "anti_bot_blocked":
|
|
3560
4477
|
return {
|
|
3561
4478
|
success: false,
|
|
3562
|
-
error: `
|
|
3563
|
-
`
|
|
3564
|
-
`
|
|
4479
|
+
error: `anti_bot_blocked: ${task.service}'s ${outcome.vendor} anti-bot interstitial would ` +
|
|
4480
|
+
`not clear — the bot's IP/fingerprint did not pass ${outcome.vendor}'s server-side ` +
|
|
4481
|
+
`risk score. This is a soft block (no challenge to solve); the user should sign up ` +
|
|
4482
|
+
`manually.`,
|
|
3565
4483
|
steps,
|
|
3566
4484
|
...this.resultTail(),
|
|
3567
4485
|
};
|
|
4486
|
+
case "oauth": {
|
|
4487
|
+
// T6/T7 — OAuth-first path. runOAuthFlow drives the consent
|
|
4488
|
+
// handshake and post-OAuth onboarding to its own terminal
|
|
4489
|
+
// SignupResult (no form submit / email verification) — unless it returns the form-fill fall-back sentinel handled below.
|
|
4490
|
+
const oauthResult = await this.runOAuthFlow(task, outcome.selector, outcome.provider, steps);
|
|
4491
|
+
// Google login-only / no-account (plunk): OAuth is a dead end
|
|
4492
|
+
// but the email/password form can still create the account.
|
|
4493
|
+
// Re-run the form-fill path ONCE with OAuth-first suppressed
|
|
4494
|
+
// (forceFormFill) — re-navigate to the signup form first since
|
|
4495
|
+
// the OAuth flow left us on the service's /login page — then
|
|
4496
|
+
// loop back through this switch to dispatch the new outcome.
|
|
4497
|
+
if (oauthResult === OAUTH_FALL_BACK_TO_FORM_FILL) {
|
|
4498
|
+
if (oauthFallbackUsed) {
|
|
4499
|
+
// Already fell back once and OAuth came up again — refuse
|
|
4500
|
+
// to ping-pong. Surface the dead end honestly.
|
|
4501
|
+
return {
|
|
4502
|
+
success: false,
|
|
4503
|
+
error: `oauth_required: ${task.service}'s OAuth is login-only (no account for this ` +
|
|
4504
|
+
`identity) and the email/password fall-back did not complete a signup.`,
|
|
4505
|
+
steps,
|
|
4506
|
+
...this.resultTail(),
|
|
4507
|
+
};
|
|
4508
|
+
}
|
|
4509
|
+
oauthFallbackUsed = true;
|
|
4510
|
+
// If the OAuth recovery already left us ON a signup form (the
|
|
4511
|
+
// amplitude demo-escape clicked "Create a free account" → the real
|
|
4512
|
+
// /signup form), fill it IN PLACE — re-navigating to task.signupUrl
|
|
4513
|
+
// could bounce back to the demo. Otherwise re-navigate (the
|
|
4514
|
+
// login-only / no-account case left us on a /login page).
|
|
4515
|
+
const onSignupFormHtml = (await this.browser.getState().catch(() => null))?.html ?? "";
|
|
4516
|
+
if (classifySignupHtml(onSignupFormHtml) === "signup") {
|
|
4517
|
+
steps.push(`OAuth recovery already on a signup form ` +
|
|
4518
|
+
`(${pathOf(this.browser.currentUrl())}) — filling in place.`);
|
|
4519
|
+
}
|
|
4520
|
+
else {
|
|
4521
|
+
const formUrl = task.signupUrl ?? this.browser.currentUrl();
|
|
4522
|
+
steps.push(`Re-routing to email/password signup at ${formUrl} after OAuth no-account.`);
|
|
4523
|
+
await this.browser.goto(formUrl);
|
|
4524
|
+
}
|
|
4525
|
+
outcome = await this.planExecuteWithRetry(task, fillValues, steps,
|
|
4526
|
+
/* forceFormFill */ true);
|
|
4527
|
+
continue dispatch;
|
|
4528
|
+
}
|
|
4529
|
+
return oauthResult;
|
|
3568
4530
|
}
|
|
3569
|
-
|
|
3570
|
-
|
|
4531
|
+
case "already_oauth": {
|
|
4532
|
+
// F17 — page rendered an authenticated dashboard (a
|
|
4533
|
+
// previous OAuth bind already linked the account). Skip
|
|
4534
|
+
// consent + form-fill, navigate straight to the API key.
|
|
4535
|
+
// Uses the same post-OAuth loop runOAuthFlow uses after a
|
|
4536
|
+
// successful handshake.
|
|
4537
|
+
let credentials = await this.extractCredentials();
|
|
4538
|
+
const skippedPostVerify = credentials.api_key !== undefined;
|
|
4539
|
+
if (credentials.api_key === undefined) {
|
|
4540
|
+
credentials = await this.postVerifyLoop({
|
|
4541
|
+
service: task.service,
|
|
4542
|
+
maxRounds: task.postVerifyMaxRounds ?? 24,
|
|
4543
|
+
steps,
|
|
4544
|
+
...(task.scopeHint !== undefined ? { scopeHint: task.scopeHint } : {}),
|
|
4545
|
+
...(task.machineToken !== undefined ? { machineToken: task.machineToken } : {}),
|
|
4546
|
+
...(task.apiBase !== undefined ? { apiBase: task.apiBase } : {}),
|
|
4547
|
+
});
|
|
4548
|
+
}
|
|
4549
|
+
if (credentials.api_key !== undefined) {
|
|
4550
|
+
// 0.8.3-rc.1 — when extractCredentials short-circuited
|
|
4551
|
+
// before postVerifyLoop ran, no captures were written.
|
|
4552
|
+
// Emit a synthetic extract round so auto-promote can
|
|
4553
|
+
// build a "navigate + extract" skill from this run.
|
|
4554
|
+
if (skippedPostVerify) {
|
|
4555
|
+
await this.writeFastPathSyntheticCapture(task.service, 0, true);
|
|
4556
|
+
}
|
|
4557
|
+
return {
|
|
4558
|
+
success: true,
|
|
4559
|
+
credentials,
|
|
4560
|
+
steps,
|
|
4561
|
+
...this.resultTail(),
|
|
4562
|
+
};
|
|
4563
|
+
}
|
|
4564
|
+
// 0.8.2-rc.10 — same sentinel-pattern routing the runOAuthFlow
|
|
4565
|
+
// path uses. The post-verify loop sets lastPostVerifyDoneReason
|
|
4566
|
+
// with [stuck_loop] or [existing_account_no_extract] markers
|
|
4567
|
+
// when it bails on a planner-loop or pre-existing-key state;
|
|
4568
|
+
// surface those distinctly rather than as the generic
|
|
4569
|
+
// no_credentials_after_already_signed_in.
|
|
4570
|
+
if (this.lastPostVerifyDoneReason !== null &&
|
|
4571
|
+
this.lastPostVerifyDoneReason.startsWith("[stuck_loop]")) {
|
|
4572
|
+
return {
|
|
4573
|
+
success: false,
|
|
4574
|
+
error: `planner_stuck: ${task.service}'s dashboard re-picked the same step repeatedly ` +
|
|
4575
|
+
`with no inventory change and the bot's hardcoded API-key URL fallbacks did not ` +
|
|
4576
|
+
`advance the page — finish the signup manually.`,
|
|
4577
|
+
steps,
|
|
4578
|
+
...this.resultTail(),
|
|
4579
|
+
};
|
|
4580
|
+
}
|
|
4581
|
+
if (this.lastPostVerifyDoneReason !== null &&
|
|
4582
|
+
this.lastPostVerifyDoneReason.startsWith("[existing_account_no_extract]")) {
|
|
4583
|
+
return {
|
|
4584
|
+
success: false,
|
|
4585
|
+
error: `existing_account_no_extract: ${task.service}'s dashboard shows pre-existing API ` +
|
|
4586
|
+
`keys for this identity but the values are masked and unrecoverable — wipe the ` +
|
|
4587
|
+
`test identity's account on ${task.service} or sign in manually and reveal the key.`,
|
|
4588
|
+
steps,
|
|
4589
|
+
...this.resultTail(),
|
|
4590
|
+
};
|
|
4591
|
+
}
|
|
3571
4592
|
return {
|
|
3572
4593
|
success: false,
|
|
3573
|
-
error:
|
|
3574
|
-
|
|
3575
|
-
`test identity's account on ${task.service} or sign in manually and reveal the key.`,
|
|
4594
|
+
error: "no_credentials_after_already_signed_in: bot detected an authenticated dashboard " +
|
|
4595
|
+
"but post-OAuth navigation did not surface an API key. Sign in manually and generate the token.",
|
|
3576
4596
|
steps,
|
|
3577
4597
|
...this.resultTail(),
|
|
3578
4598
|
};
|
|
3579
4599
|
}
|
|
3580
|
-
|
|
3581
|
-
|
|
3582
|
-
|
|
3583
|
-
|
|
3584
|
-
|
|
3585
|
-
|
|
3586
|
-
|
|
4600
|
+
case "submitted": {
|
|
4601
|
+
// Multi-step signup: a clean submit can land on ANOTHER form step
|
|
4602
|
+
// (amplitude: a dedicated "Create your password" page) rather than
|
|
4603
|
+
// the dashboard or a verify-email screen. Detect a continuation
|
|
4604
|
+
// form step and run the fill-submit phase again on it, bounded,
|
|
4605
|
+
// before treating the submit as done — otherwise the post-submit
|
|
4606
|
+
// logic below polls the inbox for a verification email the
|
|
4607
|
+
// half-finished signup never triggers. Conservative (a visible
|
|
4608
|
+
// empty password input + a submit control, NOT a login or
|
|
4609
|
+
// check-your-email page), so a genuine email-verification flow
|
|
4610
|
+
// isn't mistaken for a form step.
|
|
4611
|
+
if (multiStepRounds < MAX_MULTI_STEP_ROUNDS) {
|
|
4612
|
+
const stepLabel = await this.detectContinuationFormStep();
|
|
4613
|
+
if (stepLabel !== null) {
|
|
4614
|
+
multiStepRounds += 1;
|
|
4615
|
+
steps.push(`Post-submit: continuation form step detected (${stepLabel}) — ` +
|
|
4616
|
+
`filling + submitting (step ${multiStepRounds + 1}).`);
|
|
4617
|
+
outcome = await this.planExecuteWithRetry(task, fillValues, steps);
|
|
4618
|
+
continue dispatch;
|
|
4619
|
+
}
|
|
4620
|
+
}
|
|
4621
|
+
break dispatch;
|
|
4622
|
+
}
|
|
3587
4623
|
}
|
|
3588
|
-
case "submitted":
|
|
3589
|
-
break;
|
|
3590
4624
|
}
|
|
3591
4625
|
await saveDebugSnapshot(this.browser, "after-submit");
|
|
3592
4626
|
// Step 6: Extract creds from page.
|
|
@@ -3653,10 +4687,14 @@ export class SignupAgent {
|
|
|
3653
4687
|
? `Post-submit page shows a rejected submit — short ${verificationTimeoutSeconds}s probe (S3: no account created, no verification email expected)...`
|
|
3654
4688
|
: `Post-submit page is inconclusive but submit was clean — polling inbox up to ${verificationTimeoutSeconds}s (S3: an account may have been created and mail can lag)...`);
|
|
3655
4689
|
try {
|
|
3656
|
-
const email = await this.waitForVerificationEmail(task.inbox, task.email, verificationTimeoutSeconds);
|
|
4690
|
+
const email = await this.waitForVerificationEmail(task.inbox, this.pendingVerificationAlias ?? task.email, verificationTimeoutSeconds);
|
|
3657
4691
|
steps.push(`Received: "${email.subject}" from ${email.from_address}`);
|
|
3658
|
-
if (email.parsed_links.length > 0) {
|
|
3659
|
-
|
|
4692
|
+
if (email.parsed_links.length > 0 || (email.body_html ?? "") !== "") {
|
|
4693
|
+
// URL-keyword scorer first; if it can't see past a click-tracker
|
|
4694
|
+
// wrapper, fall back to matching the link's ANCHOR TEXT in the
|
|
4695
|
+
// HTML body (amplitude's SendGrid-wrapped "Activate account").
|
|
4696
|
+
const verifyLink = this.pickVerificationLink(Array.from(email.parsed_links)) ??
|
|
4697
|
+
pickVerificationLinkFromHtml(email.body_html ?? "");
|
|
3660
4698
|
if (verifyLink !== null) {
|
|
3661
4699
|
steps.push(`Following verification link: ${verifyLink}`);
|
|
3662
4700
|
await this.browser.goto(verifyLink);
|
|
@@ -3682,12 +4720,22 @@ export class SignupAgent {
|
|
|
3682
4720
|
});
|
|
3683
4721
|
}
|
|
3684
4722
|
}
|
|
4723
|
+
else if (email.parsed_codes.length > 0) {
|
|
4724
|
+
credentials = await this.enterEmailVerificationCode(email.parsed_codes[0] ?? "", task, password, steps);
|
|
4725
|
+
}
|
|
3685
4726
|
else {
|
|
3686
4727
|
steps.push("Email had no usable verification link.");
|
|
3687
4728
|
}
|
|
3688
4729
|
}
|
|
4730
|
+
else if (email.parsed_codes.length > 0) {
|
|
4731
|
+
// No links at all, but the email carries a numeric code
|
|
4732
|
+
// (plausible: "Enter 4011 to verify your email address").
|
|
4733
|
+
// The signup page transitioned to a code-input step after
|
|
4734
|
+
// submit — type the code in rather than waiting for a link.
|
|
4735
|
+
credentials = await this.enterEmailVerificationCode(email.parsed_codes[0] ?? "", task, password, steps);
|
|
4736
|
+
}
|
|
3689
4737
|
else {
|
|
3690
|
-
steps.push("Email had no parsed links — skipping verification
|
|
4738
|
+
steps.push("Email had no parsed links or codes — skipping verification.");
|
|
3691
4739
|
}
|
|
3692
4740
|
}
|
|
3693
4741
|
catch (err) {
|
|
@@ -3768,10 +4816,46 @@ export class SignupAgent {
|
|
|
3768
4816
|
// services that don't gate OAuth on Turnstile).
|
|
3769
4817
|
try {
|
|
3770
4818
|
const captcha = await this.browser.solveVisibleCaptcha(20_000);
|
|
3771
|
-
if (captcha.found) {
|
|
3772
|
-
steps.push(captcha.
|
|
3773
|
-
|
|
3774
|
-
|
|
4819
|
+
if (captcha.found && captcha.solved) {
|
|
4820
|
+
steps.push(`OAuth: ticked the visible ${captcha.kind} checkbox before clicking the ${provider.label} affordance`);
|
|
4821
|
+
}
|
|
4822
|
+
else if (captcha.found && !captcha.solved) {
|
|
4823
|
+
// Tier-2 click-and-wait timed out. For reCAPTCHA v2 this is the
|
|
4824
|
+
// SAME state the form-submit gate (runCaptchaGate) recovers from
|
|
4825
|
+
// by escalating to the third-party solver — mirror that path here
|
|
4826
|
+
// so OAuth-first flows aren't left clicking a Google button that
|
|
4827
|
+
// the service keeps gated behind an unsolved checkbox (replit,
|
|
4828
|
+
// uploadcare). Turnstile is deliberately NOT escalated: Cloudflare
|
|
4829
|
+
// scores at the IP layer, so a solver-issued token is rejected
|
|
4830
|
+
// anyway and only burns the 2Captcha balance.
|
|
4831
|
+
let solvedViaTier3 = false;
|
|
4832
|
+
if (captcha.kind === "recaptcha" && this.captchaSolver?.isAvailable() === true) {
|
|
4833
|
+
const sitekey = await this.browser.extractRecaptchaSitekey();
|
|
4834
|
+
const pageUrl = (await this.browser.getState().catch(() => null))?.url;
|
|
4835
|
+
if (sitekey !== null && pageUrl !== undefined) {
|
|
4836
|
+
steps.push(`OAuth: Tier 3 — submitting reCAPTCHA sitekey to 2Captcha (${sitekey.slice(0, 10)}…)`);
|
|
4837
|
+
const solveRes = await this.captchaSolver.solveRecaptchaV2({ sitekey, pageUrl });
|
|
4838
|
+
if (solveRes.kind === "ok") {
|
|
4839
|
+
const injected = await this.browser.injectRecaptchaToken(solveRes.token);
|
|
4840
|
+
if (injected) {
|
|
4841
|
+
solvedViaTier3 = true;
|
|
4842
|
+
steps.push(`OAuth: Tier 3 solved the reCAPTCHA in ${Math.round(solveRes.durationMs / 1000)}s via 2Captcha — clicking the ${provider.label} affordance`);
|
|
4843
|
+
}
|
|
4844
|
+
else {
|
|
4845
|
+
steps.push(`OAuth: Tier 3 token arrived but page injection failed — clicking the ${provider.label} affordance anyway`);
|
|
4846
|
+
}
|
|
4847
|
+
}
|
|
4848
|
+
else {
|
|
4849
|
+
steps.push(`OAuth: Tier 3 ${solveRes.kind}` +
|
|
4850
|
+
("reason" in solveRes ? `: ${solveRes.reason}` : "") +
|
|
4851
|
+
("durationMs" in solveRes ? ` (${Math.round(solveRes.durationMs / 1000)}s)` : "") +
|
|
4852
|
+
` — clicking the ${provider.label} affordance anyway`);
|
|
4853
|
+
}
|
|
4854
|
+
}
|
|
4855
|
+
}
|
|
4856
|
+
if (!solvedViaTier3) {
|
|
4857
|
+
steps.push(`OAuth: visible ${captcha.kind} present but did not solve in 20s — clicking the ${provider.label} affordance anyway`);
|
|
4858
|
+
}
|
|
3775
4859
|
}
|
|
3776
4860
|
}
|
|
3777
4861
|
catch (err) {
|
|
@@ -3779,7 +4863,21 @@ export class SignupAgent {
|
|
|
3779
4863
|
steps.push(`OAuth: visible-captcha precheck failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
|
|
3780
4864
|
}
|
|
3781
4865
|
steps.push(`OAuth: clicking the ${provider.label} sign-in affordance`);
|
|
3782
|
-
|
|
4866
|
+
// Google Identity Services (GSI) / FedCM does NOT redirect — clicking the
|
|
4867
|
+
// widget raises a browser-native FedCM dialog or a popup and returns a
|
|
4868
|
+
// JWT to a JS callback. The classic startOAuth waits for a provider
|
|
4869
|
+
// redirect that never comes, so it falsely concludes "signed in" and the
|
|
4870
|
+
// session never persists (northflank). Detect GSI and drive it over CDP.
|
|
4871
|
+
let gsiHandled = false;
|
|
4872
|
+
if (provider.id === "google" && (await this.browser.hasGoogleGsiAffordance())) {
|
|
4873
|
+
const gsi = await this.browser.tryGoogleGsiLogin(oauthSelector);
|
|
4874
|
+
gsiHandled = true;
|
|
4875
|
+
steps.push(`OAuth: Google Identity Services / FedCM widget — resolved via ${gsi.via}` +
|
|
4876
|
+
(gsi.ok ? "" : " (no FedCM dialog or popup appeared — the widget may need a different trigger)"));
|
|
4877
|
+
}
|
|
4878
|
+
if (!gsiHandled) {
|
|
4879
|
+
await this.browser.startOAuth(oauthSelector);
|
|
4880
|
+
}
|
|
3783
4881
|
await this.browser.wait(3);
|
|
3784
4882
|
await saveDebugSnapshot(this.browser, "oauth-after-click");
|
|
3785
4883
|
// Bounded consent walk — handles account-chooser → consent as two
|
|
@@ -3806,6 +4904,21 @@ export class SignupAgent {
|
|
|
3806
4904
|
await this.browser.wait(1);
|
|
3807
4905
|
continue;
|
|
3808
4906
|
}
|
|
4907
|
+
// Google "Choose an account" chooser. Its "…to continue to <app>" copy
|
|
4908
|
+
// matches the consent classifier, but it is an account PICKER — it needs
|
|
4909
|
+
// a card CLICK, not a scope approve. Google shows it before the real
|
|
4910
|
+
// consent right after a fresh relogin (the first OAuth re-confirms the
|
|
4911
|
+
// account). Without handling it here the bot tries to approve, stalls,
|
|
4912
|
+
// and the page flips to needs_login → abort (every Google service fails
|
|
4913
|
+
// until an OAuth is done once). Click the account card and re-read; the
|
|
4914
|
+
// next pass lands on the real consent screen (or back at the service).
|
|
4915
|
+
if (provider.id === "google" &&
|
|
4916
|
+
/\/(?:accountchooser|chooseaccount|oauthchooseaccount)/i.test(url)) {
|
|
4917
|
+
const clicked = await this.tryClickGoogleChooserCard();
|
|
4918
|
+
steps.push(`OAuth: Google account chooser — ${clicked ? "clicked the account card" : "no clickable account card found"}`);
|
|
4919
|
+
await this.browser.wait(2);
|
|
4920
|
+
continue;
|
|
4921
|
+
}
|
|
3809
4922
|
const authState = provider.classifyAuthState(url, body);
|
|
3810
4923
|
steps.push(`OAuth: ${provider.label} auth state = ${authState} (url=${url.slice(0, 120)})`);
|
|
3811
4924
|
if (authState === "not_provider")
|
|
@@ -4040,6 +5153,30 @@ export class SignupAgent {
|
|
|
4040
5153
|
return this.oauthAbort("oauth_consent_needs_review", `${provider.label} consent page (URL unparseable) lists scope-grant phrases: ` +
|
|
4041
5154
|
`[${dangerPhrases.join(" | ")}]. Pausing for manual review.`, steps);
|
|
4042
5155
|
}
|
|
5156
|
+
// Google's newer consent URL hides the scope= param behind an
|
|
5157
|
+
// opaque `part=` token, so extractOAuthScopes() returned null
|
|
5158
|
+
// even on an entirely-basic email/profile consent (measured on
|
|
5159
|
+
// uploadcare). The visible DOM is the only remaining signal: if
|
|
5160
|
+
// it lists ONLY openid/email/profile-family grants (and the
|
|
5161
|
+
// danger scraper above already cleared it), this is exactly the
|
|
5162
|
+
// consent the URL-readable happy path auto-approves — so recover
|
|
5163
|
+
// it here instead of blocking. Anything ambiguous falls through
|
|
5164
|
+
// to the conservative abort below. Mirror the basic-scopes happy
|
|
5165
|
+
// path: set consentAlreadyApproved, advance, handle !advanced.
|
|
5166
|
+
if (provider.id === "google" &&
|
|
5167
|
+
!consentAlreadyApproved &&
|
|
5168
|
+
googleConsentIsBasicFromDom(body)) {
|
|
5169
|
+
steps.push("OAuth: consent scopes unreadable from URL but DOM lists only " +
|
|
5170
|
+
"basic email/profile scopes — auto-approving");
|
|
5171
|
+
consentAlreadyApproved = true;
|
|
5172
|
+
const advanced = await this.browser.advanceOAuthConsent(provider.id);
|
|
5173
|
+
if (!advanced) {
|
|
5174
|
+
return this.oauthAbort("oauth_consent_needs_review", `basic-only consent read from the ${provider.label} DOM but no ` +
|
|
5175
|
+
`approve control found on the consent page — approve it manually.`, steps);
|
|
5176
|
+
}
|
|
5177
|
+
await this.browser.wait(3);
|
|
5178
|
+
continue;
|
|
5179
|
+
}
|
|
4043
5180
|
// F16 — order matters here. The post-grant intermediate page
|
|
4044
5181
|
// (after blind-consent approved on iter 1) is also classified
|
|
4045
5182
|
// as "consent" with unreadable scopes. If we check the blind-
|
|
@@ -4115,8 +5252,93 @@ export class SignupAgent {
|
|
|
4115
5252
|
// for same-tab redirects) and drive post-OAuth onboarding.
|
|
4116
5253
|
await this.browser.settleAfterOAuth();
|
|
4117
5254
|
await this.browser.wait(2);
|
|
5255
|
+
// Token-exchange settle. Stytch/WorkOS-style services (groq) bounce the
|
|
5256
|
+
// OAuth back to a callback page (/authenticate?token=…) and complete an
|
|
5257
|
+
// ASYNC token→session exchange there, THEN redirect to the dashboard.
|
|
5258
|
+
// With a warm Google session the round-trip is near-instant, so the bot
|
|
5259
|
+
// arrives at the callback while the exchange is still in flight — and
|
|
5260
|
+
// acting now (the rc.20 second-click retry, or post-verify navigation)
|
|
5261
|
+
// interrupts it, stranding the run on the login page. Give a callback-
|
|
5262
|
+
// shaped URL a chance to redirect itself away before we touch anything.
|
|
5263
|
+
{
|
|
5264
|
+
// Wait while EITHER the URL is still callback/login-shaped OR the page
|
|
5265
|
+
// shows async-session processing copy ("Creating your organization…").
|
|
5266
|
+
// Budget 24s — MEASURED: Stytch B2B's discovery+org-creation+session
|
|
5267
|
+
// chain takes ~5-7s but varies, and the bot's own page reads add jitter;
|
|
5268
|
+
// a short URL-only wait exits mid-provisioning and the rc.20 retry then
|
|
5269
|
+
// re-clicks OAuth and aborts it. Re-read text each tick.
|
|
5270
|
+
let settled = false;
|
|
5271
|
+
for (let i = 0; i < 12; i++) {
|
|
5272
|
+
const url = this.browser.currentUrl();
|
|
5273
|
+
const text = await this.browser.extractText().catch(() => "");
|
|
5274
|
+
if (!isLoginPageUrl(url) && !isAuthProcessingText(text)) {
|
|
5275
|
+
settled = true;
|
|
5276
|
+
break;
|
|
5277
|
+
}
|
|
5278
|
+
if (i === 0 && isAuthProcessingText(text)) {
|
|
5279
|
+
steps.push("OAuth: session is provisioning (auth-processing screen) — holding, not touching the page.");
|
|
5280
|
+
}
|
|
5281
|
+
await this.browser.wait(2);
|
|
5282
|
+
}
|
|
5283
|
+
const settledUrl = this.browser.currentUrl();
|
|
5284
|
+
steps.push(`OAuth: waited for the callback to settle — now at ${pathOf(settledUrl)}` +
|
|
5285
|
+
(settled ? " (redirected to the app)" : " (still login/processing-shaped)"));
|
|
5286
|
+
}
|
|
5287
|
+
// Dead-route escape. The OAuth often returns to the SIGNUP url it
|
|
5288
|
+
// started from (northflank: app.northflank.com/signup). For an account
|
|
5289
|
+
// that now EXISTS, /signup (and /login, /register…) is a dead route the
|
|
5290
|
+
// SPA can't render — it hangs on a "Connecting" shell forever and the
|
|
5291
|
+
// post-verify planner reads it as "signed out." Navigating to the app
|
|
5292
|
+
// ORIGIN ROOT lets the service redirect an authenticated user to its
|
|
5293
|
+
// real dashboard. Generalizes: a service already on its dashboard has a
|
|
5294
|
+
// non-auth path here and is left alone.
|
|
5295
|
+
if (isSignupOrLoginRoute(this.browser.currentUrl())) {
|
|
5296
|
+
const root = originRoot(this.browser.currentUrl());
|
|
5297
|
+
if (root !== null) {
|
|
5298
|
+
steps.push(`OAuth: post-auth landing is a signup/login route (${pathOf(this.browser.currentUrl())}) — ` +
|
|
5299
|
+
`navigating to the app root (${root}) so the service routes us to the dashboard.`);
|
|
5300
|
+
try {
|
|
5301
|
+
await this.browser.goto(root);
|
|
5302
|
+
await this.browser.wait(2);
|
|
5303
|
+
}
|
|
5304
|
+
catch {
|
|
5305
|
+
// navigation hiccup — the post-verify loop re-reads regardless.
|
|
5306
|
+
}
|
|
5307
|
+
}
|
|
5308
|
+
}
|
|
4118
5309
|
await saveDebugSnapshot(this.browser, "oauth-post-consent");
|
|
4119
5310
|
steps.push(`OAuth: signed in via ${provider.label} — driving post-OAuth onboarding to the API key`);
|
|
5311
|
+
// amplitude class — OAuth drops the bot into the service's READ-ONLY DEMO
|
|
5312
|
+
// sandbox (app.amplitude.com/analytics/demo) instead of a real account: it
|
|
5313
|
+
// has NO API key, and the only route to a real org is the prominent
|
|
5314
|
+
// "Create a free account" CTA, which opens the real /signup form. Detect
|
|
5315
|
+
// the demo state and click that CTA, then re-route to form-fill (the
|
|
5316
|
+
// email/name/password form the bot now completes, multi-step password
|
|
5317
|
+
// included). MEASURED 2026-06-04: without this the post-verify loop hunts
|
|
5318
|
+
// the demo for a key that isn't there → oauth_onboarding_failed.
|
|
5319
|
+
{
|
|
5320
|
+
await this.browser.wait(2); // let the post-OAuth redirect settle onto the demo
|
|
5321
|
+
const demoState = await this.browser.getState();
|
|
5322
|
+
const demoText = await this.browser.extractText().catch(() => "");
|
|
5323
|
+
if (isSandboxDemoState(demoState.url, demoText)) {
|
|
5324
|
+
const cta = findCreateAccountCta(await this.browser.extractInteractiveElements());
|
|
5325
|
+
if (cta !== null) {
|
|
5326
|
+
steps.push(`OAuth: landed in ${task.service}'s read-only demo sandbox ` +
|
|
5327
|
+
`(${pathOf(demoState.url)}) — clicking ` +
|
|
5328
|
+
`"${(cta.visibleText ?? "Create a free account").trim()}" to escape into ` +
|
|
5329
|
+
`the real signup form.`);
|
|
5330
|
+
try {
|
|
5331
|
+
await this.browser.click(cta.selector);
|
|
5332
|
+
await this.browser.wait(2);
|
|
5333
|
+
}
|
|
5334
|
+
catch (err) {
|
|
5335
|
+
steps.push(`OAuth: demo-escape click threw (${err instanceof Error ? err.message : String(err)}) — ` +
|
|
5336
|
+
`falling back to form-fill anyway.`);
|
|
5337
|
+
}
|
|
5338
|
+
return OAUTH_FALL_BACK_TO_FORM_FILL;
|
|
5339
|
+
}
|
|
5340
|
+
}
|
|
5341
|
+
}
|
|
4120
5342
|
// rc.20 — login-loop detection. Services like Groq complete the
|
|
4121
5343
|
// Google OAuth handshake server-side but redirect back to a
|
|
4122
5344
|
// login-looking page (/authenticate) where the user has to click
|
|
@@ -4138,6 +5360,43 @@ export class SignupAgent {
|
|
|
4138
5360
|
const postOAuthState = await this.browser.getState();
|
|
4139
5361
|
const postOAuthInv = await this.buildInventory(steps, [provider.id]);
|
|
4140
5362
|
const loopBtn = isLoginLoopState(postOAuthState.url, postOAuthInv, provider.id);
|
|
5363
|
+
// amplitude class — post-OAuth we're STUCK on a login page (the provider
|
|
5364
|
+
// button is still present, or the URL is a login route) that carries an
|
|
5365
|
+
// in-page SIGNUP CTA. Google signed in fine, but the service has no
|
|
5366
|
+
// account/org for this identity and expects us to CREATE one via the
|
|
5367
|
+
// page's "Don't have an account? Sign up for free" link. The naive
|
|
5368
|
+
// loopBtn path below would re-trigger OAuth and loop until
|
|
5369
|
+
// oauth_loop_detected. Instead: click the signup CTA and re-route into
|
|
5370
|
+
// the email/password signup path (same sentinel the detectGoogleNoAccount
|
|
5371
|
+
// gate uses ~40 lines below). CONSERVATIVE: only fires in the STUCK state
|
|
5372
|
+
// (loopBtn or a login URL) and only when the page is NOT already a signup
|
|
5373
|
+
// form, so a dashboard that successfully landed but carries a stray
|
|
5374
|
+
// signup link is untouched, and a service that legitimately needs a
|
|
5375
|
+
// second OAuth click (no signup CTA) falls through. NOTE: gate on
|
|
5376
|
+
// classify !== "signup", NOT === "login": amplitude's Org-Login SSO page
|
|
5377
|
+
// has no password field, so classifySignupHtml returns "other".
|
|
5378
|
+
if ((loopBtn !== null || isLoginPageUrl(postOAuthState.url)) &&
|
|
5379
|
+
classifySignupHtml(postOAuthState.html) !== "signup") {
|
|
5380
|
+
const signupCta = findSignupCtaElement(postOAuthInv);
|
|
5381
|
+
if (signupCta !== null) {
|
|
5382
|
+
const ctaText = (signupCta.visibleText ??
|
|
5383
|
+
signupCta.ariaLabel ??
|
|
5384
|
+
"sign up").trim();
|
|
5385
|
+
steps.push(`Post-OAuth: ${task.service} shows a login page with a signup CTA ("${ctaText}") — ` +
|
|
5386
|
+
`${provider.label} identity has no account; clicking signup to create one.`);
|
|
5387
|
+
try {
|
|
5388
|
+
await this.browser.click(signupCta.selector);
|
|
5389
|
+
await this.browser.wait(2);
|
|
5390
|
+
}
|
|
5391
|
+
catch (err) {
|
|
5392
|
+
steps.push(`Post-OAuth: clicking the signup CTA threw (${err instanceof Error ? err.message : String(err)}) — ` +
|
|
5393
|
+
`falling back to form-fill anyway.`);
|
|
5394
|
+
}
|
|
5395
|
+
// Re-route into the email/password signup path: runSignup catches
|
|
5396
|
+
// this sentinel and re-runs form-fill on the now-signup page.
|
|
5397
|
+
return OAUTH_FALL_BACK_TO_FORM_FILL;
|
|
5398
|
+
}
|
|
5399
|
+
}
|
|
4141
5400
|
if (loopBtn !== null) {
|
|
4142
5401
|
steps.push(`Post-OAuth: landed on a login-like page (${pathOf(postOAuthState.url)}) ` +
|
|
4143
5402
|
`with a ${provider.label} sign-in button still visible — service requires a ` +
|
|
@@ -4179,6 +5438,20 @@ export class SignupAgent {
|
|
|
4179
5438
|
const gateState = await this.browser.getState();
|
|
4180
5439
|
const gateText = await this.browser.extractText().catch(() => "");
|
|
4181
5440
|
const gateInv = postOAuthInv;
|
|
5441
|
+
// (a0) Google-login-only / no-account (plunk class). OAuth
|
|
5442
|
+
// completed but the service bounced back saying this Google
|
|
5443
|
+
// identity has no account (e.g. plunk's
|
|
5444
|
+
// /auth/login?message=No%20account%20found…). MUST run before the
|
|
5445
|
+
// manual-login-fallback gate below — this page IS a /login form, so
|
|
5446
|
+
// detectManualLoginFallback would otherwise swallow it as
|
|
5447
|
+
// oauth_session_not_persisted and abort. The account simply needs
|
|
5448
|
+
// creating via email, so re-route to form-fill instead of bailing.
|
|
5449
|
+
if (detectGoogleNoAccount(gateState.url, gateText)) {
|
|
5450
|
+
steps.push(`OAuth: ${provider.label} sign-in succeeded but ${task.service} has no account for ` +
|
|
5451
|
+
`this identity (login-only OAuth, ${pathOf(gateState.url)}) — abandoning OAuth and ` +
|
|
5452
|
+
`falling back to email/password signup to create the account.`);
|
|
5453
|
+
return OAUTH_FALL_BACK_TO_FORM_FILL;
|
|
5454
|
+
}
|
|
4182
5455
|
// (a) Manual-login fallback (DigitalOcean, Hyperbolic). Service
|
|
4183
5456
|
// dropped the OAuth session and rendered a /login form with
|
|
4184
5457
|
// email + password inputs. Bot can't manually log in.
|
|
@@ -4641,7 +5914,12 @@ ${formatInventory(input.inventory)}`,
|
|
|
4641
5914
|
// email rather than "verify", and that broader matcher catches both.
|
|
4642
5915
|
async waitForVerificationEmail(inbox, alias, totalSeconds) {
|
|
4643
5916
|
const deadline = Date.now() + totalSeconds * 1000;
|
|
4644
|
-
|
|
5917
|
+
// `verif` (not `verify`) so the matcher also catches "verification" —
|
|
5918
|
+
// "verification" does NOT contain the substring "verify" (…ifi… vs
|
|
5919
|
+
// …ify), which silently dropped plausible's "4011 is your Plausible
|
|
5920
|
+
// email verification code" and timed the whole signup out. `code` /
|
|
5921
|
+
// `one[- ]?time` / `otp` catch code-based verification subjects too.
|
|
5922
|
+
const pattern = /verif|confirm|welcome|activate|complete|finish|set\s*up|\bcode\b|one[\s-]?time|\botp\b|sign[\s-]?up/i;
|
|
4645
5923
|
let lastErr = null;
|
|
4646
5924
|
while (Date.now() < deadline) {
|
|
4647
5925
|
const remainingSeconds = Math.max(1, Math.floor((deadline - Date.now()) / 1000));
|
|
@@ -4665,6 +5943,36 @@ ${formatInventory(input.inventory)}`,
|
|
|
4665
5943
|
}
|
|
4666
5944
|
throw lastErr ?? new Error("verification email did not arrive in time");
|
|
4667
5945
|
}
|
|
5946
|
+
// Code-based email verification (plausible: "Enter 4011 to verify your
|
|
5947
|
+
// email address"). The signup email carried a numeric code and no
|
|
5948
|
+
// clickable link, and the page transitioned to a code-input step after
|
|
5949
|
+
// submit. Seed the post-verify planner with the code so it fills the
|
|
5950
|
+
// input + clicks Verify, then drives on to the API key. Generalizes to
|
|
5951
|
+
// every service that verifies by emailed code rather than link.
|
|
5952
|
+
async enterEmailVerificationCode(code, task, password, steps) {
|
|
5953
|
+
if (code.length === 0) {
|
|
5954
|
+
steps.push("Verification email exposed a code field but it was empty — skipping.");
|
|
5955
|
+
return {};
|
|
5956
|
+
}
|
|
5957
|
+
steps.push(`Email carries a verification CODE (${code}) and no link — entering it on the page.`);
|
|
5958
|
+
// The post-submit "enter code" view may still be hydrating.
|
|
5959
|
+
await this.browser.waitForFormReady();
|
|
5960
|
+
const hint = `Email verification code retrieved: "${code}". The current page has a ` +
|
|
5961
|
+
`verification-code / OTP input (placeholder like "Code" / "Verification code", ` +
|
|
5962
|
+
`or several single-digit boxes — fill the FIRST and the browser auto-distributes). ` +
|
|
5963
|
+
`Issue {"kind":"fill","selector":"…","value":"${code}"} on it, then NEXT round click ` +
|
|
5964
|
+
`the Verify / Confirm / Continue / Submit button.`;
|
|
5965
|
+
return this.postVerifyLoop({
|
|
5966
|
+
service: task.service,
|
|
5967
|
+
credentials: { email: task.email, password },
|
|
5968
|
+
maxRounds: task.postVerifyMaxRounds ?? 6,
|
|
5969
|
+
steps,
|
|
5970
|
+
initialHint: hint,
|
|
5971
|
+
...(task.scopeHint !== undefined ? { scopeHint: task.scopeHint } : {}),
|
|
5972
|
+
...(task.machineToken !== undefined ? { machineToken: task.machineToken } : {}),
|
|
5973
|
+
...(task.apiBase !== undefined ? { apiBase: task.apiBase } : {}),
|
|
5974
|
+
});
|
|
5975
|
+
}
|
|
4668
5976
|
// Drive the browser toward the API key after the account exists —
|
|
4669
5977
|
// used by BOTH the email-verification path and the OAuth path (T9).
|
|
4670
5978
|
// Each round asks Claude what to do next given the current page; we
|
|
@@ -5047,6 +6355,16 @@ ${formatInventory(input.inventory)}`,
|
|
|
5047
6355
|
// so the loop can bail with oauth_session_not_persisted instead of
|
|
5048
6356
|
// thrashing maxRounds and mislabeling it oauth_onboarding_failed.
|
|
5049
6357
|
let oauthLoginRequests = 0;
|
|
6358
|
+
// Consecutive rounds on an OAuth run where the page is STILL a login /
|
|
6359
|
+
// authenticate screen. The planner usually doesn't return {"kind":
|
|
6360
|
+
// "login"} here — it keeps CLICKING "Sign in with Google" (groq,
|
|
6361
|
+
// northflank, amplitude), so the oauthLoginRequests counter above
|
|
6362
|
+
// never trips. But the structural fact is decisive and service-
|
|
6363
|
+
// agnostic: after OAuth, an authenticated bot is on a dashboard, not a
|
|
6364
|
+
// login page. N consecutive login-page rounds ⇒ the callback never
|
|
6365
|
+
// persisted (anti-bot/IP rejection) ⇒ oauth_session_not_persisted, not
|
|
6366
|
+
// a navigation bug. Generalizes without per-service URLs.
|
|
6367
|
+
let consecutiveOauthLoginPageRounds = 0;
|
|
5050
6368
|
let planFailures = 0;
|
|
5051
6369
|
// 0.8.2-rc.6 — separate counter for upstream-blip retries. Doesn't
|
|
5052
6370
|
// gate planFailures (so a transient 502 won't push us into the
|
|
@@ -5063,7 +6381,7 @@ ${formatInventory(input.inventory)}`,
|
|
|
5063
6381
|
// truncated (the S3-class trap: the planner sees a key-shaped
|
|
5064
6382
|
// string and keeps asking to extract it forever), or when the
|
|
5065
6383
|
// planner's last step was rejected.
|
|
5066
|
-
let hint;
|
|
6384
|
+
let hint = args.initialHint;
|
|
5067
6385
|
// rc.27 — when the email_otp gate handler retrieved a code from
|
|
5068
6386
|
// the operator's gmail, seed the FIRST round's hint with the
|
|
5069
6387
|
// code + explicit fill+submit instructions. Cleared after one
|
|
@@ -5116,6 +6434,14 @@ ${formatInventory(input.inventory)}`,
|
|
|
5116
6434
|
// navigate produced no progress. Inject a hint forcing a CLICK
|
|
5117
6435
|
// on something visible in the current inventory.
|
|
5118
6436
|
let prevNavigateFromUrl = null;
|
|
6437
|
+
// Stalled-wizard breaker. Tracks a content signature of the page +
|
|
6438
|
+
// the effect of each executed action, so we can detect an onboarding
|
|
6439
|
+
// wizard that re-presents itself (clicks don't register) and break
|
|
6440
|
+
// out instead of burning every round on it. See isStalledOnActions.
|
|
6441
|
+
let prevContentSig = null;
|
|
6442
|
+
let lastActionKind = null;
|
|
6443
|
+
let lastActionSelector = null;
|
|
6444
|
+
const actionEffects = [];
|
|
5119
6445
|
// 0.8.2-rc.10 — escalation for the stuck-loop detector.
|
|
5120
6446
|
//
|
|
5121
6447
|
// The existing detector injects a re-plan hint when the planner
|
|
@@ -5183,6 +6509,10 @@ ${formatInventory(input.inventory)}`,
|
|
|
5183
6509
|
// Gate URLs we've already polled the operator's gmail for, so a
|
|
5184
6510
|
// multi-round wait on the same email-OTP page doesn't re-poll.
|
|
5185
6511
|
const otpPolledUrls = new Set();
|
|
6512
|
+
// Running summary of the steps the planner has taken, fed back into
|
|
6513
|
+
// each planPostVerifyStep call so the (stateless) planner stops
|
|
6514
|
+
// re-doing completed onboarding steps and re-navigating dead URLs.
|
|
6515
|
+
const priorActions = [];
|
|
5186
6516
|
for (let round = 0; round < args.maxRounds; round++) {
|
|
5187
6517
|
const currentCredentialKeyCount = Object.keys(credentials).filter((k) => !NON_CREDENTIAL_KEYS.has(k)).length;
|
|
5188
6518
|
if (currentCredentialKeyCount > lastCredentialKeyCount) {
|
|
@@ -5254,6 +6584,125 @@ ${formatInventory(input.inventory)}`,
|
|
|
5254
6584
|
await this.browser.wait(2);
|
|
5255
6585
|
continue;
|
|
5256
6586
|
}
|
|
6587
|
+
// clerk class — Google account chooser inside the post-verify loop.
|
|
6588
|
+
// The planner re-clicked "Sign in with Google", which opened
|
|
6589
|
+
// accounts.google.com's chooser (.../accountchooser?...). That page
|
|
6590
|
+
// carries a stray "Loading" label (so the hydration guard below would
|
|
6591
|
+
// burn all its ticks idling) and tryClickGoogleChooserCard is only
|
|
6592
|
+
// wired into runOAuthFlow — so nothing here clicks the account card.
|
|
6593
|
+
// Detect the chooser by URL or its "Choose an account" copy, click
|
|
6594
|
+
// the card to continue OAuth, then skip the rest of this round's
|
|
6595
|
+
// planning (the next round re-reads the post-chooser page).
|
|
6596
|
+
const chooserText = await this.browser.extractText().catch(() => "");
|
|
6597
|
+
if (/accounts\.google\.com\/.*(accountchooser|chooseaccount|oauthchooseaccount)/i.test(state.url) ||
|
|
6598
|
+
/choose an account/i.test(chooserText)) {
|
|
6599
|
+
await this.tryClickGoogleChooserCard();
|
|
6600
|
+
args.steps.push(`Post-verify round ${round}: Google account chooser — clicked the account card to continue OAuth`);
|
|
6601
|
+
await this.browser.wait(2);
|
|
6602
|
+
try {
|
|
6603
|
+
[state, inventory] = await Promise.all([
|
|
6604
|
+
this.browser.getState(),
|
|
6605
|
+
this.buildInventory(args.steps, undefined, 80),
|
|
6606
|
+
]);
|
|
6607
|
+
}
|
|
6608
|
+
catch {
|
|
6609
|
+
// mid-navigation read after the card click — the next round
|
|
6610
|
+
// re-reads, so just fall through to it.
|
|
6611
|
+
}
|
|
6612
|
+
continue;
|
|
6613
|
+
}
|
|
6614
|
+
// SPA hydration guard. A post-OAuth dashboard (northflank's
|
|
6615
|
+
// /settings/access-tokens, PostHog) can render a "Connecting"/loading
|
|
6616
|
+
// shell while its JS bundle + websocket finish — slow over a
|
|
6617
|
+
// residential tunnel. The shell often carries a stray element or two
|
|
6618
|
+
// (a logo link, the <noscript>), so gating on an EMPTY inventory
|
|
6619
|
+
// misses it; the loading-shell TEXT is the authoritative "not yet
|
|
6620
|
+
// rendered" signal. Wait while that text persists, then proceed with
|
|
6621
|
+
// whatever's there (an honest "still a shell" beats a premature done —
|
|
6622
|
+
// and if the SPA never hydrates, e.g. a blocked websocket, the bound
|
|
6623
|
+
// keeps us from hanging).
|
|
6624
|
+
//
|
|
6625
|
+
// Budget = 6x3s = 18s. MEASURED: a dashboard SPA gated on a websocket
|
|
6626
|
+
// (northflank's wss://platform.northflank.com/websocket) hydrates in
|
|
6627
|
+
// ~12-15s over the tunnel. A larger budget BACKFIRES on a page that
|
|
6628
|
+
// will NEVER hydrate (e.g. an authed user stranded on /signup): the
|
|
6629
|
+
// wait re-runs every round and burns the 600s run cap. The escape for
|
|
6630
|
+
// a never-hydrating route is navigate-to-root post-OAuth, not a longer
|
|
6631
|
+
// wait here.
|
|
6632
|
+
//
|
|
6633
|
+
// ADAPTIVE exception (MEASURED 2026-06-04, clerk): an OAuth/SSO
|
|
6634
|
+
// CALLBACK route does a token exchange that renders even slower than a
|
|
6635
|
+
// plain dashboard — clerk's `/sign-in/sso-callback` outlasts 18s and
|
|
6636
|
+
// the bot bailed at the edge with `oauth_session_not_persisted`. On a
|
|
6637
|
+
// callback route the SPA IS making progress, so 12x3s = 36s of
|
|
6638
|
+
// patience is warranted; everywhere else the 6-tick budget holds so a
|
|
6639
|
+
// genuinely-stuck route still hits the navigate-to-root escape fast.
|
|
6640
|
+
// Read the URL fresh each round (it may redirect off the callback).
|
|
6641
|
+
const HYDRATION_TICKS = isOAuthCallbackRoute(state.url) ? 12 : 6;
|
|
6642
|
+
for (let hydrationWait = 0; hydrationWait < HYDRATION_TICKS &&
|
|
6643
|
+
isLoadingShellText(await this.browser.extractText().catch(() => "")); hydrationWait++) {
|
|
6644
|
+
args.steps.push(`Post-verify round ${round}: ${pathOf(state.url)} is a loading shell ` +
|
|
6645
|
+
`(hydration wait ${hydrationWait + 1}/${HYDRATION_TICKS}) — waiting for the SPA to render`);
|
|
6646
|
+
await this.browser.wait(3);
|
|
6647
|
+
try {
|
|
6648
|
+
[state, inventory] = await Promise.all([
|
|
6649
|
+
this.browser.getState(),
|
|
6650
|
+
this.buildInventory(args.steps, undefined, 80),
|
|
6651
|
+
]);
|
|
6652
|
+
}
|
|
6653
|
+
catch {
|
|
6654
|
+
// mid-navigation read — keep the prior state/inventory and let
|
|
6655
|
+
// the next hydration tick (or the planner) retry.
|
|
6656
|
+
}
|
|
6657
|
+
}
|
|
6658
|
+
// Stalled-wizard breaker. Build a content signature (URL + each
|
|
6659
|
+
// inventory element's selector + label) and judge whether the
|
|
6660
|
+
// PREVIOUS executed action changed the page. If the last few
|
|
6661
|
+
// page-mutating actions all left the page identical, a wizard is
|
|
6662
|
+
// re-presenting itself and clicking it does nothing — stop here so
|
|
6663
|
+
// we don't waste the remaining rounds + LLM budget. (axiom: 4×
|
|
6664
|
+
// role-card re-clicks that never advanced.)
|
|
6665
|
+
const contentSig = (state.url +
|
|
6666
|
+
"§" +
|
|
6667
|
+
inventory
|
|
6668
|
+
.map((e) => `${e.selector}·${(e.visibleText ?? e.ariaLabel ?? "").slice(0, 24)}`)
|
|
6669
|
+
.join("|")).slice(0, 4000);
|
|
6670
|
+
const pageUnchanged = prevContentSig !== null && contentSig === prevContentSig;
|
|
6671
|
+
if (lastActionKind !== null) {
|
|
6672
|
+
actionEffects.push({ kind: lastActionKind, pageUnchanged, selector: lastActionSelector });
|
|
6673
|
+
}
|
|
6674
|
+
prevContentSig = contentSig;
|
|
6675
|
+
if (isStalledOnActions(actionEffects)) {
|
|
6676
|
+
args.steps.push(`Post-verify: STALLED — the last 3 page-mutating actions left the page ` +
|
|
6677
|
+
`identical (${state.url}). An onboarding wizard is re-presenting itself ` +
|
|
6678
|
+
`(clicks not registering); giving up instead of burning the round budget.`);
|
|
6679
|
+
break;
|
|
6680
|
+
}
|
|
6681
|
+
// Non-persisting-OAuth detector (A5, broadened). On an OAuth run the
|
|
6682
|
+
// bot has ALREADY authenticated before this loop, so landing on a
|
|
6683
|
+
// login page means the callback was rejected. The planner usually
|
|
6684
|
+
// keeps clicking "Sign in with Google" rather than returning a
|
|
6685
|
+
// {"kind":"login"} step, so the oauthLoginRequests counter misses
|
|
6686
|
+
// it — track the structural fact (consecutive login-page rounds)
|
|
6687
|
+
// instead. Generalizes across services (groq/northflank/amplitude)
|
|
6688
|
+
// without per-service URLs; reclassifies these off the misleading
|
|
6689
|
+
// oauth_onboarding_failed label into the truthful (and unwinnable-
|
|
6690
|
+
// without-residential-egress) oauth_session_not_persisted wall.
|
|
6691
|
+
if (args.credentials === undefined && isLoginPageUrl(state.url)) {
|
|
6692
|
+
consecutiveOauthLoginPageRounds += 1;
|
|
6693
|
+
if (consecutiveOauthLoginPageRounds >= 3) {
|
|
6694
|
+
args.steps.push(`Post-verify: OAuth run still on a login page (${pathOf(state.url)}) for ` +
|
|
6695
|
+
`${consecutiveOauthLoginPageRounds} rounds — the OAuth callback never persisted; bailing.`);
|
|
6696
|
+
throw new OAuthSessionNotPersistedError(`oauth_session_not_persisted: signed in to ${args.service} via OAuth but the page ` +
|
|
6697
|
+
`still presents a login screen (${pathOf(state.url)}) after ` +
|
|
6698
|
+
`${consecutiveOauthLoginPageRounds} rounds — the OAuth callback never established a ` +
|
|
6699
|
+
`session (anti-bot / IP rejection of the callback). Not a navigation bug; needs ` +
|
|
6700
|
+
`residential egress or manual signup.`);
|
|
6701
|
+
}
|
|
6702
|
+
}
|
|
6703
|
+
else {
|
|
6704
|
+
consecutiveOauthLoginPageRounds = 0;
|
|
6705
|
+
}
|
|
5257
6706
|
// Email-OTP gate that surfaced AFTER OAuth (the pre-OAuth signup
|
|
5258
6707
|
// gate never saw it, so pendingOtpCode is unset). Convex's
|
|
5259
6708
|
// radar-challenge sends a 6-digit code to the operator's Google
|
|
@@ -5301,6 +6750,7 @@ ${formatInventory(input.inventory)}`,
|
|
|
5301
6750
|
inventory,
|
|
5302
6751
|
...(hint !== undefined ? { hint } : {}),
|
|
5303
6752
|
...(args.scopeHint !== undefined ? { scopeHint: args.scopeHint } : {}),
|
|
6753
|
+
...(priorActions.length > 0 ? { priorActions: priorActions.slice(-10) } : {}),
|
|
5304
6754
|
});
|
|
5305
6755
|
}
|
|
5306
6756
|
catch (err) {
|
|
@@ -5361,6 +6811,17 @@ ${formatInventory(input.inventory)}`,
|
|
|
5361
6811
|
// GitHub issue, leaking the credential. Redactor patterns mirror
|
|
5362
6812
|
// tools/archived-harvester/redact.mjs — defense in depth.
|
|
5363
6813
|
args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: ${nextStep.kind} — ${redactCredentials(nextStep.reason)}`);
|
|
6814
|
+
// Feed this action back into the next round's planner context so it
|
|
6815
|
+
// doesn't loop. Concise: where we were, what we did, why.
|
|
6816
|
+
{
|
|
6817
|
+
const where = state.url.replace(/^https?:\/\//, "").slice(0, 40);
|
|
6818
|
+
const target = "selector" in nextStep && nextStep.selector !== undefined
|
|
6819
|
+
? ` ${nextStep.selector.slice(0, 24)}`
|
|
6820
|
+
: "url" in nextStep && nextStep.url !== undefined
|
|
6821
|
+
? ` →${nextStep.url.replace(/^https?:\/\//, "").slice(0, 36)}`
|
|
6822
|
+
: "";
|
|
6823
|
+
priorActions.push(`@${where} ${nextStep.kind}${target}: ${redactCredentials(nextStep.reason).slice(0, 60)}`);
|
|
6824
|
+
}
|
|
5364
6825
|
// Dump this round's real page state + inventory in the E1
|
|
5365
6826
|
// eval-corpus format so onboarding adapters can be iterated
|
|
5366
6827
|
// offline without re-running the rate-limited OAuth handshake.
|
|
@@ -5726,6 +7187,14 @@ ${formatInventory(input.inventory)}`,
|
|
|
5726
7187
|
prevSignature = null;
|
|
5727
7188
|
prevInventorySize = inventory.length;
|
|
5728
7189
|
}
|
|
7190
|
+
// Record the kind of the step we're ABOUT to execute (all re-plan
|
|
7191
|
+
// `continue` guards are behind us here) so next round can judge
|
|
7192
|
+
// whether it changed the page — the stalled-wizard breaker above.
|
|
7193
|
+
lastActionKind = nextStep.kind;
|
|
7194
|
+
lastActionSelector =
|
|
7195
|
+
"selector" in nextStep && typeof nextStep.selector === "string"
|
|
7196
|
+
? nextStep.selector
|
|
7197
|
+
: null;
|
|
5729
7198
|
if (nextStep.kind === "done") {
|
|
5730
7199
|
// When the planner bails because it encountered Google's
|
|
5731
7200
|
// device-verification challenge mid-post-verify (Algolia +
|
|
@@ -6378,7 +7847,23 @@ Schema:
|
|
|
6378
7847
|
invent or guess a selector — one not in the inventory is rejected.
|
|
6379
7848
|
- If the element you want is NOT in the inventory, use {"kind":"navigate"}
|
|
6380
7849
|
to a likely settings URL instead of guessing a selector.
|
|
6381
|
-
|
|
7850
|
+
${input.priorActions !== undefined && input.priorActions.length > 0
|
|
7851
|
+
? `
|
|
7852
|
+
STEPS ALREADY TAKEN this session (most recent last). You plan ONE step
|
|
7853
|
+
at a time and do not otherwise remember earlier rounds — use this list
|
|
7854
|
+
so you do NOT loop:
|
|
7855
|
+
${input.priorActions.map((a, i) => ` ${i + 1}. ${a}`).join("\n")}
|
|
7856
|
+
- Do NOT repeat a completed onboarding-wizard step. If you already
|
|
7857
|
+
selected a role / company-size / use-case or accepted the terms, that
|
|
7858
|
+
step is DONE — move forward, never back to it.
|
|
7859
|
+
- Do NOT re-issue a {"kind":"navigate"} to a URL that already appears
|
|
7860
|
+
above and did not advance you. If a settings URL errored or bounced
|
|
7861
|
+
you back, try a DIFFERENT path or click a dashboard link instead.
|
|
7862
|
+
- If the last 3+ steps above are the same kind on the same URL with no
|
|
7863
|
+
progress, you are stuck — try a genuinely different action or return
|
|
7864
|
+
{"kind":"done"}.
|
|
7865
|
+
`
|
|
7866
|
+
: ""}
|
|
6382
7867
|
Strategy:
|
|
6383
7868
|
- If a FULL, untruncated API key is visible, return {"kind":"extract"}.
|
|
6384
7869
|
- **MULTI-CREDENTIAL SERVICES** — when the page shows TWO OR MORE
|
|
@@ -6390,10 +7875,13 @@ Strategy:
|
|
|
6390
7875
|
labels EVERY visible credential in the format
|
|
6391
7876
|
\`<canonical_label>='<value>'\` (use SINGLE quotes around values).
|
|
6392
7877
|
The bot's labeled-extractor will pull EACH labeled value into the
|
|
6393
|
-
credentials object. Example
|
|
6394
|
-
|
|
6395
|
-
|
|
6396
|
-
a
|
|
7878
|
+
credentials object. Example SHAPE (the bracketed parts are
|
|
7879
|
+
PLACEHOLDERS — you MUST substitute the REAL values visible on the
|
|
7880
|
+
CURRENT page; NEVER emit these literal bracket strings or any example
|
|
7881
|
+
values, and never name a service that is not the one you are on):
|
|
7882
|
+
"The API Keys page shows cloud_name='<real cloud_name from this page>'
|
|
7883
|
+
and api_key='<real api_key from this page>' in the table; api_secret
|
|
7884
|
+
is hidden behind a Reveal button."
|
|
6397
7885
|
Use the standard canonical labels: api_key, api_secret, secret_key,
|
|
6398
7886
|
publishable_key, access_token, client_id, client_secret, cloud_name,
|
|
6399
7887
|
application_id, admin_api_key, search_api_key, account_sid,
|
|
@@ -6411,10 +7899,11 @@ Strategy:
|
|
|
6411
7899
|
behind a Reveal button, return {"kind":"extract"} NOW for the
|
|
6412
7900
|
visible labels (the bot's labeled extractor folds them into the
|
|
6413
7901
|
credentials bundle) AND in the same reason field flag the masked
|
|
6414
|
-
credential so the bot's automatic reveal pass fires. Example
|
|
6415
|
-
|
|
6416
|
-
|
|
6417
|
-
|
|
7902
|
+
credential so the bot's automatic reveal pass fires. Example SHAPE
|
|
7903
|
+
(substitute the REAL values from the current page — the bracketed
|
|
7904
|
+
parts are placeholders, never emit them literally): "cloud_name='<real
|
|
7905
|
+
value>' and api_key='<real value>' are visible in the table;
|
|
7906
|
+
api_secret is hidden behind a Reveal button — please unmask." The masked
|
|
6418
7907
|
credential's label MUST appear with one of the trigger words
|
|
6419
7908
|
(masked / hidden / reveal / unmask / bullets / asterisks) so the
|
|
6420
7909
|
reveal pass triggers. Do this BEFORE attempting any explicit
|
|
@@ -6430,9 +7919,17 @@ Strategy:
|
|
|
6430
7919
|
capture whatever IS visible (even if just a cloud_name with no
|
|
6431
7920
|
api_secret) and return the partial bundle to the caller, which is
|
|
6432
7921
|
more useful than five wasted rounds of clicking a dead reveal.
|
|
6433
|
-
- To reach API keys,
|
|
6434
|
-
|
|
6435
|
-
|
|
7922
|
+
- To reach API keys, PREFER clicking a visible "API Keys" / "Tokens" /
|
|
7923
|
+
"Developer" / "Settings" link in the INVENTORY (a verified selector) — that
|
|
7924
|
+
always lands on the real page. Only use {"kind":"navigate"} to a GUESSED
|
|
7925
|
+
settings URL when NO such link is in the inventory, and NEVER guess the same
|
|
7926
|
+
URL twice. These pages usually live under user/ACCOUNT settings, not a
|
|
7927
|
+
project or workspace's settings.
|
|
7928
|
+
- **404 RECOVERY.** If the page is a 404 / "not found" / "page doesn't exist"
|
|
7929
|
+
/ "we couldn't find" (a guessed URL missed), do NOT retry it or guess
|
|
7930
|
+
another URL. {"kind":"navigate"} to the service's app ROOT/dashboard (the
|
|
7931
|
+
bare origin, e.g. https://app.<service>.com/) and find the API-keys link in
|
|
7932
|
+
the nav from there.
|
|
6436
7933
|
- **EXCEPT** when the page has a very small inventory (5 or fewer elements)
|
|
6437
7934
|
and one of them is an onboarding CTA — patterns like "Get started",
|
|
6438
7935
|
"Continue", "Activate", "Enable API", "Start free trial", "Set up".
|
|
@@ -6456,7 +7953,21 @@ Strategy:
|
|
|
6456
7953
|
"done" while a card-radio cluster is still visible.
|
|
6457
7954
|
${loginGuidance}
|
|
6458
7955
|
- If we're on a "verify your phone" / "verify email" wall, return done (we can't solve those).
|
|
6459
|
-
-
|
|
7956
|
+
- **EMPTY DASHBOARD — create the first resource.** Many services do NOT expose
|
|
7957
|
+
an API key until you create your first organization / project / cluster /
|
|
7958
|
+
database / service / workspace. If the dashboard shows NO existing resources
|
|
7959
|
+
(an empty state, "Create your first…", "No projects/clusters yet", "Get
|
|
7960
|
+
started by creating…", or just a lone "Create"/"New <resource>"/"+ New" CTA
|
|
7961
|
+
and nothing else useful), CLICK that CTA, then on the following rounds fill
|
|
7962
|
+
the minimal required fields (use a generated name like ts-<random> for
|
|
7963
|
+
name/slug fields, pick the first/free option for plans/regions) and confirm.
|
|
7964
|
+
The API-keys / tokens page appears only AFTER a resource exists. Do NOT
|
|
7965
|
+
return {"kind":"done"} or {"kind":"login"} on an empty dashboard while a
|
|
7966
|
+
create-resource CTA is visible — that is the path forward, not a dead end.
|
|
7967
|
+
- **Pre-filled fields are DONE — advance, don't re-touch.** If a required
|
|
7968
|
+
onboarding field (first name, company, email) is ALREADY populated, or a
|
|
7969
|
+
required selectable is ALREADY selected, do NOT re-fill/re-select it — click
|
|
7970
|
+
Continue / Next / Submit to move forward. Re-filling a satisfied field loops.
|
|
6460
7971
|
- For ANY dropdown — native (tag=select) OR a custom combobox (role=combobox / aria-haspopup=listbox, common on modern React apps like Sentry / Stripe / Vercel) — use {"kind":"select"}. "click" on a combobox trigger opens it but does not pick an option; do not click it repeatedly.
|
|
6461
7972
|
- When you need a SPECIFIC option from the dropdown — e.g. "Project: Read" on Sentry's permissions picker, or a specific region — include "option_text" with the visible label. The executor matches it case-insensitively as a substring. Omit "option_text" when any option is fine (a placeholder country picker).
|
|
6462
7973
|
- A post-OAuth onboarding form (organization name, region, terms) is normal — fill/select/check its fields and click Continue to advance toward the dashboard; do not return "done" just because it is a form.
|
|
@@ -6609,6 +8120,29 @@ ${formatInventory(input.inventory)}${input.hint !== undefined ? `\n\nIMPORTANT
|
|
|
6609
8120
|
// purpose — a "Continue with Google" / "Login with Google" /
|
|
6610
8121
|
// icon-only Google button all count when the bot has a
|
|
6611
8122
|
// provider session).
|
|
8123
|
+
// After a form submit, is the page a CONTINUATION step of the SAME signup
|
|
8124
|
+
// (amplitude's dedicated "Create your password" page is the canonical case)
|
|
8125
|
+
// rather than a dashboard, a credentials page, or a verify-your-email
|
|
8126
|
+
// screen? Returns a short label for the step trail, or null. Reused
|
|
8127
|
+
// fillValues already carry the password, so re-running planExecuteWithRetry
|
|
8128
|
+
// fills it. See isContinuationFormStep for the (conservative) signals.
|
|
8129
|
+
async detectContinuationFormStep() {
|
|
8130
|
+
let html = "";
|
|
8131
|
+
let url = "";
|
|
8132
|
+
let inventory;
|
|
8133
|
+
try {
|
|
8134
|
+
const state = await this.browser.getState();
|
|
8135
|
+
html = state.html;
|
|
8136
|
+
url = state.url;
|
|
8137
|
+
inventory = await this.browser.extractInteractiveElements();
|
|
8138
|
+
}
|
|
8139
|
+
catch {
|
|
8140
|
+
return null;
|
|
8141
|
+
}
|
|
8142
|
+
return isContinuationFormStep(html, inventory)
|
|
8143
|
+
? `password step at ${pathOf(url)}`
|
|
8144
|
+
: null;
|
|
8145
|
+
}
|
|
6612
8146
|
async looksLikeSignupPage() {
|
|
6613
8147
|
const state = await this.browser.getState();
|
|
6614
8148
|
// 1. URL-path shortcut. If we navigated to a signup-shaped path
|