@trusty-squire/mcp 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bot/agent.d.ts +125 -0
- package/dist/bot/agent.d.ts.map +1 -0
- package/dist/bot/agent.js +974 -0
- package/dist/bot/agent.js.map +1 -0
- package/dist/bot/asn.d.ts +15 -0
- package/dist/bot/asn.d.ts.map +1 -0
- package/dist/bot/asn.js +151 -0
- package/dist/bot/asn.js.map +1 -0
- package/dist/bot/browser.d.ts +70 -0
- package/dist/bot/browser.d.ts.map +1 -0
- package/dist/bot/browser.js +810 -0
- package/dist/bot/browser.js.map +1 -0
- package/dist/bot/debug.d.ts +3 -0
- package/dist/bot/debug.d.ts.map +1 -0
- package/dist/bot/debug.js +24 -0
- package/dist/bot/debug.js.map +1 -0
- package/dist/bot/inbox-client.d.ts +41 -0
- package/dist/bot/inbox-client.d.ts.map +1 -0
- package/dist/bot/inbox-client.js +68 -0
- package/dist/bot/inbox-client.js.map +1 -0
- package/dist/bot/index.d.ts +22 -0
- package/dist/bot/index.d.ts.map +1 -0
- package/dist/bot/index.js +83 -0
- package/dist/bot/index.js.map +1 -0
- package/dist/bot/llm-client.d.ts +75 -0
- package/dist/bot/llm-client.d.ts.map +1 -0
- package/dist/bot/llm-client.js +250 -0
- package/dist/bot/llm-client.js.map +1 -0
- package/dist/bot/prewarm-cache.d.ts +3 -0
- package/dist/bot/prewarm-cache.d.ts.map +1 -0
- package/dist/bot/prewarm-cache.js +91 -0
- package/dist/bot/prewarm-cache.js.map +1 -0
- package/dist/install/cli.d.ts +1 -1
- package/dist/install/cli.d.ts.map +1 -1
- package/dist/install/cli.js +1 -1
- package/dist/install/cli.js.map +1 -1
- package/dist/tools/provision-any.js +1 -1
- package/dist/tools/provision-any.js.map +1 -1
- package/dist/tools/wait-for-approval.d.ts +1 -1
- package/package.json +10 -7
|
@@ -0,0 +1,974 @@
|
|
|
1
|
+
// AI agent that controls browser navigation using Claude.
|
|
2
|
+
//
|
|
3
|
+
// Two responsibilities live here:
|
|
4
|
+
// 1. Plan how to fill a signup form (delegated to Claude — see analyzeSignupForm)
|
|
5
|
+
// 2. Execute the plan + handle the email-verification handshake
|
|
6
|
+
//
|
|
7
|
+
// The "plan" is a small JSON schema Claude emits, so this file stays a thin
|
|
8
|
+
// executor; the prompt is the contract. If a service breaks we tweak the
|
|
9
|
+
// prompt rather than threading service-specific logic through the agent.
|
|
10
|
+
import { saveDebugSnapshot } from "./debug.js";
|
|
11
|
+
import { wasRecentlyPrewarmed, recordPrewarmSuccess } from "./prewarm-cache.js";
|
|
12
|
+
import { pickLLMPair, } from "./llm-client.js";
|
|
13
|
+
// Hard cap on LLM calls per signup. A signup that runs away to 20+ calls
|
|
14
|
+
// is both expensive and almost certainly stuck in a planning loop. 15
|
|
15
|
+
// covers: 2 initial form plans, 1 re-plan pair on validation, plus 6
|
|
16
|
+
// post-verify rounds, with headroom. Override via env when debugging.
|
|
17
|
+
/**
 * Parse the LLM-call budget override.
 *
 * A missing or non-numeric override makes Number.parseInt return NaN, and
 * every `count >= NaN` comparison is false — which would silently disable
 * the budget check. Fall back to the default in that case.
 *
 * @param {string | undefined} raw - raw environment value, if any.
 * @param {number} fallback - budget to use when `raw` is absent or not a number.
 * @returns {number} the parsed override, or `fallback` when parsing yields NaN.
 */
function resolveLlmCallBudget(raw, fallback) {
    const parsed = Number.parseInt(raw ?? "", 10);
    return Number.isNaN(parsed) ? fallback : parsed;
}
const MAX_LLM_CALLS_PER_SIGNUP = resolveLlmCallBudget(process.env.UNIVERSAL_BOT_MAX_LLM_CALLS, 15);
|
|
18
|
+
// "Cheap mode" routes via OpenRouter to a budget vision model. Turn on
|
|
19
|
+
// when running large batches; turn off when you need maximum form-fill
|
|
20
|
+
// quality on tricky sites.
|
|
21
|
+
// Only the exact string "true" enables cheap mode; any other value (or an
// unset variable) leaves it off. Passed to pickLLMPair() as `preferCheap`
// when the SignupAgent constructor is not given an explicit LLM.
const PREFER_CHEAP_LLM = process.env.UNIVERSAL_BOT_PREFER_CHEAP === "true";
|
|
22
|
+
/**
 * Error raised when a signup run attempts more LLM calls than its budget
 * permits. The limit that was hit is echoed in the message.
 */
export class LLMCallBudgetExceeded extends Error {
    // Own property so the error identifies itself by name in logs and
    // `err.name` checks.
    name = "LLMCallBudgetExceeded";
    /**
     * @param budget - the call limit that was exceeded.
     */
    constructor(budget) {
        const message = `signup exceeded LLM call budget of ${budget}`;
        super(message);
    }
}
|
|
28
|
+
// The set of value_kinds the planner is allowed to emit. Kept as a
|
|
29
|
+
// runtime array so validation and the exhaustive `valueFor` switch
|
|
30
|
+
// share one source of truth.
|
|
31
|
+
// Frozen so the single source of truth for allowed value_kinds cannot be
// mutated at runtime by accident (e.g. a stray push/sort elsewhere).
const FILL_VALUE_KINDS = Object.freeze([
    "email",
    "password",
    "name",
    "username",
    "company",
    "literal",
]);
|
|
39
|
+
// Parsers/validators live at module scope so callLLM() can pass them
|
|
40
|
+
// as plain functions. Each parser MUST throw on any malformed reply —
|
|
41
|
+
// the throw is the signal callLLM uses to trigger the premium-fallback
|
|
42
|
+
// retry. The model output is untrusted: every field an executor later
|
|
43
|
+
// reads (selector, value_kind, url, ...) is validated here so a
|
|
44
|
+
// `{kind:"fill"}` with no selector can never reach browser.type().
|
|
45
|
+
// Shared fence-strip + `{...}` extraction + JSON.parse. Both planners
|
|
46
|
+
// take the same raw LLM reply shape, so the boilerplate lives once.
|
|
47
|
+
/**
 * Extract and parse the JSON object embedded in a raw LLM reply.
 *
 * Steps: trim, unwrap an optional markdown code fence, take the span from
 * the first `{` to the last `}`, JSON.parse it, and require the result to
 * be a plain (non-null, non-array) object.
 *
 * @param {string} raw - raw reply text from the model (untrusted).
 * @returns {Record<string, unknown>} a fresh object holding the parsed
 *   top-level keys as own properties.
 * @throws {Error} when no `{...}` span exists, when JSON.parse fails, or
 *   when the top-level value is not an object.
 */
function extractJsonObject(raw) {
    const trimmed = raw.trim();
    // Tolerate models that wrap their reply in markdown fences.
    const fenced = trimmed.match(/```(?:json)?\s*([\s\S]+?)\s*```/);
    const candidate = fenced?.[1] ?? trimmed;
    const match = candidate.match(/\{[\s\S]*\}/);
    if (match === null) {
        throw new Error(`no JSON object in reply: ${raw.slice(0, 200)}`);
    }
    let parsed;
    try {
        parsed = JSON.parse(match[0]);
    }
    catch (err) {
        throw new Error(`JSON.parse failed: ${err instanceof Error ? err.message : String(err)}`, { cause: err });
    }
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
        throw new Error("top-level JSON is not an object");
    }
    // Copy with Object.fromEntries rather than `obj[k] = v`: assigning a
    // model-supplied "__proto__" key would invoke the prototype setter and
    // let the reply inject inherited fields (kind, selector, ...) that the
    // validators would then read as if they were real. fromEntries defines
    // every key — including "__proto__" — as a plain own data property.
    return Object.fromEntries(Object.entries(parsed));
}
|
|
74
|
+
// Narrow `unknown` to a non-null object map.
|
|
75
|
+
/**
 * Narrow an unknown value to a plain key/value map.
 *
 * @param {unknown} value - value to check (untrusted model output).
 * @param {string} context - prefix for the error message, e.g. `action[0]`.
 * @returns {Record<string, unknown>} a fresh object holding `value`'s own
 *   enumerable string keys as own properties.
 * @throws {Error} when `value` is not an object, is null, or is an array.
 */
function asObject(value, context) {
    if (typeof value !== "object" || value === null || Array.isArray(value)) {
        throw new Error(`${context}: expected an object`);
    }
    // Object.fromEntries defines each key as an own data property. A plain
    // `obj[k] = v` loop would instead run the prototype setter for a
    // "__proto__" key, letting untrusted input supply inherited fields
    // (such as `selector`) that later lookups would pick up.
    return Object.fromEntries(Object.entries(value));
}
|
|
84
|
+
// Narrow `unknown` to a non-empty string.
|
|
85
|
+
/**
 * Read `obj[key]` and insist that it is a non-empty string.
 *
 * @param obj - map to read from.
 * @param key - field name to look up.
 * @param context - prefix for the error message.
 * @returns the string stored under `key`.
 * @throws {Error} when the field is absent, not a string, or empty.
 */
function requireString(obj, key, context) {
    const candidate = obj[key];
    const isNonEmptyString = typeof candidate === "string" && candidate !== "";
    if (isNonEmptyString) {
        return candidate;
    }
    throw new Error(`${context}: missing or empty string field "${key}"`);
}
|
|
92
|
+
/**
 * Type guard: is `value` one of the strings listed in FILL_VALUE_KINDS?
 *
 * @param value - candidate value_kind from the planner (untrusted).
 * @returns true only for a string that appears in FILL_VALUE_KINDS.
 */
function isFillValueKind(value) {
    if (typeof value !== "string") {
        return false;
    }
    // For strings, membership via includes() is the same strict comparison
    // the per-element `===` check performs.
    return FILL_VALUE_KINDS.includes(value);
}
|
|
98
|
+
/**
 * Validate a planner `fill` action and rebuild it from checked fields only.
 *
 * @param obj - the raw action map (untrusted model output).
 * @returns `{ kind: "fill", selector, value_kind, [literal], reason }`;
 *   `literal` is present only when the input carried a string `literal`.
 * @throws {Error} on a missing/empty selector, an unknown value_kind, or a
 *   "literal" value_kind without a string `literal`.
 */
function validateFillAction(obj) {
    const selector = requireString(obj, "selector", "fill action");
    const { value_kind: valueKind, reason: rawReason, literal: rawLiteral } = obj;
    if (!isFillValueKind(valueKind)) {
        throw new Error(`fill action: invalid value_kind ${JSON.stringify(valueKind)}`);
    }
    const action = { kind: "fill", selector, value_kind: valueKind };
    if (typeof rawLiteral === "string") {
        action.literal = rawLiteral;
    }
    else if (valueKind === "literal") {
        throw new Error('fill action: value_kind "literal" requires a "literal" string');
    }
    // `reason` is advisory only — a model that omits it should not trigger
    // an otherwise-pointless retry, so default it to the empty string.
    action.reason = typeof rawReason === "string" ? rawReason : "";
    return action;
}
|
|
115
|
+
/**
 * Validate one entry of a signup plan's `actions` array.
 *
 * @param value - raw action (untrusted model output).
 * @param index - position in the array, used in error messages.
 * @returns a validated `fill`, `check`, or `click` action.
 * @throws {Error} when the entry is not an object, has an unknown `kind`,
 *   or fails the per-kind field checks.
 */
function validateAction(value, index) {
    const obj = asObject(value, `action[${index}]`);
    const kind = obj["kind"];
    if (kind === "fill") {
        return validateFillAction(obj);
    }
    if (kind === "check" || kind === "click") {
        // check and click share a shape: a required selector plus an
        // advisory reason that defaults to "".
        const selector = requireString(obj, "selector", `action[${index}] (${kind})`);
        const rawReason = obj["reason"];
        return { kind, selector, reason: typeof rawReason === "string" ? rawReason : "" };
    }
    throw new Error(`action[${index}]: unknown kind ${JSON.stringify(kind)}`);
}
|
|
137
|
+
/**
 * Parse and validate the planner's signup-form plan from a raw LLM reply.
 *
 * Checks run in a fixed order — actions, then submit_selector, then
 * confidence — so the first problem found determines the error thrown.
 *
 * @param raw - raw reply text from the model.
 * @returns `{ actions, submit_selector, confidence, [notes] }`; `notes` is
 *   included only when the reply carried a string `notes`.
 * @throws {Error} on any malformed or missing field.
 */
export function parseSignupPlan(raw) {
    const obj = extractJsonObject(raw);
    const { actions: rawActions, confidence, notes } = obj;
    if (!Array.isArray(rawActions)) {
        throw new Error("signup plan missing actions[]");
    }
    const actions = rawActions.map((entry, position) => validateAction(entry, position));
    const submitSelector = requireString(obj, "submit_selector", "signup plan");
    const allowedConfidence = ["high", "medium", "low"];
    if (!allowedConfidence.includes(confidence)) {
        throw new Error(`signup plan: invalid confidence ${JSON.stringify(confidence)}`);
    }
    const plan = { actions, submit_selector: submitSelector, confidence };
    if (typeof notes === "string") {
        plan.notes = notes;
    }
    return plan;
}
|
|
154
|
+
/**
 * Parse and validate one post-verification step from a raw LLM reply.
 *
 * Recognised kinds: done, extract, click, fill, navigate, wait. The result
 * is rebuilt from validated fields only.
 *
 * @param raw - raw reply text from the model.
 * @returns the validated step object for the reply's `kind`.
 * @throws {Error} on an unknown kind or a missing/invalid required field.
 */
export function parsePostVerifyStep(raw) {
    const obj = extractJsonObject(raw);
    const { kind } = obj;
    // `reason` is advisory; default it so an otherwise-valid step that
    // omits it does not trip a retry.
    const rawReason = obj["reason"];
    const reason = typeof rawReason === "string" ? rawReason : "";
    if (kind === "done" || kind === "extract") {
        return { kind, reason };
    }
    if (kind === "click") {
        const selector = requireString(obj, "selector", "post-verify click step");
        return { kind, selector, reason };
    }
    if (kind === "fill") {
        const selector = requireString(obj, "selector", "post-verify fill step");
        const value = requireString(obj, "value", "post-verify fill step");
        return { kind, selector, value, reason };
    }
    if (kind === "navigate") {
        const url = requireString(obj, "url", "post-verify navigate step");
        return { kind, url, reason };
    }
    if (kind === "wait") {
        const { seconds } = obj;
        const isValidDuration = typeof seconds === "number" && Number.isFinite(seconds) && seconds >= 0;
        if (!isValidDuration) {
            throw new Error("post-verify wait step: invalid seconds");
        }
        return { kind, seconds, reason };
    }
    throw new Error(`post-verify step: unknown kind ${JSON.stringify(kind)}`);
}
|
|
195
|
+
// Upper bound on a credential we'll accept from a labeled match.
// Real API keys are short (Stripe ~32 chars); JWTs run to hundreds of
// characters, but the labeled patterns below don't target JWTs. Captcha
// challenge tokens are very long: g-recaptcha-response runs ~500-2000
// chars and cf-turnstile-response is similar. A 100-char ceiling cleanly
// admits real keys and rejects every captcha token shape we've seen.
const MAX_CREDENTIAL_LENGTH = 100;
// Substrings that, if present in a candidate, mark it as a
// challenge/cookie token rather than a credential. Cloudflare clearance
// cookies (`__cf`, `cf_clearance`), CDN challenge paths (`cdn-cgi`),
// and the visible field/param names of the captcha widgets.
const CAPTCHA_TOKEN_MARKERS = [
    "__cf",
    "cf_clearance",
    "cdn-cgi",
    "cf-turnstile-response",
    "g-recaptcha-response",
    "h-captcha-response",
];
/**
 * Pull an API key out of the *visible* page text.
 *
 * Two strategies, in priority order:
 *   1. Known service-specific prefixes (re_, sk_live_, ...) — high
 *      confidence, the prefix itself is the proof. The first hit wins.
 *   2. Labeled patterns ("api key: <value>", "Bearer <value>") — lower
 *      confidence, so they carry guard rails: the value must sit
 *      immediately after the label (spaces/tabs and an optional `:`/`=`,
 *      never a newline), must be at most MAX_CREDENTIAL_LENGTH characters,
 *      and must not contain a captcha/cookie marker.
 *
 * Every occurrence of a labeled pattern is examined in document order. A
 * rejected occurrence (for example a captcha token that happens to follow
 * an "access token" label) therefore no longer hides a valid key that
 * appears later in the same text.
 *
 * Exported for unit testing — the regex tuning here is the load-bearing
 * logic and deserves direct coverage.
 *
 * @param {string} text - visible page text to scan.
 * @returns {string | null} the first acceptable credential, or null.
 */
export function extractApiKeyFromText(text) {
    const prefixed = [
        /\bre_[a-zA-Z0-9]{20,}\b/, // Resend
        /\bsk_(?:live|test)_[a-zA-Z0-9]{20,}\b/, // Stripe secret
        /\bpk_(?:live|test)_[a-zA-Z0-9]{20,}\b/, // Stripe public
        /\bkey-[a-f0-9]{32}\b/, // Mailgun
        /\bphc_[a-zA-Z0-9]{32,}\b/, // PostHog
        /\bSG\.[a-zA-Z0-9_\-]{20,}\.[a-zA-Z0-9_\-]{20,}\b/, // SendGrid
    ];
    for (const pattern of prefixed) {
        const match = text.match(pattern);
        if (match !== null) {
            return match[0];
        }
    }
    // Global flag so matchAll() can walk every occurrence. The literals are
    // created per call, so no lastIndex state leaks between invocations.
    const labeled = [
        /(?:api[_\s-]?key|access[_\s-]?token|secret[_\s-]?key)[ \t]*[:=]?[ \t]*([a-zA-Z0-9_\-]{20,})/gi,
        /\b[Bb]earer[ \t]+([a-zA-Z0-9_\-.]{30,})/g,
    ];
    for (const pattern of labeled) {
        for (const match of text.matchAll(pattern)) {
            const candidate = match[1];
            if (candidate === undefined) {
                continue;
            }
            // Too long for a real key: treat it as a captcha challenge token.
            if (candidate.length > MAX_CREDENTIAL_LENGTH) {
                continue;
            }
            // Carries a known cookie/widget marker.
            const lower = candidate.toLowerCase();
            if (CAPTCHA_TOKEN_MARKERS.some((marker) => lower.includes(marker))) {
                continue;
            }
            return candidate;
        }
    }
    return null;
}
|
|
269
|
+
// Choose which link in a verification email to click. Scores each URL
|
|
270
|
+
// by keyword and picks the best — but only if it scored positive.
|
|
271
|
+
//
|
|
272
|
+
// Exported for unit testing. The all-negative case is the bug this
|
|
273
|
+
// guards: an email whose only links are unsubscribe/preferences scores
|
|
274
|
+
// <= 0 everywhere, and an earlier version returned links[0] anyway,
|
|
275
|
+
// navigating the bot straight to an unsubscribe URL.
|
|
276
|
+
/**
 * Choose the link most likely to be the verification link.
 *
 * Each URL is scored by case-insensitive keyword matches; the first URL
 * holding the highest score is returned, but only when that score is
 * positive. An email containing nothing but unsubscribe/preferences links
 * therefore yields null rather than an arbitrary link.
 *
 * @param links - candidate URLs, in the order they appeared.
 * @returns the best-scoring URL, or null when no URL scores above zero.
 */
export function pickVerificationLink(links) {
    // A rule contributes its weight once if ANY of its needles occurs.
    const rules = [
        { needles: ["verify", "confirm"], weight: 10 },
        { needles: ["activate"], weight: 8 },
        { needles: ["welcome"], weight: 3 },
        { needles: ["unsubscribe", "preferences"], weight: -10 },
    ];
    let bestUrl = null;
    let bestScore = 0;
    for (const url of links) {
        const haystack = url.toLowerCase();
        let score = 0;
        for (const { needles, weight } of rules) {
            if (needles.some((needle) => haystack.includes(needle))) {
                score += weight;
            }
        }
        // Strict `>` keeps the earliest URL on ties, and starting from 0
        // means only positive scores can ever be selected.
        if (score > bestScore) {
            bestScore = score;
            bestUrl = url;
        }
    }
    return bestUrl;
}
|
|
294
|
+
// Discriminates LLMPair from LLMClient. LLMPair has `primary` (an
|
|
295
|
+
// LLMClient); LLMClient has `createMessage`. They're mutually exclusive
|
|
296
|
+
// shapes so a structural check is reliable.
|
|
297
|
+
/**
 * Structural check: does `x` carry a non-null object under `primary`?
 *
 * @param x - an object that is either an LLM pair or a single client.
 * @returns true when `x` has a `primary` property holding a non-null object.
 */
function isLLMPair(x) {
    if (!("primary" in x)) {
        return false;
    }
    const { primary } = x;
    return primary !== null && typeof primary === "object";
}
|
|
300
|
+
export class SignupAgent {
|
|
301
|
+
browser;
|
|
302
|
+
// Per-run counter so a single SignupAgent (which lives one run) can't
|
|
303
|
+
// burn through more than MAX_LLM_CALLS_PER_SIGNUP. Reset isn't needed
|
|
304
|
+
// because each signup gets a fresh SignupAgent in index.ts.
|
|
305
|
+
llmCallCount = 0;
|
|
306
|
+
// Tracks which backend handled each call, for debugging cost/quality.
|
|
307
|
+
// backends_used[i] is the .name string of the LLMClient that produced
|
|
308
|
+
// the i-th reply this run.
|
|
309
|
+
backendsUsed = [];
|
|
310
|
+
llmPair;
|
|
311
|
+
// Captcha encounter state for the current run. Updated by the
|
|
312
|
+
// pre/post-submit/re-plan captcha gates in signup(); read by the
|
|
313
|
+
// result builder. We track the *last* encounter (overwrites win)
|
|
314
|
+
// because a "blocked" outcome is more diagnostic than an earlier
|
|
315
|
+
// "solved" one and we always want the failure mode in the result.
|
|
316
|
+
captchaEncounter = undefined;
|
|
317
|
+
// Helper: build the common trailing fields every SignupResult needs.
|
|
318
|
+
// This used to be inlined at each return site (6 of them); a one-line
|
|
319
|
+
// helper keeps the captcha field from being forgotten on a future
|
|
320
|
+
// refactor that adds a 7th return path.
|
|
321
|
+
resultTail() {
|
|
322
|
+
return {
|
|
323
|
+
llm_calls: this.llmCallCount,
|
|
324
|
+
llm_backends: [...this.backendsUsed],
|
|
325
|
+
browser_channel: this.browser.channel,
|
|
326
|
+
...(this.captchaEncounter !== undefined ? { captcha: this.captchaEncounter } : {}),
|
|
327
|
+
};
|
|
328
|
+
}
|
|
329
|
+
// Run one Tier-2 visible-captcha gate. There are three gates in a
|
|
330
|
+
// run (pre-submit, post-submit, re-plan). Each solveVisibleCaptcha()
|
|
331
|
+
// call burns up to its 30s timeout.
|
|
332
|
+
//
|
|
333
|
+
// Short-circuit: Cloudflare/reCAPTCHA scoring is sticky per session
|
|
334
|
+
// — once one gate reports `blocked`, every later gate is doomed to
|
|
335
|
+
// the same outcome, so probing them just burns another 30s each (up
|
|
336
|
+
// to 90s wasted on a run that's already lost). Once captchaEncounter
|
|
337
|
+
// records a block, subsequent gates skip the browser call entirely
|
|
338
|
+
// and report blocked immediately with the original kind.
|
|
339
|
+
async runCaptchaGate(label, steps) {
|
|
340
|
+
if (this.captchaEncounter !== undefined && this.captchaEncounter.blocked) {
|
|
341
|
+
const kind = this.captchaEncounter.kind;
|
|
342
|
+
steps.push(`${label} captcha gate skipped — session already captcha-blocked (${kind}).`);
|
|
343
|
+
return { found: true, solved: false, blocked: true, kind };
|
|
344
|
+
}
|
|
345
|
+
const result = await this.browser.solveVisibleCaptcha();
|
|
346
|
+
if (!result.found) {
|
|
347
|
+
return { found: false, solved: false, blocked: false, kind: "turnstile" };
|
|
348
|
+
}
|
|
349
|
+
steps.push(`${label} captcha (${result.kind}): ${result.solved ? "solved" : "NOT solved (timeout)"}`);
|
|
350
|
+
this.captchaEncounter = { kind: result.kind, blocked: !result.solved };
|
|
351
|
+
return { found: true, solved: result.solved, blocked: !result.solved, kind: result.kind };
|
|
352
|
+
}
|
|
353
|
+
// Execute a planned set of fill/check/click actions. Used by both
|
|
354
|
+
// the initial-plan and the validation re-plan paths so the step
|
|
355
|
+
// logging stays consistent (the re-plan copy historically dropped
|
|
356
|
+
// the per-action steps.push entries the initial path had).
|
|
357
|
+
async executePlan(plan, fillValues, steps) {
|
|
358
|
+
for (const action of plan.actions) {
|
|
359
|
+
try {
|
|
360
|
+
if (action.kind === "fill") {
|
|
361
|
+
// `literal` is per-action; everything else is a fixed value.
|
|
362
|
+
const value = action.value_kind === "literal"
|
|
363
|
+
? action.literal ?? ""
|
|
364
|
+
: fillValues[action.value_kind];
|
|
365
|
+
steps.push(`Fill ${action.value_kind} → ${action.selector}`);
|
|
366
|
+
await this.browser.type(action.selector, value);
|
|
367
|
+
}
|
|
368
|
+
else if (action.kind === "check") {
|
|
369
|
+
steps.push(`Check ${action.selector} (${action.reason})`);
|
|
370
|
+
await this.browser.check(action.selector);
|
|
371
|
+
}
|
|
372
|
+
else {
|
|
373
|
+
steps.push(`Click ${action.selector} (${action.reason})`);
|
|
374
|
+
await this.browser.click(action.selector);
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
catch (err) {
|
|
378
|
+
steps.push(`⚠ action failed (${action.kind} ${action.selector}): ${err instanceof Error ? err.message : String(err)}`);
|
|
379
|
+
// continue — a missing optional field shouldn't abort the whole signup
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
constructor(browser, llm) {
|
|
384
|
+
this.browser = browser;
|
|
385
|
+
if (llm === undefined) {
|
|
386
|
+
this.llmPair = pickLLMPair({ preferCheap: PREFER_CHEAP_LLM });
|
|
387
|
+
}
|
|
388
|
+
else if (isLLMPair(llm)) {
|
|
389
|
+
// Caller passed an explicit LLMPair — use it directly so they
|
|
390
|
+
// control both primary and premium-fallback selection.
|
|
391
|
+
this.llmPair = llm;
|
|
392
|
+
}
|
|
393
|
+
else {
|
|
394
|
+
// Caller passed a single LLMClient — no premium fallback in that
|
|
395
|
+
// case. Tests and the MCP-Sampling future path use this.
|
|
396
|
+
this.llmPair = { primary: llm, premium: null };
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
// Read-only view of how many calls landed on which backend. Exported
|
|
400
|
+
// through SignupResult.llm_backends so tests and ops can verify the
|
|
401
|
+
// dual-mode fallback is actually engaging when expected.
|
|
402
|
+
get backends() {
|
|
403
|
+
return this.backendsUsed;
|
|
404
|
+
}
|
|
405
|
+
// Single entry point for every LLM call. Increments the per-run
|
|
406
|
+
// counter, throws if we'd exceed the budget. When a `parse` callback
|
|
407
|
+
// is supplied and the primary's reply fails to parse, this method
|
|
408
|
+
// automatically retries once with the premium client (if available).
|
|
409
|
+
// The retry counts against the same budget.
|
|
410
|
+
async callLLM(args) {
|
|
411
|
+
// Use a typed helper because the conditional return type confuses
|
|
412
|
+
// the inner narrowing.
|
|
413
|
+
return (await this.callLLMInner(args));
|
|
414
|
+
}
|
|
415
|
+
async callLLMInner(args) {
|
|
416
|
+
const callOne = async (client) => {
|
|
417
|
+
if (this.llmCallCount >= MAX_LLM_CALLS_PER_SIGNUP) {
|
|
418
|
+
throw new LLMCallBudgetExceeded(MAX_LLM_CALLS_PER_SIGNUP);
|
|
419
|
+
}
|
|
420
|
+
this.llmCallCount += 1;
|
|
421
|
+
const resp = await client.createMessage({
|
|
422
|
+
system: args.system,
|
|
423
|
+
user: args.userBlocks,
|
|
424
|
+
max_tokens: args.maxTokens,
|
|
425
|
+
});
|
|
426
|
+
this.backendsUsed.push(resp.backend);
|
|
427
|
+
return resp.text;
|
|
428
|
+
};
|
|
429
|
+
const primaryRaw = await callOne(this.llmPair.primary);
|
|
430
|
+
if (args.parse === undefined)
|
|
431
|
+
return primaryRaw;
|
|
432
|
+
try {
|
|
433
|
+
return args.parse(primaryRaw);
|
|
434
|
+
}
|
|
435
|
+
catch (primaryErr) {
|
|
436
|
+
// Primary couldn't produce a parseable reply. If we have a premium
|
|
437
|
+
// fallback, retry once with it. The premium retry uses the SAME
|
|
438
|
+
// prompt — Sonnet is consistently better at structured JSON, so a
|
|
439
|
+
// fresh look at the same input usually suffices.
|
|
440
|
+
if (this.llmPair.premium === null)
|
|
441
|
+
throw primaryErr;
|
|
442
|
+
const premiumRaw = await callOne(this.llmPair.premium);
|
|
443
|
+
try {
|
|
444
|
+
return args.parse(premiumRaw);
|
|
445
|
+
}
|
|
446
|
+
catch (premiumErr) {
|
|
447
|
+
// Both failed. Surface the premium error because it's the more
|
|
448
|
+
// informative one (Sonnet's "I can't comply" is more diagnostic
|
|
449
|
+
// than Gemini's "{ actions: [... unbalanced bracket}").
|
|
450
|
+
throw premiumErr;
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
// Run the right flavor of prewarm against a URL and update both the
|
|
455
|
+
// visible step trail and the persistent cache. Failures are
|
|
456
|
+
// swallowed (logged into steps[]) because prewarm is always best-
|
|
457
|
+
// effort — its value is conditioning the next page load, and a
|
|
458
|
+
// missing prewarm just means we hit that page cold.
|
|
459
|
+
async runPrewarm(url, steps) {
|
|
460
|
+
let origin;
|
|
461
|
+
try {
|
|
462
|
+
origin = new URL(url).origin;
|
|
463
|
+
}
|
|
464
|
+
catch {
|
|
465
|
+
// Caller passed something un-parseable. Don't bother trying.
|
|
466
|
+
return;
|
|
467
|
+
}
|
|
468
|
+
let mode;
|
|
469
|
+
try {
|
|
470
|
+
mode = (await wasRecentlyPrewarmed(origin)) ? "fast" : "referrer-chain";
|
|
471
|
+
}
|
|
472
|
+
catch {
|
|
473
|
+
// Cache read fail — bias toward the cheap mode so we don't blow
|
|
474
|
+
// 30s on every signup when the cache file is unreadable.
|
|
475
|
+
mode = "fast";
|
|
476
|
+
}
|
|
477
|
+
steps.push(`Prewarming ${origin} (${mode})`);
|
|
478
|
+
try {
|
|
479
|
+
await this.browser.prewarm(url, mode);
|
|
480
|
+
// Only record on success — and only when we did the meaningful
|
|
481
|
+
// (referrer-chain) prewarm. A fast prewarm against a cold cache
|
|
482
|
+
// shouldn't pretend we've done the work.
|
|
483
|
+
if (mode === "referrer-chain") {
|
|
484
|
+
try {
|
|
485
|
+
await recordPrewarmSuccess(origin);
|
|
486
|
+
}
|
|
487
|
+
catch {
|
|
488
|
+
// Cache write failure already logged in prewarm-cache module.
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
}
|
|
492
|
+
catch (err) {
|
|
493
|
+
steps.push(`⚠ prewarm failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
|
|
494
|
+
}
|
|
495
|
+
}
|
|
496
|
+
async signup(task) {
|
|
497
|
+
const steps = [];
|
|
498
|
+
const password = task.generatePassword();
|
|
499
|
+
const displayName = "Trusty Squire Bot";
|
|
500
|
+
const username = `tsbot${Date.now().toString().slice(-7)}`;
|
|
501
|
+
try {
|
|
502
|
+
// Step 1: Navigate to signup page
|
|
503
|
+
const signupUrl = task.signupUrl ?? `https://www.google.com/search?q=${encodeURIComponent(`${task.service} signup`)}`;
|
|
504
|
+
// Prewarm the target origin before hitting the (often-strict) signup
|
|
505
|
+
// page. Two things this buys us:
|
|
506
|
+
// 1. First-party cookies on the root domain. Cloudflare's
|
|
507
|
+
// cf_clearance + reCAPTCHA v3's score JS both record a
|
|
508
|
+
// browsing-history signal; a cold landing on /sign_up reads
|
|
509
|
+
// as bot-like vs. a session that visited the marketing root
|
|
510
|
+
// first.
|
|
511
|
+
// 2. A wall-clock window for the scoring JS to calibrate on a
|
|
512
|
+
// benign page. Postmark and Resend both ship reCAPTCHA v3 /
|
|
513
|
+
// Turnstile that scores the whole session, not just the
|
|
514
|
+
// submit moment.
|
|
515
|
+
//
|
|
516
|
+
// Mode selection: the heavy "referrer-chain" prewarm is what
|
|
517
|
+
// actually moves the v3 score (~30-45s of wall clock; google
|
|
518
|
+
// search → click → scroll → navigate). The light "fast" mode
|
|
519
|
+
// is dwell-only (~2s). We use the cache to decide: cold cache
|
|
520
|
+
// means do the heavy one and cache the result; warm cache means
|
|
521
|
+
// we've recently established cookies for this domain and the
|
|
522
|
+
// light version is enough.
|
|
523
|
+
//
|
|
524
|
+
// Skip entirely when the URL is a Google-search fallback (no
|
|
525
|
+
// real origin to warm) or when prewarm itself fails (don't fail
|
|
526
|
+
// the run just because the marketing site is down).
|
|
527
|
+
if (task.signupUrl !== undefined) {
|
|
528
|
+
await this.runPrewarm(signupUrl, steps);
|
|
529
|
+
}
|
|
530
|
+
steps.push(`Navigating to ${signupUrl}`);
|
|
531
|
+
await this.browser.goto(signupUrl);
|
|
532
|
+
await this.browser.wait(2);
|
|
533
|
+
if (task.signupUrl === undefined) {
|
|
534
|
+
steps.push("Searching for signup page...");
|
|
535
|
+
const found = await this.findSignupLink();
|
|
536
|
+
if (found !== null) {
|
|
537
|
+
// Now that we know the real signup origin, prewarm it before
|
|
538
|
+
// the deep navigation. Same rationale as above.
|
|
539
|
+
await this.runPrewarm(found, steps);
|
|
540
|
+
steps.push(`Found signup link: ${found}`);
|
|
541
|
+
await this.browser.goto(found);
|
|
542
|
+
await this.browser.wait(2);
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
// Step 2: Plan the form fill with Claude.
|
|
546
|
+
steps.push("Asking Claude to plan the signup form fill...");
|
|
547
|
+
await saveDebugSnapshot(this.browser, "before-fill");
|
|
548
|
+
const state = await this.browser.getState();
|
|
549
|
+
const plan = await this.planSignupForm({
|
|
550
|
+
service: task.service,
|
|
551
|
+
url: state.url,
|
|
552
|
+
html: state.html,
|
|
553
|
+
screenshot: state.screenshot,
|
|
554
|
+
});
|
|
555
|
+
steps.push(`Plan: ${plan.actions.length} action(s), confidence=${plan.confidence}${plan.notes !== undefined ? ` — ${plan.notes}` : ""}`);
|
|
556
|
+
// Step 3: Execute the plan.
|
|
557
|
+
const fillValues = {
|
|
558
|
+
email: task.email,
|
|
559
|
+
password,
|
|
560
|
+
name: displayName,
|
|
561
|
+
username,
|
|
562
|
+
company: "Trusty Squire",
|
|
563
|
+
// `literal` has no fixed value — resolved per-action below.
|
|
564
|
+
literal: "",
|
|
565
|
+
};
|
|
566
|
+
await this.executePlan(plan, fillValues, steps);
|
|
567
|
+
// Tier 2 captcha (pre-submit): check for a visible
|
|
568
|
+
// Turnstile/reCAPTCHA widget rendered inline with the form.
|
|
569
|
+
// Many sites render the widget alongside the form on first
|
|
570
|
+
// load; failing to interact with it leaves cf-turnstile-response
|
|
571
|
+
// empty and the submit gets server-side-rejected with a generic
|
|
572
|
+
// validation error we'd waste a re-plan trying to debug.
|
|
573
|
+
const preSubmitGate = await this.runCaptchaGate("Pre-submit", steps);
|
|
574
|
+
if (preSubmitGate.blocked) {
|
|
575
|
+
return {
|
|
576
|
+
success: false,
|
|
577
|
+
error: `captcha_blocked: visible ${preSubmitGate.kind} challenge did not resolve. The site flagged this session.`,
|
|
578
|
+
steps,
|
|
579
|
+
...this.resultTail(),
|
|
580
|
+
};
|
|
581
|
+
}
|
|
582
|
+
// Step 4: Submit. clickSubmit() disambiguates when the planned
|
|
583
|
+
// selector matches several button[type=submit] (OAuth buttons are
|
|
584
|
+
// submit-typed too). A submit click that fails means the form was
|
|
585
|
+
// never submitted — fail fast here rather than fall through into
|
|
586
|
+
// the multi-minute verification-email poll for an email that can
|
|
587
|
+
// never arrive.
|
|
588
|
+
steps.push(`Submit → ${plan.submit_selector}`);
|
|
589
|
+
try {
|
|
590
|
+
await this.browser.clickSubmit(plan.submit_selector);
|
|
591
|
+
}
|
|
592
|
+
catch (err) {
|
|
593
|
+
const reason = err instanceof Error ? err.message : String(err);
|
|
594
|
+
steps.push(`⚠ submit click failed: ${reason}`);
|
|
595
|
+
return {
|
|
596
|
+
success: false,
|
|
597
|
+
error: `submit_failed: could not click the signup button — ${reason}`,
|
|
598
|
+
steps,
|
|
599
|
+
...this.resultTail(),
|
|
600
|
+
};
|
|
601
|
+
}
|
|
602
|
+
await this.browser.wait(5);
|
|
603
|
+
// Tier 2 captcha (post-submit): some services only render the
|
|
604
|
+
// challenge after form submission (deferred rendering). Same
|
|
605
|
+
// shape as the pre-submit check.
|
|
606
|
+
const postSubmitGate = await this.runCaptchaGate("Post-submit", steps);
|
|
607
|
+
if (postSubmitGate.blocked) {
|
|
608
|
+
return {
|
|
609
|
+
success: false,
|
|
610
|
+
error: `captcha_blocked: post-submit ${postSubmitGate.kind} challenge did not resolve.`,
|
|
611
|
+
steps,
|
|
612
|
+
...this.resultTail(),
|
|
613
|
+
};
|
|
614
|
+
}
|
|
615
|
+
if (postSubmitGate.found && postSubmitGate.solved) {
|
|
616
|
+
// Re-click submit so the populated token ships with the form.
|
|
617
|
+
try {
|
|
618
|
+
await this.browser.click(plan.submit_selector);
|
|
619
|
+
await this.browser.wait(3);
|
|
620
|
+
}
|
|
621
|
+
catch (err) {
|
|
622
|
+
steps.push(`⚠ post-captcha submit retry failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
623
|
+
}
|
|
624
|
+
}
|
|
625
|
+
await saveDebugSnapshot(this.browser, "after-submit");
|
|
626
|
+
// Step 5: Detect post-submit validation errors — if visible text contains
|
|
627
|
+
// hints like "required", "must be between", "please accept", we re-plan
|
|
628
|
+
// once with the new state. This handles the Postmark-style server-side
|
|
629
|
+
// validation case.
|
|
630
|
+
const afterSubmitText = (await this.browser.extractText()).slice(0, 4000);
|
|
631
|
+
if (this.looksLikeValidationFailure(afterSubmitText)) {
|
|
632
|
+
steps.push("Post-submit text suggests validation errors — re-planning...");
|
|
633
|
+
const state2 = await this.browser.getState();
|
|
634
|
+
const plan2 = await this.planSignupForm({
|
|
635
|
+
service: task.service,
|
|
636
|
+
url: state2.url,
|
|
637
|
+
html: state2.html,
|
|
638
|
+
screenshot: state2.screenshot,
|
|
639
|
+
hint: `Previous submit produced validation errors. Visible page text snippet: ${afterSubmitText.slice(0, 800)}`,
|
|
640
|
+
});
|
|
641
|
+
await this.executePlan(plan2, fillValues, steps);
|
|
642
|
+
// Re-plan path: same captcha guard as initial submit. If the
|
|
643
|
+
// first submit triggered captcha rendering, the second pass
|
|
644
|
+
// sees it inline.
|
|
645
|
+
const replanGate = await this.runCaptchaGate("Re-plan", steps);
|
|
646
|
+
if (replanGate.blocked) {
|
|
647
|
+
return {
|
|
648
|
+
success: false,
|
|
649
|
+
error: `captcha_blocked: re-plan ${replanGate.kind} challenge did not resolve.`,
|
|
650
|
+
steps,
|
|
651
|
+
...this.resultTail(),
|
|
652
|
+
};
|
|
653
|
+
}
|
|
654
|
+
try {
|
|
655
|
+
await this.browser.clickSubmit(plan2.submit_selector);
|
|
656
|
+
}
|
|
657
|
+
catch (err) {
|
|
658
|
+
const reason = err instanceof Error ? err.message : String(err);
|
|
659
|
+
steps.push(`⚠ re-plan submit click failed: ${reason}`);
|
|
660
|
+
return {
|
|
661
|
+
success: false,
|
|
662
|
+
error: `submit_failed: re-plan submit could not click the signup button — ${reason}`,
|
|
663
|
+
steps,
|
|
664
|
+
...this.resultTail(),
|
|
665
|
+
};
|
|
666
|
+
}
|
|
667
|
+
await this.browser.wait(5);
|
|
668
|
+
await saveDebugSnapshot(this.browser, "after-resubmit");
|
|
669
|
+
}
|
|
670
|
+
// Step 6: Extract creds from page.
|
|
671
|
+
steps.push("Extracting credentials from page...");
|
|
672
|
+
let credentials = await this.extractCredentials();
|
|
673
|
+
// Step 7: Email verification + post-verification navigation.
|
|
674
|
+
if (credentials.api_key === undefined && credentials.username === undefined && task.inbox !== undefined) {
|
|
675
|
+
const verificationTimeoutSeconds = task.verificationTimeoutSeconds ?? 300;
|
|
676
|
+
steps.push(`No credentials on page — polling inbox for verification email (up to ${verificationTimeoutSeconds}s)...`);
|
|
677
|
+
try {
|
|
678
|
+
const email = await this.waitForVerificationEmail(task.inbox, task.email, verificationTimeoutSeconds);
|
|
679
|
+
steps.push(`Received: "${email.subject}" from ${email.from_address}`);
|
|
680
|
+
if (email.parsed_links.length > 0) {
|
|
681
|
+
const verifyLink = this.pickVerificationLink(Array.from(email.parsed_links));
|
|
682
|
+
if (verifyLink !== null) {
|
|
683
|
+
steps.push(`Following verification link: ${verifyLink}`);
|
|
684
|
+
await this.browser.goto(verifyLink);
|
|
685
|
+
await this.browser.wait(3);
|
|
686
|
+
await saveDebugSnapshot(this.browser, "after-verify");
|
|
687
|
+
// Try extracting first — many services drop the API key
|
|
688
|
+
// straight onto the landing page after verification.
|
|
689
|
+
credentials = await this.extractCredentials();
|
|
690
|
+
// If no creds yet, run the Claude-planned navigation loop.
|
|
691
|
+
if (credentials.api_key === undefined && credentials.username === undefined) {
|
|
692
|
+
const maxRounds = task.postVerifyMaxRounds ?? 6;
|
|
693
|
+
credentials = await this.postVerifyLoop({
|
|
694
|
+
service: task.service,
|
|
695
|
+
email: task.email,
|
|
696
|
+
password,
|
|
697
|
+
maxRounds,
|
|
698
|
+
steps,
|
|
699
|
+
});
|
|
700
|
+
}
|
|
701
|
+
}
|
|
702
|
+
else {
|
|
703
|
+
steps.push("Email had no usable verification link.");
|
|
704
|
+
}
|
|
705
|
+
}
|
|
706
|
+
else {
|
|
707
|
+
steps.push("Email had no parsed links — skipping verification click.");
|
|
708
|
+
}
|
|
709
|
+
}
|
|
710
|
+
catch (err) {
|
|
711
|
+
steps.push(`Inbox poll failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
712
|
+
}
|
|
713
|
+
}
|
|
714
|
+
if (credentials.api_key !== undefined || credentials.username !== undefined) {
|
|
715
|
+
return {
|
|
716
|
+
success: true,
|
|
717
|
+
credentials: { ...credentials, password, email: task.email },
|
|
718
|
+
steps,
|
|
719
|
+
...this.resultTail(),
|
|
720
|
+
};
|
|
721
|
+
}
|
|
722
|
+
return {
|
|
723
|
+
success: false,
|
|
724
|
+
error: "Could not find credentials on page or via email",
|
|
725
|
+
steps,
|
|
726
|
+
...this.resultTail(),
|
|
727
|
+
};
|
|
728
|
+
}
|
|
729
|
+
catch (error) {
|
|
730
|
+
// Budget-exceeded is a structured error we want to surface as
|
|
731
|
+
// such, rather than burying as a generic failure.
|
|
732
|
+
const errorMessage = error instanceof LLMCallBudgetExceeded
|
|
733
|
+
? `LLM budget exceeded (${this.llmCallCount} calls) — likely stuck in a planning loop. Consider raising UNIVERSAL_BOT_MAX_LLM_CALLS or inspecting the steps.`
|
|
734
|
+
: error instanceof Error
|
|
735
|
+
? error.message
|
|
736
|
+
: String(error);
|
|
737
|
+
return {
|
|
738
|
+
success: false,
|
|
739
|
+
error: errorMessage,
|
|
740
|
+
steps,
|
|
741
|
+
...this.resultTail(),
|
|
742
|
+
};
|
|
743
|
+
}
|
|
744
|
+
}
|
|
745
|
+
// ------------ Claude planner ------------
|
|
746
|
+
async planSignupForm(input) {
|
|
747
|
+
// Trim HTML to just <form>...</form> regions if possible — the prompt
|
|
748
|
+
// budget matters and most pages have a lot of marketing chrome.
|
|
749
|
+
const trimmedHtml = this.extractFormHtml(input.html);
|
|
750
|
+
const systemPrompt = `You analyze a web signup form and emit a JSON plan describing how to fill it.
|
|
751
|
+
Output rules:
|
|
752
|
+
- Reply with ONE JSON object only. No prose, no markdown.
|
|
753
|
+
- Schema:
|
|
754
|
+
{
|
|
755
|
+
"actions": [
|
|
756
|
+
{"kind":"fill","selector":"CSS_SELECTOR","value_kind":"email|password|name|username|company|literal","literal":"only when value_kind=literal","reason":"why"},
|
|
757
|
+
{"kind":"check","selector":"CSS_SELECTOR","reason":"TOS / marketing-opt-in / etc."},
|
|
758
|
+
{"kind":"click","selector":"CSS_SELECTOR","reason":"e.g. accept cookies before form is reachable"}
|
|
759
|
+
],
|
|
760
|
+
"submit_selector": "CSS_SELECTOR for the primary signup button",
|
|
761
|
+
"confidence": "high|medium|low",
|
|
762
|
+
"notes": "optional caveats"
|
|
763
|
+
}
|
|
764
|
+
- Prefer stable selectors: name attributes, id, then aria-label. Avoid nth-child unless unavoidable.
|
|
765
|
+
- Include the TOS/agree checkbox if one is required.
|
|
766
|
+
- If a cookie banner is blocking the form, click "Accept" first.
|
|
767
|
+
- Do NOT include password confirmation as a separate action unless the form has a visible second password field.
|
|
768
|
+
- Skip optional/marketing-opt-in checkboxes.
|
|
769
|
+
- For "name" use a realistic full name. For "username" generate a plausible 7-15 char handle.`;
|
|
770
|
+
const userBlocks = [
|
|
771
|
+
{ kind: "image", media_type: "image/png", data_base64: input.screenshot },
|
|
772
|
+
{
|
|
773
|
+
kind: "text",
|
|
774
|
+
text: `Service: ${input.service}
|
|
775
|
+
URL: ${input.url}
|
|
776
|
+
${input.hint !== undefined ? `Hint: ${input.hint}\n` : ""}
|
|
777
|
+
Form HTML (trimmed):
|
|
778
|
+
${trimmedHtml}`,
|
|
779
|
+
},
|
|
780
|
+
];
|
|
781
|
+
return this.callLLM({
|
|
782
|
+
system: systemPrompt,
|
|
783
|
+
userBlocks,
|
|
784
|
+
maxTokens: 1500,
|
|
785
|
+
parse: parseSignupPlan,
|
|
786
|
+
});
|
|
787
|
+
}
|
|
788
|
+
// Extract just <form> elements from the HTML — drops marketing + scripts.
|
|
789
|
+
extractFormHtml(html) {
|
|
790
|
+
const forms = [];
|
|
791
|
+
const re = /<form\b[\s\S]*?<\/form>/gi;
|
|
792
|
+
let m;
|
|
793
|
+
while ((m = re.exec(html)) !== null)
|
|
794
|
+
forms.push(m[0]);
|
|
795
|
+
const joined = forms.join("\n");
|
|
796
|
+
if (joined.length === 0) {
|
|
797
|
+
// No <form> — fall back to body but cap aggressively
|
|
798
|
+
return html.slice(0, 20000);
|
|
799
|
+
}
|
|
800
|
+
// Cap at 20k chars (~5k tokens) to leave room for the screenshot and reply
|
|
801
|
+
return joined.slice(0, 20000);
|
|
802
|
+
}
|
|
803
|
+
looksLikeValidationFailure(text) {
|
|
804
|
+
const t = text.toLowerCase();
|
|
805
|
+
return (t.includes("must be between") ||
|
|
806
|
+
t.includes("is required") ||
|
|
807
|
+
t.includes("please accept") ||
|
|
808
|
+
t.includes("required field") ||
|
|
809
|
+
t.includes("invalid email") ||
|
|
810
|
+
t.includes("password must"));
|
|
811
|
+
}
|
|
812
|
+
pickVerificationLink(links) {
|
|
813
|
+
return pickVerificationLink(links);
|
|
814
|
+
}
|
|
815
|
+
// The InboxClient long-poll caps at 120s server-side (matches the API
|
|
816
|
+
// contract). To wait longer we just loop. We also re-issue with a fresh
|
|
817
|
+
// matcher each pass — services occasionally send a "complete signup"
|
|
818
|
+
// email rather than "verify", and that broader matcher catches both.
|
|
819
|
+
async waitForVerificationEmail(inbox, alias, totalSeconds) {
|
|
820
|
+
const deadline = Date.now() + totalSeconds * 1000;
|
|
821
|
+
const pattern = /verify|confirm|welcome|activate|complete|finish|set\s*up/i;
|
|
822
|
+
let lastErr = null;
|
|
823
|
+
while (Date.now() < deadline) {
|
|
824
|
+
const remainingSeconds = Math.max(1, Math.floor((deadline - Date.now()) / 1000));
|
|
825
|
+
const window = Math.min(remainingSeconds, 90);
|
|
826
|
+
try {
|
|
827
|
+
const email = await inbox.waitForEmail({
|
|
828
|
+
alias,
|
|
829
|
+
matcher: { subject: pattern },
|
|
830
|
+
timeout_seconds: window,
|
|
831
|
+
});
|
|
832
|
+
return email;
|
|
833
|
+
}
|
|
834
|
+
catch (err) {
|
|
835
|
+
lastErr = err;
|
|
836
|
+
// EmailTimeoutError: keep looping until our own deadline.
|
|
837
|
+
// Other errors (alias inactive, network): bail.
|
|
838
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
839
|
+
if (!/timeout|timed out/i.test(message))
|
|
840
|
+
throw err;
|
|
841
|
+
}
|
|
842
|
+
}
|
|
843
|
+
throw lastErr ?? new Error("verification email did not arrive in time");
|
|
844
|
+
}
|
|
845
|
+
// After verification, drive the browser toward the API key. Each round
|
|
846
|
+
// asks Claude what to do next given the current page; we stop when
|
|
847
|
+
// Claude says "done" or when we extract a credential. Bounded by
|
|
848
|
+
// maxRounds so a confused agent can't burn the whole context window.
|
|
849
|
+
async postVerifyLoop(args) {
|
|
850
|
+
let credentials = await this.extractCredentials();
|
|
851
|
+
for (let round = 0; round < args.maxRounds; round++) {
|
|
852
|
+
if (credentials.api_key !== undefined || credentials.username !== undefined) {
|
|
853
|
+
args.steps.push(`Post-verify: credentials found on round ${round}.`);
|
|
854
|
+
return credentials;
|
|
855
|
+
}
|
|
856
|
+
const state = await this.browser.getState();
|
|
857
|
+
let nextStep;
|
|
858
|
+
try {
|
|
859
|
+
nextStep = await this.planPostVerifyStep({
|
|
860
|
+
service: args.service,
|
|
861
|
+
email: args.email,
|
|
862
|
+
password: args.password,
|
|
863
|
+
round,
|
|
864
|
+
maxRounds: args.maxRounds,
|
|
865
|
+
state,
|
|
866
|
+
});
|
|
867
|
+
}
|
|
868
|
+
catch (err) {
|
|
869
|
+
args.steps.push(`Post-verify round ${round}: planner failed (${err instanceof Error ? err.message : String(err)}). Stopping.`);
|
|
870
|
+
break;
|
|
871
|
+
}
|
|
872
|
+
args.steps.push(`Post-verify ${round + 1}/${args.maxRounds}: ${nextStep.kind} — ${nextStep.reason}`);
|
|
873
|
+
if (nextStep.kind === "done")
|
|
874
|
+
break;
|
|
875
|
+
try {
|
|
876
|
+
if (nextStep.kind === "extract") {
|
|
877
|
+
credentials = await this.extractCredentials();
|
|
878
|
+
}
|
|
879
|
+
else if (nextStep.kind === "click") {
|
|
880
|
+
await this.browser.click(nextStep.selector);
|
|
881
|
+
await this.browser.wait(2);
|
|
882
|
+
}
|
|
883
|
+
else if (nextStep.kind === "fill") {
|
|
884
|
+
await this.browser.type(nextStep.selector, nextStep.value);
|
|
885
|
+
}
|
|
886
|
+
else if (nextStep.kind === "navigate") {
|
|
887
|
+
await this.browser.goto(nextStep.url);
|
|
888
|
+
await this.browser.wait(3);
|
|
889
|
+
}
|
|
890
|
+
else if (nextStep.kind === "wait") {
|
|
891
|
+
await this.browser.wait(Math.min(nextStep.seconds, 15));
|
|
892
|
+
}
|
|
893
|
+
}
|
|
894
|
+
catch (err) {
|
|
895
|
+
args.steps.push(`Post-verify action failed (${nextStep.kind}): ${err instanceof Error ? err.message : String(err)}`);
|
|
896
|
+
// Don't bail — Claude may recover on the next round.
|
|
897
|
+
}
|
|
898
|
+
credentials = await this.extractCredentials();
|
|
899
|
+
}
|
|
900
|
+
return credentials;
|
|
901
|
+
}
|
|
902
|
+
async planPostVerifyStep(input) {
|
|
903
|
+
const visibleText = (input.state.html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim()).slice(0, 2500);
|
|
904
|
+
const systemPrompt = `You are driving a headless browser after a SaaS signup verification.
|
|
905
|
+
Your goal: surface the user's API key (or any credential — token, secret, app id) so it can be extracted from the page.
|
|
906
|
+
|
|
907
|
+
You may issue ONE step per turn. Reply with a single JSON object, no prose.
|
|
908
|
+
|
|
909
|
+
Schema:
|
|
910
|
+
{"kind":"done","reason":"why we should stop"}
|
|
911
|
+
{"kind":"extract","reason":"the API key is now visible on this page"}
|
|
912
|
+
{"kind":"click","selector":"CSS","reason":"e.g. dismiss onboarding modal / open API keys page"}
|
|
913
|
+
{"kind":"fill","selector":"CSS","value":"value","reason":"unusual — only for required project-name etc."}
|
|
914
|
+
{"kind":"navigate","url":"https://...","reason":"e.g. go directly to /settings/api"}
|
|
915
|
+
{"kind":"wait","seconds":N,"reason":"page is still loading"}
|
|
916
|
+
|
|
917
|
+
Strategy:
|
|
918
|
+
- If the API key text is visible, return {"kind":"extract"}.
|
|
919
|
+
- If there's a dashboard menu link like "API Keys" / "Tokens" / "Developer", click it.
|
|
920
|
+
- If there's an onboarding modal blocking, dismiss it.
|
|
921
|
+
- If we're on a "verify your phone" / "verify email" wall, return done (we can't solve those).
|
|
922
|
+
- If the page wants the user to create a project before showing keys, fill the minimum and click create.
|
|
923
|
+
- Round ${input.round + 1} of ${input.maxRounds}. Prefer "done" if you're not making progress.`;
|
|
924
|
+
const userBlocks = [
|
|
925
|
+
{ kind: "image", media_type: "image/png", data_base64: input.state.screenshot },
|
|
926
|
+
{
|
|
927
|
+
kind: "text",
|
|
928
|
+
text: `Service: ${input.service}
|
|
929
|
+
URL: ${input.state.url}
|
|
930
|
+
Title: ${input.state.title}
|
|
931
|
+
Round: ${input.round + 1}/${input.maxRounds}
|
|
932
|
+
|
|
933
|
+
Visible text (truncated):
|
|
934
|
+
${visibleText}`,
|
|
935
|
+
},
|
|
936
|
+
];
|
|
937
|
+
return this.callLLM({
|
|
938
|
+
system: systemPrompt,
|
|
939
|
+
userBlocks,
|
|
940
|
+
maxTokens: 500,
|
|
941
|
+
parse: parsePostVerifyStep,
|
|
942
|
+
});
|
|
943
|
+
}
|
|
944
|
+
async findSignupLink() {
|
|
945
|
+
const html = (await this.browser.getState()).html;
|
|
946
|
+
const re = /href="([^"]*(?:signup|register|sign-up|create-account|join)[^"]*)"/gi;
|
|
947
|
+
let m;
|
|
948
|
+
while ((m = re.exec(html)) !== null) {
|
|
949
|
+
const href = m[1];
|
|
950
|
+
if (href === undefined)
|
|
951
|
+
continue;
|
|
952
|
+
if (href.includes("signin") || href.includes("login"))
|
|
953
|
+
continue;
|
|
954
|
+
if (href.startsWith("http"))
|
|
955
|
+
return href;
|
|
956
|
+
if (href.startsWith("//"))
|
|
957
|
+
return `https:${href}`;
|
|
958
|
+
}
|
|
959
|
+
return null;
|
|
960
|
+
}
|
|
961
|
+
async extractCredentials() {
|
|
962
|
+
// IMPORTANT: pull credentials from the *visible* page text, not the raw
|
|
963
|
+
// HTML. Reading from HTML matches anti-bot challenge JS (Cloudflare
|
|
964
|
+
// Turnstile, hCaptcha) whose challenge tokens look like API keys to a
|
|
965
|
+
// naive regex.
|
|
966
|
+
const text = await this.browser.extractText();
|
|
967
|
+
const credentials = {};
|
|
968
|
+
const apiKey = extractApiKeyFromText(text);
|
|
969
|
+
if (apiKey !== null)
|
|
970
|
+
credentials.api_key = apiKey;
|
|
971
|
+
return credentials;
|
|
972
|
+
}
|
|
973
|
+
}
|
|
974
|
+
//# sourceMappingURL=agent.js.map
|