@trusty-squire/mcp 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bot/agent.d.ts +125 -0
- package/dist/bot/agent.d.ts.map +1 -0
- package/dist/bot/agent.js +974 -0
- package/dist/bot/agent.js.map +1 -0
- package/dist/bot/asn.d.ts +15 -0
- package/dist/bot/asn.d.ts.map +1 -0
- package/dist/bot/asn.js +151 -0
- package/dist/bot/asn.js.map +1 -0
- package/dist/bot/browser.d.ts +70 -0
- package/dist/bot/browser.d.ts.map +1 -0
- package/dist/bot/browser.js +810 -0
- package/dist/bot/browser.js.map +1 -0
- package/dist/bot/debug.d.ts +3 -0
- package/dist/bot/debug.d.ts.map +1 -0
- package/dist/bot/debug.js +24 -0
- package/dist/bot/debug.js.map +1 -0
- package/dist/bot/inbox-client.d.ts +41 -0
- package/dist/bot/inbox-client.d.ts.map +1 -0
- package/dist/bot/inbox-client.js +68 -0
- package/dist/bot/inbox-client.js.map +1 -0
- package/dist/bot/index.d.ts +22 -0
- package/dist/bot/index.d.ts.map +1 -0
- package/dist/bot/index.js +83 -0
- package/dist/bot/index.js.map +1 -0
- package/dist/bot/llm-client.d.ts +75 -0
- package/dist/bot/llm-client.d.ts.map +1 -0
- package/dist/bot/llm-client.js +250 -0
- package/dist/bot/llm-client.js.map +1 -0
- package/dist/bot/prewarm-cache.d.ts +3 -0
- package/dist/bot/prewarm-cache.d.ts.map +1 -0
- package/dist/bot/prewarm-cache.js +91 -0
- package/dist/bot/prewarm-cache.js.map +1 -0
- package/dist/install/cli.d.ts +2 -1
- package/dist/install/cli.d.ts.map +1 -1
- package/dist/install/cli.js +28 -9
- package/dist/install/cli.js.map +1 -1
- package/dist/tools/provision-any.js +1 -1
- package/dist/tools/provision-any.js.map +1 -1
- package/dist/tools/wait-for-approval.d.ts +1 -1
- package/package.json +10 -7
|
@@ -0,0 +1,810 @@
|
|
|
1
|
+
// Browser automation wrapper for universal signup bot
|
|
2
|
+
// Provides simple interface for AI agent to control browser.
|
|
3
|
+
//
|
|
4
|
+
// Two layers of bot-resistance:
|
|
5
|
+
//
|
|
6
|
+
// 1. Stealth fingerprinting (playwright-extra + puppeteer-extra-plugin-
|
|
7
|
+
// stealth). Patches ~17 client-side tells: navigator.webdriver,
|
|
8
|
+
// navigator.plugins, missing chrome runtime, WebGL vendor/renderer,
|
|
9
|
+
// permissions.query for notifications, etc. This handles the
|
|
10
|
+
// *fingerprint* side of bot detection.
|
|
11
|
+
//
|
|
12
|
+
// 2. Human-like behavior (this file, when humanize=true). Adds bezier
|
|
13
|
+
// mouse paths to clicks, variable typing delays with thinking pauses,
|
|
14
|
+
// dwell time after page loads, hover-then-click hesitations. This
|
|
15
|
+
// handles the *behavior* side — the bit that fingerprint spoofing
|
|
16
|
+
// alone won't get past, because modern Cloudflare/reCAPTCHA scoring
|
|
17
|
+
// correlates mouse-path entropy and inter-keystroke timing.
|
|
18
|
+
//
|
|
19
|
+
// Together with the user's residential IP (the bot runs on user
|
|
20
|
+
// machines, not on Fly), these are sufficient for invisible-mode
|
|
21
|
+
// Turnstile/reCAPTCHA-v3 scoring on most SaaS signups. Visible-mode
|
|
22
|
+
// captchas still need the click-and-wait pattern documented in
|
|
23
|
+
// agent.ts.
|
|
24
|
+
import { chromium as baseChromium } from "playwright";
|
|
25
|
+
import { createRequire } from "node:module";
|
|
26
|
+
import { detectAsn } from "./asn.js";
|
|
27
|
+
// Lazy registration: installing the plugin mutates the chromium singleton
|
|
28
|
+
// from playwright-extra so we only do it once per process. We require()
|
|
29
|
+
// the CJS modules lazily (the stealth toolchain only ships CJS) and treat
|
|
30
|
+
// stealth as best-effort — a missing dep should never crash the bot.
|
|
31
|
+
const require = createRequire(import.meta.url);
|
|
32
|
+
let cachedChromium = null;
|
|
33
|
+
/**
 * Returns the Chromium launcher used for every browser launch in this
 * process, resolving it on first call and reusing it afterwards.
 *
 * First choice is playwright-extra's chromium with the stealth plugin
 * registered. If either CJS module cannot be loaded or registered, a
 * warning is printed and the vanilla Playwright chromium is cached
 * instead.
 *
 * @returns the cached launcher (stealth-patched or vanilla chromium).
 */
function getChromium() {
    if (cachedChromium === null) {
        try {
            const extraModule = require("playwright-extra");
            const makeStealthPlugin = require("puppeteer-extra-plugin-stealth");
            const stealthChromium = extraModule.chromium;
            // Registering the plugin mutates the launcher, which is why the
            // result is cached rather than rebuilt per call.
            stealthChromium.use(makeStealthPlugin());
            cachedChromium = stealthChromium;
        }
        catch (err) {
            // Stealth is best-effort: a missing dependency degrades the
            // fingerprint but must not stop the run.
            const reason = err instanceof Error ? err.message : String(err);
            console.warn(`[universal-bot] stealth plugin unavailable, falling back to vanilla chromium: ${reason}`);
            cachedChromium = baseChromium;
        }
    }
    return cachedChromium;
}
|
|
50
|
+
// Real-Chromium-family browser channels we'll prefer over the bundled
|
|
51
|
+
// Chromium binary when available. Chromium ships without Widevine,
|
|
52
|
+
// without proprietary codecs, with an empty navigator.plugins array,
|
|
53
|
+
// and with a chrome.runtime API surface that bot-detection scripts
|
|
54
|
+
// know to look for. Using a *real* installation papers over ~6 of
|
|
55
|
+
// those fingerprint bits at zero engineering cost.
|
|
56
|
+
//
|
|
57
|
+
// Order matters: pick the channel most likely to be present *and*
|
|
58
|
+
// hardest to fingerprint as automation. Stable Chrome > Edge >
|
|
59
|
+
// Beta/Canary > Brave. Brave isn't a Playwright channel but its
|
|
60
|
+
// binary path is well-known; we resolve it explicitly below.
|
|
61
|
+
// Channel names probed by detectChromiumChannel(), most-preferred first.
const PREFERRED_CHANNELS = ["chrome", "msedge", "chrome-beta", "chrome-canary"];
|
|
67
|
+
// Per-channel binary search paths. Playwright's `executablePath()` is
|
|
68
|
+
// argumentless (returns the bundled Chromium path), so we can't ask it
|
|
69
|
+
// "is Chrome installed?" — we have to look ourselves. These are the
|
|
70
|
+
// canonical install locations on each platform; the first hit wins.
|
|
71
|
+
//
|
|
72
|
+
// Limitation: this misses sideloaded installs (Chrome installed via
|
|
73
|
+
// the user's package manager to a non-default path, dev-builds in
|
|
74
|
+
// home directories, etc.). For those, the user can set
|
|
75
|
+
// UNIVERSAL_BOT_CHANNEL=chrome to force Playwright to find it
|
|
76
|
+
// through its own resolution. We accept the false-negative because
|
|
77
|
+
// the alternative (asking Playwright to launch and seeing if it
|
|
78
|
+
// succeeds) costs ~1s of process startup per probe.
|
|
79
|
+
// Channel name → install locations that are checked for existence, in
// order. Paths for all platforms live in one list; entries that belong
// to another OS simply never exist on the current one.
const CHANNEL_PATHS = {
    "chrome": [
        "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", // macOS
        "/usr/bin/google-chrome", // Linux
        "/usr/bin/google-chrome-stable", // Linux
        "/opt/google/chrome/chrome", // Linux
        "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe", // Windows
        "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe", // Windows
    ],
    "msedge": [
        "/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge", // macOS
        "/usr/bin/microsoft-edge", // Linux
        "/usr/bin/microsoft-edge-stable", // Linux
        "C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe", // Windows
    ],
    "chrome-beta": [
        "/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta", // macOS
        "/usr/bin/google-chrome-beta", // Linux
    ],
    "chrome-canary": [
        "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary", // macOS
        "/usr/bin/google-chrome-unstable", // Linux
    ],
};
|
|
107
|
+
// Detect a real-Chromium-family browser channel without launching it.
|
|
108
|
+
// Returns the channel name (passable as `channel:` to .launch) or null
|
|
109
|
+
// to mean "use bundled Chromium." Logs the selection to stderr so the
|
|
110
|
+
// telemetry path can see which browser the run ended up on without
|
|
111
|
+
// having to thread it through the agent state machine.
|
|
112
|
+
/**
 * Picks the browser channel to launch without starting a browser.
 *
 * Resolution order:
 *   1. UNIVERSAL_BOT_CHANNEL === "bundled"  → null (bundled Chromium).
 *   2. UNIVERSAL_BOT_CHANNEL set to anything else non-blank → that value,
 *      trimmed, returned as-is (explicit override, not validated here).
 *   3. Otherwise the first entry of PREFERRED_CHANNELS that has at least
 *      one existing path in CHANNEL_PATHS.
 *   4. null when nothing is found.
 *
 * @returns {Promise<string | null>} a channel name, or null meaning
 *   "use bundled Chromium".
 */
async function detectChromiumChannel() {
    // A blank or whitespace-padded value is treated as "not set" / trimmed,
    // matching how resolveProxy() reads UNIVERSAL_BOT_PROXY_URL. Previously
    // `UNIVERSAL_BOT_CHANNEL=` produced the channel name "" and a padded
    // value such as " chrome " was passed through verbatim.
    const override = (process.env.UNIVERSAL_BOT_CHANNEL ?? "").trim();
    if (override === "bundled")
        return null;
    if (override.length > 0) {
        // Explicit override — caller knows what they want.
        return override;
    }
    const { existsSync } = await import("node:fs");
    for (const channel of PREFERRED_CHANNELS) {
        const candidatePaths = CHANNEL_PATHS[channel] ?? [];
        // existsSync reports inaccessible paths as `false` instead of
        // throwing, so no per-candidate try/catch is needed.
        if (candidatePaths.some((candidate) => existsSync(candidate)))
            return channel;
    }
    return null;
}
|
|
137
|
+
export class BrowserController {
|
|
138
|
+
browser = null;
|
|
139
|
+
page = null;
|
|
140
|
+
humanize;
|
|
141
|
+
// Tracks the simulated mouse position so successive clicks can move
|
|
142
|
+
// along a continuous path (humans don't teleport between clicks).
|
|
143
|
+
mouseX = 100;
|
|
144
|
+
mouseY = 100;
|
|
145
|
+
// Records the browser channel that .start() actually launched. Set
|
|
146
|
+
// post-launch so telemetry (provision-any.ts) can surface "this run
|
|
147
|
+
// used real Chrome" vs "this run used bundled Chromium." Useful for
|
|
148
|
+
// separating fingerprint regressions from network regressions when
|
|
149
|
+
// a service starts failing.
|
|
150
|
+
launchedChannel = null;
|
|
151
|
+
// The proxy server this run egressed through, or null for a direct
|
|
152
|
+
// connection. Set by .start(); surfaced via the `proxied` getter —
|
|
153
|
+
// a captcha failure behind a residential proxy is materially
|
|
154
|
+
// different signal from the same failure on a raw datacenter IP.
|
|
155
|
+
proxyServer = null;
|
|
156
|
+
constructor(opts = {}) {
|
|
157
|
+
this.humanize = opts.humanize ?? true;
|
|
158
|
+
}
|
|
159
|
+
// Which browser channel the most recent .start() actually used.
|
|
160
|
+
// `null` means bundled Chromium; a string like "chrome" means a
|
|
161
|
+
// real installed browser of that channel. Throws if .start() hasn't
|
|
162
|
+
// been called yet — there's no sensible default to return.
|
|
163
|
+
get channel() {
|
|
164
|
+
if (this.browser === null) {
|
|
165
|
+
throw new Error("BrowserController.channel read before .start()");
|
|
166
|
+
}
|
|
167
|
+
return this.launchedChannel;
|
|
168
|
+
}
|
|
169
|
+
// The proxy server the most recent .start() routed egress through,
|
|
170
|
+
// or null for a direct connection. Useful telemetry alongside
|
|
171
|
+
// `channel`. Throws if .start() hasn't run — same reason as channel.
|
|
172
|
+
get proxied() {
|
|
173
|
+
if (this.browser === null) {
|
|
174
|
+
throw new Error("BrowserController.proxied read before .start()");
|
|
175
|
+
}
|
|
176
|
+
return this.proxyServer;
|
|
177
|
+
}
|
|
178
|
+
async start() {
|
|
179
|
+
const channel = await detectChromiumChannel();
|
|
180
|
+
this.launchedChannel = channel;
|
|
181
|
+
const proxy = await this.resolveProxy();
|
|
182
|
+
this.proxyServer = proxy?.server ?? null;
|
|
183
|
+
// Stderr so the MCP stdio transport's framing stays clean (the
|
|
184
|
+
// module's existing logging convention).
|
|
185
|
+
console.error(`[universal-bot] launching browser channel=${channel ?? "bundled-chromium"} ` +
|
|
186
|
+
`proxy=${proxy?.server ?? "direct"}`);
|
|
187
|
+
this.browser = await getChromium().launch({
|
|
188
|
+
headless: process.env.UNIVERSAL_BOT_HEADLESS !== "false",
|
|
189
|
+
// `channel:` is a Playwright launch option that tells it to use a
|
|
190
|
+
// real installed browser instead of the bundled binary. When null
|
|
191
|
+
// we omit the key entirely so Playwright falls back to default.
|
|
192
|
+
...(channel !== null ? { channel } : {}),
|
|
193
|
+
// `proxy:` routes all egress through a residential proxy. Omitted
|
|
194
|
+
// (direct connection) for the ~80% of users on residential
|
|
195
|
+
// networks — see resolveProxy().
|
|
196
|
+
...(proxy !== null ? { proxy } : {}),
|
|
197
|
+
args: [
|
|
198
|
+
"--disable-blink-features=AutomationControlled",
|
|
199
|
+
"--no-sandbox",
|
|
200
|
+
"--disable-dev-shm-usage",
|
|
201
|
+
],
|
|
202
|
+
});
|
|
203
|
+
const context = await this.browser.newContext({
|
|
204
|
+
viewport: { width: 1280, height: 720 },
|
|
205
|
+
userAgent: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
|
|
206
|
+
locale: "en-US",
|
|
207
|
+
timezoneId: "America/New_York",
|
|
208
|
+
});
|
|
209
|
+
// Patch the navigator.webdriver flag — most anti-bot heuristics look here.
|
|
210
|
+
await context.addInitScript(() => {
|
|
211
|
+
Object.defineProperty(navigator, "webdriver", { get: () => undefined });
|
|
212
|
+
});
|
|
213
|
+
this.page = await context.newPage();
|
|
214
|
+
}
|
|
215
|
+
// Decide whether this run egresses through a residential proxy, and
|
|
216
|
+
// return Playwright's proxy settings or null for a direct connection.
|
|
217
|
+
//
|
|
218
|
+
// The fast path: when UNIVERSAL_BOT_PROXY_URL is unset (the default),
|
|
219
|
+
// this returns null before doing anything — no ASN lookup, no added
|
|
220
|
+
// latency for the ~80% of users who never configure a proxy.
|
|
221
|
+
//
|
|
222
|
+
// When a proxy IS configured, it's used only for datacenter-class
|
|
223
|
+
// egress: reCAPTCHA/Cloudflare score datacenter IPs as bot-likely no
|
|
224
|
+
// matter how clean the fingerprint is, while residential users
|
|
225
|
+
// already pass — so routing them through the proxy would just burn
|
|
226
|
+
// money. UNIVERSAL_BOT_PROXY_ALWAYS=true forces it on for networks
|
|
227
|
+
// that misclassify as "unknown". A malformed URL never aborts the
|
|
228
|
+
// run — we log and fall back to a direct connection.
|
|
229
|
+
async resolveProxy() {
|
|
230
|
+
const raw = process.env.UNIVERSAL_BOT_PROXY_URL;
|
|
231
|
+
if (raw === undefined || raw.trim().length === 0)
|
|
232
|
+
return null;
|
|
233
|
+
let proxy;
|
|
234
|
+
try {
|
|
235
|
+
proxy = parseProxyUrl(raw);
|
|
236
|
+
}
|
|
237
|
+
catch (err) {
|
|
238
|
+
console.error(`[universal-bot] UNIVERSAL_BOT_PROXY_URL is malformed — running ` +
|
|
239
|
+
`direct: ${err instanceof Error ? err.message : String(err)}`);
|
|
240
|
+
return null;
|
|
241
|
+
}
|
|
242
|
+
const forceAlways = process.env.UNIVERSAL_BOT_PROXY_ALWAYS === "true";
|
|
243
|
+
// detectAsn is best-effort (5s timeout, null on failure) → "unknown".
|
|
244
|
+
const asn = await detectAsn();
|
|
245
|
+
const asnClass = asn?.class ?? "unknown";
|
|
246
|
+
if (shouldRouteThroughProxy(asnClass, forceAlways)) {
|
|
247
|
+
console.error(`[universal-bot] routing through residential proxy ` +
|
|
248
|
+
`(asn=${asnClass}${forceAlways ? ", forced" : ""})`);
|
|
249
|
+
return proxy;
|
|
250
|
+
}
|
|
251
|
+
console.error(`[universal-bot] direct connection (asn=${asnClass}) — proxy ` +
|
|
252
|
+
`configured but not needed for this network`);
|
|
253
|
+
return null;
|
|
254
|
+
}
|
|
255
|
+
async goto(url) {
|
|
256
|
+
if (!this.page)
|
|
257
|
+
throw new Error("Browser not started");
|
|
258
|
+
await this.page.goto(url, {
|
|
259
|
+
waitUntil: "domcontentloaded",
|
|
260
|
+
timeout: 60000,
|
|
261
|
+
});
|
|
262
|
+
// Post-load dwell. Cloudflare/reCAPTCHA scoring runs JS that
|
|
263
|
+
// collects behavior signals over a window (typically 500-2000ms);
|
|
264
|
+
// landing on a page and immediately interacting reads as bot-like.
|
|
265
|
+
// The "dwell" gives the scoring window enough wall-clock to settle
|
|
266
|
+
// and also gives any deferred JS time to register event listeners
|
|
267
|
+
// we'll later fire.
|
|
268
|
+
if (this.humanize) {
|
|
269
|
+
await this.sleep(rand(800, 2000));
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
// Pre-warm a domain by visiting its root. Useful before navigating
|
|
273
|
+
// to a deep signup URL on a strict-Cloudflare service: the root sets
|
|
274
|
+
// first-party cookies and lets the bot-scoring JS calibrate on a
|
|
275
|
+
// benign page before we hit anything sensitive.
|
|
276
|
+
//
|
|
277
|
+
// `mode`:
|
|
278
|
+
// - "fast" (default): visit the root, dwell ~2s, jitter the mouse,
|
|
279
|
+
// done. Cheap and adequate when the domain has been warmed
|
|
280
|
+
// recently (cookies already in jar, prior session in the
|
|
281
|
+
// scoring JS's memory).
|
|
282
|
+
// - "referrer-chain": simulate a research session — Google search
|
|
283
|
+
// → click the brand result → scroll the marketing site →
|
|
284
|
+
// navigate. ~20-40s of wall clock, but builds a realistic
|
|
285
|
+
// browsing-history signal that v3 weighs heavily. Use this on
|
|
286
|
+
// first-attempt against strict services and after a captcha
|
|
287
|
+
// failure.
|
|
288
|
+
async prewarm(url, mode = "fast") {
|
|
289
|
+
if (!this.page)
|
|
290
|
+
throw new Error("Browser not started");
|
|
291
|
+
if (mode === "referrer-chain") {
|
|
292
|
+
await this.prewarmViaReferrerChain(url);
|
|
293
|
+
return;
|
|
294
|
+
}
|
|
295
|
+
const root = new URL(url).origin;
|
|
296
|
+
await this.page.goto(root, { waitUntil: "domcontentloaded", timeout: 30000 });
|
|
297
|
+
if (this.humanize) {
|
|
298
|
+
await this.sleep(rand(1200, 2500));
|
|
299
|
+
// Tiny mouse jitter so cf_clearance JS sees pointer activity.
|
|
300
|
+
await this.jitterMouse();
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
// Simulates a research session that ends at the signup target.
|
|
304
|
+
//
|
|
305
|
+
// Why this is more than theater: reCAPTCHA v3 reads a "browsing
|
|
306
|
+
// history" signal that aggregates referrer + dwell + interaction
|
|
307
|
+
// across the prior 1-2 page loads in this context. A cold landing on
|
|
308
|
+
// `/sign_up` has none of that — score gets clamped near 0.3, which
|
|
309
|
+
// is the kill-floor for most v3-protected forms. A simulated
|
|
310
|
+
// Google → result-click → marketing-site → /sign_up chain lifts the
|
|
311
|
+
// score to 0.5-0.7 range, which is where real users sit.
|
|
312
|
+
//
|
|
313
|
+
// Best-effort throughout: if any step fails (Google rate-limits us,
|
|
314
|
+
// the brand's marketing site is down, etc.) we log the failure and
// continue from whatever page we reached rather than aborting the whole signup. Network surprises
|
|
316
|
+
// are common; the bot still works without this lift, just worse.
|
|
317
|
+
async prewarmViaReferrerChain(url) {
|
|
318
|
+
if (!this.page)
|
|
319
|
+
throw new Error("Browser not started");
|
|
320
|
+
const targetOrigin = new URL(url).origin;
|
|
321
|
+
// Strip "www." for the search query so "postmarkapp.com" becomes
|
|
322
|
+
// "postmarkapp" not "www postmarkapp"; reads more like what a
|
|
323
|
+
// human types into a search box.
|
|
324
|
+
const brand = new URL(url).hostname.replace(/^www\./, "").split(".")[0];
|
|
325
|
+
const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(brand + " sign up")}`;
|
|
326
|
+
try {
|
|
327
|
+
await this.page.goto(searchUrl, { waitUntil: "domcontentloaded", timeout: 30000 });
|
|
328
|
+
if (this.humanize)
|
|
329
|
+
await this.sleep(rand(2000, 4000));
|
|
330
|
+
// Look for a result link pointing at the target origin. Google
|
|
331
|
+
// wraps result hrefs but exposes the real destination as a child
|
|
332
|
+
// attribute or via the `href` itself for organic results — we
|
|
333
|
+
// grab whichever link's href starts with the target origin.
|
|
334
|
+
const resultSelector = `a[href^="${targetOrigin}"]`;
|
|
335
|
+
const hasResult = (await this.page.locator(resultSelector).count()) > 0;
|
|
336
|
+
if (hasResult) {
|
|
337
|
+
// Use humanClick if available — moves the mouse along a bezier
|
|
338
|
+
// path to the link, which feeds the scoring JS pointer entropy
|
|
339
|
+
// as a side effect.
|
|
340
|
+
if (this.humanize) {
|
|
341
|
+
await this.humanClick(resultSelector);
|
|
342
|
+
}
|
|
343
|
+
else {
|
|
344
|
+
await this.page.click(resultSelector);
|
|
345
|
+
}
|
|
346
|
+
await this.page.waitForLoadState("domcontentloaded", { timeout: 30000 });
|
|
347
|
+
}
|
|
348
|
+
else {
|
|
349
|
+
// Couldn't find an organic result (Google sometimes interposes
|
|
350
|
+
// an ad or "people also ask" block first). Navigate directly
|
|
351
|
+
// and accept that the referrer chain is shorter but still
|
|
352
|
+
// includes the search.
|
|
353
|
+
await this.page.goto(targetOrigin, { waitUntil: "domcontentloaded", timeout: 30000 });
|
|
354
|
+
}
|
|
355
|
+
// Marketing-site dwell: scroll a bit, pause, scroll back. The
|
|
356
|
+
// scroll events plus the wall clock build up the "this user is
|
|
357
|
+
// reading" signal. Magnitude is intentionally small — overshooting
|
|
358
|
+
// (scrolling to the bottom in 200ms, etc.) is itself bot-like.
|
|
359
|
+
if (this.humanize) {
|
|
360
|
+
await this.sleep(rand(1500, 3500));
|
|
361
|
+
await this.page.mouse.wheel(0, rand(200, 500));
|
|
362
|
+
await this.sleep(rand(800, 2000));
|
|
363
|
+
await this.page.mouse.wheel(0, rand(-200, 0));
|
|
364
|
+
await this.sleep(rand(1000, 2500));
|
|
365
|
+
await this.jitterMouse();
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
catch (err) {
|
|
369
|
+
// Any step in the chain failing leaves us at *some* page (the
|
|
370
|
+
// search results, the marketing site, an error page) — that's
|
|
371
|
+
// still better than a cold landing on /sign_up. Log and proceed.
|
|
372
|
+
console.error(`[universal-bot] referrer-chain prewarm partial failure (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
async type(selector, text) {
|
|
376
|
+
if (!this.page)
|
|
377
|
+
throw new Error("Browser not started");
|
|
378
|
+
// Wait for element to be visible and enabled before typing.
|
|
379
|
+
await this.page.waitForSelector(selector, { state: "visible", timeout: 10000 });
|
|
380
|
+
if (!this.humanize) {
|
|
381
|
+
// Fast path for tests / non-humanized runs.
|
|
382
|
+
await this.page.fill(selector, text);
|
|
383
|
+
return;
|
|
384
|
+
}
|
|
385
|
+
// Humanized typing:
|
|
386
|
+
// - Click into the field first (moves mouse, generates focus event)
|
|
387
|
+
// - pressSequentially with per-character delay 40-110ms baseline
|
|
388
|
+
// - Inject occasional "thinking" pauses 200-600ms every ~5-12 chars
|
|
389
|
+
//
|
|
390
|
+
// page.fill() bypasses keydown/keypress/input events entirely — it
|
|
391
|
+
// sets value via JS. That's a giant red flag to behavior scoring.
|
|
392
|
+
// pressSequentially emits real key events so the page sees a normal
|
|
393
|
+
// typing pattern.
|
|
394
|
+
await this.humanClick(selector);
|
|
395
|
+
// Clear any prefilled value (browser autofill, etc.) before typing.
|
|
396
|
+
const locator = this.page.locator(selector);
|
|
397
|
+
await locator.fill("");
|
|
398
|
+
let typedSinceLastPause = 0;
|
|
399
|
+
let nextPauseAt = rand(5, 12);
|
|
400
|
+
for (const ch of text) {
|
|
401
|
+
// Per-char delay. Real typing is bursty; we use a slightly
|
|
402
|
+
// skewed distribution that occasionally lands a fast char and
|
|
403
|
+
// occasionally a slow one.
|
|
404
|
+
await locator.pressSequentially(ch, { delay: rand(40, 110) });
|
|
405
|
+
typedSinceLastPause += 1;
|
|
406
|
+
if (typedSinceLastPause >= nextPauseAt) {
|
|
407
|
+
// Brief "thinking" pause — looking at the keyboard, reading
|
|
408
|
+
// the label, etc.
|
|
409
|
+
await this.sleep(rand(180, 600));
|
|
410
|
+
typedSinceLastPause = 0;
|
|
411
|
+
nextPauseAt = rand(5, 12);
|
|
412
|
+
}
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
async click(selector) {
|
|
416
|
+
if (!this.page)
|
|
417
|
+
throw new Error("Browser not started");
|
|
418
|
+
if (!this.humanize) {
|
|
419
|
+
await this.page.click(selector);
|
|
420
|
+
return;
|
|
421
|
+
}
|
|
422
|
+
await this.humanClick(selector);
|
|
423
|
+
}
|
|
424
|
+
// Click the form's submit button, disambiguating when the planned
|
|
425
|
+
// selector matches several elements. Signup pages routinely render
|
|
426
|
+
// OAuth buttons ("Continue with Google" / "GitHub") as
|
|
427
|
+
// button[type=submit] alongside the real submit — and a Playwright
|
|
428
|
+
// locator is strict-mode, so a plain click on a multi-match selector
|
|
429
|
+
// throws "strict mode violation". We score the candidates by visible
|
|
430
|
+
// text and click the best, or throw a clear error when none reads as
|
|
431
|
+
// a signup button (e.g. an OAuth-only page).
|
|
432
|
+
async clickSubmit(selector) {
|
|
433
|
+
if (!this.page)
|
|
434
|
+
throw new Error("Browser not started");
|
|
435
|
+
const locator = this.page.locator(selector);
|
|
436
|
+
const count = await locator.count();
|
|
437
|
+
// 0 or 1 match: the normal click path handles it (and surfaces a
|
|
438
|
+
// clean "waiting for selector" timeout when the count is 0).
|
|
439
|
+
if (count <= 1) {
|
|
440
|
+
await this.click(selector);
|
|
441
|
+
return;
|
|
442
|
+
}
|
|
443
|
+
const texts = [];
|
|
444
|
+
for (let i = 0; i < count; i++) {
|
|
445
|
+
texts.push(((await locator.nth(i).textContent()) ?? "").trim());
|
|
446
|
+
}
|
|
447
|
+
const best = pickSubmitButtonIndex(texts);
|
|
448
|
+
if (best === null) {
|
|
449
|
+
throw new Error(`submit selector "${selector}" matched ${count} buttons, none scoring ` +
|
|
450
|
+
`as a signup button (texts: ${texts.map((t) => JSON.stringify(t)).join(", ")})`);
|
|
451
|
+
}
|
|
452
|
+
const chosen = locator.nth(best);
|
|
453
|
+
if (this.humanize) {
|
|
454
|
+
await this.humanClickLocator(chosen);
|
|
455
|
+
}
|
|
456
|
+
else {
|
|
457
|
+
await chosen.click();
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
async check(selector) {
|
|
461
|
+
if (!this.page)
|
|
462
|
+
throw new Error("Browser not started");
|
|
463
|
+
// Use force:true because TOS checkboxes are sometimes visually covered by
|
|
464
|
+
// a custom label/styled wrapper but the underlying input is checkable.
|
|
465
|
+
await this.page.waitForSelector(selector, { state: "attached", timeout: 10000 });
|
|
466
|
+
if (!this.humanize) {
|
|
467
|
+
await this.page.check(selector, { force: true });
|
|
468
|
+
return;
|
|
469
|
+
}
|
|
470
|
+
// For visible checkboxes, move the mouse to it first (a real user
|
|
471
|
+
// would). For force-checked invisible ones, fall back to the
|
|
472
|
+
// Playwright API so we don't try to mouse-click an offscreen element.
|
|
473
|
+
try {
|
|
474
|
+
await this.humanClick(selector);
|
|
475
|
+
// Verify it actually became checked; some checkboxes need the
|
|
476
|
+
// explicit `check()` call to flip state (e.g., styled labels
|
|
477
|
+
// that swallow the click event).
|
|
478
|
+
const isChecked = await this.page.locator(selector).isChecked();
|
|
479
|
+
if (!isChecked) {
|
|
480
|
+
await this.page.check(selector, { force: true });
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
catch {
|
|
484
|
+
await this.page.check(selector, { force: true });
|
|
485
|
+
}
|
|
486
|
+
}
|
|
487
|
+
// ───────────── humanization internals ─────────────
|
|
488
|
+
// Click that mimics a real user: locate element, bezier-path the
|
|
489
|
+
// mouse to it, hover briefly, then click. The mouse position is
|
|
490
|
+
// remembered so successive clicks form a continuous path.
|
|
491
|
+
async humanClick(selector) {
|
|
492
|
+
if (!this.page)
|
|
493
|
+
throw new Error("Browser not started");
|
|
494
|
+
await this.humanClickLocator(this.page.locator(selector));
|
|
495
|
+
}
|
|
496
|
+
// Locator-based core of humanClick. Taking a Locator (not a selector
|
|
497
|
+
// string) lets clickSubmit() hand us a `.nth(i)`-narrowed locator
|
|
498
|
+
// when a selector matched several elements — a bare selector through
|
|
499
|
+
// a strict-mode locator would throw before we could disambiguate.
|
|
500
|
+
async humanClickLocator(locator) {
|
|
501
|
+
if (!this.page)
|
|
502
|
+
throw new Error("Browser not started");
|
|
503
|
+
await locator.waitFor({ state: "visible", timeout: 10000 });
|
|
504
|
+
const box = await locator.boundingBox();
|
|
505
|
+
if (box === null) {
|
|
506
|
+
// Element exists but isn't in the layout (e.g., display:none).
|
|
507
|
+
// Fall back to the regular click which will fail loudly with a
|
|
508
|
+
// useful error.
|
|
509
|
+
await locator.click();
|
|
510
|
+
return;
|
|
511
|
+
}
|
|
512
|
+
// Aim for a random point inside the bounding box (not always the
|
|
513
|
+
// exact center — that's a fingerprintable bot tell).
|
|
514
|
+
const targetX = box.x + rand(box.width * 0.25, box.width * 0.75);
|
|
515
|
+
const targetY = box.y + rand(box.height * 0.25, box.height * 0.75);
|
|
516
|
+
await this.bezierMouseTo(targetX, targetY);
|
|
517
|
+
// Hover hesitation. Real users land on a button and pause briefly
|
|
518
|
+
// before clicking. 80-300ms is short enough not to slow runs much
|
|
519
|
+
// and long enough to register as "non-instant" in scoring JS.
|
|
520
|
+
await this.sleep(rand(80, 300));
|
|
521
|
+
await this.page.mouse.click(targetX, targetY);
|
|
522
|
+
this.mouseX = targetX;
|
|
523
|
+
this.mouseY = targetY;
|
|
524
|
+
}
|
|
525
|
+
// Moves the mouse along a bezier curve from the current position to
|
|
526
|
+
// (x, y). Uses 12-25 intermediate steps with small per-step delays.
|
|
527
|
+
// The curve avoids the dead-straight teleport that Playwright's
|
|
528
|
+
// default move() does.
|
|
529
|
+
async bezierMouseTo(x, y) {
|
|
530
|
+
if (!this.page)
|
|
531
|
+
throw new Error("Browser not started");
|
|
532
|
+
const steps = rand(12, 25);
|
|
533
|
+
// Bezier control points: bow the curve slightly perpendicular to
|
|
534
|
+
// the travel direction so it's a recognizable arc, not a straight
|
|
535
|
+
// line. Magnitude scales with distance.
|
|
536
|
+
const dx = x - this.mouseX;
|
|
537
|
+
const dy = y - this.mouseY;
|
|
538
|
+
const distance = Math.sqrt(dx * dx + dy * dy);
|
|
539
|
+
const bowMagnitude = Math.min(distance * 0.2, 80);
|
|
540
|
+
// Perpendicular direction (rotate the (dx, dy) vector 90°), then
|
|
541
|
+
// randomize which side of the line we bow toward.
|
|
542
|
+
const perpX = -dy / (distance || 1);
|
|
543
|
+
const perpY = dx / (distance || 1);
|
|
544
|
+
const sign = Math.random() < 0.5 ? -1 : 1;
|
|
545
|
+
const cx = this.mouseX + dx / 2 + perpX * bowMagnitude * sign;
|
|
546
|
+
const cy = this.mouseY + dy / 2 + perpY * bowMagnitude * sign;
|
|
547
|
+
for (let i = 1; i <= steps; i++) {
|
|
548
|
+
const t = i / steps;
|
|
549
|
+
// Quadratic bezier: (1-t)^2 * P0 + 2(1-t)t * P1 + t^2 * P2
|
|
550
|
+
const oneMinusT = 1 - t;
|
|
551
|
+
const px = oneMinusT * oneMinusT * this.mouseX + 2 * oneMinusT * t * cx + t * t * x;
|
|
552
|
+
const py = oneMinusT * oneMinusT * this.mouseY + 2 * oneMinusT * t * cy + t * t * y;
|
|
553
|
+
await this.page.mouse.move(px, py);
|
|
554
|
+
// 6-18ms per step → ~150-400ms total travel for a typical click.
|
|
555
|
+
await this.sleep(rand(6, 18));
|
|
556
|
+
}
|
|
557
|
+
}
|
|
558
|
+
// ───────────── Tier 2 captcha handling ─────────────
|
|
559
|
+
// Detects and handles visible-mode captcha widgets (Cloudflare
|
|
560
|
+
// Turnstile, reCAPTCHA v2 checkbox). Returns:
|
|
561
|
+
// { found: false } - no widget present
|
|
562
|
+
// { found: true, solved: true } - we clicked it and the page
|
|
563
|
+
// accepted the resulting token
|
|
564
|
+
// { found: true, solved: false } - we clicked it but the
|
|
565
|
+
// challenge didn't pass
|
|
566
|
+
// within the timeout
|
|
567
|
+
//
|
|
568
|
+
// Strategy: locate the third-party iframe, click at the checkbox's
|
|
569
|
+
// typical position (inside the widget's bounding box, near the
|
|
570
|
+
// left), then poll for the success signal:
|
|
571
|
+
// - Turnstile: `input[name="cf-turnstile-response"][value]` populated
|
|
572
|
+
// - reCAPTCHA: `textarea[name="g-recaptcha-response"]` populated
|
|
573
|
+
//
|
|
574
|
+
// The click + wait is the entire "solve." The challenge JS runs
|
|
575
|
+
// inside the iframe under Cloudflare/Google's origin — we can't
|
|
576
|
+
// touch it directly. What we CAN do is trigger the click that
|
|
577
|
+
// starts the challenge, then wait for the widget's host page to
|
|
578
|
+
// receive the token via postMessage and inject it into the form.
|
|
579
|
+
//
|
|
580
|
+
// Honest limits:
|
|
581
|
+
// - "Invisible" Turnstile/reCAPTCHA-v3 doesn't need this method
|
|
582
|
+
// because there's no widget to click; the existing Tier 1
|
|
583
|
+
// humanization is what gets you past those.
|
|
584
|
+
// - When CF decides this user is suspicious enough to issue a
|
|
585
|
+
// full challenge image grid, this method won't help — the
|
|
586
|
+
// iframe will render the grid, our click won't solve it, and
|
|
587
|
+
// we'll time out with `solved: false`.
|
|
588
|
+
async solveVisibleCaptcha(timeoutMs = 30000) {
|
|
589
|
+
if (!this.page)
|
|
590
|
+
throw new Error("Browser not started");
|
|
591
|
+
// Locate the widget. Turnstile and reCAPTCHA both use distinctive
|
|
592
|
+
// iframe URLs that are easy to discriminate.
|
|
593
|
+
const widget = await this.findCaptchaWidget();
|
|
594
|
+
if (widget === null)
|
|
595
|
+
return { found: false };
|
|
596
|
+
// Click at the checkbox position. Turnstile's checkbox sits at
|
|
597
|
+
// roughly (28, 32) inside its iframe (the iframe is typically
|
|
598
|
+
// 300x65 with the box on the left). reCAPTCHA v2 checkbox is at
|
|
599
|
+
// (30, 30) inside a 304x78 iframe. Both tolerate clicks anywhere
|
|
600
|
+
// in the left 60px of the widget.
|
|
601
|
+
const clickX = widget.box.x + 28;
|
|
602
|
+
const clickY = widget.box.y + widget.box.height / 2;
|
|
603
|
+
// Use the humanized path so the click looks like a real user
|
|
604
|
+
// tapping the box (Cloudflare's post-click challenge correlates
|
|
605
|
+
// mouse-entry velocity with bot-likelihood).
|
|
606
|
+
if (this.humanize) {
|
|
607
|
+
await this.bezierMouseTo(clickX, clickY);
|
|
608
|
+
await this.sleep(rand(120, 350));
|
|
609
|
+
}
|
|
610
|
+
await this.page.mouse.click(clickX, clickY);
|
|
611
|
+
this.mouseX = clickX;
|
|
612
|
+
this.mouseY = clickY;
|
|
613
|
+
// Poll for the success token. We check both Turnstile and reCAPTCHA
|
|
614
|
+
// selectors because some sites embed multiple widgets and we want
|
|
615
|
+
// either to count.
|
|
616
|
+
const start = Date.now();
|
|
617
|
+
const pollIntervalMs = 500;
|
|
618
|
+
while (Date.now() - start < timeoutMs) {
|
|
619
|
+
await this.sleep(pollIntervalMs);
|
|
620
|
+
const solved = await this.page.evaluate(() => {
|
|
621
|
+
const turnstile = document.querySelector('input[name="cf-turnstile-response"]');
|
|
622
|
+
if (turnstile !== null && turnstile.value.length > 0)
|
|
623
|
+
return true;
|
|
624
|
+
const recaptcha = document.querySelector('textarea[name="g-recaptcha-response"]');
|
|
625
|
+
if (recaptcha !== null && recaptcha.value.length > 0)
|
|
626
|
+
return true;
|
|
627
|
+
// Some Turnstile installs use a managed mode that emits its
|
|
628
|
+
// own attribute on the host div when solved.
|
|
629
|
+
const cfManaged = document.querySelector(".cf-turnstile[data-state='success']");
|
|
630
|
+
if (cfManaged !== null)
|
|
631
|
+
return true;
|
|
632
|
+
return false;
|
|
633
|
+
});
|
|
634
|
+
if (solved) {
|
|
635
|
+
return { found: true, solved: true, kind: widget.kind };
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
// Timed out — the challenge didn't pass. We don't loop or retry
|
|
639
|
+
// because Cloudflare scoring is sticky for a given session; a
|
|
640
|
+
// failed solve usually means the entire session is flagged and
|
|
641
|
+
// further clicks won't help.
|
|
642
|
+
return { found: true, solved: false, kind: widget.kind };
|
|
643
|
+
}
|
|
644
|
+
// Locates the captcha widget on the current page. Returns the
|
|
645
|
+
// iframe's bounding box and which provider it is, or null if no
|
|
646
|
+
// visible widget is present.
|
|
647
|
+
async findCaptchaWidget() {
|
|
648
|
+
if (!this.page)
|
|
649
|
+
throw new Error("Browser not started");
|
|
650
|
+
// Cloudflare Turnstile iframes look like:
|
|
651
|
+
// https://challenges.cloudflare.com/cdn-cgi/challenge-platform/h/g/turnstile/if/...
|
|
652
|
+
// reCAPTCHA v2 iframes look like:
|
|
653
|
+
// https://www.google.com/recaptcha/api2/anchor?...
|
|
654
|
+
const candidates = [
|
|
655
|
+
{ kind: "turnstile", selector: 'iframe[src*="challenges.cloudflare.com"]' },
|
|
656
|
+
{ kind: "recaptcha", selector: 'iframe[src*="recaptcha/api2"]' },
|
|
657
|
+
];
|
|
658
|
+
for (const { kind, selector } of candidates) {
|
|
659
|
+
const locator = this.page.locator(selector);
|
|
660
|
+
const count = await locator.count();
|
|
661
|
+
if (count === 0)
|
|
662
|
+
continue;
|
|
663
|
+
// Some pages embed multiple widgets (e.g., one in the signup
|
|
664
|
+
// form, one in a hidden login modal). Take the first visible
|
|
665
|
+
// one with a non-trivial bounding box.
|
|
666
|
+
for (let i = 0; i < count; i++) {
|
|
667
|
+
const el = locator.nth(i);
|
|
668
|
+
const box = await el.boundingBox();
|
|
669
|
+
if (box === null)
|
|
670
|
+
continue;
|
|
671
|
+
if (box.width < 50 || box.height < 30)
|
|
672
|
+
continue; // hidden/clipped
|
|
673
|
+
return { kind, box };
|
|
674
|
+
}
|
|
675
|
+
}
|
|
676
|
+
return null;
|
|
677
|
+
}
|
|
678
|
+
// Small mouse wiggle near the current position. Used during prewarm
|
|
679
|
+
// so the page sees pointer events before we navigate away.
|
|
680
|
+
async jitterMouse() {
|
|
681
|
+
if (!this.page)
|
|
682
|
+
throw new Error("Browser not started");
|
|
683
|
+
const wiggles = rand(2, 5);
|
|
684
|
+
for (let i = 0; i < wiggles; i++) {
|
|
685
|
+
const nx = this.mouseX + rand(-50, 50);
|
|
686
|
+
const ny = this.mouseY + rand(-50, 50);
|
|
687
|
+
await this.page.mouse.move(nx, ny);
|
|
688
|
+
this.mouseX = nx;
|
|
689
|
+
this.mouseY = ny;
|
|
690
|
+
await this.sleep(rand(40, 120));
|
|
691
|
+
}
|
|
692
|
+
}
|
|
693
|
+
sleep(ms) {
|
|
694
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
695
|
+
}
|
|
696
|
+
async wait(seconds) {
|
|
697
|
+
await new Promise((resolve) => setTimeout(resolve, seconds * 1000));
|
|
698
|
+
}
|
|
699
|
+
async screenshot() {
|
|
700
|
+
if (!this.page)
|
|
701
|
+
throw new Error("Browser not started");
|
|
702
|
+
const buffer = await this.page.screenshot({ fullPage: false });
|
|
703
|
+
return buffer.toString("base64");
|
|
704
|
+
}
|
|
705
|
+
async getState() {
|
|
706
|
+
if (!this.page)
|
|
707
|
+
throw new Error("Browser not started");
|
|
708
|
+
return {
|
|
709
|
+
url: this.page.url(),
|
|
710
|
+
title: await this.page.title(),
|
|
711
|
+
html: await this.page.content(),
|
|
712
|
+
screenshot: await this.screenshot(),
|
|
713
|
+
};
|
|
714
|
+
}
|
|
715
|
+
async extractText() {
|
|
716
|
+
if (!this.page)
|
|
717
|
+
throw new Error("Browser not started");
|
|
718
|
+
return await this.page.textContent("body") || "";
|
|
719
|
+
}
|
|
720
|
+
async close() {
|
|
721
|
+
if (this.page)
|
|
722
|
+
await this.page.close();
|
|
723
|
+
if (this.browser)
|
|
724
|
+
await this.browser.close();
|
|
725
|
+
}
|
|
726
|
+
}
|
|
727
|
+
// Random integer in [min, max]. We use Math.random() (not crypto)
|
|
728
|
+
// because these values are used for timing only — predictability
|
|
729
|
+
// isn't a security concern. The shape of the distribution matters
|
|
730
|
+
// for behavior scoring, but uniform-in-range is close enough to the
|
|
731
|
+
// human distribution that scorers can't reliably distinguish.
|
|
732
|
+
function rand(min, max) {
    // Uniform pick from the inclusive range [min, max]: scale
    // Math.random() across the number of slots, shift by `min`, and
    // floor the sum.
    const slots = max - min + 1;
    const offset = Math.random() * slots;
    return Math.floor(min + offset);
}
|
|
735
|
+
// Score signup-form submit candidates by visible text; return the index
|
|
736
|
+
// of the best, or null when none scores positive. Signup pages commonly
|
|
737
|
+
// render OAuth buttons ("Continue with Google" / "GitHub") as
|
|
738
|
+
// button[type=submit] next to the real account-creation button, so a
|
|
739
|
+
// generic selector resolves to several — this picks the right one.
|
|
740
|
+
//
|
|
741
|
+
// Same shape and rationale as agent.ts's pickVerificationLink: a positive
|
|
742
|
+
// score gate so an OAuth-only page (every candidate negative) returns
|
|
743
|
+
// null rather than mis-clicking "Continue with Google".
|
|
744
|
+
//
|
|
745
|
+
// Exported for unit testing — the scoring is the load-bearing logic.
|
|
746
|
+
/**
 * Picks the most likely "create my account" button from a list of button
 * labels.
 *
 * Each label is scored by the phrases it contains: account-creation wording
 * scores positive, OAuth/SSO provider names and sign-in wording score
 * negative. The highest strictly positive score wins, and on a tie the
 * earliest candidate is kept.
 *
 * Labels are normalized before matching: a null/undefined (or otherwise
 * non-string) entry is treated as an empty label instead of throwing, and
 * any run of whitespace (newlines, tabs, NBSP, repeated spaces) collapses
 * to a single space, so "Sign\n  up" still matches "sign up".
 *
 * @param {Array<string|null|undefined>} texts - Visible text of each candidate, in DOM order.
 * @returns {number|null} Index of the best candidate, or null when no candidate scores above zero.
 */
export function pickSubmitButtonIndex(texts) {
    let bestIndex = null;
    let bestScore = 0;
    texts.forEach((raw, i) => {
        const label = typeof raw === "string" ? raw : "";
        const t = label.toLowerCase().replace(/\s+/g, " ").trim();
        let score = 0;
        if (t.includes("create account") || t.includes("create your account")) {
            score += 12;
        }
        if (t.includes("sign up") || t.includes("signup")) {
            score += 10;
        }
        if (t.includes("register")) {
            score += 8;
        }
        if (t.includes("get started")) {
            score += 6;
        }
        // "Continue" is often the real submit on single-field signup forms;
        // weak positive so it wins over nothing but loses to OAuth markers.
        if (t.includes("continue")) {
            score += 2;
        }
        // OAuth / SSO buttons are submit-typed too. The provider name is
        // the reliable discriminator, so drive those firmly negative.
        if (/\b(google|github|gitlab|microsoft|apple|facebook|okta|sso)\b/.test(t)) {
            score -= 20;
        }
        if (t.includes("sign in") || t.includes("log in") || t.includes("login")) {
            score -= 12;
        }
        // Strictly greater: zero/negative scores never win, first tie wins.
        if (score > bestScore) {
            bestScore = score;
            bestIndex = i;
        }
    });
    return bestIndex;
}
|
|
777
|
+
// Parse a UNIVERSAL_BOT_PROXY_URL — e.g. "http://user:pass@host:8080" or
|
|
778
|
+
// "socks5://host:1080" — into Playwright's proxy option shape. Playwright
|
|
779
|
+
// wants credentials separate from `server`, so we split them out and
|
|
780
|
+
// percent-decode them (residential providers embed session IDs with
|
|
781
|
+
// reserved characters in the username, which arrive %-encoded).
|
|
782
|
+
//
|
|
783
|
+
// Throws on a URL the WHATWG parser rejects, or one with no host (a bare
|
|
784
|
+
// "host:port" parses as a scheme with an empty host) — the caller logs
|
|
785
|
+
// and falls back to a direct connection.
|
|
786
|
+
//
|
|
787
|
+
// Exported for unit testing — URL parsing is the error-prone bit.
|
|
788
|
+
/**
 * Converts a proxy URL string into a `{ server, username?, password? }`
 * settings object.
 *
 * `server` is `<protocol>//<host>` (host includes the port when the URL
 * carries a non-default one). `username` / `password` are only set when
 * present in the URL and are percent-decoded.
 *
 * @param {string} raw - Proxy URL, e.g. "http://user:pass@host:8080" or "socks5://host:1080". Surrounding whitespace is ignored.
 * @returns {{server: string, username?: string, password?: string}} Parsed proxy settings.
 * @throws {TypeError} When the URL parser rejects the input.
 * @throws {URIError} When a credential contains a malformed percent escape.
 * @throws {Error} When the URL parses but has no host (e.g. a scheme-less "host:port").
 */
export function parseProxyUrl(raw) {
    const u = new URL(raw.trim());
    if (u.hostname.length === 0) {
        // A scheme-less "user:pass@host:port" parses as scheme "user:" with
        // an empty host and lands here. Echoing it verbatim would put the
        // password into whatever log the caller writes, so mask everything
        // up to the last "@". Inputs without "@" are reported unchanged.
        const at = raw.lastIndexOf("@");
        const shown = at === -1 ? raw : `***@${raw.slice(at + 1)}`;
        throw new Error(`proxy URL has no host: "${shown}" (expected e.g. http://host:port)`);
    }
    // `host` includes the port; `protocol` keeps its trailing ":".
    const settings = { server: `${u.protocol}//${u.host}` };
    if (u.username.length > 0) {
        settings.username = decodeURIComponent(u.username);
    }
    if (u.password.length > 0) {
        settings.password = decodeURIComponent(u.password);
    }
    return settings;
}
|
|
801
|
+
// Should this run route through the configured proxy? True when the
|
|
802
|
+
// egress network is datacenter-class (the case the proxy exists for) or
|
|
803
|
+
// when the operator forced it on. Residential/unknown without the
|
|
804
|
+
// override stay direct — the ~80% who don't need it pay nothing.
|
|
805
|
+
//
|
|
806
|
+
// Exported for unit testing.
|
|
807
|
+
/**
 * Decides whether this run should use the configured proxy.
 *
 * @param {string} asnClass - Classification of the egress network; only the value "datacenter" triggers proxying.
 * @param {boolean} forceAlways - Operator override; when truthy it is returned as-is.
 * @returns {boolean} The override when set, otherwise whether `asnClass` is "datacenter".
 */
export function shouldRouteThroughProxy(asnClass, forceAlways) {
    if (forceAlways) {
        return forceAlways;
    }
    return asnClass === "datacenter";
}
|
|
810
|
+
//# sourceMappingURL=browser.js.map
|