spider-browser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +354 -0
- package/dist/index.d.ts +1223 -0
- package/dist/index.js +3609 -0
- package/dist/index.js.map +1 -0
- package/package.json +48 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,3609 @@
|
|
|
1
|
+
// Bundler-generated runtime helpers (module wiring only; no application logic).
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Wrap a one-entry table of the form `{ "path"() { ... } }` in a lazy initializer.
 * The first call runs the entry and caches its return value; later calls
 * return the cached value without running it again.
 */
var __esm = (fn, res) => function __init() {
  if (fn) {
    const initializer = fn[__getOwnPropNames(fn)[0]];
    // Clear the table before invoking so the body can never run twice,
    // even if it throws or re-enters.
    fn = 0;
    res = initializer(0);
  }
  return res;
};

/** Define every entry of `all` on `target` as an enumerable getter (a live binding). */
var __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirror the own properties of `from` onto `to` as getters, skipping keys that
 * `to` already owns and the single key named by `except`. Returns `to`.
 */
var __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, { get: () => from[key], enumerable: !desc || desc.enumerable });
  }
  return to;
};

/** Build a fresh object flagged with a non-enumerable `__esModule: true` that mirrors `mod`. */
var __toCommonJS = (mod) => {
  const flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
};
|
|
21
|
+
|
|
22
|
+
// utils/errors.ts
|
|
23
|
+
var SpiderError, ConnectionError, AuthError, RateLimitError, BlockedError, BackendUnavailableError, TimeoutError, ProtocolError, NavigationError, LLMError;
var init_errors = __esm({
  "utils/errors.ts"() {
    "use strict";

    /**
     * Base class of every SDK error.
     * `code` is a machine-readable identifier; `retryable` tells callers
     * whether repeating the operation can reasonably succeed.
     */
    SpiderError = class extends Error {
      constructor(message, code, retryable = false) {
        super(message);
        this.code = code;
        this.retryable = retryable;
        this.name = "SpiderError";
      }
    };

    /** WebSocket-level failure; `wsCode` carries the close code when one is supplied. Retryable. */
    ConnectionError = class extends SpiderError {
      constructor(message, wsCode) {
        super(message, "CONNECTION_ERROR", true);
        this.wsCode = wsCode;
        this.name = "ConnectionError";
      }
    };

    /** Authentication failure. Not retryable. */
    AuthError = class extends SpiderError {
      constructor(message) {
        super(message, "AUTH_ERROR", false);
        this.name = "AuthError";
      }
    };

    /** Rate limit hit; `retryAfterMs` is the suggested wait when supplied. Retryable. */
    RateLimitError = class extends SpiderError {
      constructor(message, retryAfterMs) {
        super(message, "RATE_LIMIT", true);
        this.retryAfterMs = retryAfterMs;
        this.name = "RateLimitError";
      }
    };

    /** Request was blocked. Retryable. */
    BlockedError = class extends SpiderError {
      constructor(message) {
        super(message, "BLOCKED", true);
        this.name = "BlockedError";
      }
    };

    /** Backend is unavailable. Retryable. */
    BackendUnavailableError = class extends SpiderError {
      constructor(message) {
        super(message, "BACKEND_UNAVAILABLE", true);
        this.name = "BackendUnavailableError";
      }
    };

    /** An operation exceeded its deadline. Retryable. */
    TimeoutError = class extends SpiderError {
      constructor(message) {
        super(message, "TIMEOUT", true);
        this.name = "TimeoutError";
      }
    };

    /** Protocol-level failure. Not retryable. */
    ProtocolError = class extends SpiderError {
      constructor(message) {
        super(message, "PROTOCOL_ERROR", false);
        this.name = "ProtocolError";
      }
    };

    /** Page navigation failed. Retryable. */
    NavigationError = class extends SpiderError {
      constructor(message) {
        super(message, "NAVIGATION_ERROR", true);
        this.name = "NavigationError";
      }
    };

    /** LLM provider call failed or returned an unusable response. Retryable. */
    LLMError = class extends SpiderError {
      constructor(message) {
        super(message, "LLM_ERROR", true);
        this.name = "LLMError";
      }
    };
  }
});
|
|
93
|
+
|
|
94
|
+
// ai/providers/openai.ts
|
|
95
|
+
var openai_exports = {};
__export(openai_exports, {
  OpenAICompatibleProvider: () => OpenAICompatibleProvider
});
var DEFAULT_URLS, OpenAICompatibleProvider;
var init_openai = __esm({
  "ai/providers/openai.ts"() {
    "use strict";
    init_errors();

    // Default chat-completions endpoint per provider key.
    DEFAULT_URLS = {
      openai: "https://api.openai.com/v1/chat/completions",
      openrouter: "https://openrouter.ai/api/v1/chat/completions"
    };

    /**
     * Chat client for OpenAI-style chat-completions endpoints.
     *
     * Config fields read: `baseUrl`, `provider`, `apiKey`, `model`,
     * `maxTokens` (default 4096) and `temperature` (default 0.1).
     * The endpoint is `baseUrl`, else the provider default, else OpenAI's.
     */
    OpenAICompatibleProvider = class {
      url;
      apiKey;
      model;
      maxTokens;
      temperature;
      constructor(config) {
        this.url = config.baseUrl ?? DEFAULT_URLS[config.provider] ?? DEFAULT_URLS.openai;
        this.apiKey = config.apiKey;
        this.model = config.model;
        this.maxTokens = config.maxTokens ?? 4096;
        this.temperature = config.temperature ?? 0.1;
      }

      /**
       * POST `messages` to the endpoint and return the first choice's text.
       *
       * @param messages chat messages, forwarded as-is
       * @param options  `{ jsonMode }` — when truthy, requests `response_format: json_object`
       * @returns the string at `choices[0].message.content`
       * @throws LLMError on a non-2xx status or when that field is not a string
       */
      async chat(messages, options) {
        const body = {
          model: this.model,
          messages,
          max_tokens: this.maxTokens,
          temperature: this.temperature
        };
        if (options?.jsonMode) {
          body.response_format = { type: "json_object" };
        }
        const resp = await fetch(this.url, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            Authorization: `Bearer ${this.apiKey}`
          },
          body: JSON.stringify(body)
        });
        if (!resp.ok) {
          // Body is best-effort context for the error message only.
          const text = await resp.text().catch(() => "");
          throw new LLMError(`OpenAI API error ${resp.status}: ${text}`);
        }
        const json = await resp.json();
        const content = json?.choices?.[0]?.message?.content;
        if (typeof content !== "string") {
          throw new LLMError("OpenAI response missing choices[0].message.content");
        }
        return content;
      }

      /**
       * Run `chat` in JSON mode and parse the reply.
       *
       * The whole reply is tried first, then the body of the first Markdown
       * code fence (``` or ```json). Every parse failure surfaces as an
       * LLMError; previously an invalid fenced body escaped as a raw SyntaxError.
       *
       * @returns the parsed JSON value
       * @throws LLMError when no candidate parses (message carries the first 200 chars)
       */
      async chatJSON(messages) {
        const text = await this.chat(messages, { jsonMode: true });
        const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/);
        const candidates = fenced ? [text, fenced[1]] : [text];
        for (const candidate of candidates) {
          try {
            return JSON.parse(candidate);
          } catch {
            // Not valid JSON — fall through to the next candidate.
          }
        }
        throw new LLMError(`LLM response is not valid JSON: ${text.slice(0, 200)}`);
      }
    };
  }
});
|
|
165
|
+
|
|
166
|
+
// ai/providers/anthropic.ts
|
|
167
|
+
var anthropic_exports = {};
__export(anthropic_exports, {
  AnthropicProvider: () => AnthropicProvider
});
var AnthropicProvider;
var init_anthropic = __esm({
  "ai/providers/anthropic.ts"() {
    "use strict";
    init_errors();

    /**
     * Chat client for the Anthropic Messages endpoint. Accepts the same
     * OpenAI-style message list as the other provider and converts it.
     *
     * Config fields read: `baseUrl`, `apiKey`, `model`,
     * `maxTokens` (default 4096) and `temperature` (default 0.1).
     */
    AnthropicProvider = class {
      url;
      apiKey;
      model;
      maxTokens;
      temperature;
      constructor(config) {
        this.url = config.baseUrl ?? "https://api.anthropic.com/v1/messages";
        this.apiKey = config.apiKey;
        this.model = config.model;
        this.maxTokens = config.maxTokens ?? 4096;
        this.temperature = config.temperature ?? 0.1;
      }

      /**
       * Send `messages` and return the reply text.
       *
       * - `system` messages are lifted into the request's top-level `system`
       *   field. All of them are used, joined with newlines; only their text
       *   parts contribute, so non-text parts no longer become "undefined".
       * - `image_url` parts holding a base64 data URL become base64 image
       *   sources; any other URL becomes a URL image source.
       * - The reply is the first response block carrying a string `text`,
       *   so a leading non-text block no longer causes a failure.
       *
       * @param messages OpenAI-style chat messages
       * @param _options accepted for interface parity; unused
       * @throws LLMError on a non-2xx status or when no text block is present
       */
      async chat(messages, _options) {
        const systemMessages = messages.filter((m) => m.role === "system");
        const anthropicMessages = messages.filter((m) => m.role !== "system").map((m) => {
          if (typeof m.content === "string") {
            return { role: m.role, content: m.content };
          }
          const parts = m.content.map((part) => {
            if (part.type === "text") {
              return { type: "text", text: part.text };
            }
            const dataUrl = part.image_url.url;
            const match = dataUrl.match(/^data:(image\/\w+);base64,(.+)$/);
            if (match) {
              return {
                type: "image",
                source: {
                  type: "base64",
                  media_type: match[1],
                  data: match[2]
                }
              };
            }
            return {
              type: "image",
              source: { type: "url", url: dataUrl }
            };
          });
          return { role: m.role, content: parts };
        });
        const body = {
          model: this.model,
          max_tokens: this.maxTokens,
          temperature: this.temperature,
          messages: anthropicMessages
        };
        if (systemMessages.length > 0) {
          body.system = systemMessages.map((m) => {
            if (typeof m.content === "string") {
              return m.content;
            }
            return m.content.filter((p) => typeof p?.text === "string").map((p) => p.text).join("\n");
          }).join("\n");
        }
        const resp = await fetch(this.url, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            "x-api-key": this.apiKey,
            "anthropic-version": "2023-06-01"
          },
          body: JSON.stringify(body)
        });
        if (!resp.ok) {
          // Body is best-effort context for the error message only.
          const text = await resp.text().catch(() => "");
          throw new LLMError(`Anthropic API error ${resp.status}: ${text}`);
        }
        const json = await resp.json();
        const blocks = Array.isArray(json?.content) ? json.content : [];
        const content = blocks.find((block) => typeof block?.text === "string")?.text;
        if (typeof content !== "string") {
          throw new LLMError("Anthropic response missing content[0].text");
        }
        return content;
      }

      /**
       * Run `chat` and parse the reply as JSON.
       *
       * The whole reply is tried first, then the body of the first Markdown
       * code fence (``` or ```json). Every parse failure surfaces as an
       * LLMError; previously an invalid fenced body escaped as a raw SyntaxError.
       *
       * @returns the parsed JSON value
       * @throws LLMError when no candidate parses (message carries the first 200 chars)
       */
      async chatJSON(messages) {
        const text = await this.chat(messages);
        const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/);
        const candidates = fenced ? [text, fenced[1]] : [text];
        for (const candidate of candidates) {
          try {
            return JSON.parse(candidate);
          } catch {
            // Not valid JSON — fall through to the next candidate.
          }
        }
        throw new LLMError(`LLM response is not valid JSON: ${text.slice(0, 200)}`);
      }
    };
  }
});
|
|
263
|
+
|
|
264
|
+
// protocol/transport.ts
|
|
265
|
+
// Run the lazy utils/errors.ts initializer so the error classes referenced by
// the Transport class below (AuthError, ConnectionError, TimeoutError, ...) are assigned.
// The initializer body runs once; repeat calls return the cached result.
init_errors();
|
|
266
|
+
import WebSocket from "ws";
|
|
267
|
+
|
|
268
|
+
// utils/logger.ts
|
|
269
|
+
// Numeric severity per level name; a message is emitted when the logger's
// threshold is <= the message's severity. "silent" sits above every message.
var LEVELS = {
  debug: 0,
  info: 1,
  warn: 2,
  error: 3,
  silent: 4
};

/**
 * Minimal console logger with a severity threshold.
 *
 * Lines look like `[<ISO timestamp>] <LEVEL> spider-browser: <msg>[ <json data>]`.
 * ERROR goes to console.error, WARN to console.warn, the rest to console.log.
 */
var Logger = class {
  /** Current numeric threshold (a value from LEVELS). */
  level;

  /**
   * @param level level name; an unrecognized name falls back to "info"
   *              rather than silently disabling all output
   */
  constructor(level = "info") {
    this.level = LEVELS.info;
    this.setLevel(level);
  }

  /**
   * Change the threshold. An unrecognized level name is ignored and the
   * current threshold is kept. Previously it stored `undefined`, which
   * suppressed every message, errors included.
   */
  setLevel(level) {
    const threshold = LEVELS[level];
    // The typeof check also rejects inherited keys such as "constructor".
    if (typeof threshold === "number") {
      this.level = threshold;
    }
  }

  debug(msg, data) {
    if (this.level <= LEVELS.debug) this.log("DEBUG", msg, data);
  }

  info(msg, data) {
    if (this.level <= LEVELS.info) this.log("INFO", msg, data);
  }

  warn(msg, data) {
    if (this.level <= LEVELS.warn) this.log("WARN", msg, data);
  }

  error(msg, data) {
    if (this.level <= LEVELS.error) this.log("ERROR", msg, data);
  }

  /**
   * Format and write one line unconditionally (threshold checks live in the
   * level methods). Truthy `data` is appended as JSON. If it cannot be
   * serialized (cycle, BigInt, ...) a placeholder is appended instead, so
   * logging never throws into the caller.
   */
  log(level, msg, data) {
    const ts = (/* @__PURE__ */ new Date()).toISOString();
    let extra = "";
    if (data) {
      try {
        extra = " " + JSON.stringify(data);
      } catch {
        extra = " [unserializable data]";
      }
    }
    const line = `[${ts}] ${level} spider-browser: ${msg}${extra}`;
    if (level === "ERROR") {
      console.error(line);
    } else if (level === "WARN") {
      console.warn(line);
    } else {
      console.log(line);
    }
  }
};

// Shared module-level logger, threshold "info".
var logger = new Logger("info");
|
|
310
|
+
|
|
311
|
+
// protocol/transport.ts
|
|
312
|
+
/**
 * WebSocket transport to the browser server.
 *
 * Owns the socket, forwards raw JSON text to a single message handler, and
 * tracks metering data reported by the server: upgrade-response headers
 * `x-sc` / `x-sr` and `Spider.metering` events.
 * Lifecycle events are published on `emitter`: "ws.open", "ws.close",
 * "ws.error" and "metering".
 */
var Transport = class {
  /** Open socket, or null when none has opened or after close(). */
  ws = null;
  /** Single consumer of raw message text; null until onMessage() is called. */
  messageHandler = null;
  currentBrowser;
  opts;
  emitter;
  _stealthLevel;
  /** Generation counter — incremented on each connect. Prevents stale WS messages from being processed. */
  generation = 0;
  /** Credits remaining from last upgrade response (x-sc header). */
  _upgradeCredits;
  /** Stealth tier from last upgrade response (x-sr header). */
  _upgradeStealthTier;
  /** Credits consumed during this session (from Spider.metering event). */
  _sessionCreditsUsed;

  /**
   * @param opts    reads `serverUrl`, `apiKey`, `browser`, `url`, `captcha`,
   *                `stealthLevel` and `connectTimeoutMs`
   * @param emitter object exposing `emit(name, payload)`
   */
  constructor(opts, emitter) {
    this.opts = opts;
    // "auto" starts on chrome; reconnect() can switch browsers later.
    this.currentBrowser = opts.browser === "auto" ? "chrome" : opts.browser;
    this.emitter = emitter;
    this._stealthLevel = opts.stealthLevel ?? 0;
  }

  get browser() {
    return this.currentBrowser;
  }

  get connected() {
    return this.ws?.readyState === WebSocket.OPEN;
  }

  get stealthLevel() {
    return this._stealthLevel;
  }

  /** Clamped to the range 0..3. */
  set stealthLevel(level) {
    this._stealthLevel = Math.max(0, Math.min(3, level));
  }

  /** Credits remaining from the WebSocket upgrade response. */
  get upgradeCredits() {
    return this._upgradeCredits;
  }

  /** Active stealth tier from the WebSocket upgrade response. */
  get upgradeStealthTier() {
    return this._upgradeStealthTier;
  }

  /** Credits consumed during this session (from server Spider.metering event). */
  get sessionCreditsUsed() {
    return this._sessionCreditsUsed;
  }

  /**
   * Ask the server for the current session cost via a `Spider.getMetering`
   * request/response pair (fixed request id 2147483640).
   *
   * The message handler is wrapped for the duration of the request so the
   * reply can be intercepted; every other message is passed through to the
   * previous handler. The wrapper is removed on reply, on timeout and on
   * send failure. Previously a timeout left it installed, so wrappers
   * accumulated. It is only removed while it is still the installed handler,
   * so a handler registered through onMessage() meanwhile is not clobbered.
   *
   * @param timeoutMs how long to wait for the reply (default 3000)
   * @returns credits used so far, or the last known value (0 if none) when
   *          the socket is not open, the send fails or the wait times out
   */
  async requestMetering(timeoutMs = 3e3) {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
      return this._sessionCreditsUsed ?? 0;
    }
    const meteringId = 2147483640;
    return new Promise((resolve) => {
      const origHandler = this.messageHandler;
      let timer;
      const finish = (credits) => {
        clearTimeout(timer);
        if (this.messageHandler === interceptor) {
          this.messageHandler = origHandler;
        }
        resolve(credits);
      };
      const interceptor = (data) => {
        // Cheap substring test first, so unrelated traffic is not parsed twice.
        if (data.includes(`"id":${meteringId}`)) {
          try {
            const msg = JSON.parse(data);
            if (msg.id === meteringId && msg.result?.credits_used !== void 0) {
              this._sessionCreditsUsed = msg.result.credits_used;
              finish(msg.result.credits_used);
              return;
            }
          } catch {
            // Malformed or not our reply — let the regular handler see it.
          }
        }
        if (origHandler) origHandler(data);
      };
      this.messageHandler = interceptor;
      timer = setTimeout(() => finish(this._sessionCreditsUsed ?? 0), timeoutMs);
      try {
        this.send(JSON.stringify({ id: meteringId, method: "Spider.getMetering" }));
      } catch {
        finish(this._sessionCreditsUsed ?? 0);
      }
    });
  }

  /** Set the handler that receives raw JSON messages from the WebSocket. */
  onMessage(handler) {
    this.messageHandler = handler;
  }

  /**
   * Connect to the browser_server WebSocket with retry.
   *
   * No-op when already open. Retries with a linear backoff of 500ms times
   * the attempt number. An AuthError aborts immediately.
   *
   * @param maxAttempts total attempts (default 3)
   * @throws the last connection error, or a ConnectionError when
   *         `maxAttempts` allows no attempt at all (previously `undefined`
   *         was thrown in that case)
   */
  async connect(maxAttempts = 3) {
    if (this.ws && this.ws.readyState === WebSocket.OPEN) {
      return;
    }
    let lastError;
    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
      try {
        return await this.connectInternal();
      } catch (err) {
        lastError = err instanceof Error ? err : new Error(String(err));
        if (lastError instanceof AuthError) throw lastError;
        if (attempt < maxAttempts) {
          const backoff = 500 * attempt;
          logger.warn(`connect attempt ${attempt}/${maxAttempts} failed, retrying in ${backoff}ms`, {
            error: lastError.message
          });
          await new Promise((r) => setTimeout(r, backoff));
        }
      }
    }
    throw lastError ?? new ConnectionError(`connect() made no attempt (maxAttempts=${maxAttempts})`);
  }

  /** Reconnect with a different browser type (used by retry engine). Single attempt, no retry. */
  async reconnect(browser) {
    const prev = this.currentBrowser;
    this.currentBrowser = browser;
    this.close();
    logger.info(`switching browser: ${prev} -> ${browser}`);
    return this.connectInternal();
  }

  /**
   * Send a raw JSON string through the WebSocket.
   * @throws ConnectionError when the socket is not open
   */
  send(data) {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
      throw new ConnectionError("WebSocket is not connected");
    }
    this.ws.send(data);
  }

  /** Close the WebSocket connection, removing event listeners to prevent data mixing. */
  close() {
    if (this.ws) {
      const ws = this.ws;
      this.ws = null;
      ws.removeAllListeners("message");
      ws.removeAllListeners("open");
      ws.removeAllListeners("close");
      ws.removeAllListeners("upgrade");
      ws.removeAllListeners("error");
      // Keep a no-op listener so an "error" emitted during teardown is not unhandled.
      ws.on("error", () => {
      });
      try {
        ws.terminate();
      } catch {
        // Best-effort teardown; nothing useful to do if terminate() throws.
      }
    }
  }

  /**
   * Build the connection URL `<serverUrl>/v1/browser?...`.
   *
   * Query parameters: `token` (always), `browser` (when one is set and is not
   * "auto"), `url`, `ai_captcha` (unless "off") and `s` (stealth level > 0).
   * An unset browser is omitted; previously it produced `browser=undefined`.
   */
  buildUrl() {
    const base = this.opts.serverUrl.replace(/\/$/, "");
    const params = new URLSearchParams();
    params.set("token", this.opts.apiKey);
    if (this.currentBrowser && this.currentBrowser !== "auto") {
      params.set("browser", this.currentBrowser);
    }
    if (this.opts.url) {
      params.set("url", this.opts.url);
    }
    if (this.opts.captcha && this.opts.captcha !== "off") {
      params.set("ai_captcha", this.opts.captcha);
    }
    if (this._stealthLevel > 0) {
      params.set("s", String(this._stealthLevel));
    }
    return `${base}/v1/browser?${params.toString()}`;
  }

  /**
   * Open one socket and settle when it opens, closes, errors or times out.
   *
   * Rejects with TimeoutError after `opts.connectTimeoutMs` (default 30000).
   * A close before open rejects with AuthError for codes 4001/4002,
   * BackendUnavailableError for 4003 and ConnectionError otherwise.
   * An error before open rejects with ConnectionError.
   */
  connectInternal() {
    // Handlers compare against this to ignore traffic from superseded sockets.
    const gen = ++this.generation;
    return new Promise((resolve, reject) => {
      const url = this.buildUrl();
      // Never log the API token.
      logger.debug(`connecting to ${url.replace(/token=[^&]+/, "token=***")}`);
      const ws = new WebSocket(url);
      // Guards against settling twice (timeout vs open vs close vs error).
      let resolved = false;
      const connectMs = this.opts.connectTimeoutMs ?? 3e4;
      const timeout = setTimeout(() => {
        if (!resolved) {
          resolved = true;
          ws.removeAllListeners();
          ws.on("error", () => {
          });
          ws.terminate();
          reject(new TimeoutError(`WebSocket connection timeout (${connectMs}ms)`));
        }
      }, connectMs);
      ws.on("upgrade", (response) => {
        const sc = response.headers["x-sc"];
        const sr = response.headers["x-sr"];
        if (sc) {
          this._upgradeCredits = parseFloat(String(sc));
        }
        if (sr) {
          this._upgradeStealthTier = parseInt(String(sr), 10);
        }
        if (this._upgradeCredits !== void 0) {
          this.emitter.emit("metering", {
            credits: this._upgradeCredits,
            rate: this._upgradeStealthTier ?? 0
          });
        }
      });
      ws.on("open", () => {
        if (!resolved) {
          resolved = true;
          clearTimeout(timeout);
          this.ws = ws;
          this.emitter.emit("ws.open", {});
          logger.info(`connected (browser=${this.currentBrowser}, stealth=${this._stealthLevel})`);
          resolve();
        }
      });
      ws.on("message", (raw) => {
        if (gen !== this.generation) return;
        const str = raw.toString();
        // Metering events are consumed here and not forwarded to the handler.
        if (str.includes('"Spider.metering"')) {
          try {
            const msg = JSON.parse(str);
            if (msg.method === "Spider.metering" && msg.params?.credits_used !== void 0) {
              this._sessionCreditsUsed = msg.params.credits_used;
              this.emitter.emit("metering", {
                credits: this._upgradeCredits ?? 0,
                rate: this._upgradeStealthTier ?? 0,
                session_credits_used: msg.params.credits_used
              });
              return;
            }
          } catch {
            // Not parseable as a metering event — forward it unchanged below.
          }
        }
        if (this.messageHandler) {
          this.messageHandler(str);
        }
      });
      ws.on("close", (code, reason) => {
        const reasonStr = reason.toString();
        if (gen === this.generation) {
          this.emitter.emit("ws.close", { code, reason: reasonStr });
        }
        if (!resolved) {
          resolved = true;
          clearTimeout(timeout);
          if (code === 4001 || code === 4002) {
            reject(new AuthError(`Authentication failed (code ${code}): ${reasonStr}`));
          } else if (code === 4003) {
            reject(new BackendUnavailableError(`Backend unavailable: ${reasonStr}`));
          } else {
            reject(new ConnectionError(`WebSocket closed during connect: ${code} ${reasonStr}`, code));
          }
        }
      });
      ws.on("error", (err) => {
        if (gen === this.generation) {
          this.emitter.emit("ws.error", { error: err });
        }
        if (!resolved) {
          resolved = true;
          clearTimeout(timeout);
          reject(new ConnectionError(`WebSocket error: ${err.message}`));
        }
      });
    });
  }
};
|
|
571
|
+
|
|
572
|
+
// protocol/cdp-session.ts
|
|
573
|
+
// Run the lazy utils/errors.ts initializer so the error classes referenced by
// CDPSession below (TimeoutError, ProtocolError, NavigationError) are assigned.
// The initializer body runs once; repeat calls return the cached result.
init_errors();
|
|
574
|
+
var CDPSession = class {
|
|
575
|
+
nextId = 1;
|
|
576
|
+
pending = /* @__PURE__ */ new Map();
|
|
577
|
+
eventHandlers = /* @__PURE__ */ new Map();
|
|
578
|
+
transport;
|
|
579
|
+
/** CDP session ID from Target.attachToTarget (required for page-level commands). */
|
|
580
|
+
targetSessionId;
|
|
581
|
+
commandTimeoutMs;
|
|
582
|
+
constructor(transport, opts) {
|
|
583
|
+
this.transport = transport;
|
|
584
|
+
this.commandTimeoutMs = opts?.commandTimeoutMs ?? 3e4;
|
|
585
|
+
}
|
|
586
|
+
/** Get the attached target session ID. */
|
|
587
|
+
get sessionId() {
|
|
588
|
+
return this.targetSessionId;
|
|
589
|
+
}
|
|
590
|
+
/** Process a raw message from the transport. Returns true if handled. */
|
|
591
|
+
handleMessage(data) {
|
|
592
|
+
let msg;
|
|
593
|
+
try {
|
|
594
|
+
msg = JSON.parse(data);
|
|
595
|
+
} catch {
|
|
596
|
+
return false;
|
|
597
|
+
}
|
|
598
|
+
if (typeof msg.id === "number") {
|
|
599
|
+
const pending = this.pending.get(msg.id);
|
|
600
|
+
if (pending) {
|
|
601
|
+
this.pending.delete(msg.id);
|
|
602
|
+
clearTimeout(pending.timer);
|
|
603
|
+
pending.resolve(msg);
|
|
604
|
+
return true;
|
|
605
|
+
}
|
|
606
|
+
return false;
|
|
607
|
+
}
|
|
608
|
+
if (typeof msg.method === "string") {
|
|
609
|
+
const handlers = this.eventHandlers.get(msg.method);
|
|
610
|
+
if (handlers) {
|
|
611
|
+
const params = msg.params ?? {};
|
|
612
|
+
for (const h of handlers) {
|
|
613
|
+
try {
|
|
614
|
+
h(params);
|
|
615
|
+
} catch {
|
|
616
|
+
}
|
|
617
|
+
}
|
|
618
|
+
}
|
|
619
|
+
const wildcardHandlers = this.eventHandlers.get("*");
|
|
620
|
+
if (wildcardHandlers) {
|
|
621
|
+
for (const h of wildcardHandlers) {
|
|
622
|
+
try {
|
|
623
|
+
h({ method: msg.method, ...msg.params ?? {} });
|
|
624
|
+
} catch {
|
|
625
|
+
}
|
|
626
|
+
}
|
|
627
|
+
}
|
|
628
|
+
return true;
|
|
629
|
+
}
|
|
630
|
+
return false;
|
|
631
|
+
}
|
|
632
|
+
/** Send a CDP command and wait for the response. */
|
|
633
|
+
async send(method, params) {
|
|
634
|
+
const id = this.nextId++;
|
|
635
|
+
const cmd = { id, method, params: params ?? {} };
|
|
636
|
+
return new Promise((resolve, reject) => {
|
|
637
|
+
const timer = setTimeout(() => {
|
|
638
|
+
this.pending.delete(id);
|
|
639
|
+
reject(new TimeoutError(`CDP command timeout: ${method} (${this.commandTimeoutMs}ms)`));
|
|
640
|
+
}, this.commandTimeoutMs);
|
|
641
|
+
this.pending.set(id, { resolve, reject, timer });
|
|
642
|
+
try {
|
|
643
|
+
this.transport.send(JSON.stringify(cmd));
|
|
644
|
+
} catch (err) {
|
|
645
|
+
this.pending.delete(id);
|
|
646
|
+
clearTimeout(timer);
|
|
647
|
+
reject(err);
|
|
648
|
+
}
|
|
649
|
+
});
|
|
650
|
+
}
|
|
651
|
+
/**
|
|
652
|
+
* Send a CDP command scoped to the attached page session.
|
|
653
|
+
* This is what you use for Page.*, Runtime.*, Input.* commands.
|
|
654
|
+
*/
|
|
655
|
+
async sendToTarget(method, params) {
|
|
656
|
+
if (!this.targetSessionId) {
|
|
657
|
+
throw new ProtocolError("No target session \u2014 call attachToPage() first");
|
|
658
|
+
}
|
|
659
|
+
const id = this.nextId++;
|
|
660
|
+
const cmd = {
|
|
661
|
+
id,
|
|
662
|
+
method,
|
|
663
|
+
params: params ?? {},
|
|
664
|
+
sessionId: this.targetSessionId
|
|
665
|
+
};
|
|
666
|
+
return new Promise((resolve, reject) => {
|
|
667
|
+
const timer = setTimeout(() => {
|
|
668
|
+
this.pending.delete(id);
|
|
669
|
+
reject(new TimeoutError(`CDP command timeout: ${method} (${this.commandTimeoutMs}ms)`));
|
|
670
|
+
}, this.commandTimeoutMs);
|
|
671
|
+
this.pending.set(id, { resolve, reject, timer });
|
|
672
|
+
try {
|
|
673
|
+
this.transport.send(JSON.stringify(cmd));
|
|
674
|
+
} catch (err) {
|
|
675
|
+
this.pending.delete(id);
|
|
676
|
+
clearTimeout(timer);
|
|
677
|
+
reject(err);
|
|
678
|
+
}
|
|
679
|
+
});
|
|
680
|
+
}
|
|
681
|
+
/** Subscribe to a CDP event. */
|
|
682
|
+
on(method, handler) {
|
|
683
|
+
let set = this.eventHandlers.get(method);
|
|
684
|
+
if (!set) {
|
|
685
|
+
set = /* @__PURE__ */ new Set();
|
|
686
|
+
this.eventHandlers.set(method, set);
|
|
687
|
+
}
|
|
688
|
+
set.add(handler);
|
|
689
|
+
}
|
|
690
|
+
/** Unsubscribe from a CDP event. */
|
|
691
|
+
off(method, handler) {
|
|
692
|
+
this.eventHandlers.get(method)?.delete(handler);
|
|
693
|
+
}
|
|
694
|
+
// -------------------------------------------------------------------
|
|
695
|
+
// Browser-level target management
|
|
696
|
+
// -------------------------------------------------------------------
|
|
697
|
+
/**
|
|
698
|
+
* Discover page targets, find or create one, attach to it, and enable
|
|
699
|
+
* the required CDP domains (Page, Runtime).
|
|
700
|
+
*
|
|
701
|
+
* This is the key initialization step — without this, Page.navigate and
|
|
702
|
+
* Runtime.evaluate won't work because we're connected at browser level.
|
|
703
|
+
*/
|
|
704
|
+
async attachToPage() {
|
|
705
|
+
await this.send("Target.setDiscoverTargets", { discover: true });
|
|
706
|
+
let pageTargetId;
|
|
707
|
+
logger.debug("creating fresh page target for session isolation");
|
|
708
|
+
const createResp = await this.send("Target.createTarget", { url: "about:blank" });
|
|
709
|
+
pageTargetId = createResp.result?.targetId;
|
|
710
|
+
if (!pageTargetId) {
|
|
711
|
+
throw new ProtocolError("Failed to create page target");
|
|
712
|
+
}
|
|
713
|
+
logger.debug(`created page target: ${pageTargetId}`);
|
|
714
|
+
const attachResp = await this.send("Target.attachToTarget", {
|
|
715
|
+
targetId: pageTargetId,
|
|
716
|
+
flatten: true
|
|
717
|
+
});
|
|
718
|
+
let sessionId = attachResp.result?.sessionId;
|
|
719
|
+
if (!sessionId) {
|
|
720
|
+
sessionId = await new Promise((resolve, reject) => {
|
|
721
|
+
const timeout = setTimeout(() => {
|
|
722
|
+
this.off("Target.attachedToTarget", handler);
|
|
723
|
+
reject(new TimeoutError("Timeout waiting for Target.attachedToTarget event"));
|
|
724
|
+
}, 5e3);
|
|
725
|
+
const handler = (params) => {
|
|
726
|
+
const sid = params.sessionId;
|
|
727
|
+
if (sid) {
|
|
728
|
+
clearTimeout(timeout);
|
|
729
|
+
this.off("Target.attachedToTarget", handler);
|
|
730
|
+
resolve(sid);
|
|
731
|
+
}
|
|
732
|
+
};
|
|
733
|
+
this.on("Target.attachedToTarget", handler);
|
|
734
|
+
});
|
|
735
|
+
}
|
|
736
|
+
this.targetSessionId = sessionId;
|
|
737
|
+
logger.info(`attached to page target`, { targetId: pageTargetId, sessionId });
|
|
738
|
+
await this.sendToTarget("Page.enable");
|
|
739
|
+
await this.sendToTarget("Runtime.enable");
|
|
740
|
+
return sessionId;
|
|
741
|
+
}
|
|
742
|
+
// -------------------------------------------------------------------
|
|
743
|
+
// High-level CDP commands (all use sendToTarget for page-scoped ops)
|
|
744
|
+
// -------------------------------------------------------------------
|
|
745
|
+
/** Capture a screenshot as base64 PNG (10s timeout to avoid blocking on heavy pages). */
|
|
746
|
+
async captureScreenshot() {
|
|
747
|
+
const screenshotTimeout = Math.min(this.commandTimeoutMs, 1e4);
|
|
748
|
+
const id = this.nextId++;
|
|
749
|
+
const cmd = {
|
|
750
|
+
id,
|
|
751
|
+
method: "Page.captureScreenshot",
|
|
752
|
+
params: { format: "png" },
|
|
753
|
+
sessionId: this.targetSessionId
|
|
754
|
+
};
|
|
755
|
+
const resp = await new Promise((resolve, reject) => {
|
|
756
|
+
const timer = setTimeout(() => {
|
|
757
|
+
this.pending.delete(id);
|
|
758
|
+
reject(new TimeoutError(`CDP command timeout: Page.captureScreenshot (${screenshotTimeout}ms)`));
|
|
759
|
+
}, screenshotTimeout);
|
|
760
|
+
this.pending.set(id, { resolve, reject, timer });
|
|
761
|
+
try {
|
|
762
|
+
this.transport.send(JSON.stringify(cmd));
|
|
763
|
+
} catch (err) {
|
|
764
|
+
this.pending.delete(id);
|
|
765
|
+
clearTimeout(timer);
|
|
766
|
+
reject(err);
|
|
767
|
+
}
|
|
768
|
+
});
|
|
769
|
+
const data = resp.result?.["data"];
|
|
770
|
+
if (typeof data !== "string") {
|
|
771
|
+
throw new ProtocolError("captureScreenshot: missing result.data");
|
|
772
|
+
}
|
|
773
|
+
return data;
|
|
774
|
+
}
|
|
775
|
+
/** Get full page HTML. */
|
|
776
|
+
async getHTML() {
|
|
777
|
+
const resp = await this.sendToTarget("Runtime.evaluate", {
|
|
778
|
+
expression: "document.documentElement.outerHTML",
|
|
779
|
+
returnByValue: true
|
|
780
|
+
});
|
|
781
|
+
return this.extractEvalValue(resp);
|
|
782
|
+
}
|
|
783
|
+
/** Evaluate a JavaScript expression and return the value. */
|
|
784
|
+
async evaluate(expression) {
|
|
785
|
+
const resp = await this.sendToTarget("Runtime.evaluate", {
|
|
786
|
+
expression,
|
|
787
|
+
returnByValue: true
|
|
788
|
+
});
|
|
789
|
+
return this.extractEvalValue(resp);
|
|
790
|
+
}
|
|
791
|
+
/** Wait for a CDP event to fire, with timeout. Returns true if event fired, false on timeout. */
|
|
792
|
+
waitForEvent(method, timeoutMs) {
|
|
793
|
+
return new Promise((resolve) => {
|
|
794
|
+
const timer = setTimeout(() => {
|
|
795
|
+
this.off(method, handler);
|
|
796
|
+
resolve(false);
|
|
797
|
+
}, timeoutMs);
|
|
798
|
+
const handler = () => {
|
|
799
|
+
clearTimeout(timer);
|
|
800
|
+
this.off(method, handler);
|
|
801
|
+
resolve(true);
|
|
802
|
+
};
|
|
803
|
+
this.on(method, handler);
|
|
804
|
+
});
|
|
805
|
+
}
|
|
806
|
+
/** Navigate to a URL and wait for the page to load. */
|
|
807
|
+
async navigate(url) {
|
|
808
|
+
const loadPromise = this.waitForEvent("Page.loadEventFired", 3e4);
|
|
809
|
+
const stopPromise = this.waitForEvent("Page.frameStoppedLoading", 35e3);
|
|
810
|
+
const resp = await this.sendToTarget("Page.navigate", { url });
|
|
811
|
+
const errorText = resp.result?.errorText;
|
|
812
|
+
if (errorText) {
|
|
813
|
+
loadPromise.catch(() => {
|
|
814
|
+
});
|
|
815
|
+
stopPromise.catch(() => {
|
|
816
|
+
});
|
|
817
|
+
if (isRetryableNavError(errorText)) {
|
|
818
|
+
throw new NavigationError(`Navigation failed: ${errorText}`);
|
|
819
|
+
}
|
|
820
|
+
throw new ProtocolError(`Navigation failed: ${errorText}`);
|
|
821
|
+
}
|
|
822
|
+
const loaded = await loadPromise;
|
|
823
|
+
if (!loaded) {
|
|
824
|
+
await stopPromise;
|
|
825
|
+
}
|
|
826
|
+
}
|
|
827
|
+
/** Dispatch a mouse event. */
|
|
828
|
+
async dispatchMouseEvent(type, x, y, button = "none", clickCount = 0) {
|
|
829
|
+
await this.sendToTarget("Input.dispatchMouseEvent", { type, x, y, button, clickCount });
|
|
830
|
+
}
|
|
831
|
+
/** Click at coordinates (mouseMoved -> mousePressed -> mouseReleased). */
|
|
832
|
+
async clickPoint(x, y) {
|
|
833
|
+
await this.dispatchMouseEvent("mouseMoved", x, y, "none", 0);
|
|
834
|
+
await this.dispatchMouseEvent("mousePressed", x, y, "left", 1);
|
|
835
|
+
await this.dispatchMouseEvent("mouseReleased", x, y, "left", 1);
|
|
836
|
+
}
|
|
837
|
+
/** Right-click at coordinates. */
|
|
838
|
+
async rightClickPoint(x, y) {
|
|
839
|
+
await this.dispatchMouseEvent("mouseMoved", x, y, "none", 0);
|
|
840
|
+
await this.dispatchMouseEvent("mousePressed", x, y, "right", 1);
|
|
841
|
+
await this.dispatchMouseEvent("mouseReleased", x, y, "right", 1);
|
|
842
|
+
}
|
|
843
|
+
/** Double-click at coordinates. */
|
|
844
|
+
async doubleClickPoint(x, y) {
|
|
845
|
+
await this.dispatchMouseEvent("mouseMoved", x, y, "none", 0);
|
|
846
|
+
await this.dispatchMouseEvent("mousePressed", x, y, "left", 1);
|
|
847
|
+
await this.dispatchMouseEvent("mouseReleased", x, y, "left", 1);
|
|
848
|
+
await this.dispatchMouseEvent("mousePressed", x, y, "left", 2);
|
|
849
|
+
await this.dispatchMouseEvent("mouseReleased", x, y, "left", 2);
|
|
850
|
+
}
|
|
851
|
+
/** Click and hold at coordinates for a duration. */
|
|
852
|
+
async clickHoldPoint(x, y, holdMs) {
|
|
853
|
+
await this.dispatchMouseEvent("mouseMoved", x, y, "none", 0);
|
|
854
|
+
await this.dispatchMouseEvent("mousePressed", x, y, "left", 1);
|
|
855
|
+
await sleep(holdMs);
|
|
856
|
+
await this.dispatchMouseEvent("mouseReleased", x, y, "left", 1);
|
|
857
|
+
}
|
|
858
|
+
/** Hover (mouseMoved only). */
|
|
859
|
+
async hoverPoint(x, y) {
|
|
860
|
+
await this.dispatchMouseEvent("mouseMoved", x, y, "none", 0);
|
|
861
|
+
}
|
|
862
|
+
/** Smooth drag from point to point (10-step interpolation). */
|
|
863
|
+
async dragPoint(fromX, fromY, toX, toY) {
|
|
864
|
+
const steps = 10;
|
|
865
|
+
await this.dispatchMouseEvent("mouseMoved", fromX, fromY, "none", 0);
|
|
866
|
+
await this.dispatchMouseEvent("mousePressed", fromX, fromY, "left", 1);
|
|
867
|
+
for (let i = 1; i <= steps; i++) {
|
|
868
|
+
const t = i / steps;
|
|
869
|
+
const x = fromX + (toX - fromX) * t;
|
|
870
|
+
const y = fromY + (toY - fromY) * t;
|
|
871
|
+
await this.dispatchMouseEvent("mouseMoved", x, y, "left", 0);
|
|
872
|
+
await sleep(16);
|
|
873
|
+
}
|
|
874
|
+
await this.dispatchMouseEvent("mouseReleased", toX, toY, "left", 1);
|
|
875
|
+
}
|
|
876
|
+
/** Insert text via Input.insertText. */
|
|
877
|
+
async insertText(text) {
|
|
878
|
+
await this.sendToTarget("Input.insertText", { text });
|
|
879
|
+
}
|
|
880
|
+
/** Press a key (keyDown + keyUp). */
|
|
881
|
+
async pressKey(key, code, keyCode) {
|
|
882
|
+
await this.sendToTarget("Input.dispatchKeyEvent", {
|
|
883
|
+
type: "keyDown",
|
|
884
|
+
key,
|
|
885
|
+
code,
|
|
886
|
+
windowsVirtualKeyCode: keyCode,
|
|
887
|
+
text: key
|
|
888
|
+
});
|
|
889
|
+
await this.sendToTarget("Input.dispatchKeyEvent", {
|
|
890
|
+
type: "keyUp",
|
|
891
|
+
key,
|
|
892
|
+
code,
|
|
893
|
+
windowsVirtualKeyCode: keyCode
|
|
894
|
+
});
|
|
895
|
+
}
|
|
896
|
+
/** Send keyDown event. */
|
|
897
|
+
async keyDown(key, code, keyCode) {
|
|
898
|
+
await this.sendToTarget("Input.dispatchKeyEvent", {
|
|
899
|
+
type: "keyDown",
|
|
900
|
+
key,
|
|
901
|
+
code,
|
|
902
|
+
windowsVirtualKeyCode: keyCode,
|
|
903
|
+
text: key
|
|
904
|
+
});
|
|
905
|
+
}
|
|
906
|
+
/** Send keyUp event. */
|
|
907
|
+
async keyUp(key, code, keyCode) {
|
|
908
|
+
await this.sendToTarget("Input.dispatchKeyEvent", {
|
|
909
|
+
type: "keyUp",
|
|
910
|
+
key,
|
|
911
|
+
code,
|
|
912
|
+
windowsVirtualKeyCode: keyCode
|
|
913
|
+
});
|
|
914
|
+
}
|
|
915
|
+
/** Set viewport via Emulation.setDeviceMetricsOverride. */
|
|
916
|
+
async setViewport(width, height, deviceScaleFactor = 2, mobile = false) {
|
|
917
|
+
await this.sendToTarget("Emulation.setDeviceMetricsOverride", {
|
|
918
|
+
width,
|
|
919
|
+
height,
|
|
920
|
+
deviceScaleFactor,
|
|
921
|
+
mobile
|
|
922
|
+
});
|
|
923
|
+
}
|
|
924
|
+
/** Clean up all pending commands and event handlers. Rejects in-flight commands. */
|
|
925
|
+
destroy() {
|
|
926
|
+
const pendingSnapshot = [...this.pending.values()];
|
|
927
|
+
this.pending.clear();
|
|
928
|
+
this.eventHandlers.clear();
|
|
929
|
+
this.targetSessionId = void 0;
|
|
930
|
+
for (const pending of pendingSnapshot) {
|
|
931
|
+
clearTimeout(pending.timer);
|
|
932
|
+
pending.reject(new Error("session destroyed"));
|
|
933
|
+
}
|
|
934
|
+
}
|
|
935
|
+
extractEvalValue(resp) {
|
|
936
|
+
if (resp.error) {
|
|
937
|
+
throw new ProtocolError(`CDP error: ${resp.error.message}`);
|
|
938
|
+
}
|
|
939
|
+
const result = resp.result;
|
|
940
|
+
return result?.result?.value;
|
|
941
|
+
}
|
|
942
|
+
};
|
|
943
|
+
// Error-text fragments that mark a failed navigation as retryable
// (reported as NavigationError rather than ProtocolError by the caller).
var RETRYABLE_NAV_ERRORS = [
  "net::ERR_ABORTED",
  "net::ERR_CONNECTION_RESET",
  "net::ERR_CONNECTION_CLOSED",
  "net::ERR_CONNECTION_REFUSED",
  "net::ERR_CONNECTION_TIMED_OUT",
  "net::ERR_TIMED_OUT",
  "net::ERR_EMPTY_RESPONSE",
  "net::ERR_SOCKET_NOT_CONNECTED",
  "net::ERR_NETWORK_CHANGED"
];
/**
 * Report whether a navigation error text contains one of the retryable codes.
 *
 * @param {string} errorText Error text from a navigation response.
 * @returns {boolean} true when any entry of RETRYABLE_NAV_ERRORS is a substring.
 */
function isRetryableNavError(errorText) {
  for (const code of RETRYABLE_NAV_ERRORS) {
    if (errorText.includes(code)) {
      return true;
    }
  }
  return false;
}
|
|
957
|
+
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param {number} ms Delay passed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
960
|
+
|
|
961
|
+
// protocol/bidi-session.ts
|
|
962
|
+
init_errors();
|
|
963
|
+
/**
 * WebDriver BiDi session on top of a message transport.
 *
 * Outgoing commands get an incrementing numeric id and are tracked in
 * `pending` until a message with the same id arrives through
 * `handleMessage` (or the per-command timer fires). Incoming messages of
 * type "event" are fanned out to the handlers registered with `on`.
 */
var BiDiSession = class {
  nextId = 1;
  pending = /* @__PURE__ */ new Map();
  eventHandlers = /* @__PURE__ */ new Map();
  transport;
  browsingContext;
  commandTimeoutMs;
  /** In-flight context discovery shared by concurrent getOrCreateContext() callers. */
  contextDiscovery = void 0;
  /**
   * @param {{send(data: string): void}} transport Object used to send serialized commands.
   * @param {{commandTimeoutMs?: number}} [opts] Default per-command timeout (30000ms if omitted).
   */
  constructor(transport, opts) {
    this.transport = transport;
    this.commandTimeoutMs = opts?.commandTimeoutMs ?? 3e4;
  }
  /** Currently selected browsing context id (undefined until discovered or set). */
  get context() {
    return this.browsingContext;
  }
  /**
   * Process a raw message from the transport. Returns true if handled.
   *
   * @param {string} data Serialized JSON message.
   * @returns {boolean} true when the message settled a pending command or was an event.
   */
  handleMessage(data) {
    let msg;
    try {
      msg = JSON.parse(data);
    } catch {
      return false;
    }
    // Valid JSON that is not an object (e.g. `null`, `42`) cannot be a BiDi message.
    if (msg === null || typeof msg !== "object") {
      return false;
    }
    if (typeof msg.id === "number" && typeof msg.type === "string") {
      const pending = this.pending.get(msg.id);
      if (pending) {
        this.pending.delete(msg.id);
        clearTimeout(pending.timer);
        // Error responses are resolved, not rejected; callers inspect `type`.
        pending.resolve(msg);
        return true;
      }
      return false;
    }
    if (msg.type === "event" && typeof msg.method === "string") {
      const handlers = this.eventHandlers.get(msg.method);
      if (handlers) {
        const params = msg.params ?? {};
        // Iterate a snapshot so handlers may subscribe/unsubscribe while running.
        for (const h of [...handlers]) {
          if (!handlers.has(h)) continue;
          try {
            h(params);
          } catch {
            // A failing listener must not prevent delivery to the others.
          }
        }
      }
      return true;
    }
    return false;
  }
  /**
   * Send a BiDi command and wait for the response.
   *
   * @param {string} method BiDi command name.
   * @param {object} params Command parameters.
   * @param {number} [timeoutMs] Timeout for this command only; defaults to `commandTimeoutMs`.
   * @returns {Promise<object>} The raw response message (success or error).
   * @throws {TimeoutError} When no response arrives in time.
   */
  async send(method, params, timeoutMs = this.commandTimeoutMs) {
    const id = this.nextId++;
    const cmd = { id, method, params };
    return new Promise((resolve, reject) => {
      const timer = setTimeout(() => {
        this.pending.delete(id);
        reject(new TimeoutError(`BiDi command timeout: ${method} (${timeoutMs}ms)`));
      }, timeoutMs);
      this.pending.set(id, { resolve, reject, timer });
      try {
        this.transport.send(JSON.stringify(cmd));
      } catch (err) {
        this.pending.delete(id);
        clearTimeout(timer);
        reject(err);
      }
    });
  }
  /** Subscribe to a BiDi event. */
  on(method, handler) {
    let set = this.eventHandlers.get(method);
    if (!set) {
      set = /* @__PURE__ */ new Set();
      this.eventHandlers.set(method, set);
    }
    set.add(handler);
  }
  /** Unsubscribe from a BiDi event. */
  off(method, handler) {
    this.eventHandlers.get(method)?.delete(handler);
  }
  // -------------------------------------------------------------------
  // High-level BiDi commands
  // -------------------------------------------------------------------
  /**
   * Get or create a browsing context.
   *
   * Strategies, each limited to a 5s command timeout:
   *   1. browsingContext.getTree and take the first context
   *   2. browsingContext.create ({ type: "tab" }) as last resort
   *
   * If both fail, the placeholder context "__default__" is stored and
   * returned. Concurrent callers share a single discovery run, and the
   * session-wide `commandTimeoutMs` is never modified.
   *
   * @returns {Promise<string>} The browsing context id (or the placeholder).
   */
  async getOrCreateContext() {
    if (this.browsingContext) return this.browsingContext;
    this.contextDiscovery ??= this.discoverContext().finally(() => {
      this.contextDiscovery = void 0;
    });
    return this.contextDiscovery;
  }
  /** Run the discovery strategies once; see getOrCreateContext. */
  async discoverContext() {
    const probeTimeoutMs = 5e3;
    try {
      const resp = await this.send("browsingContext.getTree", {}, probeTimeoutMs);
      const contexts = resp.result?.contexts;
      const first = Array.isArray(contexts) ? contexts[0]?.context : void 0;
      if (first) {
        this.browsingContext = first;
        return this.browsingContext;
      }
    } catch {
      // Fall through to the next strategy.
    }
    try {
      const createResp = await this.send("browsingContext.create", { type: "tab" }, probeTimeoutMs);
      const ctx = createResp.result?.context;
      if (ctx) {
        this.browsingContext = ctx;
        return this.browsingContext;
      }
    } catch {
      // Fall through to the placeholder.
    }
    this.browsingContext = "__default__";
    return this.browsingContext;
  }
  /** Set the browsing context (called when we discover it from a response). */
  setContext(contextId) {
    this.browsingContext = contextId;
  }
  /**
   * Navigate to a URL, waiting for `wait: "complete"`.
   *
   * @param {string} url Destination URL.
   * @returns {Promise<void>}
   * @throws {ProtocolError} When the browser answers with an error response.
   */
  async navigate(url) {
    const ctx = await this.getOrCreateContext();
    const resp = await this.send("browsingContext.navigate", {
      context: ctx,
      url,
      wait: "complete"
    });
    // Error responses resolve (see handleMessage); surface them instead of
    // reporting a navigation that never happened as success.
    if (resp.type === "error") {
      throw new ProtocolError(`BiDi navigation failed: ${resp.message ?? resp.error}`);
    }
    if (this.browsingContext === "__default__") {
      // NOTE(review): reads `params.context` from the response; presumably
      // supplied by the relay, since a standard result has no such field — confirm.
      const realCtx = resp.result?.navigation ? resp?.params?.context : void 0;
      if (realCtx) {
        this.browsingContext = realCtx;
      }
    }
  }
  /**
   * Capture a screenshot as base64 PNG.
   *
   * @returns {Promise<string>} The `result.data` string of the response.
   * @throws {ProtocolError} When the response carries no string `result.data`.
   */
  async captureScreenshot() {
    const ctx = await this.getOrCreateContext();
    const resp = await this.send("browsingContext.captureScreenshot", { context: ctx });
    const data = resp.result?.data;
    if (typeof data !== "string") {
      throw new ProtocolError("captureScreenshot: missing result.data");
    }
    return data;
  }
  /**
   * Evaluate JavaScript and return the value.
   *
   * BiDi script.evaluate returns results in this format:
   *   { result: { type: "string", value: "..." } }
   *   { result: { type: "number", value: 42 } }
   *   { result: { type: "boolean", value: true } }
   *   { result: { type: "object", value: [...entries...] } }
   *   { result: { type: "null" } }
   *   { result: { type: "undefined" } }
   *   { result: { type: "array", value: [...] } }
   *
   * @param {string} expression Source text to evaluate.
   * @returns {Promise<*>} The value converted by extractBiDiValue.
   * @throws {ProtocolError} When the response has type "error".
   */
  async evaluate(expression) {
    const ctx = await this.getOrCreateContext();
    const resp = await this.send("script.evaluate", {
      expression,
      target: { context: ctx },
      awaitPromise: false,
      resultOwnership: "none"
    });
    if (resp.type === "error") {
      throw new ProtocolError(`BiDi script error: ${resp.message ?? resp.error}`);
    }
    const resultObj = resp.result?.result ?? resp.result;
    return this.extractBiDiValue(resultObj);
  }
  /**
   * Extract a JS value from a BiDi remote value object.
   *
   * Arrays are converted element-wise; "object" values given as a list of
   * [key, value] pairs become plain objects. Unknown types yield `value`
   * when present, otherwise the remote value itself.
   */
  extractBiDiValue(remoteValue) {
    if (!remoteValue) return void 0;
    const type = remoteValue.type;
    switch (type) {
      case "undefined":
        return void 0;
      case "null":
        return null;
      case "string":
      case "number":
      case "boolean":
      case "bigint":
        return remoteValue.value;
      case "array":
        if (Array.isArray(remoteValue.value)) {
          return remoteValue.value.map((v) => this.extractBiDiValue(v));
        }
        return remoteValue.value;
      case "object":
        if (Array.isArray(remoteValue.value)) {
          const obj = {};
          for (const entry of remoteValue.value) {
            if (Array.isArray(entry) && entry.length === 2) {
              // Keys are either plain strings or remote values with a `value`.
              const key = typeof entry[0] === "string" ? entry[0] : entry[0]?.value ?? String(entry[0]);
              obj[key] = this.extractBiDiValue(entry[1]);
            }
          }
          return obj;
        }
        return remoteValue.value;
      default:
        return remoteValue.value ?? remoteValue;
    }
  }
  /** Get full page HTML. */
  async getHTML() {
    return await this.evaluate("document.documentElement.outerHTML");
  }
  /**
   * Perform actions (BiDi input.performActions).
   * This is the BiDi way to do mouse/keyboard events.
   * The response is awaited but its content is not inspected here.
   *
   * @param {object[]} actions Input source action sequences.
   */
  async performActions(actions) {
    const ctx = await this.getOrCreateContext();
    await this.send("input.performActions", {
      context: ctx,
      actions
    });
  }
  /** Click at coordinates via BiDi input actions (coordinates are rounded). */
  async clickPoint(x, y) {
    await this.performActions([
      {
        type: "pointer",
        id: "mouse",
        actions: [
          { type: "pointerMove", x: Math.round(x), y: Math.round(y) },
          { type: "pointerDown", button: 0 },
          { type: "pointerUp", button: 0 }
        ]
      }
    ]);
  }
  /**
   * Insert text via BiDi input actions (one keyDown/keyUp pair per character).
   *
   * The text is split by code point, not by UTF-16 code unit, so characters
   * outside the BMP (e.g. emoji) are sent whole instead of as two lone
   * surrogates.
   */
  async insertText(text) {
    const actions = Array.from(text).flatMap((ch) => [
      { type: "keyDown", value: ch },
      { type: "keyUp", value: ch }
    ]);
    await this.performActions([{ type: "key", id: "keyboard", actions }]);
  }
  /** Clean up all pending commands and event handlers. Rejects in-flight commands. */
  destroy() {
    const pendingSnapshot = [...this.pending.values()];
    this.pending.clear();
    this.eventHandlers.clear();
    this.browsingContext = void 0;
    this.contextDiscovery = void 0;
    for (const pending of pendingSnapshot) {
      clearTimeout(pending.timer);
      pending.reject(new Error("session destroyed"));
    }
  }
};
|
|
1231
|
+
|
|
1232
|
+
// protocol/types.ts
|
|
1233
|
+
// Named keys mapped to the DOM `key`, DOM `code` and Windows virtual key code
// used when dispatching key events.
var KEY_MAP = {
  Enter: { key: "Enter", code: "Enter", keyCode: 13 },
  Tab: { key: "Tab", code: "Tab", keyCode: 9 },
  Escape: { key: "Escape", code: "Escape", keyCode: 27 },
  Backspace: { key: "Backspace", code: "Backspace", keyCode: 8 },
  Delete: { key: "Delete", code: "Delete", keyCode: 46 },
  Space: { key: " ", code: "Space", keyCode: 32 },
  " ": { key: " ", code: "Space", keyCode: 32 },
  ArrowLeft: { key: "ArrowLeft", code: "ArrowLeft", keyCode: 37 },
  ArrowUp: { key: "ArrowUp", code: "ArrowUp", keyCode: 38 },
  ArrowRight: { key: "ArrowRight", code: "ArrowRight", keyCode: 39 },
  ArrowDown: { key: "ArrowDown", code: "ArrowDown", keyCode: 40 },
  Home: { key: "Home", code: "Home", keyCode: 36 },
  End: { key: "End", code: "End", keyCode: 35 },
  PageUp: { key: "PageUp", code: "PageUp", keyCode: 33 },
  PageDown: { key: "PageDown", code: "PageDown", keyCode: 34 }
};
/**
 * Resolve a key name to `{ key, code, keyCode }`.
 *
 * Lookup order:
 *   1. own entries of KEY_MAP (inherited members such as "constructor" are
 *      not treated as keys);
 *   2. single ASCII letters -> code "KeyX", keyCode of the upper-case letter;
 *   3. single ASCII digits  -> code "DigitN", keyCode of the digit;
 *   4. anything else        -> the name itself as key and code, keyCode 0.
 *
 * @param {string} keyName Key name or single character.
 * @returns {{key: string, code: string, keyCode: number}}
 */
function getKeyParams(keyName) {
  if (Object.prototype.hasOwnProperty.call(KEY_MAP, keyName)) {
    return KEY_MAP[keyName];
  }
  if (typeof keyName === "string") {
    if (/^[A-Za-z]$/.test(keyName)) {
      const upper = keyName.toUpperCase();
      return { key: keyName, code: `Key${upper}`, keyCode: upper.charCodeAt(0) };
    }
    if (/^[0-9]$/.test(keyName)) {
      return { key: keyName, code: `Digit${keyName}`, keyCode: keyName.charCodeAt(0) };
    }
  }
  return { key: keyName, code: keyName, keyCode: 0 };
}
|
|
1253
|
+
|
|
1254
|
+
// protocol/protocol-adapter.ts
|
|
1255
|
+
/**
 * Unified facade over a CDPSession or a BiDiSession.
 *
 * The adapter registers ONE message callback on the transport (in the
 * constructor) and routes every incoming message to the currently active
 * session; `Spider.*` messages are translated into emitter events instead.
 */
var ProtocolAdapter = class {
  /**
   * WebDriver key code points for named keys. BiDi key actions take the
   * key as a single character, so names like "Enter" must be translated.
   * Names not listed here (including single characters) are sent unchanged.
   */
  static BIDI_KEY_VALUES = {
    Backspace: "\uE003",
    Tab: "\uE004",
    Enter: "\uE006",
    Shift: "\uE008",
    Control: "\uE009",
    Alt: "\uE00A",
    Escape: "\uE00C",
    PageUp: "\uE00E",
    PageDown: "\uE00F",
    End: "\uE010",
    Home: "\uE011",
    ArrowLeft: "\uE012",
    ArrowUp: "\uE013",
    ArrowRight: "\uE014",
    ArrowDown: "\uE015",
    Delete: "\uE017",
    Meta: "\uE03D"
  };
  cdp = null;
  bidi = null;
  transport;
  emitter;
  protocol;
  adapterOpts;
  /**
   * @param {object} transport Message transport exposing `onMessage(cb)`.
   * @param {object} emitter Event emitter receiving `captcha.*` events.
   * @param {string} browser "auto", "firefox" (BiDi) or anything else (CDP).
   * @param {{commandTimeoutMs?: number}} [opts] Options forwarded to the session.
   */
  constructor(transport, emitter, browser, opts) {
    this.transport = transport;
    this.emitter = emitter;
    this.adapterOpts = opts ?? {};
    if (browser === "auto") {
      // Session is created later by autoDetectAndInit().
      this.protocol = "auto";
    } else if (browser === "firefox") {
      this.protocol = "bidi";
      this.bidi = new BiDiSession(transport, this.sessionOptions());
    } else {
      this.protocol = "cdp";
      this.cdp = new CDPSession(transport, this.sessionOptions());
    }
    // Single registration; routeMessage picks the active session dynamically.
    transport.onMessage((data) => this.routeMessage(data));
  }
  /** Whether this adapter uses CDP or BiDi (or 'auto' before init). */
  get protocolType() {
    return this.protocol;
  }
  /** Session options derived from the adapter options (undefined when no timeout was given). */
  sessionOptions() {
    const timeout = this.adapterOpts.commandTimeoutMs;
    return timeout ? { commandTimeoutMs: timeout } : void 0;
  }
  /** Return the BiDi session or fail with a descriptive error when none is active. */
  requireBidi() {
    if (!this.bidi) {
      throw new Error("ProtocolAdapter: no active protocol session (call init() first)");
    }
    return this.bidi;
  }
  /** Wrap pointer actions into the single "mouse" input source used for BiDi. */
  pointerSource(actions) {
    return [{ type: "pointer", id: "mouse", actions }];
  }
  /** Wrap key actions into the single "keyboard" input source used for BiDi. */
  keySource(actions) {
    return [{ type: "key", id: "keyboard", actions }];
  }
  /** Translate a DOM key name into the value expected by BiDi key actions. */
  bidiKeyValue(key) {
    const table = ProtocolAdapter.BIDI_KEY_VALUES;
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : key;
  }
  /** Route incoming WebSocket messages to the right session + spider events. */
  routeMessage(data) {
    try {
      const msg = JSON.parse(data);
      if (typeof msg.method === "string" && msg.method.startsWith("Spider.")) {
        this.handleSpiderEvent(msg.method, msg.params ?? {});
        return;
      }
    } catch {
      // Not JSON (or not an object): let the session decide what to do with it.
    }
    if (this.cdp) {
      this.cdp.handleMessage(data);
    } else if (this.bidi) {
      this.bidi.handleMessage(data);
    }
  }
  /** Handle Spider.* custom events from the browser_server. */
  handleSpiderEvent(method, params) {
    switch (method) {
      case "Spider.captchaDetected":
        this.emitter.emit("captcha.detected", {
          types: params.types ?? [],
          url: params.url ?? ""
        });
        break;
      case "Spider.captchaSolving":
        this.emitter.emit("captcha.solving", {
          types: params.types ?? [],
          url: params.url ?? "",
          round: params.round ?? 0
        });
        break;
      case "Spider.captchaSolved":
        this.emitter.emit("captcha.solved", { url: params.url ?? "" });
        break;
      case "Spider.captchaFailed":
        this.emitter.emit("captcha.failed", {
          url: params.url ?? "",
          reason: params.reason ?? ""
        });
        break;
      default:
        logger.debug(`unhandled Spider event: ${method}`, params);
    }
  }
  // -------------------------------------------------------------------
  // Unified interface — all methods work for both CDP and BiDi
  // -------------------------------------------------------------------
  /**
   * Initialize the protocol session.
   *
   * For CDP (Chrome/Servo/LightPanda): delegates to `cdp.attachToPage()`.
   *
   * For BiDi (Firefox): gets or creates a browsing context.
   *
   * For "auto" mode: tries CDP first. If attaching fails (e.g. we actually
   * got a BiDi session), falls back to BiDi.
   */
  async init() {
    if (this.protocol === "auto") {
      await this.autoDetectAndInit();
      return;
    }
    if (this.cdp) {
      await this.cdp.attachToPage();
    } else if (this.bidi) {
      await this.bidi.getOrCreateContext();
    }
  }
  /**
   * Auto-detect protocol by trying CDP first, falling back to BiDi.
   * Used when browser type is "auto" and we don't know what the server gave us.
   *
   * The transport callback registered by the constructor is reused; it is
   * NOT registered again here, so each incoming message is routed once.
   */
  async autoDetectAndInit() {
    const sessionOpts = this.sessionOptions();
    try {
      this.cdp = new CDPSession(this.transport, sessionOpts);
      await this.cdp.attachToPage();
      this.protocol = "cdp";
      logger.info("auto-detected CDP protocol");
      return;
    } catch (err) {
      logger.debug("CDP attach failed during auto-detect, falling back to BiDi", err);
      this.cdp?.destroy();
      this.cdp = null;
    }
    this.bidi = new BiDiSession(this.transport, sessionOpts);
    await this.bidi.getOrCreateContext();
    this.protocol = "bidi";
    logger.info("auto-detected BiDi protocol");
  }
  /** Navigate to URL. */
  async navigate(url) {
    if (this.cdp) {
      await this.cdp.navigate(url);
    } else {
      await this.requireBidi().navigate(url);
    }
  }
  /** Get page HTML. */
  async getHTML() {
    if (this.cdp) {
      return this.cdp.getHTML();
    }
    return this.requireBidi().getHTML();
  }
  /** Evaluate JavaScript expression. */
  async evaluate(expression) {
    if (this.cdp) {
      return this.cdp.evaluate(expression);
    }
    return this.requireBidi().evaluate(expression);
  }
  /** Capture screenshot as base64 PNG. */
  async captureScreenshot() {
    if (this.cdp) {
      return this.cdp.captureScreenshot();
    }
    return this.requireBidi().captureScreenshot();
  }
  /** Click at viewport coordinates. */
  async clickPoint(x, y) {
    if (this.cdp) {
      await this.cdp.clickPoint(x, y);
    } else {
      await this.requireBidi().clickPoint(x, y);
    }
  }
  /** Right-click at coordinates. */
  async rightClickPoint(x, y) {
    if (this.cdp) {
      await this.cdp.rightClickPoint(x, y);
    } else {
      await this.requireBidi().performActions(this.pointerSource([
        { type: "pointerMove", x: Math.round(x), y: Math.round(y) },
        { type: "pointerDown", button: 2 },
        { type: "pointerUp", button: 2 }
      ]));
    }
  }
  /** Double-click at coordinates. */
  async doubleClickPoint(x, y) {
    if (this.cdp) {
      await this.cdp.doubleClickPoint(x, y);
    } else {
      await this.requireBidi().performActions(this.pointerSource([
        { type: "pointerMove", x: Math.round(x), y: Math.round(y) },
        { type: "pointerDown", button: 0 },
        { type: "pointerUp", button: 0 },
        { type: "pointerDown", button: 0 },
        { type: "pointerUp", button: 0 }
      ]));
    }
  }
  /** Click and hold at coordinates. */
  async clickHoldPoint(x, y, holdMs) {
    if (this.cdp) {
      await this.cdp.clickHoldPoint(x, y, holdMs);
    } else {
      await this.requireBidi().performActions(this.pointerSource([
        { type: "pointerMove", x: Math.round(x), y: Math.round(y) },
        { type: "pointerDown", button: 0 },
        { type: "pause", duration: holdMs },
        { type: "pointerUp", button: 0 }
      ]));
    }
  }
  /** Hover at coordinates. */
  async hoverPoint(x, y) {
    if (this.cdp) {
      await this.cdp.hoverPoint(x, y);
    } else {
      await this.requireBidi().performActions(this.pointerSource([
        { type: "pointerMove", x: Math.round(x), y: Math.round(y) }
      ]));
    }
  }
  /** Smooth drag from point to point (10 interpolated moves of 16ms each on BiDi). */
  async dragPoint(fromX, fromY, toX, toY) {
    if (this.cdp) {
      await this.cdp.dragPoint(fromX, fromY, toX, toY);
    } else {
      const bidi = this.requireBidi();
      const steps = 10;
      const actions = [
        { type: "pointerMove", x: Math.round(fromX), y: Math.round(fromY) },
        { type: "pointerDown", button: 0 }
      ];
      for (let i = 1; i <= steps; i++) {
        const t = i / steps;
        actions.push({
          type: "pointerMove",
          x: Math.round(fromX + (toX - fromX) * t),
          y: Math.round(fromY + (toY - fromY) * t),
          duration: 16
        });
      }
      actions.push({ type: "pointerUp", button: 0 });
      await bidi.performActions(this.pointerSource(actions));
    }
  }
  /** Insert text. */
  async insertText(text) {
    if (this.cdp) {
      await this.cdp.insertText(text);
    } else {
      await this.requireBidi().insertText(text);
    }
  }
  /** Press a named key (e.g. "Enter", "Tab"). */
  async pressKey(keyName) {
    const { key, code, keyCode } = getKeyParams(keyName);
    if (this.cdp) {
      await this.cdp.pressKey(key, code, keyCode);
    } else {
      const value = this.bidiKeyValue(key);
      await this.requireBidi().performActions(this.keySource([
        { type: "keyDown", value },
        { type: "keyUp", value }
      ]));
    }
  }
  /** Send keyDown. */
  async keyDown(keyName) {
    const { key, code, keyCode } = getKeyParams(keyName);
    if (this.cdp) {
      await this.cdp.keyDown(key, code, keyCode);
    } else {
      await this.requireBidi().performActions(this.keySource([
        { type: "keyDown", value: this.bidiKeyValue(key) }
      ]));
    }
  }
  /** Send keyUp. */
  async keyUp(keyName) {
    const { key, code, keyCode } = getKeyParams(keyName);
    if (this.cdp) {
      await this.cdp.keyUp(key, code, keyCode);
    } else {
      await this.requireBidi().performActions(this.keySource([
        { type: "keyUp", value: this.bidiKeyValue(key) }
      ]));
    }
  }
  /**
   * Set viewport dimensions.
   * On BiDi only width/height are applied (via `window.resizeTo`);
   * deviceScaleFactor and mobile are used on the CDP path only.
   */
  async setViewport(width, height, deviceScaleFactor = 2, mobile = false) {
    if (this.cdp) {
      await this.cdp.setViewport(width, height, deviceScaleFactor, mobile);
    } else {
      await this.requireBidi().evaluate(
        `window.resizeTo(${width}, ${height})`
      );
    }
  }
  /** Subscribe to a protocol event on whichever session is active (no-op when none is). */
  onProtocolEvent(method, handler) {
    if (this.cdp) {
      this.cdp.on(method, handler);
    } else if (this.bidi) {
      this.bidi.on(method, handler);
    }
  }
  /** Clean up resources. Nulls references first to prevent stale message routing. */
  destroy() {
    const cdp = this.cdp;
    const bidi = this.bidi;
    this.cdp = null;
    this.bidi = null;
    cdp?.destroy();
    bidi?.destroy();
  }
};
|
|
1584
|
+
|
|
1585
|
+
// events/emitter.ts
|
|
1586
|
+
/**
 * Small synchronous event emitter keyed by event name.
 *
 * Handlers for an event are kept in a Set, so registering the same function
 * twice has no additional effect. `on`, `off` and `once` return the emitter
 * for chaining.
 */
var SpiderEventEmitter = class {
  handlers = /* @__PURE__ */ new Map();
  /**
   * Register a handler for an event.
   * @param {string} event Event name.
   * @param {Function} handler Called with the emitted data.
   * @returns {this}
   */
  on(event, handler) {
    let set = this.handlers.get(event);
    if (!set) {
      set = /* @__PURE__ */ new Set();
      this.handlers.set(event, set);
    }
    set.add(handler);
    return this;
  }
  /**
   * Remove a handler. Also removes a handler that was registered through
   * `once(event, handler)` when given the original function.
   * @returns {this}
   */
  off(event, handler) {
    const set = this.handlers.get(event);
    if (set && !set.delete(handler)) {
      for (const candidate of set) {
        if (candidate.listener === handler) {
          set.delete(candidate);
          break;
        }
      }
    }
    return this;
  }
  /**
   * Register a handler that is removed before its first invocation runs.
   * @returns {this}
   */
  once(event, handler) {
    const wrapped = (data) => {
      this.off(event, wrapped);
      handler(data);
    };
    // Lets off(event, handler) find this wrapper by the original function.
    wrapped.listener = handler;
    return this.on(event, wrapped);
  }
  /**
   * Invoke every handler registered for `event` at the time of the call.
   *
   * Dispatch runs over a snapshot: handlers added while emitting are first
   * called on the NEXT emit (so a handler that re-subscribes itself cannot
   * loop within one emit), and handlers removed while emitting are skipped.
   * Exceptions thrown by a handler are isolated so the remaining handlers
   * still run.
   */
  emit(event, data) {
    const set = this.handlers.get(event);
    if (!set) return;
    for (const handler of [...set]) {
      if (!set.has(handler)) continue;
      try {
        handler(data);
      } catch {
        // Deliberately ignored: one failing listener must not block the rest.
      }
    }
  }
  /** Remove all handlers of one event, or of every event when `event` is omitted. */
  removeAllListeners(event) {
    if (event) {
      this.handlers.delete(event);
    } else {
      this.handlers.clear();
    }
  }
};
|
|
1627
|
+
|
|
1628
|
+
// page.ts
|
|
1629
|
+
init_errors();
|
|
1630
|
+
var SpiderPage = class {
  /**
   * @internal
   * @param adapter Protocol adapter that performs the actual browser work
   *   (navigate, evaluate, getHTML, clickPoint, ...). Every method of this
   *   class delegates to it.
   */
  constructor(adapter) {
    this.adapter = adapter;
  }
  // -------------------------------------------------------------------
  // Navigation
  // -------------------------------------------------------------------
  /** Navigate to a URL (delegates to the adapter's `navigate`). */
  async goto(url) {
    await this.adapter.navigate(url);
  }
  /** Go back in browser history (runs `window.history.back()` in the page). */
  async goBack() {
    await this.adapter.evaluate("window.history.back()");
  }
  /** Go forward in browser history (runs `window.history.forward()` in the page). */
  async goForward() {
    await this.adapter.evaluate("window.history.forward()");
  }
  /** Reload the page (runs `window.location.reload()` in the page). */
  async reload() {
    await this.adapter.evaluate("window.location.reload()");
  }
  // -------------------------------------------------------------------
  // Content
  // -------------------------------------------------------------------
  /**
   * Get the full page HTML, ensuring the page is ready first.
   *
   * With `waitMs > 0`:
   *   1. waits for network idle + DOM stability (`waitForNetworkIdle`);
   *   2. if the HTML looks like a challenge interstitial, re-polls up to
   *      three times, 4s apart, and throws if it never clears;
   *   3. throws if the HTML looks like a site-level rate-limit page;
   *   4. if the HTML is shorter than `minLength`, re-polls after increasing
   *      delays (300, 500, 800, 1200 ms), keeping the longest HTML seen.
   *
   * A null/undefined result from the adapter is treated as an empty string
   * at every polling step.
   *
   * @param waitMs Max time to wait for readiness (default: 8000ms).
   *   Pass 0 to skip all readiness checks and return immediately.
   * @param minLength Minimum content length to consider "good" (default: 1000).
   * @returns The page HTML ("" when the adapter yields nothing).
   * @throws BlockedError when stuck on an interstitial or rate-limited.
   */
  async content(waitMs = 8e3, minLength = 1e3) {
    if (waitMs > 0) {
      await this.waitForNetworkIdle(waitMs);
    }
    let html = await this.adapter.getHTML() ?? "";
    if (waitMs > 0 && this.isInterstitialContent(html)) {
      // Challenge pages often auto-resolve; give them a few chances.
      const interstitialWaits = [4e3, 4e3, 4e3];
      for (const wait of interstitialWaits) {
        await sleep2(wait);
        html = await this.adapter.getHTML() ?? "";
        if (!this.isInterstitialContent(html)) break;
      }
      if (this.isInterstitialContent(html)) {
        throw new BlockedError("Page stuck on interstitial challenge");
      }
    }
    if (waitMs > 0 && this.isRateLimitContent(html)) {
      throw new BlockedError("Rate limit exceeded (site-level)");
    }
    if (waitMs > 0 && html.length < minLength) {
      const increments = [300, 500, 800, 1200];
      for (const extra of increments) {
        await sleep2(extra);
        // Same null-safety as the reads above: a missing result must not
        // crash the retry loop, it simply does not improve on `html`.
        const updated = await this.adapter.getHTML() ?? "";
        if (updated.length > html.length) {
          html = updated;
        }
        if (html.length >= minLength) break;
      }
    }
    return html;
  }
  /**
   * Get the raw page HTML without any readiness waiting.
   * Use this when you need immediate access or have already waited.
   * Returns whatever the adapter's `getHTML()` yields, unmodified.
   */
  async rawContent() {
    return this.adapter.getHTML();
  }
  /** Get the page title (`document.title`). */
  async title() {
    return await this.adapter.evaluate("document.title");
  }
  /** Get the current page URL (`window.location.href`). */
  async url() {
    return await this.adapter.evaluate("window.location.href");
  }
  /** Capture a screenshot via the adapter (described upstream as a base64 PNG). */
  async screenshot() {
    return this.adapter.captureScreenshot();
  }
  /** Evaluate an arbitrary JavaScript expression in the page and return the result. */
  async evaluate(expression) {
    return this.adapter.evaluate(expression);
  }
  // -------------------------------------------------------------------
  // Click Actions
  // -------------------------------------------------------------------
  /**
   * Click the center of the first element matching a CSS selector.
   * @throws Error when no element matches.
   */
  async click(selector) {
    const { x, y } = await this.getElementCenter(selector);
    await this.adapter.clickPoint(x, y);
  }
  /** Click at specific viewport coordinates. */
  async clickAt(x, y) {
    await this.adapter.clickPoint(x, y);
  }
  /**
   * Double-click the center of the first element matching a CSS selector.
   * @throws Error when no element matches.
   */
  async dblclick(selector) {
    const { x, y } = await this.getElementCenter(selector);
    await this.adapter.doubleClickPoint(x, y);
  }
  /**
   * Right-click the center of the first element matching a CSS selector.
   * @throws Error when no element matches.
   */
  async rightClick(selector) {
    const { x, y } = await this.getElementCenter(selector);
    await this.adapter.rightClickPoint(x, y);
  }
  /**
   * Click every element matching a selector.
   * All center points are computed once up front (no scrolling), then
   * clicked in document order with a 100ms pause after each click.
   */
  async clickAll(selector) {
    const points = await this.adapter.evaluate(`
      (function() {
        const els = document.querySelectorAll(${JSON.stringify(selector)});
        return Array.from(els).map(el => {
          const r = el.getBoundingClientRect();
          return { x: r.x + r.width / 2, y: r.y + r.height / 2 };
        });
      })()
    `);
    if (Array.isArray(points)) {
      for (const pt of points) {
        await this.adapter.clickPoint(pt.x, pt.y);
        await sleep2(100);
      }
    }
  }
  // -------------------------------------------------------------------
  // Input Actions
  // -------------------------------------------------------------------
  /**
   * Fill a form field — focus, clear existing value, type new value.
   *
   * Steps: focus + clear the element in-page, click it (best-effort),
   * insert the text through the adapter, then dispatch bubbling `input`
   * and `change` events on the element. A missing element is tolerated at
   * every in-page step.
   */
  async fill(selector, value) {
    await this.adapter.evaluate(`
      (function() {
        const el = document.querySelector(${JSON.stringify(selector)});
        if (el) { el.focus(); el.value = ''; }
      })()
    `);
    try {
      const { x, y } = await this.getElementCenter(selector);
      await this.adapter.clickPoint(x, y);
    } catch {
      // Best-effort: the element was already focused above, so a failed
      // click (element missing / not clickable) should not abort the fill.
    }
    await this.adapter.insertText(value);
    await this.adapter.evaluate(`
      (function() {
        const el = document.querySelector(${JSON.stringify(selector)});
        if (el) {
          el.dispatchEvent(new Event('input', { bubbles: true }));
          el.dispatchEvent(new Event('change', { bubbles: true }));
        }
      })()
    `);
  }
  /** Type text into the currently focused element. */
  async type(value) {
    await this.adapter.insertText(value);
  }
  /** Press a named key (e.g. "Enter", "Tab", "Escape"). */
  async press(key) {
    await this.adapter.pressKey(key);
  }
  /**
   * Clear an input field by setting its `value` to ''.
   * Unlike `fill`, the in-page script does not guard against a missing
   * element, so the evaluated expression fails when nothing matches.
   */
  async clear(selector) {
    await this.adapter.evaluate(
      `document.querySelector(${JSON.stringify(selector)}).value = ''`
    );
  }
  /**
   * Select an option in a <select> element by assigning `value` and
   * dispatching a bubbling `change` event. No-op when nothing matches.
   */
  async select(selector, value) {
    await this.adapter.evaluate(`
      (function() {
        const el = document.querySelector(${JSON.stringify(selector)});
        if (el) {
          el.value = ${JSON.stringify(value)};
          el.dispatchEvent(new Event('change', { bubbles: true }));
        }
      })()
    `);
  }
  // -------------------------------------------------------------------
  // Focus & Hover
  // -------------------------------------------------------------------
  /** Focus an element (no-op when nothing matches). */
  async focus(selector) {
    await this.adapter.evaluate(
      `document.querySelector(${JSON.stringify(selector)})?.focus()`
    );
  }
  /** Blur (unfocus) an element (no-op when nothing matches). */
  async blur(selector) {
    await this.adapter.evaluate(
      `document.querySelector(${JSON.stringify(selector)})?.blur()`
    );
  }
  /**
   * Hover over the center of an element.
   * @throws Error when no element matches.
   */
  async hover(selector) {
    const { x, y } = await this.getElementCenter(selector);
    await this.adapter.hoverPoint(x, y);
  }
  // -------------------------------------------------------------------
  // Drag
  // -------------------------------------------------------------------
  /**
   * Drag from the center of one element to the center of another.
   * @throws Error when either selector matches nothing.
   */
  async drag(fromSelector, toSelector) {
    const from = await this.getElementCenter(fromSelector);
    const to = await this.getElementCenter(toSelector);
    await this.adapter.dragPoint(from.x, from.y, to.x, to.y);
  }
  // -------------------------------------------------------------------
  // Scroll
  // -------------------------------------------------------------------
  /** Scroll vertically by pixels (positive = down). */
  async scrollY(pixels) {
    await this.adapter.evaluate(`window.scrollBy(0, ${pixels})`);
  }
  /** Scroll horizontally by pixels (positive = right). */
  async scrollX(pixels) {
    await this.adapter.evaluate(`window.scrollBy(${pixels}, 0)`);
  }
  /** Smooth-scroll an element into the center of the view (no-op when nothing matches). */
  async scrollTo(selector) {
    await this.adapter.evaluate(
      `document.querySelector(${JSON.stringify(selector)})?.scrollIntoView({ behavior: 'smooth', block: 'center' })`
    );
  }
  /** Scroll to absolute page coordinates. */
  async scrollToPoint(x, y) {
    await this.adapter.evaluate(`window.scrollTo(${x}, ${y})`);
  }
  // -------------------------------------------------------------------
  // Wait
  // -------------------------------------------------------------------
  /**
   * Wait for a CSS selector to appear in the DOM.
   *
   * Polls every 100ms for `ceil(timeoutMs / 100)` attempts; time spent
   * inside each evaluate call is not counted against the timeout.
   * @throws TimeoutError when the selector never appears.
   */
  async waitForSelector(selector, timeoutMs = 5e3) {
    const interval = 100;
    const maxIter = Math.ceil(timeoutMs / interval);
    const checkJs = `!!document.querySelector(${JSON.stringify(selector)})`;
    for (let i = 0; i < maxIter; i++) {
      const found = await this.adapter.evaluate(checkJs);
      if (found) return;
      await sleep2(interval);
    }
    throw new TimeoutError(`Timeout waiting for selector: ${selector}`);
  }
  /**
   * Wait for navigation/page load (simple delay).
   * Sleeps for `timeoutMs`, capped at one second; no page state is inspected.
   */
  async waitForNavigation(timeoutMs = 5e3) {
    await sleep2(Math.min(timeoutMs, 1e3));
  }
  /**
   * Wait until the page is fully loaded and DOM is stable.
   *
   * Checks:
   * 1. document.readyState === 'complete'
   * 2. DOM content length stabilizes (no changes for 500ms)
   *
   * Both phases share the single `timeoutMs` budget. The method returns
   * silently (does not throw) when the budget runs out.
   *
   * Use after goto() for SPAs and dynamic pages to ensure all
   * content is rendered before extracting HTML.
   */
  async waitForReady(timeoutMs = 1e4) {
    const start = Date.now();
    const pollInterval = 200;
    const stableThreshold = 500;
    while (Date.now() - start < timeoutMs) {
      const state = await this.adapter.evaluate("document.readyState");
      if (state === "complete") break;
      await sleep2(pollInterval);
    }
    let lastLength = 0;
    let stableSince = Date.now();
    while (Date.now() - start < timeoutMs) {
      const length = await this.adapter.evaluate(
        "document.documentElement.innerHTML.length"
      );
      if (length !== lastLength) {
        // Content changed: restart the stability clock.
        lastLength = length;
        stableSince = Date.now();
      } else if (Date.now() - stableSince >= stableThreshold) {
        return;
      }
      await sleep2(pollInterval);
    }
  }
  /**
   * Wait until page content exceeds a minimum length.
   * Useful for SPAs where content loads asynchronously.
   * Polls every 200ms; returns silently when `timeoutMs` elapses.
   */
  async waitForContent(minLength = 500, timeoutMs = 8e3) {
    const start = Date.now();
    while (Date.now() - start < timeoutMs) {
      const length = await this.adapter.evaluate(
        "document.documentElement.innerHTML.length"
      );
      if (length >= minLength) return;
      await sleep2(200);
    }
  }
  /**
   * Wait for network idle + DOM stability (cross-platform).
   *
   * First polls `document.readyState` until 'complete' (or `timeoutMs`
   * elapses), then evaluates an in-page script that uses a
   * PerformanceObserver (resource entries) and a MutationObserver to
   * resolve once there has been no activity for 400ms, or once its
   * deadline passes. The in-page deadline is the remaining budget, but
   * never less than 1000ms.
   *
   * If the in-page script cannot be evaluated, falls back to a fixed
   * 500ms sleep. Never throws on timeout.
   */
  async waitForNetworkIdle(timeoutMs = 8e3) {
    const start = Date.now();
    const pollInterval = 250;
    while (Date.now() - start < timeoutMs) {
      const state = await this.adapter.evaluate("document.readyState");
      if (state === "complete") break;
      await sleep2(pollInterval);
    }
    const idleMs = 400;
    const remaining = Math.max(1e3, timeoutMs - (Date.now() - start));
    try {
      await this.adapter.evaluate(`
        new Promise((resolve) => {
          let lastActivity = Date.now();
          const idleThreshold = ${idleMs};
          const deadline = Date.now() + ${remaining};

          // Track resource loads
          const perfObs = new PerformanceObserver(() => { lastActivity = Date.now(); });
          try { perfObs.observe({ entryTypes: ['resource'] }); } catch(e) {}

          // Track DOM mutations
          const mutObs = new MutationObserver(() => { lastActivity = Date.now(); });
          mutObs.observe(document.documentElement, {
            childList: true, subtree: true, attributes: true
          });

          const check = () => {
            const now = Date.now();
            if (now >= deadline || (now - lastActivity >= idleThreshold)) {
              perfObs.disconnect();
              mutObs.disconnect();
              resolve(true);
              return;
            }
            setTimeout(check, 100);
          };
          setTimeout(check, idleThreshold);
        })
      `);
    } catch {
      // Observer-based wait unavailable or interrupted: short fixed pause.
      await sleep2(500);
    }
  }
  // -------------------------------------------------------------------
  // Viewport
  // -------------------------------------------------------------------
  /** Set the viewport dimensions (defaults: deviceScaleFactor 2, non-mobile). */
  async setViewport(width, height, deviceScaleFactor = 2, mobile = false) {
    await this.adapter.setViewport(width, height, deviceScaleFactor, mobile);
  }
  // -------------------------------------------------------------------
  // DOM Queries
  // -------------------------------------------------------------------
  /** Query a single element and return its outer HTML (null when nothing matches). */
  async querySelector(selector) {
    return await this.adapter.evaluate(
      `document.querySelector(${JSON.stringify(selector)})?.outerHTML ?? null`
    );
  }
  /** Query all matching elements and return their outer HTML as an array. */
  async querySelectorAll(selector) {
    return await this.adapter.evaluate(`
      Array.from(document.querySelectorAll(${JSON.stringify(selector)})).map(el => el.outerHTML)
    `);
  }
  /** Get text content of an element (null when nothing matches). */
  async textContent(selector) {
    return await this.adapter.evaluate(
      `document.querySelector(${JSON.stringify(selector)})?.textContent ?? null`
    );
  }
  // -------------------------------------------------------------------
  // Internals
  // -------------------------------------------------------------------
  /**
   * Get the center coordinates of a DOM element (scrolls into view first).
   * @returns `{ x, y }` in viewport coordinates.
   * @throws Error (`Element not found: <selector>`) when nothing matches.
   */
  async getElementCenter(selector) {
    const result = await this.adapter.evaluate(`
      (function() {
        const el = document.querySelector(${JSON.stringify(selector)});
        if (!el) return null;
        el.scrollIntoView({ block: 'center', behavior: 'instant' });
        const r = el.getBoundingClientRect();
        return { x: r.x + r.width / 2, y: r.y + r.height / 2 };
      })()
    `);
    if (!result) {
      throw new Error(`Element not found: ${selector}`);
    }
    return result;
  }
  /** @internal Replace the adapter (used during browser switching). */
  _setAdapter(adapter) {
    this.adapter = adapter;
  }
  /**
   * Detect challenge interstitials that may auto-resolve (e.g. Cloudflare "Just a moment...").
   * These pages show briefly before redirecting to the real content.
   *
   * Only documents of at most 15000 characters are considered; matching is
   * a case-insensitive substring search for known challenge phrases.
   */
  isInterstitialContent(html) {
    if (html.length > 15e3) return false;
    const lower = html.toLowerCase();
    return lower.includes("just a moment") || lower.includes("checking your browser") || lower.includes("please wait while we verify") || lower.includes("ddos-guard") || lower.includes("challenge-platform");
  }
  /**
   * Detect site-level rate limiting in page content.
   * Browser rotation gives a new profile which bypasses per-session rate limits.
   *
   * Only documents of at most 20000 characters are considered. Matches
   * "rate limit exceeded", "too many requests", or the combination of
   * "rate limit" together with "please try again" (case-insensitive).
   */
  isRateLimitContent(html) {
    if (html.length > 2e4) return false;
    const lower = html.toLowerCase();
    return lower.includes("rate limit exceeded") || lower.includes("too many requests") || lower.includes("rate limit") && lower.includes("please try again");
  }
};
|
|
2059
|
+
/**
 * Promise-based delay.
 * @param ms Delay in milliseconds, passed straight to setTimeout.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep2(ms) {
  return new Promise(function schedule(resolve) {
    setTimeout(resolve, ms);
  });
}
|
|
2062
|
+
|
|
2063
|
+
// retry/failure-tracker.ts
|
|
2064
|
+
/** How long a failure record stays relevant after its most recent failure (10 minutes). */
var FAILURE_TTL_MS = 10 * 60 * 1e3;
/** Number of failures at which a browser is considered "failing" for a domain. */
var ROTATE_AFTER_FAILURES = 2;
var FailureTracker = class {
  /** Map of "<domain>::<browser>" -> { count, lastFailure (epoch ms) }. */
  failures = /* @__PURE__ */ new Map();
  /** Build the map key for a domain + browser pair. */
  key(domain, browser) {
    return `${domain}::${browser}`;
  }
  /**
   * Single source of truth for expiry: a record is expired once MORE than
   * FAILURE_TTL_MS has passed since its last failure. Every reader below uses
   * this, so a record aged exactly FAILURE_TTL_MS is treated the same way
   * (still live) by all of them.
   */
  isExpired(record, now = Date.now()) {
    return now - record.lastFailure > FAILURE_TTL_MS;
  }
  /** Record a failure for a domain + browser (increments the count and refreshes the timestamp). */
  recordFailure(domain, browser) {
    const k = this.key(domain, browser);
    const existing = this.failures.get(k);
    if (existing) {
      existing.count++;
      existing.lastFailure = Date.now();
    } else {
      this.failures.set(k, { count: 1, lastFailure: Date.now() });
    }
  }
  /** Record a success — clears the failure counter. */
  recordSuccess(domain, browser) {
    this.failures.delete(this.key(domain, browser));
  }
  /** Get failure count (0 if expired or not found). Expired records are removed on read. */
  failureCount(domain, browser) {
    const k = this.key(domain, browser);
    const record = this.failures.get(k);
    if (!record) return 0;
    if (this.isExpired(record)) {
      this.failures.delete(k);
      return 0;
    }
    return record.count;
  }
  /** Get total (non-expired) failures across all browsers for a domain. */
  totalFailureCount(domain) {
    const prefix = `${domain}::`;
    const now = Date.now();
    let total = 0;
    for (const [key, record] of this.failures) {
      if (key.startsWith(prefix) && !this.isExpired(record, now)) {
        total += record.count;
      }
    }
    return total;
  }
  /** Clear all failure records for a domain (used on stealth escalation). */
  clear(domain) {
    const prefix = `${domain}::`;
    for (const key of this.failures.keys()) {
      if (key.startsWith(prefix)) {
        this.failures.delete(key);
      }
    }
  }
  /** Clean expired entries. */
  cleanup() {
    const now = Date.now();
    for (const [key, record] of this.failures) {
      if (this.isExpired(record, now)) {
        this.failures.delete(key);
      }
    }
  }
};
|
|
2126
|
+
|
|
2127
|
+
// retry/browser-selector.ts
|
|
2128
|
+
/** Browsers tried first, in order. */
var PRIMARY_ROTATION = ["chrome", "chrome-new"];
/** Additional browsers appended after the primary ones (currently none). */
var EXTENDED_ROTATION = [];
/** Full rotation order: primary browsers followed by extended browsers. */
var BROWSER_ROTATION = PRIMARY_ROTATION.concat(EXTENDED_ROTATION);
|
|
2137
|
+
var BrowserSelector = class {
  /** Failure tracker consulted for every rotation decision. */
  tracker;
  rotationIndex = 0;
  /** @param tracker Object exposing `failureCount(domain, browser)`. */
  constructor(tracker) {
    this.tracker = tracker;
  }
  /** Get the current failure tracker. */
  get failureTracker() {
    return this.tracker;
  }
  /**
   * Check if the current browser should be rotated for a domain, i.e. its
   * failure count has reached ROTATE_AFTER_FAILURES.
   */
  shouldRotate(domain, currentBrowser) {
    return this.tracker.failureCount(domain, currentBrowser) >= ROTATE_AFTER_FAILURES;
  }
  /**
   * Pick the next browser to try, given the current one has failed.
   * Returns the next browser in rotation that hasn't exceeded the failure threshold.
   * Returns null if all browsers have been exhausted.
   *
   * When `currentBrowser` is part of BROWSER_ROTATION, every OTHER entry is
   * considered, starting with the one after it and wrapping around. When it
   * is not part of the rotation, every entry is considered from the start
   * (so the last entry is no longer skipped).
   */
  nextBrowser(domain, currentBrowser) {
    const total = BROWSER_ROTATION.length;
    const currentIdx = BROWSER_ROTATION.indexOf(currentBrowser);
    const known = currentIdx !== -1;
    const start = known ? currentIdx + 1 : 0;
    // A known current browser is excluded from its own candidate list.
    const candidates = known ? total - 1 : total;
    for (let step = 0; step < candidates; step++) {
      const candidate = BROWSER_ROTATION[(start + step) % total];
      if (this.tracker.failureCount(domain, candidate) < ROTATE_AFTER_FAILURES) {
        return candidate;
      }
    }
    return null;
  }
  /**
   * Choose the best browser for a domain (mirrors hints.rs choose_browser_for_domain).
   * Uses failure history to skip browsers that have been failing.
   * Returns the first rotation entry below the failure threshold, or
   * `fallback` when every entry has reached it.
   */
  chooseBrowser(domain, fallback) {
    for (const browser of BROWSER_ROTATION) {
      if (this.tracker.failureCount(domain, browser) < ROTATE_AFTER_FAILURES) {
        return browser;
      }
    }
    return fallback;
  }
};
|
|
2182
|
+
|
|
2183
|
+
// retry/retry-engine.ts
|
|
2184
|
+
init_errors();
|
|
2185
|
+
|
|
2186
|
+
// retry/keyword-classifier.ts
|
|
2187
|
+
var KeywordClassifier = class {
  /** Root of the keyword trie; each node is { children, output, fail, dict }. */
  root;
  /**
   * Build a multi-keyword matcher (trie plus failure/dictionary links).
   * @param rules Iterable of `[keywords, classification]` pairs. Keywords are
   *   lower-cased before insertion; when the same keyword appears in more
   *   than one rule, the first rule's classification is kept.
   */
  constructor(rules) {
    this.root = { children: /* @__PURE__ */ new Map(), output: void 0, fail: void 0, dict: void 0 };
    for (const [keywords, cls] of rules) {
      for (const keyword of keywords) {
        this.insert(keyword.toLowerCase(), cls);
      }
    }
    this.buildFailureLinks();
  }
  /**
   * Classify a string by scanning it once for all keywords.
   * ASCII upper-case letters in `text` are folded to lower case on the fly.
   * Returns the classification of the first keyword whose end is reached
   * while scanning left to right, or undefined when no keyword occurs.
   */
  classify(text) {
    let state = this.root;
    for (let pos = 0; pos < text.length; pos++) {
      const raw = text.charCodeAt(pos);
      // Fold 'A'..'Z' onto 'a'..'z'.
      const code = raw >= 65 && raw <= 90 ? raw + 32 : raw;
      // Fall back along failure links until a transition exists (or we hit root).
      while (state !== this.root && !state.children.has(code)) {
        state = state.fail;
      }
      const next = state.children.get(code);
      state = next ?? this.root;
      // A keyword ends exactly here ...
      if (state.output !== void 0) return state.output;
      // ... or a shorter keyword ends here as a suffix of the current path.
      const suffix = state.dict;
      if (suffix !== void 0 && suffix.output !== void 0) {
        return suffix.output;
      }
    }
    return void 0;
  }
  /** Add one (already lower-cased) keyword to the trie; an existing output is never overwritten. */
  insert(word, cls) {
    let cursor = this.root;
    for (let pos = 0; pos < word.length; pos++) {
      const code = word.charCodeAt(pos);
      if (!cursor.children.has(code)) {
        cursor.children.set(code, { children: /* @__PURE__ */ new Map(), output: void 0, fail: void 0, dict: void 0 });
      }
      cursor = cursor.children.get(code);
    }
    if (cursor.output === void 0) {
      cursor.output = cls;
    }
  }
  /**
   * Breadth-first pass that fills in, for every node, its failure link
   * (`fail`) and its dictionary link (`dict`: the failure target when that
   * target ends a keyword, otherwise the failure target's own `dict`).
   */
  buildFailureLinks() {
    const pending = [];
    // Depth-1 nodes always fall back to the root.
    for (const top of this.root.children.values()) {
      top.fail = this.root;
      top.dict = this.root;
      pending.push(top);
    }
    for (let cursor = 0; cursor < pending.length; cursor++) {
      const parent = pending[cursor];
      for (const [code, child] of parent.children) {
        let fallback = parent.fail;
        while (fallback !== this.root && !fallback.children.has(code)) {
          fallback = fallback.fail;
        }
        let target = fallback.children.get(code) ?? this.root;
        if (target === child) target = this.root;
        child.fail = target;
        if (target.output !== void 0) {
          child.dict = target;
        } else {
          child.dict = target.dict ?? void 0;
        }
        pending.push(child);
      }
    }
  }
};
|
|
2256
|
+
|
|
2257
|
+
// retry/retry-engine.ts
|
|
2258
|
+
/**
 * Keyword classifier mapping error text to a retry category:
 * "blocked", "auth", "backend_down" or "transient".
 */
var errorClassifier = (() => {
  // Blocked (bot walls, challenges, site-level rate limiting).
  // NOTE: err_aborted is deliberately absent; per the original note it is
  // handled specially in classifyError(). The server already retries
  // ERR_ABORTED internally, so when it reaches the client it is usually
  // session interference (shared Chrome) rather than actual blocking, and
  // reconnecting (new session) is the right fix, not stealth escalation.
  const blockedKeywords = [
    "bot detect",
    "are you a robot",
    "blocked",
    "403",
    "captcha",
    "network security",
    "human verification",
    "verify you are human",
    "checking your browser",
    "bot protection",
    "automated access",
    "pardon our interruption",
    "powered and protected by",
    "request could not be processed",
    "access to this page has been denied",
    "access denied",
    "please complete the security check",
    "enable cookies",
    "browser check",
    "just a moment",
    "rate limit exceeded",
    "too many requests",
    "err_blocked_by_client"
  ];
  // Auth
  const authKeywords = ["401", "402", "unauthorized"];
  // Backend down
  const backendDownKeywords = [
    "backend unavailable",
    "no backend",
    "service unavailable",
    "503",
    "failed to create page target",
    "unexpected server response"
  ];
  // Transient (connection)
  const connectionKeywords = [
    "err_connection_reset",
    "err_connection_closed",
    "err_empty_response",
    "err_ssl_protocol_error",
    "err_ssl_version_or_cipher_mismatch",
    "err_cert",
    "timeout"
  ];
  // Transient (WebSocket / session)
  const sessionKeywords = [
    "websocket is not connected",
    "websocket closed",
    "session with given id not found",
    "content contamination",
    "insufficient content"
  ];
  return new KeywordClassifier([
    [blockedKeywords, "blocked"],
    [authKeywords, "auth"],
    [backendDownKeywords, "backend_down"],
    [connectionKeywords, "transient"],
    [sessionKeywords, "transient"]
  ]);
})();
|
|
2298
|
+
/**
 * Keyword classifier answering "does this error text indicate a dropped
 * connection/session?": `true`, `false`, or undefined when no keyword matches.
 */
var disconnectionClassifier = (() => {
  // NOT disconnections (page-level).
  // NOTE: err_aborted is intentionally NOT in this list — at high concurrency
  // it's usually session interference (shared Chrome process), so reconnecting
  // (new session) is the correct fix. For NavigationError, undefined → true.
  const pageLevelKeywords = ["err_blocked_by_client"];
  // Actual disconnections (socket hang up = server killed the connection).
  // Content contamination = shared Chrome leaked another session's content,
  // which a new session fixes.
  const disconnectKeywords = [
    "websocket is not connected",
    "websocket closed",
    "session destroyed",
    "session with given id not found",
    "err_connection_reset",
    "err_connection_closed",
    "err_empty_response",
    "socket hang up",
    "err_aborted",
    "content contamination",
    "insufficient content",
    "err_ssl_protocol_error",
    "err_ssl_version_or_cipher_mismatch"
  ];
  return new KeywordClassifier([
    [pageLevelKeywords, false],
    [disconnectKeywords, true]
  ]);
})();
|
|
2322
|
+
/**
 * Runs an action with retries, rotating across browser backends and
 * escalating the stealth level when attempts keep failing.
 *
 * Collaborators referenced here (`BrowserSelector`, `FailureTracker`,
 * `ProtocolAdapter`, `PRIMARY_ROTATION`, `EXTENDED_ROTATION`, `logger`,
 * the typed error classes and the keyword classifiers) are defined elsewhere
 * in this bundle.
 */
var RetryEngine = class {
  opts;
  selector;
  currentStealthLevel;
  maxStealthLevel;
  retryTimeoutMs;
  commandTimeoutMs;
  /** Browser backends that returned 503/unavailable — persists across stealth levels. */
  downBackends = /* @__PURE__ */ new Set();
  /**
   * @param opts Engine options. Reads `transportOpts.stealthLevel`,
   *   `maxStealthLevel`, `retryTimeoutMs` and `commandTimeoutMs` (each with a
   *   default), and later `maxRetries`, `emitter` and `transportOpts`.
   */
  constructor(opts) {
    this.opts = opts;
    this.selector = new BrowserSelector(new FailureTracker());
    this.currentStealthLevel = opts.transportOpts.stealthLevel ?? 0;
    this.maxStealthLevel = opts.maxStealthLevel ?? 3;
    this.retryTimeoutMs = opts.retryTimeoutMs ?? 15e3;
    this.commandTimeoutMs = opts.commandTimeoutMs ?? 3e4;
  }
  /** Current stealth level (0=auto, 1-3=explicit tiers). */
  get stealthLevel() {
    return this.currentStealthLevel;
  }
  /**
   * Execute an action with stealth-first retry across browsers and stealth levels.
   *
   * Phase 1: Escalate stealth across primary browsers (chrome, chrome-new).
   *   Blocked errors skip remaining primary browsers and immediately escalate stealth.
   *
   * Phase 2: At max stealth only, try extended browsers (firefox, lightpanda, servo)
   *   for a different engine fingerprint with the best proxy quality.
   *
   * @param fn  Async action to run; its resolved value is returned on success.
   * @param ctx Mutable session context (`transport`, `adapter`, `currentUrl`,
   *   `onAdapterChanged`); the adapter is replaced when browsers are switched.
   * @returns The value produced by `fn`.
   * @throws The last error seen (auth errors are rethrown immediately), or a
   *   generic Error when no attempt produced one.
   */
  async execute(fn, ctx) {
    let lastError;
    let totalAttempts = 0;
    // One initial attempt plus `maxRetries` retries, shared by both phases.
    const budget = this.opts.maxRetries + 1;
    this.downBackends.clear();
    const stealthLevels = this.getStealthProgression();
    const initialBrowser = ctx.transport.browser;
    let consecutiveDisconnects = 0;
    let wasBlocked = false;
    for (let si = 0; si < stealthLevels.length; si++) {
      if (totalAttempts >= budget) break;
      const stealth = stealthLevels[si];
      if (si > 0) {
        const prev = stealthLevels[si - 1];
        this.currentStealthLevel = stealth;
        ctx.transport.stealthLevel = stealth;
        logger.info(`retry: escalating stealth ${prev} -> ${stealth}`);
        this.opts.emitter.emit("stealth.escalated", {
          from: prev,
          to: stealth,
          reason: lastError ? this.classifyError(lastError) : "exhausted"
        });
        // Failures recorded at a lower stealth level are cleared for this domain.
        const domain = this.extractDomain(ctx.currentUrl);
        if (domain) this.selector.failureTracker.clear(domain);
      }
      const primaryBrowsers = si === 0 ? this.orderedPrimaryBrowsers(initialBrowser) : [...PRIMARY_ROTATION];
      let triedAny = false;
      for (const browser of primaryBrowsers) {
        if (totalAttempts >= budget) break;
        if (this.downBackends.has(browser)) continue;
        if (consecutiveDisconnects >= 6) {
          logger.warn("retry: 6+ consecutive disconnects, server overloaded \u2014 aborting");
          break;
        }
        const result = await this.tryBrowser(fn, ctx, browser, stealth, totalAttempts, budget, true);
        totalAttempts = result.totalAttempts;
        if (result.success) {
          consecutiveDisconnects = 0;
          return result.value;
        }
        if (result.triedAction) triedAny = true;
        if (result.lastError) {
          lastError = result.lastError;
          const errorClass = this.classifyError(lastError);
          wasBlocked = errorClass === "blocked";
          if (errorClass === "auth") throw lastError;
          if (this.isDisconnectionError(lastError)) {
            consecutiveDisconnects++;
          } else {
            consecutiveDisconnects = 0;
          }
          // A block is not retried on the other primary browsers: escalate stealth.
          if (wasBlocked) break;
        }
      }
      if (!triedAny) {
        logger.warn("retry: all browser backends unavailable, stopping");
        break;
      }
    }
    if (wasBlocked && totalAttempts < budget) {
      for (const browser of EXTENDED_ROTATION) {
        if (totalAttempts >= budget) break;
        if (this.downBackends.has(browser)) continue;
        const maxStealth = stealthLevels[stealthLevels.length - 1] ?? this.maxStealthLevel;
        const result = await this.tryBrowser(fn, ctx, browser, maxStealth, totalAttempts, budget, false);
        totalAttempts = result.totalAttempts;
        if (result.success) return result.value;
        if (result.lastError) {
          lastError = result.lastError;
          if (this.classifyError(lastError) === "auth") throw lastError;
        }
      }
    }
    throw lastError ?? new Error("All browsers and stealth levels exhausted");
  }
  /**
   * Attempt an action on a specific browser, with optional transient retries.
   *
   * When `totalAttempts > 0` the session is first switched to `browser`; a
   * failed switch returns `triedAction: false` without consuming an attempt.
   *
   * @param allowTransientRetries If true, retries up to 2x for transient errors
   *   (reconnect + retry). If false, a single attempt only (for extended browsers).
   * @returns `{ success, value?, totalAttempts, triedAction, lastError? }`.
   */
  async tryBrowser(fn, ctx, browser, stealth, totalAttempts, budget, allowTransientRetries) {
    let lastError;
    if (totalAttempts > 0) {
      try {
        const prevBrowser = ctx.transport.browser;
        logger.info(`retry: switching ${prevBrowser} -> ${browser} (stealth=${stealth})`);
        this.opts.emitter.emit("browser.switching", {
          from: prevBrowser,
          to: browser,
          // No error is known to this method yet, so the reason is always rotation.
          reason: "rotation"
        });
        const prevTimeout = this.opts.transportOpts.connectTimeoutMs;
        this.opts.transportOpts.connectTimeoutMs = this.retryTimeoutMs;
        try {
          await this.switchBrowser(ctx, browser);
        } finally {
          // Restore the shared transport option even when the switch fails;
          // otherwise the shorter retry timeout leaks into later connections.
          this.opts.transportOpts.connectTimeoutMs = prevTimeout;
        }
        this.opts.emitter.emit("browser.switched", { browser });
      } catch (switchErr) {
        logger.warn(`retry: switch to ${browser} failed, skipping`, {
          error: switchErr instanceof Error ? switchErr.message : String(switchErr)
        });
        if (switchErr instanceof BackendUnavailableError) {
          this.downBackends.add(browser);
        }
        return { success: false, totalAttempts, triedAction: false, lastError: switchErr instanceof Error ? switchErr : void 0 };
      }
    }
    const MAX_TRANSIENT_RETRIES = allowTransientRetries ? 2 : 0;
    const MAX_DISCONNECT_RETRIES = allowTransientRetries ? 2 : 0;
    let transientRetries = 0;
    let disconnectRetries = 0;
    while (totalAttempts < budget) {
      totalAttempts++;
      try {
        const result = await fn();
        const domain = this.extractDomain(ctx.currentUrl);
        if (domain) this.selector.failureTracker.recordSuccess(domain, browser);
        return { success: true, value: result, totalAttempts, triedAction: true };
      } catch (err) {
        lastError = err instanceof Error ? err : new Error(String(err));
        const errorClass = this.classifyError(lastError);
        logger.warn(`retry: attempt ${totalAttempts}/${budget} failed`, {
          error: lastError.message,
          class: errorClass,
          browser,
          stealth
        });
        this.opts.emitter.emit("retry.attempt", {
          attempt: totalAttempts,
          maxRetries: this.opts.maxRetries,
          error: lastError.message
        });
        if (errorClass === "auth") {
          return { success: false, totalAttempts, triedAction: true, lastError };
        }
        if (errorClass === "rate_limit") {
          // Honor the server-provided delay when present, otherwise wait 2s.
          const waitMs = lastError instanceof RateLimitError && lastError.retryAfterMs ? lastError.retryAfterMs : 2e3;
          await sleep3(waitMs);
          continue;
        }
        if (errorClass === "backend_down") {
          this.downBackends.add(browser);
          return { success: false, totalAttempts, triedAction: true, lastError };
        }
        if (errorClass === "blocked") {
          const domain2 = this.extractDomain(ctx.currentUrl);
          if (domain2) this.selector.failureTracker.recordFailure(domain2, browser);
          return { success: false, totalAttempts, triedAction: true, lastError };
        }
        if (errorClass === "transient" && this.isDisconnectionError(lastError)) {
          if (disconnectRetries < MAX_DISCONNECT_RETRIES) {
            disconnectRetries++;
            await sleep3(500);
            try {
              // The session is dead: reconnect to the same browser before retrying.
              await this.switchBrowser(ctx, browser);
            } catch (reconnectErr) {
              logger.warn(`retry: reconnect to ${browser} failed`, {
                error: reconnectErr instanceof Error ? reconnectErr.message : String(reconnectErr)
              });
              if (reconnectErr instanceof BackendUnavailableError) {
                this.downBackends.add(browser);
              }
              const domain3 = this.extractDomain(ctx.currentUrl);
              if (domain3) this.selector.failureTracker.recordFailure(domain3, browser);
              return { success: false, totalAttempts, triedAction: true, lastError };
            }
            continue;
          }
          const domain2 = this.extractDomain(ctx.currentUrl);
          if (domain2) this.selector.failureTracker.recordFailure(domain2, browser);
          return { success: false, totalAttempts, triedAction: true, lastError };
        }
        if (errorClass === "transient" && transientRetries < MAX_TRANSIENT_RETRIES) {
          transientRetries++;
          await sleep3(100);
          continue;
        }
        const domain = this.extractDomain(ctx.currentUrl);
        if (domain) this.selector.failureTracker.recordFailure(domain, browser);
        return { success: false, totalAttempts, triedAction: true, lastError };
      }
    }
    return { success: false, totalAttempts, triedAction: true, lastError };
  }
  /** Check if an error indicates the WebSocket/session is dead and needs reconnection. */
  isDisconnectionError(err) {
    if (err instanceof NavigationError) {
      // For navigation errors anything other than an explicit `false` counts.
      const result = disconnectionClassifier.classify(err.message);
      return result !== false;
    }
    return disconnectionClassifier.classify(err.message) === true;
  }
  /**
   * Classify an error to determine retry strategy.
   *
   * Fast path: typed error instances (instanceof check, no string scan).
   * Slow path: Aho-Corasick O(n) single-pass keyword matching on error message.
   *
   * @returns One of the classes this engine acts on: "auth", "rate_limit",
   *   "blocked", "backend_down" or "transient" (or whatever the keyword
   *   classifier returns for untyped errors).
   */
  classifyError(err) {
    if (err instanceof AuthError) return "auth";
    if (err instanceof RateLimitError) return "rate_limit";
    if (err instanceof BlockedError) return "blocked";
    if (err instanceof BackendUnavailableError) return "backend_down";
    if (err instanceof TimeoutError) return "transient";
    if (err instanceof ConnectionError) {
      const code = err.wsCode;
      if (code === 1006 || code === 1011) return "transient";
      if (code === 4001 || code === 4002) return "auth";
      return "transient";
    }
    if (err instanceof NavigationError) {
      const cls2 = errorClassifier.classify(err.message);
      return cls2 === "blocked" ? "blocked" : "transient";
    }
    const cls = errorClassifier.classify(err.message);
    if (cls) return cls;
    if (err.message.includes("429")) return "rate_limit";
    return "transient";
  }
  /** Reconnect with a (possibly different) browser, re-navigate to the same URL. */
  async switchBrowser(ctx, newBrowser) {
    ctx.adapter.destroy();
    await ctx.transport.reconnect(newBrowser);
    // Adapter options are only passed when the timeout differs from 30000 ms.
    const adapterOpts = this.commandTimeoutMs !== 3e4 ? { commandTimeoutMs: this.commandTimeoutMs } : void 0;
    const newAdapter = new ProtocolAdapter(
      ctx.transport,
      this.opts.emitter,
      newBrowser,
      adapterOpts
    );
    await newAdapter.init();
    ctx.adapter = newAdapter;
    ctx.onAdapterChanged(newAdapter);
    if (ctx.currentUrl) {
      await newAdapter.navigate(ctx.currentUrl);
      await sleep3(200);
    }
  }
  /**
   * Get stealth progression: from current level up to maxStealthLevel.
   * e.g. start=0, max=3 → [0, 1, 2, 3]
   * e.g. start=2, max=3 → [2, 3]
   */
  getStealthProgression() {
    const start = this.currentStealthLevel;
    const levels = [start];
    let next = start < 1 ? 1 : start + 1;
    while (next <= this.maxStealthLevel) {
      levels.push(next);
      next++;
    }
    return levels;
  }
  /**
   * Order PRIMARY browsers starting from `start`, then the rest in primary rotation order.
   * If `start` is not in PRIMARY_ROTATION, returns primary rotation as-is.
   */
  orderedPrimaryBrowsers(start) {
    const idx = PRIMARY_ROTATION.indexOf(start);
    if (idx <= 0) return [...PRIMARY_ROTATION];
    return [
      ...PRIMARY_ROTATION.slice(idx),
      ...PRIMARY_ROTATION.slice(0, idx)
    ];
  }
  /** Return the hostname of `url`, or undefined when it is empty or unparsable. */
  extractDomain(url) {
    if (!url) return void 0;
    try {
      return new URL(url).hostname;
    } catch {
      return void 0;
    }
  }
};
|
|
2620
|
+
/**
 * Return a promise that settles (with no value) after `ms` milliseconds.
 */
function sleep3(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
2623
|
+
|
|
2624
|
+
// ai/llm-provider.ts
|
|
2625
|
+
/**
 * Build the LLM provider selected by `config.provider`.
 *
 * "openai" and "openrouter" both use the OpenAI-compatible provider;
 * "anthropic" uses the Anthropic provider. The provider modules are
 * initialized lazily, only when their branch is taken.
 *
 * @throws {Error} When `config.provider` is not one of the known names.
 */
function createProvider(config) {
  const kind = config.provider;
  if (kind === "openai" || kind === "openrouter") {
    const openaiModule = (init_openai(), __toCommonJS(openai_exports));
    const OpenAICompatibleProvider2 = openaiModule.OpenAICompatibleProvider;
    return new OpenAICompatibleProvider2(config);
  }
  if (kind === "anthropic") {
    const anthropicModule = (init_anthropic(), __toCommonJS(anthropic_exports));
    const AnthropicProvider2 = anthropicModule.AnthropicProvider;
    return new AnthropicProvider2(config);
  }
  throw new Error(`Unknown LLM provider: ${config.provider}`);
}
|
|
2640
|
+
|
|
2641
|
+
// ai/prompts.ts
|
|
2642
|
+
/**
 * System prompt sent as the first message of every agent round.
 *
 * It describes the per-round input, the JSON plan the model must return
 * (`label`, `done`, `steps`, optional `extracted`) and the action vocabulary.
 * The agent loop reads `plan.extracted` and returns it to the caller, so the
 * output schema documents that field explicitly.
 */
var SYSTEM_PROMPT = `You are an expert web automation agent. You interact with any webpage to solve challenges, fill forms, navigate sites, extract data, and complete complex multi-step tasks.

## Input
Each round you receive:
- Screenshot of current page state
- URL, title, HTML context
- Round number and detected challenge types

## Output
Return a single JSON object (no prose):
{
"label": "brief action description",
"done": true|false,
"steps": [...],
"extracted": <optional, any JSON value with the data the task asked you to extract>
}
Set "done": true when the task is fully complete. Set "done": false to continue.
Include "extracted" only when the task asks for data; the most recent value you return is reported as the result.

## Coordinate System
**ClickPoint coordinates use CSS pixels** (same as getBoundingClientRect()).
- Screenshot pixels = viewport x DPR. Divide screenshot coordinates by DPR for CSS pixels.
- Example: viewport 1280x960 at DPR 2 = screenshot 2560x1920. Visual point (500,400) in screenshot = (250,200) CSS.

## Actions

### Click
- { "Click": "selector" } - CSS selector click
- { "ClickPoint": { "x": 100, "y": 200 } } - CSS pixel coordinates
- { "ClickAll": "selector" } - Click all matches
- { "DoubleClick": "selector" } / { "DoubleClickPoint": { "x": 0, "y": 0 } }
- { "RightClick": "selector" } / { "RightClickPoint": { "x": 0, "y": 0 } }
- { "ClickHold": { "selector": "sel", "hold_ms": 500 } } / { "ClickHoldPoint": { "x": 0, "y": 0, "hold_ms": 500 } }
- { "WaitForAndClick": "selector" }

### Drag
- { "ClickDrag": { "from": "sel1", "to": "sel2" } }
- { "ClickDragPoint": { "from_x": 0, "from_y": 0, "to_x": 100, "to_y": 100 } }

### Type & Input
- { "Fill": { "selector": "input", "value": "text" } } - Clear and type
- { "Type": { "value": "text" } } - Type into focused element
- { "Clear": "selector" } - Clear input
- { "Press": "Enter" } - Press key (Enter, Tab, Escape, ArrowDown, Space, etc.)
- { "KeyDown": "Shift" } / { "KeyUp": "Shift" }

### Select & Focus
- { "Select": { "selector": "select", "value": "option" } }
- { "Focus": "selector" } / { "Blur": "selector" }
- { "Hover": "selector" } / { "HoverPoint": { "x": 0, "y": 0 } }

### Scroll
- { "ScrollY": 300 } - Scroll down (negative = up)
- { "ScrollX": 200 } - Scroll right (negative = left)
- { "ScrollTo": { "selector": "element" } } - Scroll element into view
- { "ScrollToPoint": { "x": 0, "y": 500 } }
- { "InfiniteScroll": 5 } - Scroll to bottom repeatedly

### Wait
- { "Wait": 1000 } - Wait milliseconds
- { "WaitFor": "selector" } - Wait for element
- { "WaitForWithTimeout": { "selector": "sel", "timeout": 5000 } }
- { "WaitForNavigation": null } - Wait for page load
- { "WaitForDom": { "selector": "sel", "timeout": 5000 } }

### Navigate
- { "Navigate": "https://url" } - Go to URL
- { "GoBack": null } / { "GoForward": null } / { "Reload": null }

### Viewport
- { "SetViewport": { "width": 1920, "height": 1080, "device_scale_factor": 2.0 } } - Change viewport/DPR at runtime. Follow with { "Wait": 500 }.

### JavaScript
- { "Evaluate": "javascript code" } - Execute JS on the page

**Evaluate notes:**
- Return values are NOT sent back. To see results, inject into the page:
- Title: document.title = JSON.stringify(data) (visible in PAGE TITLE next round)
- DOM: inject a visible overlay div with the info (visible in screenshot)
- **Do NOT use element.click() in Evaluate** - it does not trigger real browser events (mousedown/pointerdown). Always use real Click/ClickPoint actions for interactions.
- **Always pair Evaluate with action steps** in the same round. Never submit a round with ONLY Evaluate.

## Core Strategy

1. **Be efficient**: Solve challenges in the fewest rounds possible. Combine Evaluate (read state) + action (click/fill) in the SAME round. Never spend a round only gathering data.
2. **Batch operations**: When you need to click/select multiple elements, include multiple Click actions in a single step list rather than spreading across multiple rounds.
3. **Evaluate = READ ONLY**: Use Evaluate to read DOM state, computed styles, coordinates. Set results in document.title. NEVER use el.click() inside Evaluate - it does NOT trigger real browser events. Use real Click/ClickPoint for all interactions.
4. **Prefer selectors over coordinates**: Use CSS selectors when elements exist in DOM. Reserve ClickPoint for canvas/SVG or when selectors fail.
5. **Handle stagnation**: If your last actions had no visible effect, try a different approach - different selector, different interaction method, or use Evaluate to understand why.
6. **Never repeat failures**: If something fails twice, change strategy entirely. If verify/submit doesn't advance, your answer is likely wrong - re-examine.
7. **Commit and iterate**: Submit your best answer rather than endlessly adjusting. Learn from the result.

## Captcha & Challenge Strategies

- **reCAPTCHA checkbox**: Click the iframe first, then the checkbox inside it.
- **Cloudflare Turnstile**: The challenge is in an iframe. Look for \`iframe[src*="challenges.cloudflare.com"]\` and click inside it.
- **Image selection (reCAPTCHA v2)**: Identify matching images and click them one at a time. After selecting, click the verify button. If incorrect, the grid refreshes - try again.
- **Slider/puzzle captchas**: Use ClickDragPoint to drag the slider from start to end position.
- **Text captchas**: Read the distorted text carefully, then Fill the answer input and Press Enter.
- **Visual puzzles**: Describe what you see, reason about the solution, then act precisely.
- **PerimeterX (px-captcha)**: This is a press-and-hold captcha. Find the button element inside the #px-captcha container or iframe with [role="button"]. Use ClickHold with hold_ms: 15000 (15 seconds). Wait for the captcha wrapper to disappear after release.
- **DataDome**: Often shows an iframe from geo.captcha-delivery.com. Click inside the iframe to interact with the challenge. May include slider or image selection.
- **Arkose Labs / FunCaptcha**: Interactive challenge in an iframe from arkoselabs.com. Follow on-screen instructions \u2014 typically image rotation, matching, or selection puzzles.
- **Cookie/consent banners**: Click accept/dismiss buttons to clear overlays before solving the actual captcha.
- **Multiple challenge steps**: Some captchas have multiple rounds (e.g., reCAPTCHA may ask to solve 3 image grids). Keep going until done.

## Output Rules
- JSON only, no markdown or prose
- Always include "label", "done", and "steps"
- Add "extracted" when the task asks you to return data
- "steps" array can have multiple actions per round`;
|
|
2750
|
+
/**
 * Assemble the multi-part user message for one agent round.
 *
 * @param url           Page URL shown on the first line of the text part.
 * @param html          Page HTML; truncated to 12000 characters via truncateHtml.
 * @param screenshotB64 Base64 screenshot, embedded as a PNG data URL.
 * @param extraContext  Optional task/context text; a generic instruction is
 *   used when it is null or undefined.
 * @returns A two-element array: a text part followed by an image_url part.
 */
function buildUserMessage(url, html, screenshotB64, extraContext) {
  const clippedHtml = truncateHtml(html, 12e3);
  const taskContext = extraContext ?? "Complete the task on this page.";
  // The empty entry yields the blank line between the HTML and the context.
  const textLines = [
    `URL: ${url}`,
    "HTML (truncated):",
    `${clippedHtml}`,
    "",
    `${taskContext}`
  ];
  const textPart = { type: "text", text: textLines.join("\n") };
  const imagePart = {
    type: "image_url",
    image_url: { url: `data:image/png;base64,${screenshotB64}` }
  };
  return [textPart, imagePart];
}
|
|
2765
|
+
/**
 * Shorten `html` to at most `maxChars` UTF-16 code units.
 *
 * When the cut region contains a ">" (past index 0) the result ends right
 * after the last one, so the text does not stop in the middle of a tag.
 * Otherwise the raw cut is used, minus a trailing lone high surrogate so the
 * result never ends in half of a surrogate pair.
 *
 * @param html     HTML text; null/undefined is treated as empty.
 * @param maxChars Maximum length of the returned string.
 * @returns The original string when it already fits, else the shortened text.
 */
function truncateHtml(html, maxChars) {
  if (html == null) return "";
  if (html.length <= maxChars) return html;
  const slice = html.slice(0, maxChars);
  const lastClose = slice.lastIndexOf(">");
  if (lastClose > 0) return html.slice(0, lastClose + 1);
  // No tag boundary to snap to: make sure the raw cut did not split a pair.
  const lastUnit = slice.charCodeAt(slice.length - 1);
  const endsWithHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return endsWithHighSurrogate ? slice.slice(0, -1) : slice;
}
|
|
2771
|
+
|
|
2772
|
+
// ai/agent.ts
|
|
2773
|
+
init_errors();
|
|
2774
|
+
/**
 * LLM-driven automation loop: each round captures the page state, asks the
 * LLM for a JSON plan and executes the plan's steps through the adapter.
 */
var Agent = class {
  adapter;
  llm;
  emitter;
  opts;
  /**
   * @param adapter Page adapter; this class calls `captureScreenshot`,
   *   `getHTML` and `evaluate` on it and hands it to `executeAction`.
   * @param llm     LLM client exposing `chatJSON(messages)`.
   * @param emitter Event emitter receiving `agent.step`, `agent.done` and
   *   `agent.error`.
   * @param options Optional `maxRounds` (default 30), `stepDelayMs`
   *   (default 1500) and a default `instruction`.
   */
  constructor(adapter, llm, emitter, options) {
    this.adapter = adapter;
    this.llm = llm;
    this.emitter = emitter;
    this.opts = {
      maxRounds: options?.maxRounds ?? 30,
      stepDelayMs: options?.stepDelayMs ?? 1500,
      instruction: options?.instruction
    };
  }
  /**
   * Execute the agent loop until the task is done or max rounds reached.
   *
   * @param instruction Task text; falls back to the `instruction` option
   *   given to the constructor when omitted.
   * @returns `{ done, rounds, extracted, label }`. `rounds` is the number of
   *   rounds started when the loop ends early (task done, or the page state
   *   could not be captured) and `maxRounds` when the budget is exhausted.
   */
  async execute(instruction) {
    const task = instruction ?? this.opts.instruction;
    let extracted = void 0;
    let lastLabel = "";
    // Set when the loop is abandoned because the page state cannot be read.
    let abortReason;
    let roundsStarted = 0;
    await sleep4(500);
    for (let round = 0; round < this.opts.maxRounds; round++) {
      roundsStarted = round + 1;
      let screenshot;
      try {
        screenshot = await this.adapter.captureScreenshot();
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        logger.warn(`agent: screenshot failed round ${round}`, {
          error: message
        });
        abortReason = `screenshot failed: ${message}`;
        break;
      }
      let html;
      try {
        html = await this.adapter.getHTML();
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        logger.warn(`agent: get HTML failed round ${round}`, {
          error: message
        });
        abortReason = `get HTML failed: ${message}`;
        break;
      }
      // URL and title are best-effort; failures fall back to placeholders.
      const [url, title] = await Promise.all([
        this.adapter.evaluate("window.location.href").catch(() => "unknown"),
        this.adapter.evaluate("document.title").catch(() => "")
      ]);
      const context = `Round ${round + 1}/${this.opts.maxRounds}. Task: ${task}
PAGE TITLE: ${title}`;
      let plan;
      try {
        plan = await this.llm.chatJSON([
          { role: "system", content: SYSTEM_PROMPT },
          {
            role: "user",
            content: buildUserMessage(url, html, screenshot, context)
          }
        ]);
      } catch (err) {
        logger.warn(`agent: LLM call failed round ${round}`, {
          error: err instanceof Error ? err.message : String(err)
        });
        await sleep4(2e3);
        continue;
      }
      if (plan == null) {
        // A missing plan is handled like a failed LLM call instead of crashing.
        logger.warn(`agent: LLM returned no plan round ${round}`);
        await sleep4(2e3);
        continue;
      }
      lastLabel = plan.label ?? "";
      if (plan.extracted !== void 0) {
        extracted = plan.extracted;
      }
      logger.info(`agent: round ${round + 1}`, {
        label: plan.label,
        done: plan.done,
        steps: plan.steps?.length ?? 0
      });
      this.emitter.emit("agent.step", {
        round: round + 1,
        label: plan.label,
        stepsCount: plan.steps?.length ?? 0
      });
      if (plan.done) {
        this.emitter.emit("agent.done", { rounds: round + 1, result: extracted });
        return { done: true, rounds: round + 1, extracted, label: lastLabel };
      }
      if (!plan.steps || plan.steps.length === 0) {
        logger.info("agent: no steps, retrying");
        await sleep4(this.opts.stepDelayMs);
        continue;
      }
      for (let i = 0; i < plan.steps.length; i++) {
        const action = plan.steps[i];
        try {
          await executeAction(this.adapter, action);
        } catch (err) {
          logger.warn(`agent: action failed round ${round} step ${i}`, {
            action: JSON.stringify(action).slice(0, 100),
            error: err instanceof Error ? err.message : String(err)
          });
          // Remaining steps of this round are skipped; the next round re-plans.
          break;
        }
        await sleep4(200);
      }
      await sleep4(this.opts.stepDelayMs);
    }
    if (abortReason !== void 0) {
      // Report the real cause and round instead of "max rounds exceeded".
      this.emitter.emit("agent.error", {
        error: abortReason,
        round: roundsStarted
      });
      return { done: false, rounds: roundsStarted, extracted, label: lastLabel };
    }
    logger.warn("agent: max rounds exceeded");
    this.emitter.emit("agent.error", {
      error: "max rounds exceeded",
      round: this.opts.maxRounds
    });
    return { done: false, rounds: this.opts.maxRounds, extracted, label: lastLabel };
  }
};
|
|
2883
|
+
async function executeAction(adapter, action) {
|
|
2884
|
+
if ("Click" in action) {
|
|
2885
|
+
const { x, y } = await getElementCenter(adapter, action.Click);
|
|
2886
|
+
await adapter.clickPoint(x, y);
|
|
2887
|
+
return;
|
|
2888
|
+
}
|
|
2889
|
+
if ("ClickAll" in action) {
|
|
2890
|
+
const selector = action.ClickAll;
|
|
2891
|
+
const points = await adapter.evaluate(`
|
|
2892
|
+
(function() {
|
|
2893
|
+
const els = document.querySelectorAll(${JSON.stringify(selector)});
|
|
2894
|
+
return Array.from(els).map(el => {
|
|
2895
|
+
const r = el.getBoundingClientRect();
|
|
2896
|
+
return { x: r.x + r.width / 2, y: r.y + r.height / 2 };
|
|
2897
|
+
});
|
|
2898
|
+
})()
|
|
2899
|
+
`);
|
|
2900
|
+
if (Array.isArray(points)) {
|
|
2901
|
+
for (const pt of points) {
|
|
2902
|
+
await adapter.clickPoint(pt.x, pt.y);
|
|
2903
|
+
await sleep4(100);
|
|
2904
|
+
}
|
|
2905
|
+
}
|
|
2906
|
+
return;
|
|
2907
|
+
}
|
|
2908
|
+
if ("ClickPoint" in action) {
|
|
2909
|
+
const { x, y } = action.ClickPoint;
|
|
2910
|
+
await adapter.clickPoint(x, y);
|
|
2911
|
+
return;
|
|
2912
|
+
}
|
|
2913
|
+
if ("ClickHold" in action) {
|
|
2914
|
+
const { selector, hold_ms } = action.ClickHold;
|
|
2915
|
+
const { x, y } = await getElementCenter(adapter, selector);
|
|
2916
|
+
await adapter.clickHoldPoint(x, y, hold_ms);
|
|
2917
|
+
return;
|
|
2918
|
+
}
|
|
2919
|
+
if ("ClickHoldPoint" in action) {
|
|
2920
|
+
const { x, y, hold_ms } = action.ClickHoldPoint;
|
|
2921
|
+
await adapter.clickHoldPoint(x, y, hold_ms);
|
|
2922
|
+
return;
|
|
2923
|
+
}
|
|
2924
|
+
if ("DoubleClick" in action) {
|
|
2925
|
+
const { x, y } = await getElementCenter(adapter, action.DoubleClick);
|
|
2926
|
+
await adapter.doubleClickPoint(x, y);
|
|
2927
|
+
return;
|
|
2928
|
+
}
|
|
2929
|
+
if ("DoubleClickPoint" in action) {
|
|
2930
|
+
const { x, y } = action.DoubleClickPoint;
|
|
2931
|
+
await adapter.doubleClickPoint(x, y);
|
|
2932
|
+
return;
|
|
2933
|
+
}
|
|
2934
|
+
if ("RightClick" in action) {
|
|
2935
|
+
const { x, y } = await getElementCenter(adapter, action.RightClick);
|
|
2936
|
+
await adapter.rightClickPoint(x, y);
|
|
2937
|
+
return;
|
|
2938
|
+
}
|
|
2939
|
+
if ("RightClickPoint" in action) {
|
|
2940
|
+
const { x, y } = action.RightClickPoint;
|
|
2941
|
+
await adapter.rightClickPoint(x, y);
|
|
2942
|
+
return;
|
|
2943
|
+
}
|
|
2944
|
+
if ("WaitForAndClick" in action) {
|
|
2945
|
+
const selector = action.WaitForAndClick;
|
|
2946
|
+
await waitForElement(adapter, selector, 5e3);
|
|
2947
|
+
const { x, y } = await getElementCenter(adapter, selector);
|
|
2948
|
+
await adapter.clickPoint(x, y);
|
|
2949
|
+
return;
|
|
2950
|
+
}
|
|
2951
|
+
if ("ClickDrag" in action) {
|
|
2952
|
+
const { from, to } = action.ClickDrag;
|
|
2953
|
+
const f = await getElementCenter(adapter, from);
|
|
2954
|
+
const t = await getElementCenter(adapter, to);
|
|
2955
|
+
await adapter.dragPoint(f.x, f.y, t.x, t.y);
|
|
2956
|
+
return;
|
|
2957
|
+
}
|
|
2958
|
+
if ("ClickDragPoint" in action) {
|
|
2959
|
+
const { from_x, from_y, to_x, to_y } = action.ClickDragPoint;
|
|
2960
|
+
await adapter.dragPoint(from_x, from_y, to_x, to_y);
|
|
2961
|
+
return;
|
|
2962
|
+
}
|
|
2963
|
+
if ("Type" in action) {
|
|
2964
|
+
await adapter.insertText(action.Type.value);
|
|
2965
|
+
return;
|
|
2966
|
+
}
|
|
2967
|
+
if ("Fill" in action) {
|
|
2968
|
+
const { selector, value } = action.Fill;
|
|
2969
|
+
await adapter.evaluate(`
|
|
2970
|
+
(function() {
|
|
2971
|
+
const el = document.querySelector(${JSON.stringify(selector)});
|
|
2972
|
+
if (el) { el.focus(); el.value = ''; }
|
|
2973
|
+
})()
|
|
2974
|
+
`);
|
|
2975
|
+
try {
|
|
2976
|
+
const { x, y } = await getElementCenter(adapter, selector);
|
|
2977
|
+
await adapter.clickPoint(x, y);
|
|
2978
|
+
} catch {
|
|
2979
|
+
}
|
|
2980
|
+
await adapter.insertText(value);
|
|
2981
|
+
await adapter.evaluate(`
|
|
2982
|
+
(function() {
|
|
2983
|
+
const el = document.querySelector(${JSON.stringify(selector)});
|
|
2984
|
+
if (el) {
|
|
2985
|
+
el.dispatchEvent(new Event('input', { bubbles: true }));
|
|
2986
|
+
el.dispatchEvent(new Event('change', { bubbles: true }));
|
|
2987
|
+
}
|
|
2988
|
+
})()
|
|
2989
|
+
`);
|
|
2990
|
+
return;
|
|
2991
|
+
}
|
|
2992
|
+
if ("Clear" in action) {
|
|
2993
|
+
const selector = action.Clear;
|
|
2994
|
+
await adapter.evaluate(`document.querySelector(${JSON.stringify(selector)}).value = ''`);
|
|
2995
|
+
return;
|
|
2996
|
+
}
|
|
2997
|
+
if ("Press" in action) {
|
|
2998
|
+
await adapter.pressKey(action.Press);
|
|
2999
|
+
return;
|
|
3000
|
+
}
|
|
3001
|
+
if ("KeyDown" in action) {
|
|
3002
|
+
await adapter.keyDown(action.KeyDown);
|
|
3003
|
+
return;
|
|
3004
|
+
}
|
|
3005
|
+
if ("KeyUp" in action) {
|
|
3006
|
+
await adapter.keyUp(action.KeyUp);
|
|
3007
|
+
return;
|
|
3008
|
+
}
|
|
3009
|
+
if ("Select" in action) {
|
|
3010
|
+
const { selector, value } = action.Select;
|
|
3011
|
+
await adapter.evaluate(`
|
|
3012
|
+
(function() {
|
|
3013
|
+
const el = document.querySelector(${JSON.stringify(selector)});
|
|
3014
|
+
if (el) {
|
|
3015
|
+
el.value = ${JSON.stringify(value)};
|
|
3016
|
+
el.dispatchEvent(new Event('change', { bubbles: true }));
|
|
3017
|
+
}
|
|
3018
|
+
})()
|
|
3019
|
+
`);
|
|
3020
|
+
return;
|
|
3021
|
+
}
|
|
3022
|
+
if ("Focus" in action) {
|
|
3023
|
+
await adapter.evaluate(`document.querySelector(${JSON.stringify(action.Focus)})?.focus()`);
|
|
3024
|
+
return;
|
|
3025
|
+
}
|
|
3026
|
+
if ("Blur" in action) {
|
|
3027
|
+
await adapter.evaluate(`document.querySelector(${JSON.stringify(action.Blur)})?.blur()`);
|
|
3028
|
+
return;
|
|
3029
|
+
}
|
|
3030
|
+
if ("Hover" in action) {
|
|
3031
|
+
const { x, y } = await getElementCenter(adapter, action.Hover);
|
|
3032
|
+
await adapter.hoverPoint(x, y);
|
|
3033
|
+
return;
|
|
3034
|
+
}
|
|
3035
|
+
if ("HoverPoint" in action) {
|
|
3036
|
+
const { x, y } = action.HoverPoint;
|
|
3037
|
+
await adapter.hoverPoint(x, y);
|
|
3038
|
+
return;
|
|
3039
|
+
}
|
|
3040
|
+
if ("ScrollY" in action) {
|
|
3041
|
+
await adapter.evaluate(`window.scrollBy(0, ${action.ScrollY})`);
|
|
3042
|
+
return;
|
|
3043
|
+
}
|
|
3044
|
+
if ("ScrollX" in action) {
|
|
3045
|
+
await adapter.evaluate(`window.scrollBy(${action.ScrollX}, 0)`);
|
|
3046
|
+
return;
|
|
3047
|
+
}
|
|
3048
|
+
if ("ScrollTo" in action) {
|
|
3049
|
+
const selector = action.ScrollTo.selector;
|
|
3050
|
+
await adapter.evaluate(
|
|
3051
|
+
`document.querySelector(${JSON.stringify(selector)})?.scrollIntoView({ behavior: 'smooth', block: 'center' })`
|
|
3052
|
+
);
|
|
3053
|
+
return;
|
|
3054
|
+
}
|
|
3055
|
+
if ("ScrollToPoint" in action) {
|
|
3056
|
+
const { x, y } = action.ScrollToPoint;
|
|
3057
|
+
await adapter.evaluate(`window.scrollTo(${x}, ${y})`);
|
|
3058
|
+
return;
|
|
3059
|
+
}
|
|
3060
|
+
if ("InfiniteScroll" in action) {
|
|
3061
|
+
const max = action.InfiniteScroll;
|
|
3062
|
+
for (let i = 0; i < max; i++) {
|
|
3063
|
+
await adapter.evaluate("window.scrollTo(0, document.body.scrollHeight)");
|
|
3064
|
+
await sleep4(500);
|
|
3065
|
+
}
|
|
3066
|
+
return;
|
|
3067
|
+
}
|
|
3068
|
+
if ("Wait" in action) {
|
|
3069
|
+
await sleep4(action.Wait);
|
|
3070
|
+
return;
|
|
3071
|
+
}
|
|
3072
|
+
if ("WaitFor" in action) {
|
|
3073
|
+
await waitForElement(adapter, action.WaitFor, 5e3);
|
|
3074
|
+
return;
|
|
3075
|
+
}
|
|
3076
|
+
if ("WaitForWithTimeout" in action) {
|
|
3077
|
+
const { selector, timeout } = action.WaitForWithTimeout;
|
|
3078
|
+
await waitForElement(adapter, selector, timeout);
|
|
3079
|
+
return;
|
|
3080
|
+
}
|
|
3081
|
+
if ("WaitForNavigation" in action) {
|
|
3082
|
+
await sleep4(1e3);
|
|
3083
|
+
return;
|
|
3084
|
+
}
|
|
3085
|
+
if ("WaitForDom" in action) {
|
|
3086
|
+
const timeout = action.WaitForDom.timeout ?? 5e3;
|
|
3087
|
+
await sleep4(timeout);
|
|
3088
|
+
return;
|
|
3089
|
+
}
|
|
3090
|
+
if ("Navigate" in action) {
|
|
3091
|
+
await adapter.navigate(action.Navigate);
|
|
3092
|
+
return;
|
|
3093
|
+
}
|
|
3094
|
+
if ("GoBack" in action) {
|
|
3095
|
+
await adapter.evaluate("window.history.back()");
|
|
3096
|
+
return;
|
|
3097
|
+
}
|
|
3098
|
+
if ("GoForward" in action) {
|
|
3099
|
+
await adapter.evaluate("window.history.forward()");
|
|
3100
|
+
return;
|
|
3101
|
+
}
|
|
3102
|
+
if ("Reload" in action) {
|
|
3103
|
+
await adapter.evaluate("window.location.reload()");
|
|
3104
|
+
return;
|
|
3105
|
+
}
|
|
3106
|
+
if ("SetViewport" in action) {
|
|
3107
|
+
const { width, height, device_scale_factor, mobile } = action.SetViewport;
|
|
3108
|
+
await adapter.setViewport(width, height, device_scale_factor ?? 2, mobile ?? false);
|
|
3109
|
+
return;
|
|
3110
|
+
}
|
|
3111
|
+
if ("Evaluate" in action) {
|
|
3112
|
+
await adapter.evaluate(action.Evaluate);
|
|
3113
|
+
return;
|
|
3114
|
+
}
|
|
3115
|
+
if ("Screenshot" in action) {
|
|
3116
|
+
return;
|
|
3117
|
+
}
|
|
3118
|
+
logger.warn("agent: unknown action", { action: JSON.stringify(action).slice(0, 100) });
|
|
3119
|
+
}
|
|
3120
|
+
/**
 * Scroll the first element matching `selector` into view and report the
 * centre of its bounding box.
 *
 * @param {{ evaluate(js: string): Promise<any> }} adapter - Page adapter used to run the in-page script.
 * @param {string} selector - CSS selector handed to `document.querySelector`.
 * @returns {Promise<{ x: number, y: number }>} Centre point as computed from `getBoundingClientRect()`.
 * @throws {Error} When the in-page script reports no matching element.
 */
async function getElementCenter(adapter, selector) {
  // JSON.stringify yields a safely quoted JS string literal for the selector.
  const quotedSelector = JSON.stringify(selector);
  const center = await adapter.evaluate(`
    (function() {
      const el = document.querySelector(${quotedSelector});
      if (!el) return null;
      el.scrollIntoView({ block: 'center', behavior: 'instant' });
      const r = el.getBoundingClientRect();
      return { x: r.x + r.width / 2, y: r.y + r.height / 2 };
    })()
  `);
  if (center) {
    return center;
  }
  throw new Error(`Element not found: ${selector}`);
}
|
|
3135
|
+
/**
 * Poll the page until an element matching `selector` exists.
 *
 * The DOM is always probed at least once, and the timeout is enforced against
 * the wall clock, so slow `evaluate` round-trips cannot stretch the wait
 * beyond `timeoutMs` by more than one probe.
 *
 * @param {{ evaluate(js: string): Promise<any> }} adapter - Page adapter used to run the probe.
 * @param {string} selector - CSS selector handed to `document.querySelector`.
 * @param {number} [timeoutMs=5000] - Maximum time to wait. Non-positive or
 *   non-numeric values mean "probe once, then give up".
 * @returns {Promise<void>} Resolves as soon as the probe reports a match.
 * @throws {TimeoutError} When no match appears before the deadline.
 */
async function waitForElement(adapter, selector, timeoutMs = 5e3) {
  const interval = 100;
  // `timeoutMs > 0` is false for NaN, null and negatives, collapsing them to 0.
  const budget = timeoutMs > 0 ? timeoutMs : 0;
  const deadline = Date.now() + budget;
  const checkJs = `!!document.querySelector(${JSON.stringify(selector)})`;
  for (;;) {
    if (await adapter.evaluate(checkJs)) return;
    const remaining = deadline - Date.now();
    if (remaining <= 0) break;
    // Never sleep past the deadline.
    await sleep4(Math.min(interval, remaining));
  }
  throw new TimeoutError(`Timeout waiting for element: ${selector}`);
}
|
|
3146
|
+
/**
 * Promise-based delay.
 *
 * @param {number} ms - Delay passed straight to `setTimeout`.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep4(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
3149
|
+
|
|
3150
|
+
// ai/act.ts
|
|
3151
|
+
/**
 * Ask the LLM for a plan that fulfils `instruction` on the current page and
 * run each returned step in order.
 *
 * @param {object} adapter - Page adapter; must provide `captureScreenshot()`,
 *   `getHTML()` and `evaluate(js)`.
 * @param {{ chatJSON(messages: object[]): Promise<any> }} llm - Provider that returns the parsed plan.
 * @param {string} instruction - Natural-language task description.
 * @returns {Promise<void>}
 */
async function act(adapter, llm, instruction) {
  // Gather the page snapshot concurrently; the four reads are independent.
  const snapshot = await Promise.all([
    adapter.captureScreenshot(),
    adapter.getHTML(),
    adapter.evaluate("window.location.href"),
    adapter.evaluate("document.title")
  ]);
  const [screenshot, html, url, title] = snapshot;

  const context = `Task: ${instruction}\nPAGE TITLE: ${title}`;
  const messages = [
    { role: "system", content: SYSTEM_PROMPT },
    { role: "user", content: buildUserMessage(url, html, screenshot, context) }
  ];
  const plan = await llm.chatJSON(messages);

  // A plan without a `steps` array is treated as "nothing to do".
  const steps = plan.steps;
  if (!Array.isArray(steps)) {
    return;
  }
  for (const step of steps) {
    await executeAction(adapter, step);
    // Short pause after every step before moving on.
    await sleep5(200);
  }
}
|
|
3174
|
+
/**
 * Promise-based delay.
 *
 * @param {number} ms - Delay passed straight to `setTimeout`.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep5(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
3177
|
+
|
|
3178
|
+
// utils/dom.ts
|
|
3179
|
+
var GET_INTERACTIVE_ELEMENTS = `
|
|
3180
|
+
(function() {
|
|
3181
|
+
const interactiveSelectors = [
|
|
3182
|
+
'a[href]',
|
|
3183
|
+
'button',
|
|
3184
|
+
'input',
|
|
3185
|
+
'select',
|
|
3186
|
+
'textarea',
|
|
3187
|
+
'[role="button"]',
|
|
3188
|
+
'[role="link"]',
|
|
3189
|
+
'[role="tab"]',
|
|
3190
|
+
'[role="menuitem"]',
|
|
3191
|
+
'[role="checkbox"]',
|
|
3192
|
+
'[role="radio"]',
|
|
3193
|
+
'[role="switch"]',
|
|
3194
|
+
'[role="combobox"]',
|
|
3195
|
+
'[onclick]',
|
|
3196
|
+
'[tabindex]',
|
|
3197
|
+
'summary',
|
|
3198
|
+
'details',
|
|
3199
|
+
'label',
|
|
3200
|
+
];
|
|
3201
|
+
const seen = new Set();
|
|
3202
|
+
const results = [];
|
|
3203
|
+
for (const sel of interactiveSelectors) {
|
|
3204
|
+
for (const el of document.querySelectorAll(sel)) {
|
|
3205
|
+
if (seen.has(el)) continue;
|
|
3206
|
+
seen.add(el);
|
|
3207
|
+
const r = el.getBoundingClientRect();
|
|
3208
|
+
if (r.width === 0 && r.height === 0) continue;
|
|
3209
|
+
if (r.bottom < 0 || r.right < 0) continue;
|
|
3210
|
+
|
|
3211
|
+
const tag = el.tagName.toLowerCase();
|
|
3212
|
+
const type = el.getAttribute('type') || '';
|
|
3213
|
+
const text = (el.textContent || '').trim().slice(0, 100);
|
|
3214
|
+
const ariaLabel = el.getAttribute('aria-label') || '';
|
|
3215
|
+
const placeholder = el.getAttribute('placeholder') || '';
|
|
3216
|
+
const href = el.getAttribute('href') || '';
|
|
3217
|
+
const value = (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement)
|
|
3218
|
+
? el.value.slice(0, 50) : '';
|
|
3219
|
+
|
|
3220
|
+
// Build a unique-ish selector
|
|
3221
|
+
let cssSelector = tag;
|
|
3222
|
+
const id = el.getAttribute('id');
|
|
3223
|
+
if (id) {
|
|
3224
|
+
cssSelector = '#' + CSS.escape(id);
|
|
3225
|
+
} else {
|
|
3226
|
+
const cls = el.getAttribute('class');
|
|
3227
|
+
if (cls) {
|
|
3228
|
+
const classes = cls.trim().split(/\\s+/).slice(0, 2);
|
|
3229
|
+
cssSelector = tag + classes.map(c => '.' + CSS.escape(c)).join('');
|
|
3230
|
+
}
|
|
3231
|
+
const name = el.getAttribute('name');
|
|
3232
|
+
if (name) {
|
|
3233
|
+
cssSelector = tag + '[name="' + CSS.escape(name) + '"]';
|
|
3234
|
+
}
|
|
3235
|
+
}
|
|
3236
|
+
|
|
3237
|
+
results.push({
|
|
3238
|
+
selector: cssSelector,
|
|
3239
|
+
tag,
|
|
3240
|
+
type,
|
|
3241
|
+
text,
|
|
3242
|
+
ariaLabel,
|
|
3243
|
+
placeholder,
|
|
3244
|
+
href,
|
|
3245
|
+
value,
|
|
3246
|
+
rect: {
|
|
3247
|
+
x: Math.round(r.x),
|
|
3248
|
+
y: Math.round(r.y),
|
|
3249
|
+
width: Math.round(r.width),
|
|
3250
|
+
height: Math.round(r.height),
|
|
3251
|
+
},
|
|
3252
|
+
});
|
|
3253
|
+
}
|
|
3254
|
+
}
|
|
3255
|
+
return results;
|
|
3256
|
+
})()
|
|
3257
|
+
`;
|
|
3258
|
+
|
|
3259
|
+
// ai/observe.ts
|
|
3260
|
+
/**
 * Discover interactive elements on the page and optionally rank them against
 * a natural-language instruction.
 *
 * Without `instruction` or `llm`, the raw element list from the page is
 * returned unchanged. Otherwise the LLM is asked for an ordered list of
 * indices. Because that output is untrusted, anything that is not an
 * in-range integer (or an integer-valued string) is dropped, and each element
 * is returned at most once.
 *
 * @param {{ evaluate(js: string): Promise<any> }} adapter - Page adapter used to run the discovery script.
 * @param {string} [instruction] - What the caller is looking for.
 * @param {{ chatJSON(messages: object[]): Promise<any> }} [llm] - Provider used for ranking.
 * @returns {Promise<object[]>} Element descriptors. Ranked results also carry
 *   `score`, falling linearly from 1 (best match) towards 0.
 */
async function observe(adapter, instruction, llm) {
  const elements = await adapter.evaluate(GET_INTERACTIVE_ELEMENTS);
  if (!Array.isArray(elements) || elements.length === 0) {
    return [];
  }
  if (!instruction || !llm) {
    return elements;
  }

  // One compact line per element; the [i] prefix is what the LLM echoes back.
  const elementSummary = elements.map((el, i) => {
    const parts = [`[${i}] <${el.tag}>`];
    if (el.text) parts.push(`text="${el.text}"`);
    if (el.ariaLabel) parts.push(`aria="${el.ariaLabel}"`);
    if (el.placeholder) parts.push(`placeholder="${el.placeholder}"`);
    if (el.href) parts.push(`href="${el.href}"`);
    if (el.type) parts.push(`type="${el.type}"`);
    return parts.join(" ");
  }).join("\n");

  const response = await llm.chatJSON([
    {
      role: "system",
      content: 'You are an element selector. Given a list of page elements and an instruction, return a JSON object with an "indices" array of element indices that match the instruction. Order by relevance (most relevant first). Return {"indices": []} if none match.'
    },
    {
      role: "user",
      content: `Instruction: ${instruction}\n\nElements:\n${elementSummary}`
    }
  ]);

  // A missing or non-array `indices` is treated as "no matches".
  const rawIndices = Array.isArray(response?.indices) ? response.indices : [];
  const ranked = [];
  const seen = new Set();
  for (const raw of rawIndices) {
    // Models sometimes emit indices as strings ("2"); accept those too.
    const idx = typeof raw === "string" && raw.trim() !== "" ? Number(raw) : raw;
    if (!Number.isInteger(idx) || idx < 0 || idx >= elements.length || seen.has(idx)) {
      continue;
    }
    seen.add(idx);
    ranked.push(idx);
  }

  // Score against the number of valid matches so discarded entries cannot
  // compress the scale.
  return ranked.map((idx, rank) => ({
    ...elements[idx],
    score: 1 - rank / ranked.length
  }));
}
|
|
3296
|
+
|
|
3297
|
+
// utils/html.ts
|
|
3298
|
+
/**
 * Shorten an HTML string to at most `maxChars` UTF-16 code units.
 *
 * The cut prefers the last `>` inside the window so the result does not end
 * in the middle of a tag. If the window holds no usable `>`, the raw prefix
 * is returned, minus a trailing lone high surrogate so the result stays
 * well-formed.
 *
 * @param {string | null | undefined} html - Markup to shorten; null/undefined yields "".
 * @param {number} [maxChars=12000] - Upper bound on the returned length.
 * @returns {string} `html` itself when already short enough, otherwise a prefix of it.
 */
function truncateHtml2(html, maxChars = 12e3) {
  if (html == null) return "";
  if (html.length <= maxChars) return html;
  const slice = html.slice(0, maxChars);
  const lastClose = slice.lastIndexOf(">");
  if (lastClose > 0) {
    return html.slice(0, lastClose + 1);
  }
  // No tag boundary to cut at: make sure we did not split a surrogate pair.
  const lastUnit = slice.charCodeAt(slice.length - 1);
  const endsWithHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return endsWithHighSurrogate ? slice.slice(0, -1) : slice;
}
|
|
3304
|
+
|
|
3305
|
+
// ai/extract.ts
|
|
3306
|
+
/**
 * Extract structured data from the current page with the help of an LLM.
 *
 * Sends the page URL, title, truncated HTML and a screenshot to the model and
 * returns the JSON it produces, validated through `schema.parse` when a
 * schema is supplied.
 *
 * @param {object} adapter - Page adapter; must provide `captureScreenshot()`,
 *   `getHTML()` and `evaluate(js)`.
 * @param {{ chatJSON(messages: object[]): Promise<any> }} llm - Provider that returns parsed JSON.
 * @param {string} instruction - What to extract.
 * @param {{ parse(value: any): any, _def?: any }} [schema] - Optional validator
 *   (e.g. a Zod schema). Its `_def`, when serialisable, is shown to the model.
 * @returns {Promise<any>} `schema.parse(result)` when a schema is given, else the raw model output.
 * @throws Whatever `schema.parse` throws when the model output does not validate.
 */
async function extract(adapter, llm, instruction, schema) {
  const [screenshot, html, url, title] = await Promise.all([
    adapter.captureScreenshot(),
    adapter.getHTML(),
    adapter.evaluate("window.location.href"),
    adapter.evaluate("document.title")
  ]);
  const truncatedHtml = truncateHtml2(html, 12e3);

  let schemaDesc = "";
  if (schema) {
    let serialized;
    try {
      serialized = JSON.stringify(schema._def, null, 2);
    } catch {
      // Best effort: cyclic or BigInt-bearing definitions cannot be
      // serialised, so fall through to the generic hint below.
      serialized = undefined;
    }
    // JSON.stringify returns undefined (without throwing) for a missing
    // `_def`; interpolating that would put the word "undefined" in the prompt.
    schemaDesc = serialized === undefined
      ? "\n\nReturn a JSON object matching the expected structure."
      : `\n\nReturn data matching this JSON schema:\n${serialized}`;
  }

  const systemPrompt = `You are a data extraction agent. Given a webpage screenshot and HTML, extract the requested information as JSON.${schemaDesc}\n\nReturn ONLY a valid JSON object. No prose, no markdown.`;
  const userText = `URL: ${url}\nTitle: ${title}\nInstruction: ${instruction}\n\nHTML (truncated):\n${truncatedHtml}`;

  const result = await llm.chatJSON([
    { role: "system", content: systemPrompt },
    {
      role: "user",
      content: [
        { type: "text", text: userText },
        {
          type: "image_url",
          image_url: { url: `data:image/png;base64,${screenshot}` }
        }
      ]
    }
  ]);

  if (schema) {
    return schema.parse(result);
  }
  return result;
}
|
|
3352
|
+
|
|
3353
|
+
// spider-browser.ts
|
|
3354
|
+
/**
 * High-level client for a remote browser session.
 *
 * Owns the WebSocket transport, the protocol adapter, the page wrapper and
 * (optionally) a retry engine and an LLM provider. Call `init()` before any
 * page or AI method, and `close()` when done.
 */
var SpiderBrowser = class {
  /** Options after defaults have been applied. */
  opts;
  /** Active transport, or null when not connected. */
  transport = null;
  /** Active protocol adapter, or null when not connected. */
  adapter = null;
  /** Retry engine; only created when `smartRetry` is enabled. */
  retryEngine = null;
  emitter = new SpiderEventEmitter();
  _page = null;
  /** LLM provider built from `options.llm`, or null. */
  llmProvider = null;
  /** Last URL requested through `goto()` (or the initial `options.url`). */
  currentUrl;
  /**
   * @param {object} options - Client options. Only `apiKey` has no default;
   *   every other field falls back to the values listed below.
   */
  constructor(options) {
    this.opts = {
      apiKey: options.apiKey,
      serverUrl: options.serverUrl ?? "wss://browser.spider.cloud",
      browser: options.browser ?? "auto",
      captcha: options.captcha ?? "solve",
      smartRetry: options.smartRetry ?? true,
      maxRetries: options.maxRetries ?? 12,
      stealth: options.stealth ?? 0,
      maxStealthLevels: options.maxStealthLevels ?? 3,
      connectTimeoutMs: options.connectTimeoutMs ?? 3e4,
      commandTimeoutMs: options.commandTimeoutMs ?? 3e4,
      retryTimeoutMs: options.retryTimeoutMs ?? 15e3,
      url: options.url,
      llm: options.llm
    };
    if (options.logLevel) {
      logger.setLevel(options.logLevel);
    }
    if (this.opts.llm) {
      this.llmProvider = createProvider(this.opts.llm);
    }
  }
  /** The active page instance for deterministic browser control. */
  get page() {
    if (!this._page) {
      throw new Error("SpiderBrowser not initialized. Call init() first.");
    }
    return this._page;
  }
  /** Current browser type. */
  get browser() {
    return this.transport?.browser ?? this.opts.browser;
  }
  /** Whether the WebSocket is connected. */
  get connected() {
    return this.transport?.connected ?? false;
  }
  /** Active stealth level (0=auto, 1-3=explicit tiers). */
  get stealthLevel() {
    return this.retryEngine?.stealthLevel ?? this.transport?.stealthLevel ?? this.opts.stealth;
  }
  /** Credits remaining from last upgrade response. */
  get credits() {
    return this.transport?.upgradeCredits;
  }
  /** Credits consumed during this session (from server Spider.metering event). */
  get sessionCreditsUsed() {
    return this.transport?.sessionCreditsUsed;
  }
  /**
   * Request the exact session cost from the server.
   * Unlike `sessionCreditsUsed` (which relies on async event delivery),
   * this sends a Spider.getMetering command and waits for the response.
   * Call this before close() for accurate per-session metering.
   *
   * @returns {Promise<number>} 0 when there is no transport.
   */
  async getSessionCredits() {
    if (!this.transport) return 0;
    return this.transport.requestMetering();
  }
  /** Subscribe to events. Returns `this` for chaining. */
  on(event, handler) {
    this.emitter.on(event, handler);
    return this;
  }
  /** Unsubscribe from events. Returns `this` for chaining. */
  off(event, handler) {
    this.emitter.off(event, handler);
    return this;
  }
  /** Subscribe to an event once. Returns `this` for chaining. */
  once(event, handler) {
    this.emitter.once(event, handler);
    return this;
  }
  /**
   * Connect to the browser server WebSocket and initialize the protocol.
   *
   * If connecting or protocol initialization fails, everything opened so far
   * is released and the instance returns to its un-initialized state before
   * the original error is rethrown.
   */
  async init() {
    const transportOpts = {
      apiKey: this.opts.apiKey,
      serverUrl: this.opts.serverUrl,
      browser: this.opts.browser,
      url: this.opts.url,
      captcha: this.opts.captcha,
      stealthLevel: this.opts.stealth,
      connectTimeoutMs: this.opts.connectTimeoutMs,
      commandTimeoutMs: this.opts.commandTimeoutMs
    };
    let activeBrowser;
    try {
      this.transport = new Transport(transportOpts, this.emitter);
      await this.transport.connect();
      activeBrowser = this.transport.browser;
      this.adapter = new ProtocolAdapter(this.transport, this.emitter, activeBrowser, {
        commandTimeoutMs: this.opts.commandTimeoutMs
      });
      await this.adapter.init();
    } catch (err) {
      try {
        this.releaseConnection();
      } catch (cleanupErr) {
        // Cleanup is best effort; the init failure is the error worth surfacing.
        logger.warn("SpiderBrowser: cleanup after failed init threw", { error: String(cleanupErr) });
      }
      throw err;
    }
    this._page = new SpiderPage(this.adapter);
    if (this.opts.smartRetry) {
      const retryOpts = {
        maxRetries: this.opts.maxRetries,
        transportOpts,
        emitter: this.emitter,
        maxStealthLevel: this.opts.maxStealthLevels,
        retryTimeoutMs: this.opts.retryTimeoutMs,
        commandTimeoutMs: this.opts.commandTimeoutMs
      };
      this.retryEngine = new RetryEngine(retryOpts);
    }
    this.currentUrl = this.opts.url;
    logger.info("SpiderBrowser initialized", { browser: activeBrowser });
  }
  /**
   * Execute an action with smart retry. On failure, classifies the error and
   * may switch browsers, reconnect, re-navigate, and retry.
   *
   * Without a retry engine (or when not connected), `fn` is simply called once.
   */
  async withRetry(fn) {
    if (!this.retryEngine || !this.transport || !this.adapter) {
      return fn();
    }
    return this.retryEngine.execute(fn, {
      transport: this.transport,
      adapter: this.adapter,
      page: this._page,
      currentUrl: this.currentUrl,
      // Keep our adapter reference and the page wrapper in sync when the
      // retry engine swaps adapters.
      onAdapterChanged: (newAdapter) => {
        this.adapter = newAdapter;
        this._page._setAdapter(newAdapter);
      }
    });
  }
  // -------------------------------------------------------------------
  // Navigation (with retry)
  // -------------------------------------------------------------------
  /**
   * Navigate to a URL with smart retry.
   *
   * On ERR_ABORTED: closes the WebSocket, reconnects, and retries.
   * On bot detection: switches to a different browser and retries.
   *
   * Also updates `currentUrl` for subsequent retries on other operations.
   *
   * @throws {Error} When called before `init()` or after `close()`.
   */
  async goto(url) {
    this.requireInitialized();
    this.currentUrl = url;
    await this.withRetry(async () => {
      await this.page.goto(url);
    });
  }
  // -------------------------------------------------------------------
  // AI Methods (require LLM config)
  // -------------------------------------------------------------------
  /**
   * Execute a single action from natural language.
   *
   * Example: `await browser.act('Click the login button')`
   *
   * @throws {Error} When no LLM is configured or the browser is not initialized.
   */
  async act(instruction) {
    this.requireLLM();
    this.requireInitialized();
    await this.withRetry(() => act(this.adapter, this.llmProvider, instruction));
  }
  /**
   * Discover interactive elements on the page.
   *
   * Works WITHOUT an LLM — injects DOM traversal to collect elements.
   * When instruction is provided + LLM is configured, adds ranking/filtering.
   *
   * @throws {Error} When the browser is not initialized.
   */
  async observe(instruction) {
    this.requireInitialized();
    return this.withRetry(
      () => observe(this.adapter, instruction, this.llmProvider ?? void 0)
    );
  }
  /**
   * Extract structured data from the page.
   *
   * Example: `await browser.extract('Product name and price', { schema: z.object({...}) })`
   *
   * @throws {Error} When no LLM is configured or the browser is not initialized.
   */
  async extract(instruction, options) {
    this.requireLLM();
    this.requireInitialized();
    return this.withRetry(
      () => extract(this.adapter, this.llmProvider, instruction, options?.schema)
    );
  }
  /**
   * Create an autonomous agent that executes multi-step tasks.
   *
   * Uses the same action vocabulary and system prompt as Spider's
   * server-side captcha solver for consistent behavior.
   *
   * @throws {Error} When no LLM is configured or the browser is not initialized.
   */
  agent(options) {
    this.requireLLM();
    this.requireInitialized();
    return new Agent(this.adapter, this.llmProvider, this.emitter, options);
  }
  /**
   * Close the connection and clean up resources.
   *
   * Also removes every listener registered through `on()`/`once()`.
   */
  async close() {
    this.releaseConnection();
    this.emitter.removeAllListeners();
    logger.info("SpiderBrowser closed");
  }
  /** Throw unless an LLM provider was configured in the constructor. */
  requireLLM() {
    if (!this.llmProvider) {
      throw new Error(
        "LLM not configured. Pass `llm` option to SpiderBrowser constructor for AI methods."
      );
    }
  }
  /**
   * Throw the same descriptive error as the `page` getter unless `init()` has
   * completed and `close()` has not been called since.
   */
  requireInitialized() {
    if (!this._page || !this.adapter) {
      throw new Error("SpiderBrowser not initialized. Call init() first.");
    }
  }
  /**
   * Destroy the adapter, close the transport and reset all session state.
   * The fields are cleared even if teardown throws, so no stale session
   * objects linger on the instance.
   */
  releaseConnection() {
    try {
      this.adapter?.destroy();
      this.transport?.close();
    } finally {
      this._page = null;
      this.adapter = null;
      this.transport = null;
      this.retryEngine = null;
    }
  }
};
|
|
3574
|
+
|
|
3575
|
+
// index.ts
|
|
3576
|
+
init_errors();
|
|
3577
|
+
export {
|
|
3578
|
+
Agent,
|
|
3579
|
+
AuthError,
|
|
3580
|
+
BROWSER_ROTATION,
|
|
3581
|
+
BackendUnavailableError,
|
|
3582
|
+
BiDiSession,
|
|
3583
|
+
BlockedError,
|
|
3584
|
+
BrowserSelector,
|
|
3585
|
+
CDPSession,
|
|
3586
|
+
ConnectionError,
|
|
3587
|
+
FailureTracker,
|
|
3588
|
+
LLMError,
|
|
3589
|
+
Logger,
|
|
3590
|
+
NavigationError,
|
|
3591
|
+
ProtocolAdapter,
|
|
3592
|
+
ProtocolError,
|
|
3593
|
+
RateLimitError,
|
|
3594
|
+
RetryEngine,
|
|
3595
|
+
SYSTEM_PROMPT,
|
|
3596
|
+
SpiderBrowser,
|
|
3597
|
+
SpiderError,
|
|
3598
|
+
SpiderEventEmitter,
|
|
3599
|
+
SpiderPage,
|
|
3600
|
+
TimeoutError,
|
|
3601
|
+
Transport,
|
|
3602
|
+
act,
|
|
3603
|
+
createProvider,
|
|
3604
|
+
extract,
|
|
3605
|
+
logger,
|
|
3606
|
+
observe,
|
|
3607
|
+
truncateHtml2 as truncateHtml
|
|
3608
|
+
};
|
|
3609
|
+
//# sourceMappingURL=index.js.map
|