lobster-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/README.md +389 -0
  2. package/dist/agent/core.js +1013 -0
  3. package/dist/agent/core.js.map +1 -0
  4. package/dist/agent/index.js +1027 -0
  5. package/dist/agent/index.js.map +1 -0
  6. package/dist/brain/index.js +60 -0
  7. package/dist/brain/index.js.map +1 -0
  8. package/dist/browser/dom/index.js +1096 -0
  9. package/dist/browser/dom/index.js.map +1 -0
  10. package/dist/browser/index.js +2034 -0
  11. package/dist/browser/index.js.map +1 -0
  12. package/dist/browser/manager.js +86 -0
  13. package/dist/browser/manager.js.map +1 -0
  14. package/dist/browser/page-adapter.js +1345 -0
  15. package/dist/browser/page-adapter.js.map +1 -0
  16. package/dist/cascade/index.js +138 -0
  17. package/dist/cascade/index.js.map +1 -0
  18. package/dist/config/index.js +110 -0
  19. package/dist/config/index.js.map +1 -0
  20. package/dist/config/schema.js +66 -0
  21. package/dist/config/schema.js.map +1 -0
  22. package/dist/discover/index.js +545 -0
  23. package/dist/discover/index.js.map +1 -0
  24. package/dist/index.js +5529 -0
  25. package/dist/index.js.map +1 -0
  26. package/dist/lib.js +4206 -0
  27. package/dist/lib.js.map +1 -0
  28. package/dist/llm/client.js +379 -0
  29. package/dist/llm/client.js.map +1 -0
  30. package/dist/llm/index.js +397 -0
  31. package/dist/llm/index.js.map +1 -0
  32. package/dist/llm/openai-client.js +214 -0
  33. package/dist/llm/openai-client.js.map +1 -0
  34. package/dist/output/index.js +93 -0
  35. package/dist/output/index.js.map +1 -0
  36. package/dist/pipeline/index.js +802 -0
  37. package/dist/pipeline/index.js.map +1 -0
  38. package/dist/router/decision.js +80 -0
  39. package/dist/router/decision.js.map +1 -0
  40. package/dist/router/index.js +3443 -0
  41. package/dist/router/index.js.map +1 -0
  42. package/dist/types/index.js +23 -0
  43. package/dist/types/index.js.map +1 -0
  44. package/logo.svg +11 -0
  45. package/package.json +65 -0
package/dist/lib.js ADDED
@@ -0,0 +1,4206 @@
1
+ // src/brain/index.ts
2
// System prompt sent to the LLM when classifying which data sources a question needs.
var CLASSIFIER_PROMPT = `You are an intent classifier for a web automation tool. Given a user's question about a webpage, decide what data sources are needed to answer it.

Respond ONLY with a JSON object:
{
"screenshot": true/false,
"markdown": true/false,
"forms": true/false,
"network": true/false,
"intent": "brief 5-word description"
}

Rules:
- screenshot=true ONLY when the answer requires SEEING the page (images, visual layout, colors, charts, what something looks like)
- markdown=true for ANY question about text content, meaning, topics, summaries
- forms=true ONLY when specifically asking about form fields or inputs
- network=true ONLY when asking about APIs, requests, or data fetching
- Most questions need only markdown=true`;

/**
 * Decide which data sources are needed to answer `prompt`.
 *
 * When `llmCall` is supplied it is invoked as `llmCall(systemPrompt, prompt)` and the
 * first `{...}` span of its reply is parsed as JSON. Any failure (rejected call,
 * no JSON object, invalid JSON) falls back to `heuristicClassify`.
 *
 * @param {string} prompt - The user's question.
 * @param {string} pageTitle - Title of the current page, appended to the system prompt.
 * @param {(system: string, user: string) => Promise<string>} [llmCall] - Optional LLM callback.
 * @returns {Promise<{screenshot: boolean, markdown: boolean, forms: boolean, network: boolean, intent: string, source: string}>}
 */
async function classifyIntent(prompt, pageTitle, llmCall) {
  if (!llmCall) return heuristicClassify(prompt);
  try {
    const reply = await llmCall(`${CLASSIFIER_PROMPT}
Current page: "${pageTitle}"`, prompt);
    const [rawJson] = reply.match(/\{[\s\S]*?\}/) ?? [];
    if (rawJson) {
      const decision = JSON.parse(rawJson);
      return {
        screenshot: Boolean(decision.screenshot),
        // markdown is on unless the model explicitly turned it off
        markdown: decision.markdown !== false,
        forms: Boolean(decision.forms),
        network: Boolean(decision.network),
        intent: decision.intent || "",
        source: "llm"
      };
    }
  } catch {
    // Best effort: any LLM or parse failure drops through to the heuristic below.
  }
  return heuristicClassify(prompt);
}
42
/**
 * Keyword-based fallback classifier used when no LLM decision is available.
 *
 * The prompt is lower-cased and matched against three keyword patterns;
 * `markdown` is always requested.
 *
 * @param {string} prompt - The user's question.
 * @returns {{screenshot: boolean, markdown: boolean, forms: boolean, network: boolean, intent: string, source: string}}
 */
function heuristicClassify(prompt) {
  const text = prompt.toLowerCase();
  const patterns = {
    screenshot: /look|see|visual|image|screenshot|screen|what('s| is) (on|showing|displayed|visible)|describe.*layout|picture|colour|color|design|ui |logo|icon|chart|graph|photo|video|banner|appear/i,
    forms: /form|input|field|submit|login|sign.?in|password|checkbox|dropdown|select|textarea|search.?box|fill/i,
    network: /api|network|request|fetch|xhr|endpoint|call.*server|data.*load/i
  };
  return {
    screenshot: patterns.screenshot.test(text),
    markdown: true,
    forms: patterns.forms.test(text),
    network: patterns.network.test(text),
    intent: "heuristic classification",
    source: "heuristic"
  };
}
56
+
57
+ // src/llm/errors.ts
58
/**
 * Error raised for failures while invoking the LLM or a tool.
 *
 * Carries an error `type` string, a `retryable` flag (taken from `opts.retryable`
 * when given, otherwise derived from the type via `isRetryable`), and the
 * optional raw error / raw response that caused it.
 */
var InvokeError = class extends Error {
  type;
  retryable;
  rawError;
  rawResponse;
  /**
   * @param {string} type - Error category.
   * @param {string} message - Human-readable message.
   * @param {{retryable?: boolean, rawError?: unknown, rawResponse?: unknown}} [opts]
   */
  constructor(type, message, opts) {
    super(message);
    const { retryable, rawError, rawResponse } = opts ?? {};
    this.name = "InvokeError";
    this.type = type;
    this.retryable = retryable ?? isRetryable(type);
    this.rawError = rawError;
    this.rawResponse = rawResponse;
  }
};
72
/**
 * Report whether an error of the given type is worth retrying.
 *
 * Transient or model-correctable failures are retryable; authentication,
 * context-length and content-filter failures are not. Unrecognised types are
 * treated as non-retryable so the function always returns a boolean.
 *
 * @param {string} type - Error category string.
 * @returns {boolean}
 */
function isRetryable(type) {
  switch (type) {
    case "NETWORK_ERROR" /* NETWORK_ERROR */:
    case "RATE_LIMIT" /* RATE_LIMIT */:
    case "SERVER_ERROR" /* SERVER_ERROR */:
    case "NO_TOOL_CALL" /* NO_TOOL_CALL */:
    case "INVALID_TOOL_ARGS" /* INVALID_TOOL_ARGS */:
    case "TOOL_EXECUTION_ERROR" /* TOOL_EXECUTION_ERROR */:
    case "UNKNOWN" /* UNKNOWN */:
      return true;
    case "AUTH_ERROR" /* AUTH_ERROR */:
    case "CONTEXT_LENGTH" /* CONTEXT_LENGTH */:
    case "CONTENT_FILTER" /* CONTENT_FILTER */:
      return false;
    default:
      // Previously fell off the end and returned undefined.
      return false;
  }
}
88
+
89
+ // src/llm/openai-client.ts
90
/**
 * Minimal HTTP client for chat-completion style LLM APIs.
 *
 * Speaks the OpenAI chat-completions format by default and translates to and
 * from the Anthropic Messages API when `config.provider === "anthropic"`.
 * Failures are reported as `InvokeError`.
 */
var OpenAIClient = class {
  /** Connection settings: baseURL, model, apiKey, temperature, provider. */
  config;
  constructor(config) {
    this.config = config;
  }
  /**
   * Build auth headers based on the provider.
   * - Anthropic: x-api-key header + anthropic-version
   * - everything else: Bearer token
   * No auth header is added when no API key is configured.
   */
  buildHeaders() {
    const headers = {
      "Content-Type": "application/json"
    };
    if (!this.config.apiKey) return headers;
    if (this.config.provider === "anthropic") {
      headers["x-api-key"] = this.config.apiKey;
      headers["anthropic-version"] = "2023-06-01";
    } else {
      headers["Authorization"] = `Bearer ${this.config.apiKey}`;
    }
    return headers;
  }
  /**
   * Build the request URL and body for the configured provider.
   * Anthropic requests are delegated to `buildAnthropicBody`.
   *
   * @returns {{url: string, body: object}}
   */
  buildBody(messages, tools, opts) {
    if (this.config.provider === "anthropic") {
      return this.buildAnthropicBody(messages, tools, opts);
    }
    const body = {
      model: this.config.model,
      messages,
      temperature: this.config.temperature ?? 0.1
    };
    if (tools && tools.length > 0) {
      body.tools = tools;
      body.parallel_tool_calls = false;
      // String ("auto", "required", ...) and object forms are both passed through as-is.
      if (opts?.toolChoice) body.tool_choice = opts.toolChoice;
    }
    return { url: `${this.config.baseURL}/chat/completions`, body };
  }
  /**
   * Build an Anthropic Messages API request from OpenAI-style messages/tools.
   *
   * System messages are lifted into the top-level `system` field; when several
   * string system messages are present they are joined with a blank line
   * instead of keeping only the last one. Every non-assistant role is sent as "user".
   *
   * @returns {{url: string, body: object}}
   */
  buildAnthropicBody(messages, tools, opts) {
    const systemParts = [];
    const anthropicMessages = [];
    for (const msg of messages) {
      if (msg.role === "system") {
        systemParts.push(msg.content);
      } else {
        anthropicMessages.push({
          role: msg.role === "assistant" ? "assistant" : "user",
          content: msg.content
        });
      }
    }
    let system;
    if (systemParts.length > 0) {
      system = systemParts.every((part) => typeof part === "string")
        ? systemParts.filter((part) => part !== "").join("\n\n")
        : systemParts[systemParts.length - 1];
    }
    const body = {
      model: this.config.model,
      messages: anthropicMessages,
      max_tokens: 4096,
      temperature: this.config.temperature ?? 0.1
    };
    if (system) body.system = system;
    if (tools && tools.length > 0) {
      body.tools = tools.map((t) => {
        const fn = t.function;
        return {
          name: fn.name,
          description: fn.description,
          input_schema: fn.parameters
        };
      });
      if (opts?.toolChoice) {
        if (typeof opts.toolChoice === "string") {
          // OpenAI's "required" corresponds to Anthropic's { type: "any" }.
          body.tool_choice = opts.toolChoice === "required" ? { type: "any" } : { type: opts.toolChoice };
        } else {
          body.tool_choice = { type: "tool", name: opts.toolChoice.function.name };
        }
      }
    }
    return { url: `${this.config.baseURL}/messages`, body };
  }
  /**
   * Parse an Anthropic response into the unified { toolCalls, content, usage } shape.
   * Text blocks are concatenated (newline-separated) rather than keeping only the last.
   *
   * @throws {InvokeError} UNKNOWN when the response has no content array.
   */
  parseAnthropicResponse(json) {
    const content = json.content;
    if (!content || !Array.isArray(content)) {
      throw new InvokeError("UNKNOWN" /* UNKNOWN */, "No content in Anthropic response", { rawResponse: json });
    }
    const textParts = [];
    const toolCalls = [];
    for (const block of content) {
      if (block.type === "text") {
        textParts.push(block.text);
      } else if (block.type === "tool_use") {
        toolCalls.push({
          id: block.id,
          type: "function",
          function: {
            name: block.name,
            arguments: JSON.stringify(block.input)
          }
        });
      }
    }
    const usage = json.usage;
    return {
      toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
      content: textParts.length > 0 ? textParts.join("\n") : void 0,
      usage: usage ? {
        promptTokens: usage.input_tokens ?? 0,
        completionTokens: usage.output_tokens ?? 0,
        totalTokens: (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0)
      } : void 0
    };
  }
  /**
   * Send one chat request and return the unified response.
   *
   * @param {object[]} messages - OpenAI-style chat messages.
   * @param {object[]} [tools] - OpenAI-style tool definitions.
   * @param {{toolChoice?: string|object, signal?: AbortSignal}} [opts] - `signal`, when
   *   given, is forwarded to fetch; an abort is rethrown untouched.
   * @returns {Promise<{toolCalls?: object[], content?: string, usage?: object}>}
   * @throws {InvokeError} on network failure, non-2xx status, unparseable or empty responses.
   */
  async chatCompletion(messages, tools, opts) {
    const { url, body } = this.buildBody(messages, tools, opts);
    const headers = this.buildHeaders();
    let response;
    try {
      response = await fetch(url, {
        method: "POST",
        headers,
        body: JSON.stringify(body),
        signal: opts?.signal
      });
    } catch (err) {
      // A caller-initiated abort is not a network failure and must not be retried.
      if (err?.name === "AbortError") throw err;
      throw new InvokeError("NETWORK_ERROR" /* NETWORK_ERROR */, `Network error: ${err}`, { rawError: err });
    }
    if (!response.ok) {
      const status = response.status;
      const text = await response.text().catch(() => "");
      if (status === 401 || status === 403) {
        throw new InvokeError("AUTH_ERROR" /* AUTH_ERROR */, `Authentication failed: ${text}`, { retryable: false, rawResponse: text });
      }
      if (status === 429) {
        throw new InvokeError("RATE_LIMIT" /* RATE_LIMIT */, `Rate limited: ${text}`, { rawResponse: text });
      }
      if (status >= 500) {
        throw new InvokeError("SERVER_ERROR" /* SERVER_ERROR */, `Server error ${status}: ${text}`, { rawResponse: text });
      }
      // Other client errors (bad request, not found, ...) fail the same way on
      // every attempt; only timeout/conflict are worth another try.
      const clientError = status >= 400 && status < 500 && status !== 408 && status !== 409;
      throw new InvokeError(
        "UNKNOWN" /* UNKNOWN */,
        `HTTP ${status}: ${text}`,
        clientError ? { retryable: false, rawResponse: text } : { rawResponse: text }
      );
    }
    let json;
    try {
      json = await response.json();
    } catch (err) {
      throw new InvokeError("UNKNOWN" /* UNKNOWN */, `Invalid JSON in response: ${err}`, { rawError: err });
    }
    if (this.config.provider === "anthropic") {
      return this.parseAnthropicResponse(json);
    }
    const choice = json.choices?.[0];
    if (!choice) {
      throw new InvokeError("UNKNOWN" /* UNKNOWN */, "No choices in response", { rawResponse: json });
    }
    const message = choice.message;
    const finishReason = choice.finish_reason;
    if (finishReason === "content_filter") {
      throw new InvokeError("CONTENT_FILTER" /* CONTENT_FILTER */, "Content filtered", { retryable: false, rawResponse: json });
    }
    if (finishReason === "length") {
      throw new InvokeError("CONTEXT_LENGTH" /* CONTEXT_LENGTH */, "Context length exceeded", { retryable: false, rawResponse: json });
    }
    if (!message) {
      throw new InvokeError("UNKNOWN" /* UNKNOWN */, "No message in response", { rawResponse: json });
    }
    const usage = json.usage;
    return {
      toolCalls: message.tool_calls,
      content: message.content,
      usage: usage ? {
        promptTokens: usage.prompt_tokens ?? 0,
        completionTokens: usage.completion_tokens ?? 0,
        totalTokens: usage.total_tokens ?? 0
      } : void 0
    };
  }
};
267
+
268
+ // src/llm/utils.ts
269
/**
 * Convert a Zod (v3-style, `_def.typeName`) schema into a JSON Schema object.
 *
 * Handles objects, strings, numbers, booleans, enums, arrays, optionals,
 * nullables, defaults, effects (refine/transform), unions, records, literals
 * and `any`. A description set on any of these, including wrapper types, is
 * carried into the result. Anything unrecognised, including a missing schema,
 * falls back to `{ type: "string" }`.
 *
 * @param {object} schema - A Zod schema (anything exposing `_def`).
 * @returns {object} JSON Schema fragment.
 */
function zodToJsonSchema(schema) {
  const def = schema != null && typeof schema === "object" ? schema._def : void 0;
  if (!def) return { type: "string" };
  // Attach this level's description, overriding one inherited from an inner type.
  const described = (result) => def.description ? { ...result, description: def.description } : result;
  switch (def.typeName) {
    case "ZodObject": {
      const shape = typeof def.shape === "function" ? def.shape() : def.shape ?? {};
      const properties = {};
      const required = [];
      for (const [key, value] of Object.entries(shape)) {
        properties[key] = zodToJsonSchema(value);
        if (value?._def?.typeName !== "ZodOptional") {
          required.push(key);
        }
      }
      const result = { type: "object", properties };
      if (required.length > 0) result.required = required;
      return described(result);
    }
    case "ZodString":
      return described({ type: "string" });
    case "ZodNumber":
      return described({ type: "number" });
    case "ZodBoolean":
      return described({ type: "boolean" });
    case "ZodEnum":
      return described({ type: "string", enum: def.values });
    case "ZodArray":
      return described({ type: "array", items: zodToJsonSchema(def.type) });
    case "ZodOptional":
      return described(zodToJsonSchema(def.innerType));
    case "ZodNullable":
      return described({ anyOf: [zodToJsonSchema(def.innerType), { type: "null" }] });
    case "ZodDefault":
      return described({ ...zodToJsonSchema(def.innerType), default: def.defaultValue() });
    case "ZodEffects":
      // refine()/transform() wrappers: describe the underlying input schema.
      return described(zodToJsonSchema(def.schema));
    case "ZodUnion":
      return described({ oneOf: def.options.map((opt) => zodToJsonSchema(opt)) });
    case "ZodRecord":
      return described({ type: "object", additionalProperties: zodToJsonSchema(def.valueType) });
    case "ZodLiteral":
      return described({ const: def.value });
    case "ZodAny":
      return described({});
    default:
      return { type: "string" };
  }
}
331
/**
 * Wrap a Zod schema as an OpenAI "function" tool definition.
 *
 * @param {string} name - Tool name.
 * @param {string} description - Tool description shown to the model.
 * @param {object} schema - Zod schema for the tool arguments.
 * @returns {{type: string, function: {name: string, description: string, parameters: object}}}
 */
function zodToOpenAITool(name, description, schema) {
  const parameters = zodToJsonSchema(schema);
  const fn = { name, description, parameters };
  return { type: "function", function: fn };
}
341
+
342
+ // src/llm/client.ts
343
/**
 * High-level LLM wrapper: forces the model to call a single tool, parses the
 * arguments, executes the tool and retries transient failures.
 */
var LLM = class {
  client;
  config;
  constructor(config) {
    this.config = config;
    this.client = new OpenAIClient({
      baseURL: config.baseURL,
      model: config.model,
      apiKey: config.apiKey,
      temperature: config.temperature,
      provider: config.provider
    });
  }
  /**
   * Build the error thrown when the caller's abort signal has fired.
   * Named "AbortError" so `withRetry` recognises it and stops immediately.
   */
  static abortError() {
    const err = new Error("Aborted");
    err.name = "AbortError";
    return err;
  }
  /**
   * Decode tool-call arguments. Accepts a JSON string, a double-encoded JSON
   * string (a JSON string whose value is itself JSON), or an already-decoded object.
   *
   * @throws {InvokeError} INVALID_TOOL_ARGS when the payload is not valid JSON.
   */
  static parseToolArgs(raw) {
    if (raw !== null && typeof raw === "object") return raw;
    try {
      let args = JSON.parse(raw);
      // Some models double-encode: the first parse yields a string holding the real JSON.
      if (typeof args === "string") args = JSON.parse(args);
      return args;
    } catch {
      throw new InvokeError("INVALID_TOOL_ARGS" /* INVALID_TOOL_ARGS */, `Invalid JSON in tool args: ${raw}`);
    }
  }
  /**
   * Ask the model to call `tool`, then execute it with the returned arguments.
   * If the model answers with plain text instead of a tool call, a JSON object
   * embedded in that text is used as the arguments.
   *
   * @param {object[]} messages - Chat messages.
   * @param {{name: string, description: string, schema: object, execute: Function}} tool
   * @param {AbortSignal} [abortSignal] - Checked before each attempt and forwarded to the client.
   * @returns {Promise<{toolCall: {name: string, args: object}, toolResult: unknown, usage?: object}>}
   * @throws {InvokeError} NO_TOOL_CALL, INVALID_TOOL_ARGS, TOOL_EXECUTION_ERROR, or client errors.
   */
  async invoke(messages, tool, abortSignal) {
    const openaiTool = zodToOpenAITool(tool.name, tool.description, tool.schema);
    const runTool = async (args) => {
      try {
        return await tool.execute(args);
      } catch (err) {
        throw new InvokeError("TOOL_EXECUTION_ERROR" /* TOOL_EXECUTION_ERROR */, `Tool execution failed: ${err}`, { rawError: err });
      }
    };
    return this.withRetry(async () => {
      if (abortSignal?.aborted) throw LLM.abortError();
      const response = await this.client.chatCompletion(
        messages,
        [openaiTool],
        { toolChoice: { type: "function", function: { name: tool.name } }, signal: abortSignal }
      );
      const toolCall = response.toolCalls?.[0];
      let args;
      if (toolCall) {
        args = LLM.parseToolArgs(toolCall.function.arguments);
      } else {
        args = response.content ? extractJsonFromString(response.content) : null;
        if (!args) {
          throw new InvokeError("NO_TOOL_CALL" /* NO_TOOL_CALL */, "No tool call in response");
        }
      }
      const result = await runTool(args);
      return {
        toolCall: { name: tool.name, args },
        toolResult: result,
        usage: response.usage
      };
    });
  }
  /**
   * Run `fn`, retrying up to `config.maxRetries` (default 3) additional times
   * with a linearly growing delay. Aborts and non-retryable InvokeErrors are
   * rethrown immediately.
   */
  async withRetry(fn) {
    const configured = this.config.maxRetries ?? 3;
    // Guard against negative/NaN values so at least one attempt always runs.
    const maxRetries = configured > 0 ? configured : 0;
    let lastError;
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        return await fn();
      } catch (err) {
        lastError = err;
        if (err instanceof Error && err.name === "AbortError") throw err;
        if (err instanceof InvokeError && !err.retryable) throw err;
        if (attempt < maxRetries) {
          await new Promise((r) => setTimeout(r, 100 * (attempt + 1)));
        }
      }
    }
    throw lastError;
  }
};
422
/**
 * Pull a JSON object out of free-form text.
 *
 * Takes the span from the first "{" to the last "}" and parses it.
 *
 * @param {string} str - Text that may contain a JSON object.
 * @returns {object|null} The parsed value, or null when no span exists or it is not valid JSON.
 */
function extractJsonFromString(str) {
  const open = str.indexOf("{");
  const close = str.lastIndexOf("}");
  const hasSpan = open !== -1 && close > open;
  if (!hasSpan) return null;
  const candidate = str.slice(open, close + 1);
  try {
    return JSON.parse(candidate);
  } catch {
    return null;
  }
}
432
+
433
+ // src/browser/manager.ts
434
+ import puppeteer from "puppeteer-core";
435
+ import { existsSync } from "fs";
436
+
437
+ // src/utils/logger.ts
438
+ import chalk from "chalk";
439
/**
 * Console logger with a coloured glyph per level.
 * `error` writes to stderr via console.error; every other level uses console.log.
 * `debug` prints only when the LOBSTER_DEBUG environment variable is set.
 */
var log = {
  info(msg) {
    console.log(chalk.blue("\u2139"), msg);
  },
  success(msg) {
    console.log(chalk.green("\u2713"), msg);
  },
  warn(msg) {
    console.log(chalk.yellow("\u26A0"), msg);
  },
  error(msg) {
    console.error(chalk.red("\u2717"), msg);
  },
  debug(msg) {
    if (!process.env.LOBSTER_DEBUG) return;
    console.log(chalk.gray("\u22EF"), msg);
  },
  step(n, msg) {
    console.log(chalk.cyan(`[${n}]`), msg);
  },
  dim(msg) {
    console.log(chalk.dim(msg));
  }
};
450
+
451
+ // src/browser/manager.ts
452
/**
 * Owns a single Puppeteer browser handle, created lazily on first use.
 *
 * With `config.cdpEndpoint` it attaches to an existing browser over CDP;
 * otherwise it launches Chrome from `config.executablePath` or the first
 * installation `findChrome()` locates.
 */
var BrowserManager = class {
  browser = null;
  config;
  constructor(config = {}) {
    this.config = config;
  }
  /**
   * Return the live browser, connecting or launching one if necessary.
   * @throws {Error} when no Chrome/Chromium executable can be found.
   */
  async connect() {
    if (this.browser?.connected) return this.browser;
    const { cdpEndpoint, headless } = this.config;
    if (cdpEndpoint) {
      log.debug(`Connecting to CDP endpoint: ${cdpEndpoint}`);
      this.browser = await puppeteer.connect({ browserWSEndpoint: cdpEndpoint });
      return this.browser;
    }
    const executablePath = this.config.executablePath || findChrome();
    if (!executablePath) {
      throw new Error(
        "Chrome/Chromium not found. Set LOBSTER_BROWSER_PATH or config browser.executablePath"
      );
    }
    log.debug(`Launching Chrome: ${executablePath}`);
    const launchArgs = [
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-dev-shm-usage",
      "--disable-gpu"
    ];
    this.browser = await puppeteer.launch({
      executablePath,
      headless: headless ?? true,
      args: launchArgs
    });
    return this.browser;
  }
  /** Open a new page in the (possibly freshly started) browser. */
  async newPage() {
    const activeBrowser = await this.connect();
    return activeBrowser.newPage();
  }
  /** Close the browser if one is held; errors from closing are ignored. */
  async close() {
    if (!this.browser) return;
    await this.browser.close().catch(() => {
    });
    this.browser = null;
  }
};
498
/**
 * Locate an installed Chrome/Chromium binary for the current platform.
 *
 * Checks a fixed list of well-known install paths (macOS, Windows, or the
 * Linux-style list for everything else) and returns the first that exists.
 *
 * @returns {string|undefined} Path to the executable, or undefined if none exists.
 */
function findChrome() {
  let candidates;
  switch (process.platform) {
    case "darwin":
      candidates = [
        "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
        "/Applications/Chromium.app/Contents/MacOS/Chromium",
        "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary"
      ];
      break;
    case "win32":
      candidates = [
        "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
        "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"
      ];
      break;
    default:
      candidates = [
        "/usr/bin/google-chrome",
        "/usr/bin/google-chrome-stable",
        "/usr/bin/chromium-browser",
        "/usr/bin/chromium",
        "/snap/bin/chromium"
      ];
  }
  for (const candidate of candidates) {
    if (existsSync(candidate)) return candidate;
  }
  return undefined;
}
515
+
516
+ // src/browser/dom/flat-tree.ts
517
/**
 * Script source evaluated inside the page. It walks the visible elements under
 * document.body and evaluates to { rootId, map, selectorMap }:
 *  - map: node id -> { id, tagName, attributes, parentId, children, isInteractive,
 *    highlightIndex?, scrollable?, text? }
 *  - selectorMap: highlightIndex -> node id of each interactive element
 *
 * Node ids come from a per-run counter, so they are unique and stable for
 * a given DOM (random ids could collide and silently overwrite nodes).
 */
var FLAT_TREE_SCRIPT = `
(() => {
  const INTERACTIVE_TAGS = new Set([
    'a', 'button', 'input', 'select', 'textarea', 'details', 'summary',
    'label', 'option', 'fieldset', 'legend',
  ]);

  const INTERACTIVE_ROLES = new Set([
    'button', 'link', 'textbox', 'checkbox', 'radio', 'combobox',
    'listbox', 'menu', 'menuitem', 'tab', 'switch', 'slider',
    'searchbox', 'spinbutton', 'option', 'menuitemcheckbox', 'menuitemradio',
  ]);

  const ATTR_WHITELIST = [
    'type', 'role', 'aria-label', 'aria-expanded', 'aria-selected',
    'aria-checked', 'aria-disabled', 'placeholder', 'title', 'href',
    'value', 'name', 'alt', 'src',
  ];

  let highlightIndex = 0;
  let nodeCounter = 0;
  const nodes = {};
  const selectorMap = {};

  // Sequential ids: collision-free and identical across runs on the same DOM.
  function nextId(prefix) {
    return prefix + '_' + (nodeCounter++);
  }

  function isVisible(el) {
    if (el.offsetWidth === 0 && el.offsetHeight === 0) return false;
    const style = getComputedStyle(el);
    if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') return false;
    return true;
  }

  function isInteractive(el) {
    const tag = el.tagName.toLowerCase();
    if (INTERACTIVE_TAGS.has(tag)) return true;
    const role = el.getAttribute('role');
    if (role && INTERACTIVE_ROLES.has(role)) return true;
    if (el.getAttribute('contenteditable') === 'true') return true;
    const tabindex = el.getAttribute('tabindex');
    if (tabindex !== null && parseInt(tabindex, 10) >= 0) return true;
    if (el.onclick || el.getAttribute('onclick')) return true;
    return false;
  }

  function getAttributes(el) {
    const attrs = {};
    for (const attr of ATTR_WHITELIST) {
      const val = el.getAttribute(attr);
      if (val !== null && val !== '') attrs[attr] = val;
    }
    return attrs;
  }

  function getScrollable(el) {
    const style = getComputedStyle(el);
    const overflowY = style.overflowY;
    const overflowX = style.overflowX;
    const isScrollableY = (overflowY === 'auto' || overflowY === 'scroll') && el.scrollHeight > el.clientHeight;
    const isScrollableX = (overflowX === 'auto' || overflowX === 'scroll') && el.scrollWidth > el.clientWidth;
    if (!isScrollableY && !isScrollableX) return null;
    return {
      left: el.scrollLeft,
      top: el.scrollTop,
      right: el.scrollWidth - el.clientWidth - el.scrollLeft,
      bottom: el.scrollHeight - el.clientHeight - el.scrollTop,
    };
  }

  function walk(el, parentId) {
    if (!el || el.nodeType === 8) return; // skip comments

    if (el.nodeType === 3) { // text node
      const text = el.textContent.trim();
      if (!text) return;
      const id = nextId('text');
      nodes[id] = { id, tagName: '#text', text, parentId };
      if (parentId && nodes[parentId]) {
        nodes[parentId].children = nodes[parentId].children || [];
        nodes[parentId].children.push(id);
      }
      return;
    }

    if (el.nodeType !== 1) return; // only elements

    const tag = el.tagName.toLowerCase();
    if (['script', 'style', 'noscript', 'svg', 'path'].includes(tag)) return;
    if (!isVisible(el)) return;

    const id = nextId(tag);
    const interactive = isInteractive(el);
    const node = {
      id,
      tagName: tag,
      attributes: getAttributes(el),
      parentId,
      children: [],
      isInteractive: interactive,
    };

    if (interactive) {
      node.highlightIndex = highlightIndex;
      selectorMap[highlightIndex] = id;
      highlightIndex++;
    }

    const scrollable = getScrollable(el);
    if (scrollable) node.scrollable = scrollable;

    // Direct text children are folded into the element's own text (max 200 chars).
    const text = [];
    for (const child of el.childNodes) {
      if (child.nodeType === 3 && child.textContent.trim()) {
        text.push(child.textContent.trim());
      }
    }
    if (text.length > 0) node.text = text.join(' ').slice(0, 200);

    nodes[id] = node;

    if (parentId && nodes[parentId]) {
      nodes[parentId].children.push(id);
    }

    for (const child of el.children) {
      walk(child, id);
    }
  }

  const rootId = 'root';
  nodes[rootId] = { id: rootId, tagName: 'body', children: [], attributes: {} };
  for (const child of document.body.children) {
    walk(child, rootId);
  }

  return { rootId, map: nodes, selectorMap };
})()
`;
651
/**
 * Render a flat DOM tree ({ rootId, map }) as indented pseudo-HTML text.
 *
 * Each element becomes `[index]<tag attrs |scroll: ...|>text`, with `</>`
 * appended when it has no children. Elements that have no highlight index, no
 * text and no children are omitted. Text nodes are emitted as bare text.
 *
 * @param {{rootId: string, map: Record<string, object>}} tree
 * @returns {string} Newline-joined rendering.
 */
function flatTreeToString(tree) {
  const output = [];
  const render = (nodeId, depth) => {
    const node = tree.map[nodeId];
    if (!node) return;
    const pad = " ".repeat(depth);
    if (node.tagName === "#text") {
      if (node.text) output.push(`${pad}${node.text}`);
      return;
    }
    const attrText = Object.entries(node.attributes || {})
      .map(([key, value]) => (value === "" ? key : `${key}="${value}"`))
      .join(" ");
    const marker = node.highlightIndex !== void 0 ? `[${node.highlightIndex}]` : "";
    const scrollNote = node.scrollable
      ? ` |scroll: ${Math.round(node.scrollable.top)}px up, ${Math.round(node.scrollable.bottom)}px down|`
      : "";
    const label = node.text || "";
    const childIds = node.children || [];
    const hasChildren = childIds.length > 0;
    // Nothing worth showing: unlabelled, non-interactive leaf.
    if (!marker && !label && !hasChildren) return;
    const head = `${pad}${marker}<${node.tagName}${attrText ? " " + attrText : ""}${scrollNote}>${label}`;
    if (!hasChildren) {
      output.push(`${head}</>`);
      return;
    }
    output.push(head);
    for (const childId of childIds) {
      render(childId, depth + 1);
    }
  };
  render(tree.rootId, 0);
  return output.join("\n");
}
686
+
687
+ // src/browser/dom/snapshot.ts
688
/**
 * Produce the snapshot script text for a given list of previous hashes.
 * A missing/falsy list is treated as empty; the work is delegated to SNAPSHOT_SCRIPT_FN.
 *
 * @param {string[]} [previousHashes]
 * @returns {string}
 */
function buildSnapshotScript(previousHashes) {
  const hashes = previousHashes ? previousHashes : [];
  return SNAPSHOT_SCRIPT_FN(hashes);
}
691
/**
 * Emit the opening lines of a snapshot script with `prevHashes` inlined as a
 * JSON literal inside `new Set(...)`.
 * NOTE(review): the returned text opens an arrow-function body and never
 * closes it, so it is only a fragment — confirm how callers complete it.
 *
 * @param {string[]} prevHashes
 * @returns {string}
 */
function SNAPSHOT_SCRIPT_FN(prevHashes) {
  const inlinedHashes = JSON.stringify(prevHashes);
  const lines = [
    "",
    "(() => {",
    "let idx = 0;",
    `const __prevHashes = new Set(${inlinedHashes});`,
    "const __currentHashes = [];",
    ""
  ];
  return lines.join("\n");
}
699
+ var SNAPSHOT_SCRIPT = `
700
+ (() => {
701
+ let idx = 0;
702
+ const __prevHashes = (window.__lobster_prev_hashes) ? new Set(window.__lobster_prev_hashes) : null;
703
+ const __currentHashes = [];
704
+
705
+ const SKIP_TAGS = new Set([
706
+ 'script','style','noscript','svg','path','meta','link','head',
707
+ 'template','slot','colgroup','col',
708
+ ]);
709
+
710
+ const INTERACTIVE_TAGS = new Set([
711
+ 'a','button','input','select','textarea','details','summary','label',
712
+ ]);
713
+
714
+ const INTERACTIVE_ROLES = new Set([
715
+ 'button','link','textbox','checkbox','radio','combobox','listbox',
716
+ 'menu','menuitem','tab','switch','slider','searchbox','spinbutton',
717
+ 'option','menuitemcheckbox','menuitemradio','treeitem',
718
+ ]);
719
+
720
+ const ATTR_WHITELIST = [
721
+ 'type','role','aria-label','aria-expanded','aria-selected','aria-checked',
722
+ 'aria-disabled','aria-haspopup','aria-pressed','placeholder','title',
723
+ 'href','value','name','alt','src','action','method','for',
724
+ 'data-testid','data-id','contenteditable','tabindex',
725
+ ];
726
+
727
+ const AD_PATTERNS = /ad[-_]?banner|ad[-_]?container|google[-_]?ad|doubleclick|adsbygoogle|sponsored|^ad$/i;
728
+
729
+ // \u2500\u2500 Stage 1: Visibility check \u2500\u2500
730
+ function isVisible(el) {
731
+ if (el.offsetWidth === 0 && el.offsetHeight === 0 && el.tagName !== 'INPUT') return false;
732
+ const s = getComputedStyle(el);
733
+ if (s.display === 'none') return false;
734
+ if (s.visibility === 'hidden' || s.visibility === 'collapse') return false;
735
+ if (s.opacity === '0') return false;
736
+ if (s.clipPath === 'inset(100%)') return false;
737
+ // Check for offscreen positioning
738
+ const rect = el.getBoundingClientRect();
739
+ if (rect.right < 0 || rect.bottom < 0) return false;
740
+ return true;
741
+ }
742
+
743
+ // \u2500\u2500 Stage 2: Interactive detection \u2500\u2500
744
+ function isInteractive(el) {
745
+ const tag = el.tagName.toLowerCase();
746
+ if (INTERACTIVE_TAGS.has(tag)) {
747
+ // Skip disabled elements
748
+ if (el.disabled) return false;
749
+ // Skip hidden inputs
750
+ if (tag === 'input' && el.type === 'hidden') return false;
751
+ return true;
752
+ }
753
+ const role = el.getAttribute('role');
754
+ if (role && INTERACTIVE_ROLES.has(role)) return true;
755
+ if (el.contentEditable === 'true') return true;
756
+ if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) return true;
757
+ if (el.onclick) return true;
758
+ return false;
759
+ }
760
+
761
+ // \u2500\u2500 Stage 8: Attribute filtering \u2500\u2500
762
+ function getAttrs(el) {
763
+ const parts = [];
764
+ for (const name of ATTR_WHITELIST) {
765
+ let v = el.getAttribute(name);
766
+ if (v === null || v === '') continue;
767
+ // Truncate long values
768
+ if (v.length > 80) v = v.slice(0, 77) + '...';
769
+ // Skip href="javascript:..."
770
+ if (name === 'href' && v.startsWith('javascript:')) continue;
771
+ parts.push(name + '=' + v);
772
+ }
773
+ return parts.length ? ' ' + parts.join(' ') : '';
774
+ }
775
+
776
+ // \u2500\u2500 Stage 9: Ad filtering \u2500\u2500
777
+ function isAd(el) {
778
+ const id = el.id || '';
779
+ const cls = el.className || '';
780
+ if (typeof cls === 'string' && AD_PATTERNS.test(cls)) return true;
781
+ if (AD_PATTERNS.test(id)) return true;
782
+ if (el.tagName === 'IFRAME' && AD_PATTERNS.test(el.src || '')) return true;
783
+ return false;
784
+ }
785
+
786
+ // \u2500\u2500 Stage 10: Scroll info \u2500\u2500
787
+ function getScrollInfo(el) {
788
+ const s = getComputedStyle(el);
789
+ const overflowY = s.overflowY;
790
+ const overflowX = s.overflowX;
791
+ const scrollableY = (overflowY === 'auto' || overflowY === 'scroll') && el.scrollHeight > el.clientHeight;
792
+ const scrollableX = (overflowX === 'auto' || overflowX === 'scroll') && el.scrollWidth > el.clientWidth;
793
+ if (!scrollableY && !scrollableX) return '';
794
+
795
+ const parts = [];
796
+ if (scrollableY) {
797
+ const up = Math.round(el.scrollTop);
798
+ const down = Math.round(el.scrollHeight - el.clientHeight - el.scrollTop);
799
+ if (up > 0) parts.push(up + 'px up');
800
+ if (down > 0) parts.push(down + 'px down');
801
+ }
802
+ if (scrollableX) {
803
+ const left = Math.round(el.scrollLeft);
804
+ const right = Math.round(el.scrollWidth - el.clientWidth - el.scrollLeft);
805
+ if (left > 0) parts.push(left + 'px left');
806
+ if (right > 0) parts.push(right + 'px right');
807
+ }
808
+ return parts.length ? ' |scroll: ' + parts.join(', ') + '|' : '';
809
+ }
810
+
811
+ // \u2500\u2500 Stage 6: Bounding-box dedup \u2500\u2500
812
+ // If a parent and child are both interactive and have ~same bounding box,
813
+ // skip the parent (e.g., <a><button>Click</button></a>)
814
+ function isWrappingInteractive(el) {
815
+ if (!isInteractive(el)) return false;
816
+ const rect = el.getBoundingClientRect();
817
+ if (rect.width === 0 || rect.height === 0) return false;
818
+ for (const child of el.children) {
819
+ if (!isInteractive(child)) continue;
820
+ const cr = child.getBoundingClientRect();
821
+ const overlapX = Math.min(rect.right, cr.right) - Math.max(rect.left, cr.left);
822
+ const overlapY = Math.min(rect.bottom, cr.bottom) - Math.max(rect.top, cr.top);
823
+ const overlapArea = Math.max(0, overlapX) * Math.max(0, overlapY);
824
+ const parentArea = rect.width * rect.height;
825
+ if (parentArea > 0 && overlapArea / parentArea > 0.85) return true;
826
+ }
827
+ return false;
828
+ }
829
+
830
+ // \u2500\u2500 Stage 7: Occlusion detection \u2500\u2500
831
/**
 * Stage 7 helper: heuristically decides whether another element sits on top
 * of el at the centre of its bounding box.
 *
 * The element returned by document.elementFromPoint for the centre point is
 * compared with el. If it is el itself, a descendant, or an ancestor, el is
 * not considered occluded. Otherwise el counts as occluded only when the
 * computed z-index of the covering element exceeds the one of el by more
 * than 10. A non-numeric z-index (such as auto) is treated as 0.
 *
 * @param {Element} el - Element to test.
 * @returns {boolean} True when el appears to be covered by an overlay.
 */
function isOccluded(el) {
  const rect = el.getBoundingClientRect();
  if (rect.width === 0 || rect.height === 0) return false;
  const cx = rect.left + rect.width / 2;
  const cy = rect.top + rect.height / 2;
  const topEl = document.elementFromPoint(cx, cy);
  // No hit at all (for instance a point with nothing rendered): not occluded.
  if (!topEl) return false;
  if (topEl === el || el.contains(topEl) || topEl.contains(el)) return false;
  // Explicit radix; NaN (from auto) falls back to 0.
  const zIndexOf = (node) => Number.parseInt(getComputedStyle(node).zIndex, 10) || 0;
  return zIndexOf(topEl) > zIndexOf(el) + 10;
}
844
+
845
+ // \u2500\u2500 Stage 5: Iframe content extraction \u2500\u2500
846
/**
 * Stage 5 helper: renders the body of an iframe document through walkNode.
 *
 * @param {HTMLIFrameElement} iframe - Frame whose document should be rendered.
 * @param {number} depth - Indentation depth passed to walkNode.
 * @param {number} maxDepth - Depth limit passed to walkNode.
 * @returns {string} A newline followed by the rendered body, or an empty
 *   string when the frame document is unavailable, has no body, renders to
 *   nothing, or reading it throws.
 */
function getIframeContent(iframe, depth, maxDepth) {
  try {
    const doc = iframe.contentDocument;
    if (!doc || !doc.body) return '';
    const inner = walkNode(doc.body, depth, maxDepth);
    // Return a truly empty string for an empty frame so that the caller's
    // truthiness check can drop the frame instead of printing a bare wrapper.
    return inner ? '\\n' + inner : '';
  } catch {
    // Best effort: a frame that cannot be read is simply left out.
    return '';
  }
}
853
+
854
+ // \u2500\u2500 Stage 4: Shadow DOM traversal \u2500\u2500
855
/**
 * Stage 4 helper: renders the nodes found in the shadow root of an element.
 *
 * @param {Element} el - Host element; el.shadowRoot is read.
 * @param {number} depth - Indentation depth passed to walkNode.
 * @param {number} maxDepth - Depth limit passed to walkNode.
 * @returns {string} Concatenated walkNode output for every shadow child, or
 *   an empty string when el.shadowRoot is falsy.
 */
function getShadowContent(el, depth, maxDepth) {
  const root = el.shadowRoot;
  if (!root) return '';
  const rendered = [];
  for (const shadowChild of root.childNodes) {
    rendered.push(walkNode(shadowChild, depth, maxDepth));
  }
  return rendered.join('');
}
863
+
864
+ // \u2500\u2500 Input value hint \u2500\u2500
865
/**
 * Builds the state suffix shown for a form control in the snapshot.
 *
 * - checkbox / radio inputs: ' [checked]' or ' [unchecked]'
 * - password inputs with a value: a fixed mask, never the real value
 *   (the snapshot text leaves the page, and the form-state extractor in this
 *   bundle masks passwords the same way)
 * - other inputs and textareas with a value: ' value="..."' (first 50 chars)
 * - selects with a selection: ' selected="..."' (first 40 chars of the first
 *   selected option text)
 *
 * @param {Element} el - Element to describe.
 * @returns {string} The suffix, or an empty string when nothing applies.
 */
function getInputHint(el) {
  const tag = el.tagName.toLowerCase();
  if (tag === 'input') {
    const type = el.type || 'text';
    if (type === 'checkbox' || type === 'radio') {
      return el.checked ? ' [checked]' : ' [unchecked]';
    }
    const val = el.value || '';
    if (!val) return '';
    // Only reveal that a password is present, not what it is.
    if (type === 'password') return ' value="\u2022\u2022\u2022\u2022"';
    return ' value="' + val.slice(0, 50) + '"';
  }
  if (tag === 'textarea' && el.value) {
    return ' value="' + el.value.slice(0, 50) + '"';
  }
  if (tag === 'select' && el.selectedOptions?.length) {
    return ' selected="' + el.selectedOptions[0].text.slice(0, 40) + '"';
  }
  return '';
}
884
+
885
// Default depth limit used by the top-level walkNode call.
const MAX_DEPTH = 25;
// Longest text run printed before it is cut and suffixed with '...'.
const MAX_TEXT = 150;

/** Cuts a text run to MAX_TEXT characters, appending '...' when shortened. */
function clipLeafText(t) {
  return t.length > MAX_TEXT ? t.slice(0, MAX_TEXT) + '...' : t;
}

/** Renders every child node of el, then its shadow content, at one depth. */
function walkChildNodes(el, depth, maxDepth) {
  let out = '';
  for (const c of el.childNodes) out += walkNode(c, depth, maxDepth);
  return out + getShadowContent(el, depth, maxDepth);
}

/**
 * Recursively renders a DOM node as indented snapshot text.
 *
 * Text nodes become one trimmed (and clipped) line. Elements are dropped when
 * their tag is in SKIP_TAGS, when isVisible fails, or when isAd matches.
 * Interactive elements receive a running index prefix ([n], or *[n] when
 * their hash was not present in the previous snapshot), are tagged with a
 * data-ref attribute holding the same index, and are annotated with
 * ' |occluded|' when isOccluded reports that something covers them.
 * Non-interactive containers without scroll info are flattened: their
 * children are rendered at the same depth.
 *
 * Side effects: increments idx, appends to __currentHashes, writes
 * el.dataset.ref.
 *
 * @param {Node} node - Node to render.
 * @param {number} depth - Current indentation depth.
 * @param {number} maxDepth - Nodes deeper than this render as ''.
 * @returns {string} Rendered text, possibly empty.
 */
function walkNode(node, depth, maxDepth) {
  if (depth > maxDepth) return '';
  if (!node) return '';

  // Text node
  if (node.nodeType === 3) {
    const t = node.textContent.trim();
    if (!t) return '';
    return ' '.repeat(depth) + clipLeafText(t) + '\\n';
  }

  // Anything that is not an element (comments included) is ignored.
  if (node.nodeType !== 1) return '';

  const el = node;
  const tag = el.tagName.toLowerCase();

  // Stage 3: skip tags
  if (SKIP_TAGS.has(tag)) return '';

  // Stage 2: visibility
  if (!isVisible(el)) return '';

  // Stage 9: ad filtering
  if (isAd(el)) return '';

  // Stage 6: bbox dedup - an interactive wrapper of an equally sized
  // interactive child is not listed itself.
  const skipSelf = isWrappingInteractive(el);

  const indent = ' '.repeat(depth);
  const inter = !skipSelf && isInteractive(el);
  let prefix = '';
  let occludedMark = '';
  if (inter) {
    const thisIdx = idx++;
    // Hash: tag + text + key attributes for diff tracking
    const hashText = tag + ':' + (el.textContent || '').trim().slice(0, 40) + ':' + (el.getAttribute('href') || '') + ':' + (el.getAttribute('aria-label') || '');
    __currentHashes.push(hashText);
    const isNew = __prevHashes && __prevHashes.size > 0 && !__prevHashes.has(hashText);
    prefix = isNew ? '*[' + thisIdx + ']' : '[' + thisIdx + ']';

    // Stage 11: annotate with data-ref so actions can find the element again.
    try { el.dataset.ref = String(thisIdx); } catch {}

    // Stage 7: the element stays in the snapshot, but is marked so the agent
    // knows it may have to scroll or close an overlay first.
    if (isOccluded(el)) occludedMark = ' |occluded|';
  }

  const a = getAttrs(el);
  const scrollInfo = getScrollInfo(el);
  const inputHint = inter ? getInputHint(el) : '';

  // Leaf text extraction: an element whose only child is a text node is
  // printed on a single line.
  let leafText = '';
  if (el.childNodes.length === 1 && el.childNodes[0].nodeType === 3) {
    const t = el.childNodes[0].textContent.trim();
    if (t) leafText = clipLeafText(t);
  }

  // Stage 5: iframe - printed only when its document yields content.
  if (tag === 'iframe') {
    const iframeContent = getIframeContent(el, depth + 1, maxDepth);
    if (iframeContent) {
      return indent + prefix + '<iframe' + a + occludedMark + '>\\n' + iframeContent;
    }
    return '';
  }

  if (skipSelf) {
    // Skip self but render children at the same depth.
    return walkChildNodes(el, depth, maxDepth);
  }

  if (inter || leafText || el.children.length === 0) {
    const open = indent + prefix + '<' + tag + a + scrollInfo + inputHint + occludedMark + '>';
    if (leafText) {
      return open + leafText + '</' + tag + '>\\n';
    }
    return open + '\\n' + walkChildNodes(el, depth + 1, maxDepth);
  }

  // Non-interactive container: keep it only when it carries scroll info,
  // otherwise flatten it away.
  if (scrollInfo) {
    return indent + '<' + tag + scrollInfo + '>\\n' + walkChildNodes(el, depth + 1, maxDepth);
  }
  return walkChildNodes(el, depth, maxDepth);
}
996
+
997
+ // \u2500\u2500 Page-level scroll info header \u2500\u2500
998
+ const scrollY = window.scrollY;
999
+ const scrollMax = document.documentElement.scrollHeight - window.innerHeight;
1000
+ const scrollPct = scrollMax > 0 ? Math.round((scrollY / scrollMax) * 100) : 0;
1001
+ const vpW = window.innerWidth;
1002
+ const vpH = window.innerHeight;
1003
+ const pageH = document.documentElement.scrollHeight;
1004
+
1005
+ let header = '';
1006
+ header += 'viewport: ' + vpW + 'x' + vpH + ' | page_height: ' + pageH + 'px';
1007
+ header += ' | scroll: ' + scrollPct + '%';
1008
+ if (scrollY > 50) header += ' (' + Math.round(scrollY) + 'px from top)';
1009
+ if (scrollMax - scrollY > 50) header += ' (' + Math.round(scrollMax - scrollY) + 'px more below)';
1010
+ header += '\\n---\\n';
1011
+
1012
+ // Store current hashes for next diff comparison
1013
+ window.__lobster_prev_hashes = __currentHashes;
1014
+
1015
+ return header + walkNode(document.body, 0, MAX_DEPTH);
1016
+ })()
1017
+ `;
1018
+
1019
+ // src/browser/dom/semantic-tree.ts
1020
+ var SEMANTIC_TREE_SCRIPT = `
1021
+ (() => {
1022
// Tags whose subtrees never appear in the semantic tree.
const SKIP = new Set([
  'script',
  'style',
  'noscript',
  'svg',
  'head',
  'meta',
  'link',
  'template',
]);

// Role printed for a tag when the element has no explicit role attribute.
const ROLE_MAP = {
  // controls
  a: 'link',
  button: 'button',
  input: 'textbox',
  select: 'combobox',
  textarea: 'textbox',
  // headings
  h1: 'heading',
  h2: 'heading',
  h3: 'heading',
  h4: 'heading',
  h5: 'heading',
  h6: 'heading',
  // landmarks
  nav: 'navigation',
  main: 'main',
  header: 'banner',
  footer: 'contentinfo',
  aside: 'complementary',
  // structure
  form: 'form',
  table: 'table',
  img: 'img',
  ul: 'list',
  ol: 'list',
  li: 'listitem',
  section: 'region',
  article: 'article',
  dialog: 'dialog',
  details: 'group',
  summary: 'button',
  progress: 'progressbar',
  meter: 'meter',
  output: 'status',
  label: 'label',
  legend: 'legend',
  fieldset: 'group',
  option: 'option',
  // table parts
  tr: 'row',
  td: 'cell',
  th: 'columnheader',
  caption: 'caption',
};

// Explicit role values that make an element count as interactive.
const INTERACTIVE_ROLES = new Set([
  'button',
  'link',
  'textbox',
  'checkbox',
  'radio',
  'combobox',
  'listbox',
  'menu',
  'menuitem',
  'tab',
  'switch',
  'slider',
  'searchbox',
  'spinbutton',
  'option',
  'menuitemcheckbox',
  'menuitemradio',
  'treeitem',
]);
1041
+
1042
+ // \u2500\u2500 W3C Accessible Name Algorithm (simplified) \u2500\u2500
1043
/**
 * Simplified accessible-name computation. The first source that yields a
 * non-empty value wins, in this order:
 *   1. aria-labelledby (text of every referenced element, space-joined)
 *   2. aria-label   3. alt   4. title   5. placeholder
 *   6. value attribute of submit / button inputs
 *   7. an associated label element
 *   8. own text content, only for elements with at most two element children
 *      and fewer than 120 characters of text
 * Results from steps 1-7 are cut to 120 characters.
 *
 * Labels are located through el.labels when the element exposes it (this
 * also covers a label that wraps the control) and otherwise by comparing
 * htmlFor with el.id. No CSS selector is built from the id, so ids that
 * contain quotes or brackets cannot make the lookup throw.
 *
 * @param {Element} el - Element to name.
 * @returns {string} The accessible name, or '' when none is found.
 */
function getAccessibleName(el) {
  const clip = (s) => s.slice(0, 120);

  // 1. aria-labelledby (highest priority)
  const labelledBy = el.getAttribute('aria-labelledby');
  if (labelledBy) {
    const parts = labelledBy.split(/\\s+/).map((id) => {
      const ref = document.getElementById(id);
      return ref ? ref.textContent.trim() : '';
    }).filter(Boolean);
    if (parts.length > 0) return clip(parts.join(' '));
  }

  // 2-5. plain attributes, in priority order
  for (const attr of ['aria-label', 'alt', 'title', 'placeholder']) {
    const attrValue = el.getAttribute(attr);
    if (attrValue) return clip(attrValue);
  }

  // 6. value (for buttons)
  if (el.tagName === 'INPUT' && (el.type === 'submit' || el.type === 'button')) {
    const val = el.getAttribute('value');
    if (val) return clip(val);
  }

  // 7. Associated label
  let label = el.labels && el.labels.length > 0 ? el.labels[0] : null;
  if (!label && el.id) {
    for (const candidate of document.getElementsByTagName('label')) {
      if (candidate.htmlFor === el.id) {
        label = candidate;
        break;
      }
    }
  }
  if (label) return clip(label.textContent.trim());

  // 8. Direct text content (only for leaf-ish elements)
  if (el.children.length <= 2) {
    const text = el.textContent.trim();
    if (text && text.length < 120) return text;
  }

  return '';
}
1091
+
1092
+ // \u2500\u2500 XPath generation \u2500\u2500
1093
/**
 * Builds a positional XPath for an element by climbing parentElement links.
 * Every step has the form tag[n], where n is the 1-based position of the
 * element among preceding siblings with the same tagName.
 *
 * @param {Element} el - Element to locate.
 * @returns {string} Path such as /html[1]/body[1]/div[2]; '/' when el is not
 *   an element.
 */
function getXPath(el) {
  const segments = [];
  for (let node = el; node && node.nodeType === 1; node = node.parentElement) {
    let position = 1;
    for (let prev = node.previousElementSibling; prev; prev = prev.previousElementSibling) {
      if (prev.tagName === node.tagName) position += 1;
    }
    segments.push(node.tagName.toLowerCase() + '[' + position + ']');
  }
  // Segments were collected leaf-first; the path reads root-first.
  return '/' + segments.reverse().join('/');
}
1109
+
1110
+ // \u2500\u2500 Interactivity classification \u2500\u2500
1111
/**
 * Lists the reasons an element counts as interactive. Possible entries, in
 * output order: 'native', 'aria', 'contenteditable', 'focusable', 'listener'.
 *
 * @param {Element} el - Element to classify.
 * @returns {string[]} Matching reasons; empty when the element is inert.
 */
function classifyInteractivity(el) {
  const tag = el.tagName.toLowerCase();
  const reasons = [];

  // Native controls, except anchors without href and hidden inputs.
  const nativeTags = ['a', 'button', 'input', 'select', 'textarea', 'details', 'summary'];
  const isDeadAnchor = tag === 'a' && !el.href;
  const isHiddenInput = tag === 'input' && el.type === 'hidden';
  if (nativeTags.includes(tag) && !isDeadAnchor && !isHiddenInput) {
    reasons.push('native');
  }

  // Explicit ARIA role from the interactive set.
  const role = el.getAttribute('role');
  if (role && INTERACTIVE_ROLES.has(role)) {
    reasons.push('aria');
  }

  // Only an element's own contenteditable="true" counts here.
  if (el.contentEditable === 'true') {
    reasons.push('contenteditable');
  }

  // Focusable only when a tabindex attribute is actually present.
  if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) {
    reasons.push('focusable');
  }

  // Handler properties and inline handler attributes.
  const hasHandler = el.onclick || el.onmousedown || el.onkeydown || el.onkeypress ||
    el.getAttribute('onclick') || el.getAttribute('onmousedown');
  if (hasHandler) {
    reasons.push('listener');
  }

  return reasons;
}
1140
+
1141
+ // \u2500\u2500 Disabled state with fieldset inheritance \u2500\u2500
1142
/**
 * Reports whether a control is disabled, either directly or through a
 * disabled fieldset ancestor.
 *
 * A control placed inside the first legend child of a disabled fieldset is
 * exempt from that fieldset only; fieldsets further up the tree are still
 * checked, so an outer disabled fieldset keeps disabling it.
 *
 * @param {Element} el - Control to inspect.
 * @returns {boolean} True when the control is disabled.
 */
function isDisabled(el) {
  if (el.disabled) return true;
  for (let ancestor = el.parentElement; ancestor; ancestor = ancestor.parentElement) {
    if (ancestor.tagName !== 'FIELDSET' || !ancestor.disabled) continue;
    // querySelector returns the first match, i.e. the first legend child.
    const firstLegend = ancestor.querySelector(':scope > legend');
    // Exempt from this fieldset, but keep climbing to outer ones.
    if (firstLegend && firstLegend.contains(el)) continue;
    return true;
  }
  return false;
}
1157
+
1158
+ // \u2500\u2500 Walk the DOM \u2500\u2500
1159
/**
 * Role reported for an input element of the given type when it carries no
 * explicit role attribute. Types without a dedicated mapping fall back to
 * 'textbox'.
 */
function inputTypeRole(type) {
  switch (type) {
    case 'checkbox': return 'checkbox';
    case 'radio': return 'radio';
    case 'range': return 'slider';
    case 'number': return 'spinbutton';
    case 'search': return 'searchbox';
    case 'button':
    case 'submit':
    case 'reset':
    case 'image':
      return 'button';
    default:
      return 'textbox';
  }
}

/**
 * Recursively renders a node as one line of the semantic tree plus the lines
 * of its children (including shadow root children).
 *
 * An element line holds: role (or tag), quoted accessible name, the
 * interactivity reasons with disabled flag and xpath for interactive
 * elements, and control state (input type / checked / value, textarea value,
 * select selection and options when there are at most ten). Password input
 * values are replaced by a fixed mask.
 *
 * @param {Node} el - Node to render.
 * @param {number} depth - Current indentation depth.
 * @param {number} maxDepth - Nodes deeper than this render as ''.
 * @returns {string} Rendered lines, possibly empty.
 */
function walk(el, depth, maxDepth) {
  if (!el || depth > maxDepth) return '';

  if (el.nodeType === 3) {
    const t = el.textContent.trim();
    return t ? ' '.repeat(depth) + 'text "' + t.slice(0, 100) + '"\\n' : '';
  }

  if (el.nodeType !== 1) return '';
  const tag = el.tagName.toLowerCase();
  if (SKIP.has(tag)) return '';

  const style = getComputedStyle(el);
  if (style.display === 'none' || style.visibility === 'hidden') return '';

  const indent = ' '.repeat(depth);
  // The role of an input depends on its type, so it is not taken from the
  // static tag table.
  const implicitRole = tag === 'input' ? inputTypeRole(el.type || 'text') : ROLE_MAP[tag];
  const role = el.getAttribute('role') || implicitRole || '';
  const name = getAccessibleName(el);
  const interTypes = classifyInteractivity(el);
  const interactive = interTypes.length > 0;
  const disabled = interactive && isDisabled(el);

  let line = indent;
  line += role || tag;

  if (name) line += ' "' + name.slice(0, 80) + '"';

  if (interactive) {
    line += ' [' + interTypes.join(',') + ']';
    if (disabled) line += ' {disabled}';
    line += ' xpath=' + getXPath(el);
  }

  // Input state
  if (tag === 'input') {
    const type = el.type || 'text';
    line += ' type=' + type;
    if (type === 'checkbox' || type === 'radio') {
      line += el.checked ? ' [checked]' : ' [unchecked]';
    } else if (el.value) {
      // Never print a password; only show that one has been entered.
      line += type === 'password'
        ? ' value="\u2022\u2022\u2022\u2022"'
        : ' value="' + el.value.slice(0, 50) + '"';
    }
  }
  if (tag === 'textarea' && el.value) {
    line += ' value="' + el.value.slice(0, 50) + '"';
  }
  if (tag === 'select') {
    const opts = Array.from(el.options || []).map((o) => ({
      text: o.text.slice(0, 30),
      value: o.value,
      selected: o.selected,
    }));
    const selected = opts.find((o) => o.selected);
    if (selected) line += ' selected="' + selected.text + '"';
    // Long option lists are omitted to keep the line short.
    if (opts.length <= 10) {
      line += ' options=[' + opts.map((o) => o.text).join('|') + ']';
    }
  }

  line += '\\n';

  let out = line;
  for (const c of el.childNodes) {
    out += walk(c, depth + 1, maxDepth);
  }

  // Shadow DOM
  if (el.shadowRoot) {
    for (const c of el.shadowRoot.childNodes) {
      out += walk(c, depth + 1, maxDepth);
    }
  }

  return out;
}
1234
+
1235
+ return walk(document.body, 0, 20);
1236
+ })()
1237
+ `;
1238
+
1239
+ // src/browser/dom/markdown.ts
1240
+ var MARKDOWN_SCRIPT = `
1241
+ (() => {
1242
+ const SKIP = new Set(['script','style','noscript','svg','head','template']);
1243
+ const baseUrl = location.href;
1244
+
1245
+ // Resolve relative URLs to absolute
1246
/**
 * Turns a possibly relative href into an absolute URL against baseUrl.
 * Empty values, javascript: URLs and pure fragments are returned untouched,
 * as is any value the URL constructor rejects.
 *
 * @param {string} href - Raw attribute value.
 * @returns {string} Absolute URL, or the input unchanged.
 */
function resolveUrl(href) {
  const passThrough = !href || href.startsWith('javascript:') || href.startsWith('#');
  if (passThrough) return href;
  try {
    const absolute = new URL(href, baseUrl);
    return absolute.href;
  } catch {
    return href;
  }
}
1250
+
1251
+ // Escape Markdown special chars in text
1252
+ // Escapes Markdown metacharacters in a text run: existing backslashes are
+ // doubled first, then each of * _ ~ [ ] | and the grave accent gets a
+ // leading backslash.
+ // The patterns look over-escaped because this code lives inside a JS
+ // template literal; every doubled backslash collapses to a single one in
+ // the script that is actually evaluated in the page.
+ // NOTE(review): nothing in MARKDOWN_SCRIPT calls this helper, so text nodes
+ // are currently emitted unescaped - confirm whether that is intended.
+ function escapeText(text) {
1253
+ return text
1254
+ .replace(/\\\\/g, '\\\\\\\\')
1255
+ .replace(/([*_~\`\\[\\]|])/g, '\\\\$1');
1256
+ }
1257
+
1258
+ // State tracking
1259
// Traversal state shared by every walk() call.
let listDepth = 0;          // number of enclosing ul / ol elements
let orderedCounters = [];   // one running item counter per open list
let inPre = false;          // true while inside pre: whitespace is kept as is

/** Indentation prefix for a list item at the current nesting depth. */
function listIndent() { return ' '.repeat(listDepth); }

/**
 * Recursively converts a DOM node to Markdown text.
 *
 * Text nodes have whitespace runs collapsed to one space (kept verbatim
 * inside pre). Elements in SKIP and elements hidden through display or
 * visibility produce nothing. Known tags map to their Markdown form;
 * everything else yields the concatenation of its children.
 *
 * Ordered lists start counting at the start property of the ol element.
 * Tables read their rows and cells from the rows / cells collections of the
 * table itself, so rows of a table nested inside a cell are rendered within
 * that cell only and are not repeated as rows of the outer table.
 *
 * @param {Node} el - Node to convert.
 * @returns {string} Markdown fragment, possibly empty.
 */
function walk(el) {
  if (!el) return '';

  // Text node
  if (el.nodeType === 3) {
    const text = el.textContent || '';
    if (inPre) return text;
    // Collapse whitespace; drop a whitespace-only node that has no siblings.
    const collapsed = text.replace(/\\s+/g, ' ');
    return collapsed === ' ' && !el.previousSibling && !el.nextSibling ? '' : collapsed;
  }

  if (el.nodeType !== 1) return '';
  const tag = el.tagName.toLowerCase();
  if (SKIP.has(tag)) return '';

  // Visibility check; a style lookup failure keeps the element.
  try {
    const s = getComputedStyle(el);
    if (s.display === 'none' || s.visibility === 'hidden') return '';
  } catch {}

  // Concatenated Markdown of all child nodes.
  function childContent() {
    let out = '';
    for (const c of el.childNodes) out += walk(c);
    return out;
  }

  // Headings h1 .. h6: the digit is the number of leading # characters.
  if (tag.length === 2 && tag[0] === 'h' && tag[1] >= '1' && tag[1] <= '6') {
    return '\\n\\n' + '#'.repeat(Number(tag[1])) + ' ' + childContent().trim() + '\\n\\n';
  }

  switch (tag) {
    // Block elements
    case 'p': return '\\n\\n' + childContent().trim() + '\\n\\n';
    case 'br': return '\\n';
    case 'hr': return '\\n\\n---\\n\\n';

    // Inline formatting; empty content produces no markers at all.
    case 'strong': case 'b': {
      const inner = childContent().trim();
      return inner ? '**' + inner + '**' : '';
    }
    case 'em': case 'i': {
      const inner = childContent().trim();
      return inner ? '*' + inner + '*' : '';
    }
    case 's': case 'del': case 'strike': {
      const inner = childContent().trim();
      return inner ? '~~' + inner + '~~' : '';
    }
    case 'code': {
      // Inside pre the fence is emitted by the pre branch.
      if (inPre) return childContent();
      const inner = childContent();
      return inner ? '\\x60' + inner + '\\x60' : '';
    }

    // Code blocks
    case 'pre': {
      inPre = true;
      const inner = childContent();
      inPre = false;
      const lang = el.querySelector('code')?.className?.match(/language-(\\w+)/)?.[1] || '';
      return '\\n\\n\\x60\\x60\\x60' + lang + '\\n' + inner.trim() + '\\n\\x60\\x60\\x60\\n\\n';
    }

    // Links
    case 'a': {
      const href = resolveUrl(el.getAttribute('href') || '');
      const inner = childContent().trim();
      const name = inner || el.getAttribute('aria-label') || el.getAttribute('title') || '';
      if (!name) return '';
      // Links that lead nowhere useful are reduced to their text.
      if (!href || href === '#' || href.startsWith('javascript:')) return name;
      return '[' + name + '](' + href + ')';
    }

    // Images
    case 'img': {
      const alt = el.getAttribute('alt') || '';
      const src = resolveUrl(el.getAttribute('src') || '');
      return src ? '![' + alt + '](' + src + ')' : '';
    }

    // Lists
    case 'ul': case 'ol': {
      // The counter holds the number of the previous item, so it starts one
      // below the first number to print.
      const firstNumber = tag === 'ol' && Number.isInteger(el.start) ? el.start : 1;
      listDepth++;
      orderedCounters.push(firstNumber - 1);
      const inner = childContent();
      listDepth--;
      orderedCounters.pop();
      return '\\n' + inner;
    }
    case 'li': {
      const parentTag = el.parentElement?.tagName?.toLowerCase();
      const inner = childContent().trim();
      if (!inner) return '';
      if (parentTag === 'ol') {
        const counter = orderedCounters.length > 0
          ? ++orderedCounters[orderedCounters.length - 1] : 1;
        return listIndent() + counter + '. ' + inner + '\\n';
      }
      return listIndent() + '- ' + inner + '\\n';
    }

    // Blockquote
    case 'blockquote': {
      const inner = childContent().trim();
      if (!inner) return '';
      return '\\n\\n' + inner.split('\\n').map((line) => '> ' + line).join('\\n') + '\\n\\n';
    }

    // Tables
    case 'table': {
      const rows = Array.from(el.rows || el.querySelectorAll('tr'));
      let out = '\\n\\n';
      let headerDone = false;

      rows.forEach((row, i) => {
        const cells = Array.from(row.cells || row.querySelectorAll('th, td'));
        const isHeader = cells.some((cell) => cell.tagName.toLowerCase() === 'th');
        const cellTexts = cells.map((cell) => {
          let cellText = '';
          for (const c of cell.childNodes) cellText += walk(c);
          // Pipes would end the cell and line breaks would end the row.
          return cellText.trim().split('|').join('\\\\|').split('\\n').join(' ');
        });

        out += '| ' + cellTexts.join(' | ') + ' |\\n';

        // One separator only: after the first header row, or after the very
        // first row when that row has no header cells.
        if (!headerDone && (isHeader || i === 0)) {
          out += '| ' + cellTexts.map(() => '---').join(' | ') + ' |\\n';
          headerDone = true;
        }
      });

      return out + '\\n';
    }

    // Definition lists
    case 'dl': return '\\n\\n' + childContent() + '\\n\\n';
    case 'dt': return '\\n**' + childContent().trim() + '**\\n';
    case 'dd': return ': ' + childContent().trim() + '\\n';

    // Figure
    case 'figure': return '\\n\\n' + childContent().trim() + '\\n\\n';
    case 'figcaption': return '\\n*' + childContent().trim() + '*\\n';

    // Details / summary
    case 'details': return '\\n\\n' + childContent() + '\\n\\n';
    case 'summary': return '**' + childContent().trim() + '**\\n\\n';

    // Generic blocks are separated from their surroundings by line breaks.
    case 'div': case 'section': case 'article': case 'main': case 'aside':
    case 'header': case 'footer': case 'nav':
      return '\\n' + childContent() + '\\n';

    // Inline containers, orphaned table parts (thead, tbody, tfoot, tr, td,
    // th outside a table) and unknown tags are transparent.
    default:
      return childContent();
  }
}
1457
+
1458
+ const raw = walk(document.body);
1459
+ // Clean up: collapse 3+ newlines to 2, trim
1460
+ return raw.replace(/\\n{3,}/g, '\\n\\n').replace(/^\\n+|\\n+$/g, '').trim();
1461
+ })()
1462
+ `;
1463
+
1464
+ // src/browser/dom/form-state.ts
1465
+ var FORM_STATE_SCRIPT = `
1466
+ (() => {
1467
/**
 * Describes one user-editable form field.
 *
 * Returns null for controls a user does not fill in: inputs of type hidden,
 * submit, button, reset or image, buttons, and the fieldset, object and
 * output members that form.elements also lists.
 *
 * The label is the first non-empty value among: aria-label, the text of a
 * label element whose htmlFor equals the field id, the text of an enclosing
 * label, and the placeholder. The id is compared as a plain string rather
 * than spliced into a CSS selector, so ids with quotes or brackets are safe.
 *
 * @param {Element} el - Candidate field.
 * @returns {{tag: string, type: string, name: string, label: string,
 *   value: (string|boolean), required: boolean, disabled: boolean,
 *   ref: (string|null)}|null} Field description, or null when skipped.
 */
function extractField(el) {
  const tag = el.tagName.toLowerCase();
  const type = (el.getAttribute('type') || tag).toLowerCase();

  // Skip non-user-facing inputs
  if (['hidden', 'submit', 'button', 'reset', 'image'].includes(type)) return null;
  // Skip containers and read-only members of form.elements
  if (['fieldset', 'object', 'output', 'button'].includes(tag)) return null;

  const name = el.name || el.id || '';

  // label[for=id], located without building a selector from the id
  let forLabelText = null;
  if (el.id) {
    for (const candidate of document.getElementsByTagName('label')) {
      if (candidate.htmlFor === el.id) {
        forLabelText = candidate.textContent?.trim();
        break;
      }
    }
  }

  // Find label via multiple strategies
  const label =
    el.getAttribute('aria-label') ||
    forLabelText ||
    el.closest('label')?.textContent?.trim() ||
    el.placeholder ||
    '';

  // Extract value based on type
  let value;
  if (tag === 'select') {
    const selected = el.options[el.selectedIndex];
    value = selected ? selected.textContent.trim() : '';
  } else if (type === 'checkbox' || type === 'radio') {
    value = el.checked;
  } else if (type === 'password') {
    // Only reveal whether a password has been entered.
    value = el.value ? '\u2022\u2022\u2022\u2022' : '';
  } else if (el.isContentEditable) {
    value = el.textContent?.trim()?.slice(0, 200) || '';
  } else {
    value = el.value || '';
  }

  return {
    tag,
    type,
    name,
    label: label.slice(0, 80),
    value: typeof value === 'string' ? value.slice(0, 200) : value,
    required: !!el.required,
    disabled: !!el.disabled,
    ref: el.dataset?.ref || null,
  };
}
1510
+
1511
+ const result = { forms: [], orphanFields: [] };
1512
+
1513
+ // Collect forms
1514
+ for (const form of document.forms) {
1515
+ const fields = [];
1516
+ for (const el of form.elements) {
1517
+ const field = extractField(el);
1518
+ if (field) fields.push(field);
1519
+ }
1520
+ result.forms.push({
1521
+ id: form.id || '',
1522
+ name: form.name || '',
1523
+ action: form.action || '',
1524
+ method: (form.method || 'get').toUpperCase(),
1525
+ fields,
1526
+ });
1527
+ }
1528
+
1529
+ // Collect orphan fields (not in a <form>)
1530
+ const allInputs = document.querySelectorAll(
1531
+ 'input, textarea, select, [contenteditable="true"]'
1532
+ );
1533
+ for (const el of allInputs) {
1534
+ if (!el.form) {
1535
+ const field = extractField(el);
1536
+ if (field) result.orphanFields.push(field);
1537
+ }
1538
+ }
1539
+
1540
+ return result;
1541
+ })()
1542
+ `;
1543
+
1544
+ // src/browser/interceptor.ts
1545
/**
 * Builds the source of a page-side script that records JSON responses of
 * fetch and XMLHttpRequest calls whose URL contains the given substring.
 *
 * The script installs itself at most once per page (guarded by
 * window.__lobster_interceptor__); a later injection, even with a different
 * pattern, does nothing. Captured entries have the shape
 * { url, method, status, body, timestamp } and are appended to
 * window.__lobster_interceptor__.requests. Responses whose body is not valid
 * JSON are ignored.
 *
 * The URL is normalised to a string for string, URL and Request inputs, and
 * the fetch method is taken from the init object or the Request (defaulting
 * to GET) instead of being assumed.
 *
 * @param {string} pattern - Substring a request URL must contain to be
 *   recorded; embedded into the script as a JSON literal.
 * @returns {string} JavaScript source to evaluate in the page.
 */
function buildInterceptorScript(pattern) {
  return `
(() => {
  // Install once; the first pattern wins for the lifetime of the page.
  if (window.__lobster_interceptor__) return;
  window.__lobster_interceptor__ = { requests: [] };
  const store = window.__lobster_interceptor__;
  const pattern = ${JSON.stringify(pattern)};

  // Patch fetch
  const origFetch = window.fetch;
  window.fetch = async function(...args) {
    const input = args[0];
    const init = args[1];
    // input may be a string, a URL object or a Request.
    const url = typeof input === 'string' ? input : String((input && input.url) || input || '');
    const method = String((init && init.method) || (input && input.method) || 'GET').toUpperCase();
    const resp = await origFetch.apply(this, args);
    if (url.includes(pattern)) {
      // Read a clone so the caller can still consume the original body.
      const clone = resp.clone();
      try {
        const body = await clone.json();
        store.requests.push({ url, method, status: resp.status, body, timestamp: Date.now() });
      } catch {}
    }
    return resp;
  };

  // Patch XHR
  const origOpen = XMLHttpRequest.prototype.open;
  const origSend = XMLHttpRequest.prototype.send;
  XMLHttpRequest.prototype.open = function(method, url, ...rest) {
    // open() also accepts URL objects; keep a string for the substring test.
    this.__url = url == null ? '' : String(url);
    this.__method = method;
    return origOpen.call(this, method, url, ...rest);
  };
  XMLHttpRequest.prototype.send = function(...args) {
    this.addEventListener('load', function() {
      if (this.__url && this.__url.includes(pattern)) {
        try {
          const body = JSON.parse(this.responseText);
          store.requests.push({ url: this.__url, method: this.__method, status: this.status, body, timestamp: Date.now() });
        } catch {}
      }
    });
    return origSend.apply(this, args);
  };
})()
`;
}
1590
+ // Page-side script that drains the interceptor buffer: it returns a copy of
+ // window.__lobster_interceptor__.requests and then resets that buffer to an
+ // empty array. When the interceptor object is absent it returns [].
+ var GET_INTERCEPTED_SCRIPT = `
1591
+ (() => {
1592
+ const store = window.__lobster_interceptor__;
1593
+ if (!store) return [];
1594
+ const reqs = [...store.requests];
1595
+ store.requests = [];
1596
+ return reqs;
1597
+ })()
1598
+ `;
1599
+
1600
+ // src/browser/page-adapter.ts
1601
+ var PuppeteerPage = class {
1602
+ page;
1603
+ constructor(page) {
1604
+ this.page = page;
1605
+ }
1606
+ get raw() {
1607
+ return this.page;
1608
+ }
1609
+ async goto(url, options) {
1610
+ await this.page.goto(url, {
1611
+ waitUntil: options?.waitUntil || "networkidle2",
1612
+ timeout: options?.timeout || 3e4
1613
+ });
1614
+ }
1615
+ async goBack() {
1616
+ await this.page.goBack({ waitUntil: "networkidle2" });
1617
+ }
1618
+ async url() {
1619
+ return this.page.url();
1620
+ }
1621
+ async title() {
1622
+ return this.page.title();
1623
+ }
1624
+ async evaluate(js) {
1625
+ return this.page.evaluate(js);
1626
+ }
1627
+ async snapshot(_opts) {
1628
+ return this.page.evaluate(SNAPSHOT_SCRIPT);
1629
+ }
1630
+ async semanticTree(_opts) {
1631
+ return this.page.evaluate(SEMANTIC_TREE_SCRIPT);
1632
+ }
1633
+ async flatTree() {
1634
+ const raw = await this.page.evaluate(FLAT_TREE_SCRIPT);
1635
+ return raw;
1636
+ }
1637
+ async markdown() {
1638
+ return this.page.evaluate(MARKDOWN_SCRIPT);
1639
+ }
1640
+ async browserState() {
1641
+ const state = await this.page.evaluate(`
1642
+ (() => {
1643
+ const scrollY = window.scrollY;
1644
+ const scrollX = window.scrollX;
1645
+ const vpW = window.innerWidth;
1646
+ const vpH = window.innerHeight;
1647
+ const pageW = document.documentElement.scrollWidth;
1648
+ const pageH = document.documentElement.scrollHeight;
1649
+ const maxScrollY = pageH - vpH;
1650
+ return {
1651
+ url: location.href,
1652
+ title: document.title,
1653
+ viewportWidth: vpW,
1654
+ viewportHeight: vpH,
1655
+ pageWidth: pageW,
1656
+ pageHeight: pageH,
1657
+ scrollX: scrollX,
1658
+ scrollY: scrollY,
1659
+ scrollPercent: maxScrollY > 0 ? Math.round((scrollY / maxScrollY) * 100) : 0,
1660
+ pixelsAbove: Math.round(scrollY),
1661
+ pixelsBelow: Math.round(Math.max(0, maxScrollY - scrollY)),
1662
+ };
1663
+ })()
1664
+ `);
1665
+ return state;
1666
+ }
1667
+ async formState() {
1668
+ return this.page.evaluate(FORM_STATE_SCRIPT);
1669
+ }
1670
+ async click(ref) {
1671
+ if (typeof ref === "number") {
1672
+ await this.page.evaluate((idx) => {
1673
+ const el = document.querySelector('[data-ref="' + idx + '"]');
1674
+ if (!el) throw new Error("Element with index " + idx + " not found");
1675
+ const prev = document.activeElement;
1676
+ if (prev && prev !== el && prev !== document.body) {
1677
+ prev.blur();
1678
+ prev.dispatchEvent(new MouseEvent("mouseout", { bubbles: true, cancelable: true }));
1679
+ prev.dispatchEvent(new MouseEvent("mouseleave", { bubbles: false, cancelable: true }));
1680
+ }
1681
+ if (typeof el.scrollIntoViewIfNeeded === "function") {
1682
+ el.scrollIntoViewIfNeeded();
1683
+ } else {
1684
+ el.scrollIntoView({ behavior: "auto", block: "center", inline: "nearest" });
1685
+ }
1686
+ el.dispatchEvent(new MouseEvent("mouseenter", { bubbles: true, cancelable: true }));
1687
+ el.dispatchEvent(new MouseEvent("mouseover", { bubbles: true, cancelable: true }));
1688
+ el.dispatchEvent(new MouseEvent("mousedown", { bubbles: true, cancelable: true }));
1689
+ el.focus();
1690
+ el.dispatchEvent(new MouseEvent("mouseup", { bubbles: true, cancelable: true }));
1691
+ el.dispatchEvent(new MouseEvent("click", { bubbles: true, cancelable: true }));
1692
+ }, ref);
1693
+ await new Promise((r) => setTimeout(r, 200));
1694
+ } else {
1695
+ await this.page.click(ref);
1696
+ }
1697
+ }
1698
+ async typeText(ref, text) {
1699
+ if (typeof ref === "number") {
1700
+ await this.click(ref);
1701
+ await this.page.evaluate((idx, txt) => {
1702
+ const el = document.querySelector('[data-ref="' + idx + '"]');
1703
+ if (!el) throw new Error("Element with index " + idx + " not found");
1704
+ const isInput = el.tagName === "INPUT" || el.tagName === "TEXTAREA";
1705
+ const isContentEditable = el.isContentEditable;
1706
+ if (isContentEditable) {
1707
+ if (el.dispatchEvent(new InputEvent("beforeinput", {
1708
+ bubbles: true,
1709
+ cancelable: true,
1710
+ inputType: "deleteContent"
1711
+ }))) {
1712
+ el.innerText = "";
1713
+ el.dispatchEvent(new InputEvent("input", {
1714
+ bubbles: true,
1715
+ inputType: "deleteContent"
1716
+ }));
1717
+ }
1718
+ if (el.dispatchEvent(new InputEvent("beforeinput", {
1719
+ bubbles: true,
1720
+ cancelable: true,
1721
+ inputType: "insertText",
1722
+ data: txt
1723
+ }))) {
1724
+ el.innerText = txt;
1725
+ el.dispatchEvent(new InputEvent("input", {
1726
+ bubbles: true,
1727
+ inputType: "insertText",
1728
+ data: txt
1729
+ }));
1730
+ }
1731
+ const planAOk = el.innerText.trim() === txt.trim();
1732
+ if (!planAOk) {
1733
+ el.focus();
1734
+ const doc = el.ownerDocument;
1735
+ const sel = (doc.defaultView || window).getSelection();
1736
+ const range = doc.createRange();
1737
+ range.selectNodeContents(el);
1738
+ sel?.removeAllRanges();
1739
+ sel?.addRange(range);
1740
+ doc.execCommand("delete", false);
1741
+ doc.execCommand("insertText", false, txt);
1742
+ }
1743
+ el.dispatchEvent(new Event("change", { bubbles: true }));
1744
+ el.blur();
1745
+ } else if (isInput) {
1746
+ const inputEl = el;
1747
+ const proto = Object.getPrototypeOf(inputEl);
1748
+ const descriptor = Object.getOwnPropertyDescriptor(proto, "value") || Object.getOwnPropertyDescriptor(HTMLInputElement.prototype, "value") || Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, "value");
1749
+ if (descriptor?.set) {
1750
+ descriptor.set.call(inputEl, txt);
1751
+ } else {
1752
+ inputEl.value = txt;
1753
+ }
1754
+ inputEl.dispatchEvent(new Event("input", { bubbles: true }));
1755
+ inputEl.dispatchEvent(new Event("change", { bubbles: true }));
1756
+ } else {
1757
+ el.value = txt;
1758
+ el.dispatchEvent(new Event("input", { bubbles: true }));
1759
+ el.dispatchEvent(new Event("change", { bubbles: true }));
1760
+ }
1761
+ }, ref, text);
1762
+ } else {
1763
+ await this.page.click(ref, { count: 3 });
1764
+ await this.page.keyboard.type(text);
1765
+ }
1766
+ }
1767
+ async pressKey(key) {
1768
+ await this.page.keyboard.press(key);
1769
+ }
1770
+ async selectOption(ref, value) {
1771
+ const selector = typeof ref === "number" ? '[data-ref="' + ref + '"]' : ref;
1772
+ await this.page.select(selector, value);
1773
+ }
1774
+ async scroll(direction, amount) {
1775
+ const distance = amount || 500;
1776
+ const isVertical = direction === "up" || direction === "down";
1777
+ const positive = direction === "down" || direction === "right";
1778
+ const delta = positive ? distance : -distance;
1779
+ await this.page.evaluate((dy, dx, isVert) => {
1780
+ const canScroll = (el2) => {
1781
+ if (!el2) return false;
1782
+ const s = getComputedStyle(el2);
1783
+ if (isVert) {
1784
+ return /(auto|scroll|overlay)/.test(s.overflowY) && el2.scrollHeight > el2.clientHeight && el2.clientHeight >= window.innerHeight * 0.3;
1785
+ } else {
1786
+ return /(auto|scroll|overlay)/.test(s.overflowX) && el2.scrollWidth > el2.clientWidth && el2.clientWidth >= window.innerWidth * 0.3;
1787
+ }
1788
+ };
1789
+ let el = document.activeElement;
1790
+ while (el && !canScroll(el) && el !== document.body) {
1791
+ el = el.parentElement;
1792
+ }
1793
+ if (!canScroll(el)) {
1794
+ el = Array.from(document.querySelectorAll("*")).find(canScroll) || null;
1795
+ }
1796
+ const isPageLevel = !el || el === document.body || el === document.documentElement || el === document.scrollingElement;
1797
+ if (isPageLevel) {
1798
+ if (isVert) {
1799
+ window.scrollBy(0, dy);
1800
+ } else {
1801
+ window.scrollBy(dx, 0);
1802
+ }
1803
+ } else {
1804
+ if (isVert) {
1805
+ el.scrollBy({ top: dy, behavior: "smooth" });
1806
+ } else {
1807
+ el.scrollBy({ left: dx, behavior: "smooth" });
1808
+ }
1809
+ }
1810
+ }, isVertical ? delta : 0, isVertical ? 0 : delta, isVertical);
1811
+ await new Promise((r) => setTimeout(r, 150));
1812
+ }
1813
+ async scrollToElement(ref) {
1814
+ const selector = typeof ref === "number" ? '[data-ref="' + ref + '"]' : ref;
1815
+ await this.page.evaluate((sel) => {
1816
+ const el = document.querySelector(sel);
1817
+ if (!el) return;
1818
+ if (typeof el.scrollIntoViewIfNeeded === "function") {
1819
+ el.scrollIntoViewIfNeeded();
1820
+ } else {
1821
+ el.scrollIntoView({ behavior: "auto", block: "center", inline: "nearest" });
1822
+ }
1823
+ }, selector);
1824
+ }
1825
+ async getCookies(opts) {
1826
+ const cookies = await this.page.cookies();
1827
+ const filtered = opts?.domain ? cookies.filter((c) => c.domain.includes(opts.domain)) : cookies;
1828
+ return filtered.map((c) => ({
1829
+ name: c.name,
1830
+ value: c.value,
1831
+ domain: c.domain,
1832
+ path: c.path,
1833
+ expires: c.expires,
1834
+ httpOnly: c.httpOnly,
1835
+ secure: c.secure,
1836
+ sameSite: c.sameSite
1837
+ }));
1838
+ }
1839
+ async wait(options) {
1840
+ if (typeof options === "number") {
1841
+ await new Promise((r) => setTimeout(r, options * 1e3));
1842
+ return;
1843
+ }
1844
+ if (options.time) {
1845
+ await new Promise((r) => setTimeout(r, options.time * 1e3));
1846
+ }
1847
+ if (options.text) {
1848
+ await this.page.waitForFunction(
1849
+ (t) => document.body.innerText.includes(t),
1850
+ { timeout: options.timeout || 3e4 },
1851
+ options.text
1852
+ );
1853
+ }
1854
+ }
1855
+ async networkRequests(includeStatic) {
1856
+ const entries = await this.page.evaluate(`
1857
+ (() => {
1858
+ const entries = performance.getEntriesByType('resource');
1859
+ const staticTypes = new Set(['img', 'font', 'css', 'script', 'link']);
1860
+ const includeStatic = ${!!includeStatic};
1861
+
1862
+ return entries
1863
+ .filter(e => includeStatic || !staticTypes.has(e.initiatorType))
1864
+ .map(e => ({
1865
+ url: e.name,
1866
+ method: 'GET',
1867
+ status: 200,
1868
+ type: e.initiatorType || 'other',
1869
+ size: e.transferSize || e.encodedBodySize || 0,
1870
+ duration: Math.round(e.duration),
1871
+ }));
1872
+ })()
1873
+ `);
1874
+ return entries || [];
1875
+ }
1876
+ async installInterceptor(pattern) {
1877
+ await this.page.evaluate(buildInterceptorScript(pattern));
1878
+ }
1879
+ async getInterceptedRequests() {
1880
+ return this.page.evaluate(GET_INTERCEPTED_SCRIPT);
1881
+ }
1882
+ async screenshot(opts) {
1883
+ const result = await this.page.screenshot({
1884
+ type: opts?.format || "png",
1885
+ fullPage: opts?.fullPage ?? false
1886
+ });
1887
+ return Buffer.from(result);
1888
+ }
1889
+ async tabs() {
1890
+ const browser = this.page.browser();
1891
+ const pages = await browser.pages();
1892
+ return pages.map((p, i) => ({
1893
+ id: i,
1894
+ url: p.url(),
1895
+ title: "",
1896
+ active: p === this.page
1897
+ }));
1898
+ }
1899
+ async close() {
1900
+ await this.page.close();
1901
+ }
1902
+ };
1903
+
1904
+ // src/browser/dom/interactive.ts
1905
/**
 * Script text whose IIFE walks every element under `document.body` and
 * returns a record for each one that is classified as interactive (native
 * control, interactive ARIA role, contenteditable, explicit tabindex, or an
 * `onclick` handler) and is not hidden via `display: none` /
 * `visibility: hidden`. Indexes are assigned in document order to the
 * elements that pass both checks.
 */
var INTERACTIVE_ELEMENTS_SCRIPT = `
(() => {
const results = [];

function classify(el) {
const tag = el.tagName.toLowerCase();
const role = el.getAttribute('role');
const types = [];

// Native interactive
if (['a', 'button', 'input', 'select', 'textarea', 'details', 'summary'].includes(tag)) {
types.push('native');
}

// ARIA role interactive
if (role && ['button', 'link', 'textbox', 'checkbox', 'radio', 'combobox', 'tab', 'switch', 'menuitem', 'slider'].includes(role)) {
types.push('aria');
}

// Contenteditable
if (el.contentEditable === 'true') types.push('contenteditable');

// Focusable
if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) types.push('focusable');

// Has click listener (approximate)
if (el.onclick) types.push('listener');

return types;
}

let idx = 0;
const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_ELEMENT);
let node;
while (node = walker.nextNode()) {
const types = classify(node);
if (types.length === 0) continue;

const style = getComputedStyle(node);
if (style.display === 'none' || style.visibility === 'hidden') continue;

const rect = node.getBoundingClientRect();
results.push({
index: idx++,
tag: node.tagName.toLowerCase(),
role: node.getAttribute('role') || '',
text: (node.textContent || '').trim().slice(0, 100),
types,
ariaLabel: node.getAttribute('aria-label') || '',
rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
});
}

return results;
})()
`;
1961
+
1962
+ // src/browser/lightpanda.ts
1963
// Tags that parseHtml treats as never having children or an end tag.
var SELF_CLOSING = /* @__PURE__ */ new Set([
  "area", "base", "br", "col", "embed", "hr", "img",
  "input", "link", "meta", "param", "source", "track", "wbr"
]);
// Tags whose content parseHtml keeps verbatim (no nested markup parsing)
// up to the matching end tag.
var RAWTEXT_TAGS = /* @__PURE__ */ new Set([
  "script",
  "style",
  "textarea",
  "title"
]);
1980
/**
 * Parse an HTML string into a lightweight tree of plain objects.
 *
 * Element nodes look like `{ type: "element", tag, attributes, children,
 * selfClosing }` (children is undefined for self-closing tags); text nodes
 * look like `{ type: "text", text }`. The parser is deliberately forgiving:
 * comments, doctypes and processing instructions are dropped, unmatched end
 * tags are ignored, and an unterminated tag ends parsing.
 *
 * Fixes over the previous version:
 *  - an unterminated raw-text end tag (e.g. `<script>x</script` at EOF) no
 *    longer resets the cursor to 0 and loops forever;
 *  - a repeated `<p>/<li>/<td>/<th>/<dt>/<dd>` that implicitly closes its
 *    open twin is now attached as a sibling, not as a child of that twin;
 *  - raw-text end tags are located with a case-insensitive search on the
 *    original string, so indices can no longer drift when lower-casing
 *    changes the string length, and the document is not re-lowercased for
 *    every raw-text tag.
 *
 * @param {string} html - Markup to parse.
 * @returns {Array<object>} Top-level nodes.
 */
function parseHtml(html) {
  const IMPLICIT_SIBLING_CLOSE = new Set(["p", "li", "td", "th", "dt", "dd"]);
  const root = [];
  const stack = [{ node: { type: "element", tag: "root", children: root }, children: root }];
  let pos = 0;
  function current() {
    return stack[stack.length - 1];
  }
  function addText(text) {
    if (!text) return;
    const decoded = decodeEntities(text);
    // Keep whitespace-only runs only when they carry a line break.
    if (decoded.trim() || decoded.includes("\n")) {
      current().children.push({ type: "text", text: decoded });
    }
  }
  // Index of the first `</tagName` at or after `from`, ignoring ASCII case.
  function findRawTextEnd(tagName, from) {
    const escaped = tagName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const closer = new RegExp("</" + escaped, "gi");
    closer.lastIndex = from;
    const found = closer.exec(html);
    return found ? found.index : -1;
  }
  while (pos < html.length) {
    const nextTag = html.indexOf("<", pos);
    if (nextTag === -1) {
      addText(html.slice(pos));
      break;
    }
    if (nextTag > pos) {
      addText(html.slice(pos, nextTag));
    }
    if (html.startsWith("<!--", nextTag)) {
      const endComment = html.indexOf("-->", nextTag + 4);
      pos = endComment === -1 ? html.length : endComment + 3;
      continue;
    }
    if (html.startsWith("<!", nextTag) || html.startsWith("<?", nextTag)) {
      const endDoctype = html.indexOf(">", nextTag);
      pos = endDoctype === -1 ? html.length : endDoctype + 1;
      continue;
    }
    if (html[nextTag + 1] === "/") {
      const endClose = html.indexOf(">", nextTag);
      if (endClose === -1) {
        pos = html.length;
        break;
      }
      const closeTag = html.slice(nextTag + 2, endClose).trim().toLowerCase();
      pos = endClose + 1;
      // Unwind to the nearest open element with this name; the root entry
      // (index 0) is never popped, and unmatched end tags are ignored.
      for (let i = stack.length - 1; i > 0; i--) {
        if (stack[i].node.tag === closeTag) {
          stack.length = i;
          break;
        }
      }
      continue;
    }
    const tagEnd = html.indexOf(">", nextTag);
    if (tagEnd === -1) {
      pos = html.length;
      break;
    }
    const tagContent = html.slice(nextTag + 1, tagEnd);
    const selfClose = tagContent.endsWith("/");
    const cleanContent = selfClose ? tagContent.slice(0, -1).trim() : tagContent.trim();
    const spaceIdx = cleanContent.search(/[\s/]/);
    const tagName = (spaceIdx === -1 ? cleanContent : cleanContent.slice(0, spaceIdx)).toLowerCase();
    const attrStr = spaceIdx === -1 ? "" : cleanContent.slice(spaceIdx);
    if (!tagName || tagName.startsWith("!")) {
      pos = tagEnd + 1;
      continue;
    }
    const attributes = parseAttributes(attrStr);
    const isSelfClosing = selfClose || SELF_CLOSING.has(tagName);
    // `<li>a<li>b`: the second <li> closes the first. Pop BEFORE attaching the
    // new node so it becomes a sibling of the element it closes.
    if (!isSelfClosing && IMPLICIT_SIBLING_CLOSE.has(tagName) && stack.length > 1 && current().node.tag === tagName) {
      stack.pop();
    }
    const node = {
      type: "element",
      tag: tagName,
      attributes,
      children: isSelfClosing ? void 0 : [],
      selfClosing: isSelfClosing
    };
    current().children.push(node);
    pos = tagEnd + 1;
    if (isSelfClosing) continue;
    if (RAWTEXT_TAGS.has(tagName)) {
      const endRaw = findRawTextEnd(tagName, pos);
      if (endRaw !== -1) {
        const rawText = html.slice(pos, endRaw);
        if (rawText.trim()) {
          node.children.push({ type: "text", text: rawText });
        }
        const closeEnd = html.indexOf(">", endRaw);
        // A missing ">" means the document ends inside the end tag; stop
        // there instead of rewinding to the start of the input.
        pos = closeEnd === -1 ? html.length : closeEnd + 1;
      }
      // Raw-text elements are never pushed on the stack. When no end tag
      // exists, the remaining input is parsed as ordinary sibling markup.
      continue;
    }
    stack.push({ node, children: node.children });
  }
  return root;
}
2076
/**
 * Turn the attribute portion of a start tag into a name → value map.
 *
 * Names are lower-cased; values may be double-quoted, single-quoted or bare,
 * are entity-decoded, and default to "" for valueless attributes. Later
 * duplicates overwrite earlier ones.
 *
 * @param {string} str - Text following the tag name.
 * @returns {Object<string, string>} Parsed attributes.
 */
function parseAttributes(str) {
  const ATTRIBUTE = /(\w[\w-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/g;
  const attrs = {};
  for (const [, name, doubleQuoted, singleQuoted, bare] of str.matchAll(ATTRIBUTE)) {
    const rawValue = doubleQuoted ?? singleQuoted ?? bare ?? "";
    attrs[name.toLowerCase()] = decodeEntities(rawValue);
  }
  return attrs;
}
2085
/**
 * Decode the HTML character references this parser understands: `&amp;`,
 * `&lt;`, `&gt;`, `&quot;`, `&apos;`, `&nbsp;` (as a plain space) and
 * decimal / hexadecimal numeric references.
 *
 * Decoding happens in a single pass, so already-decoded output is never
 * decoded again (`&amp;lt;` yields `&lt;`, not `<`). Numeric references are
 * converted with `String.fromCodePoint`, so characters above U+FFFF survive;
 * references beyond U+10FFFF are left untouched. Unknown named references are
 * left untouched as well.
 *
 * @param {string} text - Text possibly containing character references.
 * @returns {string} Decoded text.
 */
function decodeEntities(text) {
  const NAMED = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'", nbsp: " " };
  return text.replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos|nbsp));/g, (entity, decimal, hex, name) => {
    if (name !== void 0) return NAMED[name];
    const codePoint = decimal !== void 0 ? Number.parseInt(decimal, 10) : Number.parseInt(hex, 16);
    // fromCodePoint throws above U+10FFFF; keep such references verbatim.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
  });
}
2088
// Elements whose whole subtree is ignored by the text/markdown/snapshot extractors.
var SKIP_TAGS = /* @__PURE__ */ new Set([
  "script", "style", "noscript", "svg", "head", "template", "iframe"
]);
// Elements that the extractors surround with line breaks.
var BLOCK_TAGS = /* @__PURE__ */ new Set([
  "div", "p", "section", "article", "main", "aside", "blockquote", "pre",
  "ul", "ol", "li", "table", "tr", "td", "th",
  "h1", "h2", "h3", "h4", "h5", "h6",
  "br", "hr", "figure", "figcaption", "details", "summary",
  "dl", "dt", "dd", "header", "footer", "nav", "form"
]);
// Markdown heading prefix for each heading tag: h1 → "#", … h6 → "######".
var HEADING_LEVELS = Object.fromEntries(
  [1, 2, 3, 4, 5, 6].map((level) => ["h" + level, "#".repeat(level)])
);
// Tag names and ARIA roles that extractSnapshot numbers as interactive.
var INTERACTIVE_TAGS = /* @__PURE__ */ new Set([
  "a", "button", "input", "select", "textarea", "details", "summary"
]);
var INTERACTIVE_ROLES = /* @__PURE__ */ new Set([
  "button", "link", "textbox", "checkbox", "radio", "combobox", "tab", "switch", "menuitem"
]);
2093
/**
 * Flatten parsed nodes into plain text.
 *
 * Subtrees of SKIP_TAGS are dropped, the trimmed content of BLOCK_TAGS is
 * put on its own line, and every other element contributes its content
 * inline. Runs of three or more newlines collapse to two and the result is
 * trimmed.
 *
 * @param {Array<object>} nodes - Nodes produced by parseHtml.
 * @returns {string} Extracted text.
 */
function extractText(nodes) {
  let collected = "";
  for (const node of nodes) {
    if (node.type === "text") {
      collected += node.text;
      continue;
    }
    const isUsableElement = node.type === "element" && node.tag && !SKIP_TAGS.has(node.tag);
    if (!isUsableElement) continue;
    const content = node.children ? extractText(node.children) : "";
    collected += BLOCK_TAGS.has(node.tag) ? "\n" + content.trim() + "\n" : content;
  }
  return collected.replace(/\n{3,}/g, "\n\n").trim();
}
2111
+ function extractMarkdown(nodes, baseUrl) {
2112
+ let listDepth = 0;
2113
+ let olCounter = [];
2114
+ function resolveUrl(href) {
2115
+ if (!href || href.startsWith("javascript:") || href.startsWith("#")) return href;
2116
+ if (href.startsWith("http://") || href.startsWith("https://") || href.startsWith("//")) return href;
2117
+ if (baseUrl) {
2118
+ try {
2119
+ return new URL(href, baseUrl).href;
2120
+ } catch {
2121
+ }
2122
+ }
2123
+ return href;
2124
+ }
2125
+ function walk(nodes2) {
2126
+ let out = "";
2127
+ for (const node of nodes2) {
2128
+ if (node.type === "text") {
2129
+ out += node.text?.replace(/\s+/g, " ") || "";
2130
+ continue;
2131
+ }
2132
+ if (node.type !== "element" || !node.tag) continue;
2133
+ if (SKIP_TAGS.has(node.tag)) continue;
2134
+ const tag = node.tag;
2135
+ const children = node.children || [];
2136
+ const inner = walk(children).trim();
2137
+ if (HEADING_LEVELS[tag]) {
2138
+ out += `
2139
+
2140
+ ${HEADING_LEVELS[tag]} ${inner}
2141
+
2142
+ `;
2143
+ continue;
2144
+ }
2145
+ switch (tag) {
2146
+ case "p":
2147
+ out += `
2148
+
2149
+ ${inner}
2150
+
2151
+ `;
2152
+ break;
2153
+ case "br":
2154
+ out += "\n";
2155
+ break;
2156
+ case "hr":
2157
+ out += "\n\n---\n\n";
2158
+ break;
2159
+ case "strong":
2160
+ case "b":
2161
+ if (inner) out += `**${inner}**`;
2162
+ break;
2163
+ case "em":
2164
+ case "i":
2165
+ if (inner) out += `*${inner}*`;
2166
+ break;
2167
+ case "s":
2168
+ case "del":
2169
+ case "strike":
2170
+ if (inner) out += `~~${inner}~~`;
2171
+ break;
2172
+ case "code":
2173
+ if (inner) out += `\`${inner}\``;
2174
+ break;
2175
+ case "pre": {
2176
+ const lang = children.find((c) => c.tag === "code")?.attributes?.class?.match(/language-(\w+)/)?.[1] || "";
2177
+ out += `
2178
+
2179
+ \`\`\`${lang}
2180
+ ${inner}
2181
+ \`\`\`
2182
+
2183
+ `;
2184
+ break;
2185
+ }
2186
+ case "a": {
2187
+ const href = resolveUrl(node.attributes?.href || "");
2188
+ const text = inner || node.attributes?.["aria-label"] || node.attributes?.title || "";
2189
+ if (!text) break;
2190
+ if (!href || href === "#" || href.startsWith("javascript:")) {
2191
+ out += text;
2192
+ break;
2193
+ }
2194
+ out += `[${text}](${href})`;
2195
+ break;
2196
+ }
2197
+ case "img": {
2198
+ const alt = node.attributes?.alt || "";
2199
+ const src = resolveUrl(node.attributes?.src || "");
2200
+ if (src) out += `![${alt}](${src})`;
2201
+ break;
2202
+ }
2203
+ case "ul":
2204
+ listDepth++;
2205
+ olCounter.push(0);
2206
+ out += "\n" + walk(children);
2207
+ listDepth--;
2208
+ olCounter.pop();
2209
+ break;
2210
+ case "ol":
2211
+ listDepth++;
2212
+ olCounter.push(0);
2213
+ out += "\n" + walk(children);
2214
+ listDepth--;
2215
+ olCounter.pop();
2216
+ break;
2217
+ case "li": {
2218
+ const indent = " ".repeat(Math.max(0, listDepth - 1));
2219
+ const isOrdered = olCounter.length > 0 && olCounter[olCounter.length - 1] >= 0;
2220
+ if (isOrdered && olCounter.length > 0) olCounter[olCounter.length - 1]++;
2221
+ const counter = isOrdered && olCounter.length > 0 ? olCounter[olCounter.length - 1] : 0;
2222
+ const bullet = isOrdered ? `${counter}. ` : "- ";
2223
+ out += `${indent}${bullet}${inner}
2224
+ `;
2225
+ break;
2226
+ }
2227
+ case "blockquote": {
2228
+ if (inner) out += "\n\n" + inner.split("\n").map((l) => `> ${l}`).join("\n") + "\n\n";
2229
+ break;
2230
+ }
2231
+ case "table": {
2232
+ const rows = collectTableRows(children);
2233
+ if (rows.length > 0) {
2234
+ out += "\n\n";
2235
+ for (let i = 0; i < rows.length; i++) {
2236
+ out += "| " + rows[i].join(" | ") + " |\n";
2237
+ if (i === 0) out += "| " + rows[i].map(() => "---").join(" | ") + " |\n";
2238
+ }
2239
+ out += "\n";
2240
+ }
2241
+ break;
2242
+ }
2243
+ case "dt":
2244
+ out += `
2245
+ **${inner}**
2246
+ `;
2247
+ break;
2248
+ case "dd":
2249
+ out += `: ${inner}
2250
+ `;
2251
+ break;
2252
+ case "figcaption":
2253
+ out += `
2254
+ *${inner}*
2255
+ `;
2256
+ break;
2257
+ case "summary":
2258
+ out += `**${inner}**
2259
+
2260
+ `;
2261
+ break;
2262
+ default:
2263
+ if (BLOCK_TAGS.has(tag)) {
2264
+ out += "\n" + walk(children) + "\n";
2265
+ } else {
2266
+ out += walk(children);
2267
+ }
2268
+ }
2269
+ }
2270
+ return out;
2271
+ }
2272
+ function collectTableRows(nodes2) {
2273
+ const rows = [];
2274
+ for (const node of nodes2) {
2275
+ if (node.tag === "tr") {
2276
+ const cells = [];
2277
+ for (const cell of node.children || []) {
2278
+ if (cell.tag === "td" || cell.tag === "th") {
2279
+ cells.push(walk(cell.children || []).trim().replace(/\|/g, "\\|").replace(/\n/g, " "));
2280
+ }
2281
+ }
2282
+ if (cells.length > 0) rows.push(cells);
2283
+ } else if (node.tag === "thead" || node.tag === "tbody" || node.tag === "tfoot") {
2284
+ rows.push(...collectTableRows(node.children || []));
2285
+ }
2286
+ }
2287
+ return rows;
2288
+ }
2289
+ const raw = walk(nodes);
2290
+ return raw.replace(/\n{3,}/g, "\n\n").trim();
2291
+ }
2292
/**
 * Render parsed nodes as an indented outline for agents.
 *
 * Interactive elements get a running `[n]` prefix. An element is printed
 * when it is interactive, wraps a single text node, or has no children;
 * every other element is transparent and its children are printed at the
 * same depth. Only a fixed list of attributes is shown, each cut to 60
 * characters; text is cut to 150 characters.
 *
 * @param {Array<object>} nodes - Nodes produced by parseHtml.
 * @returns {string} Outline text, one entry per line.
 */
function extractSnapshot(nodes) {
  const SHOWN_ATTRIBUTES = ["type", "role", "aria-label", "placeholder", "href", "value", "name", "alt"];
  let nextIndex = 0;

  const isInteractive = (node) => {
    if (!node.tag) return false;
    const attributes = node.attributes;
    const role = attributes?.role;
    return Boolean(
      INTERACTIVE_TAGS.has(node.tag) ||
      (role && INTERACTIVE_ROLES.has(role)) ||
      attributes?.contenteditable === "true" ||
      (attributes?.tabindex && parseInt(attributes.tabindex) >= 0)
    );
  };

  const describeAttributes = (node) => {
    const shown = SHOWN_ATTRIBUTES
      .filter((name) => node.attributes?.[name])
      .map((name) => `${name}=${node.attributes[name].slice(0, 60)}`);
    return shown.length ? " " + shown.join(" ") : "";
  };

  const render = (list, depth) => {
    const pad = " ".repeat(depth);
    let out = "";
    for (const node of list) {
      if (node.type === "text") {
        const trimmed = node.text?.trim();
        if (trimmed) out += pad + trimmed.slice(0, 150) + "\n";
        continue;
      }
      if (node.type !== "element" || !node.tag) continue;
      if (SKIP_TAGS.has(node.tag)) continue;

      const interactive = isInteractive(node);
      const label = interactive ? `[${nextIndex++}]` : "";
      const attrs = describeAttributes(node);
      const kids = node.children;
      const onlyChildIsText = kids?.length === 1 && kids[0].type === "text";
      const leafText = onlyChildIsText ? kids[0].text?.trim().slice(0, 150) || "" : "";

      if (leafText) {
        out += `${pad}${label}<${node.tag}${attrs}>${leafText}</${node.tag}>\n`;
      } else if (interactive || !kids?.length) {
        out += `${pad}${label}<${node.tag}${attrs}>\n`;
        if (kids) out += render(kids, depth + 1);
      } else {
        // Non-interactive wrapper: stay at the current depth.
        out += render(kids, depth);
      }
    }
    return out;
  };

  return render(nodes, 0);
}
2344
/**
 * Collect `{ text, href }` for every `<a href>` in document order.
 *
 * Hrefs that do not start with "http" are resolved against `baseUrl` when
 * one is given (left as-is if resolution fails). Links without text or with
 * a `javascript:` target are omitted; text is cut to 200 characters.
 *
 * @param {Array<object>} nodes - Nodes produced by parseHtml.
 * @param {string} [baseUrl] - Base URL for relative links.
 * @returns {Array<{text: string, href: string}>} Links found.
 */
function extractLinks(nodes, baseUrl) {
  const found = [];

  const absolutize = (href) => {
    if (!baseUrl || href.startsWith("http")) return href;
    try {
      return new URL(href, baseUrl).href;
    } catch {
      return href;
    }
  };

  const visit = (list) => {
    list.forEach((node) => {
      const isAnchor = node.type === "element" && node.tag === "a";
      if (isAnchor && node.attributes?.href) {
        const href = absolutize(node.attributes.href);
        const text = extractText(node.children || []).trim();
        if (text && href && !href.startsWith("javascript:")) {
          found.push({ text: text.slice(0, 200), href });
        }
      }
      if (node.children) visit(node.children);
    });
  };

  visit(nodes);
  return found;
}
2367
/**
 * Fetch a URL over plain HTTP (no browser) and convert the HTML response.
 *
 * Fix over the previous version: the title search now stops at the first
 * `<title>` in document order. It used to keep scanning after a hit, so a
 * later `<title>` (for example one nested in another element further down
 * the document) overwrote the document title.
 *
 * @param {string} url - Address to fetch.
 * @param {object} [options]
 * @param {number} [options.timeout] - Abort after this many ms (default 30000).
 * @param {"markdown"|"text"|"snapshot"|"html"|"links"} [options.dump] -
 *   Output format; unknown values are treated as "markdown".
 * @param {Object<string, string>} [options.headers] - Extra request headers;
 *   they override the defaults.
 * @param {boolean} [options.followRedirects] - Pass false to use manual
 *   redirect handling.
 * @returns {Promise<{url: string, finalUrl: string, status: number, title: string,
 *   content: string, links: (Array<object>|undefined), duration: number}>}
 *   `links` is only set for the "links" format; `duration` is in ms.
 * @throws {Error} When the response status is not OK (`HTTP <status> <text>`).
 */
async function lobsterFetch(url, options) {
  const timeout = options?.timeout || 3e4;
  const dump = options?.dump || "markdown";
  const start = Date.now();
  const resp = await fetch(url, {
    headers: {
      "User-Agent": "LobsterCLI/0.1 (+https://github.com/iexcalibur/lobster-cli)",
      "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      "Accept-Language": "en-US,en;q=0.5",
      ...options?.headers || {}
    },
    redirect: options?.followRedirects !== false ? "follow" : "manual",
    signal: AbortSignal.timeout(timeout)
  });
  if (!resp.ok) {
    throw new Error(`HTTP ${resp.status} ${resp.statusText}`);
  }
  const html = await resp.text();
  const duration = Date.now() - start;
  const finalUrl = resp.url || url;
  const nodes = parseHtml(html);

  // Depth-first search returning the first non-empty <title> text, or
  // undefined when the tree has none.
  function findTitle(nodes2) {
    for (const node of nodes2) {
      if (node.tag === "title" && node.children?.[0]?.text) {
        return node.children[0].text.trim();
      }
      if (node.children) {
        const nested = findTitle(node.children);
        if (nested !== void 0) return nested;
      }
    }
    return void 0;
  }
  const title = findTitle(nodes) ?? "";

  let content;
  let links;
  switch (dump) {
    case "markdown":
      content = extractMarkdown(nodes, finalUrl);
      break;
    case "text":
      content = extractText(nodes);
      break;
    case "snapshot":
      content = extractSnapshot(nodes);
      break;
    case "html":
      content = html;
      break;
    case "links":
      links = extractLinks(nodes, finalUrl);
      content = links.map((l, i) => `${i + 1}. [${l.text}](${l.href})`).join("\n");
      break;
    default:
      content = extractMarkdown(nodes, finalUrl);
  }
  return { url, finalUrl, status: resp.status, title, content, links, duration };
}
2423
+
2424
+ // src/pipeline/registry.ts
2425
// Pipeline step handlers keyed by step name.
var stepHandlers = /* @__PURE__ */ new Map();

/**
 * Register (or replace) the handler for a pipeline step.
 *
 * @param {string} name - Step name used in pipeline definitions.
 * @param {Function} handler - Handler stored under that name.
 */
function registerStep(name, handler) {
  stepHandlers.set(name, handler);
}

/**
 * Look up a step handler.
 *
 * @param {string} name - Step name.
 * @returns {Function|undefined} The handler, or undefined when unregistered.
 */
function getStep(name) {
  const handler = stepHandlers.get(name);
  return handler;
}

/**
 * List registered step names in registration order.
 *
 * @returns {string[]} A fresh array of names.
 */
function getStepNames() {
  return Array.from(stepHandlers.keys());
}
2435
+
2436
+ // src/pipeline/executor.ts
2437
/**
 * Run pipeline steps in order, threading each step's result into the next
 * through `ctx.data`.
 *
 * Each step definition is an object whose first key names the step and whose
 * value holds its parameters, e.g. `{ fetch: { url: "..." } }`.
 *
 * Changes over the previous version:
 *  - a step that is not an object, or has no keys, is reported with a
 *    descriptive error instead of an opaque destructuring TypeError;
 *  - the debug preview can no longer abort the pipeline when the data cannot
 *    be serialized (circular references, BigInt), and "..." is appended only
 *    when the preview was actually truncated.
 *
 * @param {Array<object>} steps - Step definitions.
 * @param {*} page - Page object handed to handlers as `ctx.page`.
 * @param {object} args - Arguments handed to handlers as `ctx.args`.
 * @param {boolean} [debug=false] - Log each step and a preview of its result.
 * @returns {Promise<*>} The last step's result (null for an empty pipeline).
 * @throws {Error} For a malformed step or an unknown step name.
 */
async function executePipeline(steps, page, args, debug = false) {
  const PREVIEW_LIMIT = 200;
  const ctx = { page, args, data: null, debug };
  for (let i = 0; i < steps.length; i++) {
    const stepDef = steps[i];
    const entry = stepDef !== null && typeof stepDef === "object" ? Object.entries(stepDef)[0] : void 0;
    if (!entry) {
      throw new Error(`Invalid pipeline step at position ${i + 1}: expected an object like { stepName: params }`);
    }
    const [stepName, params] = entry;
    const handler = getStep(stepName);
    if (!handler) {
      throw new Error(`Unknown pipeline step: ${stepName}`);
    }
    if (debug) {
      log.step(i + 1, `${stepName}`);
    }
    ctx.data = await handler(ctx, params);
    if (debug && ctx.data !== void 0) {
      let serialized;
      try {
        serialized = JSON.stringify(ctx.data);
      } catch {
        // Logging is best-effort; fall back to a generic description.
        serialized = void 0;
      }
      const text = serialized ?? String(ctx.data);
      const preview = text.length > PREVIEW_LIMIT ? `${text.slice(0, PREVIEW_LIMIT)}...` : text;
      log.dim(` \u2192 ${preview}`);
    }
  }
  return ctx.data;
}
2457
+
2458
+ // src/pipeline/template.ts
2459
// Matches one `${{ expression }}` placeholder; group 1 is the expression.
var EXPR_RE = /\$\{\{\s*(.*?)\s*\}\}/g;

/**
 * Expand `${{ ... }}` placeholders in a template value.
 *
 * Arrays and plain objects are rendered recursively; other non-string values
 * are returned unchanged. A string that consists of exactly one placeholder
 * yields the expression's raw value (number, array, ...); otherwise each
 * placeholder is replaced by its stringified value, with null/undefined
 * rendered as "".
 *
 * Fix over the previous version: a string holding several placeholders, such
 * as `${{ a }}-${{ b }}`, was mistaken for a single placeholder spanning the
 * whole string, because the anchored pattern let the lazy group run across
 * the inner `}}`. Such strings now go through per-placeholder substitution.
 *
 * @param {*} template - String, array, object or primitive to render.
 * @param {object} ctx - Context passed to evaluateExpression.
 * @returns {*} The rendered value.
 */
function renderTemplate(template, ctx) {
  if (typeof template !== "string") {
    if (typeof template === "object" && template !== null) {
      if (Array.isArray(template)) return template.map((v) => renderTemplate(v, ctx));
      const result = {};
      for (const [k, v] of Object.entries(template)) {
        result[k] = renderTemplate(v, ctx);
      }
      return result;
    }
    return template;
  }
  const fullMatch = template.match(/^\$\{\{\s*(.*?)\s*\}\}$/);
  // A captured "}}" means the match actually spans more than one placeholder.
  if (fullMatch && !fullMatch[1].includes("}}")) {
    return evaluateExpression(fullMatch[1], ctx);
  }
  return template.replace(EXPR_RE, (_, expr) => {
    const val = evaluateExpression(expr, ctx);
    return val === null || val === void 0 ? "" : String(val);
  });
}
2481
/**
 * Evaluate a template expression of the form `value | filter | filter(arg)`.
 *
 * The first segment is resolved with resolveValue, then each filter is
 * applied left to right with applyFilter.
 *
 * Fix over the previous version: segments were produced by splitting on
 * every `|`, which tore the `||` fallback operator apart (so `a || 'x'` never
 * reached resolveValue intact) and also split pipes inside quoted filter
 * arguments such as `join(' | ')`. The expression is now split only on single
 * pipes that are outside quotes.
 *
 * @param {string} expr - Expression text without the `${{ }}` wrapper.
 * @param {object} ctx - Context used to resolve values.
 * @returns {*} The filtered value.
 */
function evaluateExpression(expr, ctx) {
  const segments = [];
  let openQuote = "";
  let segmentStart = 0;
  for (let i = 0; i < expr.length; i++) {
    const ch = expr[i];
    if (openQuote) {
      if (ch === openQuote) openQuote = "";
      continue;
    }
    if (ch === "'" || ch === '"') {
      openQuote = ch;
      continue;
    }
    if (ch !== "|") continue;
    if (expr[i + 1] === "|") {
      // `||` is the fallback operator handled by resolveValue, not a filter pipe.
      i++;
      continue;
    }
    segments.push(expr.slice(segmentStart, i));
    segmentStart = i + 1;
  }
  segments.push(expr.slice(segmentStart));

  const [source, ...filters] = segments;
  let value = resolveValue(source.trim(), ctx);
  for (const filter of filters) {
    value = applyFilter(value, filter.trim());
  }
  return value;
}
2489
/**
 * Resolve the value part of an expression (the text before any filter).
 *
 * Tried in order: `path (+|-|*) integer` arithmetic, `left || right`
 * fallback (used when left is null, undefined, "" or false; right may be a
 * quoted literal), a quoted string literal, a numeric literal, and finally a
 * context path lookup.
 *
 * @param {string} path - Trimmed expression text.
 * @param {object} ctx - Context used by resolvePath.
 * @returns {*} The resolved value.
 */
function resolveValue(path, ctx) {
  const isQuoted = (s) => (s.startsWith("'") && s.endsWith("'")) || (s.startsWith('"') && s.endsWith('"'));

  const arithmetic = /^(\w[\w.]*)\s*([+\-*])\s*(\d+)$/.exec(path);
  if (arithmetic) {
    const [, operandPath, operator, literal] = arithmetic;
    const lhs = Number(resolvePath(operandPath, ctx));
    const rhs = Number(literal);
    switch (operator) {
      case "+":
        return lhs + rhs;
      case "-":
        return lhs - rhs;
      case "*":
        return lhs * rhs;
    }
  }

  const fallback = /^(.+?)\s*\|\|\s*(.+)$/.exec(path);
  if (fallback) {
    const primary = resolvePath(fallback[1].trim(), ctx);
    const isMissing = primary === null || primary === void 0 || primary === "" || primary === false;
    if (!isMissing) return primary;
    const alternative = fallback[2].trim();
    return isQuoted(alternative) ? alternative.slice(1, -1) : resolvePath(alternative, ctx);
  }

  if (isQuoted(path)) {
    return path.slice(1, -1);
  }
  if (path !== "" && !isNaN(Number(path))) {
    return Number(path);
  }
  return resolvePath(path, ctx);
}
2515
/**
 * Look up a dotted path in the template context.
 *
 * `index` returns `ctx.index` (0 when null/undefined). A path starting with
 * `args`, `item` or `data` is resolved inside that context member. Any other
 * path is tried against `ctx.item`, then `ctx.args`, then `ctx.data`, and
 * the first defined result wins.
 *
 * @param {string} path - Dotted path such as "item.title".
 * @param {object} ctx - Template context.
 * @returns {*} The value found, or undefined.
 */
function resolvePath(path, ctx) {
  if (path === "index") return ctx.index ?? 0;
  const segments = path.split(".");
  const [scopeName, ...remainder] = segments;
  if (scopeName === "args" || scopeName === "item" || scopeName === "data") {
    return getNestedValue(ctx[scopeName], remainder);
  }
  for (const scope of [ctx.item, ctx.args, ctx.data]) {
    const hit = getNestedValue(scope, segments);
    if (hit !== void 0) return hit;
  }
  return void 0;
}
2539
/**
 * Follow a list of keys into a nested object/array.
 *
 * @param {*} obj - Starting value.
 * @param {string[]} parts - Keys to follow in order; an empty list returns `obj`.
 * @returns {*} The value reached, or undefined as soon as a step lands on
 *   null, undefined or a non-object.
 */
function getNestedValue(obj, parts) {
  let cursor = obj;
  for (const key of parts) {
    const canDescend = cursor !== null && cursor !== void 0 && typeof cursor === "object";
    if (!canDescend) return void 0;
    cursor = cursor[key];
  }
  return cursor;
}
2551
/**
 * Apply one template filter, written as `name` or `name(arg)`, to a value.
 *
 * Supported names: default, join, upper, lower, trim, truncate, replace,
 * keys, length, first, last, json, slugify, sanitize, ext, basename. A
 * filter that cannot be parsed or is not known returns the value unchanged.
 * One pair of surrounding quotes is stripped from the argument.
 *
 * @param {*} value - Value to transform.
 * @param {string} filter - Filter text.
 * @returns {*} The transformed value.
 */
function applyFilter(value, filter) {
  const parsed = /^(\w+)(?:\((.+)\))?$/.exec(filter);
  if (!parsed) return value;
  const name = parsed[1];
  const arg = parsed[2]?.replace(/^['"]|['"]$/g, "");
  const isString = typeof value === "string";

  const filters = {
    default: () => (value === null || value === void 0 || value === "" ? arg : value),
    join: () => (Array.isArray(value) ? value.join(arg || ", ") : value),
    upper: () => (isString ? value.toUpperCase() : value),
    lower: () => (isString ? value.toLowerCase() : value),
    trim: () => (isString ? value.trim() : value),
    truncate: () => {
      const limit = parseInt(arg || "100");
      return isString && value.length > limit ? value.slice(0, limit) + "..." : value;
    },
    replace: () => {
      if (!isString || !arg) return value;
      // `arg` is "from, to"; each side may carry its own quotes.
      const [from, to] = arg.split(",").map((s) => s.trim().replace(/^['"]|['"]$/g, ""));
      return value.replaceAll(from, to || "");
    },
    keys: () => (typeof value === "object" && value !== null ? Object.keys(value) : []),
    length: () => {
      if (Array.isArray(value)) return value.length;
      return isString ? value.length : 0;
    },
    first: () => (Array.isArray(value) ? value[0] : value),
    last: () => (Array.isArray(value) ? value[value.length - 1] : value),
    json: () => JSON.stringify(value),
    slugify: () => (isString ? value.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "") : value),
    sanitize: () => (isString ? value.replace(/[<>:"/\\|?*\x00-\x1f]/g, "_").replace(/__+/g, "_").trim() : value),
    ext: () => {
      if (!isString) return "";
      const extension = value.match(/\.([a-zA-Z0-9]+)(?:\?.*)?$/);
      return extension ? extension[1] : "";
    },
    basename: () => {
      if (!isString) return "";
      try {
        return new URL(value).pathname.split("/").pop() || "";
      } catch {
        // Not an absolute URL: treat the value as a plain path.
      }
      return value.split("/").pop() || "";
    }
  };

  // Own-property check so names like "constructor" are not treated as filters.
  return Object.hasOwn(filters, name) ? filters[name]() : value;
}
2608
+
2609
+ // src/types/adapter.ts
2610
// Adapter strategy identifiers, keyed by upper-case name.
var Strategy = {
  PUBLIC: "public",
  COOKIE: "cookie",
  HEADER: "header",
  INTERCEPT: "intercept",
  UI: "ui"
};
2618
+
2619
+ // src/adapter/registry.ts
2620
// The adapter registry is stored on globalThis under this key and created
// only when no truthy value is already present there.
var REGISTRY_KEY = "__lobster_registry__";
globalThis[REGISTRY_KEY] ||= /* @__PURE__ */ new Map();

/**
 * Access the adapter registry stored on globalThis.
 *
 * @returns {Map<string, object>} Adapters keyed by "site/name".
 */
function getRegistry() {
  const registry = globalThis[REGISTRY_KEY];
  return registry;
}
2627
/**
 * Build an adapter from a definition and register it as "site/name",
 * replacing any adapter already stored under that key.
 *
 * Defaults: `description` falls back to "<site> <name>", `strategy` to
 * "public", `args` to an empty array, and `browser` to whether
 * `def.strategy` differs from "public".
 *
 * NOTE(review): the `browser` default looks at the raw `def.strategy`, so a
 * definition that omits `strategy` gets strategy "public" but browser true.
 * Confirm whether that is intended before changing it.
 *
 * @param {object} def - Adapter definition.
 * @returns {object} The registered adapter.
 */
function cli(def) {
  const { site, name, domain, columns, func, pipeline, timeoutSeconds, navigateBefore } = def;
  const adapter = {
    site,
    name,
    description: def.description || `${site} ${name}`,
    domain,
    strategy: def.strategy || "public" /* PUBLIC */,
    browser: def.browser ?? def.strategy !== "public" /* PUBLIC */,
    args: def.args || [],
    columns,
    func,
    pipeline,
    timeoutSeconds,
    navigateBefore
  };
  getRegistry().set(`${adapter.site}/${adapter.name}`, adapter);
  return adapter;
}
2646
/**
 * Fetch the adapter registered for a site/command pair.
 *
 * @param {string} site - Site identifier.
 * @param {string} name - Command name within the site.
 * @returns {object|undefined} The adapter, or undefined when not registered.
 */
function getAdapter(site, name) {
  const key = `${site}/${name}`;
  return getRegistry().get(key);
}
2649
/**
 * List the adapters whose registry key starts with "<site>/".
 *
 * @param {string} site - Site identifier.
 * @returns {Array<object>} Matching adapters in registration order.
 */
function getAdapterBySite(site) {
  const prefix = `${site}/`;
  return [...getRegistry()]
    .filter(([key]) => key.startsWith(prefix))
    .map(([, adapter]) => adapter);
}
2656
/**
 * List the adapters whose `domain` occurs as a substring of `domain`.
 * Adapters without a domain never match.
 *
 * @param {string} domain - Text to search for adapter domains.
 * @returns {Array<object>} Matching adapters in registration order.
 */
function getAdapterByDomain(domain) {
  const everyAdapter = [...getRegistry().values()];
  return everyAdapter.filter((adapter) => adapter.domain && domain.includes(adapter.domain));
}
2663
/**
 * List every registered adapter.
 *
 * @returns {Array<object>} A fresh array in registration order.
 */
function getAllAdapters() {
  return Array.from(getRegistry().values());
}
2666
/**
 * List the distinct site identifiers of all registered adapters.
 *
 * @returns {string[]} Unique site names, sorted with the default string order.
 */
function getAllSites() {
  const siteNames = Array.from(getRegistry().values(), (adapter) => adapter.site);
  return [...new Set(siteNames)].sort();
}
2673
+
2674
+ // src/discover/explore.ts
2675
+ import { writeFileSync, mkdirSync, existsSync as existsSync2 } from "fs";
2676
+ import { join } from "path";
2677
/**
 * Maps exact hostnames to the short site name used for that host.
 * Declared per site and flattened into a hostname -> site lookup object.
 */
var SITE_ALIASES = Object.fromEntries(
  Object.entries({
    twitter: ["x.com", "twitter.com"],
    hackernews: ["news.ycombinator.com"],
    reddit: ["old.reddit.com", "www.reddit.com"],
    bilibili: ["bilibili.com", "www.bilibili.com"],
    zhihu: ["zhihu.com", "www.zhihu.com"]
  }).flatMap(([siteName, hosts]) => hosts.map((host) => [host, siteName]))
);
2688
/**
 * Semantic role -> substrings that mark a response field as having that role.
 * Role order matters: the first role with a matching substring wins when a
 * field name is classified.
 */
var FIELD_ROLES = {
  title: "title name headline subject text caption".split(" "),
  url: "url link href permalink uri web_url".split(" "),
  author: "author user creator owner by username screen_name display_name".split(" "),
  score: "score points likes upvotes karma vote_count favorite_count retweet_count".split(" "),
  time: "time date created created_at timestamp published updated_at posted_at".split(" "),
  description: "description summary snippet excerpt body content selftext".split(" "),
  image: "image thumbnail avatar icon photo cover poster".split(" "),
  id: "id uid pid mid aid bvid".split(" ")
};
2698
/**
 * Query-parameter names that are dropped when an endpoint's parameters are
 * recorded (cache-busters, nonces, JSONP callback names).
 */
var VOLATILE_PARAMS = /* @__PURE__ */ new Set(
  "_ t ts timestamp nonce rand random callback jsonp _t __t".split(" ")
);
2711
/**
 * Reduces a URL to "origin + path" with identifier-like path segments
 * replaced by placeholders, so requests differing only in ids collapse to
 * one pattern. Query string and fragment are dropped.
 *
 * Placeholder rules, first match wins:
 *  - all digits                 -> "{id}"
 *  - 8+ hex characters          -> "{hex}"
 *  - "BV" followed by alnum     -> "{bvid}"
 *  - 20+ alphanumeric chars     -> "{token}"
 *
 * @param {string} urlStr Absolute URL.
 * @returns {string} Normalized pattern, or `urlStr` unchanged if it cannot be parsed.
 */
function normalizeUrlPattern(urlStr) {
  let parsed;
  try {
    parsed = new URL(urlStr);
  } catch {
    return urlStr;
  }
  const placeholderRules = [
    [/^\d+$/, "{id}"],
    [/^[a-f0-9]{8,}$/i, "{hex}"],
    [/^BV[a-zA-Z0-9]+$/, "{bvid}"],
    [/^[a-z0-9]{20,}$/i, "{token}"]
  ];
  const segments = parsed.pathname.split("/").map((segment) => {
    if (!segment) return segment;
    const rule = placeholderRules.find(([matcher]) => matcher.test(segment));
    return rule ? rule[1] : segment;
  });
  return parsed.origin + segments.join("/");
}
2728
/**
 * Heuristically lists authentication mechanisms hinted at by a request.
 *
 * URL substrings checked: signature markers, token parameters, API-key
 * parameters. When `headers` is given, a Bearer `authorization` header and
 * CSRF/XSRF token headers are checked as well (lower-case header keys).
 *
 * @param {string} url Request URL.
 * @param {object} [headers] Request headers keyed by lower-case name.
 * @returns {string[]} Any of "signature", "token", "api_key", "bearer", "csrf".
 */
function detectAuth(url, headers) {
  const urlHasAny = (...needles) => needles.some((needle) => url.includes(needle));
  const found = [];
  if (urlHasAny("signature", "sign=", "sig=")) found.push("signature");
  if (urlHasAny("token=", "access_token=")) found.push("token");
  if (urlHasAny("api_key=", "apikey=")) found.push("api_key");
  if (!headers) return found;
  if (headers["authorization"]?.startsWith("Bearer")) found.push("bearer");
  if (headers["x-csrf-token"] || headers["x-xsrf-token"]) found.push("csrf");
  return found;
}
2739
/**
 * Inspects a parsed response body for a list of items and classifies the
 * fields of the first item.
 *
 * Item lookup: the body itself if it is an array; otherwise the first
 * non-empty array found under a well-known top-level key, or one level below
 * such a key when its value is a plain object.
 *
 * @param {unknown} body Parsed response body.
 * @returns {{hasItems: boolean, itemCount: number, fields: string[], fieldRoles: Object<string, string>}}
 *   `fields` are the keys of the first item; `fieldRoles` maps a field to the
 *   first role in FIELD_ROLES with a substring contained in the lower-cased
 *   field name.
 */
function analyzeResponseBody(body) {
  const emptyAnalysis = () => ({ hasItems: false, itemCount: 0, fields: [], fieldRoles: {} });
  const isNonEmptyArray = (value) => Array.isArray(value) && value.length > 0;

  if (!body || typeof body !== "object") return emptyAnalysis();

  const locateItems = () => {
    if (Array.isArray(body)) return body;
    const topLevelKeys = ["data", "results", "items", "list", "entries", "records", "hits", "posts", "articles", "stories"];
    const nestedKeys = ["items", "list", "data", "results", "entries"];
    for (const key of topLevelKeys) {
      const candidate = body[key];
      if (isNonEmptyArray(candidate)) return candidate;
      // Only plain (non-array) objects are searched one level deeper.
      if (!candidate || typeof candidate !== "object" || Array.isArray(candidate)) continue;
      const nestedKey = nestedKeys.find((subKey) => isNonEmptyArray(candidate[subKey]));
      if (nestedKey !== undefined) return candidate[nestedKey];
    }
    return null;
  };

  const items = locateItems();
  if (!items || items.length === 0) return emptyAnalysis();

  const sample = items[0];
  if (!sample || typeof sample !== "object") {
    // A list of primitives: items exist but there are no fields to describe.
    return { hasItems: true, itemCount: items.length, fields: [], fieldRoles: {} };
  }

  const fields = Object.keys(sample);
  const fieldRoles = {};
  const roleEntries = Object.entries(FIELD_ROLES);
  for (const field of fields) {
    const lowered = field.toLowerCase();
    const matched = roleEntries.find(([, patterns]) => patterns.some((p) => lowered.includes(p)));
    if (matched) fieldRoles[field] = matched[0];
  }
  return { hasItems: true, itemCount: items.length, fields, fieldRoles };
}
2786
/**
 * Computes a heuristic usefulness score for a captured endpoint; higher
 * scores mean the endpoint is more likely to be a data API worth wrapping.
 *
 * Points are added for JSON content, list-shaped bodies, recognized field
 * roles, API-looking URLs, list/search/feed-like paths, search and paging
 * parameters and GET requests; points are subtracted for signed requests,
 * static assets and analytics/tracking URLs.
 *
 * @param {object} ep Endpoint info. Reads `contentType`, `hasItems`,
 *   `itemCount`, `fieldRoles`, `url`, `queryParams`, `method`, `authIndicators`.
 * @returns {number} The score (may be negative).
 */
function scoreEndpoint(ep) {
  let score = 0;
  if (ep.contentType.includes("json")) score += 10;
  if (ep.hasItems) score += 5;
  if (ep.itemCount > 3) score += 3;
  if (ep.itemCount > 10) score += 2;
  if (Object.keys(ep.fieldRoles).length > 2) score += 3;
  if (ep.url.includes("/api/")) score += 5;
  if (ep.url.includes("/v1/") || ep.url.includes("/v2/") || ep.url.includes("/v3/")) score += 3;

  // Relative or otherwise unparseable URLs must not abort scoring; fall back
  // to the raw URL with query string and fragment stripped.
  let path;
  try {
    path = new URL(ep.url).pathname.toLowerCase();
  } catch {
    path = ep.url.split(/[?#]/)[0].toLowerCase();
  }
  if (/search|query|find/.test(path)) score += 4;
  if (/hot|trending|popular|top|feed|timeline/.test(path)) score += 4;
  if (/list|index|all|latest|recent/.test(path)) score += 3;

  // `q` is anchored: unanchored it matched every parameter containing a "q"
  // (e.g. "seq", "frequency").
  if (ep.queryParams.some((p) => /search|keyword|query|^q$/.test(p))) score += 3;
  if (ep.queryParams.some((p) => /page|offset|cursor|limit|count|num/.test(p))) score += 2;
  if (ep.method === "GET") score += 2;
  if (ep.authIndicators.includes("signature")) score -= 2;

  // Static assets are recognized by the extension at the END of the path.
  // Testing the whole URL for "\.(js|...)" also matched ".json" and buried
  // JSON endpoints such as "/hot.json" under the -30 penalty.
  if (/\.(js|css|png|jpe?g|gif|svg|woff2?|ico)(\.map)?$/.test(path)) score -= 30;
  if (/analytics|tracking|pixel|beacon|log\b/.test(ep.url)) score -= 20;
  return score;
}
2807
/**
 * Derives a list of capability names from keyword matches on the endpoints'
 * URLs (lower-cased) and, for "detail", on their normalized patterns.
 *
 * @param {Array<{url: string, pattern: string}>} endpoints Discovered endpoints.
 * @returns {string[]} Distinct capability names in order of first detection:
 *   any of "search", "hot", "feed", "detail", "comments", "me", "favorites",
 *   "history".
 */
function inferCapabilities(endpoints) {
  // A Set keeps first-detection order and makes the "already present" check implicit.
  const caps = new Set();
  for (const ep of endpoints) {
    const url = ep.url.toLowerCase();
    if (/search|query|find/.test(url)) caps.add("search");
    if (/hot|trending|popular/.test(url)) caps.add("hot");
    if (/feed|timeline|home/.test(url)) caps.add("feed");
    if (/detail|item\/\{|article\/\{|post\/\{/.test(ep.pattern)) caps.add("detail");
    if (/comment|reply|discuss/.test(url)) caps.add("comments");
    // "me" must be a whole word; without the leading boundary it also matched
    // the tail of words such as "home" or "time".
    if (/user|profile|\bme\b|account/.test(url)) caps.add("me");
    if (/favorite|bookmark|saved|like/.test(url)) caps.add("favorites");
    if (/history|watch|read/.test(url)) caps.add("history");
  }
  return [...caps];
}
2822
/**
 * Scrolls the page to the bottom up to `attempts` times. After each scroll
 * the in-page script waits for the document height to grow (observed through
 * a MutationObserver) or for a 2 second timeout; scrolling stops as soon as
 * one attempt reports no growth.
 *
 * @param {object} page Page adapter exposing `evaluate(script)`.
 * @param {number} attempts Maximum number of scroll attempts.
 * @returns {Promise<void>}
 */
async function smartAutoScroll(page, attempts) {
  const scrollOnceScript = `
    (async () => {
      const lastHeight = document.body.scrollHeight;
      window.scrollTo(0, lastHeight);

      // Wait for new content via MutationObserver or timeout
      const result = await new Promise((resolve) => {
        let timeoutId;
        const observer = new MutationObserver(() => {
          if (document.body.scrollHeight > lastHeight) {
            clearTimeout(timeoutId);
            observer.disconnect();
            setTimeout(() => resolve(true), 100);
          }
        });
        observer.observe(document.body, { childList: true, subtree: true });
        timeoutId = setTimeout(() => { observer.disconnect(); resolve(false); }, 2000);
      });
      return result;
    })()
  `;
  let remaining = attempts;
  while (remaining > 0) {
    remaining -= 1;
    const grew = await page.evaluate(scrollOnceScript);
    if (!grew) return;
  }
}
2848
/**
 * Clicks up to `maxButtons` visible buttons, tabs and toggles on the page,
 * pausing 800 ms after each click, then waits a further 1.5 (passed to
 * `page.wait`). Elements whose text looks destructive (delete, remove,
 * logout, sign out, cancel, close) are skipped.
 *
 * @param {object} page Page adapter exposing `evaluate(script)` and `wait(n)`.
 * @param {number} maxButtons Upper bound on the number of elements clicked;
 *   interpolated verbatim into the in-page script.
 * @returns {Promise<void>}
 */
async function interactiveFuzz(page, maxButtons) {
  const fuzzScript = `
    (async () => {
      const clickTargets = [];
      const selectors = [
        'button:not([disabled])',
        '[role="tab"]',
        '[role="button"]',
        '.tab', '.nav-link', '.dropdown-toggle',
        'a[data-toggle]', '[data-bs-toggle]',
      ];

      for (const sel of selectors) {
        for (const el of document.querySelectorAll(sel)) {
          const rect = el.getBoundingClientRect();
          if (rect.width > 0 && rect.height > 0 &&
              rect.top >= 0 && rect.top < window.innerHeight * 2) {
            const text = el.textContent?.trim()?.slice(0, 40) || '';
            // Skip destructive-looking buttons
            if (/delete|remove|logout|sign.?out|cancel|close/i.test(text)) continue;
            clickTargets.push(el);
          }
        }
      }

      // Click up to N targets with delays
      const max = Math.min(${maxButtons}, clickTargets.length);
      for (let i = 0; i < max; i++) {
        try {
          clickTargets[i].click();
          await new Promise(r => setTimeout(r, 800));
        } catch {}
      }
    })()
  `;
  await page.evaluate(fuzzScript);
  await page.wait(1.5);
}
2885
/**
 * Re-fetches, from inside the page and with credentials included, up to 8
 * JSON endpoints that scored above 5 but had no detected items, then updates
 * those endpoint objects in place when the fresh body contains items.
 *
 * @param {object} page Page adapter exposing `evaluate(script)`.
 * @param {object[]} endpoints Endpoint infos; matching entries are mutated
 *   (`hasItems`, `itemCount`, `fields`, `fieldRoles`, `score`).
 * @returns {Promise<void>}
 */
async function recoverMissingBodies(page, endpoints) {
  const candidates = [];
  for (const ep of endpoints) {
    if (ep.hasItems) continue;
    if (!ep.contentType.includes("json")) continue;
    if (ep.score > 5) candidates.push(ep);
  }
  if (candidates.length === 0) return;

  const urls = candidates.map((ep) => ep.url).slice(0, 8);
  const refetchScript = `
    (async () => {
      const urls = ${JSON.stringify(urls)};
      const results = [];
      for (const url of urls) {
        try {
          const resp = await fetch(url, { credentials: 'include' });
          if (resp.ok) {
            const json = await resp.json();
            results.push(json);
          } else {
            results.push(null);
          }
        } catch { results.push(null); }
      }
      return results;
    })()
  `;
  const bodies = await page.evaluate(refetchScript);
  if (!bodies) return;

  // bodies[i] corresponds to urls[i], which in turn corresponds to candidates[i].
  urls.forEach((_, i) => {
    const body = bodies[i];
    if (!body) return;
    const analysis = analyzeResponseBody(body);
    if (!analysis.hasItems) return;
    const target = candidates[i];
    target.hasItems = analysis.hasItems;
    target.itemCount = analysis.itemCount;
    target.fields = analysis.fields;
    target.fieldRoles = analysis.fieldRoles;
    target.score = scoreEndpoint(target);
  });
}
2923
/**
 * Writes the exploration result to `dir` as pretty-printed JSON files:
 * manifest.json, endpoints.json, capabilities.json, auth.json and, when the
 * result carries stores, stores.json. The directory is created if missing.
 *
 * @param {string} dir Output directory.
 * @param {object} result Exploration result (site, domain, framework,
 *   strategy, capabilities, endpoints, optional stores).
 * @returns {void}
 */
function writeArtifacts(dir, result) {
  if (!existsSync2(dir)) mkdirSync(dir, { recursive: true });
  const writeJson = (fileName, data) => {
    writeFileSync(join(dir, fileName), JSON.stringify(data, null, 2));
  };

  writeJson("manifest.json", {
    site: result.site,
    domain: result.domain,
    framework: result.framework,
    strategy: result.strategy,
    capabilities: result.capabilities,
    endpointCount: result.endpoints.length,
    exploredAt: (/* @__PURE__ */ new Date()).toISOString()
  });

  const endpointRows = result.endpoints.map((ep) => {
    const { url, pattern, method, status, score, hasItems, itemCount, fields, fieldRoles, queryParams, authIndicators } = ep;
    return { url, pattern, method, status, score, hasItems, itemCount, fields, fieldRoles, queryParams, authIndicators };
  });
  writeJson("endpoints.json", endpointRows);

  // Only these three capabilities are tied to an endpoint via URL keywords;
  // every other capability is reported without an endpoint.
  const capabilityMatchers = {
    search: /search|query|find/,
    hot: /hot|trending|popular/,
    feed: /feed|timeline|home/
  };
  const capabilityRows = result.capabilities.map((cap) => {
    const matcher = Object.prototype.hasOwnProperty.call(capabilityMatchers, cap) ? capabilityMatchers[cap] : null;
    const matchingEndpoints = result.endpoints.filter((ep) => {
      const loweredUrl = ep.url.toLowerCase();
      return matcher ? matcher.test(loweredUrl) : false;
    });
    const best = matchingEndpoints[0];
    return {
      name: cap,
      description: `${cap} capability`,
      endpoint: best?.pattern || null,
      strategy: result.strategy,
      confidence: matchingEndpoints.length > 0 ? 0.8 : 0.5,
      recommendedColumns: best?.fields?.slice(0, 6) || []
    };
  });
  writeJson("capabilities.json", capabilityRows);

  // Group endpoint patterns by the auth indicator they exhibited.
  const authSummary = {};
  for (const ep of result.endpoints) {
    for (const indicator of ep.authIndicators) {
      if (!authSummary[indicator]) authSummary[indicator] = [];
      authSummary[indicator].push(ep.pattern);
    }
  }
  writeJson("auth.json", authSummary);

  if (result.stores && result.stores.length > 0) {
    writeJson("stores.json", result.stores);
  }
  log.success(`Artifacts written to ${dir}/`);
}
2985
/**
 * Explores a site in a live page: loads `url`, optionally auto-scrolls and
 * clicks interactive elements, then turns the intercepted requests into
 * scored endpoint descriptions, detects the front-end framework and any
 * Pinia/Vuex stores, picks an access strategy, writes the artifacts to disk
 * and returns the result.
 *
 * @param {object} page Page adapter; uses `installInterceptor`, `goto`,
 *   `wait`, `getInterceptedRequests` and `evaluate`.
 * @param {string} url Absolute URL to explore (must be parseable by `URL`).
 * @param {object} [options]
 * @param {number} [options.wait] Passed to `page.wait` after navigation (default 3).
 * @param {boolean} [options.scroll] Set to `false` to skip auto-scrolling.
 * @param {number} [options.scrollAttempts] Scroll attempts (default 4).
 * @param {boolean} [options.fuzz] Set to `false` to skip clicking elements.
 * @param {number} [options.maxButtons] Click budget (default 12).
 * @param {string} [options.outputDir] Artifact directory
 *   (default `<cwd>/.lobster/explore/<site>`).
 * @returns {Promise<object>} Result with `site`, `domain`, `endpoints`
 *   (top 30 by score), `strategy`, `framework`, `stores` (omitted when none),
 *   `capabilities` and `artifactDir`.
 */
async function exploreSite(page, url, options) {
  const parsedUrl = new URL(url);
  const domain = parsedUrl.hostname;
  // Known hosts map to a fixed site name; otherwise use the first label after "www.".
  const site = SITE_ALIASES[domain] || domain.replace(/^www\./, "").split(".")[0];
  log.info(`Exploring ${url}...`);
  await page.installInterceptor("");
  await page.goto(url);
  await page.wait(options?.wait || 3);
  if (options?.scroll !== false) {
    log.debug("Smart auto-scrolling to trigger lazy-loaded APIs...");
    await smartAutoScroll(page, options?.scrollAttempts || 4);
  }
  if (options?.fuzz !== false) {
    log.debug("Fuzzing interactive elements...");
    await interactiveFuzz(page, options?.maxButtons || 12);
  }

  const rawRequests = await page.getInterceptedRequests();
  const seen = /* @__PURE__ */ new Set();
  const endpoints = [];
  for (const raw of rawRequests) {
    if (!raw?.url || !raw?.status) continue;
    // Keep only 2xx/3xx responses.
    if (raw.status < 200 || raw.status >= 400) continue;
    const pattern = normalizeUrlPattern(raw.url);
    // One endpoint per method + normalized pattern.
    const dedupeKey = `${raw.method || "GET"}:${pattern}`;
    if (seen.has(dedupeKey)) continue;
    seen.add(dedupeKey);
    let queryParams = [];
    try {
      const u = new URL(raw.url);
      queryParams = [...u.searchParams.keys()].filter((k) => !VOLATILE_PARAMS.has(k));
    } catch {
      // Unparseable URL: record the endpoint without query parameters.
    }
    // NOTE(review): only the URL is inspected here, so the header-based
    // "bearer"/"csrf" indicators are never produced by this call — confirm
    // whether intercepted requests carry headers that could be passed along.
    const authIndicators = detectAuth(raw.url);
    const bodyAnalysis = analyzeResponseBody(raw.body);
    const ep = {
      url: raw.url,
      pattern,
      method: raw.method || "GET",
      status: raw.status,
      contentType: "json",
      queryParams,
      ...bodyAnalysis,
      authIndicators
    };
    endpoints.push({ ...ep, score: scoreEndpoint(ep) });
  }
  log.debug("Recovering missing response bodies...");
  await recoverMissingBodies(page, endpoints);
  endpoints.sort((a, b) => b.score - a.score);

  const detectedFramework = await page.evaluate(`
    (() => {
      const app = document.querySelector('#app');
      if (window.__NEXT_DATA__) return 'nextjs';
      if (window.__NUXT__) return 'nuxt';
      if (app && app.__vue_app__) {
        const gp = app.__vue_app__.config?.globalProperties;
        if (gp?.$pinia) return 'vue+pinia';
        if (gp?.$store) return 'vue+vuex';
        return 'vue';
      }
      if (app && app.__vue__) return 'vue2';
      if (window.__REACT_DEVTOOLS_GLOBAL_HOOK__) return 'react';
      if (document.querySelector('[data-reactroot]') || document.querySelector('#__next') || document.querySelector('#root')?.['_reactRootContainer']) return 'react';
      if (window.angular || document.querySelector('[ng-version]')) return 'angular';
      if (window.__svelte_meta) return 'svelte';
      return 'unknown';
    })()
  `).catch(() => "unknown");
  // An evaluate call that resolves without a value is treated like a failed one.
  const framework = detectedFramework || "unknown";

  const detectedStores = await page.evaluate(`
    (() => {
      const results = [];
      const app = document.querySelector('#app');

      // Pinia via __vue_app__
      if (app && app.__vue_app__) {
        try {
          const pinia = app.__vue_app__.config?.globalProperties?.$pinia;
          if (pinia && pinia._s) {
            pinia._s.forEach((store, id) => {
              const actions = Object.keys(store).filter(k =>
                typeof store[k] === 'function' && !k.startsWith('$') && !k.startsWith('_')
              );
              const stateKeys = Object.keys(store).filter(k =>
                typeof store[k] !== 'function' && !k.startsWith('$') && !k.startsWith('_')
              );
              results.push({ name: id, type: 'pinia', actions: actions.slice(0, 20), stateKeys: stateKeys.slice(0, 20) });
            });
          }
        } catch {}

        // Vuex via __vue_app__
        try {
          const store = app.__vue_app__.config?.globalProperties?.$store;
          if (store && store._actions) {
            const actions = Object.keys(store._actions);
            results.push({ name: 'vuex', type: 'vuex', actions: actions.slice(0, 20) });
          }
        } catch {}
      }

      // Legacy Pinia global
      if (results.length === 0 && window.__pinia) {
        try {
          const pinia = window.__pinia;
          for (const [id, store] of pinia._s || []) {
            const actions = Object.keys(store).filter(k => typeof store[k] === 'function' && !k.startsWith('$') && !k.startsWith('_'));
            results.push({ name: id, type: 'pinia', actions: actions.slice(0, 20) });
          }
        } catch {}
      }

      return results;
    })()
  `).catch(() => []);
  // Guard against evaluate resolving to null/undefined; reading `.length`
  // on such a value would otherwise throw after all the work above.
  const stores = Array.isArray(detectedStores) ? detectedStores : [];

  // Strategy: driven by the best endpoint's auth indicators; "cookie" when
  // only some endpoints yielded items, or when nothing was captured at all.
  let strategy = "public";
  if (endpoints.length > 0) {
    const topEp = endpoints[0];
    if (topEp.authIndicators.includes("signature")) strategy = "intercept";
    else if (topEp.authIndicators.includes("bearer") || topEp.authIndicators.includes("csrf")) strategy = "header";
    else if (endpoints.some((e) => !e.hasItems) && endpoints.some((e) => e.hasItems)) strategy = "cookie";
  } else {
    strategy = "cookie";
  }
  const capabilities = inferCapabilities(endpoints);
  const result = {
    site,
    domain,
    endpoints: endpoints.slice(0, 30),
    strategy,
    framework,
    stores: stores.length > 0 ? stores : void 0,
    capabilities
  };
  const outputDir = options?.outputDir || join(process.cwd(), ".lobster", "explore", site);
  writeArtifacts(outputDir, result);
  result.artifactDir = outputDir;
  return result;
}
3123
+
3124
+ // src/discover/synthesize.ts
3125
+ import yaml from "js-yaml";
3126
/**
 * Generates a YAML adapter definition from an exploration result, based on
 * the highest-ranked endpoint with a positive score.
 *
 * @param {object} result Exploration result (`site`, `domain`, `strategy`,
 *   `endpoints` sorted best-first).
 * @param {string} [goal] Adapter name; when omitted it is inferred from the
 *   endpoint path ("search", "hot", "feed", "top") and falls back to "data".
 * @returns {string} YAML text, or a two-line `#` comment when no endpoint
 *   with a positive score exists.
 */
function synthesizeAdapter(result, goal) {
  const topEndpoints = result.endpoints.filter((e) => e.score > 0).slice(0, 3);
  if (topEndpoints.length === 0) {
    return `# No API endpoints discovered for ${result.site}\n# Try using: lobster agent "your task" --url https://${result.domain}`;
  }
  const endpoint = topEndpoints[0];

  let name = goal || "data";
  if (!goal) {
    // Relative/unparseable endpoint URLs fall back to the raw string instead of throwing.
    let path;
    try {
      path = new URL(endpoint.url).pathname.toLowerCase();
    } catch {
      path = String(endpoint.url).toLowerCase();
    }
    if (/search|query/.test(path)) name = "search";
    else if (/hot|trending|popular/.test(path)) name = "hot";
    else if (/feed|timeline|home/.test(path)) name = "feed";
    else if (/top|best|rank/.test(path)) name = "top";
  }

  // `limit` is always offered; limit-like query parameters (limit, count,
  // num, size) therefore get no argument of their own.
  const args = {
    limit: { type: "int", default: 20 }
  };
  for (const param of endpoint.queryParams) {
    // `q` is anchored so that parameters merely containing a "q" (e.g. "seq")
    // are not turned into required search arguments.
    if (/search|keyword|query|^q$/.test(param)) {
      args[param] = { required: true, positional: true, help: "Search query" };
    } else if (/page|offset|cursor/.test(param)) {
      args[param] = { type: "int", default: 1 };
    }
  }

  const mappableRoles = ["title", "url", "author", "score", "time", "description"];
  const columns = [];
  const mapTemplate = {};
  for (const [field, role] of Object.entries(endpoint.fieldRoles)) {
    if (!mappableRoles.includes(role)) continue;
    // Several fields can share a role (e.g. "title" and "name"); the first one
    // wins so that each column is emitted exactly once.
    if (Object.prototype.hasOwnProperty.call(mapTemplate, role)) continue;
    columns.push(role);
    mapTemplate[role] = `\${{ item.${field} }}`;
  }
  if (columns.length === 0) {
    // No recognized roles: expose the first five raw fields instead.
    for (const field of endpoint.fields.slice(0, 5)) {
      columns.push(field);
      mapTemplate[field] = `\${{ item.${field} }}`;
    }
  }

  const pipeline = [];
  if (result.strategy === "public" && !endpoint.authIndicators.length) {
    pipeline.push({ fetch: endpoint.url });
  } else {
    pipeline.push({ navigate: `https://${result.domain}` });
    pipeline.push({
      evaluate: `(async () => { const r = await fetch(${JSON.stringify(endpoint.url)}, {credentials:'include'}); return r.json(); })()`
    });
  }
  if (endpoint.hasItems && endpoint.itemCount > 0) {
    // The location of the item list inside the body is not recorded on the
    // endpoint, so "data" is emitted as the selection path.
    // NOTE(review): this is a guess the user may need to adjust in the generated YAML.
    pipeline.push({ select: "data" });
  }
  if (Object.keys(mapTemplate).length > 0) {
    pipeline.push({ map: mapTemplate });
  }
  pipeline.push({ limit: "${{ args.limit }}" });

  const adapter = {
    site: result.site,
    name,
    description: `${name} from ${result.domain}`,
    domain: result.domain,
    strategy: result.strategy,
    browser: result.strategy !== "public",
    args,
    pipeline,
    columns: columns.length > 0 ? columns : void 0
  };
  return yaml.dump(adapter, { indent: 2, lineWidth: 120 });
}
3198
+
3199
+ // src/cascade/index.ts
3200
/**
 * Strategy identifiers in the order in which the cascade probes them,
 * from "public" first to "ui" last.
 */
var CASCADE_ORDER = ["public", "cookie", "header", "intercept", "ui"];
3207
/**
 * Builds the source of an in-page async IIFE that fetches `url` and reports
 * whether the response looks like usable data.
 *
 * The generated script resolves to `{ status, ok: false }` for non-OK
 * responses, `{ ok: false, error }` when the fetch throws, and otherwise
 * `{ status, ok: true, hasData, preview }` where `preview` is the first 200
 * characters of the body and `hasData` is true for non-empty JSON without an
 * API-level error marker (non-zero `code`, `error`, or message "Unauthorized").
 *
 * @param {string} url URL to probe; embedded via `JSON.stringify`.
 * @param {{credentials?: boolean, extractCsrf?: boolean}} opts
 *   `credentials` adds `credentials: 'include'`; `extractCsrf` copies a CSRF
 *   cookie value (ct0, csrf_token, _csrf, XSRF-TOKEN) into request headers.
 * @returns {string} JavaScript source to pass to `page.evaluate`.
 */
function buildFetchProbeJs(url, opts) {
  let credentialsLine = "";
  if (opts.credentials) credentialsLine = `credentials: 'include',`;

  let headerSetup = "const headers = {};";
  if (opts.extractCsrf) {
    headerSetup = `
        const cookies = document.cookie.split(';').map(c => c.trim());
        const csrf = cookies.find(c =>
          c.startsWith('ct0=') || c.startsWith('csrf_token=') ||
          c.startsWith('_csrf=') || c.startsWith('XSRF-TOKEN=')
        )?.split('=').slice(1).join('=');
        const headers = {};
        if (csrf) { headers['X-Csrf-Token'] = csrf; headers['X-XSRF-Token'] = csrf; }
      `;
  }

  const quotedUrl = JSON.stringify(url);
  return `
    (async () => {
      try {
        ${headerSetup}
        const resp = await fetch(${quotedUrl}, {
          ${credentialsLine}
          headers
        });
        const status = resp.status;
        if (!resp.ok) return { status, ok: false };
        const text = await resp.text();
        let hasData = false;
        try {
          const json = JSON.parse(text);
          hasData = !!json && (Array.isArray(json) ? json.length > 0 :
            typeof json === 'object' && Object.keys(json).length > 0);
          // API-level error codes (common in Chinese sites)
          if (json.code !== undefined && json.code !== 0) hasData = false;
          if (json.error || json.message === 'Unauthorized') hasData = false;
        } catch {}
        return { status, ok: true, hasData, preview: text.slice(0, 200) };
      } catch (e) { return { ok: false, error: e.message }; }
    })()
  `;
}
3243
/**
 * Probes `url` with a single strategy and reports the outcome.
 *
 * "public", "cookie" and "header" run the generated fetch probe in the page
 * with progressively more credentials; "intercept" and "ui" are reported as
 * unsupported. Any thrown error is captured in the result, never rethrown.
 *
 * @param {object} page Page adapter exposing `evaluate(script)`.
 * @param {string} url Endpoint URL to probe.
 * @param {string} strategy One of the strategy identifiers.
 * @returns {Promise<object>} `{ strategy, success, statusCode?, hasData?,
 *   responsePreview?, error? }`.
 */
async function probeEndpoint(page, url, strategy) {
  const outcome = { strategy, success: false };
  // Fetch-probe options per strategy that can be probed generically.
  const probeOptions = new Map([
    ["public" /* PUBLIC */, {}],
    ["cookie" /* COOKIE */, { credentials: true }],
    ["header" /* HEADER */, { credentials: true, extractCsrf: true }]
  ]);
  try {
    const fetchOpts = probeOptions.get(strategy);
    if (fetchOpts) {
      const resp = await page.evaluate(buildFetchProbeJs(url, fetchOpts));
      outcome.statusCode = resp?.status;
      outcome.success = resp?.ok && resp?.hasData;
      outcome.hasData = resp?.hasData;
      outcome.responsePreview = resp?.preview;
    } else if (strategy === "intercept" /* INTERCEPT */ || strategy === "ui" /* UI */) {
      outcome.success = false;
      outcome.error = `Strategy ${strategy} requires site-specific implementation`;
    }
  } catch (err) {
    outcome.success = false;
    outcome.error = err.message ?? String(err);
  }
  return outcome;
}
3283
/**
 * Probes `url` with each strategy in cascade order, stopping at the first
 * one that succeeds.
 *
 * @param {object} page Page adapter passed through to `probeEndpoint`.
 * @param {string} url Endpoint URL to probe.
 * @param {{maxStrategy?: string}} [opts] Last strategy to try (inclusive);
 *   defaults to "header".
 * @returns {Promise<{bestStrategy: string, probes: object[], confidence: number}>}
 *   On success, confidence is `1 - 0.1 * index` of the winning strategy. When
 *   nothing succeeds, "cookie" is reported with confidence 0.3.
 */
async function cascadeProbe(page, url, opts = {}) {
  const ceiling = opts.maxStrategy ? opts.maxStrategy : "header" /* HEADER */;
  const lastIndex = Math.min(CASCADE_ORDER.indexOf(ceiling), CASCADE_ORDER.length - 1);
  const probes = [];
  let index = 0;
  while (index <= lastIndex) {
    const strategy = CASCADE_ORDER[index];
    log.debug(`Probing strategy: ${strategy}`);
    const probe = await probeEndpoint(page, url, strategy);
    probes.push(probe);
    if (probe.success) {
      return { bestStrategy: strategy, probes, confidence: 1 - index * 0.1 };
    }
    index += 1;
  }
  return { bestStrategy: "cookie" /* COOKIE */, probes, confidence: 0.3 };
}
3305
+
3306
+ // src/router/decision.ts
3307
/**
 * Decides how a request should be handled. Checks run in this order:
 *  1. `site` + `command` naming a registered adapter   -> level 2 (ADAPTER)
 *  2. `url` whose hostname has a registered adapter    -> level 2 (ADAPTER)
 *  3. `url` without a `task`                           -> level 0 (HTTP)
 *  4. `url` that looks like an API endpoint            -> level 0 (HTTP)
 *  5. `task` mentioning an interaction word, or any
 *     `task` accompanied by a `url`                    -> level 3 (AGENT)
 *  6. everything else                                  -> level 3 (AGENT)
 *
 * @param {{site?: string, command?: string, url?: string, task?: string}} request
 * @returns {{level: number, reason: string, adapter?: object}}
 */
function makeRoutingDecision(request) {
  const { site, command, url, task } = request;
  const viaAdapter = (reason, adapter) => ({ level: 2 /* ADAPTER */, reason, adapter });
  const viaHttp = (reason) => ({ level: 0 /* HTTP */, reason });
  const viaAgent = (reason) => ({ level: 3 /* AGENT */, reason });

  if (site && command) {
    const named = getAdapter(site, command);
    if (named) return viaAdapter(`Matched adapter: ${site}/${command}`, named);
  }

  if (url) {
    try {
      const host = new URL(url).hostname;
      const candidates = getAdapterByDomain(host);
      if (candidates.length > 0) {
        return viaAdapter(`Found adapter for domain: ${host}`, candidates[0]);
      }
    } catch {
      // Unparseable URL: no domain lookup, continue with the remaining rules.
    }
    if (!task) return viaHttp("Direct URL fetch (no task specified)");
    const looksLikeApi = url.endsWith(".json") || ["/api/", "/v1/", "/v2/"].some((marker) => url.includes(marker));
    if (looksLikeApi) return viaHttp("URL appears to be an API endpoint");
  }

  if (task) {
    const lowered = task.toLowerCase();
    const interactionWords = ["click", "scroll", "fill", "type", "login", "sign in", "search", "navigate", "find", "extract", "get"];
    if (url || interactionWords.some((word) => lowered.includes(word))) {
      return viaAgent("Task requires web interaction");
    }
  }

  return viaAgent("Defaulting to AI agent for unrecognized task");
}
3363
+
3364
+ // src/agent/core.ts
3365
+ import { readFileSync } from "fs";
3366
+ import { join as join2, dirname } from "path";
3367
+ import { fileURLToPath } from "url";
3368
+
3369
+ // src/agent/tools/click.ts
3370
+ import { z } from "zod";
3371
/**
 * Creates the agent tool that clicks an indexed page element.
 *
 * @param {object} page Page adapter exposing `click(index)`.
 * @returns {{description: string, inputSchema: object, execute: Function}}
 */
function createClickTool(page) {
  const inputSchema = z.object({
    index: z.number().describe("The index of the element to click")
  });
  const execute = async (args) => {
    await page.click(args.index);
    return `Clicked element [${args.index}]`;
  };
  return {
    description: "Click on an interactive element by its index number from the page content.",
    inputSchema,
    execute
  };
}
3383
+
3384
+ // src/agent/tools/type.ts
3385
+ import { z as z2 } from "zod";
3386
/**
 * Creates the agent tool that types text into an indexed input element.
 *
 * @param {object} page Page adapter exposing `typeText(index, text)`.
 * @returns {{description: string, inputSchema: object, execute: Function}}
 */
function createTypeTool(page) {
  const inputSchema = z2.object({
    index: z2.number().describe("The index of the input element"),
    text: z2.string().describe("The text to type")
  });
  const execute = async (args) => {
    await page.typeText(args.index, args.text);
    return `Typed "${args.text}" into element [${args.index}]`;
  };
  return {
    description: "Type text into an input field identified by its index number.",
    inputSchema,
    execute
  };
}
3399
+
3400
+ // src/agent/tools/scroll.ts
3401
+ import { z as z3 } from "zod";
3402
/**
 * Creates the agent tool that scrolls the page.
 *
 * @param {object} page Page adapter exposing `scroll(direction, amount)`.
 * @returns {{description: string, inputSchema: object, execute: Function}}
 */
function createScrollTool(page) {
  const inputSchema = z3.object({
    direction: z3.enum(["up", "down", "left", "right"]).describe("Scroll direction"),
    amount: z3.number().optional().describe("Pixels to scroll (default 500)")
  });
  const execute = async (args) => {
    await page.scroll(args.direction, args.amount);
    // The pixel suffix is only reported when a truthy amount was given.
    const suffix = args.amount ? ` ${args.amount}px` : "";
    return `Scrolled ${args.direction}${suffix}`;
  };
  return {
    description: "Scroll the page in a given direction. Use to reveal more content.",
    inputSchema,
    execute
  };
}
3415
+
3416
+ // src/agent/tools/select.ts
3417
+ import { z as z4 } from "zod";
3418
/**
 * Creates the agent tool that picks an option in an indexed select element.
 *
 * @param {object} page Page adapter exposing `selectOption(index, value)`.
 * @returns {{description: string, inputSchema: object, execute: Function}}
 */
function createSelectTool(page) {
  const inputSchema = z4.object({
    index: z4.number().describe("The index of the select element"),
    value: z4.string().describe("The option text or value to select")
  });
  const execute = async (args) => {
    await page.selectOption(args.index, args.value);
    return `Selected "${args.value}" in element [${args.index}]`;
  };
  return {
    description: "Select an option from a dropdown/select element by its index.",
    inputSchema,
    execute
  };
}
3431
+
3432
+ // src/agent/tools/wait.ts
3433
+ import { z as z5 } from "zod";
3434
/**
 * Creates the agent tool that pauses for a number of seconds
 * (schema-limited to 0.1–30).
 *
 * @returns {{description: string, inputSchema: object, execute: Function}}
 */
function createWaitTool() {
  const sleep = (milliseconds) => new Promise((resolve) => setTimeout(resolve, milliseconds));
  const inputSchema = z5.object({
    seconds: z5.number().min(0.1).max(30).describe("Seconds to wait")
  });
  const execute = async (args) => {
    await sleep(args.seconds * 1e3);
    return `Waited ${args.seconds} seconds`;
  };
  return {
    description: "Wait for a specified number of seconds before continuing.",
    inputSchema,
    execute
  };
}
3446
+
3447
+ // src/agent/tools/done.ts
3448
+ import { z as z6 } from "zod";
3449
/**
 * Creates the agent tool that signals task completion. Its result is a JSON
 * string `{"done":true,"success":…,"text":…}`.
 *
 * @returns {{description: string, inputSchema: object, execute: Function}}
 */
function createDoneTool() {
  const inputSchema = z6.object({
    success: z6.boolean().describe("Whether the task was completed successfully"),
    text: z6.string().describe("Summary of the result or explanation of failure")
  });
  const execute = async (args) => {
    const payload = { done: true, success: args.success, text: args.text };
    return JSON.stringify(payload);
  };
  return {
    description: "Signal that the task is complete. Call this when you have finished the task or cannot proceed further.",
    inputSchema,
    execute
  };
}
3461
+
3462
+ // src/agent/tools/ask-user.ts
3463
+ import { z as z7 } from "zod";
3464
+ import { createInterface } from "readline";
3465
/**
 * Creates the agent tool that asks the user a question on the terminal
 * (stdin/stdout via readline) and returns the typed answer.
 *
 * @returns {{description: string, inputSchema: object, execute: Function}}
 */
function createAskUserTool() {
  const inputSchema = z7.object({
    question: z7.string().describe("The question to ask the user")
  });
  const execute = async (args) => {
    const rl = createInterface({ input: process.stdin, output: process.stdout });
    const prompt = `\n\u{1F916} Agent asks: ${args.question}\n> `;
    return new Promise((resolve) => {
      rl.question(prompt, (answer) => {
        // Release stdin before handing the answer back.
        rl.close();
        resolve(`User answered: ${answer}`);
      });
    });
  };
  return {
    description: "Ask the user a question when you need clarification or input to proceed.",
    inputSchema,
    execute
  };
}
3484
+
3485
+ // src/agent/tools/execute-js.ts
3486
+ import { z as z8 } from "zod";
3487
/**
 * Creates the agent tool that evaluates JavaScript in the page. String
 * results are returned as-is; anything else is pretty-printed as JSON.
 *
 * @param {object} page Page adapter exposing `evaluate(code)`.
 * @returns {{description: string, inputSchema: object, execute: Function}}
 */
function createExecuteJsTool(page) {
  const inputSchema = z8.object({
    code: z8.string().describe("JavaScript code to execute on the page")
  });
  const execute = async (args) => {
    const value = await page.evaluate(args.code);
    if (typeof value === "string") return value;
    return JSON.stringify(value, null, 2);
  };
  return {
    description: "Execute JavaScript code on the current page. Returns the result.",
    inputSchema,
    execute
  };
}
3499
+
3500
+ // src/agent/tools/index.ts
3501
/**
 * Assembles the default agent tool set, keyed by the tool names exposed to
 * the model.
 *
 * @param {object} page Page adapter handed to the page-bound tools.
 * @returns {Object<string, object>} Tool name -> tool definition.
 */
function createDefaultTools(page) {
  const tools = {};
  tools.click_element_by_index = createClickTool(page);
  tools.input_text = createTypeTool(page);
  tools.scroll = createScrollTool(page);
  tools.select_dropdown_option = createSelectTool(page);
  tools.wait = createWaitTool();
  tools.done = createDoneTool();
  tools.ask_user = createAskUserTool();
  tools.execute_javascript = createExecuteJsTool(page);
  return tools;
}
3513
+
3514
+ // src/agent/macro-tool.ts
3515
+ import { z as z9 } from "zod";
3516
+
3517
+ // src/agent/auto-fixer.ts
3518
/**
 * Repair a loosely-formatted model response into `{ ..., action: { <tool>: <input object> } }`.
 *
 * Steps, in order:
 *  1. Unwrap an OpenAI-style `{ type: "function", function: { arguments } }` envelope.
 *  2. Decode top-level string values that contain JSON objects/arrays.
 *  3. Lift a top-level key that names a known action into `action`.
 *  4. Fall back to `{ wait: { seconds: 1 } }` when no usable action is present.
 *  5. Wrap primitive tool inputs into objects and, when schemas are supplied,
 *     try to fix inputs that fail validation.
 *
 * @param {object} raw - Raw tool-call arguments (shallow-copied, nested objects are shared).
 * @param {string} toolName - Name of the macro tool (currently unused, kept for callers).
 * @param {string[]} availableActions - Known action names.
 * @param {Record<string, {inputSchema?: object}>} [toolSchemas] - Tool definitions keyed by action name.
 * @returns {object} Normalized response whose `action` is a non-empty object.
 */
function normalizeResponse(raw, toolName, availableActions, toolSchemas) {
  const isObject = (candidate) => typeof candidate === "object" && candidate !== null;
  const fallbackAction = () => ({ wait: { seconds: 1 } });
  let result = { ...raw };
  if (result.type === "function" && result.function) {
    let fnArgs = result.function.arguments;
    if (typeof fnArgs === "string") {
      try {
        fnArgs = JSON.parse(fnArgs);
      } catch {
        // Not JSON: keep the envelope and let the fallbacks below apply.
        fnArgs = void 0;
      }
    }
    // Only a plain object can replace the envelope; null, arrays and primitives
    // (e.g. arguments: "5") would otherwise break the property accesses below.
    if (isObject(fnArgs) && !Array.isArray(fnArgs)) {
      result = { ...fnArgs };
    }
  }
  for (const [key, value] of Object.entries(result)) {
    if (typeof value === "string") {
      try {
        const parsed = JSON.parse(value);
        if (isObject(parsed)) {
          result[key] = parsed;
        }
      } catch {
        // Plain text value; leave untouched.
      }
    }
  }
  if (!result.action) {
    for (const actionName of availableActions) {
      if (actionName in result) {
        result = {
          ...result,
          action: { [actionName]: result[actionName] }
        };
        delete result[actionName];
        break;
      }
    }
  }
  if (!result.action) {
    result.action = fallbackAction();
  }
  if (typeof result.action === "string") {
    result.action = availableActions.includes(result.action) ? { [result.action]: {} } : fallbackAction();
  }
  // A truthy primitive or an empty object/array has no tool entry to dispatch.
  if (!isObject(result.action) || Object.keys(result.action).length === 0) {
    result.action = fallbackAction();
  }
  const action = result.action;
  for (const [name, input] of Object.entries(action)) {
    const schema = toolSchemas?.[name]?.inputSchema;
    if (!isObject(input)) {
      if (toolSchemas && toolSchemas[name]) {
        const coerced = coercePrimitiveToSchema(input, schema);
        if (coerced !== null) {
          action[name] = coerced;
          continue;
        }
      }
      if (typeof input === "number") {
        action[name] = { index: input };
      } else if (typeof input === "string") {
        action[name] = { text: input };
      } else {
        action[name] = {};
      }
    }
    // Tools without a usable schema are passed through unvalidated.
    if (schema && typeof schema.safeParse === "function" && isObject(action[name])) {
      const validation = schema.safeParse(action[name]);
      if (!validation.success) {
        const fixed = attemptSchemaFix(action[name], schema, validation.error);
        if (fixed) {
          action[name] = fixed;
        }
      }
    }
  }
  return result;
}
3596
/**
 * Guess which field of an object schema a bare primitive was meant for.
 *
 * Reads the schema through its `_def` (`typeName`, `shape()`). A field counts as
 * required unless its `_def.typeName` is "ZodOptional". With exactly one required
 * field the value is assigned to it; otherwise numbers go to the first field whose
 * name looks like an index and strings to the first field whose name looks textual.
 *
 * @param {unknown} value - Primitive supplied in place of the tool's input object.
 * @param {object} schema - Schema exposing `_def.typeName` and `_def.shape()`.
 * @returns {object|null} Single-key object, or null when no guess applies or the
 *   schema cannot be inspected.
 */
function coercePrimitiveToSchema(value, schema) {
  try {
    const objectDef = schema._def;
    if (objectDef?.typeName !== "ZodObject") {
      return null;
    }
    const fields = objectDef.shape();
    const fieldNames = Object.keys(fields);
    const mandatory = [];
    for (const fieldName of fieldNames) {
      if (fields[fieldName]?._def?.typeName !== "ZodOptional") {
        mandatory.push(fieldName);
      }
    }
    if (mandatory.length === 1) {
      const [onlyField] = mandatory;
      return { [onlyField]: value };
    }
    const firstMatching = (pattern) => fieldNames.find((candidate) => pattern.test(candidate));
    const numericField = firstMatching(/index|idx|num|number/i);
    if (numericField && typeof value === "number") {
      return { [numericField]: value };
    }
    const stringField = firstMatching(/text|value|query|code|question|url/i);
    if (stringField && typeof value === "string") {
      return { [stringField]: value };
    }
    return null;
  } catch {
    // Schema is missing or not shaped as expected: no coercion possible.
    return null;
  }
}
3621
/**
 * Try to repair a tool input that failed schema validation.
 *
 * For each top-level `invalid_type` issue, converts between string/number/boolean
 * when the conversion is unambiguous; for `unrecognized_keys` issues, drops the
 * offending keys. The repaired copy is returned only if it then validates.
 *
 * @param {object} input - Input that failed validation (not mutated).
 * @param {object} schema - Schema exposing `_def.typeName` and `safeParse`.
 * @param {{issues: Array<object>}} error - Validation error describing the failures.
 * @returns {object|null} Repaired copy, or null when it still does not validate.
 */
function attemptSchemaFix(input, schema, error) {
  try {
    if (schema._def?.typeName !== "ZodObject") return null;
    const fixed = { ...input };
    for (const issue of error.issues) {
      if (issue.code === "invalid_type" && issue.path.length === 1) {
        const key = String(issue.path[0]);
        const val = input[key];
        if (issue.expected === "number" && typeof val === "string") {
          // Number("") and Number("  ") are 0, not NaN; a blank string is not a number.
          if (val.trim() !== "") {
            const num = Number(val);
            if (!Number.isNaN(num)) fixed[key] = num;
          }
        } else if (issue.expected === "string" && typeof val === "number") {
          fixed[key] = String(val);
        } else if (issue.expected === "boolean" && typeof val === "string") {
          // Only recognised spellings are converted; anything else stays invalid
          // instead of silently becoming false.
          const lowered = val.trim().toLowerCase();
          if (lowered === "true") fixed[key] = true;
          else if (lowered === "false") fixed[key] = false;
        }
      }
      if (issue.code === "unrecognized_keys") {
        for (const k of issue.keys || []) {
          delete fixed[k];
        }
      }
    }
    if (schema.safeParse(fixed).success) return fixed;
  } catch {
    // Unexpected schema/error shape: report "could not fix".
  }
  return null;
}
3654
+
3655
+ // src/agent/macro-tool.ts
3656
/**
 * Pack a set of tools into a single "AgentOutput" macro tool whose `action`
 * field selects one tool and carries its input.
 *
 * @param {Record<string, {description: string, inputSchema: object, execute: Function}>} tools
 *   Tool definitions keyed by action name.
 * @returns {{name: string, description: string, schema: object, execute: (args: object) => Promise<unknown>}}
 *   Macro tool. `execute` resolves with the selected tool's result, or with an
 *   "Error ..." string when the action is missing, unknown, or the tool throws.
 */
function packMacroTool(tools) {
  const actionSchemas = [];
  const toolNames = [];
  for (const [name, tool] of Object.entries(tools)) {
    toolNames.push(name);
    actionSchemas.push(
      z9.object({ [name]: tool.inputSchema }).describe(tool.description)
    );
  }
  const actionSchema = actionSchemas.length === 1 ? actionSchemas[0] : z9.union(actionSchemas);
  const macroSchema = z9.object({
    evaluation_previous_goal: z9.string().optional().describe("Evaluate whether the previous goal was achieved"),
    memory: z9.string().optional().describe("Important information to remember for future steps"),
    next_goal: z9.string().optional().describe("The next immediate goal to achieve"),
    action: actionSchema.describe("The action to take")
  });
  return {
    name: "AgentOutput",
    description: "The agent's output containing reflection and action. Must be called every step.",
    schema: macroSchema,
    execute: async (args) => {
      const normalized = normalizeResponse(args, "AgentOutput", toolNames, tools);
      const entries = Object.entries(normalized.action ?? {});
      // An empty action has no entry to destructure; report it instead of throwing.
      if (entries.length === 0) {
        return `Error: No action provided. Available: ${toolNames.join(", ")}`;
      }
      const [toolName, toolInput] = entries[0];
      // Check against the registered names so inherited keys such as
      // "constructor" are not mistaken for tools.
      const tool = toolNames.includes(toolName) ? tools[toolName] : void 0;
      if (!tool) {
        return `Error: Unknown tool "${toolName}". Available: ${toolNames.join(", ")}`;
      }
      try {
        return await tool.execute(toolInput);
      } catch (err) {
        return `Error executing ${toolName}: ${err}`;
      }
    }
  };
}
3693
+
3694
+ // src/agent/core.ts
3695
// Directory of this module, derived from its file URL; joined with "prompts/system.md" when loading the system prompt.
+ var __dirname = dirname(fileURLToPath(import.meta.url));
3696
/**
 * Step-based agent loop: on every step it snapshots the page, asks the LLM for
 * one action through the packed macro tool, records the outcome in `history`,
 * and stops on `done`, on abort, or after `maxSteps`.
 *
 * Emits "statuschange", "historychange" and "activity" events to listeners
 * registered through `on`.
 */
var AgentCore = class {
  page;
  config;
  llm;
  history = [];
  _status = "idle";
  listeners = /* @__PURE__ */ new Map();
  previousElementHashes = /* @__PURE__ */ new Set();
  totalWaitTime = 0;
  /**
   * @param {object} page - Page wrapper providing `browserState()` and `flatTree()`.
   * @param {object} config - Agent configuration; `config.llm` is passed to `LLM`.
   */
  constructor(page, config) {
    this.page = page;
    this.config = config;
    this.llm = new LLM(config.llm);
  }
  // ── Event system ──
  /** Register `listener` for events whose `type` equals `event`. */
  on(event, listener) {
    if (!this.listeners.has(event)) this.listeners.set(event, /* @__PURE__ */ new Set());
    this.listeners.get(event).add(listener);
  }
  /** Remove a listener previously registered with `on`. */
  off(event, listener) {
    this.listeners.get(event)?.delete(listener);
  }
  /** Deliver `event` to every listener registered for `event.type`. */
  emit(event) {
    const listeners = this.listeners.get(event.type);
    if (listeners) {
      for (const fn of listeners) {
        try {
          fn(event);
        } catch {
          // Best effort: a failing listener must not break the agent loop.
        }
      }
    }
  }
  /** Current status: "idle", "running", "completed" or "error". */
  get status() {
    return this._status;
  }
  /** Update the status and emit a "statuschange" event. */
  setStatus(newStatus) {
    const prev = this._status;
    this._status = newStatus;
    this.emit({ type: "statuschange", status: newStatus, previousStatus: prev });
  }
  /** Append `event` to the history and emit a "historychange" event. */
  pushHistory(event) {
    this.history.push(event);
    this.emit({ type: "historychange", history: this.history });
  }
  /**
   * Run the agent on `task` until it finishes, is aborted, or runs out of steps.
   *
   * @param {string} task - Natural-language task description.
   * @param {AbortSignal} [abortSignal] - Checked at the start of each step and passed to the LLM.
   * @returns {Promise<{success: boolean, data: unknown, history: Array<object>}>}
   * @throws Re-throws any error raised outside the LLM call (hooks, tool packing,
   *   malformed LLM results) after setting the status to "error".
   */
  async execute(task, abortSignal) {
    this.setStatus("running");
    this.history = [];
    this.previousElementHashes.clear();
    this.totalWaitTime = 0;
    try {
      return await this.runSteps(task, abortSignal);
    } catch (err) {
      // Without this the status would stay "running" after a failure.
      this.setStatus("error");
      throw err;
    }
  }
  /** Step loop behind `execute`; terminal status for normal exits is set here. */
  async runSteps(task, abortSignal) {
    const maxSteps = this.config.maxSteps ?? 40;
    const stepDelay = this.config.stepDelay ?? 0.4;
    const tools = {
      ...createDefaultTools(this.page),
      ...this.config.customTools || {}
    };
    // A null/undefined entry in customTools disables the tool of that name.
    for (const [name, tool] of Object.entries(tools)) {
      if (tool == null) delete tools[name];
    }
    const macroTool = packMacroTool(tools);
    let systemPrompt;
    try {
      systemPrompt = readFileSync(join2(__dirname, "prompts", "system.md"), "utf-8");
    } catch {
      // Prompt file not readable: use the built-in one-line prompt.
      systemPrompt = "You are an AI web agent that navigates web pages to complete tasks.";
    }
    if (this.config.instructions?.system) {
      systemPrompt += "\n\n" + this.config.instructions.system;
    }
    let lastURL = "";
    for (let step = 1; step <= maxSteps; step++) {
      if (abortSignal?.aborted) {
        this.setStatus("error");
        return { success: false, data: "Aborted", history: this.history };
      }
      // Snapshot failures degrade to an empty state/tree instead of ending the run.
      const browserState = await this.page.browserState().catch(() => ({
        url: "",
        title: "",
        viewportWidth: 0,
        viewportHeight: 0,
        pageWidth: 0,
        pageHeight: 0,
        scrollX: 0,
        scrollY: 0,
        scrollPercent: 0,
        pixelsAbove: 0,
        pixelsBelow: 0
      }));
      const flatTree = await this.page.flatTree().catch(() => ({ rootId: "", map: {} }));
      const pageContent = flatTreeToString(flatTree);
      // Fingerprint interactive elements to count those absent on the previous step.
      const currentHashes = /* @__PURE__ */ new Set();
      let newElementCount = 0;
      for (const node of Object.values(flatTree.map)) {
        if (node.isInteractive && node.highlightIndex !== void 0) {
          const hash = `${node.tagName}:${node.text || ""}:${JSON.stringify(node.attributes || {})}`;
          currentHashes.add(hash);
          if (!this.previousElementHashes.has(hash)) {
            newElementCount++;
          }
        }
      }
      this.previousElementHashes = currentHashes;
      const observations = [];
      if (browserState.url !== lastURL && lastURL) {
        observations.push(`Navigated to ${browserState.url}`);
      }
      lastURL = browserState.url;
      if (newElementCount > 0 && step > 1) {
        observations.push(`${newElementCount} new interactive element(s) appeared`);
      }
      if (this.totalWaitTime > 3) {
        observations.push(`Total wait time: ${this.totalWaitTime.toFixed(1)}s \u2014 consider if page is still loading`);
      }
      if (step >= maxSteps - 5) {
        observations.push(`Warning: ${maxSteps - step} steps remaining`);
      }
      if (this.config.instructions?.getPageInstructions) {
        try {
          const pi = this.config.instructions.getPageInstructions(browserState.url);
          if (pi) observations.push(`Page instructions: ${pi}`);
        } catch {
          // Page instructions are optional; ignore a failing callback.
        }
      }
      for (const obs of observations) {
        this.pushHistory({ type: "observation", message: obs });
        this.emit({ type: "activity", kind: "observation", message: obs, step });
      }
      const userPrompt = assembleUserPrompt(
        task,
        pageContent,
        browserState,
        this.history,
        step,
        maxSteps
      );
      const messages = [
        { role: "system", content: systemPrompt },
        { role: "user", content: userPrompt }
      ];
      log.step(step, `Thinking... (${browserState.url})`);
      this.emit({ type: "activity", kind: "thinking", message: `Step ${step}: thinking`, step });
      if (this.config.onBeforeStep) await this.config.onBeforeStep(step);
      const startTime = Date.now();
      let result;
      try {
        result = await this.llm.invoke(messages, macroTool, abortSignal);
      } catch (err) {
        // An LLM failure consumes the step; the loop moves on to the next one.
        log.error(`LLM error at step ${step}: ${err}`);
        this.pushHistory({ type: "error", error: String(err), step });
        this.emit({ type: "activity", kind: "error", message: String(err), step });
        continue;
      }
      const duration = Date.now() - startTime;
      const args = result.toolCall.args;
      const action = args.action || args;
      const [actionName, actionInput] = Object.entries(action)[0] || ["unknown", {}];
      // Tools may return non-string values; everything below slices or parses text.
      const toolResult = this.toText(result.toolResult);
      this.emit({ type: "activity", kind: "executing", message: actionName, step });
      if (actionName === "wait") {
        // Ignore non-numeric values so totalWaitTime stays a number.
        const secs = Number(actionInput?.seconds);
        if (Number.isFinite(secs)) this.totalWaitTime += secs;
      }
      const stepEvent = {
        type: "step",
        step,
        reflection: {
          evaluation_previous_goal: args.evaluation_previous_goal || "",
          memory: args.memory || "",
          next_goal: args.next_goal || ""
        },
        action: { name: actionName, args: actionInput },
        output: toolResult,
        duration
      };
      this.pushHistory(stepEvent);
      log.step(step, `Action: ${actionName} \u2192 ${toolResult.slice(0, 100)}`);
      this.emit({ type: "activity", kind: "executed", message: `${actionName}: ${toolResult.slice(0, 80)}`, step, duration });
      if (this.config.onAfterStep) await this.config.onAfterStep(this.history);
      if (actionName === "done") {
        try {
          const doneResult = JSON.parse(toolResult);
          this.setStatus("completed");
          return { success: doneResult.success, data: doneResult.text || toolResult, history: this.history };
        } catch {
          // Result is not the expected JSON payload: report it verbatim as a success.
          this.setStatus("completed");
          return { success: true, data: toolResult, history: this.history };
        }
      }
      if (stepDelay > 0) {
        await new Promise((r) => setTimeout(r, stepDelay * 1e3));
      }
    }
    this.setStatus("error");
    return { success: false, data: `Reached maximum steps (${maxSteps})`, history: this.history };
  }
  /** Convert a tool result to text: strings pass through, other values are JSON-encoded. */
  toText(value) {
    if (typeof value === "string") return value;
    if (value === null || value === void 0) return "";
    try {
      return JSON.stringify(value) ?? String(value);
    } catch {
      return String(value);
    }
  }
};
3891
/**
 * Build the per-step user prompt: task, page summary, page content and the
 * ten most recent history events.
 *
 * @param {string} task - Task description.
 * @param {string} pageContent - Text rendering of the page.
 * @param {object} state - Browser state (`url`, `title`, viewport and scroll fields).
 * @param {Array<object>} history - Agent history; only "step" and "observation" events are rendered.
 * @param {number} step - Current step number.
 * @param {number} maxSteps - Step budget.
 * @returns {string} Prompt text.
 */
function assembleUserPrompt(task, pageContent, state, history, step, maxSteps) {
  // Step outputs come from tools and are not guaranteed to be strings.
  const asText = (value) => {
    if (typeof value === "string") return value;
    if (value === null || value === void 0) return "";
    try {
      return JSON.stringify(value) ?? String(value);
    } catch {
      return String(value);
    }
  };
  let prompt = `# Task\n${task}\n\n`;
  prompt += `# Current Page\n`;
  prompt += `URL: ${state.url}\n`;
  prompt += `Title: ${state.title}\n`;
  prompt += `Viewport: ${state.viewportWidth}x${state.viewportHeight} | Page height: ${state.pageHeight}px\n`;
  prompt += `Scroll: ${state.scrollPercent}%`;
  // Only mention off-screen content when there is a meaningful amount of it.
  if (state.pixelsAbove > 50) prompt += ` | ${state.pixelsAbove}px above`;
  if (state.pixelsBelow > 50) prompt += ` | ${state.pixelsBelow}px below`;
  prompt += `\nStep: ${step}/${maxSteps}\n\n`;
  prompt += `# Browser State\n${pageContent}\n\n`;
  if (history.length > 0) {
    prompt += `# History\n`;
    for (const event of history.slice(-10)) {
      if (event.type === "step") {
        prompt += `<step_${event.step}>\n`;
        if (event.reflection) {
          prompt += ` eval: ${event.reflection.evaluation_previous_goal}\n`;
          prompt += ` memory: ${event.reflection.memory}\n`;
          prompt += ` goal: ${event.reflection.next_goal}\n`;
        }
        prompt += ` action: ${event.action.name}(${JSON.stringify(event.action.args)})\n`;
        prompt += ` result: ${asText(event.output).slice(0, 200)}\n`;
        prompt += `</step_${event.step}>\n`;
      } else if (event.type === "observation") {
        prompt += `<sys>${event.message}</sys>\n`;
      }
    }
  }
  return prompt;
}
3946
+
3947
+ // src/config/index.ts
3948
+ import { readFileSync as readFileSync2, writeFileSync as writeFileSync2, mkdirSync as mkdirSync2, existsSync as existsSync3 } from "fs";
3949
+ import { join as join3 } from "path";
3950
+ import { homedir } from "os";
3951
+ import yaml2 from "js-yaml";
3952
+
3953
+ // src/config/schema.ts
3954
+ import { z as z10 } from "zod";
3955
/**
 * Built-in LLM provider presets keyed by provider id. Each preset carries a
 * display name, base URL, default model, API-key prefix, a hint on where to get
 * a key, and a list of model names.
 */
var LLM_PROVIDERS = (() => {
  // Keeps every preset in the same field order.
  const preset = (name, baseURL, defaultModel, keyPrefix, keyEnvHint, models) => ({
    name,
    baseURL,
    defaultModel,
    keyPrefix,
    keyEnvHint,
    models
  });
  return {
    openai: preset(
      "OpenAI",
      "https://api.openai.com/v1",
      "gpt-4o",
      "sk-",
      "https://platform.openai.com/api-keys",
      ["gpt-4o", "gpt-4o-mini", "gpt-4-turbo", "o1", "o1-mini", "o3-mini"]
    ),
    anthropic: preset(
      "Anthropic",
      "https://api.anthropic.com/v1",
      "claude-sonnet-4-20250514",
      "sk-ant-",
      "https://console.anthropic.com/settings/keys",
      ["claude-opus-4-20250514", "claude-sonnet-4-20250514", "claude-haiku-4-5-20251001"]
    ),
    gemini: preset(
      "Google Gemini",
      "https://generativelanguage.googleapis.com/v1beta/openai",
      "gemini-2.5-flash",
      "AI",
      "https://aistudio.google.com/apikey",
      ["gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.5-pro", "gemini-3-flash-preview"]
    ),
    ollama: preset(
      "Ollama (local, free)",
      "http://localhost:11434/v1",
      "llama3.1",
      "",
      "No API key needed \u2014 install from https://ollama.ai",
      ["llama3.1", "llama3.2", "mistral", "codestral", "qwen2.5", "deepseek-r1"]
    )
  };
})();
3989
/**
 * Schema for the configuration object, with four sections (`llm`, `browser`,
 * `agent`, `output`). Every section and every field declares a default.
 */
var configSchema = (() => {
  const llmSection = z10.object({
    provider: z10.enum(["openai", "anthropic", "gemini", "ollama"]).default("openai"),
    baseURL: z10.string().default("https://api.openai.com/v1"),
    model: z10.string().default("gpt-4o"),
    apiKey: z10.string().default(""),
    temperature: z10.number().min(0).max(2).default(0.1),
    maxRetries: z10.number().int().min(0).default(3)
  });
  const browserSection = z10.object({
    executablePath: z10.string().default(""),
    headless: z10.boolean().default(true),
    connectTimeout: z10.number().default(30),
    commandTimeout: z10.number().default(60),
    cdpEndpoint: z10.string().default("")
  });
  const agentSection = z10.object({
    maxSteps: z10.number().int().default(40),
    stepDelay: z10.number().default(0.4)
  });
  const outputSection = z10.object({
    defaultFormat: z10.enum(["table", "json", "yaml", "markdown", "csv"]).default("table"),
    color: z10.boolean().default(true)
  });
  return z10.object({
    llm: llmSection.default({}),
    browser: browserSection.default({}),
    agent: agentSection.default({}),
    output: outputSection.default({})
  });
})();
4014
+
4015
+ // src/config/index.ts
4016
// Per-user configuration directory (".lobster" under the home directory) and the YAML config file inside it.
+ var CONFIG_DIR = join3(homedir(), ".lobster");
4017
+ var CONFIG_FILE = join3(CONFIG_DIR, "config.yaml");
4018
/**
 * Create the configuration directory, including missing parents, if it does
 * not exist yet.
 */
function ensureConfigDir() {
  const alreadyThere = existsSync3(CONFIG_DIR);
  if (alreadyThere) {
    return;
  }
  mkdirSync2(CONFIG_DIR, { recursive: true });
}
4023
/**
 * Load the effective configuration: values from the YAML config file, overlaid
 * with `LOBSTER_*` environment variables, validated and defaulted by `configSchema`.
 *
 * Environment overrides: LOBSTER_API_KEY, LOBSTER_MODEL, LOBSTER_BASE_URL (llm
 * section); LOBSTER_CDP_ENDPOINT, LOBSTER_BROWSER_PATH (browser section).
 * Empty variables are ignored.
 *
 * @returns {object} Parsed configuration.
 * @throws If the file is not valid YAML or the merged values fail schema validation.
 */
function loadConfig() {
  ensureConfigDir();
  let fileConfig = {};
  if (existsSync3(CONFIG_FILE)) {
    const raw = readFileSync2(CONFIG_FILE, "utf-8");
    const parsed = yaml2.load(raw);
    // Only a mapping is a usable config root (an empty file parses to nothing).
    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
      // A section written with no value (e.g. "llm:") parses to null, which the
      // schema rejects; drop it so the section falls back to its defaults.
      fileConfig = Object.fromEntries(
        Object.entries(parsed).filter(([, section]) => section !== null && section !== void 0)
      );
    }
  }
  const env = process.env;
  const llmEnv = {};
  if (env.LOBSTER_API_KEY) llmEnv.apiKey = env.LOBSTER_API_KEY;
  if (env.LOBSTER_MODEL) llmEnv.model = env.LOBSTER_MODEL;
  if (env.LOBSTER_BASE_URL) llmEnv.baseURL = env.LOBSTER_BASE_URL;
  const browserEnv = {};
  if (env.LOBSTER_CDP_ENDPOINT) browserEnv.cdpEndpoint = env.LOBSTER_CDP_ENDPOINT;
  if (env.LOBSTER_BROWSER_PATH) browserEnv.executablePath = env.LOBSTER_BROWSER_PATH;
  // A section is overridden only when at least one of its variables is set.
  const envOverrides = {};
  if (Object.keys(llmEnv).length > 0) {
    envOverrides.llm = { ...fileConfig.llm || {}, ...llmEnv };
  }
  if (Object.keys(browserEnv).length > 0) {
    envOverrides.browser = { ...fileConfig.browser || {}, ...browserEnv };
  }
  const merged = { ...fileConfig, ...envOverrides };
  return configSchema.parse(merged);
}
4049
/**
 * Merge `config` into the current configuration and write the result to the
 * YAML config file.
 *
 * @param {object} config - Partial configuration; nested sections are deep-merged.
 */
function saveConfig(config) {
  ensureConfigDir();
  // NOTE(review): loadConfig() includes LOBSTER_* environment overrides and
  // schema defaults, so both end up persisted in the file — confirm this is intended.
  const existing = loadConfig();
  const merged = deepMerge(existing, config);
  // The file can contain the API key: create it readable by the owner only.
  // `mode` applies only when the file is created; existing files keep their mode.
  writeFileSync2(CONFIG_FILE, yaml2.dump(merged, { indent: 2 }), { encoding: "utf-8", mode: 0o600 });
}
4055
// Returns the path of the configuration directory (the CONFIG_DIR constant).
+ function getConfigDir() {
4056
+ return CONFIG_DIR;
4057
+ }
4058
/**
 * Recursively merge `source` into a shallow copy of `target`.
 *
 * Plain objects present on both sides are merged key by key; any other value
 * (including arrays) from `source` replaces the one in `target`.
 *
 * @param {object} target - Base object (not mutated).
 * @param {object} [source] - Overrides; null/undefined yields a copy of `target`.
 * @returns {object} Merged object.
 */
function deepMerge(target, source) {
  const isPlainObject = (value) => Boolean(value) && typeof value === "object" && !Array.isArray(value);
  const result = { ...target };
  if (source === null || source === void 0) {
    return result;
  }
  for (const key of Object.keys(source)) {
    // Assigning to "__proto__" would replace the result's prototype instead of
    // creating a property (prototype pollution through parsed input).
    if (key === "__proto__") continue;
    if (isPlainObject(source[key]) && isPlainObject(target[key])) {
      result[key] = deepMerge(target[key], source[key]);
    } else {
      result[key] = source[key];
    }
  }
  return result;
}
4069
+
4070
+ // src/output/table.ts
4071
+ import Table from "cli-table3";
4072
/**
 * Render an array of records as a terminal table.
 *
 * @param {unknown} data - Rows to render. Anything other than a non-empty array
 *   is returned as-is (strings) or as pretty-printed JSON.
 * @param {string[]} [columns] - Column names; defaults to the keys of the first row.
 * @returns {string} Rendered table or fallback text.
 */
function renderTable(data, columns) {
  const hasRows = Array.isArray(data) && data.length > 0;
  if (!hasRows) {
    if (typeof data === "string") {
      return data;
    }
    return JSON.stringify(data, null, 2);
  }
  const header = columns || Object.keys(data[0]);
  const grid = new Table({
    head: header,
    style: { head: ["cyan"] },
    wordWrap: true
  });
  data.forEach((record) => {
    const cells = header.map((field) => {
      const cell = record[field];
      // Missing values render as empty cells.
      return cell === null || cell === void 0 ? "" : String(cell);
    });
    grid.push(cells);
  });
  return grid.toString();
}
4091
+
4092
+ // src/output/json.ts
4093
/**
 * Serialize `data` as JSON indented with two spaces.
 *
 * @param {unknown} data - Value to serialize.
 * @returns {string} Pretty-printed JSON.
 */
function renderJson(data) {
  const indentWidth = 2;
  const text = JSON.stringify(data, null, indentWidth);
  return text;
}
4096
+
4097
+ // src/output/markdown.ts
4098
/**
 * Render an array of records as a Markdown table.
 *
 * @param {unknown} data - Rows to render. Anything other than a non-empty array
 *   is returned as-is (strings) or as pretty-printed JSON.
 * @param {string[]} [columns] - Column names; defaults to the keys of the first row.
 * @returns {string} Markdown table or fallback text.
 */
function renderMarkdown(data, columns) {
  if (!Array.isArray(data) || data.length === 0) {
    return typeof data === "string" ? data : JSON.stringify(data, null, 2);
  }
  const cols = columns || Object.keys(data[0] ?? {});
  // A literal pipe or a line break inside a cell would split the row;
  // escape pipes and turn line breaks into <br>.
  const toCell = (value) => String(value).replace(/\|/g, "\\|").replace(/\r\n|\r|\n/g, "<br>");
  const lines = [];
  lines.push("| " + cols.map(toCell).join(" | ") + " |");
  lines.push("| " + cols.map(() => "---").join(" | ") + " |");
  for (const row of data) {
    const vals = cols.map((col) => {
      const val = row?.[col];
      if (val === null || val === void 0) return "";
      return toCell(val);
    });
    lines.push("| " + vals.join(" | ") + " |");
  }
  return lines.join("\n");
}
4116
+
4117
+ // src/output/csv.ts
4118
/**
 * Render an array of records as CSV with a header line.
 *
 * @param {unknown} data - Rows to render. Anything other than a non-empty array
 *   is returned as-is (strings) or as compact JSON.
 * @param {string[]} [columns] - Column names; defaults to the keys of the first row.
 * @returns {string} CSV text (lines joined with "\n") or fallback text.
 */
function renderCsv(data, columns) {
  if (!Array.isArray(data) || data.length === 0) {
    return typeof data === "string" ? data : JSON.stringify(data);
  }
  const cols = columns || Object.keys(data[0] ?? {});
  // Quote any field containing a delimiter, quote or line break (including a
  // bare carriage return); embedded quotes are doubled.
  const toField = (value) => {
    const str = String(value);
    return /[",\r\n]/.test(str) ? `"${str.replace(/"/g, '""')}"` : str;
  };
  // Header names are escaped like data fields.
  const lines = [cols.map(toField).join(",")];
  for (const row of data) {
    const vals = cols.map((col) => {
      const val = row?.[col];
      if (val === null || val === void 0) return "";
      return toField(val);
    });
    lines.push(vals.join(","));
  }
  return lines.join("\n");
}
4135
+
4136
+ // src/output/yaml.ts
4137
+ import yaml3 from "js-yaml";
4138
/**
 * Serialize `data` as YAML using a 2-space indent and a 120-character line width.
 *
 * @param {unknown} data - Value to serialize.
 * @returns {string} YAML text.
 */
function renderYaml(data) {
  const dumpOptions = { indent: 2, lineWidth: 120 };
  const text = yaml3.dump(data, dumpOptions);
  return text;
}
4141
+
4142
+ // src/output/index.ts
4143
/**
 * Render `data` in the requested output format.
 *
 * @param {unknown} data - Value to render.
 * @param {string} format - One of "table", "json", "markdown", "csv", "yaml";
 *   any other value falls back to JSON.
 * @param {string[]} [columns] - Column selection for the tabular formats.
 * @returns {string} Rendered text.
 */
function render(data, format, columns) {
  // A Map (not an object) so formats like "constructor" cannot hit inherited keys.
  const renderers = /* @__PURE__ */ new Map([
    ["table", () => renderTable(data, columns)],
    ["json", () => renderJson(data)],
    ["markdown", () => renderMarkdown(data, columns)],
    ["csv", () => renderCsv(data, columns)],
    ["yaml", () => renderYaml(data)]
  ]);
  const chosen = renderers.get(format);
  if (chosen === void 0) {
    return renderJson(data);
  }
  return chosen();
}
4159
+ export {
4160
+ AgentCore,
4161
+ BrowserManager,
4162
+ FLAT_TREE_SCRIPT,
4163
+ FORM_STATE_SCRIPT,
4164
+ GET_INTERCEPTED_SCRIPT,
4165
+ INTERACTIVE_ELEMENTS_SCRIPT,
4166
+ LLM,
4167
+ LLM_PROVIDERS,
4168
+ MARKDOWN_SCRIPT,
4169
+ OpenAIClient,
4170
+ PuppeteerPage,
4171
+ SEMANTIC_TREE_SCRIPT,
4172
+ SNAPSHOT_SCRIPT,
4173
+ Strategy,
4174
+ buildInterceptorScript,
4175
+ buildSnapshotScript,
4176
+ cascadeProbe,
4177
+ classifyIntent,
4178
+ cli,
4179
+ configSchema,
4180
+ executePipeline,
4181
+ exploreSite,
4182
+ extractLinks,
4183
+ extractMarkdown,
4184
+ extractSnapshot,
4185
+ extractText,
4186
+ flatTreeToString,
4187
+ getAdapter,
4188
+ getAdapterByDomain,
4189
+ getAdapterBySite,
4190
+ getAllAdapters,
4191
+ getAllSites,
4192
+ getConfigDir,
4193
+ getStep,
4194
+ getStepNames,
4195
+ heuristicClassify,
4196
+ loadConfig,
4197
+ lobsterFetch,
4198
+ makeRoutingDecision,
4199
+ parseHtml,
4200
+ registerStep,
4201
+ render,
4202
+ renderTemplate,
4203
+ saveConfig,
4204
+ synthesizeAdapter
4205
+ };
4206
+ //# sourceMappingURL=lib.js.map