lobster-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +389 -0
- package/dist/agent/core.js +1013 -0
- package/dist/agent/core.js.map +1 -0
- package/dist/agent/index.js +1027 -0
- package/dist/agent/index.js.map +1 -0
- package/dist/brain/index.js +60 -0
- package/dist/brain/index.js.map +1 -0
- package/dist/browser/dom/index.js +1096 -0
- package/dist/browser/dom/index.js.map +1 -0
- package/dist/browser/index.js +2034 -0
- package/dist/browser/index.js.map +1 -0
- package/dist/browser/manager.js +86 -0
- package/dist/browser/manager.js.map +1 -0
- package/dist/browser/page-adapter.js +1345 -0
- package/dist/browser/page-adapter.js.map +1 -0
- package/dist/cascade/index.js +138 -0
- package/dist/cascade/index.js.map +1 -0
- package/dist/config/index.js +110 -0
- package/dist/config/index.js.map +1 -0
- package/dist/config/schema.js +66 -0
- package/dist/config/schema.js.map +1 -0
- package/dist/discover/index.js +545 -0
- package/dist/discover/index.js.map +1 -0
- package/dist/index.js +5529 -0
- package/dist/index.js.map +1 -0
- package/dist/lib.js +4206 -0
- package/dist/lib.js.map +1 -0
- package/dist/llm/client.js +379 -0
- package/dist/llm/client.js.map +1 -0
- package/dist/llm/index.js +397 -0
- package/dist/llm/index.js.map +1 -0
- package/dist/llm/openai-client.js +214 -0
- package/dist/llm/openai-client.js.map +1 -0
- package/dist/output/index.js +93 -0
- package/dist/output/index.js.map +1 -0
- package/dist/pipeline/index.js +802 -0
- package/dist/pipeline/index.js.map +1 -0
- package/dist/router/decision.js +80 -0
- package/dist/router/decision.js.map +1 -0
- package/dist/router/index.js +3443 -0
- package/dist/router/index.js.map +1 -0
- package/dist/types/index.js +23 -0
- package/dist/types/index.js.map +1 -0
- package/logo.svg +11 -0
- package/package.json +65 -0
|
@@ -0,0 +1,3443 @@
|
|
|
1
|
+
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
2
|
+
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
3
|
+
}) : x)(function(x) {
|
|
4
|
+
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
5
|
+
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
6
|
+
});
|
|
7
|
+
|
|
8
|
+
// src/types/router.ts
|
|
9
|
+
// Numeric execution tiers with a two-way mapping (name -> number and
// number -> name), in the shape a compiled TypeScript numeric enum has:
// HTTP = 0, BROWSER = 1, ADAPTER = 2, AGENT = 3.
var ExecutionLevel = /* @__PURE__ */ ((levels) => {
  ["HTTP", "BROWSER", "ADAPTER", "AGENT"].forEach((label, ordinal) => {
    levels[label] = ordinal;
    levels[ordinal] = label;
  });
  return levels;
})(ExecutionLevel || {});
|
|
16
|
+
|
|
17
|
+
// src/adapter/registry.ts
|
|
18
|
+
// Adapter registry. The Map lives on `globalThis` under a fixed key and is
// only created when absent, so an already-installed registry is reused
// (presumably so several bundles in one process share it -- confirm).
var REGISTRY_KEY = "__lobster_registry__";
if (!globalThis[REGISTRY_KEY]) {
  globalThis[REGISTRY_KEY] = /* @__PURE__ */ new Map();
}

/**
 * Return the registry Map stored on `globalThis`.
 * @returns {Map<string, object>} adapters keyed by "site/name".
 */
function getRegistry() {
  return globalThis[REGISTRY_KEY];
}

/**
 * Look up a single adapter by its site and command name.
 * @param {string} site
 * @param {string} name
 * @returns {object | undefined} the adapter registered under "site/name", if any.
 */
function getAdapter(site, name) {
  return getRegistry().get(`${site}/${name}`);
}

/**
 * Collect every registered adapter whose `domain` covers the given hostname.
 *
 * An adapter matches when the hostname equals its domain or is a subdomain of
 * it (ends with "." + domain). The comparison is case-insensitive. A plain
 * substring test is deliberately NOT used: it would let "netflix.com" match an
 * adapter for "x.com", or "github.com.evil.io" match one for "github.com".
 *
 * @param {string} domain hostname to match, e.g. "api.github.com".
 * @returns {object[]} matching adapters in registry iteration order (possibly empty).
 */
function getAdapterByDomain(domain) {
  const host = String(domain).toLowerCase();
  const adapters = [];
  for (const adapter of getRegistry().values()) {
    // Adapters without a domain can only be reached via getAdapter().
    if (!adapter.domain) continue;
    const wanted = String(adapter.domain).toLowerCase();
    if (host === wanted || host.endsWith(`.${wanted}`)) adapters.push(adapter);
  }
  return adapters;
}
|
|
35
|
+
|
|
36
|
+
// src/router/decision.ts
|
|
37
|
+
/**
 * Choose the execution level for a request.
 *
 * Rules are tried in order; the first that applies wins:
 *   1. `site` + `command` naming a registered adapter      -> ADAPTER (2)
 *   2. `url` whose hostname has a registered adapter        -> ADAPTER (2)
 *   3. `url` with no `task`                                 -> HTTP (0)
 *   4. `url` whose path looks like an API endpoint          -> HTTP (0)
 *   5. `task` mentioning an interaction word, or any `url`  -> AGENT (3)
 *   6. anything else                                        -> AGENT (3)
 *
 * The API-endpoint heuristic (rule 4) is evaluated against the URL's pathname,
 * so a query string or fragment neither hides a ".json" suffix
 * ("/data.json?page=2") nor fakes an "/api/" segment ("?next=/api/").
 * When the URL cannot be parsed the raw string is checked instead.
 *
 * @param {{site?: string, command?: string, url?: string, task?: string}} request
 * @returns {{level: number, reason: string, adapter?: object}}
 */
function makeRoutingDecision(request) {
  if (request.site && request.command) {
    const adapter = getAdapter(request.site, request.command);
    if (adapter) {
      return {
        level: 2 /* ADAPTER */,
        reason: `Matched adapter: ${request.site}/${request.command}`,
        adapter
      };
    }
  }

  // Pathname of the parsed URL; stays null when the URL cannot be parsed.
  let pathname = null;
  if (request.url) {
    try {
      const parsed = new URL(request.url);
      pathname = parsed.pathname;
      const domain = parsed.hostname;
      const adapters = getAdapterByDomain(domain);
      if (adapters.length > 0) {
        return {
          level: 2 /* ADAPTER */,
          reason: `Found adapter for domain: ${domain}`,
          adapter: adapters[0]
        };
      }
    } catch {
      // Best effort: a malformed URL or a failed lookup must not abort
      // routing, so fall through to the generic rules below.
    }
  }

  if (request.url && !request.task) {
    return {
      level: 0 /* HTTP */,
      reason: "Direct URL fetch (no task specified)"
    };
  }

  if (request.url) {
    const target = pathname ?? request.url;
    const looksLikeApi = target.endsWith(".json") || ["/api/", "/v1/", "/v2/"].some((segment) => target.includes(segment));
    if (looksLikeApi) {
      return {
        level: 0 /* HTTP */,
        reason: "URL appears to be an API endpoint"
      };
    }
  }

  if (request.task) {
    const taskLower = request.task.toLowerCase();
    const interactionWords = ["click", "scroll", "fill", "type", "login", "sign in", "search", "navigate", "find", "extract", "get"];
    const needsInteraction = interactionWords.some((w) => taskLower.includes(w));
    if (needsInteraction || request.url) {
      return {
        level: 3 /* AGENT */,
        reason: "Task requires web interaction"
      };
    }
  }

  return {
    level: 3 /* AGENT */,
    reason: "Defaulting to AI agent for unrecognized task"
  };
}
|
|
93
|
+
|
|
94
|
+
// src/http/index.ts
|
|
95
|
+
/**
 * Fetch a URL directly over HTTP with a timeout, following redirects.
 *
 * The body is parsed as JSON when the response's content-type mentions
 * "json". If that body is empty or not valid JSON (e.g. a 204, or an HTML
 * error page mislabelled as JSON) the raw text is returned instead of
 * throwing, so callers can still inspect `status` and `body`.
 *
 * @param {string} url target URL.
 * @param {{timeout?: number, method?: string, headers?: object}} [options]
 *   `timeout` is in milliseconds; a missing or falsy value means 30000.
 *   `method` defaults to "GET".
 * @returns {Promise<{url: string, status: number, headers: Object<string, string>, body: any, contentType: string}>}
 * @throws whatever `fetch` rejects with, including the abort error raised
 *   when the timeout elapses.
 */
async function directFetch(url, options) {
  const controller = new AbortController();
  const timeout = options?.timeout || 3e4;
  const timer = setTimeout(() => controller.abort(), timeout);
  try {
    const resp = await fetch(url, {
      method: options?.method || "GET",
      headers: options?.headers,
      signal: controller.signal,
      redirect: "follow"
    });
    const contentType = resp.headers.get("content-type") || "";
    const headers = Object.fromEntries(resp.headers.entries());

    // Read the payload once as text, then attempt JSON on top of it so that a
    // malformed payload degrades to text instead of rejecting.
    const raw = await resp.text();
    let body = raw;
    if (contentType.includes("json")) {
      try {
        body = JSON.parse(raw);
      } catch {
        body = raw;
      }
    }
    return { url, status: resp.status, headers, body, contentType };
  } finally {
    // Always cancel the timer so it cannot keep the process alive or abort
    // a request that has already completed.
    clearTimeout(timer);
  }
}
|
|
122
|
+
|
|
123
|
+
// src/browser/manager.ts
|
|
124
|
+
import puppeteer from "puppeteer-core";
|
|
125
|
+
import { existsSync } from "fs";
|
|
126
|
+
|
|
127
|
+
// src/utils/logger.ts
|
|
128
|
+
import chalk from "chalk";
|
|
129
|
+
// Console logger. Each level prefixes the message with a chalk-coloured
// glyph. Everything is written with console.log except `error`, which uses
// console.error. `debug` prints only when the LOBSTER_DEBUG environment
// variable is set to a non-empty value.
var log = {
  info(msg) {
    console.log(chalk.blue("\u2139"), msg);
  },
  success(msg) {
    console.log(chalk.green("\u2713"), msg);
  },
  warn(msg) {
    console.log(chalk.yellow("\u26A0"), msg);
  },
  error(msg) {
    console.error(chalk.red("\u2717"), msg);
  },
  debug(msg) {
    if (!process.env.LOBSTER_DEBUG) return;
    console.log(chalk.gray("\u22EF"), msg);
  },
  // `n` is shown in square brackets ahead of the message.
  step(n, msg) {
    console.log(chalk.cyan(`[${n}]`), msg);
  },
  dim(msg) {
    console.log(chalk.dim(msg));
  }
};
|
|
140
|
+
|
|
141
|
+
// src/browser/manager.ts
|
|
142
|
+
/**
 * Owns at most one puppeteer browser handle and hands out pages from it.
 *
 * Config fields read here: `cdpEndpoint` (attach to an existing browser),
 * `executablePath` (browser binary to launch) and `headless` (defaults to
 * true when null or undefined).
 */
var BrowserManager = class {
  browser = null;
  config;

  constructor(config = {}) {
    this.config = config;
  }

  /**
   * Return a connected browser, reusing the current handle when it is still
   * connected. Otherwise attach over CDP when `cdpEndpoint` is configured,
   * or launch a local Chrome/Chromium.
   * @throws {Error} when no executable is configured and none can be found.
   */
  async connect() {
    const current = this.browser;
    if (current?.connected) return current;

    const endpoint = this.config.cdpEndpoint;
    if (endpoint) {
      log.debug(`Connecting to CDP endpoint: ${this.config.cdpEndpoint}`);
      const attached = await puppeteer.connect({
        browserWSEndpoint: this.config.cdpEndpoint
      });
      this.browser = attached;
      return this.browser;
    }

    const executablePath = this.config.executablePath || findChrome();
    if (!executablePath) {
      throw new Error(
        "Chrome/Chromium not found. Set LOBSTER_BROWSER_PATH or config browser.executablePath"
      );
    }

    log.debug(`Launching Chrome: ${executablePath}`);
    const launchOptions = {
      executablePath,
      headless: this.config.headless ?? true,
      args: [
        "--no-sandbox",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu"
      ]
    };
    this.browser = await puppeteer.launch(launchOptions);
    return this.browser;
  }

  /** Open a new page, connecting first if necessary. */
  async newPage() {
    const browser = await this.connect();
    return browser.newPage();
  }

  /**
   * Close the browser if one is held and forget the handle. A rejection
   * from the browser's close() is ignored.
   */
  async close() {
    if (!this.browser) return;
    await this.browser.close().catch(() => {
    });
    this.browser = null;
  }
};
|
|
188
|
+
/**
 * Find an installed Chrome/Chromium binary by probing a fixed list of
 * install locations for the current platform (macOS, Windows, or the
 * Linux-style list for any other platform).
 * @returns {string | undefined} the first candidate path that exists on disk,
 *   or undefined when none does.
 */
function findChrome() {
  let candidates;
  switch (process.platform) {
    case "darwin":
      candidates = [
        "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
        "/Applications/Chromium.app/Contents/MacOS/Chromium",
        "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary"
      ];
      break;
    case "win32":
      candidates = [
        "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
        "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"
      ];
      break;
    default:
      candidates = [
        "/usr/bin/google-chrome",
        "/usr/bin/google-chrome-stable",
        "/usr/bin/chromium-browser",
        "/usr/bin/chromium",
        "/snap/bin/chromium"
      ];
  }
  for (const candidate of candidates) {
    if (existsSync(candidate)) return candidate;
  }
  return undefined;
}
|
|
205
|
+
|
|
206
|
+
// src/browser/dom/flat-tree.ts
|
|
207
|
+
// Source text of a self-invoking script meant to be evaluated inside a page.
// It walks the visible elements under document.body and returns
// { rootId, map, selectorMap }:
//   - map:         node id -> { id, tagName, attributes, parentId, children,
//                  isInteractive, highlightIndex?, scrollable?, text? }
//   - selectorMap: highlightIndex -> node id, for interactive elements only
// Node ids are minted from a per-run counter ("div_0", "button_1", ...), so
// they are unique within a snapshot and reproducible for an unchanged page.
// (Random ids could collide and silently overwrite entries in map.)
var FLAT_TREE_SCRIPT = `
(() => {
  const INTERACTIVE_TAGS = new Set([
    'a', 'button', 'input', 'select', 'textarea', 'details', 'summary',
    'label', 'option', 'fieldset', 'legend',
  ]);

  const INTERACTIVE_ROLES = new Set([
    'button', 'link', 'textbox', 'checkbox', 'radio', 'combobox',
    'listbox', 'menu', 'menuitem', 'tab', 'switch', 'slider',
    'searchbox', 'spinbutton', 'option', 'menuitemcheckbox', 'menuitemradio',
  ]);

  const ATTR_WHITELIST = [
    'type', 'role', 'aria-label', 'aria-expanded', 'aria-selected',
    'aria-checked', 'aria-disabled', 'placeholder', 'title', 'href',
    'value', 'name', 'alt', 'src',
  ];

  let highlightIndex = 0;
  // Monotonic counter for node ids: collision-free and deterministic.
  let nodeSeq = 0;
  const nodes = {};
  const selectorMap = {};

  function isVisible(el) {
    if (el.offsetWidth === 0 && el.offsetHeight === 0) return false;
    const style = getComputedStyle(el);
    if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') return false;
    return true;
  }

  function isInteractive(el) {
    const tag = el.tagName.toLowerCase();
    if (INTERACTIVE_TAGS.has(tag)) return true;
    const role = el.getAttribute('role');
    if (role && INTERACTIVE_ROLES.has(role)) return true;
    if (el.getAttribute('contenteditable') === 'true') return true;
    const tabindex = el.getAttribute('tabindex');
    if (tabindex !== null && parseInt(tabindex, 10) >= 0) return true;
    if (el.onclick || el.getAttribute('onclick')) return true;
    return false;
  }

  function getAttributes(el) {
    const attrs = {};
    for (const attr of ATTR_WHITELIST) {
      const val = el.getAttribute(attr);
      if (val !== null && val !== '') attrs[attr] = val;
    }
    return attrs;
  }

  function getScrollable(el) {
    const style = getComputedStyle(el);
    const overflowY = style.overflowY;
    const overflowX = style.overflowX;
    const isScrollableY = (overflowY === 'auto' || overflowY === 'scroll') && el.scrollHeight > el.clientHeight;
    const isScrollableX = (overflowX === 'auto' || overflowX === 'scroll') && el.scrollWidth > el.clientWidth;
    if (!isScrollableY && !isScrollableX) return null;
    return {
      left: el.scrollLeft,
      top: el.scrollTop,
      right: el.scrollWidth - el.clientWidth - el.scrollLeft,
      bottom: el.scrollHeight - el.clientHeight - el.scrollTop,
    };
  }

  function walk(el, parentId) {
    if (!el || el.nodeType === 8) return; // skip comments

    if (el.nodeType === 3) { // text node
      const text = el.textContent.trim();
      if (!text) return;
      const id = 'text_' + (nodeSeq++);
      nodes[id] = { id, tagName: '#text', text, parentId };
      if (parentId && nodes[parentId]) {
        nodes[parentId].children = nodes[parentId].children || [];
        nodes[parentId].children.push(id);
      }
      return;
    }

    if (el.nodeType !== 1) return; // only elements

    const tag = el.tagName.toLowerCase();
    if (['script', 'style', 'noscript', 'svg', 'path'].includes(tag)) return;
    if (!isVisible(el)) return;

    const id = tag + '_' + (nodeSeq++);
    const interactive = isInteractive(el);
    const node = {
      id,
      tagName: tag,
      attributes: getAttributes(el),
      parentId,
      children: [],
      isInteractive: interactive,
    };

    if (interactive) {
      node.highlightIndex = highlightIndex;
      selectorMap[highlightIndex] = id;
      highlightIndex++;
    }

    const scrollable = getScrollable(el);
    if (scrollable) node.scrollable = scrollable;

    // Direct text of this element only (child elements carry their own).
    const text = [];
    for (const child of el.childNodes) {
      if (child.nodeType === 3 && child.textContent.trim()) {
        text.push(child.textContent.trim());
      }
    }
    if (text.length > 0) node.text = text.join(' ').slice(0, 200);

    nodes[id] = node;

    if (parentId && nodes[parentId]) {
      nodes[parentId].children.push(id);
    }

    for (const child of el.children) {
      walk(child, id);
    }
  }

  const rootId = 'root';
  nodes[rootId] = { id: rootId, tagName: 'body', children: [], attributes: {} };
  for (const child of document.body.children) {
    walk(child, rootId);
  }

  return { rootId, map: nodes, selectorMap };
})()
`;
|
|
341
|
+
/**
 * Render a flat DOM tree (`{ rootId, map }`) as indented pseudo-HTML text.
 *
 * - "#text" nodes print their text at the current indent.
 * - Elements print as `[index]<tag attrs |scroll: ...|>text`; the `[index]`
 *   prefix appears only when `highlightIndex` is set, and an attribute whose
 *   value is the empty string prints as a bare name.
 * - An element without children is closed on the same line with `</>`;
 *   otherwise its children follow, one indent level deeper.
 * - An element with no index, no text and no children prints nothing.
 * - Ids that are missing from `tree.map` are skipped.
 *
 * @param {{rootId: string, map: Object<string, object>}} tree
 * @returns {string} the rendered lines joined with "\n".
 */
function flatTreeToString(tree) {
  const lines = [];

  const render = (nodeId, depth) => {
    const node = tree.map[nodeId];
    if (!node) return;

    const pad = " ".repeat(depth);
    if (node.tagName === "#text") {
      if (node.text) lines.push(`${pad}${node.text}`);
      return;
    }

    const marker = node.highlightIndex !== void 0 ? `[${node.highlightIndex}]` : "";
    const text = node.text || "";
    const hasChildren = node.children?.length > 0;

    if (!marker && !text && !hasChildren) {
      // Nothing worth printing for this element; descend at the same depth.
      for (const childId of node.children || []) {
        render(childId, depth);
      }
      return;
    }

    const attrText = Object.entries(node.attributes || {})
      .map(([key, value]) => (value === "" ? key : `${key}="${value}"`))
      .join(" ");
    let scrollNote = "";
    if (node.scrollable) {
      scrollNote = ` |scroll: ${Math.round(node.scrollable.top)}px up, ${Math.round(node.scrollable.bottom)}px down|`;
    }
    const head = `${pad}${marker}<${node.tagName}${attrText ? " " + attrText : ""}${scrollNote}>`;

    if (!node.children?.length) {
      lines.push(`${head}${text}</>`);
      return;
    }

    lines.push(`${head}${text}`);
    for (const childId of node.children || []) {
      render(childId, depth + 1);
    }
  };

  render(tree.rootId, 0);
  return lines.join("\n");
}
|
|
376
|
+
|
|
377
|
+
// src/browser/dom/snapshot.ts
|
|
378
|
+
// Source text of a self-invoking script meant to be evaluated inside a page.
// It returns a text snapshot of the page: a one-line viewport/scroll header,
// a "---" separator, then an indented pseudo-HTML outline of the visible DOM.
// Interactive elements get a "[n]" index and a matching data-ref attribute on
// the element; the prefix is "*[n]" when the element's hash was absent from
// the previous snapshot (hashes are kept on window.__lobster_prev_hashes).
// Interactive elements whose centre point is covered by an unrelated element
// with a clearly higher z-index are still listed but carry an " [occluded]"
// hint, so the consumer knows a modal or overlay may need dismissing first.
var SNAPSHOT_SCRIPT = `
(() => {
  let idx = 0;
  const __prevHashes = (window.__lobster_prev_hashes) ? new Set(window.__lobster_prev_hashes) : null;
  const __currentHashes = [];

  const SKIP_TAGS = new Set([
    'script','style','noscript','svg','path','meta','link','head',
    'template','slot','colgroup','col',
  ]);

  const INTERACTIVE_TAGS = new Set([
    'a','button','input','select','textarea','details','summary','label',
  ]);

  const INTERACTIVE_ROLES = new Set([
    'button','link','textbox','checkbox','radio','combobox','listbox',
    'menu','menuitem','tab','switch','slider','searchbox','spinbutton',
    'option','menuitemcheckbox','menuitemradio','treeitem',
  ]);

  const ATTR_WHITELIST = [
    'type','role','aria-label','aria-expanded','aria-selected','aria-checked',
    'aria-disabled','aria-haspopup','aria-pressed','placeholder','title',
    'href','value','name','alt','src','action','method','for',
    'data-testid','data-id','contenteditable','tabindex',
  ];

  const AD_PATTERNS = /ad[-_]?banner|ad[-_]?container|google[-_]?ad|doubleclick|adsbygoogle|sponsored|^ad$/i;

  // \u2500\u2500 Stage 1: Visibility check \u2500\u2500
  function isVisible(el) {
    if (el.offsetWidth === 0 && el.offsetHeight === 0 && el.tagName !== 'INPUT') return false;
    const s = getComputedStyle(el);
    if (s.display === 'none') return false;
    if (s.visibility === 'hidden' || s.visibility === 'collapse') return false;
    if (s.opacity === '0') return false;
    if (s.clipPath === 'inset(100%)') return false;
    // Check for offscreen positioning
    const rect = el.getBoundingClientRect();
    if (rect.right < 0 || rect.bottom < 0) return false;
    return true;
  }

  // \u2500\u2500 Stage 2: Interactive detection \u2500\u2500
  function isInteractive(el) {
    const tag = el.tagName.toLowerCase();
    if (INTERACTIVE_TAGS.has(tag)) {
      // Skip disabled elements
      if (el.disabled) return false;
      // Skip hidden inputs
      if (tag === 'input' && el.type === 'hidden') return false;
      return true;
    }
    const role = el.getAttribute('role');
    if (role && INTERACTIVE_ROLES.has(role)) return true;
    if (el.contentEditable === 'true') return true;
    if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) return true;
    if (el.onclick) return true;
    return false;
  }

  // \u2500\u2500 Stage 8: Attribute filtering \u2500\u2500
  function getAttrs(el) {
    const parts = [];
    for (const name of ATTR_WHITELIST) {
      let v = el.getAttribute(name);
      if (v === null || v === '') continue;
      // Truncate long values
      if (v.length > 80) v = v.slice(0, 77) + '...';
      // Skip href="javascript:..."
      if (name === 'href' && v.startsWith('javascript:')) continue;
      parts.push(name + '=' + v);
    }
    return parts.length ? ' ' + parts.join(' ') : '';
  }

  // \u2500\u2500 Stage 9: Ad filtering \u2500\u2500
  function isAd(el) {
    const id = el.id || '';
    const cls = el.className || '';
    if (typeof cls === 'string' && AD_PATTERNS.test(cls)) return true;
    if (AD_PATTERNS.test(id)) return true;
    if (el.tagName === 'IFRAME' && AD_PATTERNS.test(el.src || '')) return true;
    return false;
  }

  // \u2500\u2500 Stage 10: Scroll info \u2500\u2500
  function getScrollInfo(el) {
    const s = getComputedStyle(el);
    const overflowY = s.overflowY;
    const overflowX = s.overflowX;
    const scrollableY = (overflowY === 'auto' || overflowY === 'scroll') && el.scrollHeight > el.clientHeight;
    const scrollableX = (overflowX === 'auto' || overflowX === 'scroll') && el.scrollWidth > el.clientWidth;
    if (!scrollableY && !scrollableX) return '';

    const parts = [];
    if (scrollableY) {
      const up = Math.round(el.scrollTop);
      const down = Math.round(el.scrollHeight - el.clientHeight - el.scrollTop);
      if (up > 0) parts.push(up + 'px up');
      if (down > 0) parts.push(down + 'px down');
    }
    if (scrollableX) {
      const left = Math.round(el.scrollLeft);
      const right = Math.round(el.scrollWidth - el.clientWidth - el.scrollLeft);
      if (left > 0) parts.push(left + 'px left');
      if (right > 0) parts.push(right + 'px right');
    }
    return parts.length ? ' |scroll: ' + parts.join(', ') + '|' : '';
  }

  // \u2500\u2500 Stage 6: Bounding-box dedup \u2500\u2500
  // If a parent and child are both interactive and have ~same bounding box,
  // skip the parent (e.g., <a><button>Click</button></a>)
  function isWrappingInteractive(el) {
    if (!isInteractive(el)) return false;
    const rect = el.getBoundingClientRect();
    if (rect.width === 0 || rect.height === 0) return false;
    for (const child of el.children) {
      if (!isInteractive(child)) continue;
      const cr = child.getBoundingClientRect();
      const overlapX = Math.min(rect.right, cr.right) - Math.max(rect.left, cr.left);
      const overlapY = Math.min(rect.bottom, cr.bottom) - Math.max(rect.top, cr.top);
      const overlapArea = Math.max(0, overlapX) * Math.max(0, overlapY);
      const parentArea = rect.width * rect.height;
      if (parentArea > 0 && overlapArea / parentArea > 0.85) return true;
    }
    return false;
  }

  // \u2500\u2500 Stage 7: Occlusion detection \u2500\u2500
  function isOccluded(el) {
    const rect = el.getBoundingClientRect();
    if (rect.width === 0 || rect.height === 0) return false;
    const cx = rect.left + rect.width / 2;
    const cy = rect.top + rect.height / 2;
    // Hit-test in the element's own root (iframe document or shadow root):
    // its bounding rect is relative to that root's viewport, not the top page.
    const root = typeof el.getRootNode === 'function' ? el.getRootNode() : null;
    const hitRoot = root && typeof root.elementFromPoint === 'function' ? root : document;
    const topEl = hitRoot.elementFromPoint(cx, cy);
    if (!topEl) return false;
    if (topEl === el || el.contains(topEl) || topEl.contains(el)) return false;
    // Check z-index \u2014 if top element is a modal/overlay, mark as occluded
    const topZ = parseInt(getComputedStyle(topEl).zIndex, 10) || 0;
    const elZ = parseInt(getComputedStyle(el).zIndex, 10) || 0;
    return topZ > elZ + 10;
  }

  // \u2500\u2500 Stage 5: Iframe content extraction \u2500\u2500
  function getIframeContent(iframe, depth, maxDepth) {
    try {
      const doc = iframe.contentDocument;
      if (!doc || !doc.body) return '';
      return '\\n' + walkNode(doc.body, depth, maxDepth);
    } catch { return ''; }
  }

  // \u2500\u2500 Stage 4: Shadow DOM traversal \u2500\u2500
  function getShadowContent(el, depth, maxDepth) {
    if (!el.shadowRoot) return '';
    let out = '';
    for (const child of el.shadowRoot.childNodes) {
      out += walkNode(child, depth, maxDepth);
    }
    return out;
  }

  // \u2500\u2500 Input value hint \u2500\u2500
  function getInputHint(el) {
    const tag = el.tagName.toLowerCase();
    if (tag === 'input') {
      const type = el.type || 'text';
      const val = el.value || '';
      const checked = el.checked;
      if (type === 'checkbox' || type === 'radio') {
        return checked ? ' [checked]' : ' [unchecked]';
      }
      if (val) return ' value="' + val.slice(0, 50) + '"';
    }
    if (tag === 'textarea' && el.value) {
      return ' value="' + el.value.slice(0, 50) + '"';
    }
    if (tag === 'select' && el.selectedOptions?.length) {
      return ' selected="' + el.selectedOptions[0].text.slice(0, 40) + '"';
    }
    return '';
  }

  const MAX_DEPTH = 25;
  const MAX_TEXT = 150;

  function walkNode(node, depth, maxDepth) {
    if (depth > maxDepth) return '';
    if (!node) return '';

    // Text node
    if (node.nodeType === 3) {
      const t = node.textContent.trim();
      if (!t) return '';
      const text = t.length > MAX_TEXT ? t.slice(0, MAX_TEXT) + '...' : t;
      return ' '.repeat(depth) + text + '\\n';
    }

    // Comment node \u2014 skip
    if (node.nodeType === 8) return '';

    // Only element nodes from here
    if (node.nodeType !== 1) return '';

    const el = node;
    const tag = el.tagName.toLowerCase();

    // \u2500\u2500 Stage 3: Skip tags \u2500\u2500
    if (SKIP_TAGS.has(tag)) return '';

    // \u2500\u2500 Stage 2: Visibility \u2500\u2500
    if (!isVisible(el)) return '';

    // \u2500\u2500 Stage 9: Ad filtering \u2500\u2500
    if (isAd(el)) return '';

    // \u2500\u2500 Stage 6: Bbox dedup \u2014 skip wrapping interactive parent \u2500\u2500
    const skipSelf = isWrappingInteractive(el);

    const indent = ' '.repeat(depth);
    const inter = !skipSelf && isInteractive(el);
    let prefix = '';
    let occludedHint = '';
    if (inter) {
      const thisIdx = idx++;
      // Hash: tag + text + key attributes for diff tracking
      const hashText = tag + ':' + (el.textContent || '').trim().slice(0, 40) + ':' + (el.getAttribute('href') || '') + ':' + (el.getAttribute('aria-label') || '');
      __currentHashes.push(hashText);
      const isNew = __prevHashes && __prevHashes.size > 0 && !__prevHashes.has(hashText);
      prefix = isNew ? '*[' + thisIdx + ']' : '[' + thisIdx + ']';

      // \u2500\u2500 Stage 11: Annotate with data-ref \u2500\u2500
      try { el.dataset.ref = String(thisIdx); } catch {}

      // \u2500\u2500 Stage 7: Occlusion check for interactive elements \u2500\u2500
      // Still include but mark as occluded
      // (agent needs to know element exists but may need to scroll/close modal)
      if (isOccluded(el)) occludedHint = ' [occluded]';
    }

    const a = getAttrs(el);
    const scrollInfo = getScrollInfo(el);
    const inputHint = inter ? getInputHint(el) : '';

    // Leaf text extraction
    let leafText = '';
    if (el.childNodes.length === 1 && el.childNodes[0].nodeType === 3) {
      const t = el.childNodes[0].textContent.trim();
      if (t) leafText = t.length > MAX_TEXT ? t.slice(0, MAX_TEXT) + '...' : t;
    }

    // \u2500\u2500 Stage 5: Iframe \u2500\u2500
    if (tag === 'iframe') {
      const iframeContent = getIframeContent(el, depth + 1, maxDepth);
      if (iframeContent) {
        return indent + prefix + '<iframe' + a + '>\\n' + iframeContent;
      }
      return '';
    }

    // Build output
    let out = '';

    if (skipSelf) {
      // Skip self but render children
      for (const c of el.childNodes) out += walkNode(c, depth, maxDepth);
      out += getShadowContent(el, depth, maxDepth);
      return out;
    }

    if (inter || leafText || el.children.length === 0) {
      if (leafText) {
        out = indent + prefix + '<' + tag + a + scrollInfo + inputHint + occludedHint + '>' + leafText + '</' + tag + '>\\n';
      } else {
        out = indent + prefix + '<' + tag + a + scrollInfo + inputHint + occludedHint + '>\\n';
        for (const c of el.childNodes) out += walkNode(c, depth + 1, maxDepth);
        out += getShadowContent(el, depth + 1, maxDepth);
      }
    } else {
      // Non-interactive container \u2014 flatten depth if no useful info
      if (scrollInfo) {
        out = indent + '<' + tag + scrollInfo + '>\\n';
        for (const c of el.childNodes) out += walkNode(c, depth + 1, maxDepth);
        out += getShadowContent(el, depth + 1, maxDepth);
      } else {
        for (const c of el.childNodes) out += walkNode(c, depth, maxDepth);
        out += getShadowContent(el, depth, maxDepth);
      }
    }

    return out;
  }

  // \u2500\u2500 Page-level scroll info header \u2500\u2500
  const scrollY = window.scrollY;
  const scrollMax = document.documentElement.scrollHeight - window.innerHeight;
  const scrollPct = scrollMax > 0 ? Math.round((scrollY / scrollMax) * 100) : 0;
  const vpW = window.innerWidth;
  const vpH = window.innerHeight;
  const pageH = document.documentElement.scrollHeight;

  let header = '';
  header += 'viewport: ' + vpW + 'x' + vpH + ' | page_height: ' + pageH + 'px';
  header += ' | scroll: ' + scrollPct + '%';
  if (scrollY > 50) header += ' (' + Math.round(scrollY) + 'px from top)';
  if (scrollMax - scrollY > 50) header += ' (' + Math.round(scrollMax - scrollY) + 'px more below)';
  header += '\\n---\\n';

  // Store current hashes for next diff comparison
  window.__lobster_prev_hashes = __currentHashes;

  return header + walkNode(document.body, 0, MAX_DEPTH);
})()
`;
|
|
697
|
+
|
|
698
|
+
// src/browser/dom/semantic-tree.ts
|
|
699
|
+
// Page-side script (a string handed to page.evaluate) that serializes
// document.body into an indented text outline, one node per line:
//
//   <role or tag> "<accessible name>" [<interactivity kinds>] {disabled} xpath=<path> <state>
//
// - Text nodes are emitted as: text "<first 100 chars>".
// - Elements whose tag is in SKIP, or whose computed style is display:none or
//   visibility:hidden, are dropped together with their subtree.
// - Children reachable through el.shadowRoot are walked after the regular children.
// - Recursion stops once depth exceeds 20.
//
// The script evaluates to the outline string.
var SEMANTIC_TREE_SCRIPT = `
(() => {
  const SKIP = new Set(['script','style','noscript','svg','head','meta','link','template']);

  // Implicit role per tag; consulted only when the element has no role attribute.
  const ROLE_MAP = {
    a: 'link', button: 'button', input: 'textbox', select: 'combobox',
    textarea: 'textbox', h1: 'heading', h2: 'heading', h3: 'heading',
    h4: 'heading', h5: 'heading', h6: 'heading', nav: 'navigation',
    main: 'main', header: 'banner', footer: 'contentinfo', aside: 'complementary',
    form: 'form', table: 'table', img: 'img', ul: 'list', ol: 'list', li: 'listitem',
    section: 'region', article: 'article', dialog: 'dialog', details: 'group',
    summary: 'button', progress: 'progressbar', meter: 'meter', output: 'status',
    label: 'label', legend: 'legend', fieldset: 'group', option: 'option',
    tr: 'row', td: 'cell', th: 'columnheader', caption: 'caption',
  };

  const INTERACTIVE_ROLES = new Set([
    'button','link','textbox','checkbox','radio','combobox','listbox',
    'menu','menuitem','tab','switch','slider','searchbox','spinbutton',
    'option','menuitemcheckbox','menuitemradio','treeitem',
  ]);

  // Own-property lookup so tags that collide with Object.prototype members
  // (for example an unknown <constructor> element) do not resolve to a function.
  function implicitRole(tag) {
    return Object.prototype.hasOwnProperty.call(ROLE_MAP, tag) ? ROLE_MAP[tag] : '';
  }

  // \u2500\u2500 W3C Accessible Name Algorithm (simplified) \u2500\u2500
  // Returns the first non-empty candidate, in priority order; '' when none applies.
  function getAccessibleName(el) {
    // 1. aria-labelledby (highest priority)
    const labelledBy = el.getAttribute('aria-labelledby');
    if (labelledBy) {
      const ids = labelledBy.split(/\\s+/);
      const parts = ids.map(id => {
        const ref = document.getElementById(id);
        return ref ? ref.textContent.trim() : '';
      }).filter(Boolean);
      if (parts.length > 0) return parts.join(' ').slice(0, 120);
    }

    // 2. aria-label
    const ariaLabel = el.getAttribute('aria-label');
    if (ariaLabel) return ariaLabel.slice(0, 120);

    // 3. alt (for images)
    const alt = el.getAttribute('alt');
    if (alt) return alt.slice(0, 120);

    // 4. title
    const title = el.getAttribute('title');
    if (title) return title.slice(0, 120);

    // 5. placeholder (for inputs)
    const placeholder = el.getAttribute('placeholder');
    if (placeholder) return placeholder.slice(0, 120);

    // 6. value (for buttons)
    if (el.tagName === 'INPUT' && (el.type === 'submit' || el.type === 'button')) {
      const val = el.getAttribute('value');
      if (val) return val.slice(0, 120);
    }

    // 7. Associated label. The id is escaped because it is spliced into a
    //    selector: a raw quote or backslash would make querySelector throw.
    if (el.id) {
      const label = document.querySelector('label[for="' + CSS.escape(el.id) + '"]');
      if (label) return label.textContent.trim().slice(0, 120);
    }

    // 8. Direct text content (only for leaf-ish elements)
    if (el.children.length <= 2) {
      const text = el.textContent.trim();
      if (text && text.length < 120) return text;
    }

    return '';
  }

  // \u2500\u2500 XPath generation \u2500\u2500
  // Builds /tag[n]/tag[n]/... by climbing parentElement; n counts same-tag
  // preceding siblings, starting at 1.
  function getXPath(el) {
    const parts = [];
    let current = el;
    while (current && current.nodeType === 1) {
      let index = 1;
      let sibling = current.previousElementSibling;
      while (sibling) {
        if (sibling.tagName === current.tagName) index++;
        sibling = sibling.previousElementSibling;
      }
      const tag = current.tagName.toLowerCase();
      parts.unshift(tag + '[' + index + ']');
      current = current.parentElement;
    }
    return '/' + parts.join('/');
  }

  // \u2500\u2500 Interactivity classification \u2500\u2500
  // Returns the list of reasons the element counts as interactive (may be empty).
  function classifyInteractivity(el) {
    const types = [];
    const tag = el.tagName.toLowerCase();

    // Native
    if (['a','button','input','select','textarea','details','summary'].includes(tag)) {
      if (tag === 'a' && !el.href) {} // anchor without href is not interactive
      else if (tag === 'input' && el.type === 'hidden') {} // hidden inputs
      else types.push('native');
    }

    // ARIA role
    const role = el.getAttribute('role');
    if (role && INTERACTIVE_ROLES.has(role)) types.push('aria');

    // Contenteditable
    if (el.contentEditable === 'true') types.push('contenteditable');

    // Focusable (only when tabindex was set explicitly)
    if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) types.push('focusable');

    // Event listeners (check onclick and common inline handlers)
    if (el.onclick || el.onmousedown || el.onkeydown || el.onkeypress ||
        el.getAttribute('onclick') || el.getAttribute('onmousedown')) {
      types.push('listener');
    }

    return types;
  }

  // \u2500\u2500 Disabled state with fieldset inheritance \u2500\u2500
  function isDisabled(el) {
    if (el.disabled) return true;
    // Check fieldset disabled inheritance
    let parent = el.parentElement;
    while (parent) {
      if (parent.tagName === 'FIELDSET' && parent.disabled) {
        // Exception: elements inside the first legend child are NOT disabled
        const firstLegend = parent.querySelector(':scope > legend');
        if (firstLegend && firstLegend.contains(el)) return false;
        return true;
      }
      parent = parent.parentElement;
    }
    return false;
  }

  // \u2500\u2500 Walk the DOM \u2500\u2500
  // Returns the outline text for the node and its descendants.
  function walk(el, depth, maxDepth) {
    if (!el || depth > maxDepth) return '';

    if (el.nodeType === 3) {
      const t = el.textContent.trim();
      return t ? ' '.repeat(depth) + 'text "' + t.slice(0, 100) + '"\\n' : '';
    }

    if (el.nodeType !== 1) return '';
    const tag = el.tagName.toLowerCase();
    if (SKIP.has(tag)) return '';

    const style = getComputedStyle(el);
    if (style.display === 'none' || style.visibility === 'hidden') return '';

    const indent = ' '.repeat(depth);
    const role = el.getAttribute('role') || implicitRole(tag) || '';
    const name = getAccessibleName(el);
    const interTypes = classifyInteractivity(el);
    const interactive = interTypes.length > 0;
    const disabled = interactive && isDisabled(el);

    let line = indent;
    line += role || tag;

    if (name) line += ' "' + name.slice(0, 80) + '"';

    if (interactive) {
      line += ' [' + interTypes.join(',') + ']';
      if (disabled) line += ' {disabled}';
      line += ' xpath=' + getXPath(el);
    }

    // Input state
    if (tag === 'input') {
      const type = el.type || 'text';
      line += ' type=' + type;
      if (type === 'checkbox' || type === 'radio') {
        line += el.checked ? ' [checked]' : ' [unchecked]';
      } else if (el.value) {
        line += ' value="' + el.value.slice(0, 50) + '"';
      }
    }
    if (tag === 'textarea' && el.value) {
      line += ' value="' + el.value.slice(0, 50) + '"';
    }
    if (tag === 'select') {
      const opts = Array.from(el.options || []).map(o => ({
        text: o.text.slice(0, 30),
        value: o.value,
        selected: o.selected,
      }));
      const selected = opts.find(o => o.selected);
      if (selected) line += ' selected="' + selected.text + '"';
      // Option lists are only inlined when short.
      if (opts.length <= 10) {
        line += ' options=[' + opts.map(o => o.text).join('|') + ']';
      }
    }

    line += '\\n';

    let out = line;
    for (const c of el.childNodes) {
      out += walk(c, depth + 1, maxDepth);
    }

    // Shadow DOM
    if (el.shadowRoot) {
      for (const c of el.shadowRoot.childNodes) {
        out += walk(c, depth + 1, maxDepth);
      }
    }

    return out;
  }

  return walk(document.body, 0, 20);
})()
`;
|
|
917
|
+
|
|
918
|
+
// src/browser/dom/markdown.ts
|
|
919
|
+
// Page-side script (a string handed to page.evaluate) that converts
// document.body to Markdown text.
//
// - Tags in SKIP and elements whose computed style is display:none or
//   visibility:hidden contribute nothing.
// - href/src values are resolved against location.href; 'javascript:' and
//   '#...' values are left untouched.
// - Runs of three or more newlines are collapsed to two and the result is trimmed.
//
// The script evaluates to the Markdown string.
var MARKDOWN_SCRIPT = `
(() => {
  const SKIP = new Set(['script','style','noscript','svg','head','template']);
  const baseUrl = location.href;

  // Resolve relative URLs to absolute
  function resolveUrl(href) {
    if (!href || href.startsWith('javascript:') || href.startsWith('#')) return href;
    try { return new URL(href, baseUrl).href; } catch { return href; }
  }

  // Escape Markdown special chars in text
  // NOTE(review): nothing in this script calls escapeText, so text is emitted unescaped.
  function escapeText(text) {
    return text
      .replace(/\\\\/g, '\\\\\\\\')
      .replace(/([*_~\`\\[\\]|])/g, '\\\\$1');
  }

  // State tracking
  let listDepth = 0;
  let orderedCounters = [];
  let inPre = false;
  let inTable = false;

  function listIndent() { return ' '.repeat(listDepth); }

  // Returns the Markdown for one node and its descendants.
  function walk(el) {
    if (!el) return '';

    // Text node
    if (el.nodeType === 3) {
      const text = el.textContent || '';
      if (inPre) return text;
      // Collapse whitespace
      const collapsed = text.replace(/\\s+/g, ' ');
      // A whitespace-only text node that is an only child carries no content.
      return collapsed === ' ' && !el.previousSibling && !el.nextSibling ? '' : collapsed;
    }

    if (el.nodeType !== 1) return '';
    const tag = el.tagName.toLowerCase();
    if (SKIP.has(tag)) return '';

    // Visibility check (best effort: a failing getComputedStyle keeps the element)
    try {
      const s = getComputedStyle(el);
      if (s.display === 'none' || s.visibility === 'hidden') return '';
    } catch {}

    // Get children content
    function childContent() {
      let out = '';
      for (const c of el.childNodes) out += walk(c);
      return out;
    }

    switch (tag) {
      // \u2500\u2500 Headings \u2500\u2500
      case 'h1': return '\\n\\n# ' + childContent().trim() + '\\n\\n';
      case 'h2': return '\\n\\n## ' + childContent().trim() + '\\n\\n';
      case 'h3': return '\\n\\n### ' + childContent().trim() + '\\n\\n';
      case 'h4': return '\\n\\n#### ' + childContent().trim() + '\\n\\n';
      case 'h5': return '\\n\\n##### ' + childContent().trim() + '\\n\\n';
      case 'h6': return '\\n\\n###### ' + childContent().trim() + '\\n\\n';

      // \u2500\u2500 Block elements \u2500\u2500
      case 'p': return '\\n\\n' + childContent().trim() + '\\n\\n';
      case 'br': return '\\n';
      case 'hr': return '\\n\\n---\\n\\n';

      // \u2500\u2500 Inline formatting \u2500\u2500
      case 'strong': case 'b': {
        const inner = childContent().trim();
        return inner ? '**' + inner + '**' : '';
      }
      case 'em': case 'i': {
        const inner = childContent().trim();
        return inner ? '*' + inner + '*' : '';
      }
      case 's': case 'del': case 'strike': {
        const inner = childContent().trim();
        return inner ? '~~' + inner + '~~' : '';
      }
      case 'code': {
        // Inside <pre> the fence is written by the 'pre' case.
        if (inPre) return childContent();
        const inner = childContent();
        return inner ? '\\x60' + inner + '\\x60' : '';
      }

      // \u2500\u2500 Code blocks \u2500\u2500
      case 'pre': {
        inPre = true;
        const inner = childContent();
        inPre = false;
        const lang = el.querySelector('code')?.className?.match(/language-(\\w+)/)?.[1] || '';
        return '\\n\\n\\x60\\x60\\x60' + lang + '\\n' + inner.trim() + '\\n\\x60\\x60\\x60\\n\\n';
      }

      // \u2500\u2500 Links \u2500\u2500
      case 'a': {
        const href = resolveUrl(el.getAttribute('href') || '');
        const inner = childContent().trim();
        const name = inner || el.getAttribute('aria-label') || el.getAttribute('title') || '';
        if (!name) return '';
        if (!href || href === '#' || href.startsWith('javascript:')) return name;
        return '[' + name + '](' + href + ')';
      }

      // \u2500\u2500 Images \u2500\u2500
      case 'img': {
        const alt = el.getAttribute('alt') || '';
        const src = resolveUrl(el.getAttribute('src') || '');
        // Images without a src are dropped.
        return src ? '![' + alt + '](' + src + ')' : '';
      }

      // \u2500\u2500 Lists \u2500\u2500
      case 'ul': {
        listDepth++;
        orderedCounters.push(0);
        const inner = childContent();
        listDepth--;
        orderedCounters.pop();
        return '\\n' + inner;
      }
      case 'ol': {
        listDepth++;
        orderedCounters.push(0);
        const inner = childContent();
        listDepth--;
        orderedCounters.pop();
        return '\\n' + inner;
      }
      case 'li': {
        const parent = el.parentElement?.tagName?.toLowerCase();
        const isOrdered = parent === 'ol';
        const inner = childContent().trim();
        if (!inner) return '';
        if (isOrdered) {
          // Number from the counter of the innermost open list.
          const counter = orderedCounters.length > 0
            ? ++orderedCounters[orderedCounters.length - 1] : 1;
          return listIndent() + counter + '. ' + inner + '\\n';
        }
        return listIndent() + '- ' + inner + '\\n';
      }

      // \u2500\u2500 Blockquote \u2500\u2500
      case 'blockquote': {
        const inner = childContent().trim();
        if (!inner) return '';
        return '\\n\\n' + inner.split('\\n').map(line => '> ' + line).join('\\n') + '\\n\\n';
      }

      // \u2500\u2500 Tables \u2500\u2500
      case 'table': {
        inTable = true;
        let out = '\\n\\n';
        const rows = el.querySelectorAll('tr');
        let headerDone = false;

        for (let i = 0; i < rows.length; i++) {
          const cells = rows[i].querySelectorAll('th, td');
          const isHeader = rows[i].querySelector('th') !== null;
          const cellTexts = [];
          for (const cell of cells) {
            let cellText = '';
            for (const c of cell.childNodes) cellText += walk(c);
            // Pipes and newlines would break the row, so escape / flatten them.
            cellTexts.push(cellText.trim().replace(/\\|/g, '\\\\|').replace(/\\n/g, ' '));
          }

          out += '| ' + cellTexts.join(' | ') + ' |\\n';

          if (isHeader && !headerDone) {
            out += '| ' + cellTexts.map(() => '---').join(' | ') + ' |\\n';
            headerDone = true;
          }

          // First data row without headers \u2014 synthesize separator
          if (i === 0 && !isHeader && !headerDone) {
            out += '| ' + cellTexts.map(() => '---').join(' | ') + ' |\\n';
            headerDone = true;
          }
        }

        inTable = false;
        return out + '\\n';
      }
      case 'thead': case 'tbody': case 'tfoot':
        return childContent();
      case 'tr': case 'td': case 'th':
        // Handled by table walker above; fallback for orphaned elements
        return childContent();

      // \u2500\u2500 Definition lists \u2500\u2500
      case 'dl': return '\\n\\n' + childContent() + '\\n\\n';
      case 'dt': return '\\n**' + childContent().trim() + '**\\n';
      case 'dd': return ': ' + childContent().trim() + '\\n';

      // \u2500\u2500 Figure \u2500\u2500
      case 'figure': return '\\n\\n' + childContent().trim() + '\\n\\n';
      case 'figcaption': return '\\n*' + childContent().trim() + '*\\n';

      // \u2500\u2500 Details/Summary \u2500\u2500
      case 'details': return '\\n\\n' + childContent() + '\\n\\n';
      case 'summary': return '**' + childContent().trim() + '**\\n\\n';

      // \u2500\u2500 Generic blocks \u2500\u2500
      case 'div': case 'section': case 'article': case 'main': case 'aside':
      case 'header': case 'footer': case 'nav':
        return '\\n' + childContent() + '\\n';

      case 'span': case 'small': case 'sub': case 'sup': case 'abbr':
      case 'time': case 'mark': case 'cite': case 'q':
        return childContent();

      default:
        return childContent();
    }
  }

  const raw = walk(document.body);
  // Clean up: collapse 3+ newlines to 2, trim
  return raw.replace(/\\n{3,}/g, '\\n\\n').replace(/^\\n+|\\n+$/g, '').trim();
})()
`;
|
|
1142
|
+
|
|
1143
|
+
// src/browser/dom/form-state.ts
|
|
1144
|
+
// Page-side script (a string handed to page.evaluate) that reports the state
// of the page's form controls.
//
// Evaluates to { forms, orphanFields }:
//   forms        - one entry per document.forms item:
//                  { id, name, action, method, fields }
//   orphanFields - input / textarea / select / [contenteditable="true"]
//                  elements whose .form is falsy
// Each field is { tag, type, name, label, value, required, disabled, ref }.
// Controls of type hidden, submit, button, reset and image are omitted.
// Password values are masked; string values are cut to 200 characters.
var FORM_STATE_SCRIPT = `
(() => {
  // Text of the first <label for=id>, or null. The id is escaped because it
  // is spliced into a selector: a raw quote or backslash would make
  // querySelector throw and abort the whole script.
  function labelTextFor(id) {
    if (!id) return null;
    const node = document.querySelector('label[for="' + CSS.escape(id) + '"]');
    return node?.textContent?.trim() || null;
  }

  // Form metadata is read from attributes rather than form.id / form.name /
  // form.action / form.method: those properties are shadowed by any control
  // named "id", "name", "action" or "method", which would yield an element
  // instead of a string.
  function describeForm(form) {
    const rawAction = form.getAttribute('action') || '';
    let action;
    try {
      // An empty action resolves to the document base URL.
      action = new URL(rawAction, document.baseURI).href;
    } catch {
      action = rawAction;
    }
    const rawMethod = (form.getAttribute('method') || '').toLowerCase();
    // Anything other than post/dialog falls back to get.
    const method = rawMethod === 'post' || rawMethod === 'dialog' ? rawMethod : 'get';
    return {
      id: form.getAttribute('id') || '',
      name: form.getAttribute('name') || '',
      action,
      method: method.toUpperCase(),
    };
  }

  // Returns the field record for one control, or null when it is skipped.
  function extractField(el) {
    const tag = el.tagName.toLowerCase();
    const type = (el.getAttribute('type') || tag).toLowerCase();

    // Skip non-user-facing inputs
    if (['hidden', 'submit', 'button', 'reset', 'image'].includes(type)) return null;

    const name = el.name || el.id || '';

    // Find label via multiple strategies
    const label =
      el.getAttribute('aria-label') ||
      labelTextFor(el.id) ||
      el.closest('label')?.textContent?.trim() ||
      el.placeholder ||
      '';

    // Extract value based on type
    let value;
    if (tag === 'select') {
      const selected = el.options[el.selectedIndex];
      value = selected ? selected.textContent.trim() : '';
    } else if (type === 'checkbox' || type === 'radio') {
      value = el.checked;
    } else if (type === 'password') {
      // Never expose the password itself, only whether one is present.
      value = el.value ? '\u2022\u2022\u2022\u2022' : '';
    } else if (el.isContentEditable) {
      value = el.textContent?.trim()?.slice(0, 200) || '';
    } else {
      value = el.value || '';
    }

    return {
      tag,
      type,
      name,
      label: label.slice(0, 80),
      value: typeof value === 'string' ? value.slice(0, 200) : value,
      required: !!el.required,
      disabled: !!el.disabled,
      ref: el.dataset?.ref || null,
    };
  }

  const result = { forms: [], orphanFields: [] };

  // Collect forms
  for (const form of document.forms) {
    const fields = [];
    for (const el of form.elements) {
      const field = extractField(el);
      if (field) fields.push(field);
    }
    result.forms.push({ ...describeForm(form), fields });
  }

  // Collect orphan fields (not in a <form>)
  const allInputs = document.querySelectorAll(
    'input, textarea, select, [contenteditable="true"]'
  );
  for (const el of allInputs) {
    if (!el.form) {
      const field = extractField(el);
      if (field) result.orphanFields.push(field);
    }
  }

  return result;
})()
`;
|
|
1222
|
+
|
|
1223
|
+
// src/browser/interceptor.ts
|
|
1224
|
+
/**
 * Builds the page-side script that records JSON responses of matching
 * fetch / XMLHttpRequest calls.
 *
 * The generated script patches window.fetch and XMLHttpRequest.prototype
 * (open/send) once per page; later evaluations return early because
 * window.__lobster_interceptor__ already exists. Each response whose request
 * URL contains `pattern` and whose body parses as JSON is appended to
 * window.__lobster_interceptor__.requests as
 * { url, method, status, body, timestamp }. Non-JSON bodies are ignored.
 *
 * @param {string} pattern - Substring to look for in request URLs; embedded
 *   into the script via JSON.stringify.
 * @returns {string} JavaScript source to evaluate in the page.
 */
function buildInterceptorScript(pattern) {
  return `
(() => {
  if (window.__lobster_interceptor__) return;
  window.__lobster_interceptor__ = { requests: [] };
  const store = window.__lobster_interceptor__;
  const pattern = ${JSON.stringify(pattern)};

  // Patch fetch
  const origFetch = window.fetch;
  window.fetch = async function(...args) {
    const input = args[0];
    const init = args[1];
    // fetch accepts a string, a Request (has .url) or a URL object (stringifies to its href).
    const url = typeof input === 'string' ? input : String(input?.url ?? input ?? '');
    // init.method overrides the Request's own method; GET is the default.
    const method = String(init?.method || (typeof input === 'object' && input?.method) || 'GET').toUpperCase();
    const resp = await origFetch.apply(this, args);
    if (url.includes(pattern)) {
      // Read a clone so the caller can still consume the original body.
      const clone = resp.clone();
      try {
        const body = await clone.json();
        store.requests.push({ url, method, status: resp.status, body, timestamp: Date.now() });
      } catch {}
    }
    return resp;
  };

  // Patch XHR
  const origOpen = XMLHttpRequest.prototype.open;
  const origSend = XMLHttpRequest.prototype.send;
  XMLHttpRequest.prototype.open = function(method, url, ...rest) {
    this.__url = url;
    this.__method = method;
    return origOpen.call(this, method, url, ...rest);
  };
  XMLHttpRequest.prototype.send = function(...args) {
    this.addEventListener('load', function() {
      // open() also accepts URL objects, so stringify before matching.
      const url = this.__url ? String(this.__url) : '';
      if (url && url.includes(pattern)) {
        try {
          const body = JSON.parse(this.responseText);
          store.requests.push({ url, method: this.__method, status: this.status, body, timestamp: Date.now() });
        } catch {}
      }
    });
    return origSend.apply(this, args);
  };
})()
`;
}
|
|
1269
|
+
// Page-side script (a string handed to page.evaluate) that drains the capture
// store at window.__lobster_interceptor__: it evaluates to a copy of the
// recorded requests and resets the store to an empty array. Evaluates to []
// when no store exists on the page.
var GET_INTERCEPTED_SCRIPT = `
(() => {
  const store = window.__lobster_interceptor__;
  if (!store) return [];
  const reqs = [...store.requests];
  store.requests = [];
  return reqs;
})()
`;
|
|
1278
|
+
|
|
1279
|
+
// src/browser/page-adapter.ts
|
|
1280
|
+
var PuppeteerPage = class {
|
|
1281
|
+
page;
|
|
1282
|
+
constructor(page) {
|
|
1283
|
+
this.page = page;
|
|
1284
|
+
}
|
|
1285
|
+
get raw() {
|
|
1286
|
+
return this.page;
|
|
1287
|
+
}
|
|
1288
|
+
async goto(url, options) {
|
|
1289
|
+
await this.page.goto(url, {
|
|
1290
|
+
waitUntil: options?.waitUntil || "networkidle2",
|
|
1291
|
+
timeout: options?.timeout || 3e4
|
|
1292
|
+
});
|
|
1293
|
+
}
|
|
1294
|
+
async goBack() {
|
|
1295
|
+
await this.page.goBack({ waitUntil: "networkidle2" });
|
|
1296
|
+
}
|
|
1297
|
+
async url() {
|
|
1298
|
+
return this.page.url();
|
|
1299
|
+
}
|
|
1300
|
+
async title() {
|
|
1301
|
+
return this.page.title();
|
|
1302
|
+
}
|
|
1303
|
+
async evaluate(js) {
|
|
1304
|
+
return this.page.evaluate(js);
|
|
1305
|
+
}
|
|
1306
|
+
async snapshot(_opts) {
|
|
1307
|
+
return this.page.evaluate(SNAPSHOT_SCRIPT);
|
|
1308
|
+
}
|
|
1309
|
+
async semanticTree(_opts) {
|
|
1310
|
+
return this.page.evaluate(SEMANTIC_TREE_SCRIPT);
|
|
1311
|
+
}
|
|
1312
|
+
async flatTree() {
|
|
1313
|
+
const raw = await this.page.evaluate(FLAT_TREE_SCRIPT);
|
|
1314
|
+
return raw;
|
|
1315
|
+
}
|
|
1316
|
+
async markdown() {
|
|
1317
|
+
return this.page.evaluate(MARKDOWN_SCRIPT);
|
|
1318
|
+
}
|
|
1319
|
+
async browserState() {
|
|
1320
|
+
const state = await this.page.evaluate(`
|
|
1321
|
+
(() => {
|
|
1322
|
+
const scrollY = window.scrollY;
|
|
1323
|
+
const scrollX = window.scrollX;
|
|
1324
|
+
const vpW = window.innerWidth;
|
|
1325
|
+
const vpH = window.innerHeight;
|
|
1326
|
+
const pageW = document.documentElement.scrollWidth;
|
|
1327
|
+
const pageH = document.documentElement.scrollHeight;
|
|
1328
|
+
const maxScrollY = pageH - vpH;
|
|
1329
|
+
return {
|
|
1330
|
+
url: location.href,
|
|
1331
|
+
title: document.title,
|
|
1332
|
+
viewportWidth: vpW,
|
|
1333
|
+
viewportHeight: vpH,
|
|
1334
|
+
pageWidth: pageW,
|
|
1335
|
+
pageHeight: pageH,
|
|
1336
|
+
scrollX: scrollX,
|
|
1337
|
+
scrollY: scrollY,
|
|
1338
|
+
scrollPercent: maxScrollY > 0 ? Math.round((scrollY / maxScrollY) * 100) : 0,
|
|
1339
|
+
pixelsAbove: Math.round(scrollY),
|
|
1340
|
+
pixelsBelow: Math.round(Math.max(0, maxScrollY - scrollY)),
|
|
1341
|
+
};
|
|
1342
|
+
})()
|
|
1343
|
+
`);
|
|
1344
|
+
return state;
|
|
1345
|
+
}
|
|
1346
|
+
async formState() {
|
|
1347
|
+
return this.page.evaluate(FORM_STATE_SCRIPT);
|
|
1348
|
+
}
|
|
1349
|
+
async click(ref) {
|
|
1350
|
+
if (typeof ref === "number") {
|
|
1351
|
+
await this.page.evaluate((idx) => {
|
|
1352
|
+
const el = document.querySelector('[data-ref="' + idx + '"]');
|
|
1353
|
+
if (!el) throw new Error("Element with index " + idx + " not found");
|
|
1354
|
+
const prev = document.activeElement;
|
|
1355
|
+
if (prev && prev !== el && prev !== document.body) {
|
|
1356
|
+
prev.blur();
|
|
1357
|
+
prev.dispatchEvent(new MouseEvent("mouseout", { bubbles: true, cancelable: true }));
|
|
1358
|
+
prev.dispatchEvent(new MouseEvent("mouseleave", { bubbles: false, cancelable: true }));
|
|
1359
|
+
}
|
|
1360
|
+
if (typeof el.scrollIntoViewIfNeeded === "function") {
|
|
1361
|
+
el.scrollIntoViewIfNeeded();
|
|
1362
|
+
} else {
|
|
1363
|
+
el.scrollIntoView({ behavior: "auto", block: "center", inline: "nearest" });
|
|
1364
|
+
}
|
|
1365
|
+
el.dispatchEvent(new MouseEvent("mouseenter", { bubbles: true, cancelable: true }));
|
|
1366
|
+
el.dispatchEvent(new MouseEvent("mouseover", { bubbles: true, cancelable: true }));
|
|
1367
|
+
el.dispatchEvent(new MouseEvent("mousedown", { bubbles: true, cancelable: true }));
|
|
1368
|
+
el.focus();
|
|
1369
|
+
el.dispatchEvent(new MouseEvent("mouseup", { bubbles: true, cancelable: true }));
|
|
1370
|
+
el.dispatchEvent(new MouseEvent("click", { bubbles: true, cancelable: true }));
|
|
1371
|
+
}, ref);
|
|
1372
|
+
await new Promise((r) => setTimeout(r, 200));
|
|
1373
|
+
} else {
|
|
1374
|
+
await this.page.click(ref);
|
|
1375
|
+
}
|
|
1376
|
+
}
|
|
1377
|
+
async typeText(ref, text) {
|
|
1378
|
+
if (typeof ref === "number") {
|
|
1379
|
+
await this.click(ref);
|
|
1380
|
+
await this.page.evaluate((idx, txt) => {
|
|
1381
|
+
const el = document.querySelector('[data-ref="' + idx + '"]');
|
|
1382
|
+
if (!el) throw new Error("Element with index " + idx + " not found");
|
|
1383
|
+
const isInput = el.tagName === "INPUT" || el.tagName === "TEXTAREA";
|
|
1384
|
+
const isContentEditable = el.isContentEditable;
|
|
1385
|
+
if (isContentEditable) {
|
|
1386
|
+
if (el.dispatchEvent(new InputEvent("beforeinput", {
|
|
1387
|
+
bubbles: true,
|
|
1388
|
+
cancelable: true,
|
|
1389
|
+
inputType: "deleteContent"
|
|
1390
|
+
}))) {
|
|
1391
|
+
el.innerText = "";
|
|
1392
|
+
el.dispatchEvent(new InputEvent("input", {
|
|
1393
|
+
bubbles: true,
|
|
1394
|
+
inputType: "deleteContent"
|
|
1395
|
+
}));
|
|
1396
|
+
}
|
|
1397
|
+
if (el.dispatchEvent(new InputEvent("beforeinput", {
|
|
1398
|
+
bubbles: true,
|
|
1399
|
+
cancelable: true,
|
|
1400
|
+
inputType: "insertText",
|
|
1401
|
+
data: txt
|
|
1402
|
+
}))) {
|
|
1403
|
+
el.innerText = txt;
|
|
1404
|
+
el.dispatchEvent(new InputEvent("input", {
|
|
1405
|
+
bubbles: true,
|
|
1406
|
+
inputType: "insertText",
|
|
1407
|
+
data: txt
|
|
1408
|
+
}));
|
|
1409
|
+
}
|
|
1410
|
+
const planAOk = el.innerText.trim() === txt.trim();
|
|
1411
|
+
if (!planAOk) {
|
|
1412
|
+
el.focus();
|
|
1413
|
+
const doc = el.ownerDocument;
|
|
1414
|
+
const sel = (doc.defaultView || window).getSelection();
|
|
1415
|
+
const range = doc.createRange();
|
|
1416
|
+
range.selectNodeContents(el);
|
|
1417
|
+
sel?.removeAllRanges();
|
|
1418
|
+
sel?.addRange(range);
|
|
1419
|
+
doc.execCommand("delete", false);
|
|
1420
|
+
doc.execCommand("insertText", false, txt);
|
|
1421
|
+
}
|
|
1422
|
+
el.dispatchEvent(new Event("change", { bubbles: true }));
|
|
1423
|
+
el.blur();
|
|
1424
|
+
} else if (isInput) {
|
|
1425
|
+
const inputEl = el;
|
|
1426
|
+
const proto = Object.getPrototypeOf(inputEl);
|
|
1427
|
+
const descriptor = Object.getOwnPropertyDescriptor(proto, "value") || Object.getOwnPropertyDescriptor(HTMLInputElement.prototype, "value") || Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, "value");
|
|
1428
|
+
if (descriptor?.set) {
|
|
1429
|
+
descriptor.set.call(inputEl, txt);
|
|
1430
|
+
} else {
|
|
1431
|
+
inputEl.value = txt;
|
|
1432
|
+
}
|
|
1433
|
+
inputEl.dispatchEvent(new Event("input", { bubbles: true }));
|
|
1434
|
+
inputEl.dispatchEvent(new Event("change", { bubbles: true }));
|
|
1435
|
+
} else {
|
|
1436
|
+
el.value = txt;
|
|
1437
|
+
el.dispatchEvent(new Event("input", { bubbles: true }));
|
|
1438
|
+
el.dispatchEvent(new Event("change", { bubbles: true }));
|
|
1439
|
+
}
|
|
1440
|
+
}, ref, text);
|
|
1441
|
+
} else {
|
|
1442
|
+
await this.page.click(ref, { count: 3 });
|
|
1443
|
+
await this.page.keyboard.type(text);
|
|
1444
|
+
}
|
|
1445
|
+
}
|
|
1446
|
+
async pressKey(key) {
|
|
1447
|
+
await this.page.keyboard.press(key);
|
|
1448
|
+
}
|
|
1449
|
+
async selectOption(ref, value) {
|
|
1450
|
+
const selector = typeof ref === "number" ? '[data-ref="' + ref + '"]' : ref;
|
|
1451
|
+
await this.page.select(selector, value);
|
|
1452
|
+
}
|
|
1453
|
+
async scroll(direction, amount) {
|
|
1454
|
+
const distance = amount || 500;
|
|
1455
|
+
const isVertical = direction === "up" || direction === "down";
|
|
1456
|
+
const positive = direction === "down" || direction === "right";
|
|
1457
|
+
const delta = positive ? distance : -distance;
|
|
1458
|
+
await this.page.evaluate((dy, dx, isVert) => {
|
|
1459
|
+
const canScroll = (el2) => {
|
|
1460
|
+
if (!el2) return false;
|
|
1461
|
+
const s = getComputedStyle(el2);
|
|
1462
|
+
if (isVert) {
|
|
1463
|
+
return /(auto|scroll|overlay)/.test(s.overflowY) && el2.scrollHeight > el2.clientHeight && el2.clientHeight >= window.innerHeight * 0.3;
|
|
1464
|
+
} else {
|
|
1465
|
+
return /(auto|scroll|overlay)/.test(s.overflowX) && el2.scrollWidth > el2.clientWidth && el2.clientWidth >= window.innerWidth * 0.3;
|
|
1466
|
+
}
|
|
1467
|
+
};
|
|
1468
|
+
let el = document.activeElement;
|
|
1469
|
+
while (el && !canScroll(el) && el !== document.body) {
|
|
1470
|
+
el = el.parentElement;
|
|
1471
|
+
}
|
|
1472
|
+
if (!canScroll(el)) {
|
|
1473
|
+
el = Array.from(document.querySelectorAll("*")).find(canScroll) || null;
|
|
1474
|
+
}
|
|
1475
|
+
const isPageLevel = !el || el === document.body || el === document.documentElement || el === document.scrollingElement;
|
|
1476
|
+
if (isPageLevel) {
|
|
1477
|
+
if (isVert) {
|
|
1478
|
+
window.scrollBy(0, dy);
|
|
1479
|
+
} else {
|
|
1480
|
+
window.scrollBy(dx, 0);
|
|
1481
|
+
}
|
|
1482
|
+
} else {
|
|
1483
|
+
if (isVert) {
|
|
1484
|
+
el.scrollBy({ top: dy, behavior: "smooth" });
|
|
1485
|
+
} else {
|
|
1486
|
+
el.scrollBy({ left: dx, behavior: "smooth" });
|
|
1487
|
+
}
|
|
1488
|
+
}
|
|
1489
|
+
}, isVertical ? delta : 0, isVertical ? 0 : delta, isVertical);
|
|
1490
|
+
await new Promise((r) => setTimeout(r, 150));
|
|
1491
|
+
}
|
|
1492
|
+
async scrollToElement(ref) {
|
|
1493
|
+
const selector = typeof ref === "number" ? '[data-ref="' + ref + '"]' : ref;
|
|
1494
|
+
await this.page.evaluate((sel) => {
|
|
1495
|
+
const el = document.querySelector(sel);
|
|
1496
|
+
if (!el) return;
|
|
1497
|
+
if (typeof el.scrollIntoViewIfNeeded === "function") {
|
|
1498
|
+
el.scrollIntoViewIfNeeded();
|
|
1499
|
+
} else {
|
|
1500
|
+
el.scrollIntoView({ behavior: "auto", block: "center", inline: "nearest" });
|
|
1501
|
+
}
|
|
1502
|
+
}, selector);
|
|
1503
|
+
}
|
|
1504
|
+
async getCookies(opts) {
|
|
1505
|
+
const cookies = await this.page.cookies();
|
|
1506
|
+
const filtered = opts?.domain ? cookies.filter((c) => c.domain.includes(opts.domain)) : cookies;
|
|
1507
|
+
return filtered.map((c) => ({
|
|
1508
|
+
name: c.name,
|
|
1509
|
+
value: c.value,
|
|
1510
|
+
domain: c.domain,
|
|
1511
|
+
path: c.path,
|
|
1512
|
+
expires: c.expires,
|
|
1513
|
+
httpOnly: c.httpOnly,
|
|
1514
|
+
secure: c.secure,
|
|
1515
|
+
sameSite: c.sameSite
|
|
1516
|
+
}));
|
|
1517
|
+
}
|
|
1518
|
+
async wait(options) {
|
|
1519
|
+
if (typeof options === "number") {
|
|
1520
|
+
await new Promise((r) => setTimeout(r, options * 1e3));
|
|
1521
|
+
return;
|
|
1522
|
+
}
|
|
1523
|
+
if (options.time) {
|
|
1524
|
+
await new Promise((r) => setTimeout(r, options.time * 1e3));
|
|
1525
|
+
}
|
|
1526
|
+
if (options.text) {
|
|
1527
|
+
await this.page.waitForFunction(
|
|
1528
|
+
(t) => document.body.innerText.includes(t),
|
|
1529
|
+
{ timeout: options.timeout || 3e4 },
|
|
1530
|
+
options.text
|
|
1531
|
+
);
|
|
1532
|
+
}
|
|
1533
|
+
}
|
|
1534
|
+
async networkRequests(includeStatic) {
|
|
1535
|
+
const entries = await this.page.evaluate(`
|
|
1536
|
+
(() => {
|
|
1537
|
+
const entries = performance.getEntriesByType('resource');
|
|
1538
|
+
const staticTypes = new Set(['img', 'font', 'css', 'script', 'link']);
|
|
1539
|
+
const includeStatic = ${!!includeStatic};
|
|
1540
|
+
|
|
1541
|
+
return entries
|
|
1542
|
+
.filter(e => includeStatic || !staticTypes.has(e.initiatorType))
|
|
1543
|
+
.map(e => ({
|
|
1544
|
+
url: e.name,
|
|
1545
|
+
method: 'GET',
|
|
1546
|
+
status: 200,
|
|
1547
|
+
type: e.initiatorType || 'other',
|
|
1548
|
+
size: e.transferSize || e.encodedBodySize || 0,
|
|
1549
|
+
duration: Math.round(e.duration),
|
|
1550
|
+
}));
|
|
1551
|
+
})()
|
|
1552
|
+
`);
|
|
1553
|
+
return entries || [];
|
|
1554
|
+
}
|
|
1555
|
+
async installInterceptor(pattern) {
|
|
1556
|
+
await this.page.evaluate(buildInterceptorScript(pattern));
|
|
1557
|
+
}
|
|
1558
|
+
async getInterceptedRequests() {
|
|
1559
|
+
return this.page.evaluate(GET_INTERCEPTED_SCRIPT);
|
|
1560
|
+
}
|
|
1561
|
+
async screenshot(opts) {
|
|
1562
|
+
const result = await this.page.screenshot({
|
|
1563
|
+
type: opts?.format || "png",
|
|
1564
|
+
fullPage: opts?.fullPage ?? false
|
|
1565
|
+
});
|
|
1566
|
+
return Buffer.from(result);
|
|
1567
|
+
}
|
|
1568
|
+
async tabs() {
|
|
1569
|
+
const browser = this.page.browser();
|
|
1570
|
+
const pages = await browser.pages();
|
|
1571
|
+
return pages.map((p, i) => ({
|
|
1572
|
+
id: i,
|
|
1573
|
+
url: p.url(),
|
|
1574
|
+
title: "",
|
|
1575
|
+
active: p === this.page
|
|
1576
|
+
}));
|
|
1577
|
+
}
|
|
1578
|
+
async close() {
|
|
1579
|
+
await this.page.close();
|
|
1580
|
+
}
|
|
1581
|
+
};
|
|
1582
|
+
|
|
1583
|
+
// src/agent/core.ts
|
|
1584
|
+
import { readFileSync } from "fs";
|
|
1585
|
+
import { join, dirname } from "path";
|
|
1586
|
+
import { fileURLToPath } from "url";
|
|
1587
|
+
|
|
1588
|
+
// src/llm/errors.ts
|
|
1589
|
+
/**
 * Error raised while invoking the LLM or the tool it selected.
 *
 * Fields:
 *  - `type`:        error category string (e.g. "RATE_LIMIT", "AUTH_ERROR").
 *  - `retryable`:   explicit `opts.retryable` if given, otherwise derived
 *                   from `type` via `isRetryable`.
 *  - `rawError`:    underlying error, when one exists.
 *  - `rawResponse`: raw provider response, when one exists.
 */
var InvokeError = class extends Error {
  type;
  retryable;
  rawError;
  rawResponse;
  constructor(type, message, opts) {
    super(message);
    const { retryable, rawError, rawResponse } = opts ?? {};
    this.name = "InvokeError";
    this.type = type;
    this.retryable = retryable ?? isRetryable(type);
    this.rawError = rawError;
    this.rawResponse = rawResponse;
  }
};
|
|
1603
|
+
/**
 * Decide whether an error category is worth retrying.
 *
 * @param type Error category string.
 * @returns `true` for transient or model-output categories, `false` for
 *          auth/context-length/content-filter and for any category not
 *          listed here (previously unknown categories yielded `undefined`).
 */
function isRetryable(type) {
  switch (type) {
    case "NETWORK_ERROR" /* NETWORK_ERROR */:
    case "RATE_LIMIT" /* RATE_LIMIT */:
    case "SERVER_ERROR" /* SERVER_ERROR */:
    case "NO_TOOL_CALL" /* NO_TOOL_CALL */:
    case "INVALID_TOOL_ARGS" /* INVALID_TOOL_ARGS */:
    case "TOOL_EXECUTION_ERROR" /* TOOL_EXECUTION_ERROR */:
    case "UNKNOWN" /* UNKNOWN */:
      return true;
    case "AUTH_ERROR" /* AUTH_ERROR */:
    case "CONTEXT_LENGTH" /* CONTEXT_LENGTH */:
    case "CONTENT_FILTER" /* CONTENT_FILTER */:
      return false;
    default:
      // Always hand back a real boolean; unknown categories are not retried.
      return false;
  }
}
|
|
1619
|
+
|
|
1620
|
+
// src/llm/openai-client.ts
|
|
1621
|
+
/**
 * Minimal HTTP client for chat-completion style LLM endpoints.
 *
 * Speaks the OpenAI chat-completions wire format by default and the
 * Anthropic Messages format when `config.provider === "anthropic"`.
 * Reads `baseURL`, `model`, `apiKey`, `temperature`, `provider` and the
 * optional `maxTokens` from `config`.
 */
var OpenAIClient = class {
  config;
  constructor(config) {
    this.config = config;
  }
  /**
   * Join `config.baseURL` and an endpoint path, tolerating trailing slashes
   * on the base so the result never contains `//` before the path.
   */
  endpointURL(path) {
    const base = String(this.config.baseURL).replace(/\/+$/, "");
    return `${base}/${path}`;
  }
  /**
   * Build auth headers based on the provider.
   * - Anthropic: x-api-key header + anthropic-version
   * - everything else: Bearer token
   * Without an API key only the content type is sent.
   */
  buildHeaders() {
    const headers = {
      "Content-Type": "application/json"
    };
    if (!this.config.apiKey) return headers;
    if (this.config.provider === "anthropic") {
      headers["x-api-key"] = this.config.apiKey;
      headers["anthropic-version"] = "2023-06-01";
    } else {
      headers["Authorization"] = `Bearer ${this.config.apiKey}`;
    }
    return headers;
  }
  /**
   * Build the request URL and body for the configured provider.
   *
   * @param messages OpenAI-style `{ role, content }` messages.
   * @param tools    OpenAI-style tool definitions (may be empty/undefined).
   * @param opts     Optional `{ toolChoice }` (string or `{ type, function }`).
   * @returns `{ url, body }`.
   */
  buildBody(messages, tools, opts) {
    if (this.config.provider === "anthropic") {
      return this.buildAnthropicBody(messages, tools, opts);
    }
    const body = {
      model: this.config.model,
      messages,
      temperature: this.config.temperature ?? 0.1
    };
    if (tools && tools.length > 0) {
      body.tools = tools;
      body.parallel_tool_calls = false;
      // The OpenAI format accepts both the string and the object form as-is.
      if (opts?.toolChoice) {
        body.tool_choice = opts.toolChoice;
      }
    }
    return { url: this.endpointURL("chat/completions"), body };
  }
  /**
   * Build an Anthropic Messages API request from OpenAI-style inputs.
   *
   * System messages are lifted into the top-level `system` field (several
   * are joined with a blank line instead of the last one silently winning);
   * every non-assistant role is sent as "user".
   */
  buildAnthropicBody(messages, tools, opts) {
    const systemParts = [];
    const anthropicMessages = [];
    for (const msg of messages) {
      if (msg.role === "system") {
        systemParts.push(msg.content);
      } else {
        anthropicMessages.push({
          role: msg.role === "assistant" ? "assistant" : "user",
          content: msg.content
        });
      }
    }
    // A single system message is forwarded untouched; only multiples are joined.
    const system = systemParts.length > 1 ? systemParts.join("\n\n") : systemParts[0];
    const body = {
      model: this.config.model,
      messages: anthropicMessages,
      max_tokens: this.config.maxTokens ?? 4096,
      temperature: this.config.temperature ?? 0.1
    };
    if (system) body.system = system;
    if (tools && tools.length > 0) {
      body.tools = tools.map((t) => {
        const fn = t.function;
        return {
          name: fn.name,
          description: fn.description,
          input_schema: fn.parameters
        };
      });
      if (opts?.toolChoice) {
        if (typeof opts.toolChoice === "string") {
          // OpenAI's "required" corresponds to Anthropic's { type: "any" }.
          body.tool_choice = opts.toolChoice === "required" ? { type: "any" } : { type: opts.toolChoice };
        } else {
          body.tool_choice = { type: "tool", name: opts.toolChoice.function.name };
        }
      }
    }
    return { url: this.endpointURL("messages"), body };
  }
  /**
   * Parse an Anthropic response into the unified `{ toolCalls, content, usage }` shape.
   *
   * All text blocks are concatenated in order (previously only the last one
   * survived); `tool_use` blocks become OpenAI-style function tool calls.
   *
   * @throws InvokeError("UNKNOWN") when the response has no content array.
   */
  parseAnthropicResponse(json) {
    const content = json.content;
    if (!content || !Array.isArray(content)) {
      throw new InvokeError("UNKNOWN" /* UNKNOWN */, "No content in Anthropic response", { rawResponse: json });
    }
    const textParts = [];
    const toolCalls = [];
    for (const block of content) {
      if (block.type === "text") {
        textParts.push(block.text);
      } else if (block.type === "tool_use") {
        toolCalls.push({
          id: block.id,
          type: "function",
          function: {
            name: block.name,
            // Always hand downstream code a JSON string, even with no input.
            arguments: JSON.stringify(block.input ?? {})
          }
        });
      }
    }
    const usage = json.usage;
    return {
      toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
      content: textParts.length > 0 ? textParts.join("") : void 0,
      usage: usage ? {
        promptTokens: usage.input_tokens ?? 0,
        completionTokens: usage.output_tokens ?? 0,
        totalTokens: (usage.input_tokens ?? 0) + (usage.output_tokens ?? 0)
      } : void 0
    };
  }
  /**
   * POST a chat request and return `{ toolCalls, content, usage }`.
   *
   * @throws InvokeError typed NETWORK_ERROR (fetch failed), AUTH_ERROR
   *         (401/403), RATE_LIMIT (429), SERVER_ERROR (5xx), CONTENT_FILTER /
   *         CONTEXT_LENGTH (finish reasons) or UNKNOWN (other HTTP errors,
   *         unparsable body, missing choices).
   */
  async chatCompletion(messages, tools, opts) {
    const { url, body } = this.buildBody(messages, tools, opts);
    const headers = this.buildHeaders();
    let response;
    try {
      response = await fetch(url, {
        method: "POST",
        headers,
        body: JSON.stringify(body)
      });
    } catch (err) {
      throw new InvokeError("NETWORK_ERROR" /* NETWORK_ERROR */, `Network error: ${err}`, { rawError: err });
    }
    if (!response.ok) {
      const text = await response.text().catch(() => "");
      // 403 is a credentials/permission problem too; retrying it cannot help.
      if (response.status === 401 || response.status === 403) {
        throw new InvokeError("AUTH_ERROR" /* AUTH_ERROR */, `Authentication failed: ${text}`, { retryable: false, rawResponse: text });
      }
      if (response.status === 429) {
        throw new InvokeError("RATE_LIMIT" /* RATE_LIMIT */, `Rate limited: ${text}`, { rawResponse: text });
      }
      if (response.status >= 500) {
        throw new InvokeError("SERVER_ERROR" /* SERVER_ERROR */, `Server error ${response.status}: ${text}`, { rawResponse: text });
      }
      throw new InvokeError("UNKNOWN" /* UNKNOWN */, `HTTP ${response.status}: ${text}`, { rawResponse: text });
    }
    let json;
    try {
      json = await response.json();
    } catch (err) {
      // A 2xx with a non-JSON body would otherwise surface as a bare SyntaxError.
      throw new InvokeError("UNKNOWN" /* UNKNOWN */, `Invalid JSON in response: ${err}`, { rawError: err });
    }
    if (this.config.provider === "anthropic") {
      return this.parseAnthropicResponse(json);
    }
    const choice = json.choices?.[0];
    if (!choice) {
      throw new InvokeError("UNKNOWN" /* UNKNOWN */, "No choices in response", { rawResponse: json });
    }
    const message = choice.message;
    const finishReason = choice.finish_reason;
    if (finishReason === "content_filter") {
      throw new InvokeError("CONTENT_FILTER" /* CONTENT_FILTER */, "Content filtered", { retryable: false, rawResponse: json });
    }
    if (finishReason === "length") {
      throw new InvokeError("CONTEXT_LENGTH" /* CONTEXT_LENGTH */, "Context length exceeded", { retryable: false, rawResponse: json });
    }
    const usage = json.usage;
    return {
      toolCalls: message?.tool_calls,
      content: message?.content,
      usage: usage ? {
        promptTokens: usage.prompt_tokens ?? 0,
        completionTokens: usage.completion_tokens ?? 0,
        totalTokens: usage.total_tokens ?? 0
      } : void 0
    };
  }
};
|
|
1798
|
+
|
|
1799
|
+
// src/llm/utils.ts
|
|
1800
|
+
/**
 * Convert a Zod schema (inspected through its `_def`) into a JSON Schema object.
 *
 * Handles ZodObject, ZodString, ZodNumber, ZodBoolean, ZodEnum, ZodArray,
 * ZodOptional, ZodDefault, ZodUnion, ZodRecord, ZodLiteral and ZodAny.
 * Anything else — including values that are not Zod schemas at all — maps
 * to `{ type: "string" }`.
 *
 * A description found on the schema itself is always emitted, including on
 * wrapper types, so `z.number().optional().describe("…")` keeps its text.
 * Fields wrapped in ZodOptional or ZodDefault are left out of `required`.
 *
 * @param schema Zod schema instance.
 * @returns Plain JSON Schema object.
 */
function zodToJsonSchema(schema) {
  const fallback = { type: "string" };
  // `in` throws on primitives/null, so rule those out first.
  if (schema === null || typeof schema !== "object" || !("_def" in schema)) {
    return fallback;
  }
  const def = schema._def;
  // The outermost description wins over one inherited from a wrapped schema.
  const describe = (json) => def?.description ? { ...json, description: def.description } : json;
  switch (def?.typeName) {
    case "ZodObject": {
      const properties = {};
      const required = [];
      for (const [key, value] of Object.entries(def.shape())) {
        properties[key] = zodToJsonSchema(value);
        const fieldType = value?._def?.typeName;
        // A field with a default may be omitted by the caller, just like an optional one.
        if (fieldType !== "ZodOptional" && fieldType !== "ZodDefault") {
          required.push(key);
        }
      }
      const result = { type: "object", properties };
      if (required.length > 0) result.required = required;
      return describe(result);
    }
    case "ZodString":
      return describe({ type: "string" });
    case "ZodNumber":
      return describe({ type: "number" });
    case "ZodBoolean":
      return describe({ type: "boolean" });
    case "ZodEnum":
      return describe({ type: "string", enum: def.values });
    case "ZodArray":
      return describe({ type: "array", items: zodToJsonSchema(def.type) });
    case "ZodOptional":
      return describe(zodToJsonSchema(def.innerType));
    case "ZodDefault":
      return describe({ ...zodToJsonSchema(def.innerType), default: def.defaultValue() });
    case "ZodUnion":
      return describe({ oneOf: def.options.map((opt) => zodToJsonSchema(opt)) });
    case "ZodRecord":
      return describe({ type: "object", additionalProperties: zodToJsonSchema(def.valueType) });
    case "ZodLiteral":
      return describe({ const: def.value });
    case "ZodAny":
      return {};
    default:
      return fallback;
  }
}
|
|
1862
|
+
/**
 * Wrap a Zod schema as an OpenAI-style function tool definition.
 *
 * @param name        Tool (function) name.
 * @param description Human-readable description sent to the model.
 * @param schema      Zod schema; converted with `zodToJsonSchema`.
 * @returns `{ type: "function", function: { name, description, parameters } }`.
 */
function zodToOpenAITool(name, description, schema) {
  const parameters = zodToJsonSchema(schema);
  const fn = { name, description, parameters };
  return { type: "function", function: fn };
}
|
|
1872
|
+
|
|
1873
|
+
// src/llm/client.ts
|
|
1874
|
+
/**
 * High-level LLM wrapper: forces the model to call one tool, executes it,
 * and retries transient failures.
 */
var LLM = class {
  client;
  config;
  constructor(config) {
    this.config = config;
    this.client = new OpenAIClient({
      baseURL: config.baseURL,
      model: config.model,
      apiKey: config.apiKey,
      temperature: config.temperature,
      provider: config.provider
    });
  }
  /**
   * Ask the model to call `tool`, run the tool with the returned arguments
   * and report both.
   *
   * @param messages    Chat messages forwarded to the client.
   * @param tool        `{ name, description, schema, execute }`.
   * @param abortSignal Optional signal; checked before every attempt.
   * @returns `{ toolCall: { name, args }, toolResult, usage }`.
   * @throws An error named "AbortError" when the signal is already aborted,
   *         otherwise the last InvokeError / client error once retries are spent.
   */
  async invoke(messages, tool, abortSignal) {
    const openaiTool = zodToOpenAITool(tool.name, tool.description, tool.schema);
    const toolChoice = { type: "function", function: { name: tool.name } };
    return this.withRetry(async () => {
      if (abortSignal?.aborted) {
        // Must be named "AbortError": withRetry uses the name to stop
        // retrying. A plain Error("Aborted") used to be retried.
        const abortError = new Error("Aborted");
        abortError.name = "AbortError";
        throw abortError;
      }
      const response = await this.client.chatCompletion(messages, [openaiTool], { toolChoice });
      const args = this.resolveToolArgs(response);
      let result;
      try {
        result = await tool.execute(args);
      } catch (err) {
        throw new InvokeError("TOOL_EXECUTION_ERROR" /* TOOL_EXECUTION_ERROR */, `Tool execution failed: ${err}`, { rawError: err });
      }
      return {
        toolCall: { name: tool.name, args },
        toolResult: result,
        usage: response.usage
      };
    });
  }
  /**
   * Extract the tool arguments from a client response.
   *
   * Uses the first tool call when present; otherwise falls back to the first
   * `{…}` JSON span found in the text content.
   *
   * @throws InvokeError("NO_TOOL_CALL") when neither source yields arguments,
   *         InvokeError("INVALID_TOOL_ARGS") when the arguments are not valid JSON.
   */
  resolveToolArgs(response) {
    const toolCall = response.toolCalls?.[0];
    if (!toolCall) {
      const extracted = response.content ? extractJsonFromString(response.content) : null;
      if (extracted) return extracted;
      throw new InvokeError("NO_TOOL_CALL" /* NO_TOOL_CALL */, "No tool call in response");
    }
    const rawArgs = toolCall.function.arguments;
    try {
      let parsed = JSON.parse(rawArgs);
      // Some providers double-encode: the first parse yields a JSON *string*.
      // (The old fallback re-parsed the same failing input and could never succeed.)
      if (typeof parsed === "string") parsed = JSON.parse(parsed);
      return parsed;
    } catch {
      throw new InvokeError("INVALID_TOOL_ARGS" /* INVALID_TOOL_ARGS */, `Invalid JSON in tool args: ${rawArgs}`);
    }
  }
  /**
   * Run `fn`, retrying up to `config.maxRetries` (default 3) additional
   * times with a linearly growing delay (100 ms × attempt number).
   * Non-retryable InvokeErrors and errors named "AbortError" are rethrown at once.
   */
  async withRetry(fn) {
    const maxRetries = this.config.maxRetries ?? 3;
    let lastError;
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        return await fn();
      } catch (err) {
        lastError = err;
        if (err instanceof InvokeError && !err.retryable) throw err;
        if (err instanceof Error && err.name === "AbortError") throw err;
        if (attempt < maxRetries) {
          await new Promise((r) => setTimeout(r, 100 * (attempt + 1)));
        }
      }
    }
    throw lastError;
  }
};
|
|
1953
|
+
/**
 * Pull a JSON value out of free text.
 *
 * Takes the span from the first `{` to the last `}` and parses it.
 *
 * @param str Text that may contain a JSON object.
 * @returns The parsed value, or `null` when there is no such span or it is
 *          not valid JSON.
 */
function extractJsonFromString(str) {
  const open = str.indexOf("{");
  const close = str.lastIndexOf("}");
  const hasSpan = open !== -1 && close !== -1 && close > open;
  if (!hasSpan) return null;
  const candidate = str.slice(open, close + 1);
  try {
    return JSON.parse(candidate);
  } catch {
    return null;
  }
}
|
|
1963
|
+
|
|
1964
|
+
// src/agent/tools/click.ts
|
|
1965
|
+
import { z } from "zod";
|
|
1966
|
+
/**
 * Build the "click" tool.
 *
 * @param page Object exposing `click(index)`.
 * @returns `{ description, inputSchema, execute }`; `execute` clicks the
 *          element and resolves to a confirmation string.
 */
function createClickTool(page) {
  const inputSchema = z.object({
    index: z.number().describe("The index of the element to click")
  });
  const execute = async (args) => {
    const target = args.index;
    await page.click(target);
    return `Clicked element [${target}]`;
  };
  return {
    description: "Click on an interactive element by its index number from the page content.",
    inputSchema,
    execute
  };
}
|
|
1978
|
+
|
|
1979
|
+
// src/agent/tools/type.ts
|
|
1980
|
+
import { z as z2 } from "zod";
|
|
1981
|
+
/**
 * Build the "type text" tool.
 *
 * @param page Object exposing `typeText(index, text)`.
 * @returns `{ description, inputSchema, execute }`; `execute` types into the
 *          element and resolves to a confirmation string.
 */
function createTypeTool(page) {
  const inputSchema = z2.object({
    index: z2.number().describe("The index of the input element"),
    text: z2.string().describe("The text to type")
  });
  const execute = async (args) => {
    const target = args.index;
    const typed = args.text;
    await page.typeText(target, typed);
    return `Typed "${typed}" into element [${target}]`;
  };
  return {
    description: "Type text into an input field identified by its index number.",
    inputSchema,
    execute
  };
}
|
|
1994
|
+
|
|
1995
|
+
// src/agent/tools/scroll.ts
|
|
1996
|
+
import { z as z3 } from "zod";
|
|
1997
|
+
/**
 * Build the "scroll" tool.
 *
 * @param page Object exposing `scroll(direction, amount)`.
 * @returns `{ description, inputSchema, execute }`; `execute` scrolls and
 *          resolves to a summary that includes the pixel amount when one
 *          was given.
 */
function createScrollTool(page) {
  const inputSchema = z3.object({
    direction: z3.enum(["up", "down", "left", "right"]).describe("Scroll direction"),
    amount: z3.number().optional().describe("Pixels to scroll (default 500)")
  });
  const execute = async (args) => {
    await page.scroll(args.direction, args.amount);
    let suffix = "";
    if (args.amount) {
      suffix = ` ${args.amount}px`;
    }
    return `Scrolled ${args.direction}${suffix}`;
  };
  return {
    description: "Scroll the page in a given direction. Use to reveal more content.",
    inputSchema,
    execute
  };
}
|
|
2010
|
+
|
|
2011
|
+
// src/agent/tools/select.ts
|
|
2012
|
+
import { z as z4 } from "zod";
|
|
2013
|
+
/**
 * Build the "select dropdown option" tool.
 *
 * @param page Object exposing `selectOption(index, value)`.
 * @returns `{ description, inputSchema, execute }`; `execute` selects the
 *          option and resolves to a confirmation string.
 */
function createSelectTool(page) {
  const inputSchema = z4.object({
    index: z4.number().describe("The index of the select element"),
    value: z4.string().describe("The option text or value to select")
  });
  const execute = async (args) => {
    const target = args.index;
    const choice = args.value;
    await page.selectOption(target, choice);
    return `Selected "${choice}" in element [${target}]`;
  };
  return {
    description: "Select an option from a dropdown/select element by its index.",
    inputSchema,
    execute
  };
}
|
|
2026
|
+
|
|
2027
|
+
// src/agent/tools/wait.ts
|
|
2028
|
+
import { z as z5 } from "zod";
|
|
2029
|
+
/**
 * Build the "wait" tool.
 *
 * @returns `{ description, inputSchema, execute }`; the schema limits
 *          `seconds` to 0.1–30 and `execute` sleeps that long before
 *          resolving to a confirmation string.
 */
function createWaitTool() {
  const inputSchema = z5.object({
    seconds: z5.number().min(0.1).max(30).describe("Seconds to wait")
  });
  const execute = async (args) => {
    const delayMs = args.seconds * 1e3;
    await new Promise((resolve) => setTimeout(resolve, delayMs));
    return `Waited ${args.seconds} seconds`;
  };
  return {
    description: "Wait for a specified number of seconds before continuing.",
    inputSchema,
    execute
  };
}
|
|
2041
|
+
|
|
2042
|
+
// src/agent/tools/done.ts
|
|
2043
|
+
import { z as z6 } from "zod";
|
|
2044
|
+
/**
 * Build the "done" tool that signals task completion.
 *
 * @returns `{ description, inputSchema, execute }`; `execute` resolves to a
 *          JSON string `{"done":true,"success":…,"text":…}`.
 */
function createDoneTool() {
  const inputSchema = z6.object({
    success: z6.boolean().describe("Whether the task was completed successfully"),
    text: z6.string().describe("Summary of the result or explanation of failure")
  });
  const execute = async (args) => {
    const payload = { done: true, success: args.success, text: args.text };
    return JSON.stringify(payload);
  };
  return {
    description: "Signal that the task is complete. Call this when you have finished the task or cannot proceed further.",
    inputSchema,
    execute
  };
}
|
|
2056
|
+
|
|
2057
|
+
// src/agent/tools/ask-user.ts
|
|
2058
|
+
import { z as z7 } from "zod";
|
|
2059
|
+
import { createInterface } from "readline";
|
|
2060
|
+
/**
 * Build the "ask user" tool, which prompts on stdin/stdout.
 *
 * @returns `{ description, inputSchema, execute }`; `execute` opens a
 *          readline interface, prints the question, and resolves to
 *          `User answered: <answer>` after closing the interface.
 */
function createAskUserTool() {
  const inputSchema = z7.object({
    question: z7.string().describe("The question to ask the user")
  });
  const execute = async (args) => {
    const rl = createInterface({ input: process.stdin, output: process.stdout });
    const prompt = `\n\u{1F916} Agent asks: ${args.question}\n> `;
    return new Promise((resolve) => {
      const onAnswer = (answer) => {
        rl.close();
        resolve(`User answered: ${answer}`);
      };
      rl.question(prompt, onAnswer);
    });
  };
  return {
    description: "Ask the user a question when you need clarification or input to proceed.",
    inputSchema,
    execute
  };
}
|
|
2079
|
+
|
|
2080
|
+
// src/agent/tools/execute-js.ts
|
|
2081
|
+
import { z as z8 } from "zod";
|
|
2082
|
+
/**
 * Build the "execute JavaScript" tool.
 *
 * SECURITY NOTE: `code` comes from the model and is evaluated verbatim in
 * the page; this is the tool's purpose, but callers should treat it as
 * arbitrary code execution in that page.
 *
 * @param page Object exposing `evaluate(code)`.
 * @returns `{ description, inputSchema, execute }`; `execute` always
 *          resolves to a string: the result itself when it is a string,
 *          otherwise its pretty-printed JSON, or `String(result)` for values
 *          JSON cannot represent (e.g. `undefined`).
 */
function createExecuteJsTool(page) {
  return {
    description: "Execute JavaScript code on the current page. Returns the result.",
    inputSchema: z8.object({
      code: z8.string().describe("JavaScript code to execute on the page")
    }),
    execute: async (args) => {
      const result = await page.evaluate(args.code);
      if (typeof result === "string") return result;
      // JSON.stringify(undefined) is undefined, which would leak a non-string result.
      return JSON.stringify(result, null, 2) ?? String(result);
    }
  };
}
|
|
2094
|
+
|
|
2095
|
+
// src/agent/tools/index.ts
|
|
2096
|
+
/**
 * Assemble the default tool set, keyed by the action names shown to the model.
 *
 * @param page Page object handed to every tool that drives the browser.
 * @returns Map of action name → `{ description, inputSchema, execute }`.
 */
function createDefaultTools(page) {
  const tools = {};
  tools.click_element_by_index = createClickTool(page);
  tools.input_text = createTypeTool(page);
  tools.scroll = createScrollTool(page);
  tools.select_dropdown_option = createSelectTool(page);
  tools.wait = createWaitTool();
  tools.done = createDoneTool();
  tools.ask_user = createAskUserTool();
  tools.execute_javascript = createExecuteJsTool(page);
  return tools;
}
|
|
2108
|
+
|
|
2109
|
+
// src/agent/macro-tool.ts
|
|
2110
|
+
import { z as z9 } from "zod";
|
|
2111
|
+
|
|
2112
|
+
// src/agent/auto-fixer.ts
|
|
2113
|
+
/**
 * Repair common shape mistakes in a model's AgentOutput so that it always
 * ends up as `{ …, action: { <toolName>: <inputObject> } }`.
 *
 * Steps, in order:
 *  1. Unwrap an OpenAI-style `{ type: "function", function: { arguments } }` envelope.
 *  2. Parse string fields that contain JSON objects/arrays.
 *  3. If `action` is missing, lift a top-level key named after a known action.
 *  4. Turn a bare action name string into `{ name: {} }`.
 *  5. Fall back to `{ wait: { seconds: 1 } }` when no usable action remains
 *     (missing, unknown name, non-object, or an object without keys).
 *  6. Wrap primitive tool inputs into objects and, when a schema is
 *     available, try to fix inputs that fail validation.
 *
 * @param raw              Arguments as produced by the model.
 * @param toolName         Unused; kept for interface compatibility.
 * @param availableActions Names of the actions the model may choose.
 * @param toolSchemas      Optional map of action name → `{ inputSchema }`.
 * @returns The normalized object. Note that a nested `action` object taken
 *          from `raw` is updated in place.
 */
function normalizeResponse(raw, toolName, availableActions, toolSchemas) {
  const fallbackAction = () => ({ wait: { seconds: 1 } });
  const isObject = (v) => typeof v === "object" && v !== null;
  let result = { ...raw };
  if (result.type === "function" && result.function) {
    const fnArgs = result.function.arguments;
    if (typeof fnArgs === "string") {
      try {
        const parsed = JSON.parse(fnArgs);
        // A primitive payload (e.g. "5") cannot carry an action; keep the envelope.
        if (isObject(parsed)) result = parsed;
      } catch {
        // Not JSON: keep the envelope and let the fallbacks below decide.
      }
    } else if (isObject(fnArgs)) {
      // `typeof null === "object"`, so null must be excluded explicitly.
      result = fnArgs;
    }
  }
  for (const [key, value] of Object.entries(result)) {
    if (typeof value !== "string") continue;
    try {
      const parsed = JSON.parse(value);
      if (isObject(parsed)) {
        result[key] = parsed;
      }
    } catch {
      // Ordinary text field; leave it as-is.
    }
  }
  if (!result.action) {
    for (const actionName of availableActions) {
      if (actionName in result) {
        result = {
          ...result,
          action: { [actionName]: result[actionName] }
        };
        delete result[actionName];
        break;
      }
    }
  }
  if (typeof result.action === "string") {
    result.action = availableActions.includes(result.action) ? { [result.action]: {} } : fallbackAction();
  }
  // Covers a missing action as well as `{}` and non-object values, which
  // would otherwise leave the caller without any tool to run.
  if (!isObject(result.action) || Object.keys(result.action).length === 0) {
    result.action = fallbackAction();
  }
  const action = result.action;
  for (const [name, input] of Object.entries(action)) {
    const schema = toolSchemas?.[name]?.inputSchema;
    if (!isObject(input)) {
      const coerced = schema ? coercePrimitiveToSchema(input, schema) : null;
      if (coerced !== null) {
        action[name] = coerced;
        continue;
      }
      // No schema-driven guess available: use the conventional field names.
      if (typeof input === "number") {
        action[name] = { index: input };
      } else if (typeof input === "string") {
        action[name] = { text: input };
      } else {
        action[name] = {};
      }
    }
    if (schema && typeof action[name] === "object") {
      const validation = schema.safeParse(action[name]);
      if (!validation.success) {
        const fixed = attemptSchemaFix(action[name], schema, validation.error);
        if (fixed) {
          action[name] = fixed;
        }
      }
    }
  }
  return result;
}
|
|
2191
|
+
/**
 * Guess which field of an object schema a bare primitive was meant for.
 *
 * Resolution order:
 *  1. the only non-optional field, if there is exactly one;
 *  2. for numbers, the first field whose name looks like an index;
 *  3. for strings, the first field whose name looks like a text payload.
 *
 * @param value  Primitive produced by the model.
 * @param schema Zod schema of the tool input.
 * @returns `{ field: value }`, or `null` when the schema is not an object
 *          schema, no field fits, or inspecting the schema throws.
 */
function coercePrimitiveToSchema(value, schema) {
  try {
    const def = schema._def;
    if (def?.typeName !== "ZodObject") return null;
    const shape = def.shape();
    const fieldNames = Object.keys(shape);
    const mandatory = fieldNames.filter((field) => shape[field]?._def?.typeName !== "ZodOptional");
    if (mandatory.length === 1) {
      const [onlyField] = mandatory;
      return { [onlyField]: value };
    }
    if (typeof value === "number") {
      const indexField = fieldNames.find((field) => /index|idx|num|number/i.test(field));
      if (indexField) return { [indexField]: value };
    }
    if (typeof value === "string") {
      const textField = fieldNames.find((field) => /text|value|query|code|question|url/i.test(field));
      if (textField) return { [textField]: value };
    }
    return null;
  } catch {
    return null;
  }
}
|
|
2216
|
+
/**
 * Try to repair a tool input that failed schema validation.
 *
 * For each top-level `invalid_type` issue the offending value is converted
 * when that can be done without guessing:
 *  - numeric strings → numbers (blank strings are NOT turned into 0);
 *  - numbers → strings;
 *  - "true"/"false" (any case, surrounding spaces ignored) → booleans;
 *    other strings are left alone instead of silently becoming `false`.
 * Keys reported by `unrecognized_keys` issues are removed.
 *
 * @param input  The rejected input object (not modified).
 * @param schema Zod object schema providing `_def` and `safeParse`.
 * @param error  Validation error exposing `issues`.
 * @returns A corrected copy that passes `schema.safeParse`, or `null` when
 *          no valid fix was found (including when anything here throws).
 */
function attemptSchemaFix(input, schema, error) {
  try {
    if (schema._def?.typeName !== "ZodObject") return null;
    const fixed = { ...input };
    for (const issue of error.issues) {
      if (issue.code === "invalid_type" && issue.path.length === 1) {
        const key = String(issue.path[0]);
        const val = input[key];
        if (issue.expected === "number" && typeof val === "string") {
          const num = Number(val);
          // Number("") and Number("  ") are 0; that is not what the model wrote.
          if (val.trim() !== "" && !Number.isNaN(num)) fixed[key] = num;
        } else if (issue.expected === "string" && typeof val === "number") {
          fixed[key] = String(val);
        } else if (issue.expected === "boolean" && typeof val === "string") {
          const lowered = val.trim().toLowerCase();
          if (lowered === "true") fixed[key] = true;
          else if (lowered === "false") fixed[key] = false;
        }
      }
      if (issue.code === "unrecognized_keys") {
        for (const extraKey of issue.keys || []) {
          delete fixed[extraKey];
        }
      }
    }
    return schema.safeParse(fixed).success ? fixed : null;
  } catch {
    // Best effort: any failure while repairing means "no fix available".
    return null;
  }
}
|
|
2249
|
+
|
|
2250
|
+
// src/agent/macro-tool.ts
|
|
2251
|
+
/**
 * Pack a set of tools into a single "AgentOutput" macro tool.
 *
 * The macro schema asks the model for optional reflection fields
 * (`evaluation_previous_goal`, `memory`, `next_goal`) plus exactly one
 * `action` of the form `{ <toolName>: <toolInput> }`.
 *
 * @param tools Map of tool name → `{ description, inputSchema, execute }`.
 * @returns `{ name, description, schema, execute }`. `execute` normalizes
 *          the model output, dispatches to the selected tool and resolves to
 *          that tool's result. Failures (no action, unknown tool, tool
 *          throwing) are reported as an "Error…" string rather than thrown.
 */
function packMacroTool(tools) {
  const actionSchemas = [];
  const toolNames = [];
  for (const [name, tool] of Object.entries(tools)) {
    toolNames.push(name);
    actionSchemas.push(
      z9.object({ [name]: tool.inputSchema }).describe(tool.description)
    );
  }
  // The two-branch form exists because a single schema cannot be wrapped in a union.
  const actionSchema = actionSchemas.length === 1 ? actionSchemas[0] : z9.union(actionSchemas);
  const macroSchema = z9.object({
    evaluation_previous_goal: z9.string().optional().describe("Evaluate whether the previous goal was achieved"),
    memory: z9.string().optional().describe("Important information to remember for future steps"),
    next_goal: z9.string().optional().describe("The next immediate goal to achieve"),
    action: actionSchema.describe("The action to take")
  });
  return {
    name: "AgentOutput",
    description: "The agent's output containing reflection and action. Must be called every step.",
    schema: macroSchema,
    execute: async (args) => {
      const normalized = normalizeResponse(args, "AgentOutput", toolNames, tools);
      const entries = Object.entries(normalized.action ?? {});
      if (entries.length === 0) {
        // Destructuring `entries[0]` would throw on an empty action object.
        return `Error: No action provided. Available: ${toolNames.join(", ")}`;
      }
      // Only the first action is executed.
      const [toolName, toolInput] = entries[0];
      const tool = tools[toolName];
      if (!tool) {
        return `Error: Unknown tool "${toolName}". Available: ${toolNames.join(", ")}`;
      }
      try {
        return await tool.execute(toolInput);
      } catch (err) {
        return `Error executing ${toolName}: ${err}`;
      }
    }
  };
}
|
|
2288
|
+
|
|
2289
|
+
// src/agent/core.ts
|
|
2290
|
+
var __dirname = dirname(fileURLToPath(import.meta.url));
/**
 * Runs the observe -> think -> act loop of the web agent against one page.
 *
 * Each step reads the browser state and flattened DOM tree, records
 * observations, asks the LLM for an "AgentOutput" macro-tool call, and stores
 * the step in `history`. The loop ends when the "done" action is returned,
 * the abort signal fires, or `maxSteps` is reached.
 *
 * Events emitted through `on`/`off`: "statuschange", "historychange" and
 * "activity".
 */
var AgentCore = class {
  page;
  config;
  llm;
  history = [];
  _status = "idle";
  listeners = /* @__PURE__ */ new Map();
  previousElementHashes = /* @__PURE__ */ new Set();
  totalWaitTime = 0;
  /**
   * @param {object} page Page adapter; `browserState()` and `flatTree()` are called on it.
   * @param {object} config Agent configuration; `config.llm` is passed to the LLM client.
   */
  constructor(page, config) {
    this.page = page;
    this.config = config;
    this.llm = new LLM(config.llm);
  }
  // ── Event system ──
  /** Registers `listener` for events whose `type` equals `event`. */
  on(event, listener) {
    if (!this.listeners.has(event)) this.listeners.set(event, /* @__PURE__ */ new Set());
    this.listeners.get(event).add(listener);
  }
  /** Removes a listener previously added with `on`. */
  off(event, listener) {
    this.listeners.get(event)?.delete(listener);
  }
  /** Delivers `event` to every listener registered for `event.type`. */
  emit(event) {
    const listeners = this.listeners.get(event.type);
    if (listeners) {
      for (const fn of listeners) {
        try {
          fn(event);
        } catch {
          // Best effort: a failing listener must not interrupt the agent loop.
        }
      }
    }
  }
  /** Current status string ("idle", "running", "completed" or "error"). */
  get status() {
    return this._status;
  }
  /** Updates the status and emits "statuschange" with the old and new value. */
  setStatus(newStatus) {
    const prev = this._status;
    this._status = newStatus;
    this.emit({ type: "statuschange", status: newStatus, previousStatus: prev });
  }
  /** Appends `event` to the history and emits "historychange". */
  pushHistory(event) {
    this.history.push(event);
    this.emit({ type: "historychange", history: this.history });
  }
  /**
   * Converts a tool result to text so it can be sliced for logs and prompts.
   * Strings pass through; other values are JSON-serialized, falling back to
   * `String(value)` when serialization fails or yields undefined.
   */
  formatToolResult(value) {
    if (typeof value === "string") return value;
    try {
      return JSON.stringify(value) ?? String(value);
    } catch {
      return String(value);
    }
  }
  /**
   * Runs the agent on `task`.
   *
   * @param {string} task Task description placed in every user prompt.
   * @param {AbortSignal} [abortSignal] Checked before each step and passed to the LLM call.
   * @returns {Promise<{success: boolean, data: unknown, history: object[]}>}
   * @throws Rethrows anything thrown outside the LLM call (e.g. by the
   *   `onBeforeStep`/`onAfterStep` hooks) after setting the status to "error".
   */
  async execute(task, abortSignal) {
    try {
      return await this.runLoop(task, abortSignal);
    } catch (err) {
      // Without this the status would stay "running" forever after a throw.
      this.setStatus("error");
      throw err;
    }
  }
  /** Step loop behind `execute`; see `execute` for the contract. */
  async runLoop(task, abortSignal) {
    this.setStatus("running");
    this.history = [];
    this.previousElementHashes.clear();
    this.totalWaitTime = 0;
    const maxSteps = this.config.maxSteps ?? 40;
    const stepDelay = this.config.stepDelay ?? 0.4;
    // Custom tools override defaults; a custom entry of null removes the tool.
    const tools = {
      ...createDefaultTools(this.page),
      ...this.config.customTools || {}
    };
    for (const [name, tool] of Object.entries(tools)) {
      if (tool === null) delete tools[name];
    }
    const macroTool = packMacroTool(tools);
    let systemPrompt;
    try {
      systemPrompt = readFileSync(join(__dirname, "prompts", "system.md"), "utf-8");
    } catch {
      // Prompt file not readable: fall back to a minimal built-in prompt.
      systemPrompt = "You are an AI web agent that navigates web pages to complete tasks.";
    }
    if (this.config.instructions?.system) {
      systemPrompt += "\n\n" + this.config.instructions.system;
    }
    let lastURL = "";
    for (let step = 1; step <= maxSteps; step++) {
      if (abortSignal?.aborted) {
        this.setStatus("error");
        return { success: false, data: "Aborted", history: this.history };
      }
      // Page reads are best effort: on failure an empty state/tree is used.
      const browserState = await this.page.browserState().catch(() => ({
        url: "",
        title: "",
        viewportWidth: 0,
        viewportHeight: 0,
        pageWidth: 0,
        pageHeight: 0,
        scrollX: 0,
        scrollY: 0,
        scrollPercent: 0,
        pixelsAbove: 0,
        pixelsBelow: 0
      }));
      const flatTree = await this.page.flatTree().catch(() => ({ rootId: "", map: {} }));
      const pageContent = flatTreeToString(flatTree);
      // Hash interactive elements to count those absent in the previous step.
      const currentHashes = /* @__PURE__ */ new Set();
      let newElementCount = 0;
      for (const node of Object.values(flatTree.map)) {
        if (node.isInteractive && node.highlightIndex !== void 0) {
          const hash = `${node.tagName}:${node.text || ""}:${JSON.stringify(node.attributes || {})}`;
          currentHashes.add(hash);
          if (!this.previousElementHashes.has(hash)) {
            newElementCount++;
          }
        }
      }
      this.previousElementHashes = currentHashes;
      const observations = [];
      if (browserState.url !== lastURL && lastURL) {
        observations.push(`Navigated to ${browserState.url}`);
      }
      lastURL = browserState.url;
      if (newElementCount > 0 && step > 1) {
        observations.push(`${newElementCount} new interactive element(s) appeared`);
      }
      if (this.totalWaitTime > 3) {
        observations.push(`Total wait time: ${this.totalWaitTime.toFixed(1)}s \u2014 consider if page is still loading`);
      }
      if (step >= maxSteps - 5) {
        observations.push(`Warning: ${maxSteps - step} steps remaining`);
      }
      if (this.config.instructions?.getPageInstructions) {
        try {
          const pi = this.config.instructions.getPageInstructions(browserState.url);
          if (pi) observations.push(`Page instructions: ${pi}`);
        } catch {
          // Page instructions are optional; a failing callback is ignored.
        }
      }
      for (const obs of observations) {
        this.pushHistory({ type: "observation", message: obs });
        this.emit({ type: "activity", kind: "observation", message: obs, step });
      }
      const userPrompt = assembleUserPrompt(
        task,
        pageContent,
        browserState,
        this.history,
        step,
        maxSteps
      );
      const messages = [
        { role: "system", content: systemPrompt },
        { role: "user", content: userPrompt }
      ];
      log.step(step, `Thinking... (${browserState.url})`);
      this.emit({ type: "activity", kind: "thinking", message: `Step ${step}: thinking`, step });
      if (this.config.onBeforeStep) await this.config.onBeforeStep(step);
      const startTime = Date.now();
      let result;
      try {
        result = await this.llm.invoke(messages, macroTool, abortSignal);
      } catch (err) {
        // An LLM failure consumes the step but does not end the run.
        log.error(`LLM error at step ${step}: ${err}`);
        this.pushHistory({ type: "error", error: String(err), step });
        this.emit({ type: "activity", kind: "error", message: String(err), step });
        continue;
      }
      const duration = Date.now() - startTime;
      const args = result.toolCall?.args ?? {};
      const rawAction = args.action || args;
      // Only objects carry a { toolName: input } pair; anything else is "unknown".
      const action = rawAction !== null && typeof rawAction === "object" ? rawAction : {};
      const [actionName, actionInput] = Object.entries(action)[0] || ["unknown", {}];
      this.emit({ type: "activity", kind: "executing", message: actionName, step });
      if (actionName === "wait") {
        // Coerce so a string such as "2" adds 2 instead of concatenating.
        const secs = Number(actionInput?.seconds);
        if (Number.isFinite(secs)) this.totalWaitTime += secs;
      }
      // Tools may return non-strings; keep a text form for slicing and prompts.
      const outputText = this.formatToolResult(result.toolResult);
      const stepEvent = {
        type: "step",
        step,
        reflection: {
          evaluation_previous_goal: args.evaluation_previous_goal || "",
          memory: args.memory || "",
          next_goal: args.next_goal || ""
        },
        action: { name: actionName, args: actionInput },
        output: outputText,
        duration
      };
      this.pushHistory(stepEvent);
      log.step(step, `Action: ${actionName} \u2192 ${outputText.slice(0, 100)}`);
      this.emit({ type: "activity", kind: "executed", message: `${actionName}: ${outputText.slice(0, 80)}`, step, duration });
      if (this.config.onAfterStep) await this.config.onAfterStep(this.history);
      if (actionName === "done") {
        try {
          const doneResult = JSON.parse(outputText);
          this.setStatus("completed");
          return { success: doneResult.success, data: doneResult.text || outputText, history: this.history };
        } catch {
          // Result is not a JSON object: report the raw text as a success.
          this.setStatus("completed");
          return { success: true, data: outputText, history: this.history };
        }
      }
      if (stepDelay > 0) {
        await new Promise((r) => setTimeout(r, stepDelay * 1e3));
      }
    }
    this.setStatus("error");
    return { success: false, data: `Reached maximum steps (${maxSteps})`, history: this.history };
  }
};
|
|
2486
|
+
/**
 * Builds the user prompt for one agent step.
 *
 * Sections, in order: "# Task", "# Current Page" (URL, title, viewport,
 * scroll position, step counter), "# Browser State" (the serialized page)
 * and, when history is non-empty, "# History" with the last 10 events.
 * Only "step" and "observation" events are rendered.
 *
 * @param {string} task Task text.
 * @param {string} pageContent Serialized page content.
 * @param {object} state Browser state (url, title, viewport*, pageHeight, scrollPercent, pixelsAbove, pixelsBelow).
 * @param {object[]} history Agent history events.
 * @param {number} step Current step number.
 * @param {number} maxSteps Step budget.
 * @returns {string} The assembled prompt.
 */
function assembleUserPrompt(task, pageContent, state, history, step, maxSteps) {
  // Step output may be a non-string (tools can return objects); convert it
  // before slicing so the prompt never crashes on `.slice`.
  const asText = (value) => {
    if (typeof value === "string") return value;
    try {
      return JSON.stringify(value) ?? String(value);
    } catch {
      return String(value);
    }
  };
  let prompt = `# Task\n${task}\n\n`;
  prompt += "# Current Page\n";
  prompt += `URL: ${state.url}\n`;
  prompt += `Title: ${state.title}\n`;
  prompt += `Viewport: ${state.viewportWidth}x${state.viewportHeight} | Page height: ${state.pageHeight}px\n`;
  prompt += `Scroll: ${state.scrollPercent}%`;
  // Off-screen distances are only mentioned when they exceed 50px.
  if (state.pixelsAbove > 50) prompt += ` | ${state.pixelsAbove}px above`;
  if (state.pixelsBelow > 50) prompt += ` | ${state.pixelsBelow}px below`;
  prompt += `\nStep: ${step}/${maxSteps}\n\n`;
  prompt += `# Browser State\n${pageContent}\n\n`;
  if (history.length === 0) return prompt;
  prompt += "# History\n";
  for (const event of history.slice(-10)) {
    if (event.type === "step") {
      prompt += `<step_${event.step}>\n`;
      if (event.reflection) {
        prompt += ` eval: ${event.reflection.evaluation_previous_goal}\n`;
        prompt += ` memory: ${event.reflection.memory}\n`;
        prompt += ` goal: ${event.reflection.next_goal}\n`;
      }
      prompt += ` action: ${event.action.name}(${JSON.stringify(event.action.args)})\n`;
      prompt += ` result: ${asText(event.output).slice(0, 200)}\n`;
      prompt += `</step_${event.step}>\n`;
    } else if (event.type === "observation") {
      prompt += `<sys>${event.message}</sys>\n`;
    }
  }
  return prompt;
}
|
|
2541
|
+
|
|
2542
|
+
// src/pipeline/template.ts
|
|
2543
|
+
// Matches every `${{ expression }}` placeholder inside a string.
var EXPR_RE = /\$\{\{\s*(.*?)\s*\}\}/g;
/**
 * Renders `${{ ... }}` placeholders in a template value.
 *
 * - Arrays and plain objects are rendered recursively (a new container is
 *   returned); other non-string values are returned unchanged.
 * - A string that consists of exactly one placeholder returns the evaluated
 *   value itself, keeping its type.
 * - Any other string has each placeholder replaced by `String(value)`, with
 *   null/undefined rendered as "".
 *
 * @param {unknown} template Template value.
 * @param {object} ctx Evaluation context passed to `evaluateExpression`.
 * @returns {unknown} The rendered value.
 */
function renderTemplate(template, ctx) {
  if (typeof template !== "string") {
    if (typeof template !== "object" || template === null) return template;
    if (Array.isArray(template)) return template.map((entry) => renderTemplate(entry, ctx));
    const rendered = {};
    for (const [key, value] of Object.entries(template)) {
      rendered[key] = renderTemplate(value, ctx);
    }
    return rendered;
  }
  // The expression body must not contain "}}". Otherwise a string such as
  // "${{ a }}-${{ b }}" is mistaken for ONE placeholder whose expression is
  // "a }}-${{ b", and evaluates to garbage instead of being interpolated.
  const fullMatch = template.match(/^\$\{\{\s*((?:(?!\}\}).)*?)\s*\}\}$/);
  if (fullMatch) {
    return evaluateExpression(fullMatch[1], ctx);
  }
  return template.replace(EXPR_RE, (_, expr) => {
    const value = evaluateExpression(expr, ctx);
    return value === null || value === void 0 ? "" : String(value);
  });
}
|
|
2565
|
+
/**
 * Evaluates one template expression of the form `value | filter | filter(arg)`.
 *
 * The first segment is resolved with `resolveValue`; every later segment is
 * applied in order with `applyFilter`.
 *
 * @param {string} expr Expression text without the `${{ }}` delimiters.
 * @param {object} ctx Evaluation context.
 * @returns {unknown} The resolved, filtered value.
 */
function evaluateExpression(expr, ctx) {
  // Split on single pipes only. Splitting on every "|" tore the "||"
  // fallback operator apart ("a || 'x'" became "a", "", "'x'"), so the
  // fallback handled by resolveValue could never run.
  const [head, ...filters] = expr.split(/\s*(?<!\|)\|(?!\|)\s*/);
  return filters.reduce(
    (value, filter) => applyFilter(value, filter.trim()),
    resolveValue(head.trim(), ctx)
  );
}
|
|
2573
|
+
/**
 * Resolves the value part of a template expression.
 *
 * Forms are tried in this order:
 * 1. `path (+|-|*) integer` — arithmetic on the numeric value of a path;
 * 2. `left || right` — `left` (a path) unless it is null, undefined, "" or
 *    false, otherwise `right`;
 * 3. a quoted string literal;
 * 4. a numeric literal;
 * 5. a context path, via `resolvePath`.
 *
 * @param {string} path Trimmed expression text.
 * @param {object} ctx Evaluation context.
 * @returns {unknown} The resolved value.
 */
function resolveValue(path, ctx) {
  const isQuoted = (text) => text.startsWith("'") && text.endsWith("'") || text.startsWith('"') && text.endsWith('"');
  const arithMatch = path.match(/^(\w[\w.]*)\s*([+\-*])\s*(\d+)$/);
  if (arithMatch) {
    const base = Number(resolvePath(arithMatch[1], ctx));
    const operand = Number(arithMatch[3]);
    switch (arithMatch[2]) {
      case "+":
        return base + operand;
      case "-":
        return base - operand;
      case "*":
        return base * operand;
    }
  }
  const orMatch = path.match(/^(.+?)\s*\|\|\s*(.+)$/);
  if (orMatch) {
    const left = resolvePath(orMatch[1].trim(), ctx);
    // 0 counts as a usable value here; only null/undefined/""/false fall back.
    if (left !== null && left !== void 0 && left !== "" && left !== false) return left;
    const right = orMatch[2].trim();
    if (isQuoted(right)) return right.slice(1, -1);
    // Resolve the fallback with the full rules so numeric literals
    // ("x || 0") and chained fallbacks ("a || b || 'c'") work; previously
    // they were looked up as paths and came back undefined.
    return resolveValue(right, ctx);
  }
  if (isQuoted(path)) {
    return path.slice(1, -1);
  }
  if (path !== "" && !Number.isNaN(Number(path))) return Number(path);
  return resolvePath(path, ctx);
}
|
|
2599
|
+
/**
 * Looks up a dotted path in the template context.
 *
 * - "index" returns `ctx.index`, or 0 when it is null/undefined.
 * - A path starting with "args", "item" or "data" is resolved inside that
 *   context entry.
 * - Any other path is tried against `ctx.item`, then `ctx.args`, then
 *   `ctx.data`; the first lookup that is not undefined wins.
 *
 * @param {string} path Dotted path.
 * @param {object} ctx Evaluation context.
 * @returns {unknown} The value found, or undefined.
 */
function resolvePath(path, ctx) {
  if (path === "index") return ctx.index ?? 0;
  const segments = path.split(".");
  const [scope, ...rest] = segments;
  switch (scope) {
    case "args":
      return getNestedValue(ctx.args, rest);
    case "item":
      return getNestedValue(ctx.item, rest);
    case "data":
      return getNestedValue(ctx.data, rest);
    default:
      break;
  }
  // Unprefixed path: probe the scopes in priority order.
  for (const name of ["item", "args", "data"]) {
    const found = getNestedValue(ctx[name], segments);
    if (found !== void 0) return found;
  }
  return void 0;
}
|
|
2623
|
+
/**
 * Follows `parts` as successive property names starting at `obj`.
 *
 * @param {unknown} obj Starting value.
 * @param {string[]} parts Property names to follow in order.
 * @returns {unknown} The value reached, or undefined as soon as a step has to
 *   be taken from null, undefined or a non-object.
 */
function getNestedValue(obj, parts) {
  let cursor = obj;
  for (let i = 0; i < parts.length; i += 1) {
    // `== null` covers both null and undefined; primitives cannot be descended into.
    if (cursor == null || typeof cursor !== "object") return void 0;
    cursor = cursor[parts[i]];
  }
  return cursor;
}
|
|
2635
|
+
/**
 * Applies one template filter to a value.
 *
 * `filter` is `name` or `name(arg)`; one pair of surrounding quotes is
 * stripped from `arg`. Unknown or unparsable filters return the value
 * unchanged.
 *
 * Filters: default, join, upper, lower, trim, truncate, replace, keys,
 * length, first, last, json, slugify, sanitize, ext, basename.
 *
 * @param {unknown} value Input value.
 * @param {string} filter Filter expression.
 * @returns {unknown} The filtered value.
 */
function applyFilter(value, filter) {
  const match = filter.match(/^(\w+)(?:\((.+)\))?$/);
  if (!match) return value;
  const name = match[1];
  const arg = match[2]?.replace(/^['"]|['"]$/g, "");
  const isString = typeof value === "string";
  switch (name) {
    case "default":
      return value === null || value === void 0 || value === "" ? arg : value;
    case "join":
      // `??` so an explicit empty separator, join(''), is honored; with `||`
      // it silently fell back to ", ".
      return Array.isArray(value) ? value.join(arg ?? ", ") : value;
    case "upper":
      return isString ? value.toUpperCase() : value;
    case "lower":
      return isString ? value.toLowerCase() : value;
    case "trim":
      return isString ? value.trim() : value;
    case "truncate": {
      const len = Number.parseInt(arg || "100", 10);
      // A non-numeric length yields NaN, for which the comparison is false
      // and the value is returned untouched.
      if (isString && value.length > len) return value.slice(0, len) + "...";
      return value;
    }
    case "replace": {
      if (!isString || !arg) return value;
      // arg is "from,to"; each side may carry its own quotes.
      const [from, to] = arg.split(",").map((s) => s.trim().replace(/^['"]|['"]$/g, ""));
      return value.replaceAll(from, to || "");
    }
    case "keys":
      return typeof value === "object" && value !== null ? Object.keys(value) : [];
    case "length":
      return Array.isArray(value) || isString ? value.length : 0;
    case "first":
      return Array.isArray(value) ? value[0] : value;
    case "last":
      return Array.isArray(value) ? value[value.length - 1] : value;
    case "json":
      return JSON.stringify(value);
    case "slugify":
      return isString ? value.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "") : value;
    case "sanitize":
      // Replace characters that are invalid in file names, then collapse runs of "_".
      return isString ? value.replace(/[<>:"/\\|?*\x00-\x1f]/g, "_").replace(/__+/g, "_").trim() : value;
    case "ext": {
      if (!isString) return "";
      // Extension before an optional query string.
      const extMatch = value.match(/\.([a-zA-Z0-9]+)(?:\?.*)?$/);
      return extMatch ? extMatch[1] : "";
    }
    case "basename": {
      if (!isString) return "";
      try {
        return new URL(value).pathname.split("/").pop() || "";
      } catch {
        // Not an absolute URL: treat the value as a plain path.
      }
      return value.split("/").pop() || "";
    }
    default:
      return value;
  }
}
|
|
2692
|
+
|
|
2693
|
+
// src/pipeline/registry.ts
|
|
2694
|
+
// Pipeline step handlers, keyed by step name.
var stepHandlers = /* @__PURE__ */ new Map();
/**
 * Registers (or replaces) the handler for a pipeline step.
 *
 * @param {string} name Step name.
 * @param {Function} handler Step handler stored under `name`.
 */
function registerStep(name, handler) {
  stepHandlers.set(name, handler);
}
/**
 * Returns the handler registered for `name`.
 *
 * @param {string} name Step name.
 * @returns {Function | undefined} The handler, or undefined when none is registered.
 */
function getStep(name) {
  const handler = stepHandlers.get(name);
  return handler;
}
|
|
2701
|
+
|
|
2702
|
+
// src/pipeline/steps/fetch.ts
|
|
2703
|
+
/**
 * Fetches several URLs from inside the browser page.
 *
 * A script is generated and passed to `page.evaluate`. In the page, a pool of
 * workers calls `fetch` with `credentials: "include"` and parses each
 * response as JSON. A request that throws (including a JSON parse failure)
 * yields `{ error: message }` at its index instead of failing the batch.
 *
 * @param {object} page Page adapter; `page.evaluate(script)` is called.
 * @param {string[]} urls URLs to fetch.
 * @param {string} method HTTP method.
 * @param {Record<string, string>} headers Request headers.
 * @param {number} concurrency Maximum number of parallel requests; values
 *   below 1 or not numeric are treated as 1.
 * @returns {Promise<unknown[]>} Results in the same order as `urls`.
 */
async function fetchBatchInBrowser(page, urls, method, headers, concurrency) {
  // 0, NaN or a negative count would start no workers at all and return an
  // array of holes, so clamp to at least one worker. Normalizing to a number
  // also keeps arbitrary text out of the generated script.
  const requested = Math.floor(Number(concurrency));
  const workerCount = requested >= 1 ? requested : 1;
  const headersJs = JSON.stringify(headers);
  const urlsJs = JSON.stringify(urls);
  const methodJs = JSON.stringify(method);
  return page.evaluate(`
    (async () => {
      const urls = ${urlsJs};
      const method = ${methodJs};
      const headers = ${headersJs};
      const concurrency = ${workerCount};

      const results = new Array(urls.length);
      let idx = 0;

      async function worker() {
        while (idx < urls.length) {
          const i = idx++;
          try {
            const resp = await fetch(urls[i], { method, headers, credentials: "include" });
            results[i] = await resp.json();
          } catch (e) {
            results[i] = { error: e.message };
          }
        }
      }

      const workers = Array.from({ length: Math.min(concurrency, urls.length) }, () => worker());
      await Promise.all(workers);
      return results;
    })()
  `);
}
|
|
2734
|
+
/**
 * Maps `items` through an async function with bounded parallelism.
 *
 * @param {unknown[]} items Input items.
 * @param {number} limit Maximum number of calls in flight; values below 1 or
 *   not numeric are treated as 1.
 * @param {(item: unknown, index: number) => Promise<unknown>} fn Mapper.
 * @returns {Promise<unknown[]>} Results in input order. Rejects with the
 *   first error thrown by `fn`.
 */
async function mapConcurrent(items, limit, fn) {
  const results = new Array(items.length);
  // With a limit of 0, NaN or a negative number no worker was started and the
  // function resolved with an array of holes; always run at least one.
  const requested = Math.floor(Number(limit));
  const workerCount = Math.min(requested >= 1 ? requested : 1, items.length);
  let next = 0;
  // Each worker claims the next unprocessed index until none are left.
  const worker = async () => {
    while (next < items.length) {
      const i = next++;
      results[i] = await fn(items[i], i);
    }
  };
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
|
2747
|
+
/**
 * Fetches one URL and parses the response body as JSON.
 *
 * With a page, the request is issued from inside the browser through
 * `page.evaluate`, using `credentials: "include"`. Without one, the `fetch`
 * of the current runtime is used.
 *
 * @param {object | null} page Page adapter, or a falsy value for a direct fetch.
 * @param {string} url Request URL.
 * @param {string} method HTTP method.
 * @param {Record<string, string>} headers Request headers.
 * @returns {Promise<unknown>} The parsed JSON body.
 */
async function fetchSingle(page, url, method, headers) {
  if (!page) {
    const response = await fetch(url, { method, headers });
    return response.json();
  }
  // Values are embedded as JSON literals in the script run by the page.
  const script = `
    (async () => {
      const resp = await fetch(${JSON.stringify(url)}, {
        method: ${JSON.stringify(method)}, headers: ${JSON.stringify(headers)}, credentials: "include"
      });
      return await resp.json();
    })()
  `;
  return page.evaluate(script);
}
|
|
2764
|
+
/**
 * "fetch" step: requests JSON from one URL, or from one URL per data item.
 *
 * `params` is either a URL template string or an object with `url`,
 * `method` (default "GET"), `headers`, `params` (query parameters) and
 * `concurrency` (default 5, batch mode only). Header and query values are
 * rendered as templates with `args` and `data` in scope.
 *
 * Batch mode is used when the incoming data is an array and the URL template
 * contains the text "item": the URL is rendered once per item (with `item`
 * and `index` in scope) and the step returns an array of results. Requests
 * go through the browser page when one is available.
 */
registerStep("fetch", async (ctx, params) => {
  const data = ctx.data;
  // Renders every value of a plain object as a template and stringifies it.
  const renderValues = (raw) => {
    const rendered = {};
    for (const [key, value] of Object.entries(raw)) {
      rendered[key] = String(renderTemplate(value, { args: ctx.args, data }));
    }
    return rendered;
  };
  // Appends `query` to `target`, respecting an existing query string.
  const withQuery = (target, query) => {
    if (Object.keys(query).length === 0) return target;
    const qs = new URLSearchParams(query).toString();
    return `${target}${target.includes("?") ? "&" : "?"}${qs}`;
  };
  const urlOrObj = typeof params === "string" ? params : params?.url ?? "";
  const method = params?.method || "GET";
  const rawParams = params?.params ?? {};
  const urlTemplate = String(urlOrObj);
  const headers = renderValues(params?.headers ?? {});
  const isBatch = Array.isArray(data) && urlTemplate.includes("item");
  if (!isBatch) {
    const url = String(renderTemplate(urlOrObj, { args: ctx.args, data }));
    return fetchSingle(ctx.page, withQuery(url, renderValues(rawParams)), method.toUpperCase(), headers);
  }
  const concurrency = typeof params?.concurrency === "number" ? params.concurrency : 5;
  const query = renderValues(rawParams);
  const urls = data.map(
    (item, index) => withQuery(String(renderTemplate(urlTemplate, { args: ctx.args, data, item, index })), query)
  );
  if (ctx.page) {
    return fetchBatchInBrowser(ctx.page, urls, method.toUpperCase(), headers, concurrency);
  }
  return mapConcurrent(urls, concurrency, async (target) => fetchSingle(null, target, method.toUpperCase(), headers));
});
|
|
2807
|
+
|
|
2808
|
+
// src/pipeline/steps/browser.ts
|
|
2809
|
+
// Browser pipeline steps. Each one needs `ctx.page` and throws
// "Browser page required for <step> step" without it. Unless noted, a step
// renders its params as a template (with `args` and `data` in scope) and
// passes the incoming `ctx.data` through unchanged.

/** "navigate": opens the rendered URL with `page.goto`. */
registerStep("navigate", async (ctx, params) => {
  if (!ctx.page) throw new Error("Browser page required for navigate step");
  const url = renderTemplate(params, { args: ctx.args, data: ctx.data });
  await ctx.page.goto(url);
  return ctx.data;
});
/** "click": clicks the rendered element reference with `page.click`. */
registerStep("click", async (ctx, params) => {
  if (!ctx.page) throw new Error("Browser page required for click step");
  const ref = renderTemplate(params, { args: ctx.args, data: ctx.data });
  await ctx.page.click(ref);
  return ctx.data;
});
/** "type": types `text` into `ref`; presses Enter afterwards when `submit` is truthy. */
registerStep("type", async (ctx, params) => {
  if (!ctx.page) throw new Error("Browser page required for type step");
  const p = renderTemplate(params, { args: ctx.args, data: ctx.data });
  await ctx.page.typeText(p.ref, p.text);
  if (p.submit) await ctx.page.pressKey("Enter");
  return ctx.data;
});
/** "wait": passes the rendered params to `page.wait`. */
registerStep("wait", async (ctx, params) => {
  if (!ctx.page) throw new Error("Browser page required for wait step");
  const rendered = renderTemplate(params, { args: ctx.args, data: ctx.data });
  // The original branched on `typeof rendered === "number"` but both branches
  // made this same call, so the branch is dropped.
  await ctx.page.wait(rendered);
  return ctx.data;
});
/** "press": presses the rendered key with `page.pressKey`. */
registerStep("press", async (ctx, params) => {
  if (!ctx.page) throw new Error("Browser page required for press step");
  const key = renderTemplate(params, { args: ctx.args, data: ctx.data });
  await ctx.page.pressKey(key);
  return ctx.data;
});
/** "snapshot": returns the result of `page.snapshot()`; params are ignored. */
registerStep("snapshot", async (ctx, _params) => {
  if (!ctx.page) throw new Error("Browser page required for snapshot step");
  return ctx.page.snapshot();
});
/**
 * "evaluate": runs the rendered script with `page.evaluate` and returns its
 * result. A string result is parsed as JSON when possible, otherwise it is
 * returned as-is.
 */
registerStep("evaluate", async (ctx, params) => {
  if (!ctx.page) throw new Error("Browser page required for evaluate step");
  const js = renderTemplate(params, { args: ctx.args, data: ctx.data });
  const result = await ctx.page.evaluate(js);
  if (typeof result !== "string") return result;
  try {
    return JSON.parse(result);
  } catch {
    return result;
  }
});
|
|
2861
|
+
|
|
2862
|
+
// src/pipeline/steps/transform.ts
|
|
2863
|
+
/**
 * "select" step: extracts a value from `ctx.data` by a dotted path.
 *
 * Segment forms:
 * - `name`    — property access;
 * - `name[n]` — element `n` of the array stored under `name`;
 * - `name[*]` — every element of the array under `name`; the rest of the
 *   path is applied to each element and an array is returned.
 *
 * Returns undefined when the path runs into null/undefined or an indexed or
 * wildcard segment does not refer to an array.
 */
registerStep("select", async (ctx, params) => {
  const path = renderTemplate(params, { args: ctx.args, data: ctx.data });
  // Recursive walk so segments after a wildcard get the same treatment as
  // top-level ones. Previously they were used as literal property names, so
  // "items[*].tags[0]" produced an array of undefined.
  const walk = (value, segments) => {
    let current = value;
    for (let i = 0; i < segments.length; i += 1) {
      if (current === null || current === void 0) return void 0;
      const segment = segments[i];
      const indexed = segment.match(/^(\w+)\[(\d+)\]$/);
      if (indexed) {
        current = current[indexed[1]];
        if (!Array.isArray(current)) return void 0;
        current = current[Number(indexed[2])];
        continue;
      }
      const wildcard = segment.match(/^(\w+)\[\*\]$/);
      if (wildcard) {
        current = current[wildcard[1]];
        if (!Array.isArray(current)) return void 0;
        const rest = segments.slice(i + 1);
        return rest.length > 0 ? current.map((item) => walk(item, rest)) : current;
      }
      current = current[segment];
    }
    return current;
  };
  return walk(ctx.data, path.split("."));
});
|
|
2897
|
+
/**
 * "map" step: renders the params template once per element of `ctx.data`,
 * with `args`, `item`, `data` and `index` in scope, and returns the rendered
 * values as a new array. Throws when the data is not an array.
 */
registerStep("map", async (ctx, params) => {
  const { data } = ctx;
  if (!Array.isArray(data)) throw new Error("map requires array data");
  const renderItem = (item, index) => renderTemplate(params, { args: ctx.args, item, data, index });
  return data.map(renderItem);
});
|
|
2904
|
+
/**
 * "filter" step: keeps the elements of `ctx.data` for which the params
 * expression is truthy. The expression is wrapped in `${{ }}` and rendered
 * per element with `args`, `item`, `data` and `index` in scope. Throws when
 * the data is not an array.
 */
registerStep("filter", async (ctx, params) => {
  if (!Array.isArray(ctx.data)) throw new Error("filter requires array data");
  const keep = (item, index) => {
    const outcome = renderTemplate(`\${{ ${params} }}`, { args: ctx.args, item, data: ctx.data, index });
    return Boolean(outcome);
  };
  return ctx.data.filter(keep);
});
|
|
2912
|
+
/**
 * "sort" step: returns a sorted copy of `ctx.data`.
 *
 * `params.by` names the property to compare. Two numbers are compared
 * numerically; any other pair is compared as strings with `localeCompare`.
 * `params.order === "desc"` reverses the sorted copy. Throws when the data
 * is not an array.
 */
registerStep("sort", async (ctx, params) => {
  if (!Array.isArray(ctx.data)) throw new Error("sort requires array data");
  const { by, order } = params;
  const compare = (a, b) => {
    const left = a[by];
    const right = b[by];
    const bothNumeric = typeof left === "number" && typeof right === "number";
    return bothNumeric ? left - right : String(left).localeCompare(String(right));
  };
  const sorted = ctx.data.slice().sort(compare);
  if (order === "desc") sorted.reverse();
  return sorted;
});
|
|
2923
|
+
/**
 * "limit" step: returns the first N elements of `ctx.data`.
 *
 * N is the rendered params value converted to a number. A missing, empty or
 * non-numeric value falls back to 20. Non-array data is returned unchanged.
 */
registerStep("limit", async (ctx, params) => {
  if (!Array.isArray(ctx.data)) return ctx.data;
  const rendered = renderTemplate(params, { args: ctx.args, data: ctx.data });
  // `Number(n) || 20` turned an explicit limit of 0 into 20. Only fall back
  // when no usable number was given (null and "" would otherwise convert to 0).
  const count = rendered === null || rendered === "" ? NaN : Number(rendered);
  return ctx.data.slice(0, Number.isNaN(count) ? 20 : count);
});
|
|
2928
|
+
|
|
2929
|
+
// src/pipeline/steps/intercept.ts
|
|
2930
|
+
/**
 * "intercept" step: captures network responses that match a pattern.
 *
 * Params:
 * - `pattern` — template rendered and passed to `page.installInterceptor`;
 * - `trigger` — optional "action:value" template; actions are "navigate",
 *   "click", "evaluate" (value passed to the page call) and "scroll"
 *   (scrolls down; value ignored);
 * - `timeout` — value passed to `page.wait` before collecting (default 10);
 * - `select`  — optional dotted path applied to the captured body.
 *
 * Returns the incoming data when nothing was captured; otherwise the
 * captured bodies (a single body is unwrapped from its array), narrowed by
 * `select` when given. Throws without `ctx.page`.
 */
registerStep("intercept", async (ctx, params) => {
  if (!ctx.page) throw new Error("Browser page required for intercept step");
  const p = params;
  const pattern = renderTemplate(p.pattern, { args: ctx.args, data: ctx.data });
  await ctx.page.installInterceptor(pattern);
  if (p.trigger) {
    const trigger = String(renderTemplate(p.trigger, { args: ctx.args, data: ctx.data }));
    // Split at the FIRST colon only: `split(":")` cut "navigate:https://host"
    // down to the value "https", and truncated scripts containing colons.
    const separator = trigger.indexOf(":");
    const action = separator === -1 ? trigger : trigger.slice(0, separator);
    const value = separator === -1 ? void 0 : trigger.slice(separator + 1);
    switch (action) {
      case "navigate":
        await ctx.page.goto(value);
        break;
      case "click":
        await ctx.page.click(value);
        break;
      case "evaluate":
        await ctx.page.evaluate(value);
        break;
      case "scroll":
        await ctx.page.scroll("down");
        break;
    }
  }
  const timeout = p.timeout || 10;
  await ctx.page.wait(timeout);
  const requests = await ctx.page.getInterceptedRequests();
  if (requests.length === 0) return ctx.data;
  let result = requests.map((r) => r.body);
  if (result.length === 1) result = result[0];
  if (p.select && result) {
    let current = result;
    for (const part of p.select.split(".")) {
      // Descend only through objects; a non-object value is kept as reached.
      if (current && typeof current === "object") {
        current = current[part];
      }
    }
    result = current;
  }
  return result;
});
|
|
2971
|
+
|
|
2972
|
+
// src/pipeline/steps/download.ts
|
|
2973
|
+
import { writeFileSync, mkdirSync, existsSync as existsSync2 } from "fs";
|
|
2974
|
+
import { join as join2, basename } from "path";
|
|
2975
|
+
import { execSync, execFileSync } from "child_process";
|
|
2976
|
+
import { tmpdir } from "os";
|
|
2977
|
+
/**
 * Counts finished downloads and the bytes they transferred.
 *
 * `completed` counts every finished attempt, successful or not; `failed`
 * counts the unsuccessful ones.
 */
var DownloadProgressTracker = class {
  total;
  completed = 0;
  failed = 0;
  totalBytes = 0;
  /** @param {number} total Number of downloads expected. */
  constructor(total) {
    this.total = total;
  }
  /** Records a successful download of `bytes` bytes. */
  success(bytes) {
    this.completed += 1;
    this.totalBytes += bytes;
  }
  /** Records a failed download. */
  fail() {
    this.completed += 1;
    this.failed += 1;
  }
  /** Returns a one-line summary; the failure count is appended only when non-zero. */
  summary() {
    const succeeded = this.completed - this.failed;
    const failureNote = this.failed > 0 ? `, ${this.failed} failed` : "";
    return `Downloaded ${succeeded}/${this.total} files (${formatBytes(this.totalBytes)})${failureNote}`;
  }
};
|
|
2997
|
+
/**
 * Formats a byte count as "<n>B", "<n.n>KB" or "<n.n>MB" using 1024-based
 * units. Values of one megabyte and above are always shown in MB.
 *
 * @param {number} bytes Byte count.
 * @returns {string} Human-readable size.
 */
function formatBytes(bytes) {
  const KILOBYTE = 1024;
  const MEGABYTE = KILOBYTE * KILOBYTE;
  if (bytes < KILOBYTE) return `${bytes}B`;
  const [divisor, unit] = bytes < MEGABYTE ? [KILOBYTE, "KB"] : [MEGABYTE, "MB"];
  return `${(bytes / divisor).toFixed(1)}${unit}`;
}
|
|
3002
|
+
/**
 * Writes the page's cookies for `domain` to a temporary file in Netscape
 * cookie-file format.
 *
 * Each cookie becomes one line of seven TAB-separated fields: domain,
 * include-subdomains flag, path, secure flag, expiry (seconds, 0 when the
 * cookie has none), name, value.
 *
 * @param {{page?: object}} ctx Pipeline context; `ctx.page.getCookies({ domain })` is called.
 * @param {string} domain Cookie domain to export; also used when a cookie has no domain.
 * @returns {Promise<string | null>} Path of the written file, or null when
 *   there is no page, there are no cookies, or anything fails.
 */
async function exportCookiesNetscape(ctx, domain) {
  if (!ctx.page) return null;
  try {
    const cookies = await ctx.page.getCookies({ domain });
    if (cookies.length === 0) return null;
    const lines = ["# Netscape HTTP Cookie File"];
    for (const c of cookies) {
      const cookieDomain = c.domain || domain;
      // The include-subdomains flag has to agree with the leading dot of the
      // domain; strict parsers (Python's MozillaCookieJar) reject the file
      // otherwise. It was hard-coded to TRUE before.
      const includeSubdomains = String(cookieDomain).startsWith(".") ? "TRUE" : "FALSE";
      const secure = c.secure ? "TRUE" : "FALSE";
      const expires = c.expires ? Math.floor(c.expires) : 0;
      // Fields are joined with explicit tabs, as the format requires.
      lines.push([cookieDomain, includeSubdomains, c.path || "/", secure, expires, c.name, c.value].join("\t"));
    }
    const tmpFile = join2(tmpdir(), `lobster-cookies-${Date.now()}.txt`);
    // Session cookies are credentials: make the file readable by the owner only.
    writeFileSync(tmpFile, lines.join("\n"), { mode: 0o600 });
    return tmpFile;
  } catch {
    // Best effort: callers treat null as "no cookie file available".
    return null;
  }
}
|
|
3021
|
+
/**
 * Probe for a usable `yt-dlp` executable by running `yt-dlp --version`.
 *
 * @returns {boolean} `true` when the command ran without error, `false` when
 *   it could not be run or failed.
 */
function hasYtDlp() {
  let available = true;
  try {
    // Output is piped (and discarded) so the probe stays silent.
    execSync("yt-dlp --version", { stdio: "pipe" });
  } catch {
    available = false;
  }
  return available;
}
|
|
3029
|
+
/**
 * Download one URL with the external `yt-dlp` tool, blocking until it exits.
 *
 * The tool is started with an argument vector rather than a shell command
 * line, so URLs, output templates and paths containing spaces, `&`, `;`,
 * parentheses, etc. are passed through literally and cannot inject commands.
 *
 * @param {string} url - Media URL handed to yt-dlp.
 * @param {string} dir - Output directory.
 * @param {{ filename?: string, format?: string, cookieFile?: string|null }} [opts]
 *   `filename` is a yt-dlp output template (default `%(title)s.%(ext)s`),
 *   `format` is passed to `-f`, `cookieFile` to `--cookies`.
 * @returns {{ url: string, file: string, success: boolean, error?: string }}
 *   On success `file` is the output directory (the final file name is chosen
 *   by yt-dlp and is not known here); on failure `error` holds at most the
 *   first 200 characters of the error message.
 */
function downloadWithYtDlp(url, dir, opts = {}) {
  const args = ["-o", join2(dir, opts.filename || "%(title)s.%(ext)s")];
  if (opts.format) args.push("-f", opts.format);
  if (opts.cookieFile) args.push("--cookies", opts.cookieFile);
  // "--" ends option parsing so a URL starting with "-" is never read as a flag.
  args.push("--no-warnings", "--no-progress", "--", url);
  try {
    runFileSync("yt-dlp", args, { stdio: "pipe", timeout: 3e5 });
    return { url, file: dir, success: true };
  } catch (err) {
    return { url, file: "", success: false, error: err.message?.slice(0, 200) };
  }
}
|
|
3041
|
+
/**
 * Run `fn` over `items` with at most `concurrency` calls in flight, returning
 * the results in input order.
 *
 * Workers pull the next unclaimed index from a shared counter, so a slow item
 * only occupies one worker. If `fn` rejects, the returned promise rejects with
 * that error (other workers are not cancelled).
 *
 * @template T, R
 * @param {T[]} items
 * @param {number} concurrency - Maximum parallel calls. Values that are not a
 *   number >= 1 (0, negatives, NaN) are treated as 1 so that every item is
 *   still processed instead of silently returning an empty result array.
 * @param {(item: T, index: number) => Promise<R>|R} fn
 * @returns {Promise<R[]>}
 */
async function downloadPool(items, concurrency, fn) {
  const results = new Array(items.length);
  const requested = Math.floor(Number(concurrency));
  const workerCount = Math.min(requested >= 1 ? requested : 1, items.length);
  let next = 0;
  const worker = async () => {
    // Claiming the index is synchronous, so two workers never take the same item.
    while (next < items.length) {
      const i = next++;
      results[i] = await fn(items[i], i);
    }
  };
  await Promise.all(Array.from({ length: workerCount }, () => worker()));
  return results;
}
|
|
3057
|
+
/**
 * Pipeline step "download": fetch one URL (`params.url`) or every entry of
 * `ctx.data` (strings, or objects with a `url` property) into `params.dir`.
 *
 * Modes, checked in this order:
 *  - `params.video`   : delegate to yt-dlp (at most 2 pooled workers), passing
 *                       the page's cookies for the first item's host if any.
 *  - `params.content` : fetch as text and save as "html", "text", "json" or
 *                       (any other value) a rough HTML-to-Markdown conversion,
 *                       optionally prefixed with a `---` metadata header.
 *  - otherwise        : save the raw response bytes.
 *
 * Other params: `dir` (template, default "./downloads"), `concurrency`
 * (default 3), `skip_existing` (binary mode only), `timeout` in seconds
 * (default 60), `filename` (template), `format` (video), `metadata` (content).
 *
 * Resolves to one result object per item: `{ url, file, success, ... }`.
 * Per-item failures are reported in the result, not thrown.
 *
 * @throws {Error} In video mode when yt-dlp is not installed.
 */
registerStep("download", async (ctx, params) => {
  const p = params;
  const dir = renderTemplate(p.dir || "./downloads", { args: ctx.args, data: ctx.data });
  if (!existsSync2(dir)) mkdirSync(dir, { recursive: true });
  const concurrency = p.concurrency || 3;
  const skipExisting = p.skip_existing ?? false;
  const timeout = (p.timeout || 60) * 1e3;

  // Template variables available to per-item `filename` / `metadata` templates.
  const scopeFor = (entry) => ({ args: ctx.args, item: entry.item, data: ctx.data, index: entry.index });

  // File name for a raw download, derived from the URL's last path segment.
  const binaryNameFor = (entry) => {
    const raw = basename(new URL(entry.url).pathname);
    let decoded;
    try {
      decoded = decodeURIComponent(raw);
    } catch {
      // Malformed percent-escape: fall back to the undecoded segment.
      decoded = raw;
    }
    // Decoding can reintroduce separators ("%2F", "%5C"); keep only the last
    // segment so a remote URL can never place the file outside `dir`.
    const leaf = basename(decoded.replace(/\\/g, "/"));
    return leaf && leaf !== "." && leaf !== ".." ? leaf : `download-${entry.index}`;
  };

  // File name for a content download: URL leaf without its extension.
  const contentNameFor = (entry) => {
    const stem = basename(new URL(entry.url).pathname).replace(/\.[^.]+$/, "");
    // A URL such as "https://host/" has no leaf; avoid writing a bare ".md".
    return (stem || `download-${entry.index}`) + (p.content === "json" ? ".json" : ".md");
  };

  const items = [];
  if (p.url) {
    items.push({
      url: renderTemplate(p.url, { args: ctx.args, data: ctx.data }),
      index: 0
    });
  } else if (Array.isArray(ctx.data)) {
    ctx.data.forEach((item, i) => {
      const url = typeof item === "string" ? item : item?.url;
      if (url) items.push({ url, item, index: i });
    });
  }
  if (items.length === 0) return [];
  const tracker = new DownloadProgressTracker(items.length);

  if (p.video) {
    if (!hasYtDlp()) {
      throw new Error("yt-dlp not found. Install: brew install yt-dlp (mac) or pip install yt-dlp");
    }
    let cookieFile = null;
    try {
      cookieFile = await exportCookiesNetscape(ctx, new URL(items[0].url).hostname);
    } catch {
      // Cookies are optional; an unparsable first URL is reported per item by
      // yt-dlp below rather than aborting the whole batch here.
    }
    try {
      const videoResults = await downloadPool(items, Math.min(concurrency, 2), async (entry) => {
        const result = downloadWithYtDlp(entry.url, dir, {
          format: p.format,
          cookieFile,
          filename: p.filename ? renderTemplate(p.filename, scopeFor(entry)) : void 0
        });
        if (result.success) tracker.success(0);
        else tracker.fail();
        return result;
      });
      if (ctx.debug) console.log(tracker.summary());
      return videoResults;
    } finally {
      // Always remove the exported cookies, even when the batch throws.
      if (cookieFile) {
        try {
          removeFileSync(cookieFile);
        } catch {
          // Best-effort cleanup of a temp file.
        }
      }
    }
  }

  if (p.content) {
    const contentResults = await downloadPool(items, concurrency, async (entry) => {
      try {
        const resp = await fetch(entry.url, {
          signal: AbortSignal.timeout(timeout)
        });
        if (!resp.ok) {
          tracker.fail();
          return { url: entry.url, file: "", success: false, error: `HTTP ${resp.status}` };
        }
        let content;
        const html = await resp.text();
        if (p.content === "html") {
          content = html;
        } else if (p.content === "text") {
          content = html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
        } else if (p.content === "json") {
          try {
            content = JSON.stringify(JSON.parse(html), null, 2);
          } catch {
            // Not valid JSON: keep the body verbatim.
            content = html;
          }
        } else {
          content = html.replace(/<h[1-6][^>]*>(.*?)<\/h[1-6]>/gi, "\n## $1\n").replace(/<p[^>]*>(.*?)<\/p>/gi, "\n$1\n").replace(/<a[^>]*href="([^"]*)"[^>]*>(.*?)<\/a>/gi, "[$2]($1)").replace(/<[^>]+>/g, "").replace(/\n{3,}/g, "\n\n").trim();
        }
        const filename = p.filename ? renderTemplate(p.filename, scopeFor(entry)) : contentNameFor(entry);
        const filepath = join2(dir, filename);
        if (p.metadata) {
          const meta = renderTemplate(p.metadata, scopeFor(entry));
          const header = Object.entries(meta).map(([k, v]) => `${k}: ${v}`).join("\n");
          content = `---\n${header}\n---\n\n${content}`;
        }
        writeFileSync(filepath, content, "utf-8");
        const size = Buffer.byteLength(content);
        tracker.success(size);
        return { url: entry.url, file: filepath, success: true, size, content: content.slice(0, 200) };
      } catch (err) {
        tracker.fail();
        return { url: entry.url, file: "", success: false, error: err.message };
      }
    });
    if (ctx.debug) console.log(tracker.summary());
    return contentResults;
  }

  const results = await downloadPool(items, concurrency, async (entry) => {
    try {
      const filename = p.filename ? renderTemplate(p.filename, scopeFor(entry)) : binaryNameFor(entry);
      const filepath = join2(dir, filename);
      if (skipExisting && existsSync2(filepath)) {
        // Already on disk: count as a success that transferred nothing.
        tracker.success(0);
        return { url: entry.url, file: filepath, success: true, size: 0 };
      }
      const resp = await fetch(entry.url, {
        signal: AbortSignal.timeout(timeout)
      });
      if (!resp.ok) {
        tracker.fail();
        return { url: entry.url, file: "", success: false, error: `HTTP ${resp.status}` };
      }
      const buffer = Buffer.from(await resp.arrayBuffer());
      writeFileSync(filepath, buffer);
      tracker.success(buffer.length);
      return { url: entry.url, file: filepath, success: true, size: buffer.length };
    } catch (err) {
      tracker.fail();
      return { url: entry.url, file: "", success: false, error: err.message };
    }
  });
  if (ctx.debug) console.log(tracker.summary());
  return results;
});
|
|
3172
|
+
|
|
3173
|
+
// src/pipeline/steps/tap.ts
|
|
3174
|
+
/**
 * Pipeline step "tap": call an action on the page's front-end store and
 * capture the JSON body of the first network response whose URL contains
 * `params.capture`.
 *
 * Params (rendered through `renderTemplate` first):
 *  - `store`   : store id, looked up in Pinia (`$pinia._s` on the Vue app
 *                mounted at `#app`, or `window.__pinia`); if no Pinia store is
 *                found the Vuex `$store` object is used instead.
 *  - `action`  : name of a function property on that store.
 *  - `capture` : substring matched against fetch / XHR request URLs.
 *  - `args`    : argument list applied to the action (default `[]`).
 *  - `timeout` : seconds to wait for a matching response after the action
 *                returns (default 5).
 *  - `select`  : optional dot-separated path into the captured JSON.
 *
 * `window.fetch` and `XMLHttpRequest.prototype.open/send` are patched only for
 * the duration of the call and restored in a `finally`.
 *
 * NOTE(review): Vuex actions are normally invoked through `store.dispatch`,
 * so `store[action]` will usually not be a function on a Vuex store — confirm
 * whether Vuex support is expected to work.
 *
 * @returns the captured JSON (or the selected sub-value), or `null` when
 *   nothing matching was captured before the timeout.
 * @throws {Error} When no page is available, or the store / action is not found.
 */
registerStep("tap", async (ctx, params) => {
  if (!ctx.page) throw new Error("Browser page required for tap step");
  const p = renderTemplate(params, { args: ctx.args, data: ctx.data });
  const storeName = p.store;
  const actionName = p.action;
  const capturePattern = p.capture;
  const actionArgs = p.args || [];
  // Only a positive number is usable; anything else falls back to 5 seconds so
  // the injected script never receives NaN.
  const timeoutSec = Number(p.timeout) > 0 ? Number(p.timeout) : 5;
  const selectPath = p.select;

  // The script reports through an { ok, ... } envelope so that a captured
  // payload which itself has an `error` field is not mistaken for a failure
  // of the tap step.
  const outcome = await ctx.page.evaluate(`
    (async () => {
      const capturePattern = ${JSON.stringify(capturePattern)};
      const storeName = ${JSON.stringify(storeName)};
      const actionName = ${JSON.stringify(actionName)};
      const actionArgs = ${JSON.stringify(actionArgs)};
      const timeoutMs = ${timeoutSec * 1e3};

      let captured = null;
      let captureResolve;
      const capturePromise = new Promise((r) => { captureResolve = r; });
      // Keep the first matching body only; later matches are ignored.
      const record = (body) => {
        if (captured) return;
        captured = body;
        captureResolve();
      };

      // 1. Patch fetch
      const origFetch = window.fetch;
      window.fetch = async function (...args) {
        const resp = await origFetch.apply(this, args);
        try {
          const target = args[0];
          // fetch() accepts a string, a Request (has .url) or a URL object.
          const url = typeof target === 'string' ? target : (target && target.url) || String(target ?? '');
          if (url.includes(capturePattern) && !captured) {
            record(await resp.clone().json());
          }
        } catch {}
        return resp;
      };

      // 2. Patch XHR
      const origSend = XMLHttpRequest.prototype.send;
      const origOpen = XMLHttpRequest.prototype.open;
      XMLHttpRequest.prototype.open = function (method, url, ...rest) {
        this.__tapUrl = url;
        return origOpen.call(this, method, url, ...rest);
      };
      XMLHttpRequest.prototype.send = function (...args) {
        this.addEventListener('load', function () {
          if (String(this.__tapUrl ?? '').includes(capturePattern) && !captured) {
            try {
              record(JSON.parse(this.responseText));
            } catch {}
          }
        });
        return origSend.apply(this, args);
      };

      try {
        // 3. Find the store
        const app = document.querySelector('#app');
        let store = null;

        // Try Pinia via __vue_app__
        if (app?.__vue_app__) {
          const pinia = app.__vue_app__.config?.globalProperties?.$pinia;
          if (pinia?._s) {
            store = pinia._s.get(storeName);
          }
          // Try Vuex
          if (!store) {
            const vuex = app.__vue_app__.config?.globalProperties?.$store;
            if (vuex) store = vuex;
          }
        }

        // Fallback: global pinia
        if (!store && window.__pinia?._s) {
          store = window.__pinia._s.get(storeName);
        }

        if (!store) {
          return { ok: false, error: 'Store not found: ' + storeName };
        }

        // 4. Call the action
        const actionFn = store[actionName];
        if (typeof actionFn !== 'function') {
          return { ok: false, error: 'Action not found: ' + actionName };
        }

        await actionFn.apply(store, actionArgs);

        // 5. Wait for capture
        if (!captured) {
          let timer;
          const timedOut = new Promise((r) => { timer = setTimeout(r, timeoutMs); });
          await Promise.race([capturePromise, timedOut]);
          clearTimeout(timer);
        }
      } finally {
        // 6. Restore originals
        window.fetch = origFetch;
        XMLHttpRequest.prototype.send = origSend;
        XMLHttpRequest.prototype.open = origOpen;
      }

      return { ok: true, data: captured };
    })()
  `);

  if (!outcome) return null;
  if (!outcome.ok) {
    throw new Error(outcome.error);
  }
  const result = outcome.data;
  if (!result) return null;
  if (selectPath) {
    let current = result;
    for (const part of selectPath.split(".")) {
      if (current === null || current === void 0) return null;
      current = current[part];
    }
    return current;
  }
  return result;
});
|
|
3290
|
+
|
|
3291
|
+
// src/pipeline/executor.ts
|
|
3292
|
+
/**
 * Run pipeline steps in order, feeding each step's result to the next one
 * through `ctx.data`.
 *
 * Each step definition is an object whose first key names a registered step
 * and whose value is that step's params, e.g. `{ fetch: { url: "..." } }`.
 *
 * @param {object[]} steps - Step definitions, executed sequentially.
 * @param {object|null} page - Browser page handed to every step as `ctx.page`.
 * @param {object} args - User arguments handed to every step as `ctx.args`.
 * @param {boolean} [debug=false] - Log each step and a short result preview.
 * @returns {Promise<*>} The last step's result (`null` for an empty pipeline).
 * @throws {Error} For a malformed step definition or an unregistered step
 *   name; errors thrown by a step handler propagate unchanged.
 */
async function executePipeline(steps, page, args, debug = false) {
  const PREVIEW_LIMIT = 200;
  const ctx = { page, args, data: null, debug };
  for (let i = 0; i < steps.length; i++) {
    const stepDef = steps[i];
    const entry = stepDef !== null && typeof stepDef === "object" ? Object.entries(stepDef)[0] : void 0;
    if (!entry) {
      throw new Error(`Invalid pipeline step at index ${i}: expected an object like { stepName: params }`);
    }
    const [stepName, params] = entry;
    const handler = getStep(stepName);
    if (!handler) {
      throw new Error(`Unknown pipeline step: ${stepName}`);
    }
    if (debug) {
      log.step(i + 1, `${stepName}`);
    }
    ctx.data = await handler(ctx, params);
    if (debug && ctx.data !== void 0) {
      let text;
      try {
        text = JSON.stringify(ctx.data);
      } catch {
        // Circular structures / BigInt: debug output must never abort the run.
        text = void 0;
      }
      text ??= String(ctx.data);
      // Only show the ellipsis when something was actually cut off.
      const preview = text.length > PREVIEW_LIMIT ? `${text.slice(0, PREVIEW_LIMIT)}...` : text;
      log.dim(`  \u2192 ${preview}`);
    }
  }
  return ctx.data;
}
|
|
3312
|
+
|
|
3313
|
+
// src/utils/timeout.ts
|
|
3314
|
+
/**
 * Settle with `promise`'s outcome, or reject if it takes longer than
 * `timeoutMs`.
 *
 * The underlying operation is not cancelled on timeout; only the returned
 * promise rejects. The timer is always cleared once the race is decided, so
 * it never keeps the process alive after the result is known.
 *
 * @template T
 * @param {Promise<T>|T} promise - Operation to wait for. Plain (non-promise)
 *   values are accepted and resolve immediately.
 * @param {number} timeoutMs - Deadline in milliseconds.
 * @param {string} [label="Operation"] - Name used in the timeout message.
 * @returns {Promise<T>}
 * @throws {Error} `"<label> timed out after <timeoutMs>ms"` on timeout;
 *   otherwise whatever `promise` rejects with.
 */
function runWithTimeout(promise, timeoutMs, label = "Operation") {
  let timer;
  const deadline = new Promise((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`${label} timed out after ${timeoutMs}ms`)),
      timeoutMs
    );
  });
  return Promise.race([Promise.resolve(promise), deadline]).finally(() => clearTimeout(timer));
}
|
|
3329
|
+
|
|
3330
|
+
// src/router/index.ts
|
|
3331
|
+
/**
 * Routes a request to an execution strategy chosen by `makeRoutingDecision`:
 * plain HTTP fetch, a site adapter (pipeline or function), or the LLM-driven
 * browser agent. Owns one `BrowserManager` for all browser-backed work.
 */
var SmartRouter = class {
  config;
  browserManager;

  /**
   * @param {object} config - Reads `config.browser.{executablePath, headless,
   *   cdpEndpoint, commandTimeout}`, `config.llm` and
   *   `config.agent.{maxSteps, stepDelay}`.
   */
  constructor(config) {
    this.config = config;
    this.browserManager = new BrowserManager({
      executablePath: config.browser.executablePath || void 0,
      headless: config.browser.headless,
      cdpEndpoint: config.browser.cdpEndpoint || void 0
    });
  }

  /**
   * Execute `request` at the level picked by the routing decision. If a level
   * below AGENT fails, fall back to the agent exactly once.
   *
   * The fallback calls `executeAgent` directly instead of re-entering
   * `execute`: re-routing could yield the same failing level again and retry
   * without bound.
   *
   * @param {object} request - `{ url?, task?, args?, format? }`.
   * @returns {Promise<{ data: *, format: string }|undefined>} `undefined` only
   *   if the decision level matches none of the known levels.
   * @throws The agent's error when the fallback also fails (the first failure
   *   is attached as `cause`), or the original error at AGENT level.
   */
  async execute(request) {
    const decision = makeRoutingDecision(request);
    log.debug(`Routing: Level ${ExecutionLevel[decision.level]} \u2014 ${decision.reason}`);
    try {
      switch (decision.level) {
        case 0 /* HTTP */:
          return await this.executeHttp(request);
        case 2 /* ADAPTER */:
          return await this.executeAdapter(request, decision);
        case 1 /* BROWSER */:
        case 3 /* AGENT */:
          return await this.executeAgent(request);
      }
    } catch (err) {
      if (decision.level >= 3 /* AGENT */) throw err;
      log.warn(`Level ${ExecutionLevel[decision.level]} failed, escalating...`);
      try {
        return await this.executeAgent({
          ...request,
          task: request.task || `Fetch content from ${request.url}`
        });
      } catch (agentErr) {
        // Keep the first failure reachable for diagnostics.
        if (agentErr instanceof Error && agentErr.cause === void 0) {
          agentErr.cause = err;
        }
        throw agentErr;
      }
    }
  }

  /**
   * Fetch `request.url` directly, without a browser.
   * @throws {Error} When `request.url` is missing.
   */
  async executeHttp(request) {
    if (!request.url) throw new Error("URL required for HTTP execution");
    const result = await directFetch(request.url);
    return { data: result.body, format: request.format || "json" };
  }

  /**
   * Run the adapter attached to `decision`, via its declarative `pipeline` or
   * its `func(page, args)`.
   *
   * Declared argument defaults are filled into a copy of `request.args`; the
   * caller's object is left untouched. Any page opened here is closed again,
   * including when the initial navigation fails.
   *
   * @throws {Error} When the adapter defines neither `func` nor `pipeline`.
   */
  async executeAdapter(request, decision) {
    const adapter = decision.adapter;
    const args = { ...(request.args || {}) };
    for (const arg of adapter.args ?? []) {
      if (args[arg.name] === void 0 && arg.default !== void 0) {
        args[arg.name] = arg.default;
      }
    }
    let data;
    if (adapter.pipeline) {
      let page = null;
      try {
        // Adapters marked browser:false or using the "public" strategy run
        // their pipeline without a page.
        if (adapter.browser !== false && adapter.strategy !== "public" /* PUBLIC */) {
          page = new PuppeteerPage(await this.browserManager.newPage());
          if (adapter.domain) {
            await page.goto(`https://${adapter.domain}`);
          }
        }
        data = await executePipeline(adapter.pipeline, page, args);
      } finally {
        if (page) await page.close();
      }
    } else if (adapter.func) {
      const page = new PuppeteerPage(await this.browserManager.newPage());
      try {
        if (adapter.domain) {
          await page.goto(`https://${adapter.domain}`);
        }
        data = await runWithTimeout(
          adapter.func(page, args),
          (adapter.timeoutSeconds || this.config.browser.commandTimeout) * 1e3,
          `${adapter.site}/${adapter.name}`
        );
      } finally {
        await page.close();
      }
    } else {
      throw new Error(`Adapter ${adapter.site}/${adapter.name} has neither func nor pipeline`);
    }
    return { data, format: request.format || "table" };
  }

  /**
   * Open a page (navigating to `request.url` if given) and let the LLM agent
   * carry out `request.task`.
   * @throws {Error} When no LLM API key is configured.
   */
  async executeAgent(request) {
    if (!this.config.llm.apiKey) {
      throw new Error("LLM API key required for agent mode. Run: lobster config set llm.apiKey <key>");
    }
    const page = new PuppeteerPage(await this.browserManager.newPage());
    try {
      if (request.url) {
        await page.goto(request.url);
      }
      const agent = new AgentCore(page, {
        llm: this.config.llm,
        maxSteps: this.config.agent.maxSteps,
        stepDelay: this.config.agent.stepDelay
      });
      const task = request.task || `Extract content from ${request.url}`;
      const result = await agent.execute(task);
      return { data: result.data, format: request.format || "json" };
    } finally {
      await page.close();
    }
  }

  /** Shut down the underlying browser manager. */
  async close() {
    await this.browserManager.close();
  }
};
|
|
3440
|
+
export {
|
|
3441
|
+
SmartRouter
|
|
3442
|
+
};
|
|
3443
|
+
//# sourceMappingURL=index.js.map
|