lobster-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +389 -0
- package/dist/agent/core.js +1013 -0
- package/dist/agent/core.js.map +1 -0
- package/dist/agent/index.js +1027 -0
- package/dist/agent/index.js.map +1 -0
- package/dist/brain/index.js +60 -0
- package/dist/brain/index.js.map +1 -0
- package/dist/browser/dom/index.js +1096 -0
- package/dist/browser/dom/index.js.map +1 -0
- package/dist/browser/index.js +2034 -0
- package/dist/browser/index.js.map +1 -0
- package/dist/browser/manager.js +86 -0
- package/dist/browser/manager.js.map +1 -0
- package/dist/browser/page-adapter.js +1345 -0
- package/dist/browser/page-adapter.js.map +1 -0
- package/dist/cascade/index.js +138 -0
- package/dist/cascade/index.js.map +1 -0
- package/dist/config/index.js +110 -0
- package/dist/config/index.js.map +1 -0
- package/dist/config/schema.js +66 -0
- package/dist/config/schema.js.map +1 -0
- package/dist/discover/index.js +545 -0
- package/dist/discover/index.js.map +1 -0
- package/dist/index.js +5529 -0
- package/dist/index.js.map +1 -0
- package/dist/lib.js +4206 -0
- package/dist/lib.js.map +1 -0
- package/dist/llm/client.js +379 -0
- package/dist/llm/client.js.map +1 -0
- package/dist/llm/index.js +397 -0
- package/dist/llm/index.js.map +1 -0
- package/dist/llm/openai-client.js +214 -0
- package/dist/llm/openai-client.js.map +1 -0
- package/dist/output/index.js +93 -0
- package/dist/output/index.js.map +1 -0
- package/dist/pipeline/index.js +802 -0
- package/dist/pipeline/index.js.map +1 -0
- package/dist/router/decision.js +80 -0
- package/dist/router/decision.js.map +1 -0
- package/dist/router/index.js +3443 -0
- package/dist/router/index.js.map +1 -0
- package/dist/types/index.js +23 -0
- package/dist/types/index.js.map +1 -0
- package/logo.svg +11 -0
- package/package.json +65 -0
|
@@ -0,0 +1,802 @@
|
|
|
1
|
+
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
2
|
+
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
3
|
+
}) : x)(function(x) {
|
|
4
|
+
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
5
|
+
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
6
|
+
});
|
|
7
|
+
|
|
8
|
+
// src/pipeline/template.ts
|
|
9
|
+
// Matches every `${{ expr }}` placeholder embedded in a string.
var EXPR_RE = /\$\{\{\s*(.*?)\s*\}\}/g;
/**
 * Recursively renders `${{ expr }}` placeholders in a template value.
 *
 * - Arrays and plain objects are rendered element by element / value by value.
 * - Non-string scalars are returned unchanged.
 * - A string that is exactly ONE placeholder returns the evaluated value with
 *   its original type (number, array, object, ...).
 * - Any other string has each placeholder replaced by its stringified value;
 *   `null` and `undefined` become the empty string.
 *
 * @param {*} template - String, array, object or scalar to render.
 * @param {object} ctx - Evaluation context passed through to `evaluateExpression`.
 * @returns {*} The rendered value.
 */
function renderTemplate(template, ctx) {
  if (typeof template !== "string") {
    if (typeof template === "object" && template !== null) {
      if (Array.isArray(template)) return template.map((v) => renderTemplate(v, ctx));
      const result = {};
      for (const [k, v] of Object.entries(template)) {
        result[k] = renderTemplate(v, ctx);
      }
      return result;
    }
    return template;
  }
  // The expression body may not contain `}}`. Without this restriction a string
  // holding several placeholders ("${{ a }}-${{ b }}") was treated as a single
  // expression spanning from the first `${{` to the last `}}`.
  const fullMatch = template.match(/^\$\{\{\s*((?:(?!\}\}).)*?)\s*\}\}$/);
  if (fullMatch) {
    return evaluateExpression(fullMatch[1], ctx);
  }
  return template.replace(EXPR_RE, (_, expr) => {
    const val = evaluateExpression(expr, ctx);
    return val === null || val === void 0 ? "" : String(val);
  });
}
|
|
31
|
+
/**
 * Evaluates one template expression of the form `value | filter | filter(arg)`.
 *
 * The first segment is resolved with `resolveValue`; every following segment
 * is applied in order with `applyFilter`.
 *
 * @param {string} expr - Expression text without the surrounding `${{ }}`.
 * @param {object} ctx - Evaluation context.
 * @returns {*} The resolved and filtered value.
 */
function evaluateExpression(expr, ctx) {
  // Split on a single `|` only. A `||` is the fallback operator handled by
  // resolveValue; splitting on it (as a plain /\|/ split did) broke the
  // expression apart before the fallback could ever be evaluated.
  const [head, ...filters] = expr.split(/\s*(?<!\|)\|(?!\|)\s*/);
  let value = resolveValue(head.trim(), ctx);
  for (const filter of filters) {
    value = applyFilter(value, filter.trim());
  }
  return value;
}
|
|
39
|
+
/**
 * Resolves the value part of an expression (everything before the filters).
 *
 * Supported forms, tried in this order:
 *  1. `path + N`, `path - N`, `path * N` with a non-negative integer N
 *  2. `left || right` - `left` is used unless it is null, undefined, "" or false
 *  3. a quoted string literal ('...' or "...")
 *  4. a numeric literal
 *  5. a context path, resolved with `resolvePath`
 *
 * @param {string} path - Trimmed expression text.
 * @param {object} ctx - Evaluation context.
 * @returns {*} The resolved value, or undefined when nothing matches.
 */
function resolveValue(path, ctx) {
  const isQuoted = (s) => s.startsWith("'") && s.endsWith("'") || s.startsWith('"') && s.endsWith('"');
  const arithMatch = path.match(/^(\w[\w.]*)\s*([+\-*])\s*(\d+)$/);
  if (arithMatch) {
    const base = Number(resolvePath(arithMatch[1], ctx));
    const num = Number(arithMatch[3]);
    switch (arithMatch[2]) {
      case "+":
        return base + num;
      case "-":
        return base - num;
      case "*":
        return base * num;
    }
  }
  const orMatch = path.match(/^(.+?)\s*\|\|\s*(.+)$/);
  if (orMatch) {
    const left = resolvePath(orMatch[1].trim(), ctx);
    if (left !== null && left !== void 0 && left !== "" && left !== false) return left;
    const right = orMatch[2].trim();
    // A quoted fallback is taken literally, even if it contains `||`.
    if (isQuoted(right)) return right.slice(1, -1);
    // Anything else is resolved recursively so that numeric literals
    // (`count || 0`) and chained fallbacks (`a || b || 'c'`) work; previously
    // both resolved to undefined.
    return resolveValue(right, ctx);
  }
  if (isQuoted(path)) {
    return path.slice(1, -1);
  }
  if (path !== "" && !Number.isNaN(Number(path))) return Number(path);
  return resolvePath(path, ctx);
}
|
|
65
|
+
/**
 * Looks up a dotted path in the evaluation context.
 *
 * - `index` yields `ctx.index`, or 0 when that is null/undefined.
 * - A path starting with `args`, `item` or `data` is resolved inside that root.
 * - Any other path is tried against `ctx.item`, then `ctx.args`, then
 *   `ctx.data`; the first result that is not undefined wins.
 *
 * @param {string} path - Dotted property path.
 * @param {object} ctx - Evaluation context.
 * @returns {*} The value found, or undefined.
 */
function resolvePath(path, ctx) {
  if (path === "index") return ctx.index ?? 0;
  const [head, ...tail] = path.split(".");
  switch (head) {
    case "args":
      return getNestedValue(ctx.args, tail);
    case "item":
      return getNestedValue(ctx.item, tail);
    case "data":
      return getNestedValue(ctx.data, tail);
  }
  // Unprefixed path: probe each root in priority order.
  const segments = [head, ...tail];
  for (const rootName of ["item", "args", "data"]) {
    const found = getNestedValue(ctx[rootName], segments);
    if (found !== void 0) return found;
  }
  return void 0;
}
|
|
89
|
+
/**
 * Walks `parts` as successive property names starting at `obj`.
 *
 * @param {*} obj - Starting value.
 * @param {string[]} parts - Property names to follow, in order.
 * @returns {*} The value reached, or undefined as soon as the walk hits
 *   null, undefined or a non-object before all parts are consumed.
 */
function getNestedValue(obj, parts) {
  let node = obj;
  for (let i = 0; i < parts.length; i++) {
    const walkable = node !== null && node !== void 0 && typeof node === "object";
    if (!walkable) return void 0;
    node = node[parts[i]];
  }
  return node;
}
|
|
101
|
+
/**
 * Applies one named template filter to a value.
 *
 * `filter` is either `name` or `name(arg)`. One pair of surrounding quotes is
 * stripped from `arg`. Unknown filters and text that does not look like a
 * filter leave the value unchanged.
 *
 * Filters: default, join, upper, lower, trim, truncate, replace, keys, length,
 * first, last, json, slugify, sanitize, ext, basename.
 *
 * @param {*} value - Value produced by the previous stage.
 * @param {string} filter - Trimmed filter text.
 * @returns {*} The filtered value.
 */
function applyFilter(value, filter) {
  // `(.*)` rather than `(.+)` so that `upper()` is recognised; with `.+` a
  // filter written with empty parentheses was silently ignored.
  const match = filter.match(/^(\w+)(?:\((.*)\))?$/);
  if (!match) return value;
  const name = match[1];
  const rawArg = match[2];
  // No parentheses or empty parentheses both mean "no argument".
  const arg = rawArg === void 0 || rawArg.trim() === "" ? void 0 : rawArg.replace(/^['"]|['"]$/g, "");
  switch (name) {
    case "default":
      return value === null || value === void 0 || value === "" ? arg : value;
    case "join":
      // `??` keeps an explicit empty separator: join('') now yields "ab".
      return Array.isArray(value) ? value.join(arg ?? ", ") : value;
    case "upper":
      return typeof value === "string" ? value.toUpperCase() : value;
    case "lower":
      return typeof value === "string" ? value.toLowerCase() : value;
    case "trim":
      return typeof value === "string" ? value.trim() : value;
    case "truncate": {
      // A non-numeric argument gives NaN, which never truncates.
      const len = Number.parseInt(arg || "100", 10);
      if (typeof value === "string" && value.length > len) return value.slice(0, len) + "...";
      return value;
    }
    case "replace": {
      if (typeof value !== "string" || !arg) return value;
      const [from, to] = arg.split(",").map((s) => s.trim().replace(/^['"]|['"]$/g, ""));
      // An empty search string would insert `to` between every character.
      if (from === "") return value;
      // Function replacer: the replacement is inserted literally, so `$&`,
      // `$1` etc. in the argument are not treated as patterns.
      return value.replaceAll(from, () => to || "");
    }
    case "keys":
      return typeof value === "object" && value !== null ? Object.keys(value) : [];
    case "length":
      return Array.isArray(value) ? value.length : typeof value === "string" ? value.length : 0;
    case "first":
      return Array.isArray(value) ? value[0] : value;
    case "last":
      return Array.isArray(value) ? value[value.length - 1] : value;
    case "json":
      return JSON.stringify(value);
    case "slugify":
      return typeof value === "string" ? value.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-|-$/g, "") : value;
    case "sanitize":
      return typeof value === "string" ? value.replace(/[<>:"/\\|?*\x00-\x1f]/g, "_").replace(/__+/g, "_").trim() : value;
    case "ext": {
      if (typeof value !== "string") return "";
      const extMatch = value.match(/\.([a-zA-Z0-9]+)(?:\?.*)?$/);
      return extMatch ? extMatch[1] : "";
    }
    case "basename": {
      if (typeof value !== "string") return "";
      try {
        return new URL(value).pathname.split("/").pop() || "";
      } catch {
        // Not an absolute URL: fall through and treat it as a plain path.
      }
      return value.split("/").pop() || "";
    }
    default:
      return value;
  }
}
|
|
158
|
+
|
|
159
|
+
// src/pipeline/registry.ts
|
|
160
|
+
// Registry of pipeline step handlers, keyed by step name.
var stepHandlers = /* @__PURE__ */ new Map();
/**
 * Registers (or replaces) the handler for a step name.
 * @param {string} name - Step name.
 * @param {Function} handler - Handler stored under that name.
 */
function registerStep(name, handler) {
  stepHandlers.set(name, handler);
}
/**
 * @param {string} name - Step name.
 * @returns {Function|undefined} The registered handler, or undefined.
 */
function getStep(name) {
  const handler = stepHandlers.get(name);
  return handler;
}
/**
 * @returns {string[]} All registered step names, in registration order.
 */
function getStepNames() {
  return Array.from(stepHandlers.keys());
}
|
|
170
|
+
|
|
171
|
+
// src/pipeline/steps/fetch.ts
|
|
172
|
+
/**
 * Fetches a list of URLs from inside the browser page, with a bounded number
 * of concurrent workers, and returns the parsed JSON bodies in input order.
 *
 * The work is done by a script string handed to `page.evaluate`; requests are
 * sent with `credentials: "include"`. A request that throws, or a body that
 * is not valid JSON, produces `{ error: message }` in that slot instead of
 * failing the whole batch.
 *
 * @param {object} page - Object exposing `evaluate(script)`.
 * @param {string[]} urls - URLs to fetch.
 * @param {string} method - HTTP method.
 * @param {object} headers - Request headers.
 * @param {number} concurrency - Maximum parallel requests; values below 1 or
 *   non-numeric values are treated as 1.
 * @returns {Promise<Array>} Whatever `page.evaluate` resolves to.
 */
async function fetchBatchInBrowser(page, urls, method, headers, concurrency) {
  // With 0, a negative number or NaN the script would start no workers and
  // return an array of empty slots, so clamp to at least one worker.
  const requested = Number(concurrency);
  const workerCount = requested >= 1 ? Math.floor(requested) : 1;
  return page.evaluate(`
    (async () => {
      const urls = ${JSON.stringify(urls)};
      const method = ${JSON.stringify(method)};
      const headers = ${JSON.stringify(headers)};
      const concurrency = ${workerCount};

      const results = new Array(urls.length);
      let idx = 0;

      async function worker() {
        while (idx < urls.length) {
          const i = idx++;
          try {
            const resp = await fetch(urls[i], { method, headers, credentials: "include" });
            results[i] = await resp.json();
          } catch (e) {
            results[i] = { error: e.message };
          }
        }
      }

      const workers = Array.from({ length: Math.min(concurrency, urls.length) }, () => worker());
      await Promise.all(workers);
      return results;
    })()
  `);
}
|
|
203
|
+
/**
 * Maps `fn` over `items` with at most `limit` calls in flight at once.
 *
 * @param {Array} items - Input items.
 * @param {number} limit - Maximum concurrent calls; values below 1 or
 *   non-numeric values are treated as 1.
 * @param {(item: *, index: number) => Promise<*>} fn - Async mapper.
 * @returns {Promise<Array>} Results in the same order as `items`. Rejects if
 *   any call to `fn` rejects.
 */
async function mapConcurrent(items, limit, fn) {
  const results = new Array(items.length);
  // A limit of 0, a negative number or NaN used to start no workers at all,
  // silently returning an array of empty slots.
  const requested = Number(limit);
  const width = Math.min(requested >= 1 ? Math.floor(requested) : 1, items.length);
  let next = 0;
  const worker = async () => {
    // Workers share `next`; each claims the next unprocessed index.
    while (next < items.length) {
      const i = next++;
      results[i] = await fn(items[i], i);
    }
  };
  await Promise.all(Array.from({ length: width }, () => worker()));
  return results;
}
|
|
216
|
+
/**
 * Fetches one URL and returns its body parsed as JSON.
 *
 * With a `page`, the request runs inside the page via `page.evaluate` and is
 * sent with `credentials: "include"`. Without one, the host's global `fetch`
 * is used.
 *
 * @param {object|null} page - Object exposing `evaluate(script)`, or a falsy value.
 * @param {string} url - URL to fetch.
 * @param {string} method - HTTP method.
 * @param {object} headers - Request headers.
 * @returns {Promise<*>} Parsed JSON body.
 */
async function fetchSingle(page, url, method, headers) {
  if (!page) {
    const response = await fetch(url, { method, headers });
    return response.json();
  }
  // Values are embedded as JSON literals so they survive the trip into the
  // script text unchanged.
  const script = `
    (async () => {
      const resp = await fetch(${JSON.stringify(url)}, {
        method: ${JSON.stringify(method)}, headers: ${JSON.stringify(headers)}, credentials: "include"
      });
      return await resp.json();
    })()
  `;
  return page.evaluate(script);
}
|
|
233
|
+
/**
 * "fetch" step: requests JSON from a URL, or from one URL per data item.
 *
 * `params` is either a URL template string or an object with `url` and
 * optional `method` (default "GET"), `headers`, `params` (query string
 * entries) and `concurrency` (default 5). Headers and query values are
 * rendered with `args` and `data` only.
 *
 * When `ctx.data` is an array and the URL template contains the text "item",
 * the template is rendered once per element (with `item` and `index`) and all
 * URLs are fetched concurrently - inside the browser when `ctx.page` is set.
 * Otherwise a single request is made.
 */
registerStep("fetch", async (ctx, params) => {
  const data = ctx.data;
  const urlOrObj = typeof params === "string" ? params : params?.url ?? "";
  // String() guards against a non-string `method` (e.g. a YAML scalar parsed
  // as something else), which previously crashed on `.toUpperCase()`.
  const method = String(params?.method || "GET").toUpperCase();
  const urlTemplate = String(urlOrObj);

  // Renders every value of a plain object to a string using args + data.
  const renderStrings = (raw) => {
    const rendered = {};
    for (const [k, v] of Object.entries(raw)) {
      rendered[k] = String(renderTemplate(v, { args: ctx.args, data }));
    }
    return rendered;
  };
  const headers = renderStrings(params?.headers ?? {});
  const query = renderStrings(params?.params ?? {});
  const hasQuery = Object.keys(query).length > 0;
  const queryString = hasQuery ? new URLSearchParams(query).toString() : "";
  // Appends the rendered query string, respecting an existing `?`.
  const withQuery = (target) => hasQuery ? `${target}${target.includes("?") ? "&" : "?"}${queryString}` : target;

  // NOTE(review): this is a plain substring test, so a literal URL such as
  // ".../items" also selects per-item mode. Kept as-is because templates may
  // rely on it; consider matching `item` inside a placeholder instead.
  if (Array.isArray(data) && urlTemplate.includes("item")) {
    // Only a number >= 1 is accepted; anything else falls back to 5 so the
    // batch can never run with zero workers.
    const requested = params?.concurrency;
    const concurrency = typeof requested === "number" && requested >= 1 ? Math.floor(requested) : 5;
    const urls = data.map(
      (item, index) => withQuery(String(renderTemplate(urlTemplate, { args: ctx.args, data, item, index })))
    );
    if (ctx.page) {
      return fetchBatchInBrowser(ctx.page, urls, method, headers, concurrency);
    }
    return mapConcurrent(urls, concurrency, async (target) => fetchSingle(null, target, method, headers));
  }

  const url = withQuery(String(renderTemplate(urlOrObj, { args: ctx.args, data })));
  return fetchSingle(ctx.page, url, method, headers);
});
|
|
276
|
+
|
|
277
|
+
// src/pipeline/steps/browser.ts
|
|
278
|
+
// "navigate" step: renders `params` to a URL, opens it in the page and passes
// the incoming data through unchanged.
registerStep("navigate", async (ctx, params) => {
  const { page } = ctx;
  if (!page) throw new Error("Browser page required for navigate step");
  const target = renderTemplate(params, { args: ctx.args, data: ctx.data });
  await page.goto(target);
  return ctx.data;
});
|
|
284
|
+
// "click" step: renders `params` to an element reference, clicks it and passes
// the incoming data through unchanged.
registerStep("click", async (ctx, params) => {
  const { page } = ctx;
  if (!page) throw new Error("Browser page required for click step");
  const target = renderTemplate(params, { args: ctx.args, data: ctx.data });
  await page.click(target);
  return ctx.data;
});
|
|
290
|
+
// "type" step: types `text` into the element `ref`, pressing Enter afterwards
// when `submit` is truthy. The incoming data is passed through unchanged.
registerStep("type", async (ctx, params) => {
  const { page } = ctx;
  if (!page) throw new Error("Browser page required for type step");
  const rendered = renderTemplate(params, { args: ctx.args, data: ctx.data });
  await page.typeText(rendered.ref, rendered.text);
  if (rendered.submit) {
    await page.pressKey("Enter");
  }
  return ctx.data;
});
|
|
297
|
+
// "wait" step: renders `params` and hands the result to `page.wait`, then
// passes the incoming data through unchanged.
registerStep("wait", async (ctx, params) => {
  if (!ctx.page) throw new Error("Browser page required for wait step");
  const rendered = renderTemplate(params, { args: ctx.args, data: ctx.data });
  // The former `typeof rendered === "number"` branch and its `else` made the
  // identical call, so the check was dropped; `page.wait` receives the
  // rendered value whatever its type.
  await ctx.page.wait(rendered);
  return ctx.data;
});
|
|
307
|
+
// "press" step: renders `params` to a key name, presses it and passes the
// incoming data through unchanged.
registerStep("press", async (ctx, params) => {
  const { page } = ctx;
  if (!page) throw new Error("Browser page required for press step");
  const keyName = renderTemplate(params, { args: ctx.args, data: ctx.data });
  await page.pressKey(keyName);
  return ctx.data;
});
|
|
313
|
+
// "snapshot" step: replaces the pipeline data with the result of
// `page.snapshot()`. Step parameters are ignored.
registerStep("snapshot", async (ctx, _params) => {
  const { page } = ctx;
  if (!page) throw new Error("Browser page required for snapshot step");
  return page.snapshot();
});
|
|
317
|
+
// "evaluate" step: renders `params` to a script, runs it in the page and
// returns the result. A string result is parsed as JSON when possible;
// otherwise it is returned as-is.
registerStep("evaluate", async (ctx, params) => {
  if (!ctx.page) throw new Error("Browser page required for evaluate step");
  const script = renderTemplate(params, { args: ctx.args, data: ctx.data });
  const output = await ctx.page.evaluate(script);
  if (typeof output !== "string") return output;
  try {
    return JSON.parse(output);
  } catch {
    // Not JSON: hand back the raw string.
    return output;
  }
});
|
|
330
|
+
|
|
331
|
+
// src/pipeline/steps/transform.ts
|
|
332
|
+
/**
 * "select" step: extracts a value from `ctx.data` using a dotted path.
 *
 * Each segment is a property name, optionally followed by `[N]` (take element
 * N of that array) or `[*]` (apply the rest of the path to every element and
 * return the array of results). Returns undefined when the path runs into
 * null/undefined or when an indexed segment is not an array.
 */
registerStep("select", async (ctx, params) => {
  const path = renderTemplate(params, { args: ctx.args, data: ctx.data });

  // Follows `segments` from `start`. Recursion handles the part after a `[*]`
  // wildcard, so `[N]` and `[*]` are honoured there too; previously those
  // segments were looked up as literal property names ("tags[0]") and so
  // always produced undefined.
  const walk = (start, segments) => {
    let current = start;
    for (const [pos, part] of segments.entries()) {
      if (current === null || current === void 0) return void 0;
      const indexMatch = part.match(/^(\w+)\[(\d+)\]$/);
      if (indexMatch) {
        current = current[indexMatch[1]];
        if (!Array.isArray(current)) return void 0;
        current = current[Number(indexMatch[2])];
        continue;
      }
      const wildcardMatch = part.match(/^(\w+)\[\*\]$/);
      if (wildcardMatch) {
        current = current[wildcardMatch[1]];
        if (!Array.isArray(current)) return void 0;
        const remaining = segments.slice(pos + 1);
        // A trailing wildcard returns the array itself.
        return remaining.length > 0 ? current.map((element) => walk(element, remaining)) : current;
      }
      current = current[part];
    }
    return current;
  };

  return walk(ctx.data, path.split("."));
});
|
|
366
|
+
// "map" step: renders the `params` template once per element of `ctx.data`
// (exposed as `item` and `index`) and returns the array of rendered values.
registerStep("map", async (ctx, params) => {
  const rows = ctx.data;
  if (!Array.isArray(rows)) throw new Error("map requires array data");
  return rows.map((item, index) => {
    const scope = { args: ctx.args, item, data: ctx.data, index };
    return renderTemplate(params, scope);
  });
});
|
|
373
|
+
// "filter" step: keeps the elements of `ctx.data` for which the `params`
// expression, evaluated with `item` and `index`, is truthy.
registerStep("filter", async (ctx, params) => {
  const rows = ctx.data;
  if (!Array.isArray(rows)) throw new Error("filter requires array data");
  const keep = (item, index) => {
    // Wrap the bare expression in a placeholder so it is evaluated, not
    // treated as literal text.
    const verdict = renderTemplate(`\${{ ${params} }}`, { args: ctx.args, item, data: ctx.data, index });
    return Boolean(verdict);
  };
  return rows.filter(keep);
});
|
|
381
|
+
// "sort" step: returns a sorted copy of `ctx.data`, ordered by the property
// named in `params.by`. Two numbers compare numerically; anything else is
// compared as strings with localeCompare. `params.order === "desc"` reverses
// the result.
registerStep("sort", async (ctx, params) => {
  if (!Array.isArray(ctx.data)) throw new Error("sort requires array data");
  const compare = (a, b) => {
    const left = a[params.by];
    const right = b[params.by];
    const bothNumbers = typeof left === "number" && typeof right === "number";
    return bothNumbers ? left - right : String(left).localeCompare(String(right));
  };
  const ordered = ctx.data.slice().sort(compare);
  if (params.order === "desc") {
    ordered.reverse();
  }
  return ordered;
});
|
|
392
|
+
// "limit" step: keeps the first N elements of array data, where N is the
// rendered `params`. A value that is 0 or not a number falls back to 20.
// Non-array data is passed through unchanged.
registerStep("limit", async (ctx, params) => {
  const rows = ctx.data;
  if (!Array.isArray(rows)) return rows;
  const requested = Number(renderTemplate(params, { args: ctx.args, data: ctx.data }));
  const count = requested || 20;
  return rows.slice(0, count);
});
|
|
397
|
+
|
|
398
|
+
// src/pipeline/steps/intercept.ts
|
|
399
|
+
/**
 * "intercept" step: captures the bodies of page requests matching a pattern.
 *
 * Params:
 *  - `pattern`  rendered and passed to `page.installInterceptor`
 *  - `trigger`  optional `action:value` string; action is one of `navigate`,
 *               `click`, `evaluate` or `scroll` (scroll ignores the value)
 *  - `timeout`  value passed to `page.wait` after triggering (default 10)
 *  - `select`   optional dotted path applied to the captured result
 *
 * Returns `ctx.data` unchanged when nothing was captured, the single body when
 * exactly one request matched, otherwise the array of bodies.
 */
registerStep("intercept", async (ctx, params) => {
  if (!ctx.page) throw new Error("Browser page required for intercept step");
  const p = params;
  const pattern = renderTemplate(p.pattern, { args: ctx.args, data: ctx.data });
  await ctx.page.installInterceptor(pattern);
  if (p.trigger) {
    const trigger = renderTemplate(p.trigger, { args: ctx.args, data: ctx.data });
    // Split at the FIRST colon only. `trigger.split(":")` cut the value of
    // "navigate:https://example.com" down to "https".
    const separator = trigger.indexOf(":");
    const action = separator === -1 ? trigger : trigger.slice(0, separator);
    const value = separator === -1 ? void 0 : trigger.slice(separator + 1);
    switch (action) {
      case "navigate":
        await ctx.page.goto(value);
        break;
      case "click":
        await ctx.page.click(value);
        break;
      case "evaluate":
        await ctx.page.evaluate(value);
        break;
      case "scroll":
        await ctx.page.scroll("down");
        break;
    }
  }
  const timeout = p.timeout || 10;
  await ctx.page.wait(timeout);
  const requests = await ctx.page.getInterceptedRequests();
  if (requests.length === 0) return ctx.data;
  const bodies = requests.map((r) => r.body);
  let result = bodies.length === 1 ? bodies[0] : bodies;
  if (p.select && result) {
    let current = result;
    for (const part of p.select.split(".")) {
      // Segments are only followed through objects; once a non-object value
      // is reached, the remaining segments are skipped and that value is kept.
      if (current && typeof current === "object") {
        current = current[part];
      }
    }
    result = current;
  }
  return result;
});
|
|
440
|
+
|
|
441
|
+
// src/pipeline/steps/download.ts
|
|
442
|
+
import { existsSync, mkdirSync, unlinkSync, writeFileSync } from "fs";
import { basename, join, resolve, sep } from "path";
import { execFileSync, execSync } from "child_process";
import { tmpdir } from "os";
|
|
446
|
+
/**
 * Counts finished downloads and accumulated bytes for a batch.
 * `completed` includes failed items; `failed` counts only the failures.
 */
var DownloadProgressTracker = class {
  /** @param {number} total - Number of items in the batch. */
  constructor(total) {
    this.total = total;
    this.completed = 0;
    this.failed = 0;
    this.totalBytes = 0;
  }
  /** Records one successful item of `bytes` size. */
  success(bytes) {
    this.completed += 1;
    this.totalBytes += bytes;
  }
  /** Records one failed item. */
  fail() {
    this.completed += 1;
    this.failed += 1;
  }
  /** @returns {string} One-line human-readable summary. */
  summary() {
    const succeeded = this.completed - this.failed;
    const failedNote = this.failed > 0 ? `, ${this.failed} failed` : "";
    return `Downloaded ${succeeded}/${this.total} files (${formatBytes(this.totalBytes)})${failedNote}`;
  }
};
|
|
466
|
+
/**
 * Formats a byte count as `NB`, `N.NKB` or `N.NMB` (1024-based, one decimal).
 * Values of 1 MiB and above, however large, are shown in MB.
 *
 * @param {number} bytes - Byte count.
 * @returns {string} Formatted size.
 */
function formatBytes(bytes) {
  const KIB = 1024;
  const MIB = KIB * KIB;
  if (bytes < KIB) return `${bytes}B`;
  const [divisor, unit] = bytes < MIB ? [KIB, "KB"] : [MIB, "MB"];
  return `${(bytes / divisor).toFixed(1)}${unit}`;
}
|
|
471
|
+
/**
 * Writes the page's cookies for `domain` to a temporary file in Netscape
 * cookie-file format and returns the file path.
 *
 * Best effort: returns null when there is no page, when there are no cookies,
 * or when reading or writing fails for any reason.
 *
 * @param {object} ctx - Step context; `ctx.page.getCookies({ domain })` is used.
 * @param {string} domain - Domain to export; also the fallback for cookies
 *   that carry no `domain` of their own.
 * @returns {Promise<string|null>} Path of the cookie file, or null.
 */
async function exportCookiesNetscape(ctx, domain) {
  if (!ctx.page) return null;
  try {
    const cookies = await ctx.page.getCookies({ domain });
    if (cookies.length === 0) return null;
    const lines = ["# Netscape HTTP Cookie File"];
    for (const c of cookies) {
      // 0 marks a session cookie in this format. Non-positive expiries are
      // normalised to 0 - presumably some page adapters report session
      // cookies as -1; TODO confirm against getCookies().
      const expires = c.expires > 0 ? Math.floor(c.expires) : 0;
      // The format is seven TAB-separated fields:
      // domain, include-subdomains, path, secure, expiry, name, value.
      lines.push([
        c.domain || domain,
        "TRUE",
        c.path || "/",
        c.secure ? "TRUE" : "FALSE",
        expires,
        c.name,
        c.value
      ].join("\t"));
    }
    const tmpFile = join(tmpdir(), `lobster-cookies-${Date.now()}.txt`);
    // Session cookies are credentials: keep the file private to this user.
    writeFileSync(tmpFile, lines.join("\n"), { mode: 0o600 });
    return tmpFile;
  } catch {
    // Cookie export is optional; the caller proceeds without cookies.
    return null;
  }
}
|
|
490
|
+
/**
 * Reports whether `yt-dlp --version` can be run successfully.
 * @returns {boolean} true when the command exits without error.
 */
function hasYtDlp() {
  let available = true;
  try {
    execSync("yt-dlp --version", { stdio: "pipe" });
  } catch {
    available = false;
  }
  return available;
}
|
|
498
|
+
/**
 * Downloads one URL with the external `yt-dlp` program (synchronously, with a
 * 5-minute timeout).
 *
 * @param {string} url - Media URL.
 * @param {string} dir - Output directory.
 * @param {{filename?: string, format?: string, cookieFile?: string|null}} opts
 *   `filename` is a yt-dlp output template relative to `dir` (default
 *   "%(title)s.%(ext)s"); `format` is passed to `-f`; `cookieFile` to `--cookies`.
 * @returns {{url: string, file: string, success: boolean, error?: string}}
 *   On success `file` is `dir`; on failure `error` holds up to 200 characters
 *   of the error message.
 */
function downloadWithYtDlp(url, dir, opts) {
  const args = ["-o", join(dir, opts.filename || "%(title)s.%(ext)s")];
  if (opts.format) args.push("-f", opts.format);
  if (opts.cookieFile) args.push("--cookies", opts.cookieFile);
  // `--` ends option parsing so a URL beginning with "-" cannot be read as a flag.
  args.push("--no-warnings", "--no-progress", "--", url);
  try {
    // Arguments are passed as an array with no shell in between. The previous
    // `execSync(args.join(" "))` let `&`, `;`, spaces or `$()` in a URL or
    // file name break the command line or run arbitrary commands.
    execFileSync("yt-dlp", args, { stdio: "pipe", timeout: 3e5 });
    return { url, file: dir, success: true };
  } catch (err) {
    return { url, file: "", success: false, error: err.message?.slice(0, 200) };
  }
}
|
|
510
|
+
/**
 * Runs `fn` over `items` with at most `concurrency` calls in flight.
 *
 * @param {Array} items - Work items.
 * @param {number} concurrency - Maximum parallel calls; values below 1 or
 *   non-numeric values are treated as 1.
 * @param {(item: *, index: number) => Promise<*>} fn - Async task.
 * @returns {Promise<Array>} Results in input order. Rejects if `fn` rejects.
 */
async function downloadPool(items, concurrency, fn) {
  const results = new Array(items.length);
  // Guard against 0 / negative / NaN, which would otherwise start no workers
  // and return an array of empty slots without doing any work.
  const requested = Number(concurrency);
  const lanes = Math.min(requested >= 1 ? Math.floor(requested) : 1, items.length);
  let cursor = 0;
  async function drain() {
    // Each lane repeatedly claims the next unclaimed index.
    for (let i = cursor++; i < items.length; i = cursor++) {
      results[i] = await fn(items[i], i);
    }
  }
  await Promise.all(Array.from({ length: lanes }, () => drain()));
  return results;
}
|
|
526
|
+
/**
 * "download" step: saves remote resources into a directory.
 *
 * Sources: `params.url` (one rendered URL), otherwise every element of array
 * `ctx.data` that is a string or has a `url` property.
 *
 * Modes:
 *  - `video`   hand each URL to yt-dlp (at most 2 in parallel), exporting the
 *              page's cookies for the first URL's host when a page exists
 *  - `content` fetch as text and save as "html", "text", "json" or (any other
 *              value) a rough HTML-to-Markdown conversion; `metadata` adds a
 *              front-matter header
 *  - default   save the raw response bytes; `skip_existing` skips files that
 *              are already present
 *
 * Other params: `dir` (default "./downloads"), `filename` (template rendered
 * per item), `concurrency` (default 3), `timeout` in seconds (default 60),
 * `format` (yt-dlp format selector).
 *
 * Returns one `{ url, file, success, ... }` record per item; an empty array
 * when there is nothing to download.
 */
registerStep("download", async (ctx, params) => {
  const p = params;
  const dir = renderTemplate(p.dir || "./downloads", { args: ctx.args, data: ctx.data });
  if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
  const concurrency = p.concurrency || 3;
  const skipExisting = p.skip_existing ?? false;
  const timeout = (p.timeout || 60) * 1e3;

  const items = [];
  if (p.url) {
    items.push({
      url: renderTemplate(p.url, { args: ctx.args, data: ctx.data }),
      index: 0
    });
  } else if (Array.isArray(ctx.data)) {
    for (const [i, item] of ctx.data.entries()) {
      // `?.` so that null/undefined elements are skipped instead of throwing.
      const url = typeof item === "string" ? item : item?.url;
      if (url) items.push({ url, item, index: i });
    }
  }
  if (items.length === 0) return [];

  const tracker = new DownloadProgressTracker(items.length);
  const itemCtx = (entry) => ({ args: ctx.args, item: entry.item, data: ctx.data, index: entry.index });
  // Records a failure in the tracker and builds the matching result record.
  const failed = (entry, error) => {
    tracker.fail();
    return { url: entry.url, file: "", success: false, error };
  };
  const logSummary = () => {
    if (ctx.debug) console.log(tracker.summary());
  };
  // Joins `filename` onto `dir`, refusing names that resolve outside `dir`.
  // File names come from remote URLs or scraped item fields, so "../" must not
  // be able to place files elsewhere on disk.
  const rootDir = resolve(dir);
  const rootPrefix = rootDir.endsWith(sep) ? rootDir : rootDir + sep;
  const targetPath = (filename) => {
    const filepath = join(dir, filename);
    const absolute = resolve(filepath);
    if (absolute !== rootDir && !absolute.startsWith(rootPrefix)) {
      throw new Error(`Refusing to write outside ${dir}: ${filename}`);
    }
    return filepath;
  };

  if (p.video) {
    if (!hasYtDlp()) {
      throw new Error("yt-dlp not found. Install: brew install yt-dlp (mac) or pip install yt-dlp");
    }
    const cookieFile = await exportCookiesNetscape(ctx, new URL(items[0].url).hostname);
    let videoResults;
    try {
      videoResults = await downloadPool(items, Math.min(concurrency, 2), async (entry) => {
        const result = downloadWithYtDlp(entry.url, dir, {
          format: p.format,
          cookieFile,
          filename: p.filename ? renderTemplate(p.filename, itemCtx(entry)) : void 0
        });
        if (result.success) tracker.success(0);
        else tracker.fail();
        return result;
      });
    } finally {
      // Always remove the exported cookies. The previous cleanup went through
      // `__require("fs")`, which throws in an ES module and was swallowed, so
      // the cookie file was never deleted.
      if (cookieFile) {
        try {
          unlinkSync(cookieFile);
        } catch {
          // Best effort: the file may already be gone.
        }
      }
    }
    logSummary();
    return videoResults;
  }

  if (p.content) {
    const contentResults = await downloadPool(items, concurrency, async (entry) => {
      try {
        const resp = await fetch(entry.url, {
          signal: AbortSignal.timeout(timeout)
        });
        // HTTP errors now count as failures in the summary as well.
        if (!resp.ok) return failed(entry, `HTTP ${resp.status}`);
        const html = await resp.text();
        let content;
        if (p.content === "html") {
          content = html;
        } else if (p.content === "text") {
          content = html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
        } else if (p.content === "json") {
          try {
            content = JSON.stringify(JSON.parse(html), null, 2);
          } catch {
            // Not valid JSON: store the body unchanged.
            content = html;
          }
        } else {
          content = html.replace(/<h[1-6][^>]*>(.*?)<\/h[1-6]>/gi, "\n## $1\n").replace(/<p[^>]*>(.*?)<\/p>/gi, "\n$1\n").replace(/<a[^>]*href="([^"]*)"[^>]*>(.*?)<\/a>/gi, "[$2]($1)").replace(/<[^>]+>/g, "").replace(/\n{3,}/g, "\n\n").trim();
        }
        let filename;
        if (p.filename) {
          filename = renderTemplate(p.filename, itemCtx(entry));
        } else {
          // Fall back to an index-based name when the URL path has no usable
          // last segment (e.g. "https://host/"), instead of writing ".md".
          const stem = basename(new URL(entry.url).pathname).replace(/\.[^.]+$/, "") || `download-${entry.index}`;
          filename = stem + (p.content === "json" ? ".json" : ".md");
        }
        const filepath = targetPath(filename);
        if (p.metadata) {
          const meta = renderTemplate(p.metadata, itemCtx(entry));
          const header = Object.entries(meta).map(([k, v]) => `${k}: ${v}`).join("\n");
          content = `---\n${header}\n---\n\n${content}`;
        }
        writeFileSync(filepath, content, "utf-8");
        const size = Buffer.byteLength(content);
        tracker.success(size);
        return { url: entry.url, file: filepath, success: true, size, content: content.slice(0, 200) };
      } catch (err) {
        return failed(entry, err.message);
      }
    });
    logSummary();
    return contentResults;
  }

  const results = await downloadPool(items, concurrency, async (entry) => {
    try {
      // basename() is applied again AFTER decoding so that an encoded
      // separator ("%2F") in the last URL segment cannot smuggle in a path.
      const filename = p.filename ? renderTemplate(p.filename, itemCtx(entry)) : basename(decodeURIComponent(basename(new URL(entry.url).pathname))) || `download-${entry.index}`;
      const filepath = targetPath(filename);
      if (skipExisting && existsSync(filepath)) {
        tracker.success(0);
        return { url: entry.url, file: filepath, success: true, size: 0 };
      }
      const resp = await fetch(entry.url, {
        signal: AbortSignal.timeout(timeout)
      });
      if (!resp.ok) return failed(entry, `HTTP ${resp.status}`);
      const buffer = Buffer.from(await resp.arrayBuffer());
      writeFileSync(filepath, buffer);
      tracker.success(buffer.length);
      return { url: entry.url, file: filepath, success: true, size: buffer.length };
    } catch (err) {
      return failed(entry, err.message);
    }
  });
  logSummary();
  return results;
});
|
|
641
|
+
|
|
642
|
+
// src/pipeline/steps/tap.ts
|
|
643
|
+
/**
 * Pipeline step "tap": calls an action on the page's Pinia/Vuex store and
 * returns the JSON body of the first network response whose URL contains
 * the `capture` substring.
 *
 * Params (after template rendering):
 *   - store:   store id passed to Pinia's `_s.get(...)`; when no Pinia store is
 *              found, the Vuex `$store` global property is used instead.
 *   - action:  name of the function property to call on the store.
 *   - capture: substring to look for in request URLs (fetch and XHR).
 *   - args:    arguments for the action; a single non-array value is wrapped
 *              into a one-element list.
 *   - timeout: seconds to wait for a matching response after the action
 *              settles (defaults to 5).
 *   - select:  optional dot-separated path into the captured payload.
 *
 * Returns the captured payload (or the selected sub-value), or null when
 * nothing was captured. Throws when no browser page is available, or when
 * the store/action cannot be found.
 */
registerStep("tap", async (ctx, params) => {
  if (!ctx.page) throw new Error("Browser page required for tap step");
  const p = renderTemplate(params, { args: ctx.args, data: ctx.data });
  const storeName = p.store;
  const actionName = p.action;
  // Coerce here (the same coercion String.prototype.includes would apply in
  // the page) and keep "no pattern" as an explicit null, so a missing
  // `capture` can never match URLs that merely contain the text "undefined".
  const capturePattern = p.capture == null ? null : String(p.capture);
  // Function.prototype.apply needs an array-like; wrap a lone value instead of
  // letting the in-page call throw (scalars) or silently drop it (objects).
  const rawArgs = p.args;
  const actionArgs = Array.isArray(rawArgs) ? rawArgs : rawArgs ? [rawArgs] : [];
  const timeoutSec = p.timeout || 5;
  const selectPath = p.select;
  const result = await ctx.page.evaluate(`
    (async () => {
      let captured = null;
      let captureResolve;
      const capturePromise = new Promise(r => { captureResolve = r; });
      const capturePattern = ${JSON.stringify(capturePattern)};
      const timeoutMs = ${timeoutSec * 1e3};

      // Request targets can be strings, Request objects (have .url) or URL
      // objects (stringify to their href); normalise all of them to a string.
      const urlOf = (input) => {
        if (typeof input === 'string') return input;
        if (input == null) return '';
        return typeof input.url === 'string' ? input.url : String(input);
      };
      const matches = (input) =>
        capturePattern !== null && urlOf(input).includes(capturePattern);

      // 1. Patch fetch
      const origFetch = window.fetch;
      window.fetch = async function(...args) {
        const resp = await origFetch.apply(this, args);
        try {
          if (!captured && matches(args[0])) {
            captured = await resp.clone().json();
            captureResolve();
          }
        } catch {
          // Best effort: a non-JSON body must not break the page's own request.
        }
        return resp;
      };

      // 2. Patch XHR
      const origSend = XMLHttpRequest.prototype.send;
      const origOpen = XMLHttpRequest.prototype.open;
      XMLHttpRequest.prototype.open = function(method, url, ...rest) {
        this.__tapUrl = url;
        return origOpen.call(this, method, url, ...rest);
      };
      XMLHttpRequest.prototype.send = function(...args) {
        this.addEventListener('load', function() {
          try {
            if (!captured && matches(this.__tapUrl)) {
              captured = JSON.parse(this.responseText);
              captureResolve();
            }
          } catch {
            // Best effort: ignore responses that are not JSON.
          }
        });
        return origSend.apply(this, args);
      };

      try {
        // 3. Find the store
        const app = document.querySelector('#app');
        let store = null;

        // Try Pinia via __vue_app__
        if (app?.__vue_app__) {
          const pinia = app.__vue_app__.config?.globalProperties?.$pinia;
          if (pinia?._s) {
            store = pinia._s.get(${JSON.stringify(storeName)});
          }
          // Try Vuex
          if (!store) {
            const vuex = app.__vue_app__.config?.globalProperties?.$store;
            if (vuex) store = vuex;
          }
        }

        // Fallback: global pinia
        if (!store && window.__pinia?._s) {
          store = window.__pinia._s.get(${JSON.stringify(storeName)});
        }

        if (!store) {
          return { error: 'Store not found: ' + ${JSON.stringify(storeName)} };
        }

        // 4. Call the action
        const actionFn = store[${JSON.stringify(actionName)}];
        if (typeof actionFn !== 'function') {
          return { error: 'Action not found: ' + ${JSON.stringify(actionName)} };
        }

        await actionFn.apply(store, ${JSON.stringify(actionArgs)});

        // 5. Wait for capture
        if (!captured) {
          await Promise.race([
            capturePromise,
            new Promise(r => setTimeout(r, timeoutMs)),
          ]);
        }
      } finally {
        // 6. Restore originals
        window.fetch = origFetch;
        XMLHttpRequest.prototype.send = origSend;
        XMLHttpRequest.prototype.open = origOpen;
      }

      return captured;
    })()
  `);
  if (!result) return null;
  // NOTE(review): this also fires when the captured payload itself carries a
  // truthy "error" field, not only for the store/action lookup failures above.
  if (result?.error) {
    throw new Error(result.error);
  }
  if (selectPath) {
    let current = result;
    for (const part of selectPath.split(".")) {
      if (current === null || current === void 0) return null;
      current = current[part];
    }
    return current;
  }
  return result;
});
|
|
759
|
+
|
|
760
|
+
// src/utils/logger.ts
|
|
761
|
+
import chalk from "chalk";
|
|
762
|
+
// Console logger: each method prints a chalk-tinted marker followed by the
// message. `error` writes through console.error, everything else through
// console.log; `debug` only prints when LOBSTER_DEBUG is set in the environment.
var log = (() => {
  // Tints `marker` with the named chalk style and prints it ahead of `msg`.
  const mark = (tint, marker, msg) => console.log(chalk[tint](marker), msg);
  return {
    info: (msg) => mark("blue", "\u2139", msg),
    success: (msg) => mark("green", "\u2713", msg),
    warn: (msg) => mark("yellow", "\u26A0", msg),
    error: (msg) => console.error(chalk.red("\u2717"), msg),
    debug: (msg) => {
      if (process.env.LOBSTER_DEBUG) mark("gray", "\u22EF", msg);
    },
    // `n` is rendered inside square brackets as the step label.
    step: (n, msg) => mark("cyan", `[${n}]`, msg),
    dim: (msg) => console.log(chalk.dim(msg))
  };
})();
|
|
773
|
+
|
|
774
|
+
// src/pipeline/executor.ts
|
|
775
|
+
/**
 * Runs pipeline steps one after another, feeding each step's result to the next.
 *
 * Each entry of `steps` is an object whose first key is the step name and whose
 * value is that step's params. The handler is looked up with `getStep` and
 * called with a shared context `{ page, args, data, debug }`; its resolved
 * value becomes `ctx.data` for the following step.
 *
 * @param {Array<object>} steps - Step definitions, e.g. `[{ tap: {...} }]`.
 * @param {*} page - Browser page handed to the steps via `ctx.page`.
 * @param {*} args - Arguments handed to the steps via `ctx.args`.
 * @param {boolean} [debug=false] - When true, logs each step name and a
 *   preview (first 200 characters of the JSON) of its result.
 * @returns {Promise<*>} The last step's result, or null when `steps` is empty.
 * @throws {TypeError} When a step definition is not an object with a step name key.
 * @throws {Error} When no handler is registered for a step name.
 */
async function executePipeline(steps, page, args, debug = false) {
  const ctx = { page, args, data: null, debug };
  for (let i = 0; i < steps.length; i++) {
    const stepDef = steps[i];
    // Reject malformed definitions up front with a message naming the step,
    // rather than failing on the destructuring below.
    const isStepObject = stepDef !== null && typeof stepDef === "object" && !Array.isArray(stepDef);
    const entry = isStepObject ? Object.entries(stepDef)[0] : void 0;
    if (!entry) {
      throw new TypeError(`Invalid pipeline step at index ${i}: expected an object keyed by step name`);
    }
    const [stepName, params] = entry;
    const handler = getStep(stepName);
    if (!handler) {
      throw new Error(`Unknown pipeline step: ${stepName}`);
    }
    if (debug) {
      log.step(i + 1, `${stepName}`);
    }
    ctx.data = await handler(ctx, params);
    if (debug && ctx.data !== void 0) {
      // JSON.stringify throws on circular structures and BigInt; a debug
      // preview must never abort the pipeline.
      let preview;
      try {
        preview = JSON.stringify(ctx.data)?.slice(0, 200);
      } catch {
        preview = "[unserializable]";
      }
      log.dim(` \u2192 ${preview}...`);
    }
  }
  return ctx.data;
}
|
|
795
|
+
export {
|
|
796
|
+
executePipeline,
|
|
797
|
+
getStep,
|
|
798
|
+
getStepNames,
|
|
799
|
+
registerStep,
|
|
800
|
+
renderTemplate
|
|
801
|
+
};
|
|
802
|
+
//# sourceMappingURL=index.js.map
|