@doppelgangerdev/doppelganger 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63):
  1. package/.dockerignore +9 -0
  2. package/.github/workflows/docker-publish.yml +59 -0
  3. package/CODE_OF_CONDUCT.md +28 -0
  4. package/CONTRIBUTING.md +42 -0
  5. package/Dockerfile +44 -0
  6. package/LICENSE +163 -0
  7. package/README.md +133 -0
  8. package/TERMS.md +16 -0
  9. package/THIRD_PARTY_LICENSES.md +3502 -0
  10. package/agent.js +1240 -0
  11. package/headful.js +171 -0
  12. package/index.html +21 -0
  13. package/n8n-nodes-doppelganger/LICENSE +201 -0
  14. package/n8n-nodes-doppelganger/README.md +42 -0
  15. package/n8n-nodes-doppelganger/package-lock.json +6128 -0
  16. package/n8n-nodes-doppelganger/package.json +36 -0
  17. package/n8n-nodes-doppelganger/src/credentials/DoppelgangerApi.credentials.ts +35 -0
  18. package/n8n-nodes-doppelganger/src/index.ts +4 -0
  19. package/n8n-nodes-doppelganger/src/nodes/Doppelganger/Doppelganger.node.ts +147 -0
  20. package/n8n-nodes-doppelganger/src/nodes/Doppelganger/icon.png +0 -0
  21. package/n8n-nodes-doppelganger/tsconfig.json +14 -0
  22. package/package.json +45 -0
  23. package/postcss.config.js +6 -0
  24. package/public/icon.png +0 -0
  25. package/public/novnc.html +151 -0
  26. package/public/styles.css +86 -0
  27. package/scrape.js +389 -0
  28. package/server.js +875 -0
  29. package/src/App.tsx +722 -0
  30. package/src/components/AuthScreen.tsx +95 -0
  31. package/src/components/CodeEditor.tsx +70 -0
  32. package/src/components/DashboardScreen.tsx +133 -0
  33. package/src/components/EditorScreen.tsx +1519 -0
  34. package/src/components/ExecutionDetailScreen.tsx +115 -0
  35. package/src/components/ExecutionsScreen.tsx +156 -0
  36. package/src/components/LoadingScreen.tsx +26 -0
  37. package/src/components/NotFoundScreen.tsx +34 -0
  38. package/src/components/RichInput.tsx +68 -0
  39. package/src/components/SettingsScreen.tsx +228 -0
  40. package/src/components/Sidebar.tsx +61 -0
  41. package/src/components/app/CenterAlert.tsx +44 -0
  42. package/src/components/app/CenterConfirm.tsx +33 -0
  43. package/src/components/app/EditorLoader.tsx +89 -0
  44. package/src/components/editor/ActionPalette.tsx +79 -0
  45. package/src/components/editor/JsonEditorPane.tsx +71 -0
  46. package/src/components/editor/ResultsPane.tsx +641 -0
  47. package/src/components/editor/actionCatalog.ts +23 -0
  48. package/src/components/settings/AgentAiPanel.tsx +105 -0
  49. package/src/components/settings/ApiKeyPanel.tsx +68 -0
  50. package/src/components/settings/CookiesPanel.tsx +154 -0
  51. package/src/components/settings/LayoutPanel.tsx +46 -0
  52. package/src/components/settings/ScreenshotsPanel.tsx +64 -0
  53. package/src/components/settings/SettingsHeader.tsx +28 -0
  54. package/src/components/settings/StoragePanel.tsx +35 -0
  55. package/src/index.css +287 -0
  56. package/src/main.tsx +13 -0
  57. package/src/types.ts +114 -0
  58. package/src/utils/syntaxHighlight.ts +140 -0
  59. package/start-vnc.sh +52 -0
  60. package/tailwind.config.js +22 -0
  61. package/tsconfig.json +39 -0
  62. package/tsconfig.node.json +12 -0
  63. package/vite.config.mts +27 -0
package/agent.js ADDED
@@ -0,0 +1,1240 @@
1
+ const { chromium } = require('playwright');
2
+ const { JSDOM } = require('jsdom');
3
+ const fs = require('fs');
4
+ const path = require('path');
5
+
6
+ const STORAGE_STATE_PATH = path.join(__dirname, 'storage_state.json');
7
// Location of the browser storage-state file. When STORAGE_STATE_PATH exists and is a
// directory, the file is looked for inside it under the name 'storage_state.json';
// in every other case (missing path, regular file, probe failure) the path itself is used.
const STORAGE_STATE_FILE = (() => {
  let pointsAtDirectory = false;
  try {
    pointsAtDirectory =
      fs.existsSync(STORAGE_STATE_PATH) && fs.statSync(STORAGE_STATE_PATH).isDirectory();
  } catch {
    // Probing the filesystem is best-effort; fall back to the plain path.
  }
  return pointsAtDirectory
    ? path.join(STORAGE_STATE_PATH, 'storage_state.json')
    : STORAGE_STATE_PATH;
})();
18
+
19
+ const userAgents = [
20
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
21
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
22
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
23
+ ];
24
+
25
+ const API_KEY_FILE = path.join(__dirname, 'data', 'api_key.json');
26
+
27
/**
 * Read the API key stored in API_KEY_FILE.
 *
 * @returns {*} The `apiKey` field of the parsed JSON file when it is truthy;
 *   `null` when the file is missing, unreadable, not valid JSON, or has no key.
 */
const loadApiKey = () => {
  if (fs.existsSync(API_KEY_FILE)) {
    try {
      const stored = JSON.parse(fs.readFileSync(API_KEY_FILE, 'utf8'));
      if (stored && stored.apiKey) return stored.apiKey;
    } catch {
      // A broken key file is treated the same as an absent one.
    }
  }
  return null;
};
36
+
37
// Optional callbacks installed through the setters below. Both start out unset, and
// anything that is not a function is treated as "not installed".
let progressReporter = null;
let stopChecker = null;

/** Install the callback that receives `(runId, payload)` progress events. */
const setProgressReporter = (reporter) => {
  progressReporter = reporter;
};

/**
 * Forward a progress payload to the installed reporter.
 * Does nothing without a run id or a reporter; exceptions thrown by the reporter are dropped.
 */
const reportProgress = (runId, payload) => {
  const canReport = Boolean(runId) && typeof progressReporter === 'function';
  if (canReport) {
    try {
      progressReporter(runId, payload);
    } catch {
      // ignore
    }
  }
};

/** Install the callback that answers whether a run id has been asked to stop. */
const setStopChecker = (checker) => {
  stopChecker = checker;
};

/**
 * Ask the installed checker whether `runId` should stop.
 * @returns {boolean} The checker's answer coerced to boolean; `false` without a run id,
 *   without a checker, or when the checker throws.
 */
const isStopRequested = (runId) => {
  if (runId && typeof stopChecker === 'function') {
    try {
      return Boolean(stopChecker(runId));
    } catch {
      return false;
    }
  }
  return false;
};
65
+
66
/**
 * Move the mouse to (targetX, targetY) along a randomised quadratic Bezier curve.
 *
 * The curve starts at a random point within ±60px of the target, bends through a
 * control point offset up to ±40px from the midpoint, and is sampled in 8–13 steps,
 * each with up to ±1px of jitter.
 *
 * @param {object} page - Object exposing `mouse.move(x, y, options)`.
 * @param {number} targetX
 * @param {number} targetY
 */
async function moveMouseHumanlike(page, targetX, targetY) {
  const spread = (range) => (Math.random() - 0.5) * range;
  const bezier = (t, from, via, to) => {
    const inv = 1 - t;
    return inv * inv * from + 2 * inv * t * via + t * t * to;
  };

  const stepCount = 8 + Math.floor(Math.random() * 6);
  const origin = { x: targetX + spread(120), y: targetY + spread(120) };
  const control = {
    x: (origin.x + targetX) / 2 + spread(80),
    y: (origin.y + targetY) / 2 + spread(80)
  };

  for (let step = 1; step <= stepCount; step += 1) {
    const t = step / stepCount;
    const pointX = bezier(t, origin.x, control.x, targetX);
    const pointY = bezier(t, origin.y, control.y, targetY);
    const wobbleX = spread(2);
    const wobbleY = spread(2);
    await page.mouse.move(pointX + wobbleX, pointY + wobbleY, { steps: 1 });
  }
}
83
+
84
/**
 * Drift the cursor between 3–5 random viewport points to imitate an idle hand.
 *
 * Each drift is walked in 20–39 single-step moves that close the remaining distance
 * evenly; after a drift there is a 40% chance of a 200–800ms pause.
 * Falls back to a 1280x720 area when `page.viewportSize()` returns nothing.
 *
 * @param {object} page - Object exposing `viewportSize()`, `mouse.move()` and `waitForTimeout()`.
 */
async function idleMouse(page) {
  const { width, height } = page.viewportSize() || { width: 1280, height: 720 };
  const driftCount = 3 + Math.floor(Math.random() * 3);
  let cursorX = Math.random() * width;
  let cursorY = Math.random() * height;

  for (let drift = 0; drift < driftCount; drift += 1) {
    const goalX = Math.random() * width;
    const goalY = Math.random() * height;
    const stepCount = 20 + Math.floor(Math.random() * 20);

    // Count the remaining steps down so the final step lands exactly on the goal.
    for (let remaining = stepCount; remaining > 0; remaining -= 1) {
      cursorX += (goalX - cursorX) / remaining;
      cursorY += (goalY - cursorY) / remaining;
      await page.mouse.move(cursorX, cursorY, { steps: 1 });
    }

    const shouldPause = Math.random() < 0.4;
    if (shouldPause) {
      await page.waitForTimeout(200 + Math.random() * 600);
    }
  }
}
103
+
104
/**
 * Smooth-scroll the window to `targetY` the way a person does: first miss the target
 * by 40–159px in a random direction, pause, then settle on it. With 35% probability a
 * final small relative nudge of up to ±30px follows.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)` and `waitForTimeout(ms)`.
 * @param {number} targetY - Absolute scroll offset passed to `window.scrollTo`.
 */
async function overshootScroll(page, targetY) {
  const scrollWindowTo = (top) =>
    page.evaluate((y) => window.scrollTo({ top: y, behavior: 'smooth' }), top);

  const direction = Math.random() > 0.5 ? 1 : -1;
  const missBy = direction * (40 + Math.floor(Math.random() * 120));

  await scrollWindowTo(targetY + missBy);
  await page.waitForTimeout(250 + Math.random() * 400);
  await scrollWindowTo(targetY);

  if (Math.random() >= 0.35) return;
  await page.waitForTimeout(120 + Math.random() * 200);
  await page.evaluate((y) => window.scrollBy({ top: y, behavior: 'smooth' }), (Math.random() - 0.5) * 60);
}
116
+
117
// Characters after which the typing simulation inserts a longer pause.
const punctuationPause = /[.,!?;:]/;

/** Uniformly distributed random number in the half-open range [min, max). */
const randomBetween = (min, max) => {
  const span = max - min;
  return min + Math.random() * span;
};
120
+
121
/**
 * Type `text` one character at a time with human-like timing.
 *
 * @param {object} page - Object exposing `focus()`, `waitForTimeout()` and
 *   `keyboard.press()` / `keyboard.insertText()`.
 * @param {string|null} selector - Element to focus first; skipped when falsy.
 * @param {*} text - Text to type. Non-strings are converted with `String()`;
 *   `null`/`undefined` type nothing.
 * @param {object} [options]
 * @param {boolean} [options.allowTypos=false] - Occasionally press a wrong letter and erase it.
 * @param {boolean} [options.naturalTyping=false] - Type in bursts with pauses between them,
 *   pause after spaces, and use a wider base delay.
 * @param {boolean} [options.fatigue=false] - Occasionally add a longer pause before a key.
 */
async function humanType(page, selector, text, options = {}) {
  const { allowTypos = false, naturalTyping = false, fatigue = false } = options;
  if (selector) await page.focus(selector);

  // Array.from walks the string by code point, so surrogate pairs (emoji, astral
  // characters) stay intact instead of being typed as two broken halves.
  const chars = Array.from(text === undefined || text === null ? '' : String(text));
  let burstCounter = 0;
  const burstLimit = naturalTyping ? Math.floor(randomBetween(4, 12)) : 999;
  const baseDelay = naturalTyping ? randomBetween(30, 140) : randomBetween(25, 80);

  // Press the key; if the keyboard rejects it, insert the text directly and emulate
  // the delay by hand.
  const typeChar = async (char, delay) => {
    try {
      await page.keyboard.press(char, { delay });
    } catch {
      await page.keyboard.insertText(char);
      if (delay) await page.waitForTimeout(delay);
    }
  };

  for (const char of chars) {
    // End of a typing burst: breathe before continuing.
    if (naturalTyping && burstCounter >= burstLimit) {
      await page.waitForTimeout(randomBetween(120, 420));
      burstCounter = 0;
    }

    // Simulated typo: hit a random letter, notice, erase it (sometimes twice).
    if (allowTypos && Math.random() < (naturalTyping ? 0.1 : 0.04)) {
      const keys = 'qwertyuiopasdfghjklzxcvbnm';
      const typo = keys[Math.floor(Math.random() * keys.length)];
      await page.keyboard.press(typo, { delay: 40 + Math.random() * 120 });
      if (Math.random() < 0.5) {
        await page.waitForTimeout(120 + Math.random() * 200);
      }
      await page.keyboard.press('Backspace', { delay: 40 + Math.random() * 120 });
      if (Math.random() < 0.3) {
        await page.keyboard.press(typo, { delay: 40 + Math.random() * 120 });
        await page.keyboard.press('Backspace', { delay: 40 + Math.random() * 120 });
      }
    }

    const extra = punctuationPause.test(char) ? randomBetween(140, 320) : randomBetween(0, 90);
    const fatiguePause = fatigue && Math.random() < 0.06 ? randomBetween(180, 420) : 0;
    await typeChar(char, baseDelay + extra + fatiguePause);
    burstCounter += 1;

    if (naturalTyping && char === ' ') {
      await page.waitForTimeout(randomBetween(40, 180));
    }
  }
}
167
+
168
+ async function handleAgent(req, res) {
169
+ const data = (req.method === 'POST') ? req.body : req.query;
170
+ let { url, actions, wait: globalWait, rotateUserAgents, humanTyping, stealth = {} } = data;
171
+ const runId = data.runId ? String(data.runId) : null;
172
+ const includeShadowDomRaw = data.includeShadowDom ?? req.query.includeShadowDom;
173
+ const includeShadowDom = includeShadowDomRaw === undefined
174
+ ? true
175
+ : !(String(includeShadowDomRaw).toLowerCase() === 'false' || includeShadowDomRaw === false);
176
+ const {
177
+ allowTypos = false,
178
+ idleMovements = false,
179
+ overscroll = false,
180
+ deadClicks = false,
181
+ fatigue = false,
182
+ naturalTyping = false
183
+ } = stealth;
184
+
185
+ if (typeof actions === 'string') {
186
+ try {
187
+ actions = JSON.parse(actions);
188
+ } catch (e) {
189
+ return res.status(400).json({ error: 'Invalid actions JSON format.' });
190
+ }
191
+ }
192
+
193
+ if (!actions || !Array.isArray(actions)) {
194
+ return res.status(400).json({
195
+ error: 'Actions array is required.',
196
+ usage: 'POST JSON with {"actions": [...], "stealth": {...}}'
197
+ });
198
+ }
199
+
200
+ const baseUrl = `${req.protocol || 'http'}://${req.get('host')}`;
201
+ const runtimeVars = { ...(data.taskVariables || data.variables || {}) };
202
+ let lastBlockOutput = null;
203
+ runtimeVars['block.output'] = lastBlockOutput;
204
+
205
+ const setBlockOutput = (value) => {
206
+ lastBlockOutput = value;
207
+ runtimeVars['block.output'] = value;
208
+ };
209
+
210
/**
 * Expand `{$name}` placeholders in a string using `runtimeVars`.
 *
 * - `{$now}` becomes the current time as an ISO-8601 string.
 * - Strings, numbers and booleans are inserted via `String()`.
 * - Other values are inserted as JSON.
 * - Unknown names, `null`/`undefined` values and values JSON cannot represent become ''.
 *
 * Non-string input is returned unchanged.
 */
const resolveTemplate = (input) => {
  if (typeof input !== 'string') return input;
  return input.replace(/\{\$([\w.]+)\}/g, (_match, name) => {
    if (name === 'now') return new Date().toISOString();
    // Only variables that were actually set count; without this check names such as
    // `constructor` or `__proto__` would resolve to members of Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(runtimeVars, name)) return '';
    const value = runtimeVars[name];
    if (value === undefined || value === null) return '';
    if (typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean') {
      return String(value);
    }
    try {
      const serialized = JSON.stringify(value);
      // JSON.stringify returns undefined for functions and symbols; never let that
      // leak into the output as the text "undefined".
      return serialized === undefined ? '' : serialized;
    } catch {
      return String(value);
    }
  });
};
226
+
227
// Expand placeholders when given a string; pass every other value through untouched.
const resolveMaybe = (value) => (typeof value === 'string' ? resolveTemplate(value) : value);
231
+
232
/**
 * Interpret a string as a typed literal where possible.
 *
 * Blank → '', 'true'/'false' → boolean, plain decimal → number, text wrapped in
 * `{}` or `[]` → parsed JSON. Anything else (including JSON that fails to parse)
 * comes back as the original, untrimmed string. Non-strings are returned as-is.
 */
const parseValue = (value) => {
  if (typeof value !== 'string') return value;

  const trimmed = value.trim();
  switch (trimmed) {
    case '':
      return '';
    case 'true':
      return true;
    case 'false':
      return false;
    default:
      break;
  }

  if (/^-?\d+(\.\d+)?$/.test(trimmed)) return Number(trimmed);

  const first = trimmed[0];
  const last = trimmed[trimmed.length - 1];
  const looksLikeJson = (first === '{' && last === '}') || (first === '[' && last === ']');
  if (!looksLikeJson) return value;

  try {
    return JSON.parse(trimmed);
  } catch {
    return value;
  }
};
248
+
249
/**
 * Parse CSV text into an array of objects keyed by the header row.
 *
 * Supports double-quoted fields (with `""` as an escaped quote, embedded commas and
 * newlines) and both LF and CRLF line endings. Blank header cells are named
 * `column_<n>`; cells missing from a data row become ''.
 * A line break terminating the last record does not create an extra empty record.
 *
 * @param {*} input - CSV text; non-strings are converted with `String(input || '')`.
 * @returns {Array<Object<string, string>>} One object per data row; [] when there is
 *   no data row.
 */
const parseCsv = (input) => {
  const text = typeof input === 'string' ? input : String(input || '');
  const rows = [];
  let row = [];
  let current = '';
  let inQuotes = false;
  // True once the record being read has any content: a character, a quote or a comma.
  let recordStarted = false;

  for (let i = 0; i < text.length; i += 1) {
    const char = text[i];

    if (inQuotes) {
      if (char !== '"') {
        current += char;
      } else if (text[i + 1] === '"') {
        // Doubled quote inside a quoted field is a literal quote.
        current += '"';
        i += 1;
      } else {
        inQuotes = false;
      }
      continue;
    }

    if (char === '"') {
      inQuotes = true;
      recordStarted = true;
    } else if (char === ',') {
      row.push(current);
      current = '';
      recordStarted = true;
    } else if (char === '\n') {
      row.push(current);
      rows.push(row);
      row = [];
      current = '';
      recordStarted = false;
    } else if (char !== '\r') {
      // CR is dropped so CRLF behaves like LF.
      current += char;
      recordStarted = true;
    }
  }

  // Flush the final record only if something followed the last line break; otherwise a
  // file ending in "\n" would yield a bogus all-empty row.
  if (recordStarted) {
    row.push(current);
    rows.push(row);
  }

  if (rows.length === 0) return [];
  const header = rows[0].map((cell, idx) => {
    const trimmed = String(cell || '').trim();
    return trimmed || `column_${idx + 1}`;
  });
  const dataRows = rows.slice(1);
  return dataRows.map((cells) => {
    const obj = {};
    header.forEach((key, idx) => {
      obj[key] = cells[idx] ?? '';
    });
    return obj;
  });
};
304
+
305
/**
 * Render one value as a CSV cell. `null`/`undefined` become an empty cell. The text is
 * wrapped in double quotes (with inner quotes doubled) when it contains a quote, comma
 * or line break, or when it starts or ends with whitespace.
 */
const csvEscape = (value) => {
  const text = value === null || value === undefined ? '' : String(value);
  const needsQuoting = /[",\n\r]/.test(text) || /^\s|\s$/.test(text);
  if (!needsQuoting) return text;
  return `"${text.replace(/"/g, '""')}"`;
};
312
+
313
/**
 * Serialise a value to CSV text.
 *
 * - `null`/`undefined` → ''.
 * - A string that looks like JSON (starts with `{` or `[` after trimming) is parsed and
 *   serialised; any other string, or one that fails to parse, is returned unchanged.
 * - Otherwise the value is treated as a list of rows (a single value becomes one row).
 *   If any row is a plain object, the union of object keys (in first-seen order) forms
 *   the header and every row is emitted under it; if none is, arrays are joined
 *   cell-by-cell and scalars become single-cell lines with no header.
 */
const toCsvString = (raw) => {
  if (raw === undefined || raw === null) return '';

  if (typeof raw === 'string') {
    const trimmed = raw.trim();
    const firstChar = trimmed.charAt(0);
    if (firstChar !== '{' && firstChar !== '[') return raw;
    try {
      return toCsvString(JSON.parse(trimmed));
    } catch {
      return raw;
    }
  }

  const records = Array.isArray(raw) ? raw : [raw];
  if (records.length === 0) return '';

  // A Set keeps insertion order, giving the same first-seen column ordering.
  const columns = new Set();
  for (const record of records) {
    if (record && typeof record === 'object' && !Array.isArray(record)) {
      for (const key of Object.keys(record)) columns.add(key);
    }
  }

  if (columns.size === 0) {
    return records
      .map((record) => (Array.isArray(record) ? record.map(csvEscape).join(',') : csvEscape(record)))
      .join('\n');
  }

  const columnList = [...columns];
  const output = [columnList.map(csvEscape).join(',')];
  for (const record of records) {
    const source = record && typeof record === 'object' ? record : {};
    output.push(columnList.map((key) => csvEscape(source[key])).join(','));
  }
  return output.join('\n');
};
353
+
354
/**
 * Pair up block-structured actions by index.
 *
 * `if`, `while`, `repeat`, `foreach` and `on_error` open a block; `end` closes the most
 * recently opened one; `else` attaches to the innermost open `if` that has no `else` yet.
 * Unmatched `else`/`end` entries are ignored.
 *
 * @param {Array<{type: string}>} list - Action list.
 * @returns {{startToEnd: Object, startToElse: Object, elseToEnd: Object, endToStart: Object}}
 *   Index lookup tables between the related actions.
 */
const buildBlockMap = (list) => {
  const openers = new Set(['if', 'while', 'repeat', 'foreach', 'on_error']);
  const startToEnd = {};
  const startToElse = {};
  const elseToEnd = {};
  const endToStart = {};
  const openBlocks = [];

  // Bind an `else` to the nearest enclosing `if` that is still missing one.
  const attachElse = (elseIdx) => {
    for (let depth = openBlocks.length - 1; depth >= 0; depth -= 1) {
      const candidate = openBlocks[depth];
      if (candidate.type === 'if' && startToElse[candidate.idx] === undefined) {
        startToElse[candidate.idx] = elseIdx;
        return;
      }
    }
  };

  // Close the innermost open block and record all index relations for it.
  const closeBlock = (endIdx) => {
    const opened = openBlocks.pop();
    if (!opened) return;
    startToEnd[opened.idx] = endIdx;
    endToStart[endIdx] = opened.idx;
    const elseIdx = startToElse[opened.idx];
    if (elseIdx !== undefined) {
      elseToEnd[elseIdx] = endIdx;
    }
  };

  list.forEach((action, idx) => {
    const { type } = action;
    if (openers.has(type)) {
      openBlocks.push({ type, idx });
    } else if (type === 'else') {
      attachElse(idx);
    } else if (type === 'end') {
      closeBlock(idx);
    }
  });

  return { startToEnd, startToElse, elseToEnd, endToStart };
};
390
+
391
+ // Pick a random UA if rotation is enabled, otherwise use the first one
392
+ const selectedUA = rotateUserAgents
393
+ ? userAgents[Math.floor(Math.random() * userAgents.length)]
394
+ : userAgents[0];
395
+
396
+ let browser;
397
+ try {
398
+ browser = await chromium.launch({
399
+ headless: true,
400
+ channel: 'chrome',
401
+ args: [
402
+ '--no-sandbox',
403
+ '--disable-setuid-sandbox',
404
+ '--disable-blink-features=AutomationControlled',
405
+ '--hide-scrollbars',
406
+ '--mute-audio'
407
+ ]
408
+ });
409
+
410
+ const contextOptions = {
411
+ userAgent: selectedUA,
412
+ viewport: { width: 1280 + Math.floor(Math.random() * 640), height: 720 + Math.floor(Math.random() * 360) },
413
+ deviceScaleFactor: 1,
414
+ locale: 'en-US',
415
+ timezoneId: 'America/New_York',
416
+ colorScheme: 'dark',
417
+ permissions: ['geolocation']
418
+ };
419
+
420
+ if (fs.existsSync(STORAGE_STATE_FILE)) {
421
+ contextOptions.storageState = STORAGE_STATE_FILE;
422
+ }
423
+
424
+ const context = await browser.newContext(contextOptions);
425
+
426
+ await context.addInitScript(() => {
427
+ Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
428
+ });
429
+ if (includeShadowDom) {
430
+ await context.addInitScript(() => {
431
+ if (!Element.prototype.attachShadow) return;
432
+ const original = Element.prototype.attachShadow;
433
+ Element.prototype.attachShadow = function (init) {
434
+ const options = init ? { ...init, mode: 'open' } : { mode: 'open' };
435
+ return original.call(this, options);
436
+ };
437
+ });
438
+ }
439
+
440
+ const page = await context.newPage();
441
+
442
+ if (url) {
443
+ await page.goto(resolveTemplate(url), { waitUntil: 'domcontentloaded', timeout: 60000 });
444
+ }
445
+
446
+ const logs = [];
447
+ let actionIdx = 0;
448
+ const baseDelay = (ms) => {
449
+ const fatigueMultiplier = fatigue ? 1 + (actionIdx * 0.1) : 1;
450
+ const microPause = fatigue && Math.random() < 0.08 ? randomBetween(120, 480) : 0;
451
+ return ((ms + Math.random() * 140) * fatigueMultiplier) + microPause;
452
+ };
453
+
454
+ const { startToEnd, startToElse, elseToEnd, endToStart } = buildBlockMap(actions);
455
+ const repeatState = new Map();
456
+ const foreachState = new Map();
457
+ let errorHandler = null;
458
+ let inErrorHandler = false;
459
+ let stopRequested = false;
460
+ let stopOutcome = 'success';
461
+
462
// Reduce a variable reference to its bare name: "{$user.id}" → "user.id".
// Anything that is not exactly one placeholder is returned trimmed; falsy input → ''.
const normalizeVarRef = (raw) => {
  if (!raw) return '';
  const candidate = String(raw).trim();
  const placeholder = /^\{\$([\w.]+)\}$/.exec(candidate);
  if (placeholder === null) return candidate;
  return placeholder[1];
};
468
+
469
// Resolve an operand: if it names (or is a `{$name}` reference to) an existing runtime
// variable, yield that variable's raw value; otherwise expand placeholders in strings
// and hand every other value back unchanged.
const getValueFromVarOrLiteral = (raw) => {
  const name = normalizeVarRef(raw);
  const isKnownVariable = Boolean(name) && Object.prototype.hasOwnProperty.call(runtimeVars, name);
  if (isKnownVariable) return runtimeVars[name];
  return typeof raw === 'string' ? resolveTemplate(raw) : raw;
};
475
+
476
// Convert a value to boolean. Strings that parse to a boolean literal ('true'/'false',
// surrounding whitespace allowed) use that literal; everything else uses JS truthiness.
const coerceBoolean = (value) => {
  switch (typeof value) {
    case 'boolean':
      return value;
    case 'string': {
      const literal = parseValue(value);
      return typeof literal === 'boolean' ? literal : Boolean(value);
    }
    default:
      return Boolean(value);
  }
};
484
+
485
// Convert a value to a number. Numbers pass through untouched; strings are first tried
// as a plain decimal literal via parseValue; everything else goes through Number(),
// with any non-finite outcome reported as NaN.
const toNumber = (value) => {
  if (typeof value === 'number') return value;

  const literal = typeof value === 'string' ? parseValue(value) : undefined;
  if (typeof literal === 'number') return literal;

  const fallback = Number(value);
  return Number.isFinite(fallback) ? fallback : NaN;
};
494
+
495
// String conversion that maps null and undefined to '' instead of "null"/"undefined".
const toString = (value) => (value === undefined || value === null ? '' : String(value));
499
+
500
/**
 * Evaluate a structured (non-expression) condition described by an action.
 *
 * Reads `conditionVarType` ('string' by default, or 'number' / 'boolean'),
 * `conditionOp`, `conditionVar` (left operand: variable reference or literal) and
 * `conditionValue` (right operand, placeholder-expanded).
 *
 * - boolean: `is_false` negates; any other operator tests for true.
 * - number: `not_equals`, `gt`, `gte`, `lt`, `lte`, otherwise equality; false whenever
 *   either side is not a finite number.
 * - string: `not_equals`, `contains`, `starts_with`, `ends_with`, `matches` (regular
 *   expression; an invalid pattern yields false), otherwise equality.
 *
 * @returns {boolean}
 */
const evalStructuredCondition = (act) => {
  const varType = act.conditionVarType || 'string';
  const defaultOp = varType === 'boolean' ? 'is_true' : 'equals';
  const op = act.conditionOp || defaultOp;
  const leftRaw = getValueFromVarOrLiteral(act.conditionVar || '');
  const rightResolved = resolveTemplate(String(act.conditionValue ?? ''));

  if (varType === 'boolean') {
    const isTrue = Boolean(coerceBoolean(leftRaw));
    return op === 'is_false' ? !isTrue : isTrue;
  }

  if (varType === 'number') {
    const leftNum = toNumber(leftRaw);
    const rightNum = toNumber(rightResolved);
    if (!(Number.isFinite(leftNum) && Number.isFinite(rightNum))) return false;
    switch (op) {
      case 'not_equals':
        return leftNum !== rightNum;
      case 'gt':
        return leftNum > rightNum;
      case 'gte':
        return leftNum >= rightNum;
      case 'lt':
        return leftNum < rightNum;
      case 'lte':
        return leftNum <= rightNum;
      default:
        return leftNum === rightNum;
    }
  }

  const leftText = toString(leftRaw);
  switch (op) {
    case 'not_equals':
      return leftText !== rightResolved;
    case 'contains':
      return leftText.includes(rightResolved);
    case 'starts_with':
      return leftText.startsWith(rightResolved);
    case 'ends_with':
      return leftText.endsWith(rightResolved);
    case 'matches':
      try {
        return new RegExp(rightResolved).test(leftText);
      } catch {
        return false;
      }
    default:
      return leftText === rightResolved;
  }
};
540
+
541
/**
 * Evaluate a free-form condition expression inside the page.
 *
 * Placeholders are expanded first; a blank expression is false. The expression is
 * compiled with `new Function` in the page context and can use:
 * `vars` (runtime variables), `block.output` (last block output), `exists(selector)`,
 * `text(selector)` (trimmed text content or '') and `url()`.
 *
 * @param {string} [expr] - Expression source.
 * @returns {Promise<boolean>} Truthiness of the expression.
 */
const evalCondition = async (expr) => {
  const expression = resolveTemplate(expr || '');
  if (expression.trim() === '') return false;

  const pageArgs = { expression, vars: runtimeVars, blockOutput: lastBlockOutput };
  // The callback is handed to page.evaluate, so it must not close over anything
  // outside itself; everything it needs arrives through `args`.
  return page.evaluate((args) => {
    const find = (selector) => (selector ? document.querySelector(selector) : null);
    const exists = (selector) => Boolean(find(selector));
    const text = (selector) => {
      const node = find(selector);
      return node ? (node.textContent || '').trim() : '';
    };
    const url = () => window.location.href;
    // eslint-disable-next-line no-new-func
    const evaluator = new Function('vars', 'block', 'exists', 'text', 'url', `return !!(${args.expression});`);
    return evaluator(args.vars || {}, { output: args.blockOutput }, exists, text, url);
  }, pageArgs);
};
561
+
562
/**
 * Publish the current loop iteration into the runtime variables.
 *
 * Sets `loop.index`, `loop.count` and `loop.item`, plus `loop.text` / `loop.html`:
 * for object items these mirror the item's `text` / `html` properties ('' when the
 * property is absent); for any other item `loop.text` is the item itself and
 * `loop.html` is ''.
 */
const setLoopVars = (item, index, count) => {
  runtimeVars['loop.index'] = index;
  runtimeVars['loop.count'] = count;
  runtimeVars['loop.item'] = item;
  if (item && typeof item === 'object') {
    // Always assign both, so values from a previous iteration never leak into an
    // iteration whose item lacks the property.
    runtimeVars['loop.text'] = 'text' in item ? item.text : '';
    runtimeVars['loop.html'] = 'html' in item ? item.html : '';
  } else {
    runtimeVars['loop.text'] = item;
    runtimeVars['loop.html'] = '';
  }
};
574
+
575
/**
 * Collect the items a `foreach` action iterates over.
 *
 * With a selector: one `{ text, html }` entry per matching element (trimmed text content
 * and inner HTML). Otherwise, with a variable name: the variable's value if it is an
 * array, or its JSON-parsed value if it is a string holding a JSON array.
 * Every other situation yields [].
 *
 * @returns {Promise<Array>}
 */
const getForeachItems = async (act) => {
  const selector = resolveMaybe(act.selector);
  const varName = resolveMaybe(act.varName);

  if (selector) {
    return page.$$eval(String(selector), (nodes) => nodes.map((node) => {
      const text = (node.textContent || '').trim();
      const html = node.innerHTML || '';
      return { text, html };
    }));
  }

  const source = varName ? runtimeVars[String(varName)] : undefined;
  if (!source) return [];
  if (Array.isArray(source)) return source;
  if (typeof source !== 'string') return [];

  try {
    const parsed = JSON.parse(source);
    return Array.isArray(parsed) ? parsed : [];
  } catch {
    return [];
  }
};
598
+
599
/**
 * Turn the `value` of a merge action into the list of things to merge.
 *
 * Arrays are used as-is, a single object becomes a one-element list, `null`/`undefined`
 * become [], and any other non-string becomes a one-element list. A string is treated
 * as a comma-separated list of tokens, each of which is either:
 *   - a variable name or `{$name}` reference → that variable's raw value, or
 *   - text with placeholders → expanded, then looked up as a variable name, and
 *     finally parsed as a literal.
 * Tokens that are blank (before or after expansion) are skipped.
 *
 * Tokens are split BEFORE placeholders are expanded: expanding first would serialise
 * arrays/objects to JSON whose own commas then get split apart.
 */
const getMergeSources = (raw) => {
  if (Array.isArray(raw)) return raw;
  if (raw && typeof raw === 'object') return [raw];
  if (typeof raw !== 'string') {
    return raw === undefined || raw === null ? [] : [raw];
  }

  const isVariable = (name) => Object.prototype.hasOwnProperty.call(runtimeVars, name);
  const sources = [];

  raw.split(',').forEach((piece) => {
    const token = piece.trim();
    if (!token) return;

    // Direct reference: "items" or "{$items}".
    const directName = normalizeVarRef(token);
    if (isVariable(directName)) {
      sources.push(runtimeVars[directName]);
      return;
    }

    // Otherwise expand placeholders; the result may itself name a variable
    // (e.g. "page_{$i}") or be a literal.
    const expanded = String(resolveTemplate(token)).trim();
    if (!expanded) return;
    const expandedName = normalizeVarRef(expanded);
    if (isVariable(expandedName)) {
      sources.push(runtimeVars[expandedName]);
      return;
    }
    sources.push(parseValue(expanded));
  });

  return sources;
};
622
+
623
/**
 * Combine merge sources into one value.
 *
 * - All arrays → flattened one level into a single array.
 * - All plain objects → shallow-merged into a new object (later sources win).
 * - Mixed → a single array: arrays are spread in, other values appended, `undefined` skipped.
 * - Empty or non-array input → [].
 */
const mergeSources = (sources) => {
  if (!Array.isArray(sources) || sources.length === 0) return [];

  const isPlainObject = (item) => Boolean(item) && typeof item === 'object' && !Array.isArray(item);

  if (sources.every((item) => Array.isArray(item))) {
    return sources.flat();
  }
  if (sources.every(isPlainObject)) {
    return sources.reduce((combined, item) => Object.assign(combined, item), {});
  }

  return sources.reduce((combined, item) => {
    if (Array.isArray(item)) {
      combined.push(...item);
    } else if (item !== undefined) {
      combined.push(item);
    }
    return combined;
  }, []);
};
640
+
641
/**
 * Run a single (non control-flow) action against the page and return its output.
 *
 * Supported `act.type` values and their results:
 *   navigate/goto → final page URL          click/hover → true
 *   type/fill     → the text typed          press       → the key pressed
 *   wait          → milliseconds waited     select      → the option value
 *   scroll        → pixels scrolled         javascript  → value of the evaluated code
 *   csv           → parsed rows             merge       → merged value
 *   set           → value stored            stop        → 'success' | 'error'
 *   start         → response payload of the started task
 * Any other type does nothing and yields null.
 *
 * `act.timeout` (default 10000 ms) bounds the selector waits. Selector, value and key
 * fields go through placeholder expansion before use.
 *
 * @param {object} act - Action descriptor.
 * @returns {Promise<*>} The action's output as listed above.
 * @throws {Error} For `start` without a task id or API key, or when the task request
 *   fails; errors thrown by page operations propagate unchanged.
 */
const executeAction = async (act) => {
  const { type, timeout } = act;
  const actionTimeout = timeout || 10000;
  let result = null;

  switch (type) {
    case 'navigate':
    case 'goto':
      logs.push(`Navigating to: ${resolveMaybe(act.value)}`);
      await page.goto(resolveMaybe(act.value), { waitUntil: 'domcontentloaded' });
      result = page.url();
      break;
    case 'click': {
      logs.push(`Clicking: ${resolveMaybe(act.selector)}`);
      await page.waitForSelector(resolveMaybe(act.selector), { timeout: actionTimeout });

      // Neutral Dead Click
      if (deadClicks && Math.random() < 0.4) {
        logs.push('Performing neutral dead-click...');
        const viewport = page.viewportSize() || { width: 1280, height: 720 };
        await page.mouse.click(
          10 + Math.random() * (viewport.width * 0.2),
          10 + Math.random() * (viewport.height * 0.2)
        );
        await page.waitForTimeout(baseDelay(200));
      }

      // Get element point for human-like movement. The element can vanish between the
      // wait above and this lookup, so a missing handle just skips the mouse travel.
      const handle = await page.$(resolveMaybe(act.selector));
      const box = handle && await handle.boundingBox();
      if (box) {
        const centerX = box.x + box.width / 2 + (Math.random() - 0.5) * 5;
        const centerY = box.y + box.height / 2 + (Math.random() - 0.5) * 5;
        await moveMouseHumanlike(page, centerX, centerY);
        if (deadClicks && Math.random() < 0.25) {
          const offsetX = (Math.random() - 0.5) * Math.min(20, box.width / 3);
          const offsetY = (Math.random() - 0.5) * Math.min(20, box.height / 3);
          await page.mouse.click(centerX + offsetX, centerY + offsetY, { delay: baseDelay(30) });
          await page.waitForTimeout(baseDelay(120));
        }
      }

      await page.waitForTimeout(baseDelay(50));
      await page.click(resolveMaybe(act.selector), {
        delay: baseDelay(50)
      });
      result = true;
      break;
    }
    case 'type':
    case 'fill':
      if (act.selector) {
        logs.push(`Typing into ${resolveMaybe(act.selector)}: ${resolveMaybe(act.value)}`);
        await page.waitForSelector(resolveMaybe(act.selector), { timeout: actionTimeout });
        if (humanTyping) {
          await humanType(page, resolveMaybe(act.selector), resolveMaybe(act.value), { allowTypos, naturalTyping, fatigue });
        } else {
          await page.fill(resolveMaybe(act.selector), resolveMaybe(act.value));
        }
      } else {
        logs.push(`Typing (global): ${resolveMaybe(act.value)}`);
        if (humanTyping) {
          await humanType(page, null, resolveMaybe(act.value), { allowTypos, naturalTyping, fatigue });
        } else {
          await page.keyboard.type(resolveMaybe(act.value), { delay: baseDelay(50) });
        }
      }
      result = resolveMaybe(act.value);
      break;
    case 'hover': {
      logs.push(`Hovering: ${resolveMaybe(act.selector)}`);
      await page.waitForSelector(resolveMaybe(act.selector), { timeout: actionTimeout });
      const handle = await page.$(resolveMaybe(act.selector));
      const box = handle && await handle.boundingBox();
      if (box) {
        const centerX = box.x + box.width / 2 + (Math.random() - 0.5) * 5;
        const centerY = box.y + box.height / 2 + (Math.random() - 0.5) * 5;
        await moveMouseHumanlike(page, centerX, centerY);
      }
      await page.waitForTimeout(baseDelay(150));
      result = true;
      break;
    }
    case 'press':
      logs.push(`Pressing key: ${resolveMaybe(act.key)}`);
      await page.keyboard.press(resolveMaybe(act.key), { delay: baseDelay(50) });
      result = resolveMaybe(act.key);
      break;
    case 'wait': {
      // Value is in seconds; a missing or non-numeric value falls back to 2 seconds.
      const seconds = act.value ? parseFloat(resolveMaybe(act.value)) : NaN;
      const ms = Number.isFinite(seconds) ? seconds * 1000 : 2000;
      logs.push(`Waiting: ${ms}ms`);

      if (idleMovements) {
        logs.push('Simulating cursor restlessness...');
        // Whichever finishes first ends the wait.
        await Promise.race([
          idleMouse(page),
          page.waitForTimeout(ms)
        ]);
      } else {
        await page.waitForTimeout(ms);
      }
      result = ms;
      break;
    }
    case 'select':
      logs.push(`Selecting ${resolveMaybe(act.value)} from ${resolveMaybe(act.selector)}`);
      await page.waitForSelector(resolveMaybe(act.selector), { timeout: actionTimeout });
      await page.selectOption(resolveMaybe(act.selector), resolveMaybe(act.value));
      result = resolveMaybe(act.value);
      break;
    case 'scroll': {
      const amount = act.value ? parseInt(resolveMaybe(act.value), 10) : (400 + Math.random() * 400);
      logs.push(`Scrolling page: ${amount}px...`);
      if (overscroll) {
        await overshootScroll(page, amount);
      } else if (act.selector) {
        await page.evaluate(({ selector, y }) => {
          const el = document.querySelector(selector);
          if (el) el.scrollBy({ top: y, behavior: 'smooth' });
        }, { selector: resolveMaybe(act.selector), y: amount });
      } else {
        await page.evaluate((y) => window.scrollBy({ top: y, behavior: 'smooth' }), amount);
      }
      await page.waitForTimeout(baseDelay(500));
      result = amount;
      break;
    }
    case 'javascript':
      logs.push('Running custom JavaScript...');
      if (act.value) {
        // SECURITY: evaluates caller-supplied code inside the page. This is the purpose
        // of the action, but it must only ever be reachable by trusted callers.
        result = await page.evaluate((code) => {
          // eslint-disable-next-line no-eval
          return eval(code);
        }, resolveMaybe(act.value));
      }
      break;
    case 'csv': {
      const source = act.value ? resolveTemplate(act.value) : lastBlockOutput;
      if (typeof source === 'string') {
        result = parseCsv(source);
      } else if (Array.isArray(source) || (source && typeof source === 'object')) {
        result = source;
      } else {
        result = [];
      }
      logs.push(`Parsed ${Array.isArray(result) ? result.length : 0} CSV rows.`);
      break;
    }
    case 'merge': {
      const sources = getMergeSources(act.value || '');
      const merged = mergeSources(sources);
      if (act.varName) {
        const targetName = normalizeVarRef(act.varName);
        runtimeVars[String(targetName)] = merged;
      }
      if (Array.isArray(merged)) {
        logs.push(`Merged ${merged.length} item(s).`);
      } else if (merged && typeof merged === 'object') {
        logs.push(`Merged ${Object.keys(merged).length} field(s).`);
      } else {
        logs.push('Merged values.');
      }
      result = merged;
      break;
    }
    case 'set':
      if (act.varName) {
        const resolved = resolveTemplate(act.value || '');
        const parsed = parseValue(resolved);
        runtimeVars[String(act.varName)] = parsed;
        logs.push(`Set variable ${act.varName}`);
        result = parsed;
      }
      break;
    case 'stop':
      stopRequested = true;
      stopOutcome = act.value === 'error' ? 'error' : 'success';
      logs.push(`Stop task (${stopOutcome}).`);
      result = stopOutcome;
      break;
    case 'start': {
      const taskId = resolveMaybe(act.value);
      if (!taskId) throw new Error('Missing task id.');
      const apiKey = loadApiKey() || data.apiKey || data.key;
      if (!apiKey) throw new Error('API key is required to start a task.');
      logs.push(`Starting task: ${taskId}`);
      // The id comes from user-controlled templates; encode it so it stays a single
      // path segment and cannot redirect the request to another endpoint.
      const response = await fetch(`${baseUrl}/tasks/${encodeURIComponent(String(taskId))}/api`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'x-api-key': apiKey
        },
        body: JSON.stringify({
          variables: runtimeVars,
          taskVariables: runtimeVars,
          runSource: 'agent_block',
          taskId
        })
      });
      let payload;
      try {
        payload = await response.json();
      } catch (err) {
        // A failed request with a non-JSON body should surface the HTTP failure below,
        // not a JSON parse error; a successful one with a broken body is a real error.
        if (response.ok) throw err;
        payload = null;
      }
      if (!response.ok) {
        const detail = payload?.error || payload?.message || response.statusText;
        throw new Error(`Start task failed: ${detail}`);
      }
      result = payload?.data ?? payload?.html ?? payload;
      break;
    }
  }
  return result;
};
849
+
850
+ let index = 0;
851
+ const maxSteps = Math.max(actions.length * 20, 1000);
852
+ let steps = 0;
853
+
854
+ while (index < actions.length) {
855
+ if (isStopRequested(runId)) {
856
+ logs.push('Execution stopped by user.');
857
+ break;
858
+ }
859
+ if (steps++ > maxSteps) {
860
+ logs.push('Execution aborted: possible infinite loop.');
861
+ break;
862
+ }
863
+
864
+ const act = actions[index];
865
+ actionIdx += 1;
866
+
867
+ if (act.disabled) {
868
+ logs.push(`SKIPPED disabled action: ${act.type}`);
869
+ reportProgress(runId, { actionId: act.id, status: 'skipped' });
870
+ index += 1;
871
+ continue;
872
+ }
873
+
874
+ if (act.type === 'on_error') {
875
+ const endIndex = startToEnd[index];
876
+ if (endIndex !== undefined) {
877
+ reportProgress(runId, { actionId: act.id, status: 'running' });
878
+ errorHandler = { start: index + 1, end: endIndex };
879
+ logs.push('On-error handler registered.');
880
+ reportProgress(runId, { actionId: act.id, status: 'success' });
881
+ index = endIndex + 1;
882
+ continue;
883
+ }
884
+ }
885
+
886
+ if (act.type === 'if') {
887
+ try {
888
+ reportProgress(runId, { actionId: act.id, status: 'running' });
889
+ const hasStructured = act.conditionVarType || act.conditionOp || act.conditionVar || act.conditionValue;
890
+ const condition = hasStructured ? evalStructuredCondition(act) : await evalCondition(act.value);
891
+ setBlockOutput(condition);
892
+ logs.push(`If condition: ${condition ? 'true' : 'false'}`);
893
+ reportProgress(runId, { actionId: act.id, status: 'success' });
894
+ if (!condition) {
895
+ const elseIndex = startToElse[index];
896
+ if (elseIndex !== undefined) {
897
+ index = elseIndex + 1;
898
+ } else {
899
+ index = (startToEnd[index] ?? index) + 1;
900
+ }
901
+ continue;
902
+ }
903
+ } catch (err) {
904
+ logs.push(`FAILED condition: ${err.message}`);
905
+ reportProgress(runId, { actionId: act.id, status: 'error' });
906
+ if (errorHandler && !inErrorHandler) {
907
+ inErrorHandler = true;
908
+ index = errorHandler.start;
909
+ continue;
910
+ }
911
+ }
912
+ index += 1;
913
+ continue;
914
+ }
915
+
916
+ if (act.type === 'else') {
917
+ reportProgress(runId, { actionId: act.id, status: 'success' });
918
+ index = (elseToEnd[index] ?? index) + 1;
919
+ continue;
920
+ }
921
+
922
+ if (act.type === 'while') {
923
+ try {
924
+ reportProgress(runId, { actionId: act.id, status: 'running' });
925
+ const condition = await evalCondition(act.value);
926
+ setBlockOutput(condition);
927
+ logs.push(`While condition: ${condition ? 'true' : 'false'}`);
928
+ reportProgress(runId, { actionId: act.id, status: 'success' });
929
+ if (!condition) {
930
+ index = (startToEnd[index] ?? index) + 1;
931
+ continue;
932
+ }
933
+ } catch (err) {
934
+ logs.push(`FAILED condition: ${err.message}`);
935
+ reportProgress(runId, { actionId: act.id, status: 'error' });
936
+ if (errorHandler && !inErrorHandler) {
937
+ inErrorHandler = true;
938
+ index = errorHandler.start;
939
+ continue;
940
+ }
941
+ }
942
+ index += 1;
943
+ continue;
944
+ }
945
+
946
+ if (act.type === 'repeat') {
947
+ reportProgress(runId, { actionId: act.id, status: 'running' });
948
+ const rawCount = parseInt(resolveMaybe(act.value) || '0', 10);
949
+ const count = Number.isFinite(rawCount) ? rawCount : 0;
950
+ const state = repeatState.get(index) || { remaining: count };
951
+ repeatState.set(index, state);
952
+ if (state.remaining <= 0) {
953
+ repeatState.delete(index);
954
+ reportProgress(runId, { actionId: act.id, status: 'success' });
955
+ index = (startToEnd[index] ?? index) + 1;
956
+ continue;
957
+ }
958
+ state.remaining -= 1;
959
+ logs.push(`Repeat block: ${state.remaining + 1} remaining`);
960
+ setBlockOutput(state.remaining + 1);
961
+ reportProgress(runId, { actionId: act.id, status: 'success' });
962
+ index += 1;
963
+ continue;
964
+ }
965
+
966
+ if (act.type === 'foreach') {
967
+ reportProgress(runId, { actionId: act.id, status: 'running' });
968
+ let state = foreachState.get(index);
969
+ if (!state) {
970
+ const items = await getForeachItems(act);
971
+ state = { items, index: 0 };
972
+ foreachState.set(index, state);
973
+ }
974
+ if (!state.items || state.items.length === 0) {
975
+ foreachState.delete(index);
976
+ reportProgress(runId, { actionId: act.id, status: 'success' });
977
+ index = (startToEnd[index] ?? index) + 1;
978
+ continue;
979
+ }
980
+ const item = state.items[state.index];
981
+ setLoopVars(item, state.index, state.items.length);
982
+ setBlockOutput(item);
983
+ logs.push(`For-each item ${state.index + 1}/${state.items.length}`);
984
+ reportProgress(runId, { actionId: act.id, status: 'success' });
985
+ index += 1;
986
+ continue;
987
+ }
988
+
989
+ if (act.type === 'end') {
990
+ reportProgress(runId, { actionId: act.id, status: 'success' });
991
+ const startIndex = endToStart[index];
992
+ if (startIndex !== undefined) {
993
+ const startAction = actions[startIndex];
994
+ if (startAction.type === 'while') {
995
+ index = startIndex;
996
+ continue;
997
+ }
998
+ if (startAction.type === 'repeat') {
999
+ const state = repeatState.get(startIndex);
1000
+ if (state && state.remaining > 0) {
1001
+ index = startIndex + 1;
1002
+ continue;
1003
+ }
1004
+ repeatState.delete(startIndex);
1005
+ }
1006
+ if (startAction.type === 'foreach') {
1007
+ const state = foreachState.get(startIndex);
1008
+ if (state) {
1009
+ state.index += 1;
1010
+ if (state.index < state.items.length) {
1011
+ const item = state.items[state.index];
1012
+ setLoopVars(item, state.index, state.items.length);
1013
+ setBlockOutput(item);
1014
+ index = startIndex + 1;
1015
+ continue;
1016
+ }
1017
+ foreachState.delete(startIndex);
1018
+ }
1019
+ }
1020
+ }
1021
+ index += 1;
1022
+ if (inErrorHandler && errorHandler && index > errorHandler.end) {
1023
+ break;
1024
+ }
1025
+ continue;
1026
+ }
1027
+
1028
+ if (stopRequested) break;
1029
+
1030
+ try {
1031
+ reportProgress(runId, { actionId: act.id, status: 'running' });
1032
+ const result = await executeAction(act);
1033
+ if (act.type === 'stop') {
1034
+ setBlockOutput(result);
1035
+ reportProgress(runId, { actionId: act.id, status: stopOutcome === 'error' ? 'error' : 'success' });
1036
+ break;
1037
+ }
1038
+ if (result !== undefined) setBlockOutput(result);
1039
+ reportProgress(runId, { actionId: act.id, status: 'success' });
1040
+ } catch (err) {
1041
+ logs.push(`FAILED action ${act.type}: ${err.message}`);
1042
+ reportProgress(runId, { actionId: act.id, status: 'error' });
1043
+ if (errorHandler && !inErrorHandler) {
1044
+ inErrorHandler = true;
1045
+ index = errorHandler.start;
1046
+ continue;
1047
+ }
1048
+ }
1049
+
1050
+ if (stopRequested) break;
1051
+
1052
+ index += 1;
1053
+ if (inErrorHandler && errorHandler && index > errorHandler.end) break;
1054
+ }
1055
+
1056
+ if (globalWait) await page.waitForTimeout(parseFloat(globalWait) * 1000);
1057
+ await page.waitForTimeout(baseDelay(500));
1058
+
1059
// Serialize the live document to an HTML string with non-content nodes
// (script, style, svg, link, noscript) removed. When `includeShadowDom` is
// truthy, every shadow root reachable through `element.shadowRoot` is embedded
// in its host as <template data-shadowroot="open">…</template>.
const cleanedHtml = await page.evaluate((withShadow) => {
  // Remove nodes that carry no extractable content from `root`'s subtree.
  const stripUseless = (root) => {
    const useless = root.querySelectorAll('script, style, svg, link, noscript');
    useless.forEach(node => node.remove());
  };

  // Deep-clone `root` and attach a serialized copy of each shadow root to the
  // corresponding cloned host element.
  const cloneWithShadow = (root) => {
    const clone = root.cloneNode(true);
    const walkerOrig = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT);
    const walkerClone = document.createTreeWalker(clone, NodeFilter.SHOW_ELEMENT);

    // Pass 1: pair each shadow host with its clone WITHOUT touching the clone.
    // Appending the <template> while both walkers are still advancing adds an
    // element that exists only in the clone; the clone walker would visit it
    // and fall one element behind, attaching later shadow roots to the wrong
    // hosts.
    const shadowHosts = [];
    while (walkerOrig.nextNode() && walkerClone.nextNode()) {
      const orig = walkerOrig.currentNode;
      if (orig.shadowRoot) {
        shadowHosts.push({ shadowRoot: orig.shadowRoot, cloned: walkerClone.currentNode });
      }
    }

    // Pass 2: now that the pairing is fixed, mutate the clone.
    for (const { shadowRoot, cloned } of shadowHosts) {
      const template = document.createElement('template');
      template.setAttribute('data-shadowroot', 'open');
      template.innerHTML = shadowRoot.innerHTML;
      // querySelectorAll on the clone does not reach into template.content,
      // so the embedded shadow markup has to be cleaned separately.
      stripUseless(template.content);
      cloned.appendChild(template);
    }

    stripUseless(clone);
    return clone;
  };

  const clone = withShadow ? cloneWithShadow(document.documentElement) : document.documentElement.cloneNode(true);
  if (!withShadow) stripUseless(clone);
  return clone.outerHTML;
}, includeShadowDom);
1089
+
1090
/**
 * Run a user-supplied extraction script against a JSDOM parse of `html`.
 *
 * The script body is wrapped in an async function and receives `$$data`,
 * `window`, `document`, `DOMParser` and a `console` whose log/warn/error calls
 * are captured instead of printed. `$$data` exposes `html()`, `url()`,
 * `window`, `document` and, when `includeShadowDom` is truthy, the helpers
 * `shadowQueryAll(selector, root?)` and `shadowText(root?)`, which also look
 * inside `<template data-shadowroot>` elements.
 *
 * @param {string} script  Script source; anything falsy or non-string is a no-op.
 * @param {string} html    Markup to parse (empty document when falsy).
 * @param {string} pageUrl Value returned by `$$data.url()`.
 * @returns {Promise<{result: *, logs: string[]}>} The script's return value and
 *   captured console lines. Failures are never thrown; they are reported as an
 *   "Extraction script error: …" string in `result`.
 */
const runExtractionScript = async (script, html, pageUrl) => {
  if (!script || typeof script !== 'string') return { result: undefined, logs: [] };

  // Declared outside the try so console output captured before a failure is
  // still returned alongside the error.
  const logBuffer = [];
  let dom;
  try {
    dom = new JSDOM(html || '');
    const { window } = dom;

    const capture = (...args) => logBuffer.push(args.join(' '));
    const consoleProxy = { log: capture, warn: capture, error: capture };

    // Depth-first walk over `node` and its descendants that additionally
    // descends into the content fragment of <template data-shadowroot>.
    const isShadowTemplate = (node) =>
      node.nodeType === 1 && node.tagName === 'TEMPLATE' && node.hasAttribute('data-shadowroot');
    const walkWithShadow = (node, visit) => {
      if (!node) return;
      visit(node);
      if (isShadowTemplate(node)) walkWithShadow(node.content, visit);
      if (node.childNodes) {
        node.childNodes.forEach((child) => walkWithShadow(child, visit));
      }
    };

    // All elements matching `selector`, including those inside shadow templates.
    const shadowQueryAll = (selector, root = window.document) => {
      const results = [];
      walkWithShadow(root, (node) => {
        if (node.nodeType === 1 && selector && node.matches && node.matches(selector)) {
          results.push(node);
        }
      });
      return results;
    };

    // All non-empty, trimmed text node values, including those inside shadow templates.
    const shadowText = (root = window.document) => {
      const texts = [];
      walkWithShadow(root, (node) => {
        if (node.nodeType !== 3) return;
        const text = node.nodeValue ? node.nodeValue.trim() : '';
        if (text) texts.push(text);
      });
      return texts;
    };

    // SECURITY: this compiles and runs caller-supplied source inside the
    // server's own Node.js process (it is not sandboxed by JSDOM). Only accept
    // extraction scripts from trusted, authenticated users.
    const executor = new Function(
      '$$data',
      'window',
      'document',
      'DOMParser',
      'console',
      `"use strict"; return (async () => { ${script}\n})();`
    );
    const $$data = {
      html: () => html || '',
      url: () => pageUrl || '',
      window,
      document: window.document,
      shadowQueryAll: includeShadowDom ? shadowQueryAll : undefined,
      shadowText: includeShadowDom ? shadowText : undefined
    };
    const result = await executor($$data, window, window.document, window.DOMParser, consoleProxy);
    return { result, logs: logBuffer };
  } catch (e) {
    // Scripts may throw non-Error values (`throw 'x'`, `throw null`).
    const detail = e?.message ?? String(e);
    return { result: `Extraction script error: ${detail}`, logs: logBuffer };
  } finally {
    // Release timers and listeners the script may have registered on the window.
    try { dom?.window.close(); } catch { /* best-effort cleanup */ }
  }
};
1171
+
1172
+ const extractionScriptRaw = typeof data.extractionScript === 'string'
1173
+ ? data.extractionScript
1174
+ : (data.taskSnapshot && typeof data.taskSnapshot.extractionScript === 'string' ? data.taskSnapshot.extractionScript : undefined);
1175
+ const extractionScript = extractionScriptRaw ? resolveTemplate(extractionScriptRaw) : undefined;
1176
+ const extraction = await runExtractionScript(extractionScript, cleanedHtml, page.url());
1177
+
1178
// Simple HTML formatter: puts every tag on its own line, indented by nesting
// depth. It is regex-based, not a parser, so callers should fall back to the
// raw markup if the output looks wrong.
const formatHTML = (html) => {
  // Elements that never have a closing tag and therefore must not deepen the
  // indent (HTML void elements, plus the legacy `param`).
  const voidTags = new Set([
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
    'link', 'meta', 'param', 'source', 'track', 'wbr'
  ]);
  let indent = 0;
  // The tag name accepts `-` so custom elements such as <input-field> are not
  // mistaken for the void element whose name they start with.
  return html.replace(/<(\/?)([a-z0-9-]+)([^>]*?)(\/?)>/gi, (match, slash, tag, attrs, selfClose) => {
    // Clamp the counter itself: a stray closing tag must not push the depth
    // negative and under-indent everything that follows.
    if (slash) indent = Math.max(0, indent - 1);
    const line = ' '.repeat(indent) + match;
    if (!slash && !selfClose && !voidTags.has(tag.toLowerCase())) indent++;
    return '\n' + line;
  }).trim();
};
1188
+
1189
// Pretty-print `html` with formatHTML, but hand back the untouched input
// whenever formatting throws, yields nothing, or produces output shorter than
// max(200, half the input length). Non-string input yields ''.
const safeFormatHTML = (html) => {
  if (typeof html !== 'string') {
    return '';
  }

  let pretty;
  try {
    pretty = formatHTML(html);
  } catch {
    return html;
  }

  const minimumLength = Math.max(200, Math.floor(html.length * 0.5));
  const looksCollapsed = !pretty || pretty.length < minimumLength;
  return looksCollapsed ? html : pretty;
};
1200
+
1201
+ // Ensure the public/screenshots directory exists
1202
+ const screenshotsDir = path.join(__dirname, 'public', 'screenshots');
1203
+ if (!fs.existsSync(screenshotsDir)) {
1204
+ fs.mkdirSync(screenshotsDir, { recursive: true });
1205
+ }
1206
+
1207
+ const screenshotName = `agent_${Date.now()}.png`;
1208
+ const screenshotPath = path.join(screenshotsDir, screenshotName);
1209
+ try {
1210
+ await page.screenshot({ path: screenshotPath, fullPage: false });
1211
+ } catch (e) {
1212
+ console.error('Agent Screenshot failed:', e.message);
1213
+ }
1214
+
1215
+ const extractionFormat = String(data.extractionFormat || (data.taskSnapshot && data.taskSnapshot.extractionFormat) || '').toLowerCase() === 'csv'
1216
+ ? 'csv'
1217
+ : 'json';
1218
+ const rawExtraction = extraction.result !== undefined ? extraction.result : (extraction.logs.length ? extraction.logs.join('\n') : undefined);
1219
+ const formattedExtraction = extractionFormat === 'csv' ? toCsvString(rawExtraction) : rawExtraction;
1220
+
1221
+ // Defensive return for the frontend: always return fields, even if empty on error
1222
+ const outputData = {
1223
+ final_url: page.url() || url || '',
1224
+ logs: logs || [],
1225
+ html: typeof cleanedHtml === 'string' ? safeFormatHTML(cleanedHtml) : '',
1226
+ data: formattedExtraction,
1227
+ screenshot_url: fs.existsSync(screenshotPath) ? `/screenshots/${screenshotName}` : null
1228
+ };
1229
+
1230
+ try { await context.storageState({ path: STORAGE_STATE_FILE }); } catch {}
1231
+ try { await browser.close(); } catch {}
1232
+ res.json(outputData);
1233
+ } catch (error) {
1234
+ console.error('Agent Error:', error);
1235
+ if (browser) await browser.close();
1236
+ res.status(500).json({ error: 'Agent failed', details: error.message });
1237
+ }
1238
+ }
1239
+
1240
+ module.exports = { handleAgent, setProgressReporter, setStopChecker };