@doppelgangerdev/doppelganger 0.5.6 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/agent.js CHANGED
@@ -2,8 +2,10 @@ const { chromium } = require('playwright');
2
2
  const { JSDOM } = require('jsdom');
3
3
  const fs = require('fs');
4
4
  const path = require('path');
5
+ const vm = require('vm');
5
6
  const { getProxySelection } = require('./proxy-rotation');
6
7
  const { selectUserAgent } = require('./user-agent-settings');
8
+ const { formatHTML, safeFormatHTML } = require('./html-utils');
7
9
 
8
10
  const STORAGE_STATE_PATH = path.join(__dirname, 'storage_state.json');
9
11
  const STORAGE_STATE_FILE = (() => {
@@ -20,10 +22,10 @@ const STORAGE_STATE_FILE = (() => {
20
22
 
21
23
  const API_KEY_FILE = path.join(__dirname, 'data', 'api_key.json');
22
24
 
23
- const loadApiKey = () => {
24
- if (!fs.existsSync(API_KEY_FILE)) return null;
25
+ const loadApiKey = async () => {
25
26
  try {
26
- const data = JSON.parse(fs.readFileSync(API_KEY_FILE, 'utf8'));
27
+ const raw = await fs.promises.readFile(API_KEY_FILE, 'utf8');
28
+ const data = JSON.parse(raw);
27
29
  return data && data.apiKey ? data.apiKey : null;
28
30
  } catch {
29
31
  return null;
@@ -113,6 +115,12 @@ async function overshootScroll(page, targetY) {
113
115
  const punctuationPause = /[.,!?;:]/;
114
116
 
115
117
  const randomBetween = (min, max) => min + Math.random() * (max - min);
/**
 * Interprets a loosely-typed flag (request body value, query-string value, ...)
 * as a boolean.
 *
 * - Real booleans are returned unchanged.
 * - `undefined` / `null` are treated as "not set" and yield `false`.
 * - Anything else is stringified, trimmed and lower-cased; only `'true'` and
 *   `'1'` count as enabled.
 *
 * @param {*} value - Raw flag value.
 * @returns {boolean} `true` when the flag is explicitly enabled.
 */
const parseBooleanFlag = (value) => {
  if (typeof value === 'boolean') return value;
  if (value === undefined || value === null) return false;
  // Trim first so padded inputs such as ' true ' or '1\n' are not misread as false.
  const normalized = String(value).trim().toLowerCase();
  return normalized === 'true' || normalized === '1';
};
116
124
 
117
125
  async function humanType(page, selector, text, options = {}) {
118
126
  const { allowTypos = false, naturalTyping = false, fatigue = false } = options;
@@ -161,6 +169,93 @@ async function humanType(page, selector, text, options = {}) {
161
169
  }
162
170
  }
163
171
 
// Symbol under which a safe proxy exposes the real (host) object it wraps.
const REAL_TARGET = Symbol('REAL_TARGET');

/**
 * Returns the host object behind a safe proxy, or the value itself when it is
 * not a safe proxy (or is falsy).
 */
function unwrapSafeProxy(value) {
  return (value && value[REAL_TARGET]) ? value[REAL_TARGET] : value;
}

/**
 * Prepares arguments for a call into host code: safe proxies are unwrapped to
 * their real targets, and function arguments are replaced by a callback whose
 * own arguments are wrapped in safe proxies before the original function runs.
 */
function prepareHostArgs(args) {
  return args.map((arg) => {
    const realArg = unwrapSafeProxy(arg);
    if (typeof realArg !== 'function') return realArg;
    return function (...cbArgs) {
      return realArg.apply(this, cbArgs.map((cbArg) => createSafeProxy(cbArg)));
    };
  });
}

/**
 * Builds the proxy for createSafeProxy.
 *
 * When `hasBoundThis` is true the proxied function is always invoked with
 * `boundThis` as its receiver, so a method read from a proxied object keeps
 * working both as `obj.method()` and after being detached from `obj`.
 */
function buildSafeProxy(target, hasBoundThis, boundThis) {
  if (target === null || (typeof target !== 'object' && typeof target !== 'function')) {
    return target;
  }

  let shadowTarget = target;
  if (typeof target === 'function') {
    // A plain function is used as the proxy target so the proxy is both callable
    // and constructible; the real function is stored under REAL_TARGET.
    shadowTarget = function () { };
    // Mirroring name/length is best-effort only.
    try { Object.defineProperty(shadowTarget, 'name', { value: target.name, configurable: true }); } catch { /* ignore */ }
    try { Object.defineProperty(shadowTarget, 'length', { value: target.length, configurable: true }); } catch { /* ignore */ }
    shadowTarget[REAL_TARGET] = target;
  }

  // NOTE(review): only get/apply/construct are intercepted; every other
  // operation falls through to the shadow target. Treat this as hardening,
  // not as a complete security boundary.
  return new Proxy(shadowTarget, {
    get(shadow, prop) {
      const realTarget = shadow[REAL_TARGET] || shadow;
      if (prop === 'constructor' || prop === '__proto__') {
        return undefined;
      }
      if (prop === REAL_TARGET) return realTarget;

      const value = Reflect.get(realTarget, prop, realTarget);

      if (typeof value === 'function') {
        // Return a safe proxy (not a raw wrapper function) so that the member's
        // own `constructor` stays hidden, `new obj.Member()` goes through the
        // construct trap, and static properties remain readable.
        return buildSafeProxy(value, true, realTarget);
      }
      return createSafeProxy(value);
    },
    apply(shadow, thisArg, argList) {
      const realTarget = shadow[REAL_TARGET] || shadow;
      const realThis = hasBoundThis ? boundThis : unwrapSafeProxy(thisArg);
      const result = Reflect.apply(realTarget, realThis, prepareHostArgs(argList));
      return createSafeProxy(result);
    },
    construct(shadow, argumentsList) {
      const realTarget = shadow[REAL_TARGET] || shadow;
      // Constructor arguments are only unwrapped; callbacks are passed as-is.
      const realArgs = argumentsList.map((arg) => unwrapSafeProxy(arg));
      const result = Reflect.construct(realTarget, realArgs, realTarget);
      return createSafeProxy(result);
    },
  });
}

/**
 * Wraps a host object or function in a Proxy intended for exposure to
 * sandboxed scripts.
 *
 * - Primitives, `null` and `undefined` are returned unchanged.
 * - Reading `constructor` or `__proto__` through the proxy yields `undefined`.
 * - Property values, call results and constructed instances are wrapped again,
 *   so the restriction applies recursively.
 * - Safe proxies passed back as arguments are unwrapped before reaching host
 *   code; function arguments receive safe-proxied arguments when host code
 *   calls them.
 *
 * @param {*} target - Value to wrap.
 * @returns {*} A proxy for objects/functions, otherwise `target` itself.
 */
function createSafeProxy(target) {
  return buildSafeProxy(target, false, undefined);
}
258
+
164
259
  async function handleAgent(req, res) {
165
260
  const data = (req.method === 'POST') ? req.body : req.query;
166
261
  let { url, actions, wait: globalWait, rotateUserAgents, rotateProxies, humanTyping, stealth = {} } = data;
@@ -170,6 +265,10 @@ async function handleAgent(req, res) {
170
265
  const includeShadowDom = includeShadowDomRaw === undefined
171
266
  ? true
172
267
  : !(String(includeShadowDomRaw).toLowerCase() === 'false' || includeShadowDomRaw === false);
268
+ const disableRecordingRaw = data.disableRecording ?? req.query.disableRecording;
269
+ const disableRecording = parseBooleanFlag(disableRecordingRaw);
270
+ const statelessExecutionRaw = data.statelessExecution ?? req.query.statelessExecution;
271
+ const statelessExecution = parseBooleanFlag(statelessExecutionRaw);
173
272
  const {
174
273
  allowTypos = false,
175
274
  idleMovements = false,
@@ -194,10 +293,10 @@ async function handleAgent(req, res) {
194
293
  });
195
294
  }
196
295
 
197
- const localPort = req.socket && req.socket.localPort;
198
- const configuredPort = process.env.PORT || process.env.VITE_BACKEND_PORT;
199
- const basePort = localPort || configuredPort || '11345';
200
- const baseUrl = `${req.protocol || 'http'}://127.0.0.1:${basePort}`;
296
+ const localPort = req.socket && req.socket.localPort;
297
+ const configuredPort = process.env.PORT || process.env.VITE_BACKEND_PORT;
298
+ const basePort = localPort || configuredPort || '11345';
299
+ const baseUrl = `${req.protocol || 'http'}://127.0.0.1:${basePort}`;
201
300
  const runtimeVars = { ...(data.taskVariables || data.variables || {}) };
202
301
  let lastBlockOutput = null;
203
302
  runtimeVars['block.output'] = lastBlockOutput;
@@ -258,39 +357,58 @@ async function handleAgent(req, res) {
258
357
 
259
358
  const parseCsv = (input) => {
260
359
  const text = typeof input === 'string' ? input : String(input || '');
360
+ const len = text.length;
261
361
  const rows = [];
262
362
  let row = [];
263
363
  let current = '';
264
364
  let inQuotes = false;
365
+ const specialChar = /[",\n\r]/g;
265
366
 
266
- for (let i = 0; i < text.length; i += 1) {
267
- const char = text[i];
367
+ let i = 0;
368
+ while (i < len) {
268
369
  if (inQuotes) {
269
- if (char === '"') {
270
- if (text[i + 1] === '"') {
271
- current += '"';
272
- i += 1;
273
- } else {
274
- inQuotes = false;
275
- }
370
+ const nextQuote = text.indexOf('"', i);
371
+ if (nextQuote === -1) {
372
+ current += text.slice(i);
373
+ i = len;
374
+ break;
375
+ }
376
+ current += text.slice(i, nextQuote);
377
+ i = nextQuote;
378
+ if (i + 1 < len && text[i + 1] === '"') {
379
+ current += '"';
380
+ i += 2;
276
381
  } else {
277
- current += char;
382
+ inQuotes = false;
383
+ i += 1;
278
384
  }
279
385
  } else {
386
+ specialChar.lastIndex = i;
387
+ const match = specialChar.exec(text);
388
+ if (!match) {
389
+ current += text.slice(i);
390
+ i = len;
391
+ break;
392
+ }
393
+ const idx = match.index;
394
+ const char = match[0];
395
+ current += text.slice(i, idx);
396
+ i = idx;
280
397
  if (char === '"') {
281
398
  inQuotes = true;
399
+ i += 1;
282
400
  } else if (char === ',') {
283
401
  row.push(current);
284
402
  current = '';
403
+ i += 1;
285
404
  } else if (char === '\n') {
286
405
  row.push(current);
287
406
  rows.push(row);
288
407
  row = [];
289
408
  current = '';
409
+ i += 1;
290
410
  } else if (char === '\r') {
291
- // ignore CR (handle CRLF)
292
- } else {
293
- current += char;
411
+ i += 1;
294
412
  }
295
413
  }
296
414
  }
@@ -398,7 +516,7 @@ async function handleAgent(req, res) {
398
516
  return { startToEnd, startToElse, elseToEnd, endToStart };
399
517
  };
400
518
 
401
- const selectedUA = selectUserAgent(rotateUserAgents);
519
+ const selectedUA = await selectUserAgent(rotateUserAgents);
402
520
 
403
521
  let browser;
404
522
  let context;
@@ -425,9 +543,7 @@ async function handleAgent(req, res) {
425
543
  browser = await chromium.launch(launchOptions);
426
544
 
427
545
  const recordingsDir = path.join(__dirname, 'data', 'recordings');
428
- if (!fs.existsSync(recordingsDir)) {
429
- fs.mkdirSync(recordingsDir, { recursive: true });
430
- }
546
+ await fs.promises.mkdir(recordingsDir, { recursive: true });
431
547
 
432
548
  const rotateViewport = String(data.rotateViewport).toLowerCase() === 'true' || data.rotateViewport === true;
433
549
  const viewport = rotateViewport
@@ -442,13 +558,16 @@ async function handleAgent(req, res) {
442
558
  timezoneId: 'America/New_York',
443
559
  colorScheme: 'dark',
444
560
  permissions: ['geolocation'],
445
- recordVideo: { dir: recordingsDir, size: viewport }
446
561
  };
447
562
 
448
- if (fs.existsSync(STORAGE_STATE_FILE)) {
563
+ const shouldUseStorageState = !statelessExecution && fs.existsSync(STORAGE_STATE_FILE);
564
+ if (shouldUseStorageState) {
449
565
  contextOptions.storageState = STORAGE_STATE_FILE;
450
566
  }
451
567
 
568
+ if (!disableRecording) {
569
+ contextOptions.recordVideo = { dir: recordingsDir, size: viewport };
570
+ }
452
571
  context = await browser.newContext(contextOptions);
453
572
 
454
573
  await context.addInitScript(() => {
@@ -1009,7 +1128,7 @@ async function handleAgent(req, res) {
1009
1128
  case 'start': {
1010
1129
  const taskId = resolveMaybe(act.value);
1011
1130
  if (!taskId) throw new Error('Missing task id.');
1012
- const apiKey = loadApiKey() || data.apiKey || data.key;
1131
+ const apiKey = (await loadApiKey()) || data.apiKey || data.key;
1013
1132
  if (!apiKey) {
1014
1133
  logs.push('No API key available; attempting internal start.');
1015
1134
  }
@@ -1350,16 +1469,6 @@ async function handleAgent(req, res) {
1350
1469
  return { shadowQueryAll, shadowText };
1351
1470
  })();
1352
1471
 
1353
- // CodeQL alerts on dynamic eval, but extraction scripts intentionally run inside the browser sandbox,
1354
- // so we expose only the helpers needed (window, document, DOMParser, console) and keep the evaluation confined there.
1355
- const executor = new Function(
1356
- '$$data',
1357
- 'window',
1358
- 'document',
1359
- 'DOMParser',
1360
- 'console',
1361
- `"use strict"; return (async () => { ${script}\n})();`
1362
- );
1363
1472
  const $$data = {
1364
1473
  html: () => html || '',
1365
1474
  url: () => pageUrl || '',
@@ -1368,7 +1477,33 @@ async function handleAgent(req, res) {
1368
1477
  shadowQueryAll: includeShadowDom ? shadowHelpers.shadowQueryAll : undefined,
1369
1478
  shadowText: includeShadowDom ? shadowHelpers.shadowText : undefined
1370
1479
  };
1371
- const result = await executor($$data, window, window.document, window.DOMParser, consoleProxy);
1480
+
1481
+ // Use vm for sandboxed execution
1482
+ const sandbox = Object.create(null);
1483
+ sandbox.window = createSafeProxy(window);
1484
+ sandbox.document = createSafeProxy(window.document);
1485
+ sandbox.DOMParser = createSafeProxy(window.DOMParser);
1486
+ sandbox.console = createSafeProxy(consoleProxy);
1487
+ sandbox.$$data = createSafeProxy($$data);
1488
+
1489
+ // Pass the script as a variable to avoid string interpolation (CodeQL: Code Injection)
1490
+ sandbox.$$userScript = script;
1491
+
1492
+ const context = vm.createContext(sandbox);
1493
+
1494
+ // We use a static wrapper to execute the user script.
1495
+ // This ensures that the code passed to vm.runInContext is constant and safe.
1496
+ // The user script is retrieved from the sandbox environment and executed as an AsyncFunction.
1497
+ const scriptCode = `
1498
+ "use strict";
1499
+ (async () => {
1500
+ const AsyncFunction = Object.getPrototypeOf(async function(){}).constructor;
1501
+ const fn = new AsyncFunction('$$data', 'window', 'document', 'DOMParser', 'console', $$userScript);
1502
+ return fn($$data, window, document, DOMParser, console);
1503
+ })();
1504
+ `;
1505
+
1506
+ const result = await vm.runInContext(scriptCode, context);
1372
1507
  return { result, logs: logBuffer };
1373
1508
  } catch (e) {
1374
1509
  return { result: `Extraction script error: ${e.message}`, logs: [] };
@@ -1381,29 +1516,6 @@ async function handleAgent(req, res) {
1381
1516
  const extractionScript = extractionScriptRaw ? resolveTemplate(extractionScriptRaw) : undefined;
1382
1517
  const extraction = await runExtractionScript(extractionScript, cleanedHtml, page.url());
1383
1518
 
1384
- // Simple HTML Formatter (fallback to raw if formatting collapses content)
1385
- const formatHTML = (html) => {
1386
- let indent = 0;
1387
- return html.replace(/<(\/?)([a-z0-9]+)([^>]*?)(\/?)>/gi, (match, slash, tag, attrs, selfClose) => {
1388
- if (slash) indent--;
1389
- const result = ' '.repeat(Math.max(0, indent)) + match;
1390
- if (!slash && !selfClose && !['img', 'br', 'hr', 'input', 'link', 'meta'].includes(tag.toLowerCase())) indent++;
1391
- return '\n' + result;
1392
- }).trim();
1393
- };
1394
-
1395
- const safeFormatHTML = (html) => {
1396
- if (typeof html !== 'string') return '';
1397
- try {
1398
- const formatted = formatHTML(html);
1399
- if (!formatted) return html;
1400
- if (formatted.length < Math.max(200, Math.floor(html.length * 0.5))) return html;
1401
- return formatted;
1402
- } catch {
1403
- return html;
1404
- }
1405
- };
1406
-
1407
1519
  // Ensure the public/screenshots directory exists
1408
1520
  const capturesDir = path.join(__dirname, 'public', 'captures');
1409
1521
  if (!fs.existsSync(capturesDir)) {
@@ -1434,7 +1546,9 @@ async function handleAgent(req, res) {
1434
1546
  };
1435
1547
 
1436
1548
  const video = page.video();
1437
- try { await context.storageState({ path: STORAGE_STATE_FILE }); } catch {}
1549
+ if (!statelessExecution) {
1550
+ try { await context.storageState({ path: STORAGE_STATE_FILE }); } catch {}
1551
+ }
1438
1552
  try { await context.close(); } catch {}
1439
1553
  if (video) {
1440
1554
  try {