@doppelgangerdev/doppelganger 0.5.7 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +2 -2
- package/README.md +9 -29
- package/agent.js +200 -101
- package/headful.js +126 -126
- package/package.json +2 -2
- package/scrape.js +249 -284
- package/server.js +469 -359
package/LICENSE
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
Version 1.0, January 2026
|
|
1
|
+
Notice & Attribution License v1.0
|
|
3
2
|
|
|
3
|
+
© 2026 Mnemosyne
|
|
4
4
|
|
|
5
5
|
1. Definitions
|
|
6
6
|
1.1. "Software" means all source code, binaries, scripts, libraries, components, build configurations, and other software artifacts provided under this license.
|
package/README.md
CHANGED
|
@@ -1,31 +1,10 @@
|
|
|
1
|
-

|
|
1
|
+

|
|
2
2
|
|
|
3
3
|
# Doppelganger — Browser Automation for Everyone
|
|
4
4
|
|
|
5
|
-
<div align="center">
|
|
6
|
-
<a href="https://doppelgangerdev.com">
|
|
7
|
-
<img src="https://img.shields.io/badge/Website-doppelgangerdev.com-0056ff?style=for-the-badge&logo=googlechrome&rounded=true" alt="Website" />
|
|
8
|
-
</a>
|
|
9
|
-
<a href="https://doppelgangerdev.com/docs">
|
|
10
|
-
<img src="https://img.shields.io/badge/Docs-doppelgangerdev.com%2Fdocs-00c2ff?style=for-the-badge&logo=readthedocs&rounded=true" alt="Docs" />
|
|
11
|
-
</a>
|
|
12
|
-
<a href="https://forum.doppelgangerdev.com">
|
|
13
|
-
<img src="https://img.shields.io/badge/Forum-forum.doppelgangerdev.com-ff9900?style=for-the-badge&logo=discourse&rounded=true" alt="Forum" />
|
|
14
|
-
</a>
|
|
15
|
-
<a href="https://opensource.org/">
|
|
16
|
-
<img src="https://img.shields.io/badge/Open_Source-Yes-0056ff?style=for-the-badge&logo=opensourceinitiative&logoColor=white" alt="Open Source" />
|
|
17
|
-
</a>
|
|
18
|
-
<a href="https://www.npmjs.com/package/@doppelgangerdev/doppelganger">
|
|
19
|
-
<img src="https://img.shields.io/badge/Version-0.5.5-6a8cff?style=for-the-badge&logo=npm&rounded=true" alt="Version" />
|
|
20
|
-
</a>
|
|
21
|
-
<a href="https://hub.docker.com/r/mnemosyneai/doppelganger">
|
|
22
|
-
<img src="https://img.shields.io/badge/Docker-mnemosyneai%2Fdoppelganger-0db7ed?style=for-the-badge&logo=docker&rounded=true" alt="Docker" />
|
|
23
|
-
</a>
|
|
24
|
-
</div>
|
|
25
|
-
|
|
26
5
|
Doppelganger is a self‑hosted, block-first automation control plane built for teams that want predictable, auditable browser workflows without pushing sensitive data to third‑party SaaS. It bundles a React/Vite frontend, an Express/Playwright backend, helper scripts, and optional CLI tooling so you can sketch blocks, inject JavaScript, rotate proxies, and run everything locally.
|
|
27
6
|
|
|
28
|
-

|
|
7
|
+

|
|
29
8
|
|
|
30
9
|
# What You Get
|
|
31
10
|
|
|
@@ -236,7 +215,7 @@ Authentication enforces sessions (`/api/auth/login`, `/api/auth/logout`, `/api/a
|
|
|
236
215
|
|
|
237
216
|
# Maintenance
|
|
238
217
|
|
|
239
|
-
- The project is governed by the **[
|
|
218
|
+
- The project is governed by the **[Notice & Attribution License v1.0](https://github.com/mnemosyne-artificial-intelligence/doppelganger/blob/main/LICENSE)**, which grants royalty-free internal/private rights while requiring notice, attribution, and source delivery when the software is deployed for external end users; hosting it as a competing service is prohibited.
|
|
240
219
|
- Keep `data/` and `storage_state.json` backed up if you rely on historical cookies or proxies.
|
|
241
220
|
- Pick up release updates by pulling `mnemosyneai/doppelganger` (Docker) or running `npm i @doppelgangerdev/doppelganger` (npm). The Settings view always displays the current package version.
|
|
242
221
|
- Contributions: follow `.github/` templates, respect `CONTRIBUTING.md`, and run available lint/test scripts if you touch critical areas.
|
|
@@ -250,15 +229,16 @@ Authentication enforces sessions (`/api/auth/login`, `/api/auth/logout`, `/api/a
|
|
|
250
229
|
- [x] **Task proxy rotation toggle** — the “Rotate Proxies” option in each task ties into the Settings rotation controls, enabling rotation per execution.
|
|
251
230
|
- [ ] **Action key combos** — add modifier shortcuts (e.g., Ctrl+Click, Shift+Scroll) so tasks can more closely mirror real user interactions.
|
|
252
231
|
- [ ] **Click-and-drag block** — add an action that does drag gestures (selecting text, moving items) so tasks can simulate click-and-drag flows.
|
|
253
|
-
- [x] **Recording controls** — Task editor now exposes a “Disable automated recording” switch in the general settings panel so workflows can skip video capture on a per-task basis.
|
|
232
|
+
- [x] **Recording controls** — Task editor now exposes a “Disable automated recording” switch in the general settings panel so workflows can skip video capture on a per-task basis.
|
|
254
233
|
- [ ] **File downloads** — add explicit support for agent tasks to download files (PDFs, CSVs, etc.) directly from target pages, then surface those downloads in the UI so users can preview or export them without sifting through captures.
|
|
255
|
-
- [x] **Stateless mode** — Tasks now have a “Stateless execution” toggle alongside the recording controls so each run can skip `storage_state.json`, ensuring no cookies or local storage persist between executions for that workflow.
|
|
256
|
-
- [ ] **Adblocking filters** — add controls so execution contexts can enable built-in ad/malware filtering (e.g., via hosts file overrides or request blocking) to reduce noise on sensitive sites.
|
|
257
|
-
- [ ] **Extraction response mode** — add a Settings switch so users can choose whether the UI returns HTML+data (for debugging) or data-only payloads when extraction scripts run.
|
|
234
|
+
- [x] **Stateless mode** — Tasks now have a “Stateless execution” toggle alongside the recording controls so each run can skip `storage_state.json`, ensuring no cookies or local storage persist between executions for that workflow.
|
|
235
|
+
- [ ] **Adblocking filters** — add controls so execution contexts can enable built-in ad/malware filtering (e.g., via hosts file overrides or request blocking) to reduce noise on sensitive sites.
|
|
236
|
+
- [ ] **Extraction response mode** — add a Settings switch so users can choose whether the UI returns HTML+data (for debugging) or data-only payloads when extraction scripts run.
|
|
258
237
|
- [ ] **Folder organization** — group tasks, assets, and captures into named folders so operators can browse, filter, and download collections per workflow.
|
|
259
238
|
- [ ] **Stable capture retention** — add filtering, pinning, and archiving in captures tab so teams can keep compliance records.
|
|
260
239
|
- [ ] **Workspace templates** — allow saving and sharing workspace presets (layout + default proxies/agents) so new team members can onboard with pre-configured setups.
|
|
261
240
|
- [ ] **Geo-targeted exits** — allow choosing proxy regions for tasks so you can pin the apparent location before running a job.
|
|
241
|
+
- [ ] **Complete anti-detection coverage** — follow browserscan.net's anti-detection checklist (fingerprints, headers, fonts, WebRTC, etc.) so automated runs mimic real browsers across task executions.
|
|
262
242
|
- [ ] **Session recording redaction** — add toggles to redact sensitive fields (passwords, credit cards) from recordings/logs before storing them.
|
|
263
243
|
- [ ] **Two-factor authentication** — add optional TOTP/second-factor support to Settings/Auth so operators can lock down the UI with 2FA.
|
|
264
244
|
- [ ] **AI-assisted fixing** — add an “AI auto-fix” helper that suggests layout, selector, and proxy tweaks after failed runs, letting teams approve or discard the proposed changes without switching contexts.
|
|
@@ -276,4 +256,4 @@ Authentication enforces sessions (`/api/auth/login`, `/api/auth/logout`, `/api/a
|
|
|
276
256
|
|
|
277
257
|
- Report issues or request features via the GitHub repo issue tracker.
|
|
278
258
|
- Follow the authors on `https://github.com/mnemosyne-artificial-intelligence` for releases.
|
|
279
|
-
- Share automation recipes with other self-hosted users in your org, but respect the license for sharing infrastructure.
|
|
259
|
+
- Share automation recipes with other self-hosted users in your org, but respect the license for sharing infrastructure.
|
package/agent.js
CHANGED
|
@@ -2,8 +2,10 @@ const { chromium } = require('playwright');
|
|
|
2
2
|
const { JSDOM } = require('jsdom');
|
|
3
3
|
const fs = require('fs');
|
|
4
4
|
const path = require('path');
|
|
5
|
+
const vm = require('vm');
|
|
5
6
|
const { getProxySelection } = require('./proxy-rotation');
|
|
6
7
|
const { selectUserAgent } = require('./user-agent-settings');
|
|
8
|
+
const { formatHTML, safeFormatHTML } = require('./html-utils');
|
|
7
9
|
|
|
8
10
|
const STORAGE_STATE_PATH = path.join(__dirname, 'storage_state.json');
|
|
9
11
|
const STORAGE_STATE_FILE = (() => {
|
|
@@ -20,10 +22,10 @@ const STORAGE_STATE_FILE = (() => {
|
|
|
20
22
|
|
|
21
23
|
const API_KEY_FILE = path.join(__dirname, 'data', 'api_key.json');
|
|
22
24
|
|
|
23
|
-
const loadApiKey = () => {
|
|
24
|
-
if (!fs.existsSync(API_KEY_FILE)) return null;
|
|
25
|
+
const loadApiKey = async () => {
|
|
25
26
|
try {
|
|
26
|
-
const
|
|
27
|
+
const raw = await fs.promises.readFile(API_KEY_FILE, 'utf8');
|
|
28
|
+
const data = JSON.parse(raw);
|
|
27
29
|
return data && data.apiKey ? data.apiKey : null;
|
|
28
30
|
} catch {
|
|
29
31
|
return null;
|
|
@@ -110,17 +112,17 @@ async function overshootScroll(page, targetY) {
|
|
|
110
112
|
}
|
|
111
113
|
}
|
|
112
114
|
|
|
113
|
-
const punctuationPause = /[.,!?;:]/;
|
|
114
|
-
|
|
115
|
-
const randomBetween = (min, max) => min + Math.random() * (max - min);
|
|
116
|
-
const parseBooleanFlag = (value) => {
|
|
117
|
-
if (typeof value === 'boolean') return value;
|
|
118
|
-
if (value === undefined || value === null) return false;
|
|
119
|
-
const normalized = String(value).toLowerCase();
|
|
120
|
-
return normalized === 'true' || normalized === '1';
|
|
121
|
-
};
|
|
122
|
-
|
|
123
|
-
async function humanType(page, selector, text, options = {}) {
|
|
115
|
+
const punctuationPause = /[.,!?;:]/;
|
|
116
|
+
|
|
117
|
+
const randomBetween = (min, max) => min + Math.random() * (max - min);
|
|
118
|
+
const parseBooleanFlag = (value) => {
|
|
119
|
+
if (typeof value === 'boolean') return value;
|
|
120
|
+
if (value === undefined || value === null) return false;
|
|
121
|
+
const normalized = String(value).toLowerCase();
|
|
122
|
+
return normalized === 'true' || normalized === '1';
|
|
123
|
+
};
|
|
124
|
+
|
|
125
|
+
async function humanType(page, selector, text, options = {}) {
|
|
124
126
|
const { allowTypos = false, naturalTyping = false, fatigue = false } = options;
|
|
125
127
|
if (selector) await page.focus(selector);
|
|
126
128
|
const chars = text.split('');
|
|
@@ -167,21 +169,108 @@ async function humanType(page, selector, text, options = {}) {
|
|
|
167
169
|
}
|
|
168
170
|
}
|
|
169
171
|
|
|
172
|
+
const REAL_TARGET = Symbol('REAL_TARGET');
|
|
173
|
+
|
|
174
|
+
function createSafeProxy(target) {
|
|
175
|
+
if (target === null || (typeof target !== 'object' && typeof target !== 'function')) {
|
|
176
|
+
return target;
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
let shadowTarget = target;
|
|
180
|
+
if (typeof target === 'function') {
|
|
181
|
+
shadowTarget = function (...args) { };
|
|
182
|
+
try { Object.defineProperty(shadowTarget, 'name', { value: target.name, configurable: true }); } catch {}
|
|
183
|
+
try { Object.defineProperty(shadowTarget, 'length', { value: target.length, configurable: true }); } catch {}
|
|
184
|
+
shadowTarget[REAL_TARGET] = target;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
return new Proxy(shadowTarget, {
|
|
188
|
+
get(target, prop, receiver) {
|
|
189
|
+
const realTarget = target[REAL_TARGET] || target;
|
|
190
|
+
if (prop === 'constructor' || prop === '__proto__') {
|
|
191
|
+
return undefined;
|
|
192
|
+
}
|
|
193
|
+
if (prop === REAL_TARGET) return realTarget;
|
|
194
|
+
|
|
195
|
+
const value = Reflect.get(realTarget, prop, realTarget);
|
|
196
|
+
|
|
197
|
+
if (typeof value === 'function') {
|
|
198
|
+
return function (...args) {
|
|
199
|
+
const realArgs = args.map(arg => {
|
|
200
|
+
return (arg && arg[REAL_TARGET]) ? arg[REAL_TARGET] : arg;
|
|
201
|
+
});
|
|
202
|
+
const wrappedArgs = realArgs.map(arg => {
|
|
203
|
+
if (typeof arg === 'function') {
|
|
204
|
+
return function (...cbArgs) {
|
|
205
|
+
const wrappedCbArgs = cbArgs.map(a => createSafeProxy(a));
|
|
206
|
+
return arg.apply(this, wrappedCbArgs);
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
return arg;
|
|
210
|
+
});
|
|
211
|
+
try {
|
|
212
|
+
const result = value.apply(realTarget, wrappedArgs);
|
|
213
|
+
return createSafeProxy(result);
|
|
214
|
+
} catch (e) {
|
|
215
|
+
throw e;
|
|
216
|
+
}
|
|
217
|
+
};
|
|
218
|
+
}
|
|
219
|
+
return createSafeProxy(value);
|
|
220
|
+
},
|
|
221
|
+
apply(target, thisArg, argList) {
|
|
222
|
+
const realTarget = target[REAL_TARGET] || target;
|
|
223
|
+
const realThis = (thisArg && thisArg[REAL_TARGET]) ? thisArg[REAL_TARGET] : thisArg;
|
|
224
|
+
const realArgs = argList.map(arg => {
|
|
225
|
+
return (arg && arg[REAL_TARGET]) ? arg[REAL_TARGET] : arg;
|
|
226
|
+
});
|
|
227
|
+
const wrappedArgs = realArgs.map(arg => {
|
|
228
|
+
if (typeof arg === 'function') {
|
|
229
|
+
return function (...cbArgs) {
|
|
230
|
+
const wrappedCbArgs = cbArgs.map(a => createSafeProxy(a));
|
|
231
|
+
return arg.apply(this, wrappedCbArgs);
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
return arg;
|
|
235
|
+
});
|
|
236
|
+
|
|
237
|
+
try {
|
|
238
|
+
const result = Reflect.apply(realTarget, realThis, wrappedArgs);
|
|
239
|
+
return createSafeProxy(result);
|
|
240
|
+
} catch (e) {
|
|
241
|
+
throw e;
|
|
242
|
+
}
|
|
243
|
+
},
|
|
244
|
+
construct(target, argumentsList, newTarget) {
|
|
245
|
+
const realTarget = target[REAL_TARGET] || target;
|
|
246
|
+
const realArgs = argumentsList.map(arg => {
|
|
247
|
+
return (arg && arg[REAL_TARGET]) ? arg[REAL_TARGET] : arg;
|
|
248
|
+
});
|
|
249
|
+
try {
|
|
250
|
+
const result = Reflect.construct(realTarget, realArgs, realTarget);
|
|
251
|
+
return createSafeProxy(result);
|
|
252
|
+
} catch (e) {
|
|
253
|
+
throw e;
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
});
|
|
257
|
+
}
|
|
258
|
+
|
|
170
259
|
async function handleAgent(req, res) {
|
|
171
260
|
const data = (req.method === 'POST') ? req.body : req.query;
|
|
172
261
|
let { url, actions, wait: globalWait, rotateUserAgents, rotateProxies, humanTyping, stealth = {} } = data;
|
|
173
262
|
const runId = data.runId ? String(data.runId) : null;
|
|
174
263
|
const captureRunId = runId || `run_${Date.now()}_unknown`;
|
|
175
|
-
const includeShadowDomRaw = data.includeShadowDom ?? req.query.includeShadowDom;
|
|
176
|
-
const includeShadowDom = includeShadowDomRaw === undefined
|
|
177
|
-
? true
|
|
178
|
-
: !(String(includeShadowDomRaw).toLowerCase() === 'false' || includeShadowDomRaw === false);
|
|
179
|
-
const disableRecordingRaw = data.disableRecording ?? req.query.disableRecording;
|
|
180
|
-
const disableRecording = parseBooleanFlag(disableRecordingRaw);
|
|
181
|
-
const statelessExecutionRaw = data.statelessExecution ?? req.query.statelessExecution;
|
|
182
|
-
const statelessExecution = parseBooleanFlag(statelessExecutionRaw);
|
|
183
|
-
const {
|
|
184
|
-
allowTypos = false,
|
|
264
|
+
const includeShadowDomRaw = data.includeShadowDom ?? req.query.includeShadowDom;
|
|
265
|
+
const includeShadowDom = includeShadowDomRaw === undefined
|
|
266
|
+
? true
|
|
267
|
+
: !(String(includeShadowDomRaw).toLowerCase() === 'false' || includeShadowDomRaw === false);
|
|
268
|
+
const disableRecordingRaw = data.disableRecording ?? req.query.disableRecording;
|
|
269
|
+
const disableRecording = parseBooleanFlag(disableRecordingRaw);
|
|
270
|
+
const statelessExecutionRaw = data.statelessExecution ?? req.query.statelessExecution;
|
|
271
|
+
const statelessExecution = parseBooleanFlag(statelessExecutionRaw);
|
|
272
|
+
const {
|
|
273
|
+
allowTypos = false,
|
|
185
274
|
idleMovements = false,
|
|
186
275
|
overscroll = false,
|
|
187
276
|
deadClicks = false,
|
|
@@ -204,10 +293,10 @@ async function handleAgent(req, res) {
|
|
|
204
293
|
});
|
|
205
294
|
}
|
|
206
295
|
|
|
207
|
-
const localPort = req.socket && req.socket.localPort;
|
|
208
|
-
const configuredPort = process.env.PORT || process.env.VITE_BACKEND_PORT;
|
|
209
|
-
const basePort = localPort || configuredPort || '11345';
|
|
210
|
-
const baseUrl = `${req.protocol || 'http'}://127.0.0.1:${basePort}`;
|
|
296
|
+
const localPort = req.socket && req.socket.localPort;
|
|
297
|
+
const configuredPort = process.env.PORT || process.env.VITE_BACKEND_PORT;
|
|
298
|
+
const basePort = localPort || configuredPort || '11345';
|
|
299
|
+
const baseUrl = `${req.protocol || 'http'}://127.0.0.1:${basePort}`;
|
|
211
300
|
const runtimeVars = { ...(data.taskVariables || data.variables || {}) };
|
|
212
301
|
let lastBlockOutput = null;
|
|
213
302
|
runtimeVars['block.output'] = lastBlockOutput;
|
|
@@ -268,39 +357,58 @@ async function handleAgent(req, res) {
|
|
|
268
357
|
|
|
269
358
|
const parseCsv = (input) => {
|
|
270
359
|
const text = typeof input === 'string' ? input : String(input || '');
|
|
360
|
+
const len = text.length;
|
|
271
361
|
const rows = [];
|
|
272
362
|
let row = [];
|
|
273
363
|
let current = '';
|
|
274
364
|
let inQuotes = false;
|
|
365
|
+
const specialChar = /[",\n\r]/g;
|
|
275
366
|
|
|
276
|
-
|
|
277
|
-
|
|
367
|
+
let i = 0;
|
|
368
|
+
while (i < len) {
|
|
278
369
|
if (inQuotes) {
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
370
|
+
const nextQuote = text.indexOf('"', i);
|
|
371
|
+
if (nextQuote === -1) {
|
|
372
|
+
current += text.slice(i);
|
|
373
|
+
i = len;
|
|
374
|
+
break;
|
|
375
|
+
}
|
|
376
|
+
current += text.slice(i, nextQuote);
|
|
377
|
+
i = nextQuote;
|
|
378
|
+
if (i + 1 < len && text[i + 1] === '"') {
|
|
379
|
+
current += '"';
|
|
380
|
+
i += 2;
|
|
286
381
|
} else {
|
|
287
|
-
|
|
382
|
+
inQuotes = false;
|
|
383
|
+
i += 1;
|
|
288
384
|
}
|
|
289
385
|
} else {
|
|
386
|
+
specialChar.lastIndex = i;
|
|
387
|
+
const match = specialChar.exec(text);
|
|
388
|
+
if (!match) {
|
|
389
|
+
current += text.slice(i);
|
|
390
|
+
i = len;
|
|
391
|
+
break;
|
|
392
|
+
}
|
|
393
|
+
const idx = match.index;
|
|
394
|
+
const char = match[0];
|
|
395
|
+
current += text.slice(i, idx);
|
|
396
|
+
i = idx;
|
|
290
397
|
if (char === '"') {
|
|
291
398
|
inQuotes = true;
|
|
399
|
+
i += 1;
|
|
292
400
|
} else if (char === ',') {
|
|
293
401
|
row.push(current);
|
|
294
402
|
current = '';
|
|
403
|
+
i += 1;
|
|
295
404
|
} else if (char === '\n') {
|
|
296
405
|
row.push(current);
|
|
297
406
|
rows.push(row);
|
|
298
407
|
row = [];
|
|
299
408
|
current = '';
|
|
409
|
+
i += 1;
|
|
300
410
|
} else if (char === '\r') {
|
|
301
|
-
|
|
302
|
-
} else {
|
|
303
|
-
current += char;
|
|
411
|
+
i += 1;
|
|
304
412
|
}
|
|
305
413
|
}
|
|
306
414
|
}
|
|
@@ -408,7 +516,7 @@ async function handleAgent(req, res) {
|
|
|
408
516
|
return { startToEnd, startToElse, elseToEnd, endToStart };
|
|
409
517
|
};
|
|
410
518
|
|
|
411
|
-
const selectedUA = selectUserAgent(rotateUserAgents);
|
|
519
|
+
const selectedUA = await selectUserAgent(rotateUserAgents);
|
|
412
520
|
|
|
413
521
|
let browser;
|
|
414
522
|
let context;
|
|
@@ -435,33 +543,31 @@ async function handleAgent(req, res) {
|
|
|
435
543
|
browser = await chromium.launch(launchOptions);
|
|
436
544
|
|
|
437
545
|
const recordingsDir = path.join(__dirname, 'data', 'recordings');
|
|
438
|
-
|
|
439
|
-
fs.mkdirSync(recordingsDir, { recursive: true });
|
|
440
|
-
}
|
|
546
|
+
await fs.promises.mkdir(recordingsDir, { recursive: true });
|
|
441
547
|
|
|
442
548
|
const rotateViewport = String(data.rotateViewport).toLowerCase() === 'true' || data.rotateViewport === true;
|
|
443
549
|
const viewport = rotateViewport
|
|
444
550
|
? { width: 1280 + Math.floor(Math.random() * 640), height: 720 + Math.floor(Math.random() * 360) }
|
|
445
551
|
: { width: 1366, height: 768 };
|
|
446
552
|
|
|
447
|
-
const contextOptions = {
|
|
448
|
-
userAgent: selectedUA,
|
|
449
|
-
viewport,
|
|
450
|
-
deviceScaleFactor: 1,
|
|
451
|
-
locale: 'en-US',
|
|
452
|
-
timezoneId: 'America/New_York',
|
|
453
|
-
colorScheme: 'dark',
|
|
454
|
-
permissions: ['geolocation'],
|
|
455
|
-
};
|
|
456
|
-
|
|
457
|
-
const shouldUseStorageState = !statelessExecution && fs.existsSync(STORAGE_STATE_FILE);
|
|
458
|
-
if (shouldUseStorageState) {
|
|
459
|
-
contextOptions.storageState = STORAGE_STATE_FILE;
|
|
460
|
-
}
|
|
461
|
-
|
|
462
|
-
if (!disableRecording) {
|
|
463
|
-
contextOptions.recordVideo = { dir: recordingsDir, size: viewport };
|
|
464
|
-
}
|
|
553
|
+
const contextOptions = {
|
|
554
|
+
userAgent: selectedUA,
|
|
555
|
+
viewport,
|
|
556
|
+
deviceScaleFactor: 1,
|
|
557
|
+
locale: 'en-US',
|
|
558
|
+
timezoneId: 'America/New_York',
|
|
559
|
+
colorScheme: 'dark',
|
|
560
|
+
permissions: ['geolocation'],
|
|
561
|
+
};
|
|
562
|
+
|
|
563
|
+
const shouldUseStorageState = !statelessExecution && fs.existsSync(STORAGE_STATE_FILE);
|
|
564
|
+
if (shouldUseStorageState) {
|
|
565
|
+
contextOptions.storageState = STORAGE_STATE_FILE;
|
|
566
|
+
}
|
|
567
|
+
|
|
568
|
+
if (!disableRecording) {
|
|
569
|
+
contextOptions.recordVideo = { dir: recordingsDir, size: viewport };
|
|
570
|
+
}
|
|
465
571
|
context = await browser.newContext(contextOptions);
|
|
466
572
|
|
|
467
573
|
await context.addInitScript(() => {
|
|
@@ -1022,7 +1128,7 @@ async function handleAgent(req, res) {
|
|
|
1022
1128
|
case 'start': {
|
|
1023
1129
|
const taskId = resolveMaybe(act.value);
|
|
1024
1130
|
if (!taskId) throw new Error('Missing task id.');
|
|
1025
|
-
const apiKey = loadApiKey() || data.apiKey || data.key;
|
|
1131
|
+
const apiKey = (await loadApiKey()) || data.apiKey || data.key;
|
|
1026
1132
|
if (!apiKey) {
|
|
1027
1133
|
logs.push('No API key available; attempting internal start.');
|
|
1028
1134
|
}
|
|
@@ -1363,16 +1469,6 @@ async function handleAgent(req, res) {
|
|
|
1363
1469
|
return { shadowQueryAll, shadowText };
|
|
1364
1470
|
})();
|
|
1365
1471
|
|
|
1366
|
-
// CodeQL alerts on dynamic eval, but extraction scripts intentionally run inside the browser sandbox,
|
|
1367
|
-
// so we expose only the helpers needed (window, document, DOMParser, console) and keep the evaluation confined there.
|
|
1368
|
-
const executor = new Function(
|
|
1369
|
-
'$$data',
|
|
1370
|
-
'window',
|
|
1371
|
-
'document',
|
|
1372
|
-
'DOMParser',
|
|
1373
|
-
'console',
|
|
1374
|
-
`"use strict"; return (async () => { ${script}\n})();`
|
|
1375
|
-
);
|
|
1376
1472
|
const $$data = {
|
|
1377
1473
|
html: () => html || '',
|
|
1378
1474
|
url: () => pageUrl || '',
|
|
@@ -1381,7 +1477,33 @@ async function handleAgent(req, res) {
|
|
|
1381
1477
|
shadowQueryAll: includeShadowDom ? shadowHelpers.shadowQueryAll : undefined,
|
|
1382
1478
|
shadowText: includeShadowDom ? shadowHelpers.shadowText : undefined
|
|
1383
1479
|
};
|
|
1384
|
-
|
|
1480
|
+
|
|
1481
|
+
// Use vm for sandboxed execution
|
|
1482
|
+
const sandbox = Object.create(null);
|
|
1483
|
+
sandbox.window = createSafeProxy(window);
|
|
1484
|
+
sandbox.document = createSafeProxy(window.document);
|
|
1485
|
+
sandbox.DOMParser = createSafeProxy(window.DOMParser);
|
|
1486
|
+
sandbox.console = createSafeProxy(consoleProxy);
|
|
1487
|
+
sandbox.$$data = createSafeProxy($$data);
|
|
1488
|
+
|
|
1489
|
+
// Pass the script as a variable to avoid string interpolation (CodeQL: Code Injection)
|
|
1490
|
+
sandbox.$$userScript = script;
|
|
1491
|
+
|
|
1492
|
+
const context = vm.createContext(sandbox);
|
|
1493
|
+
|
|
1494
|
+
// We use a static wrapper to execute the user script.
|
|
1495
|
+
// This keeps the code passed to vm.runInContext constant; it does not make the user script itself safe, because node:vm is not a security boundary.
|
|
1496
|
+
// The user script is retrieved from the sandbox environment and executed as an AsyncFunction.
|
|
1497
|
+
const scriptCode = `
|
|
1498
|
+
"use strict";
|
|
1499
|
+
(async () => {
|
|
1500
|
+
const AsyncFunction = Object.getPrototypeOf(async function(){}).constructor;
|
|
1501
|
+
const fn = new AsyncFunction('$$data', 'window', 'document', 'DOMParser', 'console', $$userScript);
|
|
1502
|
+
return fn($$data, window, document, DOMParser, console);
|
|
1503
|
+
})();
|
|
1504
|
+
`;
|
|
1505
|
+
|
|
1506
|
+
const result = await vm.runInContext(scriptCode, context);
|
|
1385
1507
|
return { result, logs: logBuffer };
|
|
1386
1508
|
} catch (e) {
|
|
1387
1509
|
return { result: `Extraction script error: ${e.message}`, logs: [] };
|
|
@@ -1394,29 +1516,6 @@ async function handleAgent(req, res) {
|
|
|
1394
1516
|
const extractionScript = extractionScriptRaw ? resolveTemplate(extractionScriptRaw) : undefined;
|
|
1395
1517
|
const extraction = await runExtractionScript(extractionScript, cleanedHtml, page.url());
|
|
1396
1518
|
|
|
1397
|
-
// Simple HTML Formatter (fallback to raw if formatting collapses content)
|
|
1398
|
-
const formatHTML = (html) => {
|
|
1399
|
-
let indent = 0;
|
|
1400
|
-
return html.replace(/<(\/?)([a-z0-9]+)([^>]*?)(\/?)>/gi, (match, slash, tag, attrs, selfClose) => {
|
|
1401
|
-
if (slash) indent--;
|
|
1402
|
-
const result = ' '.repeat(Math.max(0, indent)) + match;
|
|
1403
|
-
if (!slash && !selfClose && !['img', 'br', 'hr', 'input', 'link', 'meta'].includes(tag.toLowerCase())) indent++;
|
|
1404
|
-
return '\n' + result;
|
|
1405
|
-
}).trim();
|
|
1406
|
-
};
|
|
1407
|
-
|
|
1408
|
-
const safeFormatHTML = (html) => {
|
|
1409
|
-
if (typeof html !== 'string') return '';
|
|
1410
|
-
try {
|
|
1411
|
-
const formatted = formatHTML(html);
|
|
1412
|
-
if (!formatted) return html;
|
|
1413
|
-
if (formatted.length < Math.max(200, Math.floor(html.length * 0.5))) return html;
|
|
1414
|
-
return formatted;
|
|
1415
|
-
} catch {
|
|
1416
|
-
return html;
|
|
1417
|
-
}
|
|
1418
|
-
};
|
|
1419
|
-
|
|
1420
1519
|
// Ensure the public/captures directory exists
|
|
1421
1520
|
const capturesDir = path.join(__dirname, 'public', 'captures');
|
|
1422
1521
|
if (!fs.existsSync(capturesDir)) {
|
|
@@ -1447,9 +1546,9 @@ async function handleAgent(req, res) {
|
|
|
1447
1546
|
};
|
|
1448
1547
|
|
|
1449
1548
|
const video = page.video();
|
|
1450
|
-
if (!statelessExecution) {
|
|
1451
|
-
try { await context.storageState({ path: STORAGE_STATE_FILE }); } catch {}
|
|
1452
|
-
}
|
|
1549
|
+
if (!statelessExecution) {
|
|
1550
|
+
try { await context.storageState({ path: STORAGE_STATE_FILE }); } catch {}
|
|
1551
|
+
}
|
|
1453
1552
|
try { await context.close(); } catch {}
|
|
1454
1553
|
if (video) {
|
|
1455
1554
|
try {
|