@grabbit-labs/dynafetch 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/dynafetch-net-darwin-arm64 +0 -0
- package/bin/dynafetch-net-darwin-x64 +0 -0
- package/bin/dynafetch-net-linux-arm64 +0 -0
- package/bin/dynafetch-net-linux-x64 +0 -0
- package/bin/dynafetch-net-win32-x64.exe +0 -0
- package/dist/index.d.ts +192 -0
- package/dist/index.js +3050 -0
- package/dist/index.js.map +7 -0
- package/package.json +23 -3
- package/src/index.ts +0 -1
package/dist/index.js
ADDED
|
@@ -0,0 +1,3050 @@
|
|
|
1
|
+
import { createRequire } from "node:module"; import { fileURLToPath as __fileURLToPath } from "node:url"; import { dirname as __dirname_fn } from "node:path"; const __filename = __fileURLToPath(import.meta.url); const __dirname = __dirname_fn(__filename); const require = createRequire(import.meta.url);
|
|
2
|
+
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
3
|
+
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
4
|
+
}) : x)(function(x) {
|
|
5
|
+
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
6
|
+
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
7
|
+
});
|
|
8
|
+
|
|
9
|
+
// ../dynafetch-core/src/index.ts
|
|
10
|
+
import * as net from "node:net";
|
|
11
|
+
|
|
12
|
+
// ../../src/phantom/execute.ts
|
|
13
|
+
import { JSDOM, VirtualConsole, CookieJar } from "jsdom";
|
|
14
|
+
import WebSocket from "ws";
|
|
15
|
+
import * as nodeCrypto from "crypto";
|
|
16
|
+
|
|
17
|
+
// ../../src/phantom/transform.ts
|
|
18
|
+
import { parse } from "@babel/parser";
|
|
19
|
+
import traverse from "@babel/traverse";
|
|
20
|
+
import generate from "@babel/generator";
|
|
21
|
+
import * as t from "@babel/types";
|
|
22
|
+
import * as crypto from "crypto";
|
|
23
|
+
import * as fs from "fs";
|
|
24
|
+
import * as path from "path";
|
|
25
|
+
// On-disk cache location for wrapped scripts, resolved against the working directory.
var CACHE_DIR = path.join(process.cwd(), ".next/cache/phantom");
try {
  if (!fs.existsSync(CACHE_DIR)) fs.mkdirSync(CACHE_DIR, { recursive: true });
} catch {
  // Best effort: a failing read or write below switches the disk cache off.
}
// In-process cache: cache key -> wrapped script text.
var memCache = /* @__PURE__ */ new Map();
// Turned off for the rest of the process after the first disk-cache failure.
var fsCacheEnabled = true;

/**
 * Wraps script source in a try/catch so a runtime error in one script is
 * logged instead of propagating, and optionally rewrites dynamic `import()`
 * calls to `__phantom.dynamicImport()`.
 */
var Transformer = class {
  /**
   * @param {string} code      Script source to wrap.
   * @param {string|number} scriptId  Identifier embedded in the logged error message.
   * @returns {string} Wrapped (and possibly rewritten) script source.
   */
  transform(code, scriptId) {
    const rewriteImports = process.env.PHANTOM_ENABLE_AST_IMPORT_REWRITE === "1" && code.includes("import(");
    // The output embeds the script id and depends on the rewrite mode, so both
    // are part of the cache key; otherwise a hit could return a wrapper that
    // names another script or was produced in the other mode.
    const hash = crypto
      .createHash("sha256")
      .update(rewriteImports ? "rewrite" : "wrap")
      .update("\0")
      .update(String(scriptId))
      .update("\0")
      .update(code)
      .digest("hex");
    const cachePath = path.join(CACHE_DIR, `${hash}.js`);

    const memHit = memCache.get(hash);
    if (memHit) return memHit;
    if (fsCacheEnabled) {
      try {
        if (fs.existsSync(cachePath)) {
          const diskHit = fs.readFileSync(cachePath, "utf-8");
          memCache.set(hash, diskHit);
          return diskHit;
        }
      } catch {
        fsCacheEnabled = false;
      }
    }

    // Records a result in both caches and hands it back.
    const store = (output) => {
      memCache.set(hash, output);
      if (fsCacheEnabled) {
        try {
          fs.writeFileSync(cachePath, output);
        } catch {
          fsCacheEnabled = false;
        }
      }
      return output;
    };

    // JSON.stringify yields a valid JS string literal, so quotes or newlines in
    // scriptId cannot break (or inject into) the generated code.
    const runtimeLabel = JSON.stringify(`Script ${scriptId} runtime error:`);
    // The newline before `}` keeps a trailing `//` comment in the script from
    // commenting out the catch clause.
    const wrap = (body) => `try { ${body}\n} catch(e) { console.warn(${runtimeLabel}, e); }`;

    if (!rewriteImports) return store(wrap(code));

    let ast;
    try {
      ast = parse(code, {
        sourceType: "unambiguous",
        plugins: ["typescript", "jsx"]
      });
    } catch {
      const failedLabel = JSON.stringify(`Script ${scriptId} failed:`);
      return store(`try { ${code}\n} catch(e) { console.error(${failedLabel}, e); }`);
    }

    // Under native ESM the default import of these CommonJS packages can be the
    // module namespace object, with the callable on `.default`.
    const walk = typeof traverse === "function" ? traverse : traverse.default;
    const print = typeof generate === "function" ? generate : generate.default;

    walk(ast, {
      CallExpression(nodePath) {
        if (nodePath.node.callee.type !== "Import") return;
        nodePath.replaceWith(
          t.callExpression(
            t.memberExpression(t.identifier("__phantom"), t.identifier("dynamicImport")),
            nodePath.node.arguments
          )
        );
      }
    });
    const output = print(ast, { compact: true, minified: true }, code);
    return store(wrap(output.code));
  }
};
|
|
113
|
+
|
|
114
|
+
// ../dynafetch-core/src/net/worker-client.ts
|
|
115
|
+
import { AsyncLocalStorage } from "node:async_hooks";
|
|
116
|
+
import { spawn } from "node:child_process";
|
|
117
|
+
import { randomUUID } from "node:crypto";
|
|
118
|
+
import path2 from "node:path";
|
|
119
|
+
import readline from "node:readline";
|
|
120
|
+
// Async-context storage holding the active `{ sessionId }` record, if any.
const sessionStore = new AsyncLocalStorage();
// Promise for the shared worker transport; null until created and after the worker goes away.
let transportPromise = null;
|
|
122
|
+
/**
 * Looks for a prebuilt `dynafetch-net` executable matching the current
 * platform and CPU in the known install and development locations.
 *
 * @returns {string | null} Absolute path of the first executable candidate, or null.
 */
function findPrecompiledBinary() {
  const { platform, arch } = process;
  // The naming scheme only distinguishes x64 and arm64; other CPUs must not be
  // reported as arm64, or an incompatible binary could be selected.
  if (arch !== "x64" && arch !== "arm64") return null;
  const ext = platform === "win32" ? ".exe" : "";
  const name = `dynafetch-net-${platform}-${arch}${ext}`;
  const candidates = [
    // installed: dist/../bin
    path2.resolve(__dirname, "../bin", name),
    // dev: dynafetch-core/src/net -> dynafetch-net/bin
    path2.resolve(__dirname, "../../../dynafetch-net/bin", name),
    // dev: alt layout
    path2.resolve(__dirname, "../../../../packages/dynafetch-net/bin", name),
    // dev: from workspace root
    path2.resolve(process.cwd(), "packages/dynafetch-net/bin", name)
  ];
  for (const candidate of candidates) {
    try {
      fs.accessSync(candidate, fs.constants.X_OK);
      return candidate;
    } catch {
      // Missing or not executable here; try the next location.
    }
  }
  return null;
}
|
|
147
|
+
/**
 * Decides how the worker is launched: the `DYNAFETCH_NET_BIN` override first,
 * then a prebuilt binary, and finally `go run .` in `packages/dynafetch-net`
 * under the working directory.
 *
 * @returns {{ command: string, args: string[], cwd?: string }}
 */
function createWorkerCommand() {
  const override = process.env.DYNAFETCH_NET_BIN?.trim();
  // The prebuilt lookup only runs when no override is configured.
  const binary = override || findPrecompiledBinary();
  if (binary) {
    return { command: binary, args: [] };
  }
  const sourceDir = path2.resolve(process.cwd(), "packages/dynafetch-net");
  return { command: "go", args: ["run", "."], cwd: sourceDir };
}
|
|
162
|
+
/**
 * Spawns the worker process and connects its stdout to the table of in-flight
 * requests. Each stdout line is parsed as JSON and matched to a request by
 * `id`; a reply with `error` rejects that request, otherwise `result` resolves it.
 *
 * @returns {Promise<{ child: import("node:child_process").ChildProcess, pending: Map<string, { resolve: Function, reject: Function }> }>}
 */
function createWorkerTransport() {
  const { command, args, cwd } = createWorkerCommand();
  const child = spawn(command, args, {
    cwd,
    stdio: ["pipe", "pipe", "pipe"],
    env: process.env
  });
  const pending = /* @__PURE__ */ new Map();

  // Rejects every in-flight request with `error` and empties the table.
  const failPending = (error) => {
    for (const entry of pending.values()) {
      entry.reject(error);
    }
    pending.clear();
  };

  const rl = readline.createInterface({ input: child.stdout });
  rl.on("line", (line) => {
    const trimmed = line.trim();
    if (!trimmed) return;
    let payload;
    try {
      payload = JSON.parse(trimmed);
    } catch (error) {
      // An unparseable line cannot be attributed to one request, so all fail.
      failPending(new Error(`Invalid dynafetch-net response: ${String(error)}`));
      return;
    }
    const request = pending.get(payload.id);
    if (!request) return;
    pending.delete(payload.id);
    if (payload.error) {
      request.reject(new Error(payload.error.message || payload.error.code || "dynafetch-net request failed"));
      return;
    }
    request.resolve(payload.result);
  });

  child.stderr.on("data", (chunk) => {
    const message = chunk.toString().trim();
    if (message) {
      console.warn(`[dynafetch-net] ${message}`);
    }
  });

  // A stream 'error' with no listener is thrown as an uncaught exception. A
  // broken stdin pipe (worker died mid-write) must not take the host process
  // down: the failed write is already reported through its write callback and
  // the worker's exit is handled below.
  child.stdin.on("error", () => {
  });

  child.once("error", (error) => {
    failPending(error);
    transportPromise = null;
  });
  child.once("exit", (code, signal) => {
    failPending(new Error(`dynafetch-net exited (code=${code ?? "null"}, signal=${signal ?? "null"})`));
    // Let the next call spawn a fresh worker.
    transportPromise = null;
  });
  return Promise.resolve({ child, pending });
}
|
|
217
|
+
/**
 * Returns the shared worker transport, creating it when none is currently stored.
 *
 * @returns {Promise<object>} Resolves to the transport produced by createWorkerTransport().
 */
async function getWorkerTransport() {
  transportPromise = transportPromise || createWorkerTransport();
  return transportPromise;
}
|
|
223
|
+
/**
 * Sends one request to the worker as a single JSON line and waits for the
 * reply registered under the same id.
 *
 * @param {string} method  Worker method name.
 * @param {object} params  Method parameters (JSON-serialised).
 * @returns {Promise<any>} Settled by the transport's response handling, or
 *   rejected here if the write to the worker's stdin fails.
 */
async function callWorker(method, params) {
  const { child, pending } = await getWorkerTransport();
  const id = randomUUID();
  const line = JSON.stringify({ id, method, params }) + "\n";
  return await new Promise((resolve, reject) => {
    // Register before writing so a reply can never arrive unmatched.
    pending.set(id, { resolve, reject });
    child.stdin.write(line, (writeError) => {
      if (writeError) {
        pending.delete(id);
        reject(writeError);
      }
    });
  });
}
|
|
237
|
+
/**
 * Opens a worker session, runs `run` with that session id in async-local
 * storage, and always asks the worker to close the session afterwards.
 *
 * @param {object} options  Passed to the worker's `openSession` method.
 * @param {Function} run    Callback executed inside the session scope.
 * @returns {Promise<any>} Whatever `run` returns.
 */
async function withDynafetchSession(options, run) {
  const opened = await callWorker("openSession", options);
  try {
    const scope = { sessionId: opened.sessionId };
    return await sessionStore.run(scope, run);
  } finally {
    // Closing is best effort: a failed close must not mask the outcome of `run`.
    const ignore = () => {
    };
    await callWorker("closeSession", { sessionId: opened.sessionId }).catch(ignore);
  }
}
|
|
246
|
+
/**
 * Calls the worker's `health` method with empty parameters.
 *
 * @returns {Promise<any>} The worker's reply.
 */
async function dynafetchNetHealth() {
  const status = await callWorker("health", {});
  return status;
}
|
|
249
|
+
/**
 * Sends a single request through the worker's `fetch` method, attaching the
 * session id from async-local storage when one is active.
 *
 * @param {object} request  Request description forwarded unchanged.
 * @param {object} [options]  followRedirect, maxRedirects, browserProfile, timeoutSeconds, proxy.
 * @returns {Promise<any>} The worker's reply.
 */
async function dynafetchNetFetch(request, options = {}) {
  const sessionId = sessionStore.getStore()?.sessionId;
  const { followRedirect, maxRedirects, browserProfile, timeoutSeconds, proxy } = options;
  const params = {
    sessionId,
    request,
    followRedirect,
    maxRedirects,
    browserProfile,
    timeoutSeconds,
    proxy
  };
  return await callWorker("fetch", params);
}
|
|
261
|
+
/**
 * Sends several requests through the worker's `batchFetch` method, attaching
 * the session id from async-local storage when one is active.
 *
 * @param {object[]} requests  Request descriptions forwarded unchanged.
 * @param {object} [options]  followRedirect, maxRedirects, browserProfile, timeoutSeconds, proxy.
 * @returns {Promise<any>} The worker's reply.
 */
async function dynafetchNetBatchFetch(requests, options = {}) {
  const sessionId = sessionStore.getStore()?.sessionId;
  const { followRedirect, maxRedirects, browserProfile, timeoutSeconds, proxy } = options;
  const params = {
    sessionId,
    requests,
    followRedirect,
    maxRedirects,
    browserProfile,
    timeoutSeconds,
    proxy
  };
  return await callWorker("batchFetch", params);
}
|
|
273
|
+
|
|
274
|
+
// ../../src/phantom/phantom-proxy.ts
|
|
275
|
+
/**
 * Flattens a Headers-like object into a plain `{ name: value }` record.
 * When the object exposes `getSetCookie()`, the individual cookies are stored
 * under "set-cookie" joined by newlines.
 *
 * @param {{ forEach: Function, getSetCookie?: Function }} h
 * @returns {Record<string, string>}
 */
function headersToRecord(h) {
  const record = {};
  h.forEach((value, name) => {
    record[name] = value;
  });
  if (typeof h.getSetCookie !== "function") return record;
  const cookies = h.getSetCookie();
  if (Array.isArray(cookies) && cookies.length > 0) {
    record["set-cookie"] = cookies.join("\n");
  }
  return record;
}
|
|
287
|
+
/**
 * Performs the request with the global `fetch`, bypassing the worker.
 * Redirects are not followed; an unreadable body is reported as "".
 *
 * @param {{ url: string, method?: string, headers?: object, body?: any }} payload
 * @returns {Promise<{ status: number, body: string, headers: Record<string, string> }>}
 */
async function directFetch(payload) {
  const { method, headers } = payload;
  const init = { method, headers, redirect: "manual" };
  if (payload.body) {
    init.body = payload.body;
  }
  const resp = await fetch(payload.url, init);
  const text = await resp.text().catch(() => "");
  return {
    status: resp.status,
    body: text,
    headers: headersToRecord(resp.headers)
  };
}
|
|
298
|
+
/**
 * Performs the request through the worker without following redirects and
 * returns only the fields the callers consume.
 *
 * @param {object} payload  Request description.
 * @returns {Promise<{ status: any, body: any, headers: any, finalUrl: any, error: any }>}
 */
async function dynafetchWorkerFetch(payload) {
  const reply = await dynafetchNetFetch(payload, { followRedirect: false });
  const { status, body, headers, finalUrl, error } = reply;
  return { status, body, headers, finalUrl, error };
}
|
|
310
|
+
/**
 * Fetches through the worker, falling back to a direct fetch when the worker
 * call fails. `PHANTOM_DISABLE_PROXY=1` or `DYNAFETCH_DISABLE_NET=1` skips the
 * worker entirely; `DYNAFETCH_DISABLE_DIRECT_FALLBACK=1` rethrows the worker
 * error instead of falling back.
 *
 * @param {object} payload  Request description.
 * @returns {Promise<object>}
 */
async function phantomFetch(payload) {
  const { PHANTOM_DISABLE_PROXY, DYNAFETCH_DISABLE_NET } = process.env;
  const workerDisabled = PHANTOM_DISABLE_PROXY === "1" || DYNAFETCH_DISABLE_NET === "1";
  if (workerDisabled) {
    return directFetch(payload);
  }
  let workerError;
  try {
    return await dynafetchWorkerFetch(payload);
  } catch (error) {
    workerError = error;
  }
  if (process.env.DYNAFETCH_DISABLE_DIRECT_FALLBACK === "1") {
    throw workerError;
  }
  return await directFetch(payload);
}
|
|
323
|
+
/**
 * Batch variant of phantomFetch: one worker `batchFetch` call, with a
 * per-request direct fetch as the fallback. The same three environment
 * switches apply. An empty input returns an empty array without any call.
 *
 * @param {object[]} payloads  Request descriptions.
 * @returns {Promise<object[]>}
 */
async function phantomBatchFetch(payloads) {
  if (payloads.length === 0) return [];
  const fetchAllDirectly = () => Promise.all(payloads.map((p) => directFetch(p)));
  const { PHANTOM_DISABLE_PROXY, DYNAFETCH_DISABLE_NET } = process.env;
  if (PHANTOM_DISABLE_PROXY === "1" || DYNAFETCH_DISABLE_NET === "1") {
    return fetchAllDirectly();
  }
  try {
    const replies = await dynafetchNetBatchFetch(payloads, { followRedirect: false });
    return replies.map(({ status, body, headers, finalUrl, error }) => ({
      status,
      body,
      headers,
      finalUrl,
      error
    }));
  } catch (error) {
    if (process.env.DYNAFETCH_DISABLE_DIRECT_FALLBACK === "1") {
      throw error;
    }
    return fetchAllDirectly();
  }
}
|
|
347
|
+
|
|
348
|
+
// ../../src/phantom/headers.ts
|
|
349
|
+
// Header values shared by the document and subresource header sets below.
var CHROME_146_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36";
var CHROME_146_SEC_CH_UA = '"Chromium";v="146", "Not-A.Brand";v="24", "Google Chrome";v="146"';

/**
 * Builds the request headers for a top-level document navigation, plus the
 * order in which the header names are listed.
 *
 * @returns {{ headers: Record<string, string>, order: string[] }}
 */
function chromeDocumentHeaders() {
  // Single ordered list: the header map and the name order are both derived from it.
  const entries = [
    ["accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"],
    ["accept-language", "en-US,en;q=0.9"],
    ["cache-control", "max-age=0"],
    ["priority", "u=0, i"],
    ["sec-ch-ua", CHROME_146_SEC_CH_UA],
    ["sec-ch-ua-mobile", "?0"],
    ["sec-ch-ua-platform", '"macOS"'],
    ["sec-fetch-dest", "document"],
    ["sec-fetch-mode", "navigate"],
    ["sec-fetch-site", "none"],
    ["sec-fetch-user", "?1"],
    ["upgrade-insecure-requests", "1"],
    ["user-agent", CHROME_146_USER_AGENT]
  ];
  return {
    headers: Object.fromEntries(entries),
    order: entries.map(([name]) => name)
  };
}
|
|
384
|
+
/**
 * Builds the request headers for a subresource fetch, plus the order in which
 * the header names are listed.
 *
 * @param {string} referer  Value for the `referer` header.
 * @returns {{ headers: Record<string, string>, order: string[] }}
 */
function chromeSubresourceHeaders(referer) {
  // Single ordered list: the header map and the name order are both derived from it.
  const entries = [
    ["accept", "*/*"],
    ["accept-language", "en-US,en;q=0.9"],
    ["sec-ch-ua", CHROME_146_SEC_CH_UA],
    ["sec-ch-ua-mobile", "?0"],
    ["sec-ch-ua-platform", '"macOS"'],
    ["referer", referer],
    ["user-agent", CHROME_146_USER_AGENT]
  ];
  return {
    headers: Object.fromEntries(entries),
    order: entries.map(([name]) => name)
  };
}
|
|
405
|
+
|
|
406
|
+
// ../../src/phantom/matcher.ts
|
|
407
|
+
// Flag characters accepted in a user-supplied regex literal.
var VALID_REGEX_FLAGS = /* @__PURE__ */ new Set(["d", "g", "i", "m", "s", "u", "v", "y"]);

/**
 * Tells whether every character of `flags` is an accepted regex flag.
 *
 * @param {string} flags
 * @returns {boolean} true for the empty string.
 */
function hasOnlyValidFlags(flags) {
  return [...flags].every((flag) => VALID_REGEX_FLAGS.has(flag));
}
|
|
414
|
+
/**
 * Removes repeated characters from a flag string, keeping the first
 * occurrence of each in its original position.
 *
 * @param {string} flags
 * @returns {string}
 */
function dedupeFlags(flags) {
  const unique = new Set([...flags]);
  return [...unique].join("");
}
|
|
425
|
+
/**
 * Splits a `/source/flags` string into its parts. The closing delimiter is the
 * last `/` that is not escaped by an odd number of preceding backslashes.
 *
 * @param {string} input
 * @returns {{ source: string, flags: string } | null} null when the input does
 *   not start with `/` or has no unescaped closing `/`.
 */
function parseRegexLiteral(input) {
  if (!input.startsWith("/")) return null;

  // True when the character at `pos` is preceded by an odd run of backslashes.
  const isEscaped = (pos) => {
    let backslashes = 0;
    while (pos - 1 - backslashes >= 0 && input[pos - 1 - backslashes] === "\\") {
      backslashes++;
    }
    return backslashes % 2 === 1;
  };

  // Walk backwards; index 0 is the opening delimiter and never a candidate.
  let closing = input.length - 1;
  while (closing > 0 && (input[closing] !== "/" || isEscaped(closing))) {
    closing--;
  }
  if (closing <= 0) return null;

  return {
    source: input.slice(1, closing),
    flags: input.slice(closing + 1)
  };
}
|
|
443
|
+
/**
 * Compiles a matcher description into an object with `test(value)` and
 * `find(value)`.
 *
 * - Plain mode (default): substring search, case-insensitive unless
 *   `fuzzyMatch` is false.
 * - Regex mode (`matcherRegex === true`): the matcher is used as a pattern,
 *   optionally written as `/source/flags`. `g` and `y` are dropped, and `i` is
 *   added unless `fuzzyMatch` is false.
 *
 * `find` returns `{ index, length }` with `length` at least 1, or null.
 *
 * @param {{ matcher?: any, fuzzyMatch?: boolean, matcherRegex?: boolean }} input
 * @throws {Error} In regex mode, for an empty pattern or unknown flags.
 */
function compileMatcher(input) {
  const raw = String(input.matcher ?? "");
  const caseInsensitive = input.fuzzyMatch !== false;

  if (input.matcherRegex !== true) {
    const fold = (text) => (caseInsensitive ? text.toLowerCase() : text);
    const needle = fold(raw);
    return {
      kind: "plain",
      raw,
      test: (value) => fold(value).includes(needle),
      find: (value) => {
        const at = fold(value).indexOf(needle);
        return at < 0 ? null : { index: at, length: Math.max(needle.length, 1) };
      }
    };
  }

  const literal = parseRegexLiteral(raw);
  const source = literal ? literal.source : raw;
  const declaredFlags = literal ? literal.flags : "";
  if (!source) {
    throw new Error("Regex matcher cannot be empty.");
  }
  if (!hasOnlyValidFlags(declaredFlags)) {
    throw new Error(`Invalid regex flags "${declaredFlags}".`);
  }

  let flags = dedupeFlags(declaredFlags);
  if (caseInsensitive && !flags.includes("i")) {
    flags += "i";
  }
  // Without `g`/`y` the regex keeps no lastIndex state between calls.
  const regex = new RegExp(source, flags.replace(/[gy]/g, ""));

  return {
    kind: "regex",
    raw,
    test: (value) => regex.test(value),
    find: (value) => {
      const match = regex.exec(value);
      if (!match || typeof match.index !== "number") return null;
      const matched = typeof match[0] === "string" ? match[0].length : 0;
      return { index: match.index, length: Math.max(matched, 1) };
    }
  };
}
|
|
489
|
+
|
|
490
|
+
// ../../src/phantom/script-policy.ts
|
|
491
|
+
// Script URL patterns, one table per category. All are case-insensitive and
// unanchored: a match anywhere in the URL counts.

// Category "telemetry": analytics, session replay and error reporting.
var TELEMETRY_URL_PATTERNS = [
  /google-analytics\.com/i, /googletagmanager\.com/i, /googleadservices\.com/i,
  /doubleclick\.net/i, /googleads\.g\.doubleclick\.net/i,
  /posthog/i, /segment\./i, /segmentcdn/i, /analytics/i, /mixpanel/i, /amplitude/i,
  /fullstory/i, /hotjar/i, /clarity\.ms/i,
  /newrelic/i, /datadog/i, /bugsnag/i, /sentry/i, /logrocket/i,
  /heap/i, /rudderstack/i, /gtag\/js/i
];

// Category "ads".
var AD_URL_PATTERNS = [
  /doubleclick/i, /adservice/i, /adsystem/i, /adnxs/i,
  /taboola/i, /outbrain/i, /criteo/i,
  /ads-twitter\.com/i, /connect\.facebook\.net.*fbevents/i
];

// Category "chat".
var CHAT_URL_PATTERNS = [
  /intercom/i, /drift/i, /crisp\.chat/i, /zendesk/i,
  /olark/i, /livechat/i, /tawk\.to/i
];

// Category "widget": maps, captchas, video players and similar embeds.
var WIDGET_URL_PATTERNS = [
  /maps\.googleapis\.com\/maps\/api\/js/i, /maps-api-v3/i,
  /recaptcha/i, /hcaptcha/i,
  /player\.vimeo/i, /youtube\.com\/iframe_api/i,
  /static\.zdassets\.com/i
];

// Category "bot-defense"; also applied to inline script content.
var BOT_DEFENSE_PATTERNS = [
  /perimeterx/i, /kasada/i, /kpsdk/i, /datadome/i, /px-cdn/i
];

// Applied to the content of inline scripts (no URL) to detect telemetry snippets.
var TELEMETRY_INLINE_PATTERNS = [
  /GoogleAnalyticsObject/i, /\bgtag\s*\(/i, /\bga\s*\(\s*['"]create['"]/i,
  /\bposthog\b/i, /\bmixpanel\b/i,
  /\bclarity\s*\(/i, /\bfbq\s*\(/i, /\bhj\s*\(/i,
  /\bnewrelic\b/i, /\bdatadog\b/i, /\bSentry\b/i, /\bLogRocket\b/i
];
|
|
565
|
+
/**
 * Reduces a hostname to the key used for same-site comparison.
 *
 * - Hostnames with at most two labels are returned unchanged.
 * - Dotted-quad IPv4 addresses are returned unchanged: they have no
 *   registrable domain, and keeping only the last two octets would make
 *   unrelated addresses compare equal.
 * - For a two-letter TLD preceded by a common second-level label
 *   (co, com, org, net, gov, edu, ac) the last three labels are kept, so
 *   `a.example.co.uk` does not collapse to `co.uk`.
 * - Otherwise the last two labels are kept.
 *
 * NOTE(review): this is a heuristic, not a Public Suffix List lookup; less
 * common multi-label suffixes are still reduced to two labels.
 *
 * @param {string} hostname  Expected lowercase — TODO confirm all callers lowercase it.
 * @returns {string}
 */
function siteKey(hostname) {
  const parts = hostname.split(".").filter(Boolean);
  if (parts.length <= 2) return hostname;
  if (parts.length === 4 && parts.every((part) => /^\d{1,3}$/.test(part))) return hostname;
  const tld = parts[parts.length - 1];
  const secondLevel = parts[parts.length - 2];
  const hasTwoLabelSuffix = tld.length === 2 && /^(?:co|com|org|net|gov|edu|ac)$/.test(secondLevel);
  return parts.slice(hasTwoLabelSuffix ? -3 : -2).join(".");
}
|
|
570
|
+
/**
 * Tells whether two absolute URLs appear to belong to the same site: either
 * their origins are identical or their lowercased hostnames reduce to the
 * same site key.
 *
 * @param {string} candidateUrl
 * @param {string} pageUrl
 * @returns {boolean} false when either URL cannot be parsed.
 */
function isLikelySameSite(candidateUrl, pageUrl) {
  try {
    const [candidate, page] = [candidateUrl, pageUrl].map((value) => new URL(value));
    if (candidate.origin === page.origin) return true;
    const candidateKey = siteKey(candidate.hostname.toLowerCase());
    const pageKey = siteKey(page.hostname.toLowerCase());
    return candidateKey === pageKey;
  } catch {
    return false;
  }
}
|
|
580
|
+
/**
 * Tells whether at least one pattern matches the input.
 *
 * @param {string} input
 * @param {RegExp[]} patterns
 * @returns {boolean} false for an empty pattern list.
 */
function matchesAny(input, patterns) {
  for (const pattern of patterns) {
    if (pattern.test(input)) return true;
  }
  return false;
}
|
|
583
|
+
/**
 * Assigns a script to a category.
 *
 * Bot-defense detection comes first and looks at the URL, or at the content
 * when there is no URL. External scripts are then checked against the URL
 * tables in the order telemetry, ads, chat, widget; anything left is
 * "application" when it is same-site or a module script, else "unknown".
 * Inline scripts are "telemetry" when their content matches an inline
 * telemetry pattern, else "application".
 *
 * @param {{ url?: string, content?: string, scriptKind?: string }} script
 * @param {string} pageUrl
 * @returns {"bot-defense" | "telemetry" | "ads" | "chat" | "widget" | "application" | "unknown"}
 */
function classifyScriptAsset(script, pageUrl) {
  const url = script.url || "";
  const content = script.content || "";

  if (matchesAny(url || content, BOT_DEFENSE_PATTERNS)) return "bot-defense";

  if (!url) {
    return matchesAny(content, TELEMETRY_INLINE_PATTERNS) ? "telemetry" : "application";
  }

  // First matching table wins.
  const urlRules = [
    ["telemetry", TELEMETRY_URL_PATTERNS],
    ["ads", AD_URL_PATTERNS],
    ["chat", CHAT_URL_PATTERNS],
    ["widget", WIDGET_URL_PATTERNS]
  ];
  for (const [category, patterns] of urlRules) {
    if (matchesAny(url, patterns)) return category;
  }

  if (isLikelySameSite(url, pageUrl) || script.scriptKind === "module") {
    return "application";
  }
  return "unknown";
}
|
|
600
|
+
/**
 * Decides whether scripts of a category are skipped under a policy.
 * "bot-defense" is skipped under every policy; "execute-all" skips nothing
 * else; any other policy skips telemetry, ads, chat and widget scripts.
 *
 * @param {string} category
 * @param {string} policy
 * @returns {boolean}
 */
function shouldSkipScriptCategory(category, policy) {
  if (category === "bot-defense") return true;
  if (policy === "execute-all") return false;
  const nonCritical = ["telemetry", "ads", "chat", "widget"];
  return nonCritical.includes(category);
}
|
|
605
|
+
/**
 * Classifies a script and applies the skip policy to the resulting category.
 *
 * @param {object} script
 * @param {string} pageUrl
 * @param {string} policy
 * @returns {boolean}
 */
function shouldSkipScriptAsset(script, pageUrl, policy) {
  const category = classifyScriptAsset(script, pageUrl);
  return shouldSkipScriptCategory(category, policy);
}
|
|
608
|
+
/**
 * Applies the skip policy to a script known only by its URL, treating it as
 * an external classic script with no content.
 *
 * @param {string} url
 * @param {string} pageUrl
 * @param {string} policy
 * @returns {boolean}
 */
function shouldSkipDynamicScriptUrl(url, pageUrl, policy) {
  const asset = { url, content: "", scriptKind: "classic", type: "external" };
  return shouldSkipScriptAsset(asset, pageUrl, policy);
}
|
|
611
|
+
|
|
612
|
+
// ../../src/phantom/execute.ts
|
|
613
|
+
var Executor = class {
|
|
614
|
+
constructor(harvestData, options = null) {
|
|
615
|
+
this.logs = [];
|
|
616
|
+
this.targetValue = null;
|
|
617
|
+
this.matcherRegex = false;
|
|
618
|
+
this.compiledMatcher = null;
|
|
619
|
+
this.scriptCache = /* @__PURE__ */ new Map();
|
|
620
|
+
this.initialComponentName = null;
|
|
621
|
+
this.pendingRequests = 0;
|
|
622
|
+
this.pendingRequestNextId = 1;
|
|
623
|
+
this.pendingRequestMap = /* @__PURE__ */ new Map();
|
|
624
|
+
this.pendingTasks = 0;
|
|
625
|
+
this.pendingTaskNextId = 1;
|
|
626
|
+
this.pendingTaskTimers = /* @__PURE__ */ new Map();
|
|
627
|
+
this.quiescenceResolver = null;
|
|
628
|
+
this.quiescenceTimer = null;
|
|
629
|
+
this.quiescenceMaxTimer = null;
|
|
630
|
+
this.quiescenceMinTimer = null;
|
|
631
|
+
this.minifyBundle = false;
|
|
632
|
+
// Module-script support (JSDOM does not execute <script type="module">).
|
|
633
|
+
this.handledModuleScriptUrls = /* @__PURE__ */ new Set();
|
|
634
|
+
// JSDOM won't fetch/execute external scripts unless resources are enabled. We instead
|
|
635
|
+
// intercept dynamic <script src="..."> insertions and fetch+eval them ourselves.
|
|
636
|
+
this.handledClassicScriptUrls = /* @__PURE__ */ new Set();
|
|
637
|
+
this.moduleBundleCache = /* @__PURE__ */ new Map();
|
|
638
|
+
// entryUrl -> transformed bundled JS
|
|
639
|
+
this.moduleResolveCache = /* @__PURE__ */ new Map();
|
|
640
|
+
// url -> source
|
|
641
|
+
this.moduleInFlight = /* @__PURE__ */ new Map();
|
|
642
|
+
// entryUrl -> promise
|
|
643
|
+
this.windowClosed = false;
|
|
644
|
+
// Simple telemetry counters (useful for debugging).
|
|
645
|
+
this.telemetry_stubbed = 0;
|
|
646
|
+
this.telemetry_proxy = 0;
|
|
647
|
+
this.moduleWaitMs = 1500;
|
|
648
|
+
this.quiescenceOptions = {
|
|
649
|
+
minWaitMs: 200,
|
|
650
|
+
idleWaitMs: 250,
|
|
651
|
+
maxWaitMs: 5e3
|
|
652
|
+
};
|
|
653
|
+
this.timings = {
|
|
654
|
+
transform_ms_total: 0,
|
|
655
|
+
scripts_transformed_count: 0,
|
|
656
|
+
quiescence_ms: 0
|
|
657
|
+
};
|
|
658
|
+
this.executionErrors = [];
|
|
659
|
+
this.thirdPartyPolicy = "skip-noncritical";
|
|
660
|
+
// Early exit tracking
|
|
661
|
+
this.findAll = false;
|
|
662
|
+
this.fuzzyMatch = true;
|
|
663
|
+
this.earlyMatches = [];
|
|
664
|
+
this.matchFound = false;
|
|
665
|
+
this.harvestData = harvestData;
|
|
666
|
+
this.transformer = new Transformer();
|
|
667
|
+
if (options === null || typeof options === "string" || typeof options === "number") {
|
|
668
|
+
this.targetValue = options;
|
|
669
|
+
} else {
|
|
670
|
+
this.targetValue = options.targetValue ?? null;
|
|
671
|
+
this.matcherRegex = options.matcherRegex === true;
|
|
672
|
+
this.findAll = options.findAll ?? false;
|
|
673
|
+
this.fuzzyMatch = options.fuzzyMatch ?? true;
|
|
674
|
+
this.thirdPartyPolicy = options.thirdPartyPolicy ?? "skip-noncritical";
|
|
675
|
+
this.proxy = options.proxy;
|
|
676
|
+
this.applyDefaults(options.quiescence, options.moduleWaitMs);
|
|
677
|
+
}
|
|
678
|
+
if (this.targetValue !== null && this.targetValue !== void 0) {
|
|
679
|
+
const rawMatcher = String(this.targetValue);
|
|
680
|
+
if (rawMatcher.length > 0) {
|
|
681
|
+
this.compiledMatcher = compileMatcher({
|
|
682
|
+
matcher: rawMatcher,
|
|
683
|
+
matcherRegex: this.matcherRegex,
|
|
684
|
+
fuzzyMatch: this.fuzzyMatch
|
|
685
|
+
});
|
|
686
|
+
}
|
|
687
|
+
}
|
|
688
|
+
this.logs = [...harvestData.logs];
|
|
689
|
+
this.initialComponentName = this.findInitialComponentName(harvestData.initialState);
|
|
690
|
+
this.harvestData.scripts.forEach((s) => {
|
|
691
|
+
if (s.url) {
|
|
692
|
+
this.scriptCache.set(s.url, s.content);
|
|
693
|
+
}
|
|
694
|
+
});
|
|
695
|
+
this.harvestData.modulePreloads?.forEach((asset) => {
|
|
696
|
+
this.scriptCache.set(asset.url, asset.content);
|
|
697
|
+
this.moduleResolveCache.set(asset.url, {
|
|
698
|
+
contents: this.rewriteImportMeta(asset.content, asset.url),
|
|
699
|
+
loader: asset.url.endsWith(".ts") || asset.url.endsWith(".tsx") ? "ts" : "js"
|
|
700
|
+
});
|
|
701
|
+
});
|
|
702
|
+
this.harvestData.moduleGraphCache?.forEach((content, url) => {
|
|
703
|
+
if (!this.scriptCache.has(url)) {
|
|
704
|
+
this.scriptCache.set(url, content);
|
|
705
|
+
}
|
|
706
|
+
});
|
|
707
|
+
}
|
|
708
|
+
clampMs(v, min, max) {
|
|
709
|
+
if (!Number.isFinite(v)) return min;
|
|
710
|
+
return Math.max(min, Math.min(max, Math.trunc(v)));
|
|
711
|
+
}
|
|
712
|
+
applyDefaults(quiescence, moduleWaitMsOverride) {
|
|
713
|
+
const hardMaxCap = this.clampMs(Number(process.env.PHANTOM_QUIESCENCE_MAX_CAP_MS ?? 8e3), 500, 6e4);
|
|
714
|
+
const minWaitMs = this.clampMs(quiescence?.minWaitMs ?? 75, 0, 1e4);
|
|
715
|
+
const idleWaitMs = this.clampMs(quiescence?.idleWaitMs ?? 100, 0, 1e4);
|
|
716
|
+
const maxWaitMs = this.clampMs(quiescence?.maxWaitMs ?? 2e3, 0, hardMaxCap);
|
|
717
|
+
const hardModuleCap = this.clampMs(Number(process.env.PHANTOM_MODULE_WAIT_MAX_CAP_MS ?? 3e4), 1e3, 12e4);
|
|
718
|
+
this.moduleWaitMs = this.clampMs(Number(process.env.PHANTOM_MODULE_WAIT_MS ?? moduleWaitMsOverride ?? 6e3), 1e3, hardModuleCap);
|
|
719
|
+
this.quiescenceOptions = { minWaitMs, idleWaitMs, maxWaitMs };
|
|
720
|
+
}
|
|
721
|
+
logRequest(entry) {
|
|
722
|
+
this.logs.push(entry);
|
|
723
|
+
this.checkForMatch(entry);
|
|
724
|
+
}
|
|
725
|
+
checkForMatch(entry) {
|
|
726
|
+
if (!this.compiledMatcher || this.matchFound && !this.findAll) return;
|
|
727
|
+
const urlToCheck = entry.url ?? "";
|
|
728
|
+
const bodyToCheck = typeof entry.responseBody === "string" ? entry.responseBody : "";
|
|
729
|
+
if (this.compiledMatcher.test(urlToCheck) || this.compiledMatcher.test(bodyToCheck)) {
|
|
730
|
+
this.earlyMatches.push(entry);
|
|
731
|
+
if (!this.findAll) {
|
|
732
|
+
this.matchFound = true;
|
|
733
|
+
this.triggerEarlyExit();
|
|
734
|
+
}
|
|
735
|
+
}
|
|
736
|
+
}
|
|
737
|
+
triggerEarlyExit() {
|
|
738
|
+
if (this.quiescenceResolver) {
|
|
739
|
+
if (this.quiescenceTimer) {
|
|
740
|
+
clearTimeout(this.quiescenceTimer);
|
|
741
|
+
this.quiescenceTimer = null;
|
|
742
|
+
}
|
|
743
|
+
if (this.quiescenceMaxTimer) {
|
|
744
|
+
clearTimeout(this.quiescenceMaxTimer);
|
|
745
|
+
this.quiescenceMaxTimer = null;
|
|
746
|
+
}
|
|
747
|
+
if (this.quiescenceMinTimer) {
|
|
748
|
+
clearTimeout(this.quiescenceMinTimer);
|
|
749
|
+
this.quiescenceMinTimer = null;
|
|
750
|
+
}
|
|
751
|
+
this.quiescenceResolver();
|
|
752
|
+
this.quiescenceResolver = null;
|
|
753
|
+
}
|
|
754
|
+
}
|
|
755
|
+
trackRequestStart(url, kind) {
|
|
756
|
+
const id = this.pendingRequestNextId++;
|
|
757
|
+
this.pendingRequests++;
|
|
758
|
+
this.pendingRequestMap.set(id, { url, kind, startedAt: Date.now() });
|
|
759
|
+
if (this.quiescenceTimer) {
|
|
760
|
+
clearTimeout(this.quiescenceTimer);
|
|
761
|
+
this.quiescenceTimer = null;
|
|
762
|
+
}
|
|
763
|
+
return id;
|
|
764
|
+
}
|
|
765
|
+
trackRequestEnd(id) {
|
|
766
|
+
this.pendingRequests = Math.max(0, this.pendingRequests - 1);
|
|
767
|
+
if (typeof id === "number") this.pendingRequestMap.delete(id);
|
|
768
|
+
if (this.pendingRequests + this.pendingTasks === 0 && this.quiescenceResolver) {
|
|
769
|
+
this.quiescenceTimer = setTimeout(() => {
|
|
770
|
+
if (this.pendingRequests + this.pendingTasks === 0 && this.quiescenceResolver) {
|
|
771
|
+
this.quiescenceResolver();
|
|
772
|
+
this.quiescenceResolver = null;
|
|
773
|
+
}
|
|
774
|
+
}, this.quiescenceOptions.idleWaitMs);
|
|
775
|
+
}
|
|
776
|
+
}
|
|
777
|
+
trackTaskStart(kind, url, maxBlockMs) {
|
|
778
|
+
const id = this.pendingTaskNextId++;
|
|
779
|
+
this.pendingTasks++;
|
|
780
|
+
if (this.quiescenceTimer) {
|
|
781
|
+
clearTimeout(this.quiescenceTimer);
|
|
782
|
+
this.quiescenceTimer = null;
|
|
783
|
+
}
|
|
784
|
+
const ms = this.clampMs(Number(maxBlockMs ?? 0), 0, 6e4);
|
|
785
|
+
if (ms > 0) {
|
|
786
|
+
const t2 = setTimeout(() => this.trackTaskEnd(id), ms);
|
|
787
|
+
this.pendingTaskTimers.set(id, t2);
|
|
788
|
+
}
|
|
789
|
+
return id;
|
|
790
|
+
}
|
|
791
|
+
trackTaskEnd(id) {
|
|
792
|
+
const t2 = this.pendingTaskTimers.get(id);
|
|
793
|
+
if (t2) {
|
|
794
|
+
clearTimeout(t2);
|
|
795
|
+
this.pendingTaskTimers.delete(id);
|
|
796
|
+
}
|
|
797
|
+
if (this.pendingTasks > 0) this.pendingTasks--;
|
|
798
|
+
if (this.pendingRequests + this.pendingTasks === 0 && this.quiescenceResolver) {
|
|
799
|
+
this.quiescenceTimer = setTimeout(() => {
|
|
800
|
+
if (this.pendingRequests + this.pendingTasks === 0 && this.quiescenceResolver) {
|
|
801
|
+
this.quiescenceResolver();
|
|
802
|
+
this.quiescenceResolver = null;
|
|
803
|
+
}
|
|
804
|
+
}, this.quiescenceOptions.idleWaitMs);
|
|
805
|
+
}
|
|
806
|
+
}
|
|
807
|
+
waitForQuiescence() {
|
|
808
|
+
const { maxWaitMs, minWaitMs, idleWaitMs } = this.quiescenceOptions;
|
|
809
|
+
return new Promise((resolve) => {
|
|
810
|
+
const finish = () => {
|
|
811
|
+
if (!this.quiescenceResolver) return;
|
|
812
|
+
if (this.quiescenceTimer) clearTimeout(this.quiescenceTimer);
|
|
813
|
+
if (this.quiescenceMaxTimer) clearTimeout(this.quiescenceMaxTimer);
|
|
814
|
+
if (this.quiescenceMinTimer) clearTimeout(this.quiescenceMinTimer);
|
|
815
|
+
this.quiescenceTimer = null;
|
|
816
|
+
this.quiescenceMaxTimer = null;
|
|
817
|
+
this.quiescenceMinTimer = null;
|
|
818
|
+
this.quiescenceResolver = null;
|
|
819
|
+
resolve();
|
|
820
|
+
};
|
|
821
|
+
this.quiescenceResolver = finish;
|
|
822
|
+
if (this.matchFound && !this.findAll) {
|
|
823
|
+
this.triggerEarlyExit();
|
|
824
|
+
return;
|
|
825
|
+
}
|
|
826
|
+
this.quiescenceMinTimer = setTimeout(() => {
|
|
827
|
+
if (!this.quiescenceResolver) return;
|
|
828
|
+
if (this.pendingRequests + this.pendingTasks === 0) {
|
|
829
|
+
this.quiescenceTimer = setTimeout(() => {
|
|
830
|
+
if (this.quiescenceResolver) this.quiescenceResolver();
|
|
831
|
+
}, idleWaitMs);
|
|
832
|
+
}
|
|
833
|
+
}, minWaitMs);
|
|
834
|
+
this.quiescenceMaxTimer = setTimeout(() => {
|
|
835
|
+
if (this.quiescenceResolver) this.quiescenceResolver();
|
|
836
|
+
}, maxWaitMs);
|
|
837
|
+
});
|
|
838
|
+
}
|
|
839
|
+
async waitForModuleWork(timeoutMs) {
|
|
840
|
+
const pending = Array.from(this.moduleInFlight.values());
|
|
841
|
+
if (!pending.length) return;
|
|
842
|
+
const timeout = this.clampMs(timeoutMs, 0, 6e4);
|
|
843
|
+
if (timeout === 0) return;
|
|
844
|
+
const all = Promise.allSettled(pending).then(() => {
|
|
845
|
+
});
|
|
846
|
+
await Promise.race([
|
|
847
|
+
all,
|
|
848
|
+
new Promise((r) => setTimeout(r, timeout))
|
|
849
|
+
]);
|
|
850
|
+
}
|
|
851
|
+
proxyUrlForScope(scope) {
|
|
852
|
+
if (!this.proxy) return void 0;
|
|
853
|
+
return this.proxy.scopes.has(scope) ? this.proxy.url : void 0;
|
|
854
|
+
}
|
|
855
|
+
async fetchViaProxy(url, method, headers, body, proxyScope = "api") {
|
|
856
|
+
try {
|
|
857
|
+
this.telemetry_proxy++;
|
|
858
|
+
const payload = { method, url, headers, headerOrder: Object.keys(headers), body, proxy: this.proxyUrlForScope(proxyScope) };
|
|
859
|
+
return await phantomFetch(payload);
|
|
860
|
+
} catch (e) {
|
|
861
|
+
return { status: 0, body: e.message, headers: {}, error: e.message };
|
|
862
|
+
}
|
|
863
|
+
}
|
|
864
|
+
rewriteImportMeta(source, moduleUrl) {
|
|
865
|
+
const importMetaLiteral = `({ url: ${JSON.stringify(moduleUrl)}, env: { MODE: "production", PROD: true, DEV: false, SSR: false, BASE_URL: "/" }, hot: undefined })`;
|
|
866
|
+
return source.replace(/\bimport\.meta\.url\b/g, JSON.stringify(moduleUrl)).replace(/\bimport\.meta\.env\.MODE\b/g, `"production"`).replace(/\bimport\.meta\.env\.PROD\b/g, "true").replace(/\bimport\.meta\.env\.DEV\b/g, "false").replace(/\bimport\.meta\.env\.SSR\b/g, "false").replace(/\bimport\.meta\.env\.BASE_URL\b/g, `"/"`).replace(/\bimport\.meta\.env\b/g, `${importMetaLiteral}.env`).replace(/\bimport\.meta\b/g, importMetaLiteral);
|
|
867
|
+
}
|
|
868
|
+
findInitialComponentName(initialState) {
|
|
869
|
+
for (const value of Object.values(initialState || {})) {
|
|
870
|
+
if (value && typeof value === "object" && typeof value.component === "string") {
|
|
871
|
+
return value.component;
|
|
872
|
+
}
|
|
873
|
+
}
|
|
874
|
+
return null;
|
|
875
|
+
}
|
|
876
|
+
findMatchingBraceIndex(source, startIndex) {
|
|
877
|
+
let depth = 0;
|
|
878
|
+
let quote = null;
|
|
879
|
+
let escaped = false;
|
|
880
|
+
for (let i = startIndex; i < source.length; i++) {
|
|
881
|
+
const ch = source[i];
|
|
882
|
+
if (quote) {
|
|
883
|
+
if (escaped) {
|
|
884
|
+
escaped = false;
|
|
885
|
+
continue;
|
|
886
|
+
}
|
|
887
|
+
if (ch === "\\") {
|
|
888
|
+
escaped = true;
|
|
889
|
+
continue;
|
|
890
|
+
}
|
|
891
|
+
if (ch === quote) quote = null;
|
|
892
|
+
continue;
|
|
893
|
+
}
|
|
894
|
+
if (ch === '"' || ch === "'" || ch === "`") {
|
|
895
|
+
quote = ch;
|
|
896
|
+
continue;
|
|
897
|
+
}
|
|
898
|
+
if (ch === "{") {
|
|
899
|
+
depth++;
|
|
900
|
+
continue;
|
|
901
|
+
}
|
|
902
|
+
if (ch === "}") {
|
|
903
|
+
depth--;
|
|
904
|
+
if (depth === 0) return i;
|
|
905
|
+
}
|
|
906
|
+
}
|
|
907
|
+
return -1;
|
|
908
|
+
}
|
|
909
|
+
extractObjectProperty(source, objectStart, objectEnd, key) {
|
|
910
|
+
const needle = `"${key}"`;
|
|
911
|
+
const keyIndex = source.indexOf(needle, objectStart);
|
|
912
|
+
if (keyIndex === -1 || keyIndex > objectEnd) return null;
|
|
913
|
+
let colonIndex = keyIndex + needle.length;
|
|
914
|
+
while (colonIndex < objectEnd && source[colonIndex] !== ":") colonIndex++;
|
|
915
|
+
if (colonIndex >= objectEnd) return null;
|
|
916
|
+
let i = colonIndex + 1;
|
|
917
|
+
let parenDepth = 0;
|
|
918
|
+
let bracketDepth = 0;
|
|
919
|
+
let braceDepth = 0;
|
|
920
|
+
let quote = null;
|
|
921
|
+
let escaped = false;
|
|
922
|
+
for (; i < objectEnd; i++) {
|
|
923
|
+
const ch = source[i];
|
|
924
|
+
if (quote) {
|
|
925
|
+
if (escaped) {
|
|
926
|
+
escaped = false;
|
|
927
|
+
continue;
|
|
928
|
+
}
|
|
929
|
+
if (ch === "\\") {
|
|
930
|
+
escaped = true;
|
|
931
|
+
continue;
|
|
932
|
+
}
|
|
933
|
+
if (ch === quote) quote = null;
|
|
934
|
+
continue;
|
|
935
|
+
}
|
|
936
|
+
if (ch === '"' || ch === "'" || ch === "`") {
|
|
937
|
+
quote = ch;
|
|
938
|
+
continue;
|
|
939
|
+
}
|
|
940
|
+
if (ch === "(") parenDepth++;
|
|
941
|
+
else if (ch === ")") parenDepth--;
|
|
942
|
+
else if (ch === "[") bracketDepth++;
|
|
943
|
+
else if (ch === "]") bracketDepth--;
|
|
944
|
+
else if (ch === "{") braceDepth++;
|
|
945
|
+
else if (ch === "}") braceDepth--;
|
|
946
|
+
else if (ch === "," && parenDepth === 0 && bracketDepth === 0 && braceDepth === 0) {
|
|
947
|
+
return source.slice(keyIndex, i).trim();
|
|
948
|
+
}
|
|
949
|
+
}
|
|
950
|
+
return source.slice(keyIndex, objectEnd).trim();
|
|
951
|
+
}
|
|
952
|
+
pruneComponentRegistrySource(source) {
|
|
953
|
+
if (!this.initialComponentName) return null;
|
|
954
|
+
if (!source.includes(this.initialComponentName)) return null;
|
|
955
|
+
const constMatch = source.match(/const\s+([A-Za-z_$][\w$]*)\s*=\s*\{/);
|
|
956
|
+
if (!constMatch || constMatch.index == null) return null;
|
|
957
|
+
const registryVar = constMatch[1];
|
|
958
|
+
const objectStart = source.indexOf("{", constMatch.index);
|
|
959
|
+
const objectEnd = this.findMatchingBraceIndex(source, objectStart);
|
|
960
|
+
if (objectStart === -1 || objectEnd === -1) return null;
|
|
961
|
+
const propertySource = this.extractObjectProperty(source, objectStart, objectEnd, this.initialComponentName);
|
|
962
|
+
if (!propertySource) return null;
|
|
963
|
+
const exportRegex = new RegExp(`export\\{\\s*${registryVar}\\s+as\\s+([A-Za-z_$][\\w$]*)\\s*\\}`);
|
|
964
|
+
const exportMatch = source.match(exportRegex);
|
|
965
|
+
if (!exportMatch || exportMatch.index == null) return null;
|
|
966
|
+
const prefix = source.slice(0, constMatch.index);
|
|
967
|
+
const suffix = source.slice(exportMatch.index);
|
|
968
|
+
return `${prefix}const ${registryVar}={${propertySource}};${suffix}`;
|
|
969
|
+
}
|
|
970
|
+
async handleModuleScript(entryUrl, window, options = {}) {
|
|
971
|
+
const cacheKey = options.cacheKey ?? entryUrl;
|
|
972
|
+
const existing = this.moduleInFlight.get(cacheKey);
|
|
973
|
+
if (existing) return existing;
|
|
974
|
+
const p = (async () => {
|
|
975
|
+
const taskId = this.trackTaskStart("module_bundle", cacheKey, this.moduleWaitMs);
|
|
976
|
+
try {
|
|
977
|
+
if (process.env.PHANTOM_DEBUG_MODULES === "1") {
|
|
978
|
+
console.log("[Executor] Bundling module entry:", cacheKey);
|
|
979
|
+
}
|
|
980
|
+
const cached = this.moduleBundleCache.get(cacheKey);
|
|
981
|
+
if (cached) {
|
|
982
|
+
if (!this.windowClosed) window.eval(cached);
|
|
983
|
+
return;
|
|
984
|
+
}
|
|
985
|
+
const esbuildMod = await import("esbuild");
|
|
986
|
+
const buildFn = esbuildMod?.build || esbuildMod?.default?.build;
|
|
987
|
+
if (typeof buildFn !== "function") {
|
|
988
|
+
throw new Error("esbuild.build not available (esbuild import failed)");
|
|
989
|
+
}
|
|
990
|
+
const entry = new URL(entryUrl);
|
|
991
|
+
const entryOrigin = entry.origin;
|
|
992
|
+
const stripQueryHash = (u) => {
|
|
993
|
+
try {
|
|
994
|
+
const uu = new URL(u);
|
|
995
|
+
return uu.pathname.toLowerCase();
|
|
996
|
+
} catch {
|
|
997
|
+
return String(u).split("?")[0].split("#")[0].toLowerCase();
|
|
998
|
+
}
|
|
999
|
+
};
|
|
1000
|
+
const isStubAsset2 = (u) => {
|
|
1001
|
+
const p2 = stripQueryHash(u);
|
|
1002
|
+
return p2.endsWith(".css") || p2.endsWith(".png") || p2.endsWith(".jpg") || p2.endsWith(".jpeg") || p2.endsWith(".gif") || p2.endsWith(".webp") || p2.endsWith(".avif") || p2.endsWith(".svg") || p2.endsWith(".ico") || p2.endsWith(".woff") || p2.endsWith(".woff2") || p2.endsWith(".ttf") || p2.endsWith(".otf") || p2.endsWith(".eot") || p2.endsWith(".mp3") || p2.endsWith(".mp4") || p2.endsWith(".webm") || p2.endsWith(".mov") || p2.endsWith(".wasm");
|
|
1003
|
+
};
|
|
1004
|
+
const getLoader = (u) => {
|
|
1005
|
+
const p2 = stripQueryHash(u);
|
|
1006
|
+
if (p2.endsWith(".ts") || p2.endsWith(".tsx")) return "ts";
|
|
1007
|
+
return "js";
|
|
1008
|
+
};
|
|
1009
|
+
const httpPlugin = {
|
|
1010
|
+
name: "phantom-http-url",
|
|
1011
|
+
setup: (build) => {
|
|
1012
|
+
build.onResolve({ filter: /.*/ }, (args) => {
|
|
1013
|
+
const path3 = String(args.path || "");
|
|
1014
|
+
const importer = String(args.importer || "");
|
|
1015
|
+
if (path3.startsWith("data:") || path3.startsWith("blob:") || path3.startsWith("file:")) {
|
|
1016
|
+
return { path: path3, external: true };
|
|
1017
|
+
}
|
|
1018
|
+
if (path3.startsWith("http://") || path3.startsWith("https://")) {
|
|
1019
|
+
return { path: path3, namespace: "http-url" };
|
|
1020
|
+
}
|
|
1021
|
+
const base = (() => {
|
|
1022
|
+
try {
|
|
1023
|
+
if (importer.startsWith("http://") || importer.startsWith("https://")) return importer;
|
|
1024
|
+
} catch {
|
|
1025
|
+
}
|
|
1026
|
+
return entryUrl;
|
|
1027
|
+
})();
|
|
1028
|
+
try {
|
|
1029
|
+
if (path3.startsWith("/")) {
|
|
1030
|
+
const origin = (() => {
|
|
1031
|
+
try {
|
|
1032
|
+
return new URL(base).origin;
|
|
1033
|
+
} catch {
|
|
1034
|
+
return entryOrigin;
|
|
1035
|
+
}
|
|
1036
|
+
})();
|
|
1037
|
+
return { path: new URL(path3, origin).toString(), namespace: "http-url" };
|
|
1038
|
+
}
|
|
1039
|
+
return { path: new URL(path3, base).toString(), namespace: "http-url" };
|
|
1040
|
+
} catch {
|
|
1041
|
+
return null;
|
|
1042
|
+
}
|
|
1043
|
+
});
|
|
1044
|
+
build.onLoad({ filter: /.*/, namespace: "http-url" }, async (args) => {
|
|
1045
|
+
const url = String(args.path || "");
|
|
1046
|
+
if (isStubAsset2(url)) {
|
|
1047
|
+
this.telemetry_stubbed++;
|
|
1048
|
+
return { contents: 'export default "";\n', loader: "js" };
|
|
1049
|
+
}
|
|
1050
|
+
const cached2 = this.moduleResolveCache.get(url);
|
|
1051
|
+
if (cached2) return cached2;
|
|
1052
|
+
const prefetched = this.scriptCache.get(url);
|
|
1053
|
+
if (prefetched != null) {
|
|
1054
|
+
const pruned2 = /component_registry/i.test(url) ? this.pruneComponentRegistrySource(prefetched) : null;
|
|
1055
|
+
const entry3 = { contents: this.rewriteImportMeta(pruned2 ?? prefetched, url), loader: getLoader(url) };
|
|
1056
|
+
this.moduleResolveCache.set(url, entry3);
|
|
1057
|
+
return entry3;
|
|
1058
|
+
}
|
|
1059
|
+
const { headers: subHeaders } = chromeSubresourceHeaders(this.harvestData.url);
|
|
1060
|
+
subHeaders["sec-fetch-dest"] = "script";
|
|
1061
|
+
subHeaders["sec-fetch-mode"] = "cors";
|
|
1062
|
+
try {
|
|
1063
|
+
const pageOrigin = new URL(this.harvestData.url).origin;
|
|
1064
|
+
const reqOrigin = new URL(url).origin;
|
|
1065
|
+
subHeaders["sec-fetch-site"] = reqOrigin === pageOrigin ? "same-origin" : "cross-site";
|
|
1066
|
+
} catch {
|
|
1067
|
+
subHeaders["sec-fetch-site"] = "cross-site";
|
|
1068
|
+
}
|
|
1069
|
+
const resp = await this.fetchViaProxy(url, "GET", subHeaders, "", "assets");
|
|
1070
|
+
if (resp.error || resp.status >= 400) {
|
|
1071
|
+
throw new Error(resp.error || `Module fetch failed: ${resp.status} ${url}`);
|
|
1072
|
+
}
|
|
1073
|
+
const body = resp.body ?? "";
|
|
1074
|
+
const pruned = /component_registry/i.test(url) ? this.pruneComponentRegistrySource(body) : null;
|
|
1075
|
+
const entry2 = { contents: this.rewriteImportMeta(pruned ?? body, url), loader: getLoader(url) };
|
|
1076
|
+
this.moduleResolveCache.set(url, entry2);
|
|
1077
|
+
return entry2;
|
|
1078
|
+
});
|
|
1079
|
+
}
|
|
1080
|
+
};
|
|
1081
|
+
const result = await buildFn({
|
|
1082
|
+
bundle: true,
|
|
1083
|
+
write: false,
|
|
1084
|
+
format: "iife",
|
|
1085
|
+
platform: "browser",
|
|
1086
|
+
target: "es2020",
|
|
1087
|
+
sourcemap: false,
|
|
1088
|
+
minify: this.minifyBundle,
|
|
1089
|
+
stdin: {
|
|
1090
|
+
contents: options.inlineSource ? this.rewriteImportMeta(options.inlineSource, entryUrl) : `import ${JSON.stringify(entryUrl)};
|
|
1091
|
+
`,
|
|
1092
|
+
sourcefile: entryUrl
|
|
1093
|
+
},
|
|
1094
|
+
plugins: [httpPlugin]
|
|
1095
|
+
});
|
|
1096
|
+
const outputText = result?.outputFiles?.[0]?.text;
|
|
1097
|
+
if (!outputText) throw new Error("esbuild produced no output");
|
|
1098
|
+
const bundleIdHash = nodeCrypto.createHash("sha256").update(cacheKey).digest("hex").slice(0, 10);
|
|
1099
|
+
const transformed = this.transformer.transform(outputText, `module_bundle_${bundleIdHash}`);
|
|
1100
|
+
this.moduleBundleCache.set(cacheKey, transformed);
|
|
1101
|
+
if (!this.windowClosed) window.eval(transformed);
|
|
1102
|
+
if (process.env.PHANTOM_DEBUG_MODULES === "1") {
|
|
1103
|
+
console.log("[Executor] Module bundle eval complete:", cacheKey);
|
|
1104
|
+
}
|
|
1105
|
+
} catch (e) {
|
|
1106
|
+
this.recordExecutionError(e, "unhandledRejection");
|
|
1107
|
+
} finally {
|
|
1108
|
+
this.trackTaskEnd(taskId);
|
|
1109
|
+
this.moduleInFlight.delete(cacheKey);
|
|
1110
|
+
}
|
|
1111
|
+
})();
|
|
1112
|
+
this.moduleInFlight.set(cacheKey, p);
|
|
1113
|
+
return p;
|
|
1114
|
+
}
|
|
1115
|
+
recordExecutionError(err, source) {
|
|
1116
|
+
const e = err instanceof Error ? err : new Error(String(err));
|
|
1117
|
+
this.executionErrors.push({ source, message: e.message, stack: e.stack });
|
|
1118
|
+
}
|
|
1119
|
+
looksLikeESModule(code) {
|
|
1120
|
+
return /(^|[^\w$.])import(?:\s+[\w*{]|\s*["']|\()/m.test(code) || /(^|[^\w$.])export\s+(?:\{|\*|default|const|function|class|let|var)\b/m.test(code) || /\bimport\.meta\b/.test(code);
|
|
1121
|
+
}
|
|
1122
|
+
installWebPlatformShims(window) {
|
|
1123
|
+
const that = this;
|
|
1124
|
+
try {
|
|
1125
|
+
window.globalThis = window;
|
|
1126
|
+
} catch {
|
|
1127
|
+
}
|
|
1128
|
+
try {
|
|
1129
|
+
window.self = window;
|
|
1130
|
+
} catch {
|
|
1131
|
+
}
|
|
1132
|
+
if (!window.atob) {
|
|
1133
|
+
window.atob = (s) => Buffer.from(String(s), "base64").toString("binary");
|
|
1134
|
+
}
|
|
1135
|
+
if (!window.btoa) {
|
|
1136
|
+
window.btoa = (s) => Buffer.from(String(s), "binary").toString("base64");
|
|
1137
|
+
}
|
|
1138
|
+
if (window.__phantom?.fetch) {
|
|
1139
|
+
window.fetch = function fetchShim(input, init) {
|
|
1140
|
+
return window.__phantom.fetch(input, init);
|
|
1141
|
+
};
|
|
1142
|
+
}
|
|
1143
|
+
const g = globalThis;
|
|
1144
|
+
if (!window.Headers && g.Headers) window.Headers = g.Headers;
|
|
1145
|
+
if (!window.Request && g.Request) window.Request = g.Request;
|
|
1146
|
+
if (!window.Response && g.Response) window.Response = g.Response;
|
|
1147
|
+
if (!window.AbortController && g.AbortController) window.AbortController = g.AbortController;
|
|
1148
|
+
if (!window.AbortSignal && g.AbortSignal) window.AbortSignal = g.AbortSignal;
|
|
1149
|
+
if (!window.TextEncoder && g.TextEncoder) window.TextEncoder = g.TextEncoder;
|
|
1150
|
+
if (!window.TextDecoder && g.TextDecoder) window.TextDecoder = g.TextDecoder;
|
|
1151
|
+
if (!window.structuredClone && g.structuredClone) window.structuredClone = g.structuredClone.bind(g);
|
|
1152
|
+
const makeStorage = () => {
|
|
1153
|
+
const store = /* @__PURE__ */ new Map();
|
|
1154
|
+
return {
|
|
1155
|
+
get length() {
|
|
1156
|
+
return store.size;
|
|
1157
|
+
},
|
|
1158
|
+
clear() {
|
|
1159
|
+
store.clear();
|
|
1160
|
+
},
|
|
1161
|
+
getItem(key) {
|
|
1162
|
+
return store.has(String(key)) ? store.get(String(key)) ?? null : null;
|
|
1163
|
+
},
|
|
1164
|
+
key(index) {
|
|
1165
|
+
return Array.from(store.keys())[index] ?? null;
|
|
1166
|
+
},
|
|
1167
|
+
removeItem(key) {
|
|
1168
|
+
store.delete(String(key));
|
|
1169
|
+
},
|
|
1170
|
+
setItem(key, value) {
|
|
1171
|
+
store.set(String(key), String(value));
|
|
1172
|
+
}
|
|
1173
|
+
};
|
|
1174
|
+
};
|
|
1175
|
+
try {
|
|
1176
|
+
if (!window.localStorage) window.localStorage = makeStorage();
|
|
1177
|
+
} catch {
|
|
1178
|
+
window.localStorage = makeStorage();
|
|
1179
|
+
}
|
|
1180
|
+
try {
|
|
1181
|
+
if (!window.sessionStorage) window.sessionStorage = makeStorage();
|
|
1182
|
+
} catch {
|
|
1183
|
+
window.sessionStorage = makeStorage();
|
|
1184
|
+
}
|
|
1185
|
+
if (!window.StorageEvent) {
|
|
1186
|
+
window.StorageEvent = class StorageEvent extends window.Event {
|
|
1187
|
+
constructor(type, init = {}) {
|
|
1188
|
+
super(type, init);
|
|
1189
|
+
this.key = init.key ?? null;
|
|
1190
|
+
this.oldValue = init.oldValue ?? null;
|
|
1191
|
+
this.newValue = init.newValue ?? null;
|
|
1192
|
+
this.storageArea = init.storageArea ?? null;
|
|
1193
|
+
this.url = init.url ?? window.location.href;
|
|
1194
|
+
}
|
|
1195
|
+
};
|
|
1196
|
+
}
|
|
1197
|
+
if (!window.crypto) window.crypto = {};
|
|
1198
|
+
if (!window.crypto.getRandomValues) {
|
|
1199
|
+
if (g.crypto && typeof g.crypto.getRandomValues === "function") {
|
|
1200
|
+
window.crypto.getRandomValues = g.crypto.getRandomValues.bind(g.crypto);
|
|
1201
|
+
} else {
|
|
1202
|
+
window.crypto.getRandomValues = (arr) => {
|
|
1203
|
+
const buf = nodeCrypto.randomBytes(arr.length);
|
|
1204
|
+
arr.set(buf);
|
|
1205
|
+
return arr;
|
|
1206
|
+
};
|
|
1207
|
+
}
|
|
1208
|
+
}
|
|
1209
|
+
{
|
|
1210
|
+
const _g = globalThis;
|
|
1211
|
+
if (!window.MessageChannel && _g.MessageChannel) window.MessageChannel = _g.MessageChannel;
|
|
1212
|
+
if (!window.MessagePort && _g.MessagePort) window.MessagePort = _g.MessagePort;
|
|
1213
|
+
}
|
|
1214
|
+
if (!window.requestIdleCallback) {
|
|
1215
|
+
window.requestIdleCallback = (cb) => window.setTimeout(() => cb({
|
|
1216
|
+
didTimeout: false,
|
|
1217
|
+
timeRemaining: () => 50
|
|
1218
|
+
}), 1);
|
|
1219
|
+
}
|
|
1220
|
+
if (!window.cancelIdleCallback) {
|
|
1221
|
+
window.cancelIdleCallback = (id) => window.clearTimeout(id);
|
|
1222
|
+
}
|
|
1223
|
+
if (!window.ResizeObserver) {
|
|
1224
|
+
window.ResizeObserver = class ResizeObserver {
|
|
1225
|
+
constructor(callback) {
|
|
1226
|
+
this.callback = callback;
|
|
1227
|
+
}
|
|
1228
|
+
observe(target) {
|
|
1229
|
+
try {
|
|
1230
|
+
window.setTimeout(() => {
|
|
1231
|
+
const rect = typeof target?.getBoundingClientRect === "function" ? target.getBoundingClientRect() : { width: window.innerWidth, height: window.innerHeight, top: 0, left: 0, right: window.innerWidth, bottom: window.innerHeight };
|
|
1232
|
+
this.callback?.([{ target, contentRect: rect, borderBoxSize: [], contentBoxSize: [] }], this);
|
|
1233
|
+
}, 0);
|
|
1234
|
+
} catch {
|
|
1235
|
+
}
|
|
1236
|
+
}
|
|
1237
|
+
unobserve() {
|
|
1238
|
+
}
|
|
1239
|
+
disconnect() {
|
|
1240
|
+
}
|
|
1241
|
+
};
|
|
1242
|
+
}
|
|
1243
|
+
try {
|
|
1244
|
+
Object.defineProperty(window.navigator, "language", { value: "en-US", configurable: true });
|
|
1245
|
+
Object.defineProperty(window.navigator, "languages", { value: ["en-US", "en"], configurable: true });
|
|
1246
|
+
Object.defineProperty(window.navigator, "platform", { value: "MacIntel", configurable: true });
|
|
1247
|
+
Object.defineProperty(window.navigator, "onLine", { value: true, configurable: true });
|
|
1248
|
+
Object.defineProperty(window.navigator, "webdriver", { value: false, configurable: true });
|
|
1249
|
+
Object.defineProperty(window.navigator, "hardwareConcurrency", { value: 8, configurable: true });
|
|
1250
|
+
Object.defineProperty(window.navigator, "deviceMemory", { value: 8, configurable: true });
|
|
1251
|
+
} catch {
|
|
1252
|
+
}
|
|
1253
|
+
try {
|
|
1254
|
+
Object.defineProperty(window, "innerWidth", { value: 1440, configurable: true });
|
|
1255
|
+
Object.defineProperty(window, "innerHeight", { value: 900, configurable: true });
|
|
1256
|
+
Object.defineProperty(window, "devicePixelRatio", { value: 2, configurable: true });
|
|
1257
|
+
Object.defineProperty(window, "scrollX", { value: 0, writable: true, configurable: true });
|
|
1258
|
+
Object.defineProperty(window, "scrollY", { value: 0, writable: true, configurable: true });
|
|
1259
|
+
} catch {
|
|
1260
|
+
}
|
|
1261
|
+
if (!window.screen) {
|
|
1262
|
+
window.screen = {
|
|
1263
|
+
width: 1440,
|
|
1264
|
+
height: 900,
|
|
1265
|
+
availWidth: 1440,
|
|
1266
|
+
availHeight: 900
|
|
1267
|
+
};
|
|
1268
|
+
}
|
|
1269
|
+
if (!window.visualViewport) {
|
|
1270
|
+
window.visualViewport = {
|
|
1271
|
+
width: window.innerWidth,
|
|
1272
|
+
height: window.innerHeight,
|
|
1273
|
+
scale: 1,
|
|
1274
|
+
offsetLeft: 0,
|
|
1275
|
+
offsetTop: 0,
|
|
1276
|
+
pageLeft: 0,
|
|
1277
|
+
pageTop: 0,
|
|
1278
|
+
addEventListener() {
|
|
1279
|
+
},
|
|
1280
|
+
removeEventListener() {
|
|
1281
|
+
}
|
|
1282
|
+
};
|
|
1283
|
+
}
|
|
1284
|
+
if (!window.scrollTo) {
|
|
1285
|
+
window.scrollTo = (x, y) => {
|
|
1286
|
+
window.scrollX = typeof x === "number" ? x : window.scrollX;
|
|
1287
|
+
window.scrollY = typeof y === "number" ? y : window.scrollY;
|
|
1288
|
+
};
|
|
1289
|
+
}
|
|
1290
|
+
if (!window.scrollBy) {
|
|
1291
|
+
window.scrollBy = (x, y) => {
|
|
1292
|
+
window.scrollTo((window.scrollX || 0) + (Number(x) || 0), (window.scrollY || 0) + (Number(y) || 0));
|
|
1293
|
+
};
|
|
1294
|
+
}
|
|
1295
|
+
try {
|
|
1296
|
+
Object.defineProperty(window.document, "visibilityState", { value: "visible", configurable: true });
|
|
1297
|
+
Object.defineProperty(window.document, "hidden", { value: false, configurable: true });
|
|
1298
|
+
} catch {
|
|
1299
|
+
}
|
|
1300
|
+
if (!window.document.hasFocus) {
|
|
1301
|
+
window.document.hasFocus = () => true;
|
|
1302
|
+
}
|
|
1303
|
+
if (!window.focus) {
|
|
1304
|
+
window.focus = () => {
|
|
1305
|
+
try {
|
|
1306
|
+
window.dispatchEvent(new window.Event("focus"));
|
|
1307
|
+
} catch {
|
|
1308
|
+
}
|
|
1309
|
+
};
|
|
1310
|
+
}
|
|
1311
|
+
if (!window.blur) {
|
|
1312
|
+
window.blur = () => {
|
|
1313
|
+
try {
|
|
1314
|
+
window.dispatchEvent(new window.Event("blur"));
|
|
1315
|
+
} catch {
|
|
1316
|
+
}
|
|
1317
|
+
};
|
|
1318
|
+
}
|
|
1319
|
+
const wrapCb = (cb) => {
|
|
1320
|
+
if (typeof cb !== "function") return cb;
|
|
1321
|
+
return function wrappedCallback(...args) {
|
|
1322
|
+
try {
|
|
1323
|
+
return cb.apply(this, args);
|
|
1324
|
+
} catch (e) {
|
|
1325
|
+
that.recordExecutionError(e, "uncaughtException");
|
|
1326
|
+
}
|
|
1327
|
+
};
|
|
1328
|
+
};
|
|
1329
|
+
const _setTimeout = window.setTimeout?.bind(window);
|
|
1330
|
+
const _setInterval = window.setInterval?.bind(window);
|
|
1331
|
+
if (_setTimeout) window.setTimeout = (cb, ms, ...rest) => _setTimeout(wrapCb(cb), ms, ...rest);
|
|
1332
|
+
if (_setInterval) window.setInterval = (cb, ms, ...rest) => _setInterval(wrapCb(cb), ms, ...rest);
|
|
1333
|
+
if (window.queueMicrotask) {
|
|
1334
|
+
const _q = window.queueMicrotask.bind(window);
|
|
1335
|
+
window.queueMicrotask = (cb) => _q(wrapCb(cb));
|
|
1336
|
+
}
|
|
1337
|
+
if (window.addEventListener) {
|
|
1338
|
+
window.addEventListener("error", (ev) => {
|
|
1339
|
+
that.recordExecutionError(ev?.error || ev?.message || ev, "error");
|
|
1340
|
+
if (typeof ev?.preventDefault === "function") ev.preventDefault();
|
|
1341
|
+
});
|
|
1342
|
+
window.addEventListener("unhandledrejection", (ev) => {
|
|
1343
|
+
that.recordExecutionError(ev?.reason || ev, "unhandledrejection");
|
|
1344
|
+
if (typeof ev?.preventDefault === "function") ev.preventDefault();
|
|
1345
|
+
});
|
|
1346
|
+
}
|
|
1347
|
+
let domContentLoadedDispatched = false;
|
|
1348
|
+
window.__phantom_markDCLDispatched = () => {
|
|
1349
|
+
domContentLoadedDispatched = true;
|
|
1350
|
+
};
|
|
1351
|
+
if (window.document?.addEventListener) {
|
|
1352
|
+
const origDocAddEventListener = window.document.addEventListener.bind(window.document);
|
|
1353
|
+
window.document.addEventListener = function(type, listener, options) {
|
|
1354
|
+
const result = origDocAddEventListener(type, listener, options);
|
|
1355
|
+
if (type === "DOMContentLoaded" && domContentLoadedDispatched) {
|
|
1356
|
+
window.setTimeout(() => {
|
|
1357
|
+
try {
|
|
1358
|
+
if (typeof listener === "function") {
|
|
1359
|
+
listener.call(window.document, new window.Event("DOMContentLoaded"));
|
|
1360
|
+
} else if (listener && typeof listener.handleEvent === "function") {
|
|
1361
|
+
listener.handleEvent.call(listener, new window.Event("DOMContentLoaded"));
|
|
1362
|
+
}
|
|
1363
|
+
} catch (e) {
|
|
1364
|
+
that.recordExecutionError(e, "error");
|
|
1365
|
+
}
|
|
1366
|
+
}, 0);
|
|
1367
|
+
}
|
|
1368
|
+
return result;
|
|
1369
|
+
};
|
|
1370
|
+
}
|
|
1371
|
+
const makeCtx = () => {
|
|
1372
|
+
const noop = () => {
|
|
1373
|
+
};
|
|
1374
|
+
const base = {
|
|
1375
|
+
canvas: null,
|
|
1376
|
+
// Common methods used by renderers
|
|
1377
|
+
save: noop,
|
|
1378
|
+
restore: noop,
|
|
1379
|
+
beginPath: noop,
|
|
1380
|
+
closePath: noop,
|
|
1381
|
+
clip: noop,
|
|
1382
|
+
moveTo: noop,
|
|
1383
|
+
lineTo: noop,
|
|
1384
|
+
bezierCurveTo: noop,
|
|
1385
|
+
quadraticCurveTo: noop,
|
|
1386
|
+
rect: noop,
|
|
1387
|
+
arc: noop,
|
|
1388
|
+
fill: noop,
|
|
1389
|
+
stroke: noop,
|
|
1390
|
+
clearRect: noop,
|
|
1391
|
+
fillRect: noop,
|
|
1392
|
+
strokeRect: noop,
|
|
1393
|
+
drawImage: noop,
|
|
1394
|
+
translate: noop,
|
|
1395
|
+
rotate: noop,
|
|
1396
|
+
scale: noop,
|
|
1397
|
+
transform: noop,
|
|
1398
|
+
setTransform: noop,
|
|
1399
|
+
resetTransform: noop,
|
|
1400
|
+
measureText: (text) => ({ width: (String(text).length || 0) * 8 }),
|
|
1401
|
+
createLinearGradient: () => ({ addColorStop: noop }),
|
|
1402
|
+
createRadialGradient: () => ({ addColorStop: noop }),
|
|
1403
|
+
createPattern: () => ({}),
|
|
1404
|
+
getImageData: () => ({ data: new Uint8ClampedArray(0) }),
|
|
1405
|
+
putImageData: noop
|
|
1406
|
+
};
|
|
1407
|
+
return new Proxy(base, {
|
|
1408
|
+
get(target, prop) {
|
|
1409
|
+
if (prop in target) return target[prop];
|
|
1410
|
+
return noop;
|
|
1411
|
+
},
|
|
1412
|
+
set(target, prop, value) {
|
|
1413
|
+
target[prop] = value;
|
|
1414
|
+
return true;
|
|
1415
|
+
}
|
|
1416
|
+
});
|
|
1417
|
+
};
|
|
1418
|
+
if (window.HTMLCanvasElement && window.HTMLCanvasElement.prototype) {
|
|
1419
|
+
const proto = window.HTMLCanvasElement.prototype;
|
|
1420
|
+
const origGetContext = proto.getContext;
|
|
1421
|
+
proto.getContext = function(type, ...args) {
|
|
1422
|
+
try {
|
|
1423
|
+
const ctx = origGetContext ? origGetContext.call(this, type, ...args) : null;
|
|
1424
|
+
if (ctx) return ctx;
|
|
1425
|
+
} catch {
|
|
1426
|
+
}
|
|
1427
|
+
const mock = makeCtx();
|
|
1428
|
+
mock.canvas = this;
|
|
1429
|
+
return mock;
|
|
1430
|
+
};
|
|
1431
|
+
}
|
|
1432
|
+
}
|
|
1433
|
+
async execute() {
|
|
1434
|
+
const onNodeUncaught = (err) => this.recordExecutionError(err, "uncaughtException");
|
|
1435
|
+
const onNodeUnhandled = (reason) => this.recordExecutionError(reason, "unhandledRejection");
|
|
1436
|
+
process.on("uncaughtException", onNodeUncaught);
|
|
1437
|
+
process.on("unhandledRejection", onNodeUnhandled);
|
|
1438
|
+
try {
|
|
1439
|
+
const virtualConsole = new VirtualConsole();
|
|
1440
|
+
virtualConsole.on("log", (...args) => console.log("[JSDOM Log]", ...args));
|
|
1441
|
+
virtualConsole.on("error", (...args) => console.error("[JSDOM Error]", ...args));
|
|
1442
|
+
virtualConsole.on("warn", (...args) => console.warn("[JSDOM Warn]", ...args));
|
|
1443
|
+
const cookieJar = new CookieJar();
|
|
1444
|
+
this.harvestData.cookies.forEach((c) => {
|
|
1445
|
+
try {
|
|
1446
|
+
cookieJar.setCookieSync(c, this.harvestData.url);
|
|
1447
|
+
} catch (e) {
|
|
1448
|
+
}
|
|
1449
|
+
});
|
|
1450
|
+
const DEFAULT_UA = chromeDocumentHeaders().headers["user-agent"];
|
|
1451
|
+
const htmlWithDataOnly = this.stripExecutableScripts(this.harvestData.html);
|
|
1452
|
+
const dom = new JSDOM(htmlWithDataOnly, {
|
|
1453
|
+
url: this.harvestData.url,
|
|
1454
|
+
referrer: "https://www.google.com/",
|
|
1455
|
+
runScripts: "dangerously",
|
|
1456
|
+
cookieJar,
|
|
1457
|
+
virtualConsole,
|
|
1458
|
+
beforeParse: (window2) => {
|
|
1459
|
+
try {
|
|
1460
|
+
Object.defineProperty(window2.navigator, "userAgent", {
|
|
1461
|
+
value: DEFAULT_UA,
|
|
1462
|
+
configurable: true
|
|
1463
|
+
});
|
|
1464
|
+
} catch {
|
|
1465
|
+
}
|
|
1466
|
+
window2.IntersectionObserver = class IntersectionObserver {
|
|
1467
|
+
constructor(callback) {
|
|
1468
|
+
this._callback = callback;
|
|
1469
|
+
}
|
|
1470
|
+
observe(target) {
|
|
1471
|
+
try {
|
|
1472
|
+
Promise.resolve().then(() => {
|
|
1473
|
+
this._callback?.([{ target, isIntersecting: true, intersectionRatio: 1 }], this);
|
|
1474
|
+
});
|
|
1475
|
+
} catch {
|
|
1476
|
+
}
|
|
1477
|
+
}
|
|
1478
|
+
unobserve() {
|
|
1479
|
+
}
|
|
1480
|
+
disconnect() {
|
|
1481
|
+
}
|
|
1482
|
+
takeRecords() {
|
|
1483
|
+
return [];
|
|
1484
|
+
}
|
|
1485
|
+
};
|
|
1486
|
+
window2.matchMedia = window2.matchMedia || function(query) {
|
|
1487
|
+
return {
|
|
1488
|
+
matches: false,
|
|
1489
|
+
media: query,
|
|
1490
|
+
onchange: null,
|
|
1491
|
+
addListener: function() {
|
|
1492
|
+
},
|
|
1493
|
+
removeListener: function() {
|
|
1494
|
+
},
|
|
1495
|
+
addEventListener: function() {
|
|
1496
|
+
},
|
|
1497
|
+
removeEventListener: function() {
|
|
1498
|
+
},
|
|
1499
|
+
dispatchEvent: function() {
|
|
1500
|
+
return false;
|
|
1501
|
+
}
|
|
1502
|
+
};
|
|
1503
|
+
};
|
|
1504
|
+
if (!window2.performance) {
|
|
1505
|
+
window2.performance = {};
|
|
1506
|
+
}
|
|
1507
|
+
if (!window2.performance.mark) {
|
|
1508
|
+
window2.performance.mark = (name) => {
|
|
1509
|
+
return {
|
|
1510
|
+
name,
|
|
1511
|
+
entryType: "mark",
|
|
1512
|
+
startTime: Date.now(),
|
|
1513
|
+
duration: 0,
|
|
1514
|
+
toJSON: () => {
|
|
1515
|
+
}
|
|
1516
|
+
};
|
|
1517
|
+
};
|
|
1518
|
+
}
|
|
1519
|
+
if (!window2.performance.measure) {
|
|
1520
|
+
window2.performance.measure = (name, startMark, endMark) => {
|
|
1521
|
+
return {
|
|
1522
|
+
name,
|
|
1523
|
+
entryType: "measure",
|
|
1524
|
+
startTime: Date.now(),
|
|
1525
|
+
duration: 0,
|
|
1526
|
+
toJSON: () => {
|
|
1527
|
+
}
|
|
1528
|
+
};
|
|
1529
|
+
};
|
|
1530
|
+
}
|
|
1531
|
+
if (!window2.performance.clearMarks) {
|
|
1532
|
+
window2.performance.clearMarks = () => {
|
|
1533
|
+
};
|
|
1534
|
+
}
|
|
1535
|
+
if (!window2.performance.clearMeasures) {
|
|
1536
|
+
window2.performance.clearMeasures = () => {
|
|
1537
|
+
};
|
|
1538
|
+
}
|
|
1539
|
+
if (!window2.performance.getEntriesByName) {
|
|
1540
|
+
window2.performance.getEntriesByName = () => [];
|
|
1541
|
+
}
|
|
1542
|
+
if (!window2.performance.getEntriesByType) {
|
|
1543
|
+
window2.performance.getEntriesByType = () => [];
|
|
1544
|
+
}
|
|
1545
|
+
if (window2.URL) {
|
|
1546
|
+
const NativeURL = window2.URL;
|
|
1547
|
+
const URLShim = function(input, base) {
|
|
1548
|
+
if (base === void 0 && typeof input === "string" && /^[/?#]/.test(input)) {
|
|
1549
|
+
return new NativeURL(input, window2.location.href);
|
|
1550
|
+
}
|
|
1551
|
+
return new NativeURL(input, base);
|
|
1552
|
+
};
|
|
1553
|
+
try {
|
|
1554
|
+
Object.setPrototypeOf(URLShim, NativeURL);
|
|
1555
|
+
} catch {
|
|
1556
|
+
}
|
|
1557
|
+
URLShim.prototype = NativeURL.prototype;
|
|
1558
|
+
window2.URL = URLShim;
|
|
1559
|
+
}
|
|
1560
|
+
if (!window2.requestAnimationFrame) {
|
|
1561
|
+
window2.requestAnimationFrame = (callback) => window2.setTimeout(callback, 0);
|
|
1562
|
+
}
|
|
1563
|
+
if (!window2.cancelAnimationFrame) {
|
|
1564
|
+
window2.cancelAnimationFrame = (id) => window2.clearTimeout(id);
|
|
1565
|
+
}
|
|
1566
|
+
if (!window2.process) {
|
|
1567
|
+
window2.process = {
|
|
1568
|
+
env: { NODE_ENV: "production" },
|
|
1569
|
+
version: "",
|
|
1570
|
+
nextTick: (cb) => window2.setTimeout(cb, 0),
|
|
1571
|
+
browser: true
|
|
1572
|
+
};
|
|
1573
|
+
}
|
|
1574
|
+
this.injectPhantom(window2);
|
|
1575
|
+
this.installWebPlatformShims(window2);
|
|
1576
|
+
const HOOK_FLAG = "__phantomModuleScriptHookInstalled";
|
|
1577
|
+
if (!window2[HOOK_FLAG]) {
|
|
1578
|
+
window2[HOOK_FLAG] = true;
|
|
1579
|
+
const that = this;
|
|
1580
|
+
const NodeProto = window2.Node?.prototype;
|
|
1581
|
+
const syntheticLoadedLinks = /* @__PURE__ */ new WeakSet();
|
|
1582
|
+
if (NodeProto) {
|
|
1583
|
+
const origAppendChild = NodeProto.appendChild;
|
|
1584
|
+
const origInsertBefore = NodeProto.insertBefore;
|
|
1585
|
+
const maybeHandle = (node) => {
|
|
1586
|
+
try {
|
|
1587
|
+
if (!node) return;
|
|
1588
|
+
const synthesizeStylesheetLoad = (el) => {
|
|
1589
|
+
const tag = String(el?.tagName || "").toLowerCase();
|
|
1590
|
+
if (tag !== "link") return;
|
|
1591
|
+
const rel = String(el.rel || el.getAttribute?.("rel") || "").toLowerCase();
|
|
1592
|
+
if (rel !== "stylesheet") return;
|
|
1593
|
+
if (syntheticLoadedLinks.has(el)) return;
|
|
1594
|
+
syntheticLoadedLinks.add(el);
|
|
1595
|
+
window2.setTimeout(() => {
|
|
1596
|
+
try {
|
|
1597
|
+
const ev = new window2.Event("load");
|
|
1598
|
+
if (typeof el.onload === "function") el.onload(ev);
|
|
1599
|
+
if (typeof el.dispatchEvent === "function") el.dispatchEvent(ev);
|
|
1600
|
+
} catch {
|
|
1601
|
+
}
|
|
1602
|
+
}, 0);
|
|
1603
|
+
};
|
|
1604
|
+
const handleOne = (el) => {
|
|
1605
|
+
const tag = String(el?.tagName || "").toLowerCase();
|
|
1606
|
+
if (tag === "link") {
|
|
1607
|
+
synthesizeStylesheetLoad(el);
|
|
1608
|
+
return;
|
|
1609
|
+
}
|
|
1610
|
+
if (tag !== "script") return;
|
|
1611
|
+
const t2 = String(el.type || el.getAttribute?.("type") || "").toLowerCase();
|
|
1612
|
+
const src = String(el.src || el.getAttribute?.("src") || "");
|
|
1613
|
+
if (!src) return;
|
|
1614
|
+
const abs = new URL(src, window2.location.href).toString();
|
|
1615
|
+
if (shouldSkipDynamicScriptUrl(abs, that.harvestData.url, that.thirdPartyPolicy)) {
|
|
1616
|
+
return;
|
|
1617
|
+
}
|
|
1618
|
+
if (t2 === "module") {
|
|
1619
|
+
if (that.handledModuleScriptUrls.has(abs)) return;
|
|
1620
|
+
that.handledModuleScriptUrls.add(abs);
|
|
1621
|
+
if (process.env.PHANTOM_DEBUG_MODULES === "1") {
|
|
1622
|
+
console.log("[Executor] Detected module script:", abs);
|
|
1623
|
+
}
|
|
1624
|
+
void that.handleModuleScript(abs, window2);
|
|
1625
|
+
return;
|
|
1626
|
+
}
|
|
1627
|
+
if (that.handledClassicScriptUrls.has(abs)) return;
|
|
1628
|
+
that.handledClassicScriptUrls.add(abs);
|
|
1629
|
+
const pendingId = that.trackRequestStart(abs, "resource_load");
|
|
1630
|
+
const start = Date.now();
|
|
1631
|
+
const logEntry = {
|
|
1632
|
+
type: "resource_load",
|
|
1633
|
+
url: abs,
|
|
1634
|
+
timestamp: start,
|
|
1635
|
+
initiator: "Dynamic Script Loader"
|
|
1636
|
+
};
|
|
1637
|
+
that.logRequest(logEntry);
|
|
1638
|
+
const runClassic = async () => {
|
|
1639
|
+
try {
|
|
1640
|
+
let code = that.scriptCache.get(abs);
|
|
1641
|
+
let headers = {};
|
|
1642
|
+
let status = 200;
|
|
1643
|
+
if (code == null) {
|
|
1644
|
+
const { headers: subHeaders } = chromeSubresourceHeaders(that.harvestData.url);
|
|
1645
|
+
subHeaders["user-agent"] = DEFAULT_UA;
|
|
1646
|
+
subHeaders["referer"] = window2.location.href;
|
|
1647
|
+
const proxyResp = await that.fetchViaProxy(abs, "GET", subHeaders, "", "assets");
|
|
1648
|
+
status = proxyResp.status;
|
|
1649
|
+
headers = proxyResp.headers || {};
|
|
1650
|
+
code = proxyResp.status < 400 ? proxyResp.body : "";
|
|
1651
|
+
that.scriptCache.set(abs, code);
|
|
1652
|
+
}
|
|
1653
|
+
logEntry.status = status;
|
|
1654
|
+
logEntry.responseHeaders = headers;
|
|
1655
|
+
logEntry.responseBody = code;
|
|
1656
|
+
that.checkForMatch(logEntry);
|
|
1657
|
+
if (status >= 400) return;
|
|
1658
|
+
if (code && that.looksLikeESModule(code)) {
|
|
1659
|
+
that.handledModuleScriptUrls.add(abs);
|
|
1660
|
+
await that.handleModuleScript(abs, window2, { cacheKey: abs });
|
|
1661
|
+
return;
|
|
1662
|
+
}
|
|
1663
|
+
const t0 = Date.now();
|
|
1664
|
+
const transformed = that.transformer.transform(code, `dynamic_script_${nodeCrypto.createHash("sha256").update(abs).digest("hex").slice(0, 10)}`);
|
|
1665
|
+
that.timings.transform_ms_total += Date.now() - t0;
|
|
1666
|
+
that.timings.scripts_transformed_count++;
|
|
1667
|
+
if (!that.windowClosed) window2.eval(transformed);
|
|
1668
|
+
} catch (e) {
|
|
1669
|
+
that.recordExecutionError(e, "error");
|
|
1670
|
+
} finally {
|
|
1671
|
+
that.trackRequestEnd(pendingId);
|
|
1672
|
+
}
|
|
1673
|
+
};
|
|
1674
|
+
void runClassic();
|
|
1675
|
+
};
|
|
1676
|
+
handleOne(node);
|
|
1677
|
+
if (typeof node.querySelectorAll === "function") {
|
|
1678
|
+
const scripts = node.querySelectorAll("script[src]");
|
|
1679
|
+
for (const s of scripts) handleOne(s);
|
|
1680
|
+
const stylesheets = node.querySelectorAll('link[rel="stylesheet"]');
|
|
1681
|
+
for (const link of stylesheets) handleOne(link);
|
|
1682
|
+
}
|
|
1683
|
+
} catch {
|
|
1684
|
+
}
|
|
1685
|
+
};
|
|
1686
|
+
if (typeof origAppendChild === "function") {
|
|
1687
|
+
NodeProto.appendChild = function(child) {
|
|
1688
|
+
maybeHandle(child);
|
|
1689
|
+
const ret = origAppendChild.call(this, child);
|
|
1690
|
+
return ret;
|
|
1691
|
+
};
|
|
1692
|
+
}
|
|
1693
|
+
if (typeof origInsertBefore === "function") {
|
|
1694
|
+
NodeProto.insertBefore = function(newNode, refNode) {
|
|
1695
|
+
maybeHandle(newNode);
|
|
1696
|
+
const ret = origInsertBefore.call(this, newNode, refNode);
|
|
1697
|
+
return ret;
|
|
1698
|
+
};
|
|
1699
|
+
}
|
|
1700
|
+
if (typeof window2.MutationObserver === "function" && window2.document) {
|
|
1701
|
+
try {
|
|
1702
|
+
const obs = new window2.MutationObserver((mutations) => {
|
|
1703
|
+
try {
|
|
1704
|
+
for (const m of mutations) {
|
|
1705
|
+
const added = m?.addedNodes;
|
|
1706
|
+
if (!added) continue;
|
|
1707
|
+
for (const n of Array.from(added)) maybeHandle(n);
|
|
1708
|
+
}
|
|
1709
|
+
} catch {
|
|
1710
|
+
}
|
|
1711
|
+
});
|
|
1712
|
+
obs.observe(window2.document, { childList: true, subtree: true });
|
|
1713
|
+
} catch {
|
|
1714
|
+
}
|
|
1715
|
+
}
|
|
1716
|
+
}
|
|
1717
|
+
}
|
|
1718
|
+
}
|
|
1719
|
+
});
|
|
1720
|
+
const { window } = dom;
|
|
1721
|
+
let readyStateValue = "loading";
|
|
1722
|
+
try {
|
|
1723
|
+
Object.defineProperty(window.document, "readyState", {
|
|
1724
|
+
configurable: true,
|
|
1725
|
+
get() {
|
|
1726
|
+
return readyStateValue;
|
|
1727
|
+
}
|
|
1728
|
+
});
|
|
1729
|
+
} catch {
|
|
1730
|
+
}
|
|
1731
|
+
const inertScriptPlaceholders = Array.from(
|
|
1732
|
+
window.document.querySelectorAll('script[type="application/x-phantom-script"]')
|
|
1733
|
+
);
|
|
1734
|
+
const placeholderById = /* @__PURE__ */ new Map();
|
|
1735
|
+
const orderedScripts = [...this.harvestData.scripts].sort((a, b) => a.order - b.order);
|
|
1736
|
+
orderedScripts.forEach((script, index) => {
|
|
1737
|
+
placeholderById.set(script.id, inertScriptPlaceholders[index] ?? null);
|
|
1738
|
+
});
|
|
1739
|
+
const syncScripts = this.harvestData.scripts.filter((s) => s.execution === "sync");
|
|
1740
|
+
const deferScripts = this.harvestData.scripts.filter((s) => s.execution === "defer");
|
|
1741
|
+
const asyncScripts = this.harvestData.scripts.filter((s) => s.execution === "async");
|
|
1742
|
+
const currentScriptState = { value: null };
|
|
1743
|
+
try {
|
|
1744
|
+
Object.defineProperty(window.document, "currentScript", {
|
|
1745
|
+
configurable: true,
|
|
1746
|
+
get() {
|
|
1747
|
+
return currentScriptState.value;
|
|
1748
|
+
}
|
|
1749
|
+
});
|
|
1750
|
+
} catch {
|
|
1751
|
+
}
|
|
1752
|
+
const shouldSkipScript = (script) => {
|
|
1753
|
+
return shouldSkipScriptAsset(script, this.harvestData.url, this.thirdPartyPolicy);
|
|
1754
|
+
};
|
|
1755
|
+
const executeScript = async (script) => {
|
|
1756
|
+
if (shouldSkipScript(script)) return;
|
|
1757
|
+
if (script.type === "external" && script.url) {
|
|
1758
|
+
if (script.scriptKind === "module" && this.handledModuleScriptUrls.has(script.url)) return;
|
|
1759
|
+
if (script.scriptKind === "classic" && this.handledClassicScriptUrls.has(script.url)) return;
|
|
1760
|
+
}
|
|
1761
|
+
const prevCurrentScript = currentScriptState.value;
|
|
1762
|
+
const scriptEl = placeholderById.get(script.id) || window.document.createElement("script");
|
|
1763
|
+
if (script.url) scriptEl.src = script.url;
|
|
1764
|
+
if (script.scriptKind === "module") scriptEl.type = "module";
|
|
1765
|
+
if (script.execution === "async") scriptEl.async = true;
|
|
1766
|
+
if (script.execution === "defer") scriptEl.defer = true;
|
|
1767
|
+
currentScriptState.value = scriptEl;
|
|
1768
|
+
if (script.scriptKind === "module") {
|
|
1769
|
+
try {
|
|
1770
|
+
if (script.type === "external" && script.url) {
|
|
1771
|
+
this.handledModuleScriptUrls.add(script.url);
|
|
1772
|
+
await this.handleModuleScript(script.url, window);
|
|
1773
|
+
} else {
|
|
1774
|
+
const inlineEntryUrl = new URL(`./__dynafetch_inline_module__/${script.id}.mjs`, this.harvestData.url).toString();
|
|
1775
|
+
await this.handleModuleScript(inlineEntryUrl, window, {
|
|
1776
|
+
inlineSource: script.content,
|
|
1777
|
+
cacheKey: `inline:${script.id}`
|
|
1778
|
+
});
|
|
1779
|
+
}
|
|
1780
|
+
} catch (e) {
|
|
1781
|
+
console.warn(`[Executor] Module script ${script.id} failed:`, e);
|
|
1782
|
+
} finally {
|
|
1783
|
+
currentScriptState.value = prevCurrentScript;
|
|
1784
|
+
}
|
|
1785
|
+
return;
|
|
1786
|
+
}
|
|
1787
|
+
if (script.type === "external" && script.url) {
|
|
1788
|
+
this.handledClassicScriptUrls.add(script.url);
|
|
1789
|
+
}
|
|
1790
|
+
const t0 = Date.now();
|
|
1791
|
+
const code = this.transformer.transform(script.content, script.id);
|
|
1792
|
+
this.timings.transform_ms_total += Date.now() - t0;
|
|
1793
|
+
this.timings.scripts_transformed_count++;
|
|
1794
|
+
try {
|
|
1795
|
+
window.eval(code);
|
|
1796
|
+
} catch (e) {
|
|
1797
|
+
console.warn(`[Executor] Script ${script.id} failed:`, e);
|
|
1798
|
+
} finally {
|
|
1799
|
+
currentScriptState.value = prevCurrentScript;
|
|
1800
|
+
}
|
|
1801
|
+
};
|
|
1802
|
+
for (const s of syncScripts) {
|
|
1803
|
+
await executeScript(s);
|
|
1804
|
+
if (this.matchFound && !this.findAll) break;
|
|
1805
|
+
}
|
|
1806
|
+
{
|
|
1807
|
+
const deferWork = (async () => {
|
|
1808
|
+
for (const s of deferScripts) {
|
|
1809
|
+
await executeScript(s);
|
|
1810
|
+
if (this.matchFound && !this.findAll) break;
|
|
1811
|
+
}
|
|
1812
|
+
})();
|
|
1813
|
+
await Promise.all([deferWork, this.waitForModuleWork(this.moduleWaitMs)]);
|
|
1814
|
+
}
|
|
1815
|
+
readyStateValue = "interactive";
|
|
1816
|
+
window.document.dispatchEvent(new window.Event("readystatechange"));
|
|
1817
|
+
window.document.dispatchEvent(new window.Event("DOMContentLoaded"));
|
|
1818
|
+
if (typeof window.__phantom_markDCLDispatched === "function") {
|
|
1819
|
+
window.__phantom_markDCLDispatched();
|
|
1820
|
+
}
|
|
1821
|
+
window.document.dispatchEvent(new window.Event("visibilitychange"));
|
|
1822
|
+
if (asyncScripts.length > 0) {
|
|
1823
|
+
await Promise.all(asyncScripts.map((s) => executeScript(s).catch(() => {
|
|
1824
|
+
})));
|
|
1825
|
+
}
|
|
1826
|
+
if (this.moduleInFlight.size > 0) {
|
|
1827
|
+
await this.waitForModuleWork(this.moduleWaitMs);
|
|
1828
|
+
}
|
|
1829
|
+
readyStateValue = "complete";
|
|
1830
|
+
window.document.dispatchEvent(new window.Event("readystatechange"));
|
|
1831
|
+
window.dispatchEvent(new window.Event("load"));
|
|
1832
|
+
window.dispatchEvent(new window.Event("pageshow"));
|
|
1833
|
+
window.dispatchEvent(new window.Event("focus"));
|
|
1834
|
+
if (this.moduleInFlight.size > 0) {
|
|
1835
|
+
await this.waitForModuleWork(this.moduleWaitMs);
|
|
1836
|
+
}
|
|
1837
|
+
console.log("[Executor] Waiting for network quiescence...");
|
|
1838
|
+
const quiescenceStart = Date.now();
|
|
1839
|
+
try {
|
|
1840
|
+
await this.waitForQuiescence();
|
|
1841
|
+
} catch (e) {
|
|
1842
|
+
console.warn("[Executor] Quiescence wait failed:", e);
|
|
1843
|
+
}
|
|
1844
|
+
this.timings.quiescence_ms = Date.now() - quiescenceStart;
|
|
1845
|
+
const reason = this.matchFound && !this.findAll ? "(early exit on match)" : "";
|
|
1846
|
+
console.log(`[Executor] Quiescence reached in ${Date.now() - quiescenceStart}ms ${reason}`);
|
|
1847
|
+
const renderedHtml = this.serializeDocument(window);
|
|
1848
|
+
this.windowClosed = true;
|
|
1849
|
+
try {
|
|
1850
|
+
window.close();
|
|
1851
|
+
} catch {
|
|
1852
|
+
}
|
|
1853
|
+
const result = {
|
|
1854
|
+
logs: this.logs,
|
|
1855
|
+
matchedRequests: this.earlyMatches,
|
|
1856
|
+
renderedHtml,
|
|
1857
|
+
timings: { ...this.timings },
|
|
1858
|
+
errors: this.executionErrors.length ? this.executionErrors : void 0
|
|
1859
|
+
};
|
|
1860
|
+
const shutdownGraceMs = this.clampMs(Number(process.env.PHANTOM_SHUTDOWN_GRACE_MS ?? 50), 10, 5e3);
|
|
1861
|
+
await new Promise((r) => setTimeout(r, shutdownGraceMs));
|
|
1862
|
+
return result;
|
|
1863
|
+
} finally {
|
|
1864
|
+
process.off("uncaughtException", onNodeUncaught);
|
|
1865
|
+
process.off("unhandledRejection", onNodeUnhandled);
|
|
1866
|
+
}
|
|
1867
|
+
}
|
|
1868
|
+
serializeDocument(window) {
|
|
1869
|
+
try {
|
|
1870
|
+
const doctype = window.document?.doctype;
|
|
1871
|
+
const serializedDoctype = doctype ? `<!DOCTYPE ${doctype.name}${doctype.publicId ? ` PUBLIC "${doctype.publicId}"` : ""}${doctype.systemId ? ` "${doctype.systemId}"` : ""}>` : "";
|
|
1872
|
+
const html = window.document?.documentElement?.outerHTML ?? "";
|
|
1873
|
+
return `${serializedDoctype}${html}`;
|
|
1874
|
+
} catch {
|
|
1875
|
+
return this.harvestData.html;
|
|
1876
|
+
}
|
|
1877
|
+
}
|
|
1878
|
+
stripExecutableScripts(html) {
|
|
1879
|
+
return html.replace(/<script\b([^>]*)>([\s\S]*?)<\/script>/gim, (match, attrs, content) => {
|
|
1880
|
+
const isDataScript = attrs.includes('type="application/json"') || attrs.includes("type='application/json'") || attrs.includes('type="application/ld+json"') || attrs.includes('id="__ACGH_DATA__"');
|
|
1881
|
+
if (isDataScript) return match;
|
|
1882
|
+
const inertAttrs = attrs.replace(/\btype\s*=\s*(?:"[^"]*"|'[^']*'|[^\s>]+)/i, "").trim();
|
|
1883
|
+
return `<script type="application/x-phantom-script"${inertAttrs ? ` ${inertAttrs}` : ""}></script>`;
|
|
1884
|
+
});
|
|
1885
|
+
}
|
|
1886
|
+
injectPhantom(window) {
|
|
1887
|
+
const that = this;
|
|
1888
|
+
const headersToObject = (h) => {
|
|
1889
|
+
if (!h) return {};
|
|
1890
|
+
if (typeof h.forEach === "function") {
|
|
1891
|
+
const out = {};
|
|
1892
|
+
try {
|
|
1893
|
+
h.forEach((v, k) => {
|
|
1894
|
+
out[String(k)] = String(v);
|
|
1895
|
+
});
|
|
1896
|
+
return out;
|
|
1897
|
+
} catch {
|
|
1898
|
+
}
|
|
1899
|
+
}
|
|
1900
|
+
if (typeof h.entries === "function") {
|
|
1901
|
+
const out = {};
|
|
1902
|
+
try {
|
|
1903
|
+
for (const [k, v] of h.entries()) out[String(k)] = String(v);
|
|
1904
|
+
return out;
|
|
1905
|
+
} catch {
|
|
1906
|
+
}
|
|
1907
|
+
}
|
|
1908
|
+
if (typeof h === "object") {
|
|
1909
|
+
const out = {};
|
|
1910
|
+
for (const [k, v] of Object.entries(h)) {
|
|
1911
|
+
if (v === void 0 || v === null) continue;
|
|
1912
|
+
out[String(k)] = Array.isArray(v) ? String(v[0]) : String(v);
|
|
1913
|
+
}
|
|
1914
|
+
return out;
|
|
1915
|
+
}
|
|
1916
|
+
return {};
|
|
1917
|
+
};
|
|
1918
|
+
const normalizeFetchInput = async (input, opts) => {
|
|
1919
|
+
let url = input;
|
|
1920
|
+
let method = opts?.method || "GET";
|
|
1921
|
+
let headers = headersToObject(opts?.headers);
|
|
1922
|
+
let body = opts?.body;
|
|
1923
|
+
const isRequestLike = input && typeof input === "object" && (typeof input.url === "string" || typeof input.href === "string") && (typeof input.method === "string" || typeof input.headers === "object" || typeof input.headers?.forEach === "function");
|
|
1924
|
+
if (isRequestLike) {
|
|
1925
|
+
url = input.url || input.href;
|
|
1926
|
+
method = opts?.method || input.method || method;
|
|
1927
|
+
headers = { ...headersToObject(input.headers), ...headers };
|
|
1928
|
+
if (body === void 0) {
|
|
1929
|
+
try {
|
|
1930
|
+
if (typeof input.text === "function") body = await input.text();
|
|
1931
|
+
} catch {
|
|
1932
|
+
}
|
|
1933
|
+
}
|
|
1934
|
+
}
|
|
1935
|
+
const fullUrl = new URL(String(url), window.location.href).toString();
|
|
1936
|
+
return { url: fullUrl, method, headers, body };
|
|
1937
|
+
};
|
|
1938
|
+
const toSafeResponseHeaders = (rawHeaders) => {
|
|
1939
|
+
const H = window.Headers || global.Headers;
|
|
1940
|
+
if (typeof H !== "function") return rawHeaders ?? {};
|
|
1941
|
+
const safe = new H();
|
|
1942
|
+
if (!rawHeaders) return safe;
|
|
1943
|
+
for (const [k, v] of Object.entries(rawHeaders)) {
|
|
1944
|
+
if (v == null) continue;
|
|
1945
|
+
const value = String(v).replace(/[\r\n]+/g, ", ").trim();
|
|
1946
|
+
if (!value) continue;
|
|
1947
|
+
try {
|
|
1948
|
+
safe.set(k, value);
|
|
1949
|
+
} catch {
|
|
1950
|
+
}
|
|
1951
|
+
}
|
|
1952
|
+
return safe;
|
|
1953
|
+
};
|
|
1954
|
+
window.__phantom = {
|
|
1955
|
+
fetch: async (input, opts = {}) => {
|
|
1956
|
+
const norm = await normalizeFetchInput(input, opts);
|
|
1957
|
+
const start = Date.now();
|
|
1958
|
+
const headers = {
|
|
1959
|
+
"User-Agent": window.navigator.userAgent,
|
|
1960
|
+
"Cookie": window.document.cookie,
|
|
1961
|
+
"Referer": window.location.href,
|
|
1962
|
+
...norm.headers
|
|
1963
|
+
};
|
|
1964
|
+
const logEntry = {
|
|
1965
|
+
type: "fetch",
|
|
1966
|
+
method: norm.method || "GET",
|
|
1967
|
+
url: norm.url,
|
|
1968
|
+
requestHeaders: headers,
|
|
1969
|
+
requestBody: norm.body,
|
|
1970
|
+
timestamp: start
|
|
1971
|
+
};
|
|
1972
|
+
this.logRequest(logEntry);
|
|
1973
|
+
const pendingId = that.trackRequestStart(norm.url, "fetch");
|
|
1974
|
+
try {
|
|
1975
|
+
const bodyStr = norm.body == null ? "" : typeof norm.body === "string" ? norm.body : String(norm.body);
|
|
1976
|
+
const proxyResp = await that.fetchViaProxy(norm.url, norm.method || "GET", headers, bodyStr);
|
|
1977
|
+
if (proxyResp.error) throw new Error(proxyResp.error);
|
|
1978
|
+
logEntry.status = proxyResp.status;
|
|
1979
|
+
logEntry.responseBody = proxyResp.body;
|
|
1980
|
+
logEntry.responseHeaders = proxyResp.headers;
|
|
1981
|
+
this.checkForMatch(logEntry);
|
|
1982
|
+
return new (window.Response || global.Response)(proxyResp.body, {
|
|
1983
|
+
status: proxyResp.status,
|
|
1984
|
+
headers: toSafeResponseHeaders(proxyResp.headers)
|
|
1985
|
+
});
|
|
1986
|
+
} catch (e) {
|
|
1987
|
+
logEntry.status = 0;
|
|
1988
|
+
logEntry.responseBody = e.message;
|
|
1989
|
+
this.checkForMatch(logEntry);
|
|
1990
|
+
throw e;
|
|
1991
|
+
} finally {
|
|
1992
|
+
that.trackRequestEnd(pendingId);
|
|
1993
|
+
}
|
|
1994
|
+
},
|
|
1995
|
+
WebSocket: class extends WebSocket {
|
|
1996
|
+
constructor(url, protocols) {
|
|
1997
|
+
super(url, protocols);
|
|
1998
|
+
that.logRequest({ type: "websocket", url, timestamp: Date.now() });
|
|
1999
|
+
}
|
|
2000
|
+
},
|
|
2001
|
+
EventSource: class {
|
|
2002
|
+
constructor(url) {
|
|
2003
|
+
this.readyState = 0;
|
|
2004
|
+
// 0=CONNECTING, 1=OPEN, 2=CLOSED
|
|
2005
|
+
this.onopen = null;
|
|
2006
|
+
this.onmessage = null;
|
|
2007
|
+
this.onerror = null;
|
|
2008
|
+
this.listeners = /* @__PURE__ */ new Map();
|
|
2009
|
+
this.url = new URL(String(url), window.location.href).toString();
|
|
2010
|
+
that.logRequest({ type: "eventsource", url: this.url, timestamp: Date.now() });
|
|
2011
|
+
this.readyState = 1;
|
|
2012
|
+
window.setTimeout(() => {
|
|
2013
|
+
try {
|
|
2014
|
+
this.onopen?.({ type: "open" });
|
|
2015
|
+
this.listeners.get("open")?.forEach((fn) => fn({ type: "open" }));
|
|
2016
|
+
} catch (e) {
|
|
2017
|
+
that.recordExecutionError(e, "error");
|
|
2018
|
+
}
|
|
2019
|
+
}, 0);
|
|
2020
|
+
}
|
|
2021
|
+
addEventListener(type, cb) {
|
|
2022
|
+
if (!this.listeners.has(type)) this.listeners.set(type, /* @__PURE__ */ new Set());
|
|
2023
|
+
this.listeners.get(type).add(cb);
|
|
2024
|
+
}
|
|
2025
|
+
removeEventListener(type, cb) {
|
|
2026
|
+
this.listeners.get(type)?.delete(cb);
|
|
2027
|
+
}
|
|
2028
|
+
close() {
|
|
2029
|
+
this.readyState = 2;
|
|
2030
|
+
}
|
|
2031
|
+
},
|
|
2032
|
+
dynamicImport: async (url) => {
|
|
2033
|
+
const fullUrl = new URL(url, window.location.href).toString();
|
|
2034
|
+
const start = Date.now();
|
|
2035
|
+
const logEntry = {
|
|
2036
|
+
type: "dynamic_import",
|
|
2037
|
+
url: fullUrl,
|
|
2038
|
+
timestamp: start,
|
|
2039
|
+
initiator: "dynamicImport"
|
|
2040
|
+
};
|
|
2041
|
+
this.logRequest(logEntry);
|
|
2042
|
+
const pendingId = that.trackRequestStart(fullUrl, "dynamic_import");
|
|
2043
|
+
try {
|
|
2044
|
+
let code = that.scriptCache.get(fullUrl);
|
|
2045
|
+
let responseHeaders;
|
|
2046
|
+
let status = 200;
|
|
2047
|
+
if (code == null) {
|
|
2048
|
+
const proxyResp = await that.fetchViaProxy(fullUrl, "GET", {
|
|
2049
|
+
"User-Agent": window.navigator.userAgent,
|
|
2050
|
+
"Cookie": window.document.cookie,
|
|
2051
|
+
"Referer": window.location.href
|
|
2052
|
+
}, "", "assets");
|
|
2053
|
+
responseHeaders = proxyResp.headers;
|
|
2054
|
+
status = proxyResp.status;
|
|
2055
|
+
if (proxyResp.error || proxyResp.status >= 400) {
|
|
2056
|
+
logEntry.status = status;
|
|
2057
|
+
logEntry.responseHeaders = responseHeaders;
|
|
2058
|
+
logEntry.responseBody = null;
|
|
2059
|
+
this.checkForMatch(logEntry);
|
|
2060
|
+
throw new Error(proxyResp.error || `Status ${proxyResp.status}`);
|
|
2061
|
+
}
|
|
2062
|
+
code = proxyResp.body;
|
|
2063
|
+
that.scriptCache.set(fullUrl, code);
|
|
2064
|
+
}
|
|
2065
|
+
logEntry.status = status;
|
|
2066
|
+
logEntry.responseHeaders = responseHeaders;
|
|
2067
|
+
logEntry.responseBody = code;
|
|
2068
|
+
this.checkForMatch(logEntry);
|
|
2069
|
+
that.handledModuleScriptUrls.add(fullUrl);
|
|
2070
|
+
await that.handleModuleScript(fullUrl, window, {
|
|
2071
|
+
cacheKey: `dynamic:${fullUrl}`
|
|
2072
|
+
});
|
|
2073
|
+
return {};
|
|
2074
|
+
} finally {
|
|
2075
|
+
that.trackRequestEnd(pendingId);
|
|
2076
|
+
}
|
|
2077
|
+
},
|
|
2078
|
+
XMLHttpRequest: class {
|
|
2079
|
+
constructor() {
|
|
2080
|
+
this.readyState = 0;
|
|
2081
|
+
this.status = 0;
|
|
2082
|
+
this.statusText = "";
|
|
2083
|
+
this.responseText = "";
|
|
2084
|
+
this.response = null;
|
|
2085
|
+
this.responseType = "";
|
|
2086
|
+
this.responseURL = "";
|
|
2087
|
+
this.timeout = 0;
|
|
2088
|
+
this.withCredentials = false;
|
|
2089
|
+
this.onreadystatechange = null;
|
|
2090
|
+
this.onload = null;
|
|
2091
|
+
this.onloadend = null;
|
|
2092
|
+
this.onerror = null;
|
|
2093
|
+
this.onprogress = null;
|
|
2094
|
+
this.onabort = null;
|
|
2095
|
+
this.ontimeout = null;
|
|
2096
|
+
this.method = "GET";
|
|
2097
|
+
this.url = "";
|
|
2098
|
+
this.asyncFlag = true;
|
|
2099
|
+
this.requestHeaders = {};
|
|
2100
|
+
this.responseHeaders = {};
|
|
2101
|
+
this.aborted = false;
|
|
2102
|
+
}
|
|
2103
|
+
open(method, url, async = true) {
|
|
2104
|
+
this.method = String(method || "GET").toUpperCase();
|
|
2105
|
+
this.url = new URL(String(url), window.location.href).toString();
|
|
2106
|
+
this.asyncFlag = async !== false;
|
|
2107
|
+
this.aborted = false;
|
|
2108
|
+
if (process.env.PHANTOM_DEBUG_XHR === "1") {
|
|
2109
|
+
console.log("[XHR open]", this.method, this.url);
|
|
2110
|
+
}
|
|
2111
|
+
this.readyState = 1;
|
|
2112
|
+
this.responseURL = this.url;
|
|
2113
|
+
this.onreadystatechange?.({ type: "readystatechange" });
|
|
2114
|
+
}
|
|
2115
|
+
setRequestHeader(k, v) {
|
|
2116
|
+
this.requestHeaders[String(k)] = String(v);
|
|
2117
|
+
}
|
|
2118
|
+
addEventListener(type, cb) {
|
|
2119
|
+
if (type === "load") this.onload = cb;
|
|
2120
|
+
if (type === "error") this.onerror = cb;
|
|
2121
|
+
if (type === "readystatechange") this.onreadystatechange = cb;
|
|
2122
|
+
if (type === "progress") this.onprogress = cb;
|
|
2123
|
+
if (type === "loadend") this.onloadend = cb;
|
|
2124
|
+
if (type === "abort") this.onabort = cb;
|
|
2125
|
+
if (type === "timeout") this.ontimeout = cb;
|
|
2126
|
+
}
|
|
2127
|
+
getAllResponseHeaders() {
|
|
2128
|
+
return Object.entries(this.responseHeaders).map(([k, v]) => `${k}: ${v}\r
|
|
2129
|
+
`).join("");
|
|
2130
|
+
}
|
|
2131
|
+
getResponseHeader(name) {
|
|
2132
|
+
const n = String(name || "").toLowerCase();
|
|
2133
|
+
for (const [k, v] of Object.entries(this.responseHeaders)) {
|
|
2134
|
+
if (k.toLowerCase() === n) return v;
|
|
2135
|
+
}
|
|
2136
|
+
return null;
|
|
2137
|
+
}
|
|
2138
|
+
abort() {
|
|
2139
|
+
this.aborted = true;
|
|
2140
|
+
this.readyState = 0;
|
|
2141
|
+
this.onabort?.({ type: "abort" });
|
|
2142
|
+
this.onloadend?.({ type: "loadend" });
|
|
2143
|
+
}
|
|
2144
|
+
async send(body) {
|
|
2145
|
+
const start = Date.now();
|
|
2146
|
+
const headers = {
|
|
2147
|
+
"User-Agent": window.navigator.userAgent,
|
|
2148
|
+
"Cookie": window.document.cookie,
|
|
2149
|
+
"Referer": window.location.href,
|
|
2150
|
+
...this.requestHeaders
|
|
2151
|
+
};
|
|
2152
|
+
const logEntry = {
|
|
2153
|
+
type: "xhr",
|
|
2154
|
+
method: this.method,
|
|
2155
|
+
url: this.url,
|
|
2156
|
+
requestHeaders: headers,
|
|
2157
|
+
requestBody: body == null ? null : typeof body === "string" ? body : String(body),
|
|
2158
|
+
timestamp: start
|
|
2159
|
+
};
|
|
2160
|
+
that.logRequest(logEntry);
|
|
2161
|
+
if (process.env.PHANTOM_DEBUG_XHR === "1") {
|
|
2162
|
+
console.log("[XHR send]", this.method, this.url, {
|
|
2163
|
+
hasBody: body != null,
|
|
2164
|
+
headers
|
|
2165
|
+
});
|
|
2166
|
+
}
|
|
2167
|
+
const doWork = async () => {
|
|
2168
|
+
const pendingId = that.trackRequestStart(this.url, "xhr");
|
|
2169
|
+
try {
|
|
2170
|
+
if (this.aborted) return;
|
|
2171
|
+
const proxyResp = await that.fetchViaProxy(this.url, this.method, headers, logEntry.requestBody || "");
|
|
2172
|
+
if (proxyResp.error) throw new Error(proxyResp.error);
|
|
2173
|
+
this.responseHeaders = proxyResp.headers || {};
|
|
2174
|
+
this.status = proxyResp.status;
|
|
2175
|
+
this.statusText = proxyResp.status >= 200 && proxyResp.status < 300 ? "OK" : "";
|
|
2176
|
+
this.readyState = 2;
|
|
2177
|
+
this.onreadystatechange?.({ type: "readystatechange" });
|
|
2178
|
+
this.responseText = proxyResp.body ?? "";
|
|
2179
|
+
this.readyState = 3;
|
|
2180
|
+
this.onprogress?.({ type: "progress", loaded: this.responseText.length, total: this.responseText.length, lengthComputable: true });
|
|
2181
|
+
this.onreadystatechange?.({ type: "readystatechange" });
|
|
2182
|
+
this.readyState = 4;
|
|
2183
|
+
this.response = this.responseType === "json" ? (() => {
|
|
2184
|
+
try {
|
|
2185
|
+
return JSON.parse(this.responseText);
|
|
2186
|
+
} catch {
|
|
2187
|
+
return null;
|
|
2188
|
+
}
|
|
2189
|
+
})() : this.responseText;
|
|
2190
|
+
logEntry.status = proxyResp.status;
|
|
2191
|
+
logEntry.responseHeaders = proxyResp.headers;
|
|
2192
|
+
logEntry.responseBody = proxyResp.body;
|
|
2193
|
+
that.checkForMatch(logEntry);
|
|
2194
|
+
this.onreadystatechange?.({ type: "readystatechange" });
|
|
2195
|
+
this.onload?.({ type: "load" });
|
|
2196
|
+
this.onloadend?.({ type: "loadend" });
|
|
2197
|
+
} catch (e) {
|
|
2198
|
+
this.readyState = 4;
|
|
2199
|
+
this.status = 0;
|
|
2200
|
+
logEntry.status = 0;
|
|
2201
|
+
logEntry.responseBody = e?.message || String(e);
|
|
2202
|
+
that.checkForMatch(logEntry);
|
|
2203
|
+
this.onreadystatechange?.({ type: "readystatechange" });
|
|
2204
|
+
this.onerror?.({ type: "error", error: e });
|
|
2205
|
+
this.onloadend?.({ type: "loadend" });
|
|
2206
|
+
} finally {
|
|
2207
|
+
that.trackRequestEnd(pendingId);
|
|
2208
|
+
}
|
|
2209
|
+
};
|
|
2210
|
+
if (this.asyncFlag) {
|
|
2211
|
+
window.setTimeout(() => {
|
|
2212
|
+
void doWork();
|
|
2213
|
+
}, 0);
|
|
2214
|
+
} else {
|
|
2215
|
+
await doWork();
|
|
2216
|
+
}
|
|
2217
|
+
}
|
|
2218
|
+
}
|
|
2219
|
+
};
|
|
2220
|
+
window.XMLHttpRequest = window.__phantom.XMLHttpRequest;
|
|
2221
|
+
window.EventSource = window.__phantom.EventSource;
|
|
2222
|
+
window.WebSocket = window.__phantom.WebSocket;
|
|
2223
|
+
}
|
|
2224
|
+
};
|
|
2225
|
+
|
|
2226
|
+
// ../../src/phantom/harvest.ts
|
|
2227
|
+
import * as cheerio from "cheerio";
|
|
2228
|
+
|
|
2229
|
+
// ../../src/phantom/errors.ts
|
|
2230
|
+
/**
 * Error describing a fetch that was blocked by bot protection.
 *
 * Instances carry `kind: "blocked"`, the offending `url`, and the name
 * "BlockedByBotProtectionError". When no (or an empty) message is supplied,
 * a default message mentioning the URL is used.
 */
var BlockedByBotProtectionError = class extends Error {
  /**
   * @param {string} url - URL that was being fetched.
   * @param {string} [message] - Optional custom message.
   */
  constructor(url, message) {
    const text = message || `Blocked by bot protection while fetching: ${url}`;
    super(text);
    Object.assign(this, {
      kind: "blocked",
      url,
      name: "BlockedByBotProtectionError"
    });
  }
};
|
|
2238
|
+
|
|
2239
|
+
// ../../src/phantom/module-prefetch.ts
|
|
2240
|
+
// Lower-case file extensions (leading dot included) that isStubAsset
// classifies as stub assets.
var STUB_EXTENSIONS = /* @__PURE__ */ new Set([
  // Stylesheets
  ".css",
  // Images
  ".png", ".jpg", ".jpeg", ".gif", ".webp", ".avif", ".svg", ".ico",
  // Fonts
  ".woff", ".woff2", ".ttf", ".otf", ".eot",
  // Audio / video
  ".mp3", ".mp4", ".webm", ".mov",
  // WebAssembly binaries
  ".wasm"
]);
|
|
2261
|
+
/**
 * Tells whether a URL's path ends in one of the STUB_EXTENSIONS.
 *
 * The path is taken from `new URL(url).pathname`; when the string is not a
 * parseable absolute URL, everything before the first `?` / `#` is used
 * instead. Comparison is case-insensitive, and the "extension" is whatever
 * follows the last dot of the path.
 *
 * @param {string} url - Absolute URL or URL-like string.
 * @returns {boolean} True when the extension is in STUB_EXTENSIONS.
 */
function isStubAsset(url) {
  let path;
  try {
    path = new URL(url).pathname;
  } catch {
    // Not an absolute URL: strip the query string and fragment by hand.
    path = url.split("?")[0].split("#")[0];
  }
  const lowered = path.toLowerCase();
  const extension = lowered.slice(lowered.lastIndexOf("."));
  return STUB_EXTENSIONS.has(extension);
}
|
|
2272
|
+
/**
 * Extracts module dependencies from source text with a regex heuristic and
 * resolves them to absolute URLs.
 *
 * Recognised forms:
 *   - side-effect imports:      import "./x.js"
 *   - dynamic imports:          import("./x.js")
 *   - static import / re-export with a `from` clause, on one line or with
 *     the import clause spread over several lines.
 *
 * Resolution: `http(s)://` specifiers are used as-is, specifiers starting
 * with "/" are resolved against `originFallback`, everything else against
 * `baseUrl`. Specifiers that fail to resolve and URLs for which
 * `isStubAsset` returns true are skipped. Duplicates are not removed.
 *
 * @param {string} code - Module source text.
 * @param {string} baseUrl - URL of the module containing `code`.
 * @param {string} originFallback - Base used for root-relative specifiers.
 * @returns {string[]} Resolved dependency URLs in order of appearance.
 */
function scanImports(code, baseUrl, originFallback) {
  const deps = [];
  // Alternatives are ordered so the narrow forms win at a given position:
  //  1. side-effect import; 2. dynamic import; 3. single-line `from` clause;
  //  4. `from` clause whose import list spans several lines (restricted to
  //     identifier / brace / comma / star characters to limit false hits).
  // Trying 1 and 2 first keeps them from being swallowed by alternative 3
  // when a later `from "..."` appears on the same (e.g. minified) line.
  const importRe = /import\s*['"]([^'"]+)['"]|import\s*\(\s*['"]([^'"]+)['"]\s*\)|(?:import|export)\s*.*?from\s*['"]([^'"]+)['"]|(?:import|export)\s*[\w$\s{},*]*?from\s*['"]([^'"]+)['"]/g;
  let m;
  while ((m = importRe.exec(code)) !== null) {
    const specifier = m[1] || m[2] || m[3] || m[4];
    if (!specifier) continue;
    let resolved;
    try {
      if (specifier.startsWith("http://") || specifier.startsWith("https://")) {
        resolved = specifier;
      } else if (specifier.startsWith("/")) {
        resolved = new URL(specifier, originFallback).toString();
      } else {
        resolved = new URL(specifier, baseUrl).toString();
      }
    } catch {
      // Unresolvable specifier: ignore it and keep scanning.
      continue;
    }
    if (!isStubAsset(resolved)) {
      deps.push(resolved);
    }
  }
  return deps;
}
|
|
2297
|
+
/**
 * Walks the import graph reachable from `rootUrls` and batch-fetches every
 * module that is not yet in `cache`, one batch per graph level.
 *
 * @param {string[]} rootUrls Entry module URLs.
 * @param {Map<string, string>} cache URL -> source text; filled in place.
 * @param {string} pageUrl URL of the page, passed to chromeSubresourceHeaders()
 *   and used to derive the page origin.
 * @param {{ maxRounds?: number, proxyUrl?: string }} [opts] `maxRounds`
 *   (default 8) caps the number of fetch batches; `proxyUrl` is forwarded as
 *   the `proxy` field of every request payload.
 * @returns {Promise<number>} Number of modules fetched and stored in `cache`.
 */
async function prefetchModuleGraph(rootUrls, cache, pageUrl, opts) {
  const maxRounds = opts?.maxRounds ?? 8;
  const debug = process.env.PHANTOM_DEBUG_MODULES === "1";
  const { headers: baseHeaders } = chromeSubresourceHeaders(pageUrl);
  // Copy instead of mutating the helper's object; overriding keys in a spread
  // keeps their original position, so the derived header order is unchanged.
  const subHeaders = {
    ...baseHeaders,
    "sec-fetch-dest": "script",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-origin"
  };
  const headerOrder = Object.keys(subHeaders);
  let origin;
  try {
    origin = new URL(pageUrl).origin;
  } catch {
    origin = pageUrl;
  }
  // Same same-origin / cross-site split the Harvester uses for script tags.
  const fetchSiteFor = (moduleUrl) => {
    try {
      return new URL(moduleUrl).origin === origin ? "same-origin" : "cross-site";
    } catch {
      return "same-origin";
    }
  };
  const seen = /* @__PURE__ */ new Set();
  let toFetch = [];
  let totalFetched = 0;

  // Phase 1: depth-first walk over what is already cached, collecting the
  // first layer of URLs that still need fetching.
  const toScan = [...rootUrls];
  while (toScan.length > 0) {
    const url = toScan.pop();
    if (seen.has(url)) continue;
    seen.add(url);
    const code = cache.get(url);
    if (code == null) {
      toFetch.push(url);
      continue;
    }
    for (const d of scanImports(code, url, origin)) {
      if (!seen.has(d)) toScan.push(d);
    }
  }

  // Phase 2: fetch one layer per round, scan the results, repeat.
  for (let round = 0; round < maxRounds && toFetch.length > 0; round++) {
    if (debug) {
      console.log(`[prefetch] Round ${round}: ${toFetch.length} modules`);
    }
    const payloads = toFetch.map((u) => ({
      method: "GET",
      url: u,
      headers: { ...subHeaders, "sec-fetch-site": fetchSiteFor(u) },
      headerOrder,
      body: "",
      proxy: opts?.proxyUrl
    }));
    const responses = await phantomBatchFetch(payloads);
    const newToScan = [];
    for (let i = 0; i < toFetch.length; i++) {
      const r = responses?.[i];
      // A missing entry is treated like a failed fetch rather than crashing.
      if (r && r.status < 400 && r.body) {
        cache.set(toFetch[i], r.body);
        totalFetched++;
        newToScan.push(toFetch[i]);
      }
    }
    toFetch = [];
    // newToScan grows while iterating: already-cached dependencies discovered
    // here are scanned in the same round.
    for (const url of newToScan) {
      const code = cache.get(url);
      if (!code) continue;
      for (const d of scanImports(code, url, origin)) {
        if (seen.has(d)) continue;
        seen.add(d);
        if (cache.has(d)) {
          newToScan.push(d);
        } else {
          toFetch.push(d);
        }
      }
    }
  }
  return totalFetched;
}
|
|
2369
|
+
|
|
2370
|
+
// ../../src/phantom/harvest.ts
|
|
2371
|
+
// Document-request headers and their ordering, read once at module load from
// chromeDocumentHeaders(); Harvester spreads DEFAULT_HEADERS into its request
// headers and passes DEFAULT_HEADER_ORDER as each payload's `headerOrder`.
var { headers: DEFAULT_HEADERS, order: DEFAULT_HEADER_ORDER } = chromeDocumentHeaders();
|
|
2372
|
+
/**
 * Fetches a page through the network helper (phantomFetch / phantomBatchFetch),
 * optionally works around consent walls, and collects the scripts, module
 * preloads and embedded JSON state the later execution stage consumes.
 */
var Harvester = class {
  /**
   * @param {string} url Absolute URL of the page to harvest.
   * @param {object} [opts]
   * @param {boolean} [opts.prefetchExternalScripts] Fetch `<script src>` bodies (default true).
   * @param {number} [opts.externalScriptConcurrency] Stored on the instance (default 8).
   * @param {Object<string,string>} [opts.requestHeaders] Extra headers merged over the defaults.
   * @param {string[]} [opts.initialCookies] Raw cookie strings sent from the first request on.
   * @param {string} [opts.thirdPartyPolicy] Passed to shouldSkipScriptAsset (default "skip-noncritical").
   * @param {boolean} [opts.prefetchModulePreloads] Fetch `<link rel="modulepreload">` targets (default true).
   * @param {{ url: string, scopes: Set<string> }} [opts.proxy] Proxy URL plus the scopes it applies to.
   */
  constructor(url, opts = {}) {
    this.cookies = [...opts.initialCookies ?? []];
    this.logs = [];
    this.prefetchExternalScripts = opts.prefetchExternalScripts !== false;
    this.externalScriptConcurrency = opts.externalScriptConcurrency ?? 8;
    this.requestHeaders = opts.requestHeaders ?? {};
    this.thirdPartyPolicy = opts.thirdPartyPolicy ?? "skip-noncritical";
    this.prefetchModulePreloads = opts.prefetchModulePreloads !== false;
    this.targetUrl = url;
    this.proxy = opts.proxy;
  }

  /**
   * @param {string} scope Request scope, e.g. "page" or "assets".
   * @returns {string|undefined} Proxy URL when a proxy is configured for `scope`.
   */
  proxyUrlForScope(scope) {
    if (!this.proxy) return void 0;
    return this.proxy.scopes.has(scope) ? this.proxy.url : void 0;
  }

  /**
   * Builds the `Cookie` request header from every cookie string collected so
   * far. Each entry may hold several newline-separated Set-Cookie values;
   * attributes after the first ";" are dropped.
   *
   * When the same cookie name occurs more than once, the most recent value
   * wins (at the position of the first occurrence) instead of sending both.
   *
   * @returns {string} "name=value" pairs joined by "; " (empty when none).
   */
  buildCookieHeader() {
    const jar = /* @__PURE__ */ new Map();
    for (const raw of this.cookies) {
      for (const single of raw.split("\n")) {
        const nameVal = single.split(";")[0]?.trim();
        if (!nameVal) continue;
        const eq = nameVal.indexOf("=");
        if (eq === -1) continue;
        jar.set(nameVal.slice(0, eq).trim(), nameVal);
      }
    }
    return [...jar.values()].join("; ");
  }

  /**
   * Resolves a possibly relative reference against a base URL.
   *
   * @param {string} ref Value of a `src` / `href` attribute.
   * @param {string} base Absolute base URL.
   * @returns {string|null} Absolute URL, or null when `ref` cannot be parsed.
   */
  resolveUrl(ref, base) {
    try {
      return new URL(ref, base).toString();
    } catch {
      return null;
    }
  }

  /**
   * Performs one request through phantomFetch, recording Set-Cookie headers
   * and optionally following redirects.
   *
   * @param {string} url Request URL.
   * @param {Object<string,string>} [headers] Request headers (a `Cookie` header is added).
   * @param {boolean} [followRedirects] Follow 3xx responses that carry a Location header.
   * @param {number} [maxRedirects] Maximum number of redirects to follow.
   * @param {string} [method] HTTP method.
   * @param {string} [body] Request body.
   * @param {string} [proxyScope] Scope used to decide whether the proxy applies.
   * @returns {Promise<object>} The helper's response plus `finalUrl`.
   * @throws {Error} When the helper reports an error or the request fails.
   */
  async fetchViaProxy(url, headers = {}, followRedirects = false, maxRedirects = 5, method = "GET", body = "", proxyScope = "page") {
    let currentUrl = url;
    let redirectCount = 0;
    let currentMethod = method;
    let currentBody = body;
    while (true) {
      try {
        const cookieHeader = this.buildCookieHeader();
        const reqHeaders = { ...headers };
        if (cookieHeader) {
          reqHeaders["Cookie"] = cookieHeader;
        }
        const data = await phantomFetch({
          method: currentMethod,
          url: currentUrl,
          headers: reqHeaders,
          headerOrder: DEFAULT_HEADER_ORDER,
          body: currentBody,
          proxy: this.proxyUrlForScope(proxyScope)
        });
        if (data.error) throw new Error(`Proxy Error: ${data.error}`);
        const respHeaders = data.headers ?? {};
        const setCookie = respHeaders["Set-Cookie"] || respHeaders["set-cookie"];
        if (Array.isArray(setCookie)) {
          // Tolerate a list-valued header; buildCookieHeader expects strings.
          this.cookies.push(...setCookie.filter((c) => typeof c === "string"));
        } else if (setCookie) {
          this.cookies.push(setCookie);
        }
        const isRedirect = data.status >= 300 && data.status < 400;
        if (followRedirects && isRedirect && redirectCount < maxRedirects) {
          const location = respHeaders["Location"] || respHeaders["location"];
          if (location) {
            // NOTE(review): redirect targets are not re-checked against the
            // private-address filter applied to the initial URL.
            currentUrl = new URL(location, currentUrl).toString();
            console.log(`[Harvest] Following redirect to: ${currentUrl}`);
            redirectCount++;
            // 302/303 always downgrade to GET (existing behavior); a 301
            // answering a POST is downgraded too, as browsers do.
            const downgradeToGet = data.status === 302 || data.status === 303 || data.status === 301 && currentMethod === "POST";
            if (downgradeToGet) {
              currentMethod = "GET";
              currentBody = "";
            }
            continue;
          }
        }
        return { ...data, finalUrl: currentUrl };
      } catch (e) {
        console.error(`[Harvester] Proxy request failed for ${currentUrl}:`, e);
        throw e;
      }
    }
  }

  /**
   * Detects a cookie-consent interstitial, either by host name or by the
   * presence of a consent-style form in the HTML.
   *
   * @param {string} url URL the HTML was served from.
   * @param {string} html Page markup.
   * @returns {boolean}
   */
  isConsentWall(url, html) {
    const consentDomains = ["consent.yahoo.com", "guce.yahoo.com", "consent.google.com", "consent.youtube.com"];
    try {
      const hostname = new URL(url).hostname;
      // Exact host or a subdomain of it; a substring test would also match
      // look-alike hosts such as "consent.google.com.example.net".
      if (consentDomains.some((d) => hostname === d || hostname.endsWith(`.${d}`))) return true;
    } catch {
      // Unparseable URL: fall through to the markup heuristics.
    }
    const $ = cheerio.load(html);
    const hasConsentForm = $("form.consent-form").length > 0 || $("form").filter((_, el) => {
      const $f = $(el);
      return $f.find('input[name="csrfToken"]').length > 0 && $f.find('input[name="sessionId"]').length > 0;
    }).length > 0;
    return hasConsentForm;
  }

  /**
   * Extracts the submit target and fields of a consent form.
   *
   * Picks the first form whose text mentions agree/accept/consent (else the
   * first form), collects its hidden inputs, and adds the name/value of the
   * first matching "agree" button, or `agree=agree` when there is none.
   *
   * @param {string} html Consent page markup.
   * @param {string} baseUrl URL used to resolve the form action.
   * @returns {{ action: string, fields: Object<string,string> }|null} Null when
   *   no form with an `action` attribute is found or parsing fails.
   */
  parseConsentForm(html, baseUrl) {
    const mentionsConsent = (text) => text.includes("agree") || text.includes("accept") || text.includes("consent");
    try {
      const $ = cheerio.load(html);
      let $form = $("form").filter((_, el) => mentionsConsent($(el).text().toLowerCase())).first();
      if (!$form.length) $form = $("form").first();
      if (!$form.length) return null;
      const action = $form.attr("action");
      const absoluteAction = action === void 0 || action === null ? null : action === "" ? baseUrl : new URL(action, baseUrl).toString();
      if (!absoluteAction) return null;
      const fields = {};
      $form.find('input[type="hidden"]').each((_, el) => {
        const name = $(el).attr("name");
        const value = $(el).attr("value") ?? "";
        if (name) fields[name] = value;
      });
      const agreeBtn = $form.find('button[name], input[type="submit"][name]').filter((_, el) => {
        return mentionsConsent(($(el).text() + " " + ($(el).attr("value") ?? "")).toLowerCase());
      }).first();
      if (agreeBtn.length) {
        const name = agreeBtn.attr("name");
        const value = agreeBtn.attr("value") ?? "agree";
        if (name) fields[name] = value;
      } else {
        fields["agree"] = "agree";
      }
      return { action: absoluteAction, fields };
    } catch {
      // Best effort: any parsing problem means "no usable form".
      return null;
    }
  }

  /**
   * Heuristic for bot-protection responses.
   *
   * @param {number} status HTTP status.
   * @param {string} [body] Response body.
   * @returns {boolean} True for status 999, or for 403/429/503 whose body
   *   contains one of the known challenge markers.
   */
  looksBlocked(status, body) {
    if (status === 999) return true;
    if (status !== 403 && status !== 429 && status !== 503) return false;
    const b = (body || "").toLowerCase();
    const markers = [
      "just a moment",
      "challenge-platform",
      "__cf_chl",
      "cf-browser-verification",
      "enable javascript and cookies to continue",
      "security verification",
      "captcha",
      "trkcode=",
      "trkinfo="
    ];
    return markers.some((marker) => b.includes(marker));
  }

  /**
   * Submits the consent form found in `html` and returns the response holding
   * the real page, re-fetching the original URL if the POST lands on the
   * consent page again.
   *
   * @param {string} html Consent page markup.
   * @param {string} finalUrl URL of the consent page.
   * @returns {Promise<object|null>} Replacement response, or null when the
   *   bypass did not succeed (the caller keeps the consent page).
   */
  async bypassConsentWall(html, finalUrl) {
    const form = this.parseConsentForm(html, finalUrl);
    if (!form) {
      console.warn(`[Harvest] Could not parse consent form, proceeding with consent page`);
      return null;
    }
    try {
      const formBody = new URLSearchParams(form.fields).toString();
      const postHeaders = {
        ...DEFAULT_HEADERS,
        ...this.requestHeaders,
        "Content-Type": "application/x-www-form-urlencoded",
        "Referer": finalUrl,
        "Origin": new URL(finalUrl).origin
      };
      const consentResp = await this.fetchViaProxy(form.action, postHeaders, true, 10, "POST", formBody);
      if (consentResp.status >= 400) {
        console.warn(`[Harvest] Consent POST returned ${consentResp.status}, proceeding with consent page`);
        return null;
      }
      console.log(`[Harvest] Consent POST succeeded (${consentResp.status}), final URL: ${consentResp.finalUrl}`);
      if (!this.isConsentWall(consentResp.finalUrl, consentResp.body)) {
        console.log(`[Harvest] Consent bypass successful (from redirect), got real page at ${consentResp.finalUrl}`);
        return consentResp;
      }
      console.log(`[Harvest] Consent redirect still on consent page, re-fetching original URL...`);
      const retryResp = await this.fetchViaProxy(this.targetUrl, { ...DEFAULT_HEADERS, ...this.requestHeaders }, true);
      if (retryResp.status < 400 && !this.isConsentWall(retryResp.finalUrl, retryResp.body)) {
        console.log(`[Harvest] Consent bypass successful (re-fetch), got real page at ${retryResp.finalUrl}`);
        return retryResp;
      }
      console.warn(`[Harvest] Re-fetch after consent still returned consent wall, proceeding with original`);
      return null;
    } catch (e) {
      console.warn(`[Harvest] Consent bypass failed, proceeding with consent page:`, e);
      return null;
    }
  }

  /**
   * Collects JSON state embedded in the page: well-known `window.__X__ = {...};`
   * assignments, `<script type="application/json" id>` elements, and
   * data-page / data-props / data-state attributes. Values that are not valid
   * JSON are skipped.
   *
   * @param {Function} $ cheerio instance loaded with `html`.
   * @param {string} html Page markup.
   * @returns {Object<string, any>} Parsed state keyed by pattern source,
   *   element id, or `attr:<name>:<index>`.
   */
  extractInitialState($, html) {
    const initialState = {};
    const statePatterns = [
      /window\.__INITIAL_STATE__\s*=\s*({.+?});/,
      /window\.__NEXT_DATA__\s*=\s*({.+?});/,
      /window\.__NUXT__\s*=\s*({.+?});/,
      /window\.__APP_DATA__\s*=\s*({.+?});/
    ];
    for (const pattern of statePatterns) {
      const match = html.match(pattern);
      if (match && match[1]) {
        try {
          initialState[pattern.toString()] = JSON.parse(match[1]);
        } catch {
          // Not plain JSON (e.g. a JS object literal): ignore.
        }
      }
    }
    $('script[type="application/json"]').each((_, el) => {
      const id = $(el).attr("id");
      const content = $(el).html();
      if (id && content) {
        try {
          initialState[id] = JSON.parse(content);
        } catch {
          // Malformed JSON payload: ignore.
        }
      }
    });
    $("[data-page], [data-props], [data-state]").each((index, el) => {
      for (const attr of ["data-page", "data-props", "data-state"]) {
        const raw = $(el).attr(attr);
        if (!raw) continue;
        try {
          initialState[`attr:${attr}:${index}`] = JSON.parse(raw);
        } catch {
          // Attribute is not JSON: ignore.
        }
      }
    });
    return initialState;
  }

  /**
   * Fetches the target page and everything needed to execute it later.
   *
   * @returns {Promise<object>} `{ url, status, html, scripts, modulePreloads,
   *   skippedScriptCount, initialState, cookies, headers, logs, moduleGraphCache }`.
   * @throws {BlockedByBotProtectionError} When the response looks like a bot challenge.
   * @throws {Error} When the page responds with any other status >= 400.
   */
  async harvest() {
    console.log(`[Harvest] Fetching ${this.targetUrl} via TLS Proxy...`);
    let response = await this.fetchViaProxy(this.targetUrl, { ...DEFAULT_HEADERS, ...this.requestHeaders }, true);
    if (response.status >= 400) {
      const errorBody = response.body || "";
      console.log(`[Harvest] Response Body on Error:`, errorBody.substring(0, 500));
      if (this.looksBlocked(response.status, errorBody)) {
        throw new BlockedByBotProtectionError(
          this.targetUrl,
          `Site is blocked by bot protection (HTTP ${response.status}) and cannot be fetched from this environment: ${this.targetUrl}`
        );
      }
      throw new Error(`Failed to fetch ${this.targetUrl}: ${response.status}`);
    }
    let finalUrl = response.finalUrl;
    let html = response.body || "";
    if (this.isConsentWall(finalUrl, html)) {
      console.log(`[Harvest] Consent wall detected at ${finalUrl}, attempting bypass...`);
      const bypassed = await this.bypassConsentWall(html, finalUrl);
      if (bypassed) {
        response = bypassed;
        finalUrl = bypassed.finalUrl;
        html = bypassed.body || "";
      }
    }

    const $ = cheerio.load(html);
    const scriptAssets = [];
    const modulePreloads = [];
    const batchScriptMeta = [];
    // The document lives at finalUrl after redirects, so relative references,
    // Referer and the same-origin test are all based on it.
    const pageOrigin = new URL(finalUrl).origin;
    const executableTypes = /* @__PURE__ */ new Set(["text/javascript", "application/javascript", "module"]);
    const scriptTags = $("script");
    let skippedScriptCount = 0;
    let scriptCounter = 0;
    for (let i = 0; i < scriptTags.length; i++) {
      const $el = $(scriptTags[i]);
      const src = $el.attr("src");
      const content = $el.html();
      const type = ($el.attr("type") || "").trim().toLowerCase();
      // Only untyped scripts and the listed JavaScript types are executable;
      // data blocks (any JSON flavour, import maps, templates, ...) are not.
      if (type && !executableTypes.has(type)) continue;
      const isDefer = $el.attr("defer") !== void 0;
      const isAsync = $el.attr("async") !== void 0;
      const scriptKind = type === "module" ? "module" : "classic";
      let execution;
      if (scriptKind === "module") {
        execution = isAsync ? "async" : "defer";
      } else {
        execution = isAsync ? "async" : isDefer ? "defer" : "sync";
      }
      const order = i;
      const id = `script_${scriptCounter++}`;
      if (src) {
        const absoluteUrl = this.resolveUrl(src, finalUrl);
        if (!absoluteUrl) {
          console.warn(`[Harvest] Skipping script with unresolvable src: ${src}`);
          continue;
        }
        const category = classifyScriptAsset({ url: absoluteUrl, content: content ?? "", scriptKind, type: "external" }, this.targetUrl);
        if (shouldSkipScriptAsset({ url: absoluteUrl, content: content ?? "", scriptKind, type: "external" }, this.targetUrl, this.thirdPartyPolicy)) {
          skippedScriptCount++;
          continue;
        }
        if (!this.prefetchExternalScripts) continue;
        const scriptOrigin = new URL(absoluteUrl).origin;
        batchScriptMeta.push({
          absoluteUrl,
          id,
          scriptKind,
          category,
          order,
          execution,
          headers: {
            ...DEFAULT_HEADERS,
            ...this.requestHeaders,
            "Referer": finalUrl,
            "Sec-Fetch-Dest": "script",
            "Sec-Fetch-Mode": scriptKind === "module" ? "cors" : "no-cors",
            "Sec-Fetch-Site": scriptOrigin === pageOrigin ? "same-origin" : "cross-site"
          }
        });
      } else if (content && content.trim().length > 0) {
        const category = classifyScriptAsset({ url: void 0, content, scriptKind, type: "inline" }, this.targetUrl);
        if (shouldSkipScriptAsset({ url: void 0, content, scriptKind, type: "inline" }, this.targetUrl, this.thirdPartyPolicy)) {
          skippedScriptCount++;
          continue;
        }
        scriptAssets.push({ id, type: "inline", scriptKind, category, content, order, execution });
      }
    }

    const modulePreloadUrls = [];
    if (this.prefetchModulePreloads) {
      const seenModulePreloads = /* @__PURE__ */ new Set();
      $('link[rel="modulepreload"][href]').each((_, el) => {
        const href = $(el).attr("href");
        if (!href) return;
        const absoluteUrl = this.resolveUrl(href, finalUrl);
        if (!absoluteUrl || seenModulePreloads.has(absoluteUrl)) return;
        seenModulePreloads.add(absoluteUrl);
        modulePreloadUrls.push(absoluteUrl);
      });
    }

    const assetsProxy = this.proxyUrlForScope("assets");
    const scriptPayloads = batchScriptMeta.map((m) => ({
      method: "GET",
      url: m.absoluteUrl,
      headers: m.headers,
      headerOrder: DEFAULT_HEADER_ORDER,
      body: "",
      proxy: assetsProxy
    }));
    const preloadPayloads = modulePreloadUrls.map((url) => ({
      method: "GET",
      url,
      headers: {
        ...DEFAULT_HEADERS,
        ...this.requestHeaders,
        "Referer": finalUrl,
        "Sec-Fetch-Dest": "script",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": new URL(url).origin === pageOrigin ? "same-origin" : "cross-site"
      },
      headerOrder: DEFAULT_HEADER_ORDER,
      body: "",
      proxy: assetsProxy
    }));
    const allPayloads = [...scriptPayloads, ...preloadPayloads];
    if (allPayloads.length > 0) {
      console.log(`[Harvest] Batch-fetching ${scriptPayloads.length} scripts + ${preloadPayloads.length} modulepreloads...`);
      const allResponses = await phantomBatchFetch(allPayloads);
      // Responses are positional: scripts first, then module preloads.
      batchScriptMeta.forEach((meta, i) => {
        const resp = allResponses?.[i];
        if (!resp) {
          console.warn(`[Harvest] Failed to fetch script ${meta.absoluteUrl}: status ${void 0}`);
          return;
        }
        this.logs.push({
          type: "resource_load",
          url: meta.absoluteUrl,
          timestamp: Date.now(),
          initiator: "Harvester",
          status: resp.status,
          responseHeaders: resp.headers,
          responseBody: resp.status < 400 ? resp.body : null
        });
        if (resp.status < 400) {
          scriptAssets.push({
            id: meta.id,
            type: "external",
            scriptKind: meta.scriptKind,
            category: meta.category,
            url: meta.absoluteUrl,
            content: resp.body,
            order: meta.order,
            execution: meta.execution
          });
        } else {
          console.warn(`[Harvest] Failed to fetch script ${meta.absoluteUrl}: status ${resp.status}`);
        }
      });
      modulePreloadUrls.forEach((url, i) => {
        const resp = allResponses?.[batchScriptMeta.length + i];
        if (!resp) return;
        this.logs.push({
          type: "resource_load",
          url,
          timestamp: Date.now(),
          initiator: "Harvester.modulepreload",
          status: resp.status,
          responseHeaders: resp.headers,
          responseBody: resp.status < 400 ? resp.body : null
        });
        if (resp.status < 400) {
          modulePreloads.push({ url, content: resp.body });
        }
      });
    }
    // Restore document order: inline scripts were pushed before fetched ones.
    scriptAssets.sort((a, b) => a.order - b.order);

    const initialState = this.extractInitialState($, html);

    const moduleGraphCache = /* @__PURE__ */ new Map();
    for (const s of scriptAssets) {
      if (s.url) moduleGraphCache.set(s.url, s.content);
    }
    for (const mp of modulePreloads) {
      moduleGraphCache.set(mp.url, mp.content);
    }
    const moduleEntryUrls = scriptAssets.filter((s) => s.scriptKind === "module" && s.url).map((s) => s.url);
    if (moduleEntryUrls.length > 0 || modulePreloads.length > 0) {
      const rootUrls = [...moduleEntryUrls, ...modulePreloads.map((mp) => mp.url)];
      await prefetchModuleGraph(rootUrls, moduleGraphCache, finalUrl, {
        proxyUrl: assetsProxy
      });
    }
    return {
      url: finalUrl,
      status: response.status,
      html,
      scripts: scriptAssets,
      modulePreloads,
      skippedScriptCount,
      initialState,
      cookies: this.cookies,
      headers: response.headers,
      logs: this.logs,
      moduleGraphCache
    };
  }
};
|
|
2781
|
+
|
|
2782
|
+
// ../dynafetch-core/src/detect.ts
|
|
2783
|
+
/**
 * Checks whether any harvested script has a URL matching `pattern`.
 *
 * @param {{ scripts: Array<{ url?: string }> }} harvest Harvest result.
 * @param {RegExp} pattern Pattern tested against each script URL.
 * @returns {boolean}
 */
function hasScriptUrl(harvest, pattern) {
  for (const entry of harvest.scripts) {
    if (entry.url && pattern.test(entry.url)) return true;
  }
  return false;
}
|
|
2786
|
+
/**
 * Guesses the front-end framework of a harvested page from markers in its
 * HTML, its embedded state keys and its script URLs.
 *
 * Rules are tried in a fixed order and the first match wins; a page with
 * scripts but no known marker is "generic-spa", one without scripts "static".
 *
 * @param {{ html: string, scripts: Array<{ url?: string }>, initialState: object }} harvest
 * @returns {"nextjs"|"nuxt"|"remix"|"inertia"|"astro"|"sveltekit"|"htmx"|"generic-spa"|"static"}
 */
function detectFramework(harvest) {
  const lower = harvest.html.toLowerCase();
  const mentions = (...needles) => needles.some((needle) => lower.includes(needle));
  const scriptUrlMatches = (pattern) => harvest.scripts.some((script) => Boolean(script.url && pattern.test(script.url)));
  const rules = [
    ["nextjs", () => "__NEXT_DATA__" in harvest.initialState || mentions("__next_data__", "/_next/") || scriptUrlMatches(/\/_next\//i)],
    ["nuxt", () => "__NUXT__" in harvest.initialState || mentions("__nuxt") || scriptUrlMatches(/\/_nuxt\//i)],
    ["remix", () => mentions("window.__remixcontext", "remix-context") || scriptUrlMatches(/\/build\/.*entry\.client/i)],
    ["inertia", () => mentions("data-page=") || scriptUrlMatches(/inertia-[^/]+\.js/i) || scriptUrlMatches(/\/vite\/assets\//i)],
    ["astro", () => mentions("astro-island", "__astro") || scriptUrlMatches(/\/_astro\//i)],
    ["sveltekit", () => mentions("__sveltekit") || scriptUrlMatches(/\/_app\/immutable\//i)],
    ["htmx", () => mentions("hx-get=", "hx-post=", "hx-trigger=")]
  ];
  for (const [framework, matches] of rules) {
    if (matches()) return framework;
  }
  return harvest.scripts.length > 0 ? "generic-spa" : "static";
}
|
|
2815
|
+
|
|
2816
|
+
// ../dynafetch-core/src/planner.ts
|
|
2817
|
+
/**
 * Chooses the rendering strategy for a harvested page.
 *
 * @param {string} framework Result of detectFramework().
 * @param {{ scripts: Array }} harvest Harvest result.
 * @param {boolean} allowJsdomFallback Whether script execution is permitted.
 * @returns {{ framework: string, strategy: "static-html"|"framework-probe"|"jsdom-fallback", reason: string }}
 */
function planDynafetch(framework, harvest, allowJsdomFallback) {
  const plan = (strategy, reason) => ({ framework, strategy, reason });

  // Nothing to execute: the fetched markup is already the final result.
  if (framework === "static" && harvest.scripts.length === 0) {
    return plan("static-html", "document has no executable scripts; return the fetched HTML directly");
  }
  if (!allowJsdomFallback) {
    return plan("static-html", "dynamic execution disabled; return the fetched HTML without script execution");
  }
  const isKnownFramework = framework !== "generic-spa" && framework !== "static";
  return isKnownFramework
    ? plan("framework-probe", "known framework markers detected; run the lightweight runtime under framework-aware labeling")
    : plan("jsdom-fallback", "generic client-rendered page requires runtime execution to recover dynamic HTML");
}
|
|
2845
|
+
|
|
2846
|
+
// ../dynafetch-core/src/index.ts
|
|
2847
|
+
/**
 * Error raised for invalid caller input. `status` carries a numeric code
 * (400 unless specified) alongside the message.
 */
var DynafetchInputError = class extends Error {
  /**
   * @param {string} message Human-readable description.
   * @param {number} [status=400] Status code attached to the error.
   */
  constructor(message, status = 400) {
    super(message);
    Object.assign(this, { name: "DynafetchInputError", status });
  }
};
|
|
2854
|
+
/**
 * Decides whether a host name refers to the local machine or a private /
 * link-local network, so such targets can be refused.
 *
 * Accepts the value of `URL.hostname`, in which IPv6 literals are wrapped in
 * square brackets; the brackets (and one trailing dot) are removed before the
 * address checks, otherwise `net.isIP` rejects every IPv6 literal.
 *
 * Only literal names and addresses are inspected; no DNS lookup is done.
 *
 * @param {string} hostname Host name or IP literal.
 * @returns {boolean} True for localhost names, the GCP metadata host,
 *   0.0.0.0/8, 10/8, 127/8, 169.254/16, 172.16/12, 192.168/16, IPv6 `::`,
 *   `::1`, fe80::/10, fc/fd-prefixed addresses, and IPv4-mapped IPv6 forms of
 *   the IPv4 ranges above.
 */
function isPrivateOrLocalHost(hostname) {
  let h = hostname.toLowerCase();
  if (h.endsWith(".")) h = h.slice(0, -1);
  if (h.startsWith("[") && h.endsWith("]")) h = h.slice(1, -1);
  if (h === "localhost" || h.endsWith(".localhost") || h === "0.0.0.0") return true;
  if (h === "metadata.google.internal") return true;

  // First two octets are enough to classify all blocked IPv4 ranges.
  const isPrivateV4 = (a, b) => a === 10 || a === 127 || a === 0 || a === 169 && b === 254 || a === 172 && b >= 16 && b <= 31 || a === 192 && b === 168;

  const ipVer = net.isIP(h);
  if (!ipVer) return false;
  if (ipVer === 4) {
    const [a, b] = h.split(".").map((x) => Number(x));
    return isPrivateV4(a, b);
  }
  if (h === "::1" || h === "::") return true;
  // IPv4-mapped IPv6, dotted form (::ffff:127.0.0.1) ...
  const mappedDotted = /^::ffff:(\d+)\.(\d+)\.\d+\.\d+$/.exec(h);
  if (mappedDotted) return isPrivateV4(Number(mappedDotted[1]), Number(mappedDotted[2]));
  // ... and the hex form the URL parser normalizes it to (::ffff:7f00:1).
  const mappedHex = /^::ffff:([0-9a-f]{1,4}):[0-9a-f]{1,4}$/.exec(h);
  if (mappedHex) {
    const high = Number.parseInt(mappedHex[1], 16);
    return isPrivateV4(high >> 8, high & 255);
  }
  // Link-local fe80::/10 spans fe80 through febf.
  if (/^fe[89ab][0-9a-f]:/.test(h)) return true;
  if (h.startsWith("fc") || h.startsWith("fd")) return true;
  return false;
}
|
|
2875
|
+
/**
 * Normalizes the `proxy` option into `{ url, scopes }`.
 *
 * @param {string|{ url?: string, only?: string[] }|undefined|null} input
 *   Either a proxy URL, or an object with a URL and an optional `only` list
 *   restricting the scopes the proxy applies to.
 * @returns {{ url: string, scopes: Set<string> }|undefined} Undefined when no
 *   non-blank URL is given. Scopes default to "page", "api" and "assets".
 */
function normalizeProxy(input) {
  if (!input) return void 0;
  const isShorthand = typeof input === "string";
  const url = isShorthand ? input.trim() : input.url?.trim();
  if (!url) return void 0;
  const restricted = !isShorthand && input.only?.length;
  const scopes = restricted ? new Set(input.only) : /* @__PURE__ */ new Set(["page", "api", "assets"]);
  return { url, scopes };
}
|
|
2887
|
+
/**
 * Converts the `cookies` option into a list of cookie strings.
 *
 * @param {string|string[]|Object<string, any>|undefined|null} input A
 *   ";"-separated string, a list of strings, or a name -> value map.
 * @returns {string[]} Trimmed, non-empty entries for string/array input;
 *   "name=value" strings for object input; an empty list for falsy input.
 */
function normalizeCookies(input) {
  if (!input) return [];
  const tidy = (parts) => parts.map((piece) => piece.trim()).filter(Boolean);
  if (typeof input === "string") return tidy(input.split(";"));
  if (Array.isArray(input)) return tidy(input);
  return Object.entries(input).map(([key, value]) => `${key}=${value}`);
}
|
|
2897
|
+
/**
 * Validates the caller's input and fills in option defaults.
 *
 * @param {string|object} input A URL string, or an options object with a `url`.
 * @returns {object} Copy of the options with a normalized `url` and defaults
 *   applied for `allowJsdomFallback`, `browserProfile`,
 *   `prefetchExternalScripts`, `prefetchModulePreloads` and `thirdPartyPolicy`.
 * @throws {DynafetchInputError} When the URL is missing, unparseable, not
 *   http(s), or isPrivateOrLocalHost() flags its host.
 */
function normalizeOptions(input) {
  const options = typeof input === "string" ? { url: input } : input;
  if (!options?.url) {
    throw new DynafetchInputError("URL is required");
  }
  const parsedUrl = (() => {
    try {
      return new URL(options.url);
    } catch {
      throw new DynafetchInputError("Invalid URL");
    }
  })();
  const { protocol, hostname } = parsedUrl;
  if (!(protocol === "http:" || protocol === "https:")) {
    throw new DynafetchInputError("Only http(s) URLs are allowed");
  }
  if (isPrivateOrLocalHost(hostname)) {
    throw new DynafetchInputError("Refusing to fetch local/private addresses");
  }
  const {
    allowJsdomFallback,
    browserProfile,
    prefetchExternalScripts,
    prefetchModulePreloads,
    thirdPartyPolicy
  } = options;
  return {
    ...options,
    url: parsedUrl.toString(),
    allowJsdomFallback: allowJsdomFallback !== false,
    browserProfile: browserProfile?.trim() || "chrome_146",
    prefetchExternalScripts: prefetchExternalScripts !== false,
    prefetchModulePreloads: prefetchModulePreloads !== false,
    thirdPartyPolicy: thirdPartyPolicy ?? "skip-noncritical"
  };
}
|
|
2924
|
+
/**
 * Assembles the human-readable warnings attached to a result.
 *
 * @param {{ strategy: string, reason: string }} plan Chosen plan; its reason is always the first entry.
 * @param {Array<{ source: string, message: string }>|undefined} errors
 *   Execution errors; at most the first three are reported.
 * @param {{ maxSubrequests?: number, thirdPartyPolicy?: string }} options Normalized options.
 * @returns {string[]}
 */
function toWarnings(plan, errors, options) {
  const ranRuntime = plan.strategy === "jsdom-fallback" || plan.strategy === "framework-probe";
  const reported = errors?.length ? errors.slice(0, 3) : [];
  return [
    plan.reason,
    ...(ranRuntime ? ["runtime execution used the legacy JSDOM-based renderer while lightweight adapters are still being built"] : []),
    ...(options.maxSubrequests ? [`maxSubrequests is advisory in the current implementation (${options.maxSubrequests})`] : []),
    ...(options.thirdPartyPolicy === "skip-noncritical" ? ["non-critical third-party scripts are skipped on the critical render path"] : []),
    ...reported.map((error) => `${error.source}: ${error.message}`)
  ];
}
|
|
2942
|
+
/**
 * Scores how trustworthy the returned HTML is.
 *
 * Starts from 0.92 for static HTML or 0.68 otherwise, applies a fixed bonus or
 * penalty per signal, subtracts 0.07 per execution error (capped at 0.28),
 * rounds to two decimals and clamps the result to [0.05, 0.98].
 *
 * @param {{ plan: { strategy: string, framework: string }, initialStateCount: number,
 *   executionErrors: number, htmlLength: number }} params
 * @returns {number}
 */
function computeConfidence(params) {
  const { plan, initialStateCount, executionErrors, htmlLength } = params;
  // Applied strictly in this order so floating-point results stay stable.
  const adjustments = [
    [plan.framework !== "generic-spa" && plan.framework !== "static", 0.08],
    [initialStateCount > 0, 0.06],
    [plan.strategy === "jsdom-fallback", -0.08],
    [htmlLength < 256, -0.1]
  ];
  let score = plan.strategy === "static-html" ? 0.92 : 0.68;
  for (const [applies, delta] of adjustments) {
    if (applies) score += delta;
  }
  score -= Math.min(0.28, executionErrors * 0.07);
  const rounded = Number(score.toFixed(2));
  return Math.max(0.05, Math.min(0.98, rounded));
}
|
|
2959
|
+
/**
 * Fetches a URL and returns its HTML, executing the page's scripts when the
 * plan calls for it.
 *
 * Steps: normalize the options, open a session via withDynafetchSession(),
 * harvest the page, detect the framework, pick a plan, optionally run the
 * Executor, then assemble the result with warnings, confidence and timings.
 *
 * @param {string|object} input URL string or options object (see normalizeOptions).
 * @returns {Promise<object>} `{ url, finalUrl, status, html, framework,
 *   strategy, confidence, warnings, timings, requestCount }`.
 */
async function dynafetch(input) {
  const options = normalizeOptions(input);
  const { timeoutMs } = options;
  // Sessions take whole seconds: round up, with a floor of one second.
  const timeoutSeconds = timeoutMs ? Math.max(1, Math.ceil(timeoutMs / 1e3)) : void 0;
  const initialCookies = normalizeCookies(options.cookies);
  const proxy = normalizeProxy(options.proxy);
  const sessionConfig = {
    browserProfile: options.browserProfile,
    timeoutSeconds,
    proxy: proxy?.url
  };

  const runPipeline = async () => {
    const totalStart = Date.now();
    const harvestStart = Date.now();
    const harvest = await new Harvester(options.url, {
      prefetchExternalScripts: options.prefetchExternalScripts,
      prefetchModulePreloads: options.prefetchModulePreloads,
      requestHeaders: options.headers,
      initialCookies,
      thirdPartyPolicy: options.thirdPartyPolicy,
      proxy
    }).harvest();
    const harvestMs = Date.now() - harvestStart;

    const framework = detectFramework(harvest);
    const plan = planDynafetch(framework, harvest, options.allowJsdomFallback !== false);

    // Defaults describe the "no execution" outcome; overwritten below when
    // the plan requires running the page's scripts.
    const outcome = {
      html: harvest.html,
      requestCount: harvest.logs.length,
      executionErrors: void 0,
      executeMs: 0,
      quiescenceMs: 0,
      scriptsTransformed: 0
    };
    if (plan.strategy !== "static-html") {
      const executeStart = Date.now();
      const { minWaitMs, idleWaitMs, maxWaitMs } = options;
      const execution = await new Executor(harvest, {
        thirdPartyPolicy: options.thirdPartyPolicy,
        quiescence: { minWaitMs, idleWaitMs, maxWaitMs },
        moduleWaitMs: options.moduleWaitMs,
        proxy
      }).execute();
      outcome.executeMs = Date.now() - executeStart;
      outcome.html = execution.renderedHtml ?? harvest.html;
      outcome.requestCount = execution.logs.length;
      outcome.executionErrors = execution.errors;
      outcome.quiescenceMs = execution.timings?.quiescence_ms ?? 0;
      outcome.scriptsTransformed = execution.timings?.scripts_transformed_count ?? 0;
    }

    const totalMs = Date.now() - totalStart;
    const warnings = toWarnings(plan, outcome.executionErrors, options);
    const confidence = computeConfidence({
      plan,
      initialStateCount: Object.keys(harvest.initialState).length,
      executionErrors: outcome.executionErrors?.length ?? 0,
      htmlLength: outcome.html.length
    });
    return {
      url: options.url,
      finalUrl: harvest.url,
      status: harvest.status,
      html: outcome.html,
      framework,
      strategy: plan.strategy,
      confidence,
      warnings,
      timings: {
        total: totalMs,
        harvest: harvestMs,
        execute: outcome.executeMs,
        quiescence: outcome.quiescenceMs,
        scriptsTransformed: outcome.scriptsTransformed
      },
      requestCount: outcome.requestCount
    };
  };

  return await withDynafetchSession(sessionConfig, runPipeline);
}
|
|
3040
|
+
export {
|
|
3041
|
+
DynafetchInputError,
|
|
3042
|
+
detectFramework,
|
|
3043
|
+
dynafetch,
|
|
3044
|
+
dynafetchNetBatchFetch,
|
|
3045
|
+
dynafetchNetFetch,
|
|
3046
|
+
dynafetchNetHealth,
|
|
3047
|
+
planDynafetch,
|
|
3048
|
+
withDynafetchSession
|
|
3049
|
+
};
|
|
3050
|
+
//# sourceMappingURL=index.js.map
|