@grabbit-labs/dynafetch 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/bin/dynafetch-net-darwin-arm64 +0 -0
- package/bin/dynafetch-net-darwin-x64 +0 -0
- package/bin/dynafetch-net-linux-arm64 +0 -0
- package/bin/dynafetch-net-linux-x64 +0 -0
- package/bin/dynafetch-net-win32-x64.exe +0 -0
- package/dist/index.d.ts +61 -118
- package/dist/index.js +479 -198
- package/dist/index.js.map +4 -4
- package/package.json +12 -2
package/dist/index.js
CHANGED
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
import { createRequire } from "node:module"; import { fileURLToPath as __fileURLToPath } from "node:url"; import { dirname as __dirname_fn } from "node:path"; const __filename = __fileURLToPath(import.meta.url); const __dirname = __dirname_fn(__filename); const require = createRequire(import.meta.url);
|
|
2
|
-
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
3
|
-
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
4
|
-
}) : x)(function(x) {
|
|
5
|
-
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
6
|
-
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
7
|
-
});
|
|
8
2
|
|
|
9
|
-
//
|
|
10
|
-
|
|
3
|
+
// ../../src/phantom/log.ts
|
|
4
|
+
// Debug switch: true only when the DYNAFETCH_DEBUG environment variable is exactly "1".
// It is read once, when this statement runs, so later changes to the variable do not affect log/warn below.
var enabled = process.env.DYNAFETCH_DEBUG === "1";
|
|
5
|
+
// Verbose logger: console.log when debugging is enabled, otherwise a no-op.
var log = enabled ? console.log.bind(console) : () => {
|
|
6
|
+
};
|
|
7
|
+
// Warning logger: console.warn when debugging is enabled, otherwise a no-op.
var warn = enabled ? console.warn.bind(console) : () => {
|
|
8
|
+
};
|
|
9
|
+
// Error logger: always forwards to console.error, regardless of the debug switch.
var error = console.error.bind(console);
|
|
11
10
|
|
|
12
11
|
// ../../src/phantom/execute.ts
|
|
13
12
|
import { JSDOM, VirtualConsole, CookieJar } from "jsdom";
|
|
@@ -115,29 +114,32 @@ var Transformer = class {
|
|
|
115
114
|
import { AsyncLocalStorage } from "node:async_hooks";
|
|
116
115
|
import { spawn } from "node:child_process";
|
|
117
116
|
import { randomUUID } from "node:crypto";
|
|
117
|
+
import { accessSync, constants } from "node:fs";
|
|
118
118
|
import path2 from "node:path";
|
|
119
119
|
import readline from "node:readline";
|
|
120
|
+
import { fileURLToPath } from "node:url";
|
|
120
121
|
var sessionStore = new AsyncLocalStorage();
|
|
121
122
|
var transportPromise = null;
|
|
123
|
+
var workerDir = path2.dirname(fileURLToPath(import.meta.url));
|
|
122
124
|
function findPrecompiledBinary() {
|
|
123
125
|
const platform = process.platform;
|
|
124
|
-
const arch = process.arch === "x64" ? "x64" : "arm64";
|
|
126
|
+
const arch = process.arch === "x64" ? "x64" : process.arch === "arm64" ? "arm64" : null;
|
|
127
|
+
if (!arch) return null;
|
|
125
128
|
const ext = platform === "win32" ? ".exe" : "";
|
|
126
129
|
const name = `dynafetch-net-${platform}-${arch}${ext}`;
|
|
127
130
|
const candidates = [
|
|
128
|
-
path2.resolve(
|
|
131
|
+
path2.resolve(workerDir, "../bin", name),
|
|
129
132
|
// installed: dist/../bin
|
|
130
|
-
path2.resolve(
|
|
133
|
+
path2.resolve(workerDir, "../../../dynafetch-net/bin", name),
|
|
131
134
|
// dev: dynafetch-core/src/net -> dynafetch-net/bin
|
|
132
|
-
path2.resolve(
|
|
135
|
+
path2.resolve(workerDir, "../../../../packages/dynafetch-net/bin", name),
|
|
133
136
|
// dev: alt layout
|
|
134
137
|
path2.resolve(process.cwd(), "packages/dynafetch-net/bin", name)
|
|
135
138
|
// dev: from workspace root
|
|
136
139
|
];
|
|
137
140
|
for (const candidate of candidates) {
|
|
138
141
|
try {
|
|
139
|
-
|
|
140
|
-
fs2.accessSync(candidate, fs2.constants.X_OK);
|
|
142
|
+
accessSync(candidate, constants.X_OK);
|
|
141
143
|
return candidate;
|
|
142
144
|
} catch {
|
|
143
145
|
}
|
|
@@ -161,12 +163,44 @@ function createWorkerCommand() {
|
|
|
161
163
|
}
|
|
162
164
|
function createWorkerTransport() {
|
|
163
165
|
const { command, args, cwd } = createWorkerCommand();
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
166
|
+
let child;
|
|
167
|
+
try {
|
|
168
|
+
child = spawn(command, args, {
|
|
169
|
+
cwd,
|
|
170
|
+
stdio: ["pipe", "pipe", "pipe"],
|
|
171
|
+
env: process.env
|
|
172
|
+
});
|
|
173
|
+
} catch (err) {
|
|
174
|
+
return Promise.reject(
|
|
175
|
+
new Error(`Failed to start dynafetch-net TLS proxy: ${err instanceof Error ? err.message : String(err)}. Binary: ${command}`)
|
|
176
|
+
);
|
|
177
|
+
}
|
|
169
178
|
const pending = /* @__PURE__ */ new Map();
|
|
179
|
+
let holdCount = 0;
|
|
180
|
+
const updateRef = () => {
|
|
181
|
+
if (pending.size === 0 && holdCount === 0) {
|
|
182
|
+
child.unref();
|
|
183
|
+
child.stdin.unref?.();
|
|
184
|
+
child.stdout.unref?.();
|
|
185
|
+
child.stderr.unref?.();
|
|
186
|
+
} else {
|
|
187
|
+
child.ref();
|
|
188
|
+
child.stdin.ref?.();
|
|
189
|
+
child.stdout.ref?.();
|
|
190
|
+
child.stderr.ref?.();
|
|
191
|
+
}
|
|
192
|
+
};
|
|
193
|
+
const hold = () => {
|
|
194
|
+
holdCount++;
|
|
195
|
+
updateRef();
|
|
196
|
+
};
|
|
197
|
+
const release = () => {
|
|
198
|
+
holdCount = Math.max(0, holdCount - 1);
|
|
199
|
+
updateRef();
|
|
200
|
+
};
|
|
201
|
+
child.stdin.on("error", () => {
|
|
202
|
+
});
|
|
203
|
+
updateRef();
|
|
170
204
|
const rl = readline.createInterface({ input: child.stdout });
|
|
171
205
|
rl.on("line", (line) => {
|
|
172
206
|
const trimmed = line.trim();
|
|
@@ -174,16 +208,18 @@ function createWorkerTransport() {
|
|
|
174
208
|
let payload;
|
|
175
209
|
try {
|
|
176
210
|
payload = JSON.parse(trimmed);
|
|
177
|
-
} catch (
|
|
211
|
+
} catch (error2) {
|
|
178
212
|
for (const entry of pending.values()) {
|
|
179
|
-
entry.reject(new Error(`Invalid dynafetch-net response: ${String(
|
|
213
|
+
entry.reject(new Error(`Invalid dynafetch-net response: ${String(error2)}`));
|
|
180
214
|
}
|
|
181
215
|
pending.clear();
|
|
216
|
+
updateRef();
|
|
182
217
|
return;
|
|
183
218
|
}
|
|
184
219
|
const request = pending.get(payload.id);
|
|
185
220
|
if (!request) return;
|
|
186
221
|
pending.delete(payload.id);
|
|
222
|
+
updateRef();
|
|
187
223
|
if (payload.error) {
|
|
188
224
|
request.reject(new Error(payload.error.message || payload.error.code || "dynafetch-net request failed"));
|
|
189
225
|
return;
|
|
@@ -193,26 +229,42 @@ function createWorkerTransport() {
|
|
|
193
229
|
child.stderr.on("data", (chunk) => {
|
|
194
230
|
const message = chunk.toString().trim();
|
|
195
231
|
if (message) {
|
|
196
|
-
console.warn(`[dynafetch-net] ${message}`);
|
|
232
|
+
if (process.env.DYNAFETCH_DEBUG === "1") console.warn(`[dynafetch-net] ${message}`);
|
|
197
233
|
}
|
|
198
234
|
});
|
|
199
235
|
const onExit = (code, signal) => {
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
entry.
|
|
236
|
+
if (pending.size > 0 && signal !== "SIGKILL") {
|
|
237
|
+
const reason = `dynafetch-net exited (code=${code ?? "null"}, signal=${signal ?? "null"})`;
|
|
238
|
+
for (const entry of pending.values()) {
|
|
239
|
+
entry.reject(new Error(reason));
|
|
240
|
+
}
|
|
203
241
|
}
|
|
204
242
|
pending.clear();
|
|
205
243
|
transportPromise = null;
|
|
206
244
|
};
|
|
207
|
-
child.once("error", (
|
|
245
|
+
child.once("error", (error2) => {
|
|
208
246
|
for (const entry of pending.values()) {
|
|
209
|
-
entry.reject(
|
|
247
|
+
entry.reject(error2);
|
|
210
248
|
}
|
|
211
249
|
pending.clear();
|
|
212
250
|
transportPromise = null;
|
|
213
251
|
});
|
|
214
252
|
child.once("exit", onExit);
|
|
215
|
-
return Promise
|
|
253
|
+
return new Promise((resolve, reject) => {
|
|
254
|
+
let settled = false;
|
|
255
|
+
child.once("error", (err) => {
|
|
256
|
+
if (!settled) {
|
|
257
|
+
settled = true;
|
|
258
|
+
reject(new Error(`Failed to start dynafetch-net TLS proxy: ${err.message}. Binary: ${command}`));
|
|
259
|
+
}
|
|
260
|
+
});
|
|
261
|
+
child.once("spawn", () => {
|
|
262
|
+
if (!settled) {
|
|
263
|
+
settled = true;
|
|
264
|
+
resolve({ child, pending, updateRef, holdCount, hold, release });
|
|
265
|
+
}
|
|
266
|
+
});
|
|
267
|
+
});
|
|
216
268
|
}
|
|
217
269
|
async function getWorkerTransport() {
|
|
218
270
|
if (!transportPromise) {
|
|
@@ -220,27 +272,62 @@ async function getWorkerTransport() {
|
|
|
220
272
|
}
|
|
221
273
|
return transportPromise;
|
|
222
274
|
}
|
|
223
|
-
async function callWorker(method, params) {
|
|
275
|
+
/**
 * Sends one request to the dynafetch-net worker and waits for its reply.
 *
 * The request is written to the worker's stdin as a single JSON line
 * (`{ id, method, params }`) and registered in `transport.pending` under a
 * fresh UUID; the transport's own handlers settle that entry later.
 *
 * @param {string} method - RPC method name sent to the worker.
 * @param {*} params - JSON-serializable parameters for the method.
 * @param {number} [timeoutMs=30000] - How long to wait before rejecting.
 * @returns {Promise<*>} Whatever value the pending entry is resolved with.
 * @throws {Error} On timeout, on a stdin write failure, or when the pending
 *   entry is rejected by the transport.
 */
async function callWorker(method, params, timeoutMs = 3e4) {
  const transport = await getWorkerTransport();
  const id = randomUUID();
  const payload = JSON.stringify({ id, method, params });
  return await new Promise((resolve, reject) => {
    // Drops this request from the pending map and lets the transport
    // re-evaluate whether the child process must keep the event loop alive.
    const forget = () => {
      transport.pending.delete(id);
      transport.updateRef();
    };
    const timer = setTimeout(() => {
      forget();
      reject(new Error(`dynafetch-net request timed out after ${timeoutMs}ms (method: ${method})`));
    }, timeoutMs);
    // The timeout timer alone must not keep the process running.
    timer.unref();
    // Wraps a settle function so the timer is always cancelled first.
    const cancelTimerThen = (finish) => (value) => {
      clearTimeout(timer);
      finish(value);
    };
    transport.pending.set(id, {
      resolve: cancelTimerThen(resolve),
      reject: cancelTimerThen(reject)
    });
    transport.updateRef();
    transport.child.stdin.write(`${payload}\n`, (writeError) => {
      if (!writeError) return;
      clearTimeout(timer);
      forget();
      reject(writeError);
    });
  });
}
|
|
307
|
+
/**
 * Picks the timeout, in milliseconds, to apply to a worker RPC call.
 *
 * Precedence:
 *   1. `options.rpcTimeoutMs`, rounded up, at least 1 ms.
 *   2. `options.timeoutSeconds` converted to ms plus a 1000 ms margin,
 *      at least 1000 ms.
 *   3. A 30000 ms default.
 *
 * A value that is not a number (NaN after coercion) is ignored and the next
 * rule applies. Results are capped at 2147483647 ms because Node's
 * `setTimeout` treats NaN and larger delays as 1 ms, which would make the
 * request time out immediately.
 *
 * @param {{ rpcTimeoutMs?: number, timeoutSeconds?: number }} options
 * @returns {number} A finite integer delay in the range [1, 2147483647].
 */
function resolveRpcTimeoutMs(options) {
  const MAX_TIMER_DELAY_MS = 2147483647;
  const clamp = (ms, floorMs) => Math.min(MAX_TIMER_DELAY_MS, Math.max(floorMs, ms));
  if (options.rpcTimeoutMs != null) {
    const explicitMs = Math.ceil(options.rpcTimeoutMs);
    if (!Number.isNaN(explicitMs)) return clamp(explicitMs, 1);
  }
  if (options.timeoutSeconds != null) {
    const fetchMs = Math.ceil(options.timeoutSeconds * 1e3);
    // The extra second gives the worker room to report its own timeout first.
    if (!Number.isNaN(fetchMs)) return clamp(fetchMs + 1e3, 1e3);
  }
  return 3e4;
}
|
|
237
316
|
/**
 * Opens a worker session, runs `run` with that session bound in
 * `sessionStore`, and closes the session afterwards.
 *
 * The transport is held for the whole call so the worker process stays
 * referenced, and the hold is released on every exit path. Opening the
 * session happens inside the guarded region: if `openSession` fails or times
 * out, the hold is still released instead of leaking and keeping the child
 * process referenced.
 *
 * @param {object} options - Session options forwarded to the worker.
 *   `rpcTimeoutMs` is consumed locally and is not sent to the worker.
 * @param {Function} run - Callback executed inside the session context.
 * @returns {Promise<*>} The value returned by `run`.
 * @throws {Error} Whatever `openSession` or `run` throws.
 */
async function withDynafetchSession(options, run) {
  const transport = await getWorkerTransport();
  transport.hold();
  let session;
  try {
    const { rpcTimeoutMs, ...sessionOptions } = options;
    session = await callWorker(
      "openSession",
      sessionOptions,
      resolveRpcTimeoutMs(options)
    );
    return await sessionStore.run({ sessionId: session.sessionId }, run);
  } finally {
    if (session) {
      // Best effort: closing is fire-and-forget and its failure is ignored on purpose.
      callWorker("closeSession", { sessionId: session.sessionId }).catch(() => {
      });
    }
    transport.release();
  }
}
|
|
246
333
|
async function dynafetchNetHealth() {
|
|
@@ -256,7 +343,7 @@ async function dynafetchNetFetch(request, options = {}) {
|
|
|
256
343
|
browserProfile: options.browserProfile,
|
|
257
344
|
timeoutSeconds: options.timeoutSeconds,
|
|
258
345
|
proxy: options.proxy
|
|
259
|
-
});
|
|
346
|
+
}, resolveRpcTimeoutMs(options));
|
|
260
347
|
}
|
|
261
348
|
async function dynafetchNetBatchFetch(requests, options = {}) {
|
|
262
349
|
const session = sessionStore.getStore();
|
|
@@ -268,7 +355,73 @@ async function dynafetchNetBatchFetch(requests, options = {}) {
|
|
|
268
355
|
browserProfile: options.browserProfile,
|
|
269
356
|
timeoutSeconds: options.timeoutSeconds,
|
|
270
357
|
proxy: options.proxy
|
|
271
|
-
});
|
|
358
|
+
}, resolveRpcTimeoutMs(options));
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
// ../../src/phantom/url-safety.ts
|
|
362
|
+
import { lookup } from "node:dns/promises";
|
|
363
|
+
import * as net from "node:net";
|
|
364
|
+
// Hostnames that are refused outright, before any IP classification.
var BLOCKED_HOSTNAMES = /* @__PURE__ */ new Set([
  "0.0.0.0",
  "localhost",
  "metadata.google.internal"
]);
// Lookup promises keyed by normalized hostname (used by hostnameResolvesToPrivateAddress).
var hostnameLookupCache = /* @__PURE__ */ new Map();
/**
 * Canonicalizes a hostname for comparison: trims whitespace, strips the
 * square brackets of an IPv6 literal, drops trailing dots, and lowercases.
 *
 * @param {string} hostname
 * @returns {string}
 */
function normalizeHostname(hostname) {
  return hostname.trim().replace(/^\[|\]$/g, "").replace(/\.+$/g, "").toLowerCase();
}
/**
 * Reports whether a hostname or IP literal points at a local or private
 * destination that must not be fetched.
 *
 * Blocks the names in BLOCKED_HOSTNAMES and any `*.localhost` name.
 * IPv4: 0/8, 10/8, 127/8, 169.254/16, 172.16/12, 192.168/16.
 * IPv6: loopback (any spelling), the unspecified address, link-local
 * fe80::/10, unique-local fc00::/7, and IPv4-mapped addresses
 * (`::ffff:a.b.c.d`, also in hex form such as `::ffff:7f00:1`) whose embedded
 * IPv4 address falls in one of the ranges above.
 *
 * Names that are not IP literals and are not blocked return false; resolving
 * them is the caller's job.
 *
 * @param {string} hostname - Hostname or IP literal, optionally bracketed.
 * @returns {boolean} True when the destination is local or private.
 */
function isPrivateOrLocalHost(hostname) {
  // Classifies an IPv4 address by its first two octets.
  const isPrivateIpv4 = (a, b) => a === 0 || a === 10 || a === 127 || a === 169 && b === 254 || a === 172 && b >= 16 && b <= 31 || a === 192 && b === 168;
  // Expands a valid IPv6 literal into its eight 16-bit groups.
  const expandIpv6 = (address) => {
    let text = address.split("%")[0]; // drop any zone identifier
    const lastColon = text.lastIndexOf(":");
    const tail = text.slice(lastColon + 1);
    if (tail.includes(".")) {
      // Dotted IPv4 tail: rewrite it as two hexadecimal groups.
      const octets = tail.split(".").map((value) => Number(value));
      const high = (octets[0] << 8 | octets[1]).toString(16);
      const low = (octets[2] << 8 | octets[3]).toString(16);
      text = `${text.slice(0, lastColon + 1)}${high}:${low}`;
    }
    const [head, rest] = text.split("::");
    const headGroups = head ? head.split(":") : [];
    const restGroups = rest ? rest.split(":") : [];
    const missing = rest === void 0 ? 0 : 8 - headGroups.length - restGroups.length;
    const zeros = Array.from({ length: Math.max(0, missing) }, () => "0");
    return [...headGroups, ...zeros, ...restGroups].map((group) => Number.parseInt(group, 16));
  };
  const h = normalizeHostname(hostname);
  if (!h) return false;
  if (BLOCKED_HOSTNAMES.has(h) || h.endsWith(".localhost")) return true;
  const ipVer = net.isIP(h);
  if (!ipVer) return false;
  if (ipVer === 4) {
    const [a, b] = h.split(".").map((value) => Number(value));
    return isPrivateIpv4(a, b);
  }
  // Textual checks kept from the original so nothing previously blocked is let through.
  if (h === "::1") return true;
  if (h.startsWith("fe80:")) return true;
  if (h.startsWith("fc") || h.startsWith("fd")) return true;
  const groups = expandIpv6(h);
  if (groups.length !== 8 || groups.some((group) => Number.isNaN(group))) return false;
  const leadingZeros = (count) => groups.slice(0, count).every((group) => group === 0);
  if (leadingZeros(8)) return true; // unspecified address "::"
  if (leadingZeros(7) && groups[7] === 1) return true; // loopback written in a non-canonical form
  if ((groups[0] & 65472) === 65152) return true; // fe80::/10 link-local
  if ((groups[0] & 65024) === 64512) return true; // fc00::/7 unique-local
  if (leadingZeros(5) && groups[5] === 65535) {
    // IPv4-mapped address: classify the embedded IPv4 address.
    return isPrivateIpv4(groups[6] >> 8, groups[6] & 255);
  }
  return false;
}
|
|
394
|
+
/**
 * Validates a URL without any network access.
 *
 * @param {string} input - The URL to check.
 * @returns {URL} The parsed URL when it is acceptable.
 * @throws {Error} "Invalid URL" when parsing fails, "Only http(s) URLs are
 *   allowed" for other schemes, and "Refusing to fetch local/private
 *   addresses" when the host is classified as local or private.
 */
function assertSafeHttpUrlSync(input) {
  let candidate;
  try {
    candidate = new URL(input);
  } catch {
    throw new Error("Invalid URL");
  }
  const isHttp = candidate.protocol === "http:" || candidate.protocol === "https:";
  if (!isHttp) {
    throw new Error("Only http(s) URLs are allowed");
  }
  if (isPrivateOrLocalHost(candidate.hostname)) {
    throw new Error("Refusing to fetch local/private addresses");
  }
  return candidate;
}
|
|
409
|
+
/**
 * Resolves a hostname and reports whether any of its addresses is local or
 * private.
 *
 * IP literals and empty names return false here; literals are classified by
 * the synchronous check instead. Lookups are memoized per normalized
 * hostname. A failed lookup still yields false, but it is removed from the
 * cache so a transient DNS error is not remembered as "not private" for the
 * rest of the process lifetime.
 *
 * @param {string} hostname
 * @returns {Promise<boolean>} True when at least one resolved address is
 *   local or private.
 */
async function hostnameResolvesToPrivateAddress(hostname) {
  const normalized = normalizeHostname(hostname);
  if (!normalized || net.isIP(normalized)) return false;
  let pending = hostnameLookupCache.get(normalized);
  if (!pending) {
    pending = lookup(normalized, { all: true, verbatim: true }).then((records) => records.some((record) => isPrivateOrLocalHost(record.address))).catch(() => {
      // Forget the failure so the next call performs a fresh lookup.
      if (hostnameLookupCache.get(normalized) === pending) {
        hostnameLookupCache.delete(normalized);
      }
      return false;
    });
    hostnameLookupCache.set(normalized, pending);
  }
  return pending;
}
|
|
419
|
+
/**
 * Full URL safety check: the synchronous validation first, then a DNS-based
 * check that the hostname does not resolve to a local or private address.
 *
 * @param {string} input - The URL to check.
 * @returns {Promise<URL>} The parsed URL when it is acceptable.
 * @throws {Error} Any error from the synchronous check, or "Refusing to fetch
 *   local/private addresses" when resolution lands on a private address.
 */
async function assertSafeRemoteUrl(input) {
  const target = assertSafeHttpUrlSync(input);
  const resolvesPrivately = await hostnameResolvesToPrivateAddress(target.hostname);
  if (resolvesPrivately) {
    throw new Error("Refusing to fetch local/private addresses");
  }
  return target;
}
|
|
273
426
|
|
|
274
427
|
// ../../src/phantom/phantom-proxy.ts
|
|
@@ -284,64 +437,115 @@ function headersToRecord(h) {
|
|
|
284
437
|
}
|
|
285
438
|
return out;
|
|
286
439
|
}
|
|
287
|
-
|
|
440
|
+
var DIRECT_FALLBACK_WARNING = "dynafetch-net was unavailable for one or more requests; fell back to Node fetch without TLS/browser impersonation";
|
|
441
|
+
var DIRECT_PROXY_ERROR = "Direct fallback cannot honor proxy configuration; dynafetch-net is required when proxy is set";
|
|
442
|
+
/**
 * Builds an abort signal that fires after `timeoutMs`.
 *
 * When `timeoutMs` is falsy or not finite, no timer is created and the
 * returned object has no `signal`. Otherwise the delay is rounded up and kept
 * within [1, 2147483647] ms: Node's `setTimeout` treats larger delays as 1 ms,
 * so an unclamped large timeout would abort almost immediately.
 *
 * @param {number} [timeoutMs] - Timeout in milliseconds.
 * @returns {{ signal?: AbortSignal, dispose: () => void, didTimeout: () => boolean }}
 *   `dispose` cancels the timer; `didTimeout` reports whether it fired.
 */
function createTimeoutController(timeoutMs) {
  if (!timeoutMs || !Number.isFinite(timeoutMs)) {
    return {
      dispose: () => {
      },
      didTimeout: () => false
    };
  }
  const MAX_TIMER_DELAY_MS = 2147483647;
  const delayMs = Math.min(MAX_TIMER_DELAY_MS, Math.max(1, Math.ceil(timeoutMs)));
  const controller = new AbortController();
  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    controller.abort();
  }, delayMs);
  // The timer alone must not keep the process alive.
  timer.unref?.();
  return {
    signal: controller.signal,
    dispose: () => clearTimeout(timer),
    didTimeout: () => timedOut
  };
}
|
|
463
|
+
/**
 * Performs the request with Node's built-in `fetch`, bypassing dynafetch-net.
 *
 * Redirects are not followed (`redirect: "manual"`). The result carries
 * `transport: "direct"` and the direct-fallback warning.
 *
 * A failure while reading the response body yields an empty body, as before,
 * unless the failure was caused by the timeout firing. In that case the
 * timeout error is raised instead of returning a truncated, empty response.
 *
 * @param {{ method: string, url: string, headers: object, body?: string, proxy?: string }} payload
 * @param {{ timeoutMs?: number }} [options]
 * @returns {Promise<{ status: number, body: string, headers: object, transport: string, warning: string }>}
 * @throws {Error} When `payload.proxy` is set (the direct path cannot honor
 *   it), when the request times out, or when `fetch` itself fails.
 */
async function directFetch(payload, options = {}) {
  if (payload.proxy) {
    throw new Error(DIRECT_PROXY_ERROR);
  }
  const init = {
    method: payload.method,
    headers: payload.headers,
    redirect: "manual"
  };
  if (payload.body) init.body = payload.body;
  const timeout = createTimeoutController(options.timeoutMs);
  if (timeout.signal) init.signal = timeout.signal;
  try {
    const resp = await fetch(payload.url, init);
    const body = await resp.text().catch((readError) => {
      // An abort during the body read is a timeout, not an empty body.
      if (timeout.didTimeout()) throw readError;
      return "";
    });
    return {
      status: resp.status,
      body,
      headers: headersToRecord(resp.headers),
      transport: "direct",
      warning: DIRECT_FALLBACK_WARNING
    };
  } catch (error2) {
    if (timeout.didTimeout()) {
      throw new Error(`dynafetch request timed out after ${Math.max(1, Math.ceil(options.timeoutMs ?? 0))}ms`);
    }
    throw error2;
  } finally {
    timeout.dispose();
  }
}
|
|
298
|
-
async function dynafetchWorkerFetch(payload) {
|
|
494
|
+
/**
 * Performs the request through the dynafetch-net worker, without following
 * redirects, and tags the result with `transport: "dynafetch-net"`.
 *
 * @param {object} payload - Request description forwarded to the worker.
 * @param {{ timeoutMs?: number }} [options] - `timeoutMs` is passed on as the
 *   RPC timeout.
 * @returns {Promise<{ status: *, body: *, headers: *, finalUrl: *, error: *, transport: string }>}
 */
async function dynafetchWorkerFetch(payload, options = {}) {
  const workerOptions = {
    followRedirect: false,
    rpcTimeoutMs: options.timeoutMs
  };
  const { status, body, headers, finalUrl, error: workerError } = await dynafetchNetFetch(payload, workerOptions);
  return {
    status,
    body,
    headers,
    finalUrl,
    error: workerError,
    transport: "dynafetch-net"
  };
}
|
|
310
|
-
async function phantomFetch(payload) {
|
|
508
|
+
/**
 * Fetches one request after validating its URL.
 *
 * The worker transport is preferred. The direct Node fetch path is used when
 * PHANTOM_DISABLE_PROXY or DYNAFETCH_DISABLE_NET is "1", or as a fallback when
 * the worker call fails, unless DYNAFETCH_DISABLE_DIRECT_FALLBACK is "1", in
 * which case the worker error is rethrown.
 *
 * @param {object} payload - Request description; `payload.url` is validated.
 * @param {{ timeoutMs?: number }} [options]
 * @returns {Promise<object>} The response from whichever transport served it.
 * @throws {Error} When the URL is unsafe or the chosen transport fails.
 */
async function phantomFetch(payload, options = {}) {
  await assertSafeRemoteUrl(payload.url);
  const workerDisabled = process.env.PHANTOM_DISABLE_PROXY === "1" || process.env.DYNAFETCH_DISABLE_NET === "1";
  if (workerDisabled) {
    return directFetch(payload, options);
  }
  try {
    return await dynafetchWorkerFetch(payload, options);
  } catch (workerError) {
    const fallbackDisabled = process.env.DYNAFETCH_DISABLE_DIRECT_FALLBACK === "1";
    if (fallbackDisabled) {
      throw workerError;
    }
    return await directFetch(payload, options);
  }
}
|
|
323
|
-
async function phantomBatchFetch(payloads) {
|
|
522
|
+
/**
 * Fetches several requests at once after validating every URL.
 *
 * An empty input returns an empty array without any validation. Otherwise the
 * worker's batch call is preferred, and each payload is fetched directly when
 * PHANTOM_DISABLE_PROXY or DYNAFETCH_DISABLE_NET is "1", or when the batch
 * call fails and DYNAFETCH_DISABLE_DIRECT_FALLBACK is not "1".
 *
 * @param {object[]} payloads - Request descriptions.
 * @param {{ timeoutMs?: number }} [options]
 * @returns {Promise<object[]>} One response per payload, in input order.
 * @throws {Error} When any URL is unsafe or the chosen transport fails.
 */
async function phantomBatchFetch(payloads, options = {}) {
  if (payloads.length === 0) return [];
  await Promise.all(payloads.map((payload) => assertSafeRemoteUrl(payload.url)));
  // Direct path shared by the "worker disabled" and "worker failed" cases.
  const fetchAllDirectly = () => Promise.all(payloads.map((payload) => directFetch(payload, options)));
  const workerDisabled = process.env.PHANTOM_DISABLE_PROXY === "1" || process.env.DYNAFETCH_DISABLE_NET === "1";
  if (workerDisabled) {
    return fetchAllDirectly();
  }
  try {
    const workerOptions = {
      followRedirect: false,
      rpcTimeoutMs: options.timeoutMs
    };
    const responses = await dynafetchNetBatchFetch(payloads, workerOptions);
    return responses.map((r) => {
      const { status, body, headers, finalUrl, error: workerError } = r;
      return { status, body, headers, finalUrl, error: workerError, transport: "dynafetch-net" };
    });
  } catch (batchError) {
    if (process.env.DYNAFETCH_DISABLE_DIRECT_FALLBACK === "1") {
      throw batchError;
    }
    return fetchAllDirectly();
  }
}
|
|
347
551
|
|
|
@@ -657,6 +861,7 @@ var Executor = class {
|
|
|
657
861
|
};
|
|
658
862
|
this.executionErrors = [];
|
|
659
863
|
this.thirdPartyPolicy = "skip-noncritical";
|
|
864
|
+
this.warnings = /* @__PURE__ */ new Set();
|
|
660
865
|
// Early exit tracking
|
|
661
866
|
this.findAll = false;
|
|
662
867
|
this.fuzzyMatch = true;
|
|
@@ -673,6 +878,8 @@ var Executor = class {
|
|
|
673
878
|
this.fuzzyMatch = options.fuzzyMatch ?? true;
|
|
674
879
|
this.thirdPartyPolicy = options.thirdPartyPolicy ?? "skip-noncritical";
|
|
675
880
|
this.proxy = options.proxy;
|
|
881
|
+
this.timeoutMs = options.timeoutMs;
|
|
882
|
+
this.deadlineAt = options.deadlineAt;
|
|
676
883
|
this.applyDefaults(options.quiescence, options.moduleWaitMs);
|
|
677
884
|
}
|
|
678
885
|
if (this.targetValue !== null && this.targetValue !== void 0) {
|
|
@@ -709,6 +916,26 @@ var Executor = class {
|
|
|
709
916
|
if (!Number.isFinite(v)) return min;
|
|
710
917
|
return Math.max(min, Math.min(max, Math.trunc(v)));
|
|
711
918
|
}
|
|
919
|
+
createTimeoutError() {
|
|
920
|
+
const timeoutMs = Math.max(1, Math.ceil(this.timeoutMs ?? 1));
|
|
921
|
+
return new Error(`dynafetch timed out after ${timeoutMs}ms`);
|
|
922
|
+
}
|
|
923
|
+
remainingTimeMs() {
|
|
924
|
+
if (this.deadlineAt == null) return this.timeoutMs;
|
|
925
|
+
const remaining = this.deadlineAt - Date.now();
|
|
926
|
+
if (remaining <= 0) throw this.createTimeoutError();
|
|
927
|
+
return Math.max(1, Math.ceil(remaining));
|
|
928
|
+
}
|
|
929
|
+
boundedDurationMs(durationMs) {
|
|
930
|
+
if (this.deadlineAt == null) return durationMs;
|
|
931
|
+
const remaining = this.deadlineAt - Date.now();
|
|
932
|
+
if (remaining <= 0) return 0;
|
|
933
|
+
return Math.max(0, Math.min(durationMs, Math.ceil(remaining)));
|
|
934
|
+
}
|
|
935
|
+
recordWarning(warning) {
|
|
936
|
+
if (!warning) return;
|
|
937
|
+
this.warnings.add(warning);
|
|
938
|
+
}
|
|
712
939
|
applyDefaults(quiescence, moduleWaitMsOverride) {
|
|
713
940
|
const hardMaxCap = this.clampMs(Number(process.env.PHANTOM_QUIESCENCE_MAX_CAP_MS ?? 8e3), 500, 6e4);
|
|
714
941
|
const minWaitMs = this.clampMs(quiescence?.minWaitMs ?? 75, 0, 1e4);
|
|
@@ -839,7 +1066,7 @@ var Executor = class {
|
|
|
839
1066
|
async waitForModuleWork(timeoutMs) {
|
|
840
1067
|
const pending = Array.from(this.moduleInFlight.values());
|
|
841
1068
|
if (!pending.length) return;
|
|
842
|
-
const timeout = this.clampMs(timeoutMs, 0, 6e4);
|
|
1069
|
+
const timeout = this.clampMs(this.boundedDurationMs(timeoutMs), 0, 6e4);
|
|
843
1070
|
if (timeout === 0) return;
|
|
844
1071
|
const all = Promise.allSettled(pending).then(() => {
|
|
845
1072
|
});
|
|
@@ -856,7 +1083,11 @@ var Executor = class {
|
|
|
856
1083
|
try {
|
|
857
1084
|
this.telemetry_proxy++;
|
|
858
1085
|
const payload = { method, url, headers, headerOrder: Object.keys(headers), body, proxy: this.proxyUrlForScope(proxyScope) };
|
|
859
|
-
|
|
1086
|
+
const response = await phantomFetch(payload, {
|
|
1087
|
+
timeoutMs: this.remainingTimeMs()
|
|
1088
|
+
});
|
|
1089
|
+
this.recordWarning(response.warning);
|
|
1090
|
+
return response;
|
|
860
1091
|
} catch (e) {
|
|
861
1092
|
return { status: 0, body: e.message, headers: {}, error: e.message };
|
|
862
1093
|
}
|
|
@@ -972,10 +1203,10 @@ var Executor = class {
|
|
|
972
1203
|
const existing = this.moduleInFlight.get(cacheKey);
|
|
973
1204
|
if (existing) return existing;
|
|
974
1205
|
const p = (async () => {
|
|
975
|
-
const taskId = this.trackTaskStart("module_bundle", cacheKey, this.moduleWaitMs);
|
|
1206
|
+
const taskId = this.trackTaskStart("module_bundle", cacheKey, this.boundedDurationMs(this.moduleWaitMs));
|
|
976
1207
|
try {
|
|
977
1208
|
if (process.env.PHANTOM_DEBUG_MODULES === "1") {
|
|
978
|
-
|
|
1209
|
+
log("[Executor] Bundling module entry:", cacheKey);
|
|
979
1210
|
}
|
|
980
1211
|
const cached = this.moduleBundleCache.get(cacheKey);
|
|
981
1212
|
if (cached) {
|
|
@@ -1100,7 +1331,7 @@ var Executor = class {
|
|
|
1100
1331
|
this.moduleBundleCache.set(cacheKey, transformed);
|
|
1101
1332
|
if (!this.windowClosed) window.eval(transformed);
|
|
1102
1333
|
if (process.env.PHANTOM_DEBUG_MODULES === "1") {
|
|
1103
|
-
|
|
1334
|
+
log("[Executor] Module bundle eval complete:", cacheKey);
|
|
1104
1335
|
}
|
|
1105
1336
|
} catch (e) {
|
|
1106
1337
|
this.recordExecutionError(e, "unhandledRejection");
|
|
@@ -1437,9 +1668,9 @@ var Executor = class {
|
|
|
1437
1668
|
process.on("unhandledRejection", onNodeUnhandled);
|
|
1438
1669
|
try {
|
|
1439
1670
|
const virtualConsole = new VirtualConsole();
|
|
1440
|
-
virtualConsole.on("log", (...args) =>
|
|
1671
|
+
virtualConsole.on("log", (...args) => log("[JSDOM Log]", ...args));
|
|
1441
1672
|
virtualConsole.on("error", (...args) => console.error("[JSDOM Error]", ...args));
|
|
1442
|
-
virtualConsole.on("warn", (...args) =>
|
|
1673
|
+
virtualConsole.on("warn", (...args) => warn("[JSDOM Warn]", ...args));
|
|
1443
1674
|
const cookieJar = new CookieJar();
|
|
1444
1675
|
this.harvestData.cookies.forEach((c) => {
|
|
1445
1676
|
try {
|
|
@@ -1619,7 +1850,7 @@ var Executor = class {
|
|
|
1619
1850
|
if (that.handledModuleScriptUrls.has(abs)) return;
|
|
1620
1851
|
that.handledModuleScriptUrls.add(abs);
|
|
1621
1852
|
if (process.env.PHANTOM_DEBUG_MODULES === "1") {
|
|
1622
|
-
|
|
1853
|
+
log("[Executor] Detected module script:", abs);
|
|
1623
1854
|
}
|
|
1624
1855
|
void that.handleModuleScript(abs, window2);
|
|
1625
1856
|
return;
|
|
@@ -1778,7 +2009,7 @@ var Executor = class {
|
|
|
1778
2009
|
});
|
|
1779
2010
|
}
|
|
1780
2011
|
} catch (e) {
|
|
1781
|
-
|
|
2012
|
+
warn(`[Executor] Module script ${script.id} failed:`, e);
|
|
1782
2013
|
} finally {
|
|
1783
2014
|
currentScriptState.value = prevCurrentScript;
|
|
1784
2015
|
}
|
|
@@ -1794,7 +2025,7 @@ var Executor = class {
|
|
|
1794
2025
|
try {
|
|
1795
2026
|
window.eval(code);
|
|
1796
2027
|
} catch (e) {
|
|
1797
|
-
|
|
2028
|
+
warn(`[Executor] Script ${script.id} failed:`, e);
|
|
1798
2029
|
} finally {
|
|
1799
2030
|
currentScriptState.value = prevCurrentScript;
|
|
1800
2031
|
}
|
|
@@ -1834,16 +2065,16 @@ var Executor = class {
|
|
|
1834
2065
|
if (this.moduleInFlight.size > 0) {
|
|
1835
2066
|
await this.waitForModuleWork(this.moduleWaitMs);
|
|
1836
2067
|
}
|
|
1837
|
-
|
|
2068
|
+
log("[Executor] Waiting for network quiescence...");
|
|
1838
2069
|
const quiescenceStart = Date.now();
|
|
1839
2070
|
try {
|
|
1840
2071
|
await this.waitForQuiescence();
|
|
1841
2072
|
} catch (e) {
|
|
1842
|
-
|
|
2073
|
+
warn("[Executor] Quiescence wait failed:", e);
|
|
1843
2074
|
}
|
|
1844
2075
|
this.timings.quiescence_ms = Date.now() - quiescenceStart;
|
|
1845
2076
|
const reason = this.matchFound && !this.findAll ? "(early exit on match)" : "";
|
|
1846
|
-
|
|
2077
|
+
log(`[Executor] Quiescence reached in ${Date.now() - quiescenceStart}ms ${reason}`);
|
|
1847
2078
|
const renderedHtml = this.serializeDocument(window);
|
|
1848
2079
|
this.windowClosed = true;
|
|
1849
2080
|
try {
|
|
@@ -1855,7 +2086,8 @@ var Executor = class {
|
|
|
1855
2086
|
matchedRequests: this.earlyMatches,
|
|
1856
2087
|
renderedHtml,
|
|
1857
2088
|
timings: { ...this.timings },
|
|
1858
|
-
errors: this.executionErrors.length ? this.executionErrors : void 0
|
|
2089
|
+
errors: this.executionErrors.length ? this.executionErrors : void 0,
|
|
2090
|
+
warnings: Array.from(this.warnings)
|
|
1859
2091
|
};
|
|
1860
2092
|
const shutdownGraceMs = this.clampMs(Number(process.env.PHANTOM_SHUTDOWN_GRACE_MS ?? 50), 10, 5e3);
|
|
1861
2093
|
await new Promise((r) => setTimeout(r, shutdownGraceMs));
|
|
@@ -2106,7 +2338,7 @@ var Executor = class {
|
|
|
2106
2338
|
this.asyncFlag = async !== false;
|
|
2107
2339
|
this.aborted = false;
|
|
2108
2340
|
if (process.env.PHANTOM_DEBUG_XHR === "1") {
|
|
2109
|
-
|
|
2341
|
+
log("[XHR open]", this.method, this.url);
|
|
2110
2342
|
}
|
|
2111
2343
|
this.readyState = 1;
|
|
2112
2344
|
this.responseURL = this.url;
|
|
@@ -2159,7 +2391,7 @@ var Executor = class {
|
|
|
2159
2391
|
};
|
|
2160
2392
|
that.logRequest(logEntry);
|
|
2161
2393
|
if (process.env.PHANTOM_DEBUG_XHR === "1") {
|
|
2162
|
-
|
|
2394
|
+
log("[XHR send]", this.method, this.url, {
|
|
2163
2395
|
hasBody: body != null,
|
|
2164
2396
|
headers
|
|
2165
2397
|
});
|
|
@@ -2326,7 +2558,9 @@ async function prefetchModuleGraph(rootUrls, cache, pageUrl, opts) {
|
|
|
2326
2558
|
}
|
|
2327
2559
|
for (let round = 0; round < maxRounds && toFetch.length > 0; round++) {
|
|
2328
2560
|
if (process.env.PHANTOM_DEBUG_MODULES === "1") {
|
|
2329
|
-
|
|
2561
|
+
if (process.env.DYNAFETCH_DEBUG === "1") {
|
|
2562
|
+
console.log(`[prefetch] Round ${round}: ${toFetch.length} modules`);
|
|
2563
|
+
}
|
|
2330
2564
|
}
|
|
2331
2565
|
const payloads = toFetch.map((u) => ({
|
|
2332
2566
|
method: "GET",
|
|
@@ -2336,11 +2570,12 @@ async function prefetchModuleGraph(rootUrls, cache, pageUrl, opts) {
|
|
|
2336
2570
|
body: "",
|
|
2337
2571
|
proxy: opts?.proxyUrl
|
|
2338
2572
|
}));
|
|
2339
|
-
const responses = await phantomBatchFetch(payloads);
|
|
2573
|
+
const responses = await phantomBatchFetch(payloads, { timeoutMs: opts?.timeoutMs });
|
|
2340
2574
|
const newToScan = [];
|
|
2341
2575
|
for (let i = 0; i < toFetch.length; i++) {
|
|
2342
2576
|
const u = toFetch[i];
|
|
2343
2577
|
const r = responses[i];
|
|
2578
|
+
if (r.warning) opts?.onWarning?.(r.warning);
|
|
2344
2579
|
if (r.status < 400 && r.body) {
|
|
2345
2580
|
cache.set(u, r.body);
|
|
2346
2581
|
totalFetched++;
|
|
@@ -2378,6 +2613,7 @@ var Harvester = class {
|
|
|
2378
2613
|
this.requestHeaders = {};
|
|
2379
2614
|
this.thirdPartyPolicy = "skip-noncritical";
|
|
2380
2615
|
this.prefetchModulePreloads = true;
|
|
2616
|
+
this.warnings = /* @__PURE__ */ new Set();
|
|
2381
2617
|
this.targetUrl = url;
|
|
2382
2618
|
this.prefetchExternalScripts = opts.prefetchExternalScripts !== false;
|
|
2383
2619
|
this.externalScriptConcurrency = opts.externalScriptConcurrency ?? 8;
|
|
@@ -2386,6 +2622,8 @@ var Harvester = class {
|
|
|
2386
2622
|
this.thirdPartyPolicy = opts.thirdPartyPolicy ?? "skip-noncritical";
|
|
2387
2623
|
this.prefetchModulePreloads = opts.prefetchModulePreloads !== false;
|
|
2388
2624
|
this.proxy = opts.proxy;
|
|
2625
|
+
this.timeoutMs = opts.timeoutMs;
|
|
2626
|
+
this.deadlineAt = opts.deadlineAt;
|
|
2389
2627
|
}
|
|
2390
2628
|
proxyUrlForScope(scope) {
|
|
2391
2629
|
if (!this.proxy) return void 0;
|
|
@@ -2401,6 +2639,20 @@ var Harvester = class {
|
|
|
2401
2639
|
}
|
|
2402
2640
|
return pairs.join("; ");
|
|
2403
2641
|
}
|
|
2642
|
+
createTimeoutError() {
|
|
2643
|
+
const timeoutMs = Math.max(1, Math.ceil(this.timeoutMs ?? 1));
|
|
2644
|
+
return new Error(`dynafetch timed out after ${timeoutMs}ms`);
|
|
2645
|
+
}
|
|
2646
|
+
remainingTimeMs() {
|
|
2647
|
+
if (this.deadlineAt == null) return this.timeoutMs;
|
|
2648
|
+
const remaining = this.deadlineAt - Date.now();
|
|
2649
|
+
if (remaining <= 0) throw this.createTimeoutError();
|
|
2650
|
+
return Math.max(1, Math.ceil(remaining));
|
|
2651
|
+
}
|
|
2652
|
+
recordWarning(warning) {
|
|
2653
|
+
if (!warning) return;
|
|
2654
|
+
this.warnings.add(warning);
|
|
2655
|
+
}
|
|
2404
2656
|
async fetchViaProxy(url, headers = {}, followRedirects = false, maxRedirects = 5, method = "GET", body = "", proxyScope = "page") {
|
|
2405
2657
|
let currentUrl = url;
|
|
2406
2658
|
let redirectCount = 0;
|
|
@@ -2421,7 +2673,10 @@ var Harvester = class {
|
|
|
2421
2673
|
body: currentBody,
|
|
2422
2674
|
proxy: this.proxyUrlForScope(proxyScope)
|
|
2423
2675
|
};
|
|
2424
|
-
const data = await phantomFetch(payload
|
|
2676
|
+
const data = await phantomFetch(payload, {
|
|
2677
|
+
timeoutMs: this.remainingTimeMs()
|
|
2678
|
+
});
|
|
2679
|
+
this.recordWarning(data.warning);
|
|
2425
2680
|
if (data.error) throw new Error(`Proxy Error: ${data.error}`);
|
|
2426
2681
|
const setCookie = data.headers["Set-Cookie"] || data.headers["set-cookie"];
|
|
2427
2682
|
if (setCookie) this.cookies.push(setCookie);
|
|
@@ -2429,7 +2684,7 @@ var Harvester = class {
|
|
|
2429
2684
|
const location = data.headers["Location"] || data.headers["location"];
|
|
2430
2685
|
if (location) {
|
|
2431
2686
|
currentUrl = new URL(location, currentUrl).toString();
|
|
2432
|
-
|
|
2687
|
+
log(`[Harvest] Following redirect to: ${currentUrl}`);
|
|
2433
2688
|
redirectCount++;
|
|
2434
2689
|
if (data.status === 302 || data.status === 303) {
|
|
2435
2690
|
currentMethod = "GET";
|
|
@@ -2500,10 +2755,10 @@ var Harvester = class {
|
|
|
2500
2755
|
return b.includes("just a moment") || b.includes("challenge-platform") || b.includes("__cf_chl") || b.includes("cf-browser-verification") || b.includes("enable javascript and cookies to continue") || b.includes("security verification") || b.includes("captcha") || b.includes("trkcode=") || b.includes("trkinfo=");
|
|
2501
2756
|
}
|
|
2502
2757
|
async harvest() {
|
|
2503
|
-
|
|
2758
|
+
log(`[Harvest] Fetching ${this.targetUrl} via TLS Proxy...`);
|
|
2504
2759
|
let response = await this.fetchViaProxy(this.targetUrl, { ...DEFAULT_HEADERS, ...this.requestHeaders }, true);
|
|
2505
2760
|
if (response.status >= 400) {
|
|
2506
|
-
|
|
2761
|
+
log(`[Harvest] Response Body on Error:`, response.body.substring(0, 500));
|
|
2507
2762
|
if (this.looksBlocked(response.status, response.body || "")) {
|
|
2508
2763
|
throw new BlockedByBotProtectionError(
|
|
2509
2764
|
this.targetUrl,
|
|
@@ -2515,7 +2770,7 @@ var Harvester = class {
|
|
|
2515
2770
|
let finalUrl = response.finalUrl;
|
|
2516
2771
|
let html = response.body;
|
|
2517
2772
|
if (this.isConsentWall(finalUrl, html)) {
|
|
2518
|
-
|
|
2773
|
+
log(`[Harvest] Consent wall detected at ${finalUrl}, attempting bypass...`);
|
|
2519
2774
|
const form = this.parseConsentForm(html, finalUrl);
|
|
2520
2775
|
if (form) {
|
|
2521
2776
|
try {
|
|
@@ -2529,32 +2784,32 @@ var Harvester = class {
|
|
|
2529
2784
|
};
|
|
2530
2785
|
const consentResp = await this.fetchViaProxy(form.action, postHeaders, true, 10, "POST", formBody);
|
|
2531
2786
|
if (consentResp.status < 400) {
|
|
2532
|
-
|
|
2787
|
+
log(`[Harvest] Consent POST succeeded (${consentResp.status}), final URL: ${consentResp.finalUrl}`);
|
|
2533
2788
|
if (!this.isConsentWall(consentResp.finalUrl, consentResp.body)) {
|
|
2534
2789
|
response = consentResp;
|
|
2535
2790
|
finalUrl = consentResp.finalUrl;
|
|
2536
2791
|
html = consentResp.body;
|
|
2537
|
-
|
|
2792
|
+
log(`[Harvest] Consent bypass successful (from redirect), got real page at ${finalUrl}`);
|
|
2538
2793
|
} else {
|
|
2539
|
-
|
|
2794
|
+
log(`[Harvest] Consent redirect still on consent page, re-fetching original URL...`);
|
|
2540
2795
|
const retryResp = await this.fetchViaProxy(this.targetUrl, { ...DEFAULT_HEADERS, ...this.requestHeaders }, true);
|
|
2541
2796
|
if (retryResp.status < 400 && !this.isConsentWall(retryResp.finalUrl, retryResp.body)) {
|
|
2542
2797
|
response = retryResp;
|
|
2543
2798
|
finalUrl = retryResp.finalUrl;
|
|
2544
2799
|
html = retryResp.body;
|
|
2545
|
-
|
|
2800
|
+
log(`[Harvest] Consent bypass successful (re-fetch), got real page at ${finalUrl}`);
|
|
2546
2801
|
} else {
|
|
2547
|
-
|
|
2802
|
+
warn(`[Harvest] Re-fetch after consent still returned consent wall, proceeding with original`);
|
|
2548
2803
|
}
|
|
2549
2804
|
}
|
|
2550
2805
|
} else {
|
|
2551
|
-
|
|
2806
|
+
warn(`[Harvest] Consent POST returned ${consentResp.status}, proceeding with consent page`);
|
|
2552
2807
|
}
|
|
2553
2808
|
} catch (e) {
|
|
2554
|
-
|
|
2809
|
+
warn(`[Harvest] Consent bypass failed, proceeding with consent page:`, e);
|
|
2555
2810
|
}
|
|
2556
2811
|
} else {
|
|
2557
|
-
|
|
2812
|
+
warn(`[Harvest] Could not parse consent form, proceeding with consent page`);
|
|
2558
2813
|
}
|
|
2559
2814
|
}
|
|
2560
2815
|
const $ = cheerio.load(html);
|
|
@@ -2663,11 +2918,14 @@ var Harvester = class {
|
|
|
2663
2918
|
}));
|
|
2664
2919
|
const allPayloads = [...scriptPayloads, ...preloadPayloads];
|
|
2665
2920
|
if (allPayloads.length > 0) {
|
|
2666
|
-
|
|
2667
|
-
const allResponses = await phantomBatchFetch(allPayloads
|
|
2921
|
+
log(`[Harvest] Batch-fetching ${scriptPayloads.length} scripts + ${preloadPayloads.length} modulepreloads...`);
|
|
2922
|
+
const allResponses = await phantomBatchFetch(allPayloads, {
|
|
2923
|
+
timeoutMs: this.remainingTimeMs()
|
|
2924
|
+
});
|
|
2668
2925
|
for (let i = 0; i < batchScriptMeta.length; i++) {
|
|
2669
2926
|
const meta = batchScriptMeta[i];
|
|
2670
2927
|
const resp = allResponses[i];
|
|
2928
|
+
this.recordWarning(resp.warning);
|
|
2671
2929
|
const logEntry = {
|
|
2672
2930
|
type: "resource_load",
|
|
2673
2931
|
url: meta.absoluteUrl,
|
|
@@ -2690,12 +2948,13 @@ var Harvester = class {
|
|
|
2690
2948
|
execution: meta.execution
|
|
2691
2949
|
});
|
|
2692
2950
|
} else {
|
|
2693
|
-
|
|
2951
|
+
warn(`[Harvest] Failed to fetch script ${meta.absoluteUrl}: status ${resp.status}`);
|
|
2694
2952
|
}
|
|
2695
2953
|
}
|
|
2696
2954
|
for (let i = 0; i < modulePreloadUrls.length; i++) {
|
|
2697
2955
|
const url = modulePreloadUrls[i];
|
|
2698
2956
|
const resp = allResponses[batchScriptMeta.length + i];
|
|
2957
|
+
this.recordWarning(resp.warning);
|
|
2699
2958
|
const logEntry = {
|
|
2700
2959
|
type: "resource_load",
|
|
2701
2960
|
url,
|
|
@@ -2760,7 +3019,9 @@ var Harvester = class {
|
|
|
2760
3019
|
if (moduleEntryUrls.length > 0 || modulePreloads.length > 0) {
|
|
2761
3020
|
const rootUrls = [...moduleEntryUrls, ...modulePreloads.map((mp) => mp.url)];
|
|
2762
3021
|
await prefetchModuleGraph(rootUrls, moduleGraphCache, finalUrl, {
|
|
2763
|
-
proxyUrl: this.proxyUrlForScope("assets")
|
|
3022
|
+
proxyUrl: this.proxyUrlForScope("assets"),
|
|
3023
|
+
timeoutMs: this.remainingTimeMs(),
|
|
3024
|
+
onWarning: (warning) => this.recordWarning(warning)
|
|
2764
3025
|
});
|
|
2765
3026
|
}
|
|
2766
3027
|
return {
|
|
@@ -2774,7 +3035,8 @@ var Harvester = class {
|
|
|
2774
3035
|
cookies: this.cookies,
|
|
2775
3036
|
headers: response.headers,
|
|
2776
3037
|
logs: this.logs,
|
|
2777
|
-
moduleGraphCache
|
|
3038
|
+
moduleGraphCache,
|
|
3039
|
+
warnings: Array.from(this.warnings)
|
|
2778
3040
|
};
|
|
2779
3041
|
}
|
|
2780
3042
|
};
|
|
@@ -2851,27 +3113,6 @@ var DynafetchInputError = class extends Error {
|
|
|
2851
3113
|
this.status = status;
|
|
2852
3114
|
}
|
|
2853
3115
|
};
|
|
2854
|
-
function isPrivateOrLocalHost(hostname) {
|
|
2855
|
-
const h = hostname.toLowerCase();
|
|
2856
|
-
if (h === "localhost" || h.endsWith(".localhost") || h === "0.0.0.0") return true;
|
|
2857
|
-
if (h === "metadata.google.internal") return true;
|
|
2858
|
-
const ipVer = net.isIP(h);
|
|
2859
|
-
if (!ipVer) return false;
|
|
2860
|
-
if (ipVer === 4) {
|
|
2861
|
-
const [a, b] = h.split(".").map((x) => Number(x));
|
|
2862
|
-
if (a === 10) return true;
|
|
2863
|
-
if (a === 127) return true;
|
|
2864
|
-
if (a === 0) return true;
|
|
2865
|
-
if (a === 169 && b === 254) return true;
|
|
2866
|
-
if (a === 172 && b >= 16 && b <= 31) return true;
|
|
2867
|
-
if (a === 192 && b === 168) return true;
|
|
2868
|
-
return false;
|
|
2869
|
-
}
|
|
2870
|
-
if (h === "::1") return true;
|
|
2871
|
-
if (h.startsWith("fe80:")) return true;
|
|
2872
|
-
if (h.startsWith("fc") || h.startsWith("fd")) return true;
|
|
2873
|
-
return false;
|
|
2874
|
-
}
|
|
2875
3116
|
function normalizeProxy(input) {
|
|
2876
3117
|
if (!input) return void 0;
|
|
2877
3118
|
if (typeof input === "string") {
|
|
@@ -2901,15 +3142,10 @@ function normalizeOptions(input) {
|
|
|
2901
3142
|
}
|
|
2902
3143
|
let parsedUrl;
|
|
2903
3144
|
try {
|
|
2904
|
-
parsedUrl =
|
|
2905
|
-
} catch {
|
|
2906
|
-
|
|
2907
|
-
|
|
2908
|
-
if (parsedUrl.protocol !== "http:" && parsedUrl.protocol !== "https:") {
|
|
2909
|
-
throw new DynafetchInputError("Only http(s) URLs are allowed");
|
|
2910
|
-
}
|
|
2911
|
-
if (isPrivateOrLocalHost(parsedUrl.hostname)) {
|
|
2912
|
-
throw new DynafetchInputError("Refusing to fetch local/private addresses");
|
|
3145
|
+
parsedUrl = assertSafeHttpUrlSync(options.url);
|
|
3146
|
+
} catch (error2) {
|
|
3147
|
+
const message = error2 instanceof Error ? error2.message : "Invalid URL";
|
|
3148
|
+
throw new DynafetchInputError(message);
|
|
2913
3149
|
}
|
|
2914
3150
|
return {
|
|
2915
3151
|
...options,
|
|
@@ -2921,8 +3157,13 @@ function normalizeOptions(input) {
|
|
|
2921
3157
|
thirdPartyPolicy: options.thirdPartyPolicy ?? "skip-noncritical"
|
|
2922
3158
|
};
|
|
2923
3159
|
}
|
|
2924
|
-
function toWarnings(plan, errors, options) {
|
|
3160
|
+
function toWarnings(plan, errors, options, runtimeWarnings = []) {
|
|
2925
3161
|
const warnings = [plan.reason];
|
|
3162
|
+
for (const warning of runtimeWarnings) {
|
|
3163
|
+
if (warning && !warnings.includes(warning)) {
|
|
3164
|
+
warnings.push(warning);
|
|
3165
|
+
}
|
|
3166
|
+
}
|
|
2926
3167
|
if (plan.strategy === "jsdom-fallback" || plan.strategy === "framework-probe") {
|
|
2927
3168
|
warnings.push("runtime execution used the legacy JSDOM-based renderer while lightweight adapters are still being built");
|
|
2928
3169
|
}
|
|
@@ -2933,8 +3174,8 @@ function toWarnings(plan, errors, options) {
|
|
|
2933
3174
|
warnings.push("non-critical third-party scripts are skipped on the critical render path");
|
|
2934
3175
|
}
|
|
2935
3176
|
if (errors?.length) {
|
|
2936
|
-
for (const
|
|
2937
|
-
warnings.push(`${
|
|
3177
|
+
for (const error2 of errors.slice(0, 3)) {
|
|
3178
|
+
warnings.push(`${error2.source}: ${error2.message}`);
|
|
2938
3179
|
}
|
|
2939
3180
|
}
|
|
2940
3181
|
return warnings;
|
|
@@ -2956,85 +3197,125 @@ function computeConfidence(params) {
|
|
|
2956
3197
|
confidence -= Math.min(0.28, params.executionErrors * 0.07);
|
|
2957
3198
|
return Math.max(0.05, Math.min(0.98, Number(confidence.toFixed(2))));
|
|
2958
3199
|
}
|
|
3200
|
+
/**
 * Creates the error used to signal that a dynafetch call ran out of time.
 *
 * @param {number} timeoutMs - Budget in milliseconds; interpolated as-is into
 *   the message.
 * @returns {Error} Error whose `name` is "DynafetchTimeoutError" and whose
 *   message is `dynafetch timed out after <timeoutMs>ms`.
 */
function createTimeoutError(timeoutMs) {
  return Object.assign(new Error(`dynafetch timed out after ${timeoutMs}ms`), {
    name: "DynafetchTimeoutError"
  });
}
|
|
3205
|
+
/**
 * Settles with the outcome of `operation`, or rejects with a timeout error if
 * `operation` has not settled within `timeoutMs`.
 *
 * The underlying operation is not cancelled when the timer fires; only the
 * returned promise is rejected.
 *
 * @param {Promise<*>|*} operation - Work to wait for. Non-promise values are
 *   treated as already-resolved.
 * @param {number} [timeoutMs] - Budget in milliseconds. When falsy or
 *   non-finite, no timer is armed and `operation` is simply awaited.
 * @returns {Promise<*>} The value `operation` resolves with.
 * @throws {Error} The error from `createTimeoutError()` when the budget
 *   elapses first, or whatever `operation` rejects with.
 */
async function withOperationTimeout(operation, timeoutMs) {
  if (!timeoutMs || !Number.isFinite(timeoutMs)) {
    return await operation;
  }
  // Node resets any setTimeout delay above 2^31-1 ms to 1 ms, which would make
  // a very large budget time out almost immediately; clamp the timer delay.
  const MAX_TIMER_DELAY_MS = 2147483647;
  const budgetMs = Math.max(1, Math.ceil(timeoutMs));
  const delayMs = Math.min(budgetMs, MAX_TIMER_DELAY_MS);
  return await new Promise((resolve, reject) => {
    const timer = setTimeout(() => reject(createTimeoutError(budgetMs)), delayMs);
    // Do not let the pending timer alone keep the process alive.
    timer.unref?.();
    // Promise.resolve() keeps this path consistent with the `await` above for
    // inputs that are plain values rather than thenables.
    Promise.resolve(operation).then(
      (value) => {
        clearTimeout(timer);
        resolve(value);
      },
      (error2) => {
        clearTimeout(timer);
        reject(error2);
      }
    );
  });
}
|
|
2959
3224
|
/**
 * Fetches a page, optionally executes its scripts, and returns the resulting
 * HTML together with diagnostics.
 *
 * Flow: normalize the input, harvest the page inside a dynafetch session,
 * detect the framework and pick a strategy, run the Executor unless the
 * strategy is "static-html", then assemble warnings, a confidence score and
 * timings. The whole session is raced against `options.timeoutMs`.
 *
 * @param {*} input - Passed to `normalizeOptions()`, which validates it.
 * @returns {Promise<object>} Result with `url`, `finalUrl`, `status`, `html`,
 *   `framework`, `strategy`, `confidence`, `warnings`, `timings` and
 *   `requestCount`.
 * @throws Whatever `normalizeOptions()`, the harvester or the executor throw,
 *   plus the timeout error from `withOperationTimeout()`.
 */
async function dynafetch(input) {
  const options = normalizeOptions(input);
  // Whole seconds (at least 1) for the session; undefined when no timeout is set.
  const timeoutSeconds = options.timeoutMs ? Math.max(1, Math.ceil(options.timeoutMs / 1e3)) : void 0;
  // Absolute wall-clock deadline shared by the harvester and the executor.
  const deadlineAt = options.timeoutMs ? Date.now() + options.timeoutMs : void 0;
  const initialCookies = normalizeCookies(options.cookies);
  const proxy = normalizeProxy(options.proxy);
  // NOTE(review): the outer timer only rejects the returned promise; the
  // session callback is presumably stopped via `deadlineAt` — confirm.
  return await withOperationTimeout(
    withDynafetchSession(
      {
        browserProfile: options.browserProfile,
        timeoutSeconds,
        proxy: proxy?.url,
        rpcTimeoutMs: options.timeoutMs
      },
      async () => {
        const totalStart = Date.now();
        const harvestStart = Date.now();
        const harvester = new Harvester(options.url, {
          prefetchExternalScripts: options.prefetchExternalScripts,
          prefetchModulePreloads: options.prefetchModulePreloads,
          requestHeaders: options.headers,
          initialCookies,
          thirdPartyPolicy: options.thirdPartyPolicy,
          proxy,
          timeoutMs: options.timeoutMs,
          deadlineAt
        });
        const harvest = await harvester.harvest();
        const harvestMs = Date.now() - harvestStart;
        const framework = detectFramework(harvest);
        const plan = planDynafetch(framework, harvest, options.allowJsdomFallback !== false);
        // Defaults describe the static-html case, where no execution happens.
        let html = harvest.html;
        let requestCount = harvest.logs.length;
        let executionErrors;
        let executionWarnings = [];
        let executeMs = 0;
        let quiescenceMs = 0;
        let scriptsTransformed = 0;
        if (plan.strategy !== "static-html") {
          const executeStart = Date.now();
          const executor = new Executor(harvest, {
            thirdPartyPolicy: options.thirdPartyPolicy,
            quiescence: {
              minWaitMs: options.minWaitMs,
              idleWaitMs: options.idleWaitMs,
              maxWaitMs: options.maxWaitMs
            },
            moduleWaitMs: options.moduleWaitMs,
            proxy,
            timeoutMs: options.timeoutMs,
            deadlineAt
          });
          const execution = await executor.execute();
          executeMs = Date.now() - executeStart;
          // Fall back to the harvested HTML when execution produced none.
          html = execution.renderedHtml ?? harvest.html;
          requestCount = execution.logs.length;
          executionErrors = execution.errors;
          executionWarnings = execution.warnings ?? [];
          quiescenceMs = execution.timings?.quiescence_ms ?? 0;
          scriptsTransformed = execution.timings?.scripts_transformed_count ?? 0;
        }
        const totalMs = Date.now() - totalStart;
        // Harvest-time warnings come first, then execution-time warnings.
        const warnings = toWarnings(
          plan,
          executionErrors,
          options,
          [...harvest.warnings ?? [], ...executionWarnings]
        );
        const confidence = computeConfidence({
          plan,
          initialStateCount: Object.keys(harvest.initialState).length,
          executionErrors: executionErrors?.length ?? 0,
          htmlLength: html.length
        });
        return {
          url: options.url,
          finalUrl: harvest.url,
          status: harvest.status,
          html,
          framework,
          strategy: plan.strategy,
          confidence,
          warnings,
          timings: {
            total: totalMs,
            harvest: harvestMs,
            execute: executeMs,
            quiescence: quiescenceMs,
            scriptsTransformed
          },
          requestCount
        };
      }
    ),
    options.timeoutMs
  );
}
|
|
3040
3321
|
export {
|