@grabbit-labs/dynafetch 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/bin/dynafetch-net-darwin-arm64 +0 -0
- package/bin/dynafetch-net-darwin-x64 +0 -0
- package/bin/dynafetch-net-linux-arm64 +0 -0
- package/bin/dynafetch-net-linux-x64 +0 -0
- package/bin/dynafetch-net-win32-x64.exe +0 -0
- package/dist/index.d.ts +61 -118
- package/dist/index.js +464 -154
- package/dist/index.js.map +4 -4
- package/package.json +12 -2
package/dist/index.js
CHANGED
|
@@ -1,13 +1,4 @@
|
|
|
1
1
|
import { createRequire } from "node:module"; import { fileURLToPath as __fileURLToPath } from "node:url"; import { dirname as __dirname_fn } from "node:path"; const __filename = __fileURLToPath(import.meta.url); const __dirname = __dirname_fn(__filename); const require = createRequire(import.meta.url);
|
|
2
|
-
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
3
|
-
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
4
|
-
}) : x)(function(x) {
|
|
5
|
-
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
6
|
-
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
7
|
-
});
|
|
8
|
-
|
|
9
|
-
// ../dynafetch-core/src/index.ts
|
|
10
|
-
import * as net from "node:net";
|
|
11
2
|
|
|
12
3
|
// ../../src/phantom/log.ts
|
|
13
4
|
var enabled = process.env.DYNAFETCH_DEBUG === "1";
|
|
@@ -123,29 +114,32 @@ var Transformer = class {
|
|
|
123
114
|
import { AsyncLocalStorage } from "node:async_hooks";
|
|
124
115
|
import { spawn } from "node:child_process";
|
|
125
116
|
import { randomUUID } from "node:crypto";
|
|
117
|
+
import { accessSync, constants } from "node:fs";
|
|
126
118
|
import path2 from "node:path";
|
|
127
119
|
import readline from "node:readline";
|
|
120
|
+
import { fileURLToPath } from "node:url";
|
|
128
121
|
var sessionStore = new AsyncLocalStorage();
|
|
129
122
|
var transportPromise = null;
|
|
123
|
+
var workerDir = path2.dirname(fileURLToPath(import.meta.url));
|
|
130
124
|
function findPrecompiledBinary() {
|
|
131
125
|
const platform = process.platform;
|
|
132
|
-
const arch = process.arch === "x64" ? "x64" : "arm64";
|
|
126
|
+
const arch = process.arch === "x64" ? "x64" : process.arch === "arm64" ? "arm64" : null;
|
|
127
|
+
if (!arch) return null;
|
|
133
128
|
const ext = platform === "win32" ? ".exe" : "";
|
|
134
129
|
const name = `dynafetch-net-${platform}-${arch}${ext}`;
|
|
135
130
|
const candidates = [
|
|
136
|
-
path2.resolve(
|
|
131
|
+
path2.resolve(workerDir, "../bin", name),
|
|
137
132
|
// installed: dist/../bin
|
|
138
|
-
path2.resolve(
|
|
133
|
+
path2.resolve(workerDir, "../../../dynafetch-net/bin", name),
|
|
139
134
|
// dev: dynafetch-core/src/net -> dynafetch-net/bin
|
|
140
|
-
path2.resolve(
|
|
135
|
+
path2.resolve(workerDir, "../../../../packages/dynafetch-net/bin", name),
|
|
141
136
|
// dev: alt layout
|
|
142
137
|
path2.resolve(process.cwd(), "packages/dynafetch-net/bin", name)
|
|
143
138
|
// dev: from workspace root
|
|
144
139
|
];
|
|
145
140
|
for (const candidate of candidates) {
|
|
146
141
|
try {
|
|
147
|
-
|
|
148
|
-
fs2.accessSync(candidate, fs2.constants.X_OK);
|
|
142
|
+
accessSync(candidate, constants.X_OK);
|
|
149
143
|
return candidate;
|
|
150
144
|
} catch {
|
|
151
145
|
}
|
|
@@ -182,6 +176,31 @@ function createWorkerTransport() {
|
|
|
182
176
|
);
|
|
183
177
|
}
|
|
184
178
|
const pending = /* @__PURE__ */ new Map();
|
|
179
|
+
let holdCount = 0;
|
|
180
|
+
const updateRef = () => {
|
|
181
|
+
if (pending.size === 0 && holdCount === 0) {
|
|
182
|
+
child.unref();
|
|
183
|
+
child.stdin.unref?.();
|
|
184
|
+
child.stdout.unref?.();
|
|
185
|
+
child.stderr.unref?.();
|
|
186
|
+
} else {
|
|
187
|
+
child.ref();
|
|
188
|
+
child.stdin.ref?.();
|
|
189
|
+
child.stdout.ref?.();
|
|
190
|
+
child.stderr.ref?.();
|
|
191
|
+
}
|
|
192
|
+
};
|
|
193
|
+
const hold = () => {
|
|
194
|
+
holdCount++;
|
|
195
|
+
updateRef();
|
|
196
|
+
};
|
|
197
|
+
const release = () => {
|
|
198
|
+
holdCount = Math.max(0, holdCount - 1);
|
|
199
|
+
updateRef();
|
|
200
|
+
};
|
|
201
|
+
child.stdin.on("error", () => {
|
|
202
|
+
});
|
|
203
|
+
updateRef();
|
|
185
204
|
const rl = readline.createInterface({ input: child.stdout });
|
|
186
205
|
rl.on("line", (line) => {
|
|
187
206
|
const trimmed = line.trim();
|
|
@@ -194,11 +213,13 @@ function createWorkerTransport() {
|
|
|
194
213
|
entry.reject(new Error(`Invalid dynafetch-net response: ${String(error2)}`));
|
|
195
214
|
}
|
|
196
215
|
pending.clear();
|
|
216
|
+
updateRef();
|
|
197
217
|
return;
|
|
198
218
|
}
|
|
199
219
|
const request = pending.get(payload.id);
|
|
200
220
|
if (!request) return;
|
|
201
221
|
pending.delete(payload.id);
|
|
222
|
+
updateRef();
|
|
202
223
|
if (payload.error) {
|
|
203
224
|
request.reject(new Error(payload.error.message || payload.error.code || "dynafetch-net request failed"));
|
|
204
225
|
return;
|
|
@@ -212,9 +233,11 @@ function createWorkerTransport() {
|
|
|
212
233
|
}
|
|
213
234
|
});
|
|
214
235
|
const onExit = (code, signal) => {
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
entry.
|
|
236
|
+
if (pending.size > 0 && signal !== "SIGKILL") {
|
|
237
|
+
const reason = `dynafetch-net exited (code=${code ?? "null"}, signal=${signal ?? "null"})`;
|
|
238
|
+
for (const entry of pending.values()) {
|
|
239
|
+
entry.reject(new Error(reason));
|
|
240
|
+
}
|
|
218
241
|
}
|
|
219
242
|
pending.clear();
|
|
220
243
|
transportPromise = null;
|
|
@@ -238,7 +261,7 @@ function createWorkerTransport() {
|
|
|
238
261
|
child.once("spawn", () => {
|
|
239
262
|
if (!settled) {
|
|
240
263
|
settled = true;
|
|
241
|
-
resolve({ child, pending });
|
|
264
|
+
resolve({ child, pending, updateRef, holdCount, hold, release });
|
|
242
265
|
}
|
|
243
266
|
});
|
|
244
267
|
});
|
|
@@ -256,8 +279,10 @@ async function callWorker(method, params, timeoutMs = 3e4) {
|
|
|
256
279
|
return await new Promise((resolve, reject) => {
|
|
257
280
|
const timer = setTimeout(() => {
|
|
258
281
|
transport.pending.delete(id);
|
|
282
|
+
transport.updateRef();
|
|
259
283
|
reject(new Error(`dynafetch-net request timed out after ${timeoutMs}ms (method: ${method})`));
|
|
260
284
|
}, timeoutMs);
|
|
285
|
+
timer.unref();
|
|
261
286
|
transport.pending.set(id, {
|
|
262
287
|
resolve: (value) => {
|
|
263
288
|
clearTimeout(timer);
|
|
@@ -268,22 +293,41 @@ async function callWorker(method, params, timeoutMs = 3e4) {
|
|
|
268
293
|
reject(err);
|
|
269
294
|
}
|
|
270
295
|
});
|
|
296
|
+
transport.updateRef();
|
|
271
297
|
transport.child.stdin.write(`${payload}
|
|
272
298
|
`, (error2) => {
|
|
273
299
|
if (!error2) return;
|
|
274
300
|
clearTimeout(timer);
|
|
275
301
|
transport.pending.delete(id);
|
|
302
|
+
transport.updateRef();
|
|
276
303
|
reject(error2);
|
|
277
304
|
});
|
|
278
305
|
});
|
|
279
306
|
}
|
|
307
|
+
/**
 * Picks the deadline, in milliseconds, for one dynafetch-net worker RPC.
 *
 * Precedence:
 *   1. `options.rpcTimeoutMs` - rounded up, never below 1 ms.
 *   2. `options.timeoutSeconds` - converted to ms, rounded up, plus 1000 ms,
 *      never below 1000 ms.
 *   3. 30000 ms when neither is usable.
 *
 * A value that coerces to NaN is skipped instead of being returned, and every
 * result is capped at 2147483647 ms: Node's setTimeout replaces NaN or larger
 * delays with 1 ms, which would make the RPC time out almost immediately.
 *
 * @param {{ rpcTimeoutMs?: number | null, timeoutSeconds?: number | null }} options
 * @returns {number} Timeout in whole milliseconds.
 */
function resolveRpcTimeoutMs(options) {
  const MAX_TIMER_DELAY_MS = 2147483647;
  const cap = (ms, floorMs) => Math.min(MAX_TIMER_DELAY_MS, Math.max(floorMs, ms));

  const explicitMs = options.rpcTimeoutMs != null ? Number(options.rpcTimeoutMs) : NaN;
  if (!Number.isNaN(explicitMs)) {
    return cap(Math.ceil(explicitMs), 1);
  }

  const seconds = options.timeoutSeconds != null ? Number(options.timeoutSeconds) : NaN;
  if (!Number.isNaN(seconds)) {
    return cap(Math.ceil(seconds * 1e3) + 1e3, 1e3);
  }

  return 3e4;
}
|
|
280
316
|
/**
 * Opens a dynafetch-net session, runs `run` with that session id stored in
 * `sessionStore`, then closes the session.
 *
 * The transport is held (`transport.hold()`) for the whole call and is always
 * released, including when option destructuring or the "openSession" RPC
 * throws. Previously a failed open skipped `release()`, leaving the hold
 * count permanently raised.
 *
 * @param {object} options Session options; `rpcTimeoutMs` is stripped before
 *   the options are sent to the worker and only influences the RPC deadline.
 * @param {Function} run Callback executed inside the session scope.
 * @returns {Promise<*>} Whatever `run` resolves to.
 */
async function withDynafetchSession(options, run) {
  const transport = await getWorkerTransport();
  transport.hold();
  try {
    const { rpcTimeoutMs, ...sessionOptions } = options;
    const session = await callWorker(
      "openSession",
      sessionOptions,
      resolveRpcTimeoutMs(options)
    );
    try {
      return await sessionStore.run({ sessionId: session.sessionId }, run);
    } finally {
      // Best effort: closing is fire-and-forget and its failure is ignored.
      callWorker("closeSession", { sessionId: session.sessionId }).catch(() => {
      });
    }
  } finally {
    transport.release();
  }
}
|
|
289
333
|
async function dynafetchNetHealth() {
|
|
@@ -299,7 +343,7 @@ async function dynafetchNetFetch(request, options = {}) {
|
|
|
299
343
|
browserProfile: options.browserProfile,
|
|
300
344
|
timeoutSeconds: options.timeoutSeconds,
|
|
301
345
|
proxy: options.proxy
|
|
302
|
-
});
|
|
346
|
+
}, resolveRpcTimeoutMs(options));
|
|
303
347
|
}
|
|
304
348
|
async function dynafetchNetBatchFetch(requests, options = {}) {
|
|
305
349
|
const session = sessionStore.getStore();
|
|
@@ -311,7 +355,73 @@ async function dynafetchNetBatchFetch(requests, options = {}) {
|
|
|
311
355
|
browserProfile: options.browserProfile,
|
|
312
356
|
timeoutSeconds: options.timeoutSeconds,
|
|
313
357
|
proxy: options.proxy
|
|
314
|
-
});
|
|
358
|
+
}, resolveRpcTimeoutMs(options));
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
// ../../src/phantom/url-safety.ts
|
|
362
|
+
import { lookup } from "node:dns/promises";
|
|
363
|
+
import * as net from "node:net";
|
|
364
|
+
var BLOCKED_HOSTNAMES = /* @__PURE__ */ new Set([
|
|
365
|
+
"0.0.0.0",
|
|
366
|
+
"localhost",
|
|
367
|
+
"metadata.google.internal"
|
|
368
|
+
]);
|
|
369
|
+
var hostnameLookupCache = /* @__PURE__ */ new Map();
|
|
370
|
+
/**
 * Canonicalises a hostname for comparison: trims surrounding whitespace,
 * drops a leading "[" and a trailing "]", removes trailing dots, and
 * lower-cases the result.
 *
 * @param {string} hostname
 * @returns {string}
 */
function normalizeHostname(hostname) {
  const trimmed = hostname.trim();
  const unbracketed = trimmed.replace(/^\[|\]$/g, "");
  const withoutTrailingDots = unbracketed.replace(/\.+$/g, "");
  return withoutTrailingDots.toLowerCase();
}
|
|
373
|
+
/**
 * Expands a textual IPv6 address (already validated by `net.isIP`, zone id
 * removed) into its eight 16-bit groups. A dotted IPv4 tail such as
 * "::ffff:127.0.0.1" is folded into the last two groups.
 *
 * @param {string} address
 * @returns {number[] | null} Eight integers, or null if the text cannot be expanded.
 */
function expandIPv6Groups(address) {
  let text = address;
  const lastColon = text.lastIndexOf(":");
  const tail = text.slice(lastColon + 1);
  if (tail.includes(".")) {
    const octets = tail.split(".").map(Number);
    const validOctets = octets.length === 4 && octets.every((o) => Number.isInteger(o) && o >= 0 && o <= 255);
    if (!validOctets) return null;
    const high = ((octets[0] << 8) | octets[1]).toString(16);
    const low = ((octets[2] << 8) | octets[3]).toString(16);
    text = `${text.slice(0, lastColon + 1)}${high}:${low}`;
  }
  const halves = text.split("::");
  if (halves.length > 2) return null;
  const parseGroups = (part) => (part === "" ? [] : part.split(":").map((g) => Number.parseInt(g, 16)));
  const head = parseGroups(halves[0]);
  const rest = halves.length === 2 ? parseGroups(halves[1]) : [];
  const missing = 8 - head.length - rest.length;
  if (halves.length === 1 ? missing !== 0 : missing < 0) return null;
  const groups = [...head, ...new Array(missing).fill(0), ...rest];
  return groups.every((g) => Number.isInteger(g) && g >= 0 && g <= 0xffff) ? groups : null;
}

/**
 * Reports whether a hostname or IP literal is local or private and therefore
 * must not be fetched.
 *
 * Blocks: names in BLOCKED_HOSTNAMES, "*.localhost", IPv4 0/8, 10/8, 127/8,
 * 169.254/16, 172.16/12 and 192.168/16, and for IPv6 the unspecified and
 * loopback addresses, link-local fe80::/10, unique-local fc00::/7, and
 * IPv4-mapped / IPv4-compatible addresses whose embedded IPv4 address is in
 * one of the blocked IPv4 ranges.
 *
 * Non-IP hostnames that are not explicitly blocked return false; resolving
 * them is the caller's job.
 *
 * @param {string} hostname Hostname or IP literal (brackets allowed).
 * @returns {boolean}
 */
function isPrivateOrLocalHost(hostname) {
  const h = normalizeHostname(hostname);
  if (!h) return false;
  if (BLOCKED_HOSTNAMES.has(h) || h.endsWith(".localhost")) return true;

  const isBlockedIPv4 = (a, b) =>
    a === 0 ||
    a === 10 ||
    a === 127 ||
    (a === 169 && b === 254) ||
    (a === 172 && b >= 16 && b <= 31) ||
    (a === 192 && b === 168);

  const ipVer = net.isIP(h);
  if (!ipVer) return false;
  if (ipVer === 4) {
    const [a, b] = h.split(".").map((value) => Number(value));
    return isBlockedIPv4(a, b);
  }

  // IPv6 from here on. A zone id ("%eth0") plays no part in classification.
  const address = h.split("%")[0];
  // Textual prefix check kept from the previous implementation so nothing
  // that used to be blocked becomes allowed.
  if (address.startsWith("fc") || address.startsWith("fd")) return true;

  const groups = expandIPv6Groups(address);
  if (!groups) return false;
  if ((groups[0] & 0xffc0) === 0xfe80) return true; // link-local fe80::/10
  if ((groups[0] & 0xfe00) === 0xfc00) return true; // unique-local fc00::/7

  // "::ffff:a.b.c.d" (IPv4-mapped) and "::a.b.c.d" (IPv4-compatible) carry an
  // IPv4 address in the last 32 bits; "::" and "::1" fall in here too and are
  // blocked by the 0/8 rule.
  const upperBitsZero = groups.slice(0, 5).every((g) => g === 0);
  if (upperBitsZero && (groups[5] === 0xffff || groups[5] === 0)) {
    return isBlockedIPv4(groups[6] >> 8, groups[6] & 0xff);
  }
  return false;
}
|
|
394
|
+
/**
 * Synchronous URL gate: parses `input`, requires an http(s) scheme, and
 * rejects hosts that `isPrivateOrLocalHost` flags. Performs no DNS lookup.
 *
 * @param {string} input
 * @returns {URL} The parsed URL.
 * @throws {Error} "Invalid URL", "Only http(s) URLs are allowed", or
 *   "Refusing to fetch local/private addresses".
 */
function assertSafeHttpUrlSync(input) {
  const parsedUrl = (() => {
    try {
      return new URL(input);
    } catch {
      throw new Error("Invalid URL");
    }
  })();

  const isHttpScheme = ["http:", "https:"].includes(parsedUrl.protocol);
  if (!isHttpScheme) {
    throw new Error("Only http(s) URLs are allowed");
  }

  const { hostname } = parsedUrl;
  if (isPrivateOrLocalHost(hostname)) {
    throw new Error("Refusing to fetch local/private addresses");
  }

  return parsedUrl;
}
|
|
409
|
+
/**
 * Resolves `hostname` via DNS and reports whether any returned address is
 * local or private according to `isPrivateOrLocalHost`.
 *
 * Empty names and IP literals return false without a lookup. Results are
 * memoised per normalised hostname in `hostnameLookupCache`; concurrent
 * callers share the in-flight promise. A failed lookup still resolves to
 * false, as before, but its cache entry is evicted so that one transient DNS
 * error is not remembered as "safe" for the life of the process.
 *
 * @param {string} hostname
 * @returns {Promise<boolean>}
 */
async function hostnameResolvesToPrivateAddress(hostname) {
  const normalized = normalizeHostname(hostname);
  if (!normalized || net.isIP(normalized)) return false;
  let pending = hostnameLookupCache.get(normalized);
  if (!pending) {
    pending = lookup(normalized, { all: true, verbatim: true })
      .then((records) => records.some((record) => isPrivateOrLocalHost(record.address)))
      .catch(() => {
        // Only drop our own entry; a newer lookup may have replaced it.
        if (hostnameLookupCache.get(normalized) === pending) {
          hostnameLookupCache.delete(normalized);
        }
        return false;
      });
    hostnameLookupCache.set(normalized, pending);
  }
  return pending;
}
|
|
419
|
+
/**
 * Full URL gate: runs the synchronous checks of `assertSafeHttpUrlSync`, then
 * rejects the URL if `hostnameResolvesToPrivateAddress` reports that its
 * hostname resolves to a local or private address.
 *
 * @param {string} input
 * @returns {Promise<URL>} The parsed URL.
 * @throws {Error} Any error from `assertSafeHttpUrlSync`, or
 *   "Refusing to fetch local/private addresses".
 */
async function assertSafeRemoteUrl(input) {
  const parsedUrl = assertSafeHttpUrlSync(input);
  const resolvesPrivately = await hostnameResolvesToPrivateAddress(parsedUrl.hostname);
  if (!resolvesPrivately) {
    return parsedUrl;
  }
  throw new Error("Refusing to fetch local/private addresses");
}
|
|
316
426
|
|
|
317
427
|
// ../../src/phantom/phantom-proxy.ts
|
|
@@ -327,64 +437,115 @@ function headersToRecord(h) {
|
|
|
327
437
|
}
|
|
328
438
|
return out;
|
|
329
439
|
}
|
|
330
|
-
|
|
440
|
+
var DIRECT_FALLBACK_WARNING = "dynafetch-net was unavailable for one or more requests; fell back to Node fetch without TLS/browser impersonation";
|
|
441
|
+
var DIRECT_PROXY_ERROR = "Direct fallback cannot honor proxy configuration; dynafetch-net is required when proxy is set";
|
|
442
|
+
/**
 * Builds an abort-on-timeout helper for a single request.
 *
 * When `timeoutMs` is falsy or not finite no timer is created and the
 * returned object has no `signal`. Otherwise an unref'd timer aborts the
 * returned `signal` after `timeoutMs` (rounded up, at least 1 ms). The delay
 * is capped at 2147483647 ms because Node's setTimeout replaces larger delays
 * with 1 ms, which would abort the request almost immediately.
 *
 * @param {number | undefined} timeoutMs
 * @returns {{ signal?: AbortSignal, dispose: () => void, didTimeout: () => boolean }}
 *   `dispose` clears the timer; `didTimeout` tells whether the timer fired.
 */
function createTimeoutController(timeoutMs) {
  if (!timeoutMs || !Number.isFinite(timeoutMs)) {
    return {
      dispose: () => {
      },
      didTimeout: () => false
    };
  }
  const MAX_TIMER_DELAY_MS = 2147483647;
  const delayMs = Math.min(MAX_TIMER_DELAY_MS, Math.max(1, Math.ceil(timeoutMs)));
  const controller = new AbortController();
  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    controller.abort();
  }, delayMs);
  // The timer alone must not keep the process alive.
  timer.unref?.();
  return {
    signal: controller.signal,
    dispose: () => clearTimeout(timer),
    didTimeout: () => timedOut
  };
}
|
|
463
|
+
/**
 * Performs a request with Node's built-in `fetch`, bypassing dynafetch-net.
 * Redirects are not followed (`redirect: "manual"`), and every response is
 * tagged with `transport: "direct"` and DIRECT_FALLBACK_WARNING.
 *
 * A body that fails to read for a non-timeout reason still yields an empty
 * string, as before. A failure caused by the timeout firing while the body is
 * being read is now reported as a timeout instead of being returned as a
 * successful response with an empty body.
 *
 * @param {object} payload Request description (`url`, `method`, `headers`,
 *   optional `body`, optional `proxy`).
 * @param {{ timeoutMs?: number }} [options]
 * @returns {Promise<{status: number, body: string, headers: object, transport: string, warning: string}>}
 * @throws {Error} DIRECT_PROXY_ERROR when `payload.proxy` is set, a timeout
 *   error when the deadline elapsed, otherwise the underlying fetch error.
 */
async function directFetch(payload, options = {}) {
  if (payload.proxy) {
    throw new Error(DIRECT_PROXY_ERROR);
  }
  const init = {
    method: payload.method,
    headers: payload.headers,
    redirect: "manual"
  };
  if (payload.body) init.body = payload.body;
  const timeout = createTimeoutController(options.timeoutMs);
  if (timeout.signal) init.signal = timeout.signal;
  try {
    const resp = await fetch(payload.url, init);
    const body = await resp.text().catch((error2) => {
      // Let a timeout abort reach the outer catch; tolerate other read errors.
      if (timeout.didTimeout()) throw error2;
      return "";
    });
    return {
      status: resp.status,
      body,
      headers: headersToRecord(resp.headers),
      transport: "direct",
      warning: DIRECT_FALLBACK_WARNING
    };
  } catch (error2) {
    if (timeout.didTimeout()) {
      throw new Error(
        `dynafetch request timed out after ${Math.max(1, Math.ceil(options.timeoutMs ?? 0))}ms`,
        { cause: error2 }
      );
    }
    throw error2;
  } finally {
    timeout.dispose();
  }
}
|
|
341
|
-
async function dynafetchWorkerFetch(payload) {
|
|
494
|
+
/**
 * Sends one request through dynafetch-net with redirects disabled and
 * `options.timeoutMs` as the RPC timeout, then reshapes the reply into the
 * common response record tagged `transport: "dynafetch-net"`.
 *
 * @param {object} payload Request forwarded to `dynafetchNetFetch`.
 * @param {{ timeoutMs?: number }} [options]
 * @returns {Promise<object>} `{ status, body, headers, finalUrl, error, transport }`.
 */
async function dynafetchWorkerFetch(payload, options = {}) {
  const netOptions = {
    followRedirect: false,
    rpcTimeoutMs: options.timeoutMs
  };
  const { status, body, headers, finalUrl, error } = await dynafetchNetFetch(payload, netOptions);
  return {
    status,
    body,
    headers,
    finalUrl,
    error,
    transport: "dynafetch-net"
  };
}
|
|
353
|
-
async function phantomFetch(payload) {
|
|
508
|
+
/**
 * Fetches one payload after validating its URL with `assertSafeRemoteUrl`.
 *
 * Uses `directFetch` when PHANTOM_DISABLE_PROXY or DYNAFETCH_DISABLE_NET is
 * "1". Otherwise tries `dynafetchWorkerFetch`, and on failure falls back to
 * `directFetch` unless DYNAFETCH_DISABLE_DIRECT_FALLBACK is "1", in which
 * case the worker error is rethrown.
 *
 * @param {object} payload Request description; `payload.url` is validated.
 * @param {{ timeoutMs?: number }} [options]
 * @returns {Promise<object>} Response record from whichever transport served it.
 */
async function phantomFetch(payload, options = {}) {
  await assertSafeRemoteUrl(payload.url);

  const { PHANTOM_DISABLE_PROXY, DYNAFETCH_DISABLE_NET } = process.env;
  const workerDisabled = PHANTOM_DISABLE_PROXY === "1" || DYNAFETCH_DISABLE_NET === "1";
  if (workerDisabled) {
    return directFetch(payload, options);
  }

  try {
    return await dynafetchWorkerFetch(payload, options);
  } catch (error2) {
    const fallbackAllowed = process.env.DYNAFETCH_DISABLE_DIRECT_FALLBACK !== "1";
    if (!fallbackAllowed) {
      throw error2;
    }
    return await directFetch(payload, options);
  }
}
|
|
366
|
-
async function phantomBatchFetch(payloads) {
|
|
522
|
+
/**
 * Batch counterpart of `phantomFetch`.
 *
 * An empty batch returns `[]` immediately. Otherwise every payload URL is
 * validated with `assertSafeRemoteUrl` first. The batch then goes through
 * `dynafetchNetBatchFetch` (redirects disabled) unless PHANTOM_DISABLE_PROXY
 * or DYNAFETCH_DISABLE_NET is "1". If the worker path throws, each payload is
 * fetched with `directFetch`, unless DYNAFETCH_DISABLE_DIRECT_FALLBACK is
 * "1", in which case the error is rethrown.
 *
 * @param {object[]} payloads
 * @param {{ timeoutMs?: number }} [options]
 * @returns {Promise<object[]>} One response record per payload, in order.
 */
async function phantomBatchFetch(payloads, options = {}) {
  if (payloads.length === 0) return [];
  await Promise.all(payloads.map((payload) => assertSafeRemoteUrl(payload.url)));

  const fetchAllDirectly = () => Promise.all(payloads.map((payload) => directFetch(payload, options)));

  const { PHANTOM_DISABLE_PROXY, DYNAFETCH_DISABLE_NET } = process.env;
  if (PHANTOM_DISABLE_PROXY === "1" || DYNAFETCH_DISABLE_NET === "1") {
    return fetchAllDirectly();
  }

  try {
    const netOptions = {
      followRedirect: false,
      rpcTimeoutMs: options.timeoutMs
    };
    const responses = await dynafetchNetBatchFetch(payloads, netOptions);
    return responses.map(({ status, body, headers, finalUrl, error }) => ({
      status,
      body,
      headers,
      finalUrl,
      error,
      transport: "dynafetch-net"
    }));
  } catch (error2) {
    if (process.env.DYNAFETCH_DISABLE_DIRECT_FALLBACK === "1") {
      throw error2;
    }
    return fetchAllDirectly();
  }
}
|
|
390
551
|
|
|
@@ -653,6 +814,34 @@ function shouldSkipDynamicScriptUrl(url, pageUrl, policy) {
|
|
|
653
814
|
}
|
|
654
815
|
|
|
655
816
|
// ../../src/phantom/execute.ts
|
|
817
|
+
var esbuildModulePromise = null;
|
|
818
|
+
var esbuildRefCount = 0;
|
|
819
|
+
/**
 * Loads esbuild on first use (sharing one dynamic `import("esbuild")`
 * promise) and counts the caller as a user of the module.
 *
 * If the import rejects, the caller's count is taken back, and the cached
 * promise is cleared once no users remain so that a later call retries.
 *
 * @returns {Promise<object>} The imported esbuild module namespace.
 */
async function acquireEsbuildModule() {
  esbuildModulePromise = esbuildModulePromise || import("esbuild");
  esbuildRefCount += 1;
  try {
    const esbuildMod = await esbuildModulePromise;
    return esbuildMod;
  } catch (error2) {
    esbuildRefCount = Math.max(0, esbuildRefCount - 1);
    if (esbuildRefCount === 0) {
      esbuildModulePromise = null;
    }
    throw error2;
  }
}
|
|
832
|
+
/**
 * Gives back one use of the shared esbuild module. When the last user
 * releases it, calls the module's `stop` function if it has one (looked up
 * on the module, then on its `default` export), ignoring any error it
 * throws, and clears the cached import promise.
 *
 * @param {object | null | undefined} esbuildMod Module previously obtained
 *   from `acquireEsbuildModule`; a falsy value is a no-op.
 * @returns {void}
 */
function releaseEsbuildModule(esbuildMod) {
  if (!esbuildMod) return;

  const remainingUsers = Math.max(0, esbuildRefCount - 1);
  esbuildRefCount = remainingUsers;
  if (remainingUsers > 0) return;

  const stopFn = esbuildMod.stop || esbuildMod.default?.stop;
  if (typeof stopFn === "function") {
    try {
      const receiver = esbuildMod.default ?? esbuildMod;
      stopFn.call(receiver);
    } catch {
    }
  }
  esbuildModulePromise = null;
}
|
|
656
845
|
var Executor = class {
|
|
657
846
|
constructor(harvestData, options = null) {
|
|
658
847
|
this.logs = [];
|
|
@@ -684,6 +873,9 @@ var Executor = class {
|
|
|
684
873
|
this.moduleInFlight = /* @__PURE__ */ new Map();
|
|
685
874
|
// entryUrl -> promise
|
|
686
875
|
this.windowClosed = false;
|
|
876
|
+
this.esbuildModule = null;
|
|
877
|
+
this.originalGlobalMessageChannel = void 0;
|
|
878
|
+
this.originalGlobalMessagePort = void 0;
|
|
687
879
|
// Simple telemetry counters (useful for debugging).
|
|
688
880
|
this.telemetry_stubbed = 0;
|
|
689
881
|
this.telemetry_proxy = 0;
|
|
@@ -700,6 +892,7 @@ var Executor = class {
|
|
|
700
892
|
};
|
|
701
893
|
this.executionErrors = [];
|
|
702
894
|
this.thirdPartyPolicy = "skip-noncritical";
|
|
895
|
+
this.warnings = /* @__PURE__ */ new Set();
|
|
703
896
|
// Early exit tracking
|
|
704
897
|
this.findAll = false;
|
|
705
898
|
this.fuzzyMatch = true;
|
|
@@ -716,6 +909,8 @@ var Executor = class {
|
|
|
716
909
|
this.fuzzyMatch = options.fuzzyMatch ?? true;
|
|
717
910
|
this.thirdPartyPolicy = options.thirdPartyPolicy ?? "skip-noncritical";
|
|
718
911
|
this.proxy = options.proxy;
|
|
912
|
+
this.timeoutMs = options.timeoutMs;
|
|
913
|
+
this.deadlineAt = options.deadlineAt;
|
|
719
914
|
this.applyDefaults(options.quiescence, options.moduleWaitMs);
|
|
720
915
|
}
|
|
721
916
|
if (this.targetValue !== null && this.targetValue !== void 0) {
|
|
@@ -752,6 +947,36 @@ var Executor = class {
|
|
|
752
947
|
if (!Number.isFinite(v)) return min;
|
|
753
948
|
return Math.max(min, Math.min(max, Math.trunc(v)));
|
|
754
949
|
}
|
|
950
|
+
createTimeoutError() {
|
|
951
|
+
const timeoutMs = Math.max(1, Math.ceil(this.timeoutMs ?? 1));
|
|
952
|
+
return new Error(`dynafetch timed out after ${timeoutMs}ms`);
|
|
953
|
+
}
|
|
954
|
+
remainingTimeMs() {
|
|
955
|
+
if (this.deadlineAt == null) return this.timeoutMs;
|
|
956
|
+
const remaining = this.deadlineAt - Date.now();
|
|
957
|
+
if (remaining <= 0) throw this.createTimeoutError();
|
|
958
|
+
return Math.max(1, Math.ceil(remaining));
|
|
959
|
+
}
|
|
960
|
+
boundedDurationMs(durationMs) {
|
|
961
|
+
if (this.deadlineAt == null) return durationMs;
|
|
962
|
+
const remaining = this.deadlineAt - Date.now();
|
|
963
|
+
if (remaining <= 0) return 0;
|
|
964
|
+
return Math.max(0, Math.min(durationMs, Math.ceil(remaining)));
|
|
965
|
+
}
|
|
966
|
+
recordWarning(warning) {
|
|
967
|
+
if (!warning) return;
|
|
968
|
+
this.warnings.add(warning);
|
|
969
|
+
}
|
|
970
|
+
unrefNewMessagePorts(initialHandles) {
|
|
971
|
+
for (const handle of process._getActiveHandles()) {
|
|
972
|
+
if (initialHandles.has(handle)) continue;
|
|
973
|
+
if (handle?.constructor?.name !== "MessagePort") continue;
|
|
974
|
+
try {
|
|
975
|
+
handle.unref?.();
|
|
976
|
+
} catch {
|
|
977
|
+
}
|
|
978
|
+
}
|
|
979
|
+
}
|
|
755
980
|
applyDefaults(quiescence, moduleWaitMsOverride) {
|
|
756
981
|
const hardMaxCap = this.clampMs(Number(process.env.PHANTOM_QUIESCENCE_MAX_CAP_MS ?? 8e3), 500, 6e4);
|
|
757
982
|
const minWaitMs = this.clampMs(quiescence?.minWaitMs ?? 75, 0, 1e4);
|
|
@@ -882,7 +1107,7 @@ var Executor = class {
|
|
|
882
1107
|
async waitForModuleWork(timeoutMs) {
|
|
883
1108
|
const pending = Array.from(this.moduleInFlight.values());
|
|
884
1109
|
if (!pending.length) return;
|
|
885
|
-
const timeout = this.clampMs(timeoutMs, 0, 6e4);
|
|
1110
|
+
const timeout = this.clampMs(this.boundedDurationMs(timeoutMs), 0, 6e4);
|
|
886
1111
|
if (timeout === 0) return;
|
|
887
1112
|
const all = Promise.allSettled(pending).then(() => {
|
|
888
1113
|
});
|
|
@@ -899,7 +1124,11 @@ var Executor = class {
|
|
|
899
1124
|
try {
|
|
900
1125
|
this.telemetry_proxy++;
|
|
901
1126
|
const payload = { method, url, headers, headerOrder: Object.keys(headers), body, proxy: this.proxyUrlForScope(proxyScope) };
|
|
902
|
-
|
|
1127
|
+
const response = await phantomFetch(payload, {
|
|
1128
|
+
timeoutMs: this.remainingTimeMs()
|
|
1129
|
+
});
|
|
1130
|
+
this.recordWarning(response.warning);
|
|
1131
|
+
return response;
|
|
903
1132
|
} catch (e) {
|
|
904
1133
|
return { status: 0, body: e.message, headers: {}, error: e.message };
|
|
905
1134
|
}
|
|
@@ -1015,7 +1244,7 @@ var Executor = class {
|
|
|
1015
1244
|
const existing = this.moduleInFlight.get(cacheKey);
|
|
1016
1245
|
if (existing) return existing;
|
|
1017
1246
|
const p = (async () => {
|
|
1018
|
-
const taskId = this.trackTaskStart("module_bundle", cacheKey, this.moduleWaitMs);
|
|
1247
|
+
const taskId = this.trackTaskStart("module_bundle", cacheKey, this.boundedDurationMs(this.moduleWaitMs));
|
|
1019
1248
|
try {
|
|
1020
1249
|
if (process.env.PHANTOM_DEBUG_MODULES === "1") {
|
|
1021
1250
|
log("[Executor] Bundling module entry:", cacheKey);
|
|
@@ -1025,7 +1254,8 @@ var Executor = class {
|
|
|
1025
1254
|
if (!this.windowClosed) window.eval(cached);
|
|
1026
1255
|
return;
|
|
1027
1256
|
}
|
|
1028
|
-
const esbuildMod = await
|
|
1257
|
+
const esbuildMod = this.esbuildModule ?? await acquireEsbuildModule();
|
|
1258
|
+
this.esbuildModule = esbuildMod;
|
|
1029
1259
|
const buildFn = esbuildMod?.build || esbuildMod?.default?.build;
|
|
1030
1260
|
if (typeof buildFn !== "function") {
|
|
1031
1261
|
throw new Error("esbuild.build not available (esbuild import failed)");
|
|
@@ -1251,8 +1481,27 @@ var Executor = class {
|
|
|
1251
1481
|
}
|
|
1252
1482
|
{
|
|
1253
1483
|
const _g = globalThis;
|
|
1254
|
-
if (
|
|
1255
|
-
|
|
1484
|
+
if (_g.MessageChannel) {
|
|
1485
|
+
if (this.originalGlobalMessageChannel === void 0) {
|
|
1486
|
+
this.originalGlobalMessageChannel = _g.MessageChannel;
|
|
1487
|
+
}
|
|
1488
|
+
const NativeMessageChannel = _g.MessageChannel;
|
|
1489
|
+
const UnrefMessageChannel = class MessageChannel extends NativeMessageChannel {
|
|
1490
|
+
constructor() {
|
|
1491
|
+
super();
|
|
1492
|
+
this.port1?.unref?.();
|
|
1493
|
+
this.port2?.unref?.();
|
|
1494
|
+
}
|
|
1495
|
+
};
|
|
1496
|
+
window.MessageChannel = UnrefMessageChannel;
|
|
1497
|
+
_g.MessageChannel = UnrefMessageChannel;
|
|
1498
|
+
}
|
|
1499
|
+
if (_g.MessagePort) {
|
|
1500
|
+
if (this.originalGlobalMessagePort === void 0) {
|
|
1501
|
+
this.originalGlobalMessagePort = _g.MessagePort;
|
|
1502
|
+
}
|
|
1503
|
+
window.MessagePort = _g.MessagePort;
|
|
1504
|
+
}
|
|
1256
1505
|
}
|
|
1257
1506
|
if (!window.requestIdleCallback) {
|
|
1258
1507
|
window.requestIdleCallback = (cb) => window.setTimeout(() => cb({
|
|
@@ -1476,6 +1725,7 @@ var Executor = class {
|
|
|
1476
1725
|
async execute() {
|
|
1477
1726
|
const onNodeUncaught = (err) => this.recordExecutionError(err, "uncaughtException");
|
|
1478
1727
|
const onNodeUnhandled = (reason) => this.recordExecutionError(reason, "unhandledRejection");
|
|
1728
|
+
const initialActiveHandles = new Set(process._getActiveHandles());
|
|
1479
1729
|
process.on("uncaughtException", onNodeUncaught);
|
|
1480
1730
|
process.on("unhandledRejection", onNodeUnhandled);
|
|
1481
1731
|
try {
|
|
@@ -1898,12 +2148,25 @@ var Executor = class {
|
|
|
1898
2148
|
matchedRequests: this.earlyMatches,
|
|
1899
2149
|
renderedHtml,
|
|
1900
2150
|
timings: { ...this.timings },
|
|
1901
|
-
errors: this.executionErrors.length ? this.executionErrors : void 0
|
|
2151
|
+
errors: this.executionErrors.length ? this.executionErrors : void 0,
|
|
2152
|
+
warnings: Array.from(this.warnings)
|
|
1902
2153
|
};
|
|
1903
2154
|
const shutdownGraceMs = this.clampMs(Number(process.env.PHANTOM_SHUTDOWN_GRACE_MS ?? 50), 10, 5e3);
|
|
1904
2155
|
await new Promise((r) => setTimeout(r, shutdownGraceMs));
|
|
2156
|
+
this.unrefNewMessagePorts(initialActiveHandles);
|
|
1905
2157
|
return result;
|
|
1906
2158
|
} finally {
|
|
2159
|
+
const g = globalThis;
|
|
2160
|
+
if (this.originalGlobalMessageChannel !== void 0) {
|
|
2161
|
+
g.MessageChannel = this.originalGlobalMessageChannel;
|
|
2162
|
+
this.originalGlobalMessageChannel = void 0;
|
|
2163
|
+
}
|
|
2164
|
+
if (this.originalGlobalMessagePort !== void 0) {
|
|
2165
|
+
g.MessagePort = this.originalGlobalMessagePort;
|
|
2166
|
+
this.originalGlobalMessagePort = void 0;
|
|
2167
|
+
}
|
|
2168
|
+
releaseEsbuildModule(this.esbuildModule);
|
|
2169
|
+
this.esbuildModule = null;
|
|
1907
2170
|
process.off("uncaughtException", onNodeUncaught);
|
|
1908
2171
|
process.off("unhandledRejection", onNodeUnhandled);
|
|
1909
2172
|
}
|
|
@@ -2381,11 +2644,12 @@ async function prefetchModuleGraph(rootUrls, cache, pageUrl, opts) {
|
|
|
2381
2644
|
body: "",
|
|
2382
2645
|
proxy: opts?.proxyUrl
|
|
2383
2646
|
}));
|
|
2384
|
-
const responses = await phantomBatchFetch(payloads);
|
|
2647
|
+
const responses = await phantomBatchFetch(payloads, { timeoutMs: opts?.timeoutMs });
|
|
2385
2648
|
const newToScan = [];
|
|
2386
2649
|
for (let i = 0; i < toFetch.length; i++) {
|
|
2387
2650
|
const u = toFetch[i];
|
|
2388
2651
|
const r = responses[i];
|
|
2652
|
+
if (r.warning) opts?.onWarning?.(r.warning);
|
|
2389
2653
|
if (r.status < 400 && r.body) {
|
|
2390
2654
|
cache.set(u, r.body);
|
|
2391
2655
|
totalFetched++;
|
|
@@ -2423,6 +2687,7 @@ var Harvester = class {
|
|
|
2423
2687
|
this.requestHeaders = {};
|
|
2424
2688
|
this.thirdPartyPolicy = "skip-noncritical";
|
|
2425
2689
|
this.prefetchModulePreloads = true;
|
|
2690
|
+
this.warnings = /* @__PURE__ */ new Set();
|
|
2426
2691
|
this.targetUrl = url;
|
|
2427
2692
|
this.prefetchExternalScripts = opts.prefetchExternalScripts !== false;
|
|
2428
2693
|
this.externalScriptConcurrency = opts.externalScriptConcurrency ?? 8;
|
|
@@ -2431,6 +2696,8 @@ var Harvester = class {
|
|
|
2431
2696
|
this.thirdPartyPolicy = opts.thirdPartyPolicy ?? "skip-noncritical";
|
|
2432
2697
|
this.prefetchModulePreloads = opts.prefetchModulePreloads !== false;
|
|
2433
2698
|
this.proxy = opts.proxy;
|
|
2699
|
+
this.timeoutMs = opts.timeoutMs;
|
|
2700
|
+
this.deadlineAt = opts.deadlineAt;
|
|
2434
2701
|
}
|
|
2435
2702
|
proxyUrlForScope(scope) {
|
|
2436
2703
|
if (!this.proxy) return void 0;
|
|
@@ -2446,6 +2713,20 @@ var Harvester = class {
|
|
|
2446
2713
|
}
|
|
2447
2714
|
return pairs.join("; ");
|
|
2448
2715
|
}
|
|
2716
|
+
createTimeoutError() {
|
|
2717
|
+
const timeoutMs = Math.max(1, Math.ceil(this.timeoutMs ?? 1));
|
|
2718
|
+
return new Error(`dynafetch timed out after ${timeoutMs}ms`);
|
|
2719
|
+
}
|
|
2720
|
+
remainingTimeMs() {
|
|
2721
|
+
if (this.deadlineAt == null) return this.timeoutMs;
|
|
2722
|
+
const remaining = this.deadlineAt - Date.now();
|
|
2723
|
+
if (remaining <= 0) throw this.createTimeoutError();
|
|
2724
|
+
return Math.max(1, Math.ceil(remaining));
|
|
2725
|
+
}
|
|
2726
|
+
recordWarning(warning) {
|
|
2727
|
+
if (!warning) return;
|
|
2728
|
+
this.warnings.add(warning);
|
|
2729
|
+
}
|
|
2449
2730
|
async fetchViaProxy(url, headers = {}, followRedirects = false, maxRedirects = 5, method = "GET", body = "", proxyScope = "page") {
|
|
2450
2731
|
let currentUrl = url;
|
|
2451
2732
|
let redirectCount = 0;
|
|
@@ -2466,7 +2747,10 @@ var Harvester = class {
|
|
|
2466
2747
|
body: currentBody,
|
|
2467
2748
|
proxy: this.proxyUrlForScope(proxyScope)
|
|
2468
2749
|
};
|
|
2469
|
-
const data = await phantomFetch(payload
|
|
2750
|
+
const data = await phantomFetch(payload, {
|
|
2751
|
+
timeoutMs: this.remainingTimeMs()
|
|
2752
|
+
});
|
|
2753
|
+
this.recordWarning(data.warning);
|
|
2470
2754
|
if (data.error) throw new Error(`Proxy Error: ${data.error}`);
|
|
2471
2755
|
const setCookie = data.headers["Set-Cookie"] || data.headers["set-cookie"];
|
|
2472
2756
|
if (setCookie) this.cookies.push(setCookie);
|
|
@@ -2709,10 +2993,13 @@ var Harvester = class {
|
|
|
2709
2993
|
const allPayloads = [...scriptPayloads, ...preloadPayloads];
|
|
2710
2994
|
if (allPayloads.length > 0) {
|
|
2711
2995
|
log(`[Harvest] Batch-fetching ${scriptPayloads.length} scripts + ${preloadPayloads.length} modulepreloads...`);
|
|
2712
|
-
const allResponses = await phantomBatchFetch(allPayloads
|
|
2996
|
+
const allResponses = await phantomBatchFetch(allPayloads, {
|
|
2997
|
+
timeoutMs: this.remainingTimeMs()
|
|
2998
|
+
});
|
|
2713
2999
|
for (let i = 0; i < batchScriptMeta.length; i++) {
|
|
2714
3000
|
const meta = batchScriptMeta[i];
|
|
2715
3001
|
const resp = allResponses[i];
|
|
3002
|
+
this.recordWarning(resp.warning);
|
|
2716
3003
|
const logEntry = {
|
|
2717
3004
|
type: "resource_load",
|
|
2718
3005
|
url: meta.absoluteUrl,
|
|
@@ -2741,6 +3028,7 @@ var Harvester = class {
|
|
|
2741
3028
|
for (let i = 0; i < modulePreloadUrls.length; i++) {
|
|
2742
3029
|
const url = modulePreloadUrls[i];
|
|
2743
3030
|
const resp = allResponses[batchScriptMeta.length + i];
|
|
3031
|
+
this.recordWarning(resp.warning);
|
|
2744
3032
|
const logEntry = {
|
|
2745
3033
|
type: "resource_load",
|
|
2746
3034
|
url,
|
|
@@ -2805,7 +3093,9 @@ var Harvester = class {
|
|
|
2805
3093
|
if (moduleEntryUrls.length > 0 || modulePreloads.length > 0) {
|
|
2806
3094
|
const rootUrls = [...moduleEntryUrls, ...modulePreloads.map((mp) => mp.url)];
|
|
2807
3095
|
await prefetchModuleGraph(rootUrls, moduleGraphCache, finalUrl, {
|
|
2808
|
-
proxyUrl: this.proxyUrlForScope("assets")
|
|
3096
|
+
proxyUrl: this.proxyUrlForScope("assets"),
|
|
3097
|
+
timeoutMs: this.remainingTimeMs(),
|
|
3098
|
+
onWarning: (warning) => this.recordWarning(warning)
|
|
2809
3099
|
});
|
|
2810
3100
|
}
|
|
2811
3101
|
return {
|
|
@@ -2819,7 +3109,8 @@ var Harvester = class {
|
|
|
2819
3109
|
cookies: this.cookies,
|
|
2820
3110
|
headers: response.headers,
|
|
2821
3111
|
logs: this.logs,
|
|
2822
|
-
moduleGraphCache
|
|
3112
|
+
moduleGraphCache,
|
|
3113
|
+
warnings: Array.from(this.warnings)
|
|
2823
3114
|
};
|
|
2824
3115
|
}
|
|
2825
3116
|
};
|
|
@@ -2896,27 +3187,6 @@ var DynafetchInputError = class extends Error {
|
|
|
2896
3187
|
this.status = status;
|
|
2897
3188
|
}
|
|
2898
3189
|
};
|
|
2899
|
-
function isPrivateOrLocalHost(hostname) {
|
|
2900
|
-
const h = hostname.toLowerCase();
|
|
2901
|
-
if (h === "localhost" || h.endsWith(".localhost") || h === "0.0.0.0") return true;
|
|
2902
|
-
if (h === "metadata.google.internal") return true;
|
|
2903
|
-
const ipVer = net.isIP(h);
|
|
2904
|
-
if (!ipVer) return false;
|
|
2905
|
-
if (ipVer === 4) {
|
|
2906
|
-
const [a, b] = h.split(".").map((x) => Number(x));
|
|
2907
|
-
if (a === 10) return true;
|
|
2908
|
-
if (a === 127) return true;
|
|
2909
|
-
if (a === 0) return true;
|
|
2910
|
-
if (a === 169 && b === 254) return true;
|
|
2911
|
-
if (a === 172 && b >= 16 && b <= 31) return true;
|
|
2912
|
-
if (a === 192 && b === 168) return true;
|
|
2913
|
-
return false;
|
|
2914
|
-
}
|
|
2915
|
-
if (h === "::1") return true;
|
|
2916
|
-
if (h.startsWith("fe80:")) return true;
|
|
2917
|
-
if (h.startsWith("fc") || h.startsWith("fd")) return true;
|
|
2918
|
-
return false;
|
|
2919
|
-
}
|
|
2920
3190
|
function normalizeProxy(input) {
|
|
2921
3191
|
if (!input) return void 0;
|
|
2922
3192
|
if (typeof input === "string") {
|
|
@@ -2946,15 +3216,10 @@ function normalizeOptions(input) {
|
|
|
2946
3216
|
}
|
|
2947
3217
|
let parsedUrl;
|
|
2948
3218
|
try {
|
|
2949
|
-
parsedUrl =
|
|
2950
|
-
} catch {
|
|
2951
|
-
|
|
2952
|
-
|
|
2953
|
-
if (parsedUrl.protocol !== "http:" && parsedUrl.protocol !== "https:") {
|
|
2954
|
-
throw new DynafetchInputError("Only http(s) URLs are allowed");
|
|
2955
|
-
}
|
|
2956
|
-
if (isPrivateOrLocalHost(parsedUrl.hostname)) {
|
|
2957
|
-
throw new DynafetchInputError("Refusing to fetch local/private addresses");
|
|
3219
|
+
parsedUrl = assertSafeHttpUrlSync(options.url);
|
|
3220
|
+
} catch (error2) {
|
|
3221
|
+
const message = error2 instanceof Error ? error2.message : "Invalid URL";
|
|
3222
|
+
throw new DynafetchInputError(message);
|
|
2958
3223
|
}
|
|
2959
3224
|
return {
|
|
2960
3225
|
...options,
|
|
@@ -2966,8 +3231,13 @@ function normalizeOptions(input) {
|
|
|
2966
3231
|
thirdPartyPolicy: options.thirdPartyPolicy ?? "skip-noncritical"
|
|
2967
3232
|
};
|
|
2968
3233
|
}
|
|
2969
|
-
function toWarnings(plan, errors, options) {
|
|
3234
|
+
function toWarnings(plan, errors, options, runtimeWarnings = []) {
|
|
2970
3235
|
const warnings = [plan.reason];
|
|
3236
|
+
for (const warning of runtimeWarnings) {
|
|
3237
|
+
if (warning && !warnings.includes(warning)) {
|
|
3238
|
+
warnings.push(warning);
|
|
3239
|
+
}
|
|
3240
|
+
}
|
|
2971
3241
|
if (plan.strategy === "jsdom-fallback" || plan.strategy === "framework-probe") {
|
|
2972
3242
|
warnings.push("runtime execution used the legacy JSDOM-based renderer while lightweight adapters are still being built");
|
|
2973
3243
|
}
|
|
@@ -3001,85 +3271,125 @@ function computeConfidence(params) {
|
|
|
3001
3271
|
confidence -= Math.min(0.28, params.executionErrors * 0.07);
|
|
3002
3272
|
return Math.max(0.05, Math.min(0.98, Number(confidence.toFixed(2))));
|
|
3003
3273
|
}
|
|
3274
|
+
/**
 * Build the error used to signal that a dynafetch operation ran out of time.
 *
 * @param {number} timeoutMs - Timeout budget, in milliseconds, reported in the message.
 * @returns {Error} An `Error` whose `name` is "DynafetchTimeoutError".
 */
function createTimeoutError(timeoutMs) {
  // Overriding `name` lets callers tell timeouts apart without a dedicated subclass.
  return Object.assign(new Error(`dynafetch timed out after ${timeoutMs}ms`), {
    name: "DynafetchTimeoutError"
  });
}
|
|
3279
|
+
/**
 * Wait for `operation`, but give up once `timeoutMs` milliseconds have elapsed.
 *
 * A falsy or non-finite `timeoutMs` disables the timeout and the operation is
 * simply awaited. Otherwise the delay is rounded up to a whole millisecond
 * (minimum 1ms) and, if it elapses first, the returned promise rejects with the
 * error produced by `createTimeoutError`. The operation itself is not cancelled
 * when the timeout fires; only the wait is abandoned.
 *
 * @param {Promise<T> | T} operation - The in-flight work (any awaitable value).
 * @param {number} [timeoutMs] - Maximum time to wait, in milliseconds.
 * @returns {Promise<T>} Settles like `operation`, or rejects on timeout.
 * @template T
 */
async function withOperationTimeout(operation, timeoutMs) {
  if (!timeoutMs || !Number.isFinite(timeoutMs)) {
    return await operation;
  }
  // Clamp once so the timer delay and the reported value can never disagree.
  const delayMs = Math.max(1, Math.ceil(timeoutMs));
  let timer;
  const timeout = new Promise((_, reject) => {
    timer = setTimeout(() => reject(createTimeoutError(delayMs)), delayMs);
    // Do not let the watchdog timer alone keep the process alive.
    timer.unref?.();
  });
  try {
    // Promise.race accepts plain values too, matching the untimed path above.
    return await Promise.race([operation, timeout]);
  } finally {
    clearTimeout(timer);
  }
}
|
|
3004
3298
|
/**
 * Public entry point: harvest a URL and, unless the plan is "static-html",
 * run the executor to obtain rendered HTML.
 *
 * The whole pipeline runs inside `withDynafetchSession` and is bounded by
 * `withOperationTimeout` using `options.timeoutMs`. The same budget is also
 * handed to the harvester and executor as `timeoutMs` plus an absolute
 * `deadlineAt` timestamp computed once up front.
 *
 * @param {*} input - Raw caller input; validated and defaulted by `normalizeOptions`.
 * @returns {Promise<object>} Result with `url`, `finalUrl`, `status`, `html`,
 *   `framework`, `strategy`, `confidence`, `warnings`, `timings` and `requestCount`.
 */
async function dynafetch(input) {
  const options = normalizeOptions(input);
  const { timeoutMs } = options;
  const timeoutSeconds = timeoutMs ? Math.max(1, Math.ceil(timeoutMs / 1e3)) : void 0;
  const deadlineAt = timeoutMs ? Date.now() + timeoutMs : void 0;
  const initialCookies = normalizeCookies(options.cookies);
  const proxy = normalizeProxy(options.proxy);

  const sessionConfig = {
    browserProfile: options.browserProfile,
    timeoutSeconds,
    proxy: proxy?.url,
    rpcTimeoutMs: timeoutMs
  };

  // Harvest -> plan -> (optional) execute -> assemble the public result.
  const runPipeline = async () => {
    const startedAt = Date.now();
    const harvestStartedAt = Date.now();
    const harvester = new Harvester(options.url, {
      prefetchExternalScripts: options.prefetchExternalScripts,
      prefetchModulePreloads: options.prefetchModulePreloads,
      requestHeaders: options.headers,
      initialCookies,
      thirdPartyPolicy: options.thirdPartyPolicy,
      proxy,
      timeoutMs,
      deadlineAt
    });
    const harvest = await harvester.harvest();
    const harvestMs = Date.now() - harvestStartedAt;
    const framework = detectFramework(harvest);
    const plan = planDynafetch(framework, harvest, options.allowJsdomFallback !== false);

    // Defaults describe the static-html case, where nothing is executed.
    const outcome = {
      html: harvest.html,
      requestCount: harvest.logs.length,
      errors: void 0,
      warnings: [],
      executeMs: 0,
      quiescenceMs: 0,
      scriptsTransformed: 0
    };

    if (plan.strategy !== "static-html") {
      const executeStartedAt = Date.now();
      const executor = new Executor(harvest, {
        thirdPartyPolicy: options.thirdPartyPolicy,
        quiescence: {
          minWaitMs: options.minWaitMs,
          idleWaitMs: options.idleWaitMs,
          maxWaitMs: options.maxWaitMs
        },
        moduleWaitMs: options.moduleWaitMs,
        proxy,
        timeoutMs,
        deadlineAt
      });
      const execution = await executor.execute();
      outcome.executeMs = Date.now() - executeStartedAt;
      // Fall back to the harvested markup when execution produced no HTML.
      outcome.html = execution.renderedHtml ?? harvest.html;
      outcome.requestCount = execution.logs.length;
      outcome.errors = execution.errors;
      outcome.warnings = execution.warnings ?? [];
      outcome.quiescenceMs = execution.timings?.quiescence_ms ?? 0;
      outcome.scriptsTransformed = execution.timings?.scripts_transformed_count ?? 0;
    }

    const totalMs = Date.now() - startedAt;
    const runtimeWarnings = [...(harvest.warnings ?? []), ...outcome.warnings];
    const warnings = toWarnings(plan, outcome.errors, options, runtimeWarnings);
    const confidence = computeConfidence({
      plan,
      initialStateCount: Object.keys(harvest.initialState).length,
      executionErrors: outcome.errors?.length ?? 0,
      htmlLength: outcome.html.length
    });

    return {
      url: options.url,
      finalUrl: harvest.url,
      status: harvest.status,
      html: outcome.html,
      framework,
      strategy: plan.strategy,
      confidence,
      warnings,
      timings: {
        total: totalMs,
        harvest: harvestMs,
        execute: outcome.executeMs,
        quiescence: outcome.quiescenceMs,
        scriptsTransformed: outcome.scriptsTransformed
      },
      requestCount: outcome.requestCount
    };
  };

  return await withOperationTimeout(
    withDynafetchSession(sessionConfig, runPipeline),
    timeoutMs
  );
}
|
|
3085
3395
|
export {
|