@grabbit-labs/dynafetch 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,13 +1,4 @@
1
1
  import { createRequire } from "node:module"; import { fileURLToPath as __fileURLToPath } from "node:url"; import { dirname as __dirname_fn } from "node:path"; const __filename = __fileURLToPath(import.meta.url); const __dirname = __dirname_fn(__filename); const require = createRequire(import.meta.url);
2
- var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
3
- get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
4
- }) : x)(function(x) {
5
- if (typeof require !== "undefined") return require.apply(this, arguments);
6
- throw Error('Dynamic require of "' + x + '" is not supported');
7
- });
8
-
9
- // ../dynafetch-core/src/index.ts
10
- import * as net from "node:net";
11
2
 
12
3
  // ../../src/phantom/log.ts
13
4
  var enabled = process.env.DYNAFETCH_DEBUG === "1";
@@ -123,29 +114,32 @@ var Transformer = class {
123
114
  import { AsyncLocalStorage } from "node:async_hooks";
124
115
  import { spawn } from "node:child_process";
125
116
  import { randomUUID } from "node:crypto";
117
+ import { accessSync, constants } from "node:fs";
126
118
  import path2 from "node:path";
127
119
  import readline from "node:readline";
120
+ import { fileURLToPath } from "node:url";
128
121
  var sessionStore = new AsyncLocalStorage();
129
122
  var transportPromise = null;
123
+ var workerDir = path2.dirname(fileURLToPath(import.meta.url));
130
124
  function findPrecompiledBinary() {
131
125
  const platform = process.platform;
132
- const arch = process.arch === "x64" ? "x64" : "arm64";
126
+ const arch = process.arch === "x64" ? "x64" : process.arch === "arm64" ? "arm64" : null;
127
+ if (!arch) return null;
133
128
  const ext = platform === "win32" ? ".exe" : "";
134
129
  const name = `dynafetch-net-${platform}-${arch}${ext}`;
135
130
  const candidates = [
136
- path2.resolve(__dirname, "../bin", name),
131
+ path2.resolve(workerDir, "../bin", name),
137
132
  // installed: dist/../bin
138
- path2.resolve(__dirname, "../../../dynafetch-net/bin", name),
133
+ path2.resolve(workerDir, "../../../dynafetch-net/bin", name),
139
134
  // dev: dynafetch-core/src/net -> dynafetch-net/bin
140
- path2.resolve(__dirname, "../../../../packages/dynafetch-net/bin", name),
135
+ path2.resolve(workerDir, "../../../../packages/dynafetch-net/bin", name),
141
136
  // dev: alt layout
142
137
  path2.resolve(process.cwd(), "packages/dynafetch-net/bin", name)
143
138
  // dev: from workspace root
144
139
  ];
145
140
  for (const candidate of candidates) {
146
141
  try {
147
- const fs2 = __require("fs");
148
- fs2.accessSync(candidate, fs2.constants.X_OK);
142
+ accessSync(candidate, constants.X_OK);
149
143
  return candidate;
150
144
  } catch {
151
145
  }
@@ -182,6 +176,31 @@ function createWorkerTransport() {
182
176
  );
183
177
  }
184
178
  const pending = /* @__PURE__ */ new Map();
179
+ let holdCount = 0;
180
+ const updateRef = () => {
181
+ if (pending.size === 0 && holdCount === 0) {
182
+ child.unref();
183
+ child.stdin.unref?.();
184
+ child.stdout.unref?.();
185
+ child.stderr.unref?.();
186
+ } else {
187
+ child.ref();
188
+ child.stdin.ref?.();
189
+ child.stdout.ref?.();
190
+ child.stderr.ref?.();
191
+ }
192
+ };
193
+ const hold = () => {
194
+ holdCount++;
195
+ updateRef();
196
+ };
197
+ const release = () => {
198
+ holdCount = Math.max(0, holdCount - 1);
199
+ updateRef();
200
+ };
201
+ child.stdin.on("error", () => {
202
+ });
203
+ updateRef();
185
204
  const rl = readline.createInterface({ input: child.stdout });
186
205
  rl.on("line", (line) => {
187
206
  const trimmed = line.trim();
@@ -194,11 +213,13 @@ function createWorkerTransport() {
194
213
  entry.reject(new Error(`Invalid dynafetch-net response: ${String(error2)}`));
195
214
  }
196
215
  pending.clear();
216
+ updateRef();
197
217
  return;
198
218
  }
199
219
  const request = pending.get(payload.id);
200
220
  if (!request) return;
201
221
  pending.delete(payload.id);
222
+ updateRef();
202
223
  if (payload.error) {
203
224
  request.reject(new Error(payload.error.message || payload.error.code || "dynafetch-net request failed"));
204
225
  return;
@@ -212,9 +233,11 @@ function createWorkerTransport() {
212
233
  }
213
234
  });
214
235
  const onExit = (code, signal) => {
215
- const reason = `dynafetch-net exited (code=${code ?? "null"}, signal=${signal ?? "null"})`;
216
- for (const entry of pending.values()) {
217
- entry.reject(new Error(reason));
236
+ if (pending.size > 0 && signal !== "SIGKILL") {
237
+ const reason = `dynafetch-net exited (code=${code ?? "null"}, signal=${signal ?? "null"})`;
238
+ for (const entry of pending.values()) {
239
+ entry.reject(new Error(reason));
240
+ }
218
241
  }
219
242
  pending.clear();
220
243
  transportPromise = null;
@@ -238,7 +261,7 @@ function createWorkerTransport() {
238
261
  child.once("spawn", () => {
239
262
  if (!settled) {
240
263
  settled = true;
241
- resolve({ child, pending });
264
+ resolve({ child, pending, updateRef, holdCount, hold, release });
242
265
  }
243
266
  });
244
267
  });
@@ -256,8 +279,10 @@ async function callWorker(method, params, timeoutMs = 3e4) {
256
279
  return await new Promise((resolve, reject) => {
257
280
  const timer = setTimeout(() => {
258
281
  transport.pending.delete(id);
282
+ transport.updateRef();
259
283
  reject(new Error(`dynafetch-net request timed out after ${timeoutMs}ms (method: ${method})`));
260
284
  }, timeoutMs);
285
+ timer.unref();
261
286
  transport.pending.set(id, {
262
287
  resolve: (value) => {
263
288
  clearTimeout(timer);
@@ -268,22 +293,41 @@ async function callWorker(method, params, timeoutMs = 3e4) {
268
293
  reject(err);
269
294
  }
270
295
  });
296
+ transport.updateRef();
271
297
  transport.child.stdin.write(`${payload}
272
298
  `, (error2) => {
273
299
  if (!error2) return;
274
300
  clearTimeout(timer);
275
301
  transport.pending.delete(id);
302
+ transport.updateRef();
276
303
  reject(error2);
277
304
  });
278
305
  });
279
306
  }
307
+ function resolveRpcTimeoutMs(options) {
308
+ if (options.rpcTimeoutMs != null) {
309
+ return Math.max(1, Math.ceil(options.rpcTimeoutMs));
310
+ }
311
+ if (options.timeoutSeconds != null) {
312
+ return Math.max(1e3, Math.ceil(options.timeoutSeconds * 1e3) + 1e3);
313
+ }
314
+ return 3e4;
315
+ }
280
316
  async function withDynafetchSession(options, run) {
281
- const session = await callWorker("openSession", options);
317
+ const transport = await getWorkerTransport();
318
+ transport.hold();
319
+ const { rpcTimeoutMs, ...sessionOptions } = options;
320
+ const session = await callWorker(
321
+ "openSession",
322
+ sessionOptions,
323
+ resolveRpcTimeoutMs(options)
324
+ );
282
325
  try {
283
326
  return await sessionStore.run({ sessionId: session.sessionId }, run);
284
327
  } finally {
285
- await callWorker("closeSession", { sessionId: session.sessionId }).catch(() => {
328
+ callWorker("closeSession", { sessionId: session.sessionId }).catch(() => {
286
329
  });
330
+ transport.release();
287
331
  }
288
332
  }
289
333
  async function dynafetchNetHealth() {
@@ -299,7 +343,7 @@ async function dynafetchNetFetch(request, options = {}) {
299
343
  browserProfile: options.browserProfile,
300
344
  timeoutSeconds: options.timeoutSeconds,
301
345
  proxy: options.proxy
302
- });
346
+ }, resolveRpcTimeoutMs(options));
303
347
  }
304
348
  async function dynafetchNetBatchFetch(requests, options = {}) {
305
349
  const session = sessionStore.getStore();
@@ -311,7 +355,73 @@ async function dynafetchNetBatchFetch(requests, options = {}) {
311
355
  browserProfile: options.browserProfile,
312
356
  timeoutSeconds: options.timeoutSeconds,
313
357
  proxy: options.proxy
314
- });
358
+ }, resolveRpcTimeoutMs(options));
359
+ }
360
+
361
+ // ../../src/phantom/url-safety.ts
362
+ import { lookup } from "node:dns/promises";
363
+ import * as net from "node:net";
364
+ var BLOCKED_HOSTNAMES = /* @__PURE__ */ new Set([
365
+ "0.0.0.0",
366
+ "localhost",
367
+ "metadata.google.internal"
368
+ ]);
369
+ var hostnameLookupCache = /* @__PURE__ */ new Map();
370
+ function normalizeHostname(hostname) {
371
+ return hostname.trim().replace(/^\[|\]$/g, "").replace(/\.+$/g, "").toLowerCase();
372
+ }
373
+ function isPrivateOrLocalHost(hostname) {
374
+ const h = normalizeHostname(hostname);
375
+ if (!h) return false;
376
+ if (BLOCKED_HOSTNAMES.has(h) || h.endsWith(".localhost")) return true;
377
+ const ipVer = net.isIP(h);
378
+ if (!ipVer) return false;
379
+ if (ipVer === 4) {
380
+ const [a, b] = h.split(".").map((value) => Number(value));
381
+ if (a === 10) return true;
382
+ if (a === 127) return true;
383
+ if (a === 0) return true;
384
+ if (a === 169 && b === 254) return true;
385
+ if (a === 172 && b >= 16 && b <= 31) return true;
386
+ if (a === 192 && b === 168) return true;
387
+ return false;
388
+ }
389
+ if (h === "::1") return true;
390
+ if (h.startsWith("fe80:")) return true;
391
+ if (h.startsWith("fc") || h.startsWith("fd")) return true;
392
+ return false;
393
+ }
394
+ function assertSafeHttpUrlSync(input) {
395
+ let parsedUrl;
396
+ try {
397
+ parsedUrl = new URL(input);
398
+ } catch {
399
+ throw new Error("Invalid URL");
400
+ }
401
+ if (parsedUrl.protocol !== "http:" && parsedUrl.protocol !== "https:") {
402
+ throw new Error("Only http(s) URLs are allowed");
403
+ }
404
+ if (isPrivateOrLocalHost(parsedUrl.hostname)) {
405
+ throw new Error("Refusing to fetch local/private addresses");
406
+ }
407
+ return parsedUrl;
408
+ }
409
+ async function hostnameResolvesToPrivateAddress(hostname) {
410
+ const normalized = normalizeHostname(hostname);
411
+ if (!normalized || net.isIP(normalized)) return false;
412
+ let pending = hostnameLookupCache.get(normalized);
413
+ if (!pending) {
414
+ pending = lookup(normalized, { all: true, verbatim: true }).then((records) => records.some((record) => isPrivateOrLocalHost(record.address))).catch(() => false);
415
+ hostnameLookupCache.set(normalized, pending);
416
+ }
417
+ return pending;
418
+ }
419
+ async function assertSafeRemoteUrl(input) {
420
+ const parsedUrl = assertSafeHttpUrlSync(input);
421
+ if (await hostnameResolvesToPrivateAddress(parsedUrl.hostname)) {
422
+ throw new Error("Refusing to fetch local/private addresses");
423
+ }
424
+ return parsedUrl;
315
425
  }
316
426
 
317
427
  // ../../src/phantom/phantom-proxy.ts
@@ -327,64 +437,115 @@ function headersToRecord(h) {
327
437
  }
328
438
  return out;
329
439
  }
330
- async function directFetch(payload) {
440
+ var DIRECT_FALLBACK_WARNING = "dynafetch-net was unavailable for one or more requests; fell back to Node fetch without TLS/browser impersonation";
441
+ var DIRECT_PROXY_ERROR = "Direct fallback cannot honor proxy configuration; dynafetch-net is required when proxy is set";
442
+ function createTimeoutController(timeoutMs) {
443
+ if (!timeoutMs || !Number.isFinite(timeoutMs)) {
444
+ return {
445
+ dispose: () => {
446
+ },
447
+ didTimeout: () => false
448
+ };
449
+ }
450
+ const controller = new AbortController();
451
+ let timedOut = false;
452
+ const timer = setTimeout(() => {
453
+ timedOut = true;
454
+ controller.abort();
455
+ }, Math.max(1, Math.ceil(timeoutMs)));
456
+ timer.unref?.();
457
+ return {
458
+ signal: controller.signal,
459
+ dispose: () => clearTimeout(timer),
460
+ didTimeout: () => timedOut
461
+ };
462
+ }
463
+ async function directFetch(payload, options = {}) {
464
+ if (payload.proxy) {
465
+ throw new Error(DIRECT_PROXY_ERROR);
466
+ }
331
467
  const init = {
332
468
  method: payload.method,
333
469
  headers: payload.headers,
334
470
  redirect: "manual"
335
471
  };
336
472
  if (payload.body) init.body = payload.body;
337
- const resp = await fetch(payload.url, init);
338
- const body = await resp.text().catch(() => "");
339
- return { status: resp.status, body, headers: headersToRecord(resp.headers) };
473
+ const timeout = createTimeoutController(options.timeoutMs);
474
+ if (timeout.signal) init.signal = timeout.signal;
475
+ try {
476
+ const resp = await fetch(payload.url, init);
477
+ const body = await resp.text().catch(() => "");
478
+ return {
479
+ status: resp.status,
480
+ body,
481
+ headers: headersToRecord(resp.headers),
482
+ transport: "direct",
483
+ warning: DIRECT_FALLBACK_WARNING
484
+ };
485
+ } catch (error2) {
486
+ if (timeout.didTimeout()) {
487
+ throw new Error(`dynafetch request timed out after ${Math.max(1, Math.ceil(options.timeoutMs ?? 0))}ms`);
488
+ }
489
+ throw error2;
490
+ } finally {
491
+ timeout.dispose();
492
+ }
340
493
  }
341
- async function dynafetchWorkerFetch(payload) {
494
+ async function dynafetchWorkerFetch(payload, options = {}) {
342
495
  const response = await dynafetchNetFetch(payload, {
343
- followRedirect: false
496
+ followRedirect: false,
497
+ rpcTimeoutMs: options.timeoutMs
344
498
  });
345
499
  return {
346
500
  status: response.status,
347
501
  body: response.body,
348
502
  headers: response.headers,
349
503
  finalUrl: response.finalUrl,
350
- error: response.error
504
+ error: response.error,
505
+ transport: "dynafetch-net"
351
506
  };
352
507
  }
353
- async function phantomFetch(payload) {
508
+ async function phantomFetch(payload, options = {}) {
509
+ await assertSafeRemoteUrl(payload.url);
354
510
  if (process.env.PHANTOM_DISABLE_PROXY === "1" || process.env.DYNAFETCH_DISABLE_NET === "1") {
355
- return directFetch(payload);
511
+ return directFetch(payload, options);
356
512
  }
357
513
  try {
358
- return await dynafetchWorkerFetch(payload);
514
+ return await dynafetchWorkerFetch(payload, options);
359
515
  } catch (error2) {
360
516
  if (process.env.DYNAFETCH_DISABLE_DIRECT_FALLBACK === "1") {
361
517
  throw error2;
362
518
  }
363
- return await directFetch(payload);
519
+ return await directFetch(payload, options);
364
520
  }
365
521
  }
366
- async function phantomBatchFetch(payloads) {
522
+ async function phantomBatchFetch(payloads, options = {}) {
367
523
  if (payloads.length === 0) return [];
524
+ await Promise.all(payloads.map((payload) => assertSafeRemoteUrl(payload.url)));
368
525
  if (process.env.PHANTOM_DISABLE_PROXY === "1" || process.env.DYNAFETCH_DISABLE_NET === "1") {
369
- return Promise.all(payloads.map((p) => directFetch(p)));
526
+ return Promise.all(payloads.map((payload) => directFetch(payload, options)));
370
527
  }
371
528
  try {
372
529
  const responses = await dynafetchNetBatchFetch(
373
530
  payloads,
374
- { followRedirect: false }
531
+ {
532
+ followRedirect: false,
533
+ rpcTimeoutMs: options.timeoutMs
534
+ }
375
535
  );
376
536
  return responses.map((r) => ({
377
537
  status: r.status,
378
538
  body: r.body,
379
539
  headers: r.headers,
380
540
  finalUrl: r.finalUrl,
381
- error: r.error
541
+ error: r.error,
542
+ transport: "dynafetch-net"
382
543
  }));
383
544
  } catch (error2) {
384
545
  if (process.env.DYNAFETCH_DISABLE_DIRECT_FALLBACK === "1") {
385
546
  throw error2;
386
547
  }
387
- return Promise.all(payloads.map((p) => directFetch(p)));
548
+ return Promise.all(payloads.map((payload) => directFetch(payload, options)));
388
549
  }
389
550
  }
390
551
 
@@ -700,6 +861,7 @@ var Executor = class {
700
861
  };
701
862
  this.executionErrors = [];
702
863
  this.thirdPartyPolicy = "skip-noncritical";
864
+ this.warnings = /* @__PURE__ */ new Set();
703
865
  // Early exit tracking
704
866
  this.findAll = false;
705
867
  this.fuzzyMatch = true;
@@ -716,6 +878,8 @@ var Executor = class {
716
878
  this.fuzzyMatch = options.fuzzyMatch ?? true;
717
879
  this.thirdPartyPolicy = options.thirdPartyPolicy ?? "skip-noncritical";
718
880
  this.proxy = options.proxy;
881
+ this.timeoutMs = options.timeoutMs;
882
+ this.deadlineAt = options.deadlineAt;
719
883
  this.applyDefaults(options.quiescence, options.moduleWaitMs);
720
884
  }
721
885
  if (this.targetValue !== null && this.targetValue !== void 0) {
@@ -752,6 +916,26 @@ var Executor = class {
752
916
  if (!Number.isFinite(v)) return min;
753
917
  return Math.max(min, Math.min(max, Math.trunc(v)));
754
918
  }
919
+ createTimeoutError() {
920
+ const timeoutMs = Math.max(1, Math.ceil(this.timeoutMs ?? 1));
921
+ return new Error(`dynafetch timed out after ${timeoutMs}ms`);
922
+ }
923
+ remainingTimeMs() {
924
+ if (this.deadlineAt == null) return this.timeoutMs;
925
+ const remaining = this.deadlineAt - Date.now();
926
+ if (remaining <= 0) throw this.createTimeoutError();
927
+ return Math.max(1, Math.ceil(remaining));
928
+ }
929
+ boundedDurationMs(durationMs) {
930
+ if (this.deadlineAt == null) return durationMs;
931
+ const remaining = this.deadlineAt - Date.now();
932
+ if (remaining <= 0) return 0;
933
+ return Math.max(0, Math.min(durationMs, Math.ceil(remaining)));
934
+ }
935
+ recordWarning(warning) {
936
+ if (!warning) return;
937
+ this.warnings.add(warning);
938
+ }
755
939
  applyDefaults(quiescence, moduleWaitMsOverride) {
756
940
  const hardMaxCap = this.clampMs(Number(process.env.PHANTOM_QUIESCENCE_MAX_CAP_MS ?? 8e3), 500, 6e4);
757
941
  const minWaitMs = this.clampMs(quiescence?.minWaitMs ?? 75, 0, 1e4);
@@ -882,7 +1066,7 @@ var Executor = class {
882
1066
  async waitForModuleWork(timeoutMs) {
883
1067
  const pending = Array.from(this.moduleInFlight.values());
884
1068
  if (!pending.length) return;
885
- const timeout = this.clampMs(timeoutMs, 0, 6e4);
1069
+ const timeout = this.clampMs(this.boundedDurationMs(timeoutMs), 0, 6e4);
886
1070
  if (timeout === 0) return;
887
1071
  const all = Promise.allSettled(pending).then(() => {
888
1072
  });
@@ -899,7 +1083,11 @@ var Executor = class {
899
1083
  try {
900
1084
  this.telemetry_proxy++;
901
1085
  const payload = { method, url, headers, headerOrder: Object.keys(headers), body, proxy: this.proxyUrlForScope(proxyScope) };
902
- return await phantomFetch(payload);
1086
+ const response = await phantomFetch(payload, {
1087
+ timeoutMs: this.remainingTimeMs()
1088
+ });
1089
+ this.recordWarning(response.warning);
1090
+ return response;
903
1091
  } catch (e) {
904
1092
  return { status: 0, body: e.message, headers: {}, error: e.message };
905
1093
  }
@@ -1015,7 +1203,7 @@ var Executor = class {
1015
1203
  const existing = this.moduleInFlight.get(cacheKey);
1016
1204
  if (existing) return existing;
1017
1205
  const p = (async () => {
1018
- const taskId = this.trackTaskStart("module_bundle", cacheKey, this.moduleWaitMs);
1206
+ const taskId = this.trackTaskStart("module_bundle", cacheKey, this.boundedDurationMs(this.moduleWaitMs));
1019
1207
  try {
1020
1208
  if (process.env.PHANTOM_DEBUG_MODULES === "1") {
1021
1209
  log("[Executor] Bundling module entry:", cacheKey);
@@ -1898,7 +2086,8 @@ var Executor = class {
1898
2086
  matchedRequests: this.earlyMatches,
1899
2087
  renderedHtml,
1900
2088
  timings: { ...this.timings },
1901
- errors: this.executionErrors.length ? this.executionErrors : void 0
2089
+ errors: this.executionErrors.length ? this.executionErrors : void 0,
2090
+ warnings: Array.from(this.warnings)
1902
2091
  };
1903
2092
  const shutdownGraceMs = this.clampMs(Number(process.env.PHANTOM_SHUTDOWN_GRACE_MS ?? 50), 10, 5e3);
1904
2093
  await new Promise((r) => setTimeout(r, shutdownGraceMs));
@@ -2381,11 +2570,12 @@ async function prefetchModuleGraph(rootUrls, cache, pageUrl, opts) {
2381
2570
  body: "",
2382
2571
  proxy: opts?.proxyUrl
2383
2572
  }));
2384
- const responses = await phantomBatchFetch(payloads);
2573
+ const responses = await phantomBatchFetch(payloads, { timeoutMs: opts?.timeoutMs });
2385
2574
  const newToScan = [];
2386
2575
  for (let i = 0; i < toFetch.length; i++) {
2387
2576
  const u = toFetch[i];
2388
2577
  const r = responses[i];
2578
+ if (r.warning) opts?.onWarning?.(r.warning);
2389
2579
  if (r.status < 400 && r.body) {
2390
2580
  cache.set(u, r.body);
2391
2581
  totalFetched++;
@@ -2423,6 +2613,7 @@ var Harvester = class {
2423
2613
  this.requestHeaders = {};
2424
2614
  this.thirdPartyPolicy = "skip-noncritical";
2425
2615
  this.prefetchModulePreloads = true;
2616
+ this.warnings = /* @__PURE__ */ new Set();
2426
2617
  this.targetUrl = url;
2427
2618
  this.prefetchExternalScripts = opts.prefetchExternalScripts !== false;
2428
2619
  this.externalScriptConcurrency = opts.externalScriptConcurrency ?? 8;
@@ -2431,6 +2622,8 @@ var Harvester = class {
2431
2622
  this.thirdPartyPolicy = opts.thirdPartyPolicy ?? "skip-noncritical";
2432
2623
  this.prefetchModulePreloads = opts.prefetchModulePreloads !== false;
2433
2624
  this.proxy = opts.proxy;
2625
+ this.timeoutMs = opts.timeoutMs;
2626
+ this.deadlineAt = opts.deadlineAt;
2434
2627
  }
2435
2628
  proxyUrlForScope(scope) {
2436
2629
  if (!this.proxy) return void 0;
@@ -2446,6 +2639,20 @@ var Harvester = class {
2446
2639
  }
2447
2640
  return pairs.join("; ");
2448
2641
  }
2642
+ createTimeoutError() {
2643
+ const timeoutMs = Math.max(1, Math.ceil(this.timeoutMs ?? 1));
2644
+ return new Error(`dynafetch timed out after ${timeoutMs}ms`);
2645
+ }
2646
+ remainingTimeMs() {
2647
+ if (this.deadlineAt == null) return this.timeoutMs;
2648
+ const remaining = this.deadlineAt - Date.now();
2649
+ if (remaining <= 0) throw this.createTimeoutError();
2650
+ return Math.max(1, Math.ceil(remaining));
2651
+ }
2652
+ recordWarning(warning) {
2653
+ if (!warning) return;
2654
+ this.warnings.add(warning);
2655
+ }
2449
2656
  async fetchViaProxy(url, headers = {}, followRedirects = false, maxRedirects = 5, method = "GET", body = "", proxyScope = "page") {
2450
2657
  let currentUrl = url;
2451
2658
  let redirectCount = 0;
@@ -2466,7 +2673,10 @@ var Harvester = class {
2466
2673
  body: currentBody,
2467
2674
  proxy: this.proxyUrlForScope(proxyScope)
2468
2675
  };
2469
- const data = await phantomFetch(payload);
2676
+ const data = await phantomFetch(payload, {
2677
+ timeoutMs: this.remainingTimeMs()
2678
+ });
2679
+ this.recordWarning(data.warning);
2470
2680
  if (data.error) throw new Error(`Proxy Error: ${data.error}`);
2471
2681
  const setCookie = data.headers["Set-Cookie"] || data.headers["set-cookie"];
2472
2682
  if (setCookie) this.cookies.push(setCookie);
@@ -2709,10 +2919,13 @@ var Harvester = class {
2709
2919
  const allPayloads = [...scriptPayloads, ...preloadPayloads];
2710
2920
  if (allPayloads.length > 0) {
2711
2921
  log(`[Harvest] Batch-fetching ${scriptPayloads.length} scripts + ${preloadPayloads.length} modulepreloads...`);
2712
- const allResponses = await phantomBatchFetch(allPayloads);
2922
+ const allResponses = await phantomBatchFetch(allPayloads, {
2923
+ timeoutMs: this.remainingTimeMs()
2924
+ });
2713
2925
  for (let i = 0; i < batchScriptMeta.length; i++) {
2714
2926
  const meta = batchScriptMeta[i];
2715
2927
  const resp = allResponses[i];
2928
+ this.recordWarning(resp.warning);
2716
2929
  const logEntry = {
2717
2930
  type: "resource_load",
2718
2931
  url: meta.absoluteUrl,
@@ -2741,6 +2954,7 @@ var Harvester = class {
2741
2954
  for (let i = 0; i < modulePreloadUrls.length; i++) {
2742
2955
  const url = modulePreloadUrls[i];
2743
2956
  const resp = allResponses[batchScriptMeta.length + i];
2957
+ this.recordWarning(resp.warning);
2744
2958
  const logEntry = {
2745
2959
  type: "resource_load",
2746
2960
  url,
@@ -2805,7 +3019,9 @@ var Harvester = class {
2805
3019
  if (moduleEntryUrls.length > 0 || modulePreloads.length > 0) {
2806
3020
  const rootUrls = [...moduleEntryUrls, ...modulePreloads.map((mp) => mp.url)];
2807
3021
  await prefetchModuleGraph(rootUrls, moduleGraphCache, finalUrl, {
2808
- proxyUrl: this.proxyUrlForScope("assets")
3022
+ proxyUrl: this.proxyUrlForScope("assets"),
3023
+ timeoutMs: this.remainingTimeMs(),
3024
+ onWarning: (warning) => this.recordWarning(warning)
2809
3025
  });
2810
3026
  }
2811
3027
  return {
@@ -2819,7 +3035,8 @@ var Harvester = class {
2819
3035
  cookies: this.cookies,
2820
3036
  headers: response.headers,
2821
3037
  logs: this.logs,
2822
- moduleGraphCache
3038
+ moduleGraphCache,
3039
+ warnings: Array.from(this.warnings)
2823
3040
  };
2824
3041
  }
2825
3042
  };
@@ -2896,27 +3113,6 @@ var DynafetchInputError = class extends Error {
2896
3113
  this.status = status;
2897
3114
  }
2898
3115
  };
2899
- function isPrivateOrLocalHost(hostname) {
2900
- const h = hostname.toLowerCase();
2901
- if (h === "localhost" || h.endsWith(".localhost") || h === "0.0.0.0") return true;
2902
- if (h === "metadata.google.internal") return true;
2903
- const ipVer = net.isIP(h);
2904
- if (!ipVer) return false;
2905
- if (ipVer === 4) {
2906
- const [a, b] = h.split(".").map((x) => Number(x));
2907
- if (a === 10) return true;
2908
- if (a === 127) return true;
2909
- if (a === 0) return true;
2910
- if (a === 169 && b === 254) return true;
2911
- if (a === 172 && b >= 16 && b <= 31) return true;
2912
- if (a === 192 && b === 168) return true;
2913
- return false;
2914
- }
2915
- if (h === "::1") return true;
2916
- if (h.startsWith("fe80:")) return true;
2917
- if (h.startsWith("fc") || h.startsWith("fd")) return true;
2918
- return false;
2919
- }
2920
3116
  function normalizeProxy(input) {
2921
3117
  if (!input) return void 0;
2922
3118
  if (typeof input === "string") {
@@ -2946,15 +3142,10 @@ function normalizeOptions(input) {
2946
3142
  }
2947
3143
  let parsedUrl;
2948
3144
  try {
2949
- parsedUrl = new URL(options.url);
2950
- } catch {
2951
- throw new DynafetchInputError("Invalid URL");
2952
- }
2953
- if (parsedUrl.protocol !== "http:" && parsedUrl.protocol !== "https:") {
2954
- throw new DynafetchInputError("Only http(s) URLs are allowed");
2955
- }
2956
- if (isPrivateOrLocalHost(parsedUrl.hostname)) {
2957
- throw new DynafetchInputError("Refusing to fetch local/private addresses");
3145
+ parsedUrl = assertSafeHttpUrlSync(options.url);
3146
+ } catch (error2) {
3147
+ const message = error2 instanceof Error ? error2.message : "Invalid URL";
3148
+ throw new DynafetchInputError(message);
2958
3149
  }
2959
3150
  return {
2960
3151
  ...options,
@@ -2966,8 +3157,13 @@ function normalizeOptions(input) {
2966
3157
  thirdPartyPolicy: options.thirdPartyPolicy ?? "skip-noncritical"
2967
3158
  };
2968
3159
  }
2969
- function toWarnings(plan, errors, options) {
3160
+ function toWarnings(plan, errors, options, runtimeWarnings = []) {
2970
3161
  const warnings = [plan.reason];
3162
+ for (const warning of runtimeWarnings) {
3163
+ if (warning && !warnings.includes(warning)) {
3164
+ warnings.push(warning);
3165
+ }
3166
+ }
2971
3167
  if (plan.strategy === "jsdom-fallback" || plan.strategy === "framework-probe") {
2972
3168
  warnings.push("runtime execution used the legacy JSDOM-based renderer while lightweight adapters are still being built");
2973
3169
  }
@@ -3001,85 +3197,125 @@ function computeConfidence(params) {
3001
3197
  confidence -= Math.min(0.28, params.executionErrors * 0.07);
3002
3198
  return Math.max(0.05, Math.min(0.98, Number(confidence.toFixed(2))));
3003
3199
  }
3200
+ function createTimeoutError(timeoutMs) {
3201
+ const error2 = new Error(`dynafetch timed out after ${timeoutMs}ms`);
3202
+ error2.name = "DynafetchTimeoutError";
3203
+ return error2;
3204
+ }
3205
+ async function withOperationTimeout(operation, timeoutMs) {
3206
+ if (!timeoutMs || !Number.isFinite(timeoutMs)) {
3207
+ return await operation;
3208
+ }
3209
+ return await new Promise((resolve, reject) => {
3210
+ const timer = setTimeout(() => reject(createTimeoutError(Math.max(1, Math.ceil(timeoutMs)))), Math.max(1, Math.ceil(timeoutMs)));
3211
+ timer.unref?.();
3212
+ operation.then(
3213
+ (value) => {
3214
+ clearTimeout(timer);
3215
+ resolve(value);
3216
+ },
3217
+ (error2) => {
3218
+ clearTimeout(timer);
3219
+ reject(error2);
3220
+ }
3221
+ );
3222
+ });
3223
+ }
3004
3224
  async function dynafetch(input) {
3005
3225
  const options = normalizeOptions(input);
3006
3226
  const timeoutSeconds = options.timeoutMs ? Math.max(1, Math.ceil(options.timeoutMs / 1e3)) : void 0;
3227
+ const deadlineAt = options.timeoutMs ? Date.now() + options.timeoutMs : void 0;
3007
3228
  const initialCookies = normalizeCookies(options.cookies);
3008
3229
  const proxy = normalizeProxy(options.proxy);
3009
- return await withDynafetchSession(
3010
- {
3011
- browserProfile: options.browserProfile,
3012
- timeoutSeconds,
3013
- proxy: proxy?.url
3014
- },
3015
- async () => {
3016
- const totalStart = Date.now();
3017
- const harvestStart = Date.now();
3018
- const harvester = new Harvester(options.url, {
3019
- prefetchExternalScripts: options.prefetchExternalScripts,
3020
- prefetchModulePreloads: options.prefetchModulePreloads,
3021
- requestHeaders: options.headers,
3022
- initialCookies,
3023
- thirdPartyPolicy: options.thirdPartyPolicy,
3024
- proxy
3025
- });
3026
- const harvest = await harvester.harvest();
3027
- const harvestMs = Date.now() - harvestStart;
3028
- const framework = detectFramework(harvest);
3029
- const plan = planDynafetch(framework, harvest, options.allowJsdomFallback !== false);
3030
- let html = harvest.html;
3031
- let requestCount = harvest.logs.length;
3032
- let executionErrors;
3033
- let executeMs = 0;
3034
- let quiescenceMs = 0;
3035
- let scriptsTransformed = 0;
3036
- if (plan.strategy !== "static-html") {
3037
- const executeStart = Date.now();
3038
- const executor = new Executor(harvest, {
3230
+ return await withOperationTimeout(
3231
+ withDynafetchSession(
3232
+ {
3233
+ browserProfile: options.browserProfile,
3234
+ timeoutSeconds,
3235
+ proxy: proxy?.url,
3236
+ rpcTimeoutMs: options.timeoutMs
3237
+ },
3238
+ async () => {
3239
+ const totalStart = Date.now();
3240
+ const harvestStart = Date.now();
3241
+ const harvester = new Harvester(options.url, {
3242
+ prefetchExternalScripts: options.prefetchExternalScripts,
3243
+ prefetchModulePreloads: options.prefetchModulePreloads,
3244
+ requestHeaders: options.headers,
3245
+ initialCookies,
3039
3246
  thirdPartyPolicy: options.thirdPartyPolicy,
3040
- quiescence: {
3041
- minWaitMs: options.minWaitMs,
3042
- idleWaitMs: options.idleWaitMs,
3043
- maxWaitMs: options.maxWaitMs
3044
- },
3045
- moduleWaitMs: options.moduleWaitMs,
3046
- proxy
3247
+ proxy,
3248
+ timeoutMs: options.timeoutMs,
3249
+ deadlineAt
3047
3250
  });
3048
- const execution = await executor.execute();
3049
- executeMs = Date.now() - executeStart;
3050
- html = execution.renderedHtml ?? harvest.html;
3051
- requestCount = execution.logs.length;
3052
- executionErrors = execution.errors;
3053
- quiescenceMs = execution.timings?.quiescence_ms ?? 0;
3054
- scriptsTransformed = execution.timings?.scripts_transformed_count ?? 0;
3055
- }
3056
- const totalMs = Date.now() - totalStart;
3057
- const warnings = toWarnings(plan, executionErrors, options);
3058
- const confidence = computeConfidence({
3059
- plan,
3060
- initialStateCount: Object.keys(harvest.initialState).length,
3061
- executionErrors: executionErrors?.length ?? 0,
3062
- htmlLength: html.length
3063
- });
3064
- return {
3065
- url: options.url,
3066
- finalUrl: harvest.url,
3067
- status: harvest.status,
3068
- html,
3069
- framework,
3070
- strategy: plan.strategy,
3071
- confidence,
3072
- warnings,
3073
- timings: {
3074
- total: totalMs,
3075
- harvest: harvestMs,
3076
- execute: executeMs,
3077
- quiescence: quiescenceMs,
3078
- scriptsTransformed
3079
- },
3080
- requestCount
3081
- };
3082
- }
3251
+ const harvest = await harvester.harvest();
3252
+ const harvestMs = Date.now() - harvestStart;
3253
+ const framework = detectFramework(harvest);
3254
+ const plan = planDynafetch(framework, harvest, options.allowJsdomFallback !== false);
3255
+ let html = harvest.html;
3256
+ let requestCount = harvest.logs.length;
3257
+ let executionErrors;
3258
+ let executionWarnings = [];
3259
+ let executeMs = 0;
3260
+ let quiescenceMs = 0;
3261
+ let scriptsTransformed = 0;
3262
+ if (plan.strategy !== "static-html") {
3263
+ const executeStart = Date.now();
3264
+ const executor = new Executor(harvest, {
3265
+ thirdPartyPolicy: options.thirdPartyPolicy,
3266
+ quiescence: {
3267
+ minWaitMs: options.minWaitMs,
3268
+ idleWaitMs: options.idleWaitMs,
3269
+ maxWaitMs: options.maxWaitMs
3270
+ },
3271
+ moduleWaitMs: options.moduleWaitMs,
3272
+ proxy,
3273
+ timeoutMs: options.timeoutMs,
3274
+ deadlineAt
3275
+ });
3276
+ const execution = await executor.execute();
3277
+ executeMs = Date.now() - executeStart;
3278
+ html = execution.renderedHtml ?? harvest.html;
3279
+ requestCount = execution.logs.length;
3280
+ executionErrors = execution.errors;
3281
+ executionWarnings = execution.warnings ?? [];
3282
+ quiescenceMs = execution.timings?.quiescence_ms ?? 0;
3283
+ scriptsTransformed = execution.timings?.scripts_transformed_count ?? 0;
3284
+ }
3285
+ const totalMs = Date.now() - totalStart;
3286
+ const warnings = toWarnings(
3287
+ plan,
3288
+ executionErrors,
3289
+ options,
3290
+ [...harvest.warnings ?? [], ...executionWarnings]
3291
+ );
3292
+ const confidence = computeConfidence({
3293
+ plan,
3294
+ initialStateCount: Object.keys(harvest.initialState).length,
3295
+ executionErrors: executionErrors?.length ?? 0,
3296
+ htmlLength: html.length
3297
+ });
3298
+ return {
3299
+ url: options.url,
3300
+ finalUrl: harvest.url,
3301
+ status: harvest.status,
3302
+ html,
3303
+ framework,
3304
+ strategy: plan.strategy,
3305
+ confidence,
3306
+ warnings,
3307
+ timings: {
3308
+ total: totalMs,
3309
+ harvest: harvestMs,
3310
+ execute: executeMs,
3311
+ quiescence: quiescenceMs,
3312
+ scriptsTransformed
3313
+ },
3314
+ requestCount
3315
+ };
3316
+ }
3317
+ ),
3318
+ options.timeoutMs
3083
3319
  );
3084
3320
  }
3085
3321
  export {