@grabbit-labs/dynafetch 0.2.0 → 0.2.1

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -9,6 +9,14 @@ var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require
9
9
  // ../dynafetch-core/src/index.ts
10
10
  import * as net from "node:net";
11
11
 
12
+ // ../../src/phantom/log.ts
13
+ var enabled = process.env.DYNAFETCH_DEBUG === "1";
14
+ var log = enabled ? console.log.bind(console) : () => {
15
+ };
16
+ var warn = enabled ? console.warn.bind(console) : () => {
17
+ };
18
+ var error = console.error.bind(console);
19
+
12
20
  // ../../src/phantom/execute.ts
13
21
  import { JSDOM, VirtualConsole, CookieJar } from "jsdom";
14
22
  import WebSocket from "ws";
@@ -161,11 +169,18 @@ function createWorkerCommand() {
161
169
  }
162
170
  function createWorkerTransport() {
163
171
  const { command, args, cwd } = createWorkerCommand();
164
- const child = spawn(command, args, {
165
- cwd,
166
- stdio: ["pipe", "pipe", "pipe"],
167
- env: process.env
168
- });
172
+ let child;
173
+ try {
174
+ child = spawn(command, args, {
175
+ cwd,
176
+ stdio: ["pipe", "pipe", "pipe"],
177
+ env: process.env
178
+ });
179
+ } catch (err) {
180
+ return Promise.reject(
181
+ new Error(`Failed to start dynafetch-net TLS proxy: ${err instanceof Error ? err.message : String(err)}. Binary: ${command}`)
182
+ );
183
+ }
169
184
  const pending = /* @__PURE__ */ new Map();
170
185
  const rl = readline.createInterface({ input: child.stdout });
171
186
  rl.on("line", (line) => {
@@ -174,9 +189,9 @@ function createWorkerTransport() {
174
189
  let payload;
175
190
  try {
176
191
  payload = JSON.parse(trimmed);
177
- } catch (error) {
192
+ } catch (error2) {
178
193
  for (const entry of pending.values()) {
179
- entry.reject(new Error(`Invalid dynafetch-net response: ${String(error)}`));
194
+ entry.reject(new Error(`Invalid dynafetch-net response: ${String(error2)}`));
180
195
  }
181
196
  pending.clear();
182
197
  return;
@@ -193,7 +208,7 @@ function createWorkerTransport() {
193
208
  child.stderr.on("data", (chunk) => {
194
209
  const message = chunk.toString().trim();
195
210
  if (message) {
196
- console.warn(`[dynafetch-net] ${message}`);
211
+ if (process.env.DYNAFETCH_DEBUG === "1") console.warn(`[dynafetch-net] ${message}`);
197
212
  }
198
213
  });
199
214
  const onExit = (code, signal) => {
@@ -204,15 +219,29 @@ function createWorkerTransport() {
204
219
  pending.clear();
205
220
  transportPromise = null;
206
221
  };
207
- child.once("error", (error) => {
222
+ child.once("error", (error2) => {
208
223
  for (const entry of pending.values()) {
209
- entry.reject(error);
224
+ entry.reject(error2);
210
225
  }
211
226
  pending.clear();
212
227
  transportPromise = null;
213
228
  });
214
229
  child.once("exit", onExit);
215
- return Promise.resolve({ child, pending });
230
+ return new Promise((resolve, reject) => {
231
+ let settled = false;
232
+ child.once("error", (err) => {
233
+ if (!settled) {
234
+ settled = true;
235
+ reject(new Error(`Failed to start dynafetch-net TLS proxy: ${err.message}. Binary: ${command}`));
236
+ }
237
+ });
238
+ child.once("spawn", () => {
239
+ if (!settled) {
240
+ settled = true;
241
+ resolve({ child, pending });
242
+ }
243
+ });
244
+ });
216
245
  }
217
246
  async function getWorkerTransport() {
218
247
  if (!transportPromise) {
@@ -220,17 +249,31 @@ async function getWorkerTransport() {
220
249
  }
221
250
  return transportPromise;
222
251
  }
223
- async function callWorker(method, params) {
252
+ async function callWorker(method, params, timeoutMs = 3e4) {
224
253
  const transport = await getWorkerTransport();
225
254
  const id = randomUUID();
226
255
  const payload = JSON.stringify({ id, method, params });
227
256
  return await new Promise((resolve, reject) => {
228
- transport.pending.set(id, { resolve, reject });
257
+ const timer = setTimeout(() => {
258
+ transport.pending.delete(id);
259
+ reject(new Error(`dynafetch-net request timed out after ${timeoutMs}ms (method: ${method})`));
260
+ }, timeoutMs);
261
+ transport.pending.set(id, {
262
+ resolve: (value) => {
263
+ clearTimeout(timer);
264
+ resolve(value);
265
+ },
266
+ reject: (err) => {
267
+ clearTimeout(timer);
268
+ reject(err);
269
+ }
270
+ });
229
271
  transport.child.stdin.write(`${payload}
230
- `, (error) => {
231
- if (!error) return;
272
+ `, (error2) => {
273
+ if (!error2) return;
274
+ clearTimeout(timer);
232
275
  transport.pending.delete(id);
233
- reject(error);
276
+ reject(error2);
234
277
  });
235
278
  });
236
279
  }
@@ -313,9 +356,9 @@ async function phantomFetch(payload) {
313
356
  }
314
357
  try {
315
358
  return await dynafetchWorkerFetch(payload);
316
- } catch (error) {
359
+ } catch (error2) {
317
360
  if (process.env.DYNAFETCH_DISABLE_DIRECT_FALLBACK === "1") {
318
- throw error;
361
+ throw error2;
319
362
  }
320
363
  return await directFetch(payload);
321
364
  }
@@ -337,9 +380,9 @@ async function phantomBatchFetch(payloads) {
337
380
  finalUrl: r.finalUrl,
338
381
  error: r.error
339
382
  }));
340
- } catch (error) {
383
+ } catch (error2) {
341
384
  if (process.env.DYNAFETCH_DISABLE_DIRECT_FALLBACK === "1") {
342
- throw error;
385
+ throw error2;
343
386
  }
344
387
  return Promise.all(payloads.map((p) => directFetch(p)));
345
388
  }
@@ -975,7 +1018,7 @@ var Executor = class {
975
1018
  const taskId = this.trackTaskStart("module_bundle", cacheKey, this.moduleWaitMs);
976
1019
  try {
977
1020
  if (process.env.PHANTOM_DEBUG_MODULES === "1") {
978
- console.log("[Executor] Bundling module entry:", cacheKey);
1021
+ log("[Executor] Bundling module entry:", cacheKey);
979
1022
  }
980
1023
  const cached = this.moduleBundleCache.get(cacheKey);
981
1024
  if (cached) {
@@ -1100,7 +1143,7 @@ var Executor = class {
1100
1143
  this.moduleBundleCache.set(cacheKey, transformed);
1101
1144
  if (!this.windowClosed) window.eval(transformed);
1102
1145
  if (process.env.PHANTOM_DEBUG_MODULES === "1") {
1103
- console.log("[Executor] Module bundle eval complete:", cacheKey);
1146
+ log("[Executor] Module bundle eval complete:", cacheKey);
1104
1147
  }
1105
1148
  } catch (e) {
1106
1149
  this.recordExecutionError(e, "unhandledRejection");
@@ -1437,9 +1480,9 @@ var Executor = class {
1437
1480
  process.on("unhandledRejection", onNodeUnhandled);
1438
1481
  try {
1439
1482
  const virtualConsole = new VirtualConsole();
1440
- virtualConsole.on("log", (...args) => console.log("[JSDOM Log]", ...args));
1483
+ virtualConsole.on("log", (...args) => log("[JSDOM Log]", ...args));
1441
1484
  virtualConsole.on("error", (...args) => console.error("[JSDOM Error]", ...args));
1442
- virtualConsole.on("warn", (...args) => console.warn("[JSDOM Warn]", ...args));
1485
+ virtualConsole.on("warn", (...args) => warn("[JSDOM Warn]", ...args));
1443
1486
  const cookieJar = new CookieJar();
1444
1487
  this.harvestData.cookies.forEach((c) => {
1445
1488
  try {
@@ -1619,7 +1662,7 @@ var Executor = class {
1619
1662
  if (that.handledModuleScriptUrls.has(abs)) return;
1620
1663
  that.handledModuleScriptUrls.add(abs);
1621
1664
  if (process.env.PHANTOM_DEBUG_MODULES === "1") {
1622
- console.log("[Executor] Detected module script:", abs);
1665
+ log("[Executor] Detected module script:", abs);
1623
1666
  }
1624
1667
  void that.handleModuleScript(abs, window2);
1625
1668
  return;
@@ -1778,7 +1821,7 @@ var Executor = class {
1778
1821
  });
1779
1822
  }
1780
1823
  } catch (e) {
1781
- console.warn(`[Executor] Module script ${script.id} failed:`, e);
1824
+ warn(`[Executor] Module script ${script.id} failed:`, e);
1782
1825
  } finally {
1783
1826
  currentScriptState.value = prevCurrentScript;
1784
1827
  }
@@ -1794,7 +1837,7 @@ var Executor = class {
1794
1837
  try {
1795
1838
  window.eval(code);
1796
1839
  } catch (e) {
1797
- console.warn(`[Executor] Script ${script.id} failed:`, e);
1840
+ warn(`[Executor] Script ${script.id} failed:`, e);
1798
1841
  } finally {
1799
1842
  currentScriptState.value = prevCurrentScript;
1800
1843
  }
@@ -1834,16 +1877,16 @@ var Executor = class {
1834
1877
  if (this.moduleInFlight.size > 0) {
1835
1878
  await this.waitForModuleWork(this.moduleWaitMs);
1836
1879
  }
1837
- console.log("[Executor] Waiting for network quiescence...");
1880
+ log("[Executor] Waiting for network quiescence...");
1838
1881
  const quiescenceStart = Date.now();
1839
1882
  try {
1840
1883
  await this.waitForQuiescence();
1841
1884
  } catch (e) {
1842
- console.warn("[Executor] Quiescence wait failed:", e);
1885
+ warn("[Executor] Quiescence wait failed:", e);
1843
1886
  }
1844
1887
  this.timings.quiescence_ms = Date.now() - quiescenceStart;
1845
1888
  const reason = this.matchFound && !this.findAll ? "(early exit on match)" : "";
1846
- console.log(`[Executor] Quiescence reached in ${Date.now() - quiescenceStart}ms ${reason}`);
1889
+ log(`[Executor] Quiescence reached in ${Date.now() - quiescenceStart}ms ${reason}`);
1847
1890
  const renderedHtml = this.serializeDocument(window);
1848
1891
  this.windowClosed = true;
1849
1892
  try {
@@ -2106,7 +2149,7 @@ var Executor = class {
2106
2149
  this.asyncFlag = async !== false;
2107
2150
  this.aborted = false;
2108
2151
  if (process.env.PHANTOM_DEBUG_XHR === "1") {
2109
- console.log("[XHR open]", this.method, this.url);
2152
+ log("[XHR open]", this.method, this.url);
2110
2153
  }
2111
2154
  this.readyState = 1;
2112
2155
  this.responseURL = this.url;
@@ -2159,7 +2202,7 @@ var Executor = class {
2159
2202
  };
2160
2203
  that.logRequest(logEntry);
2161
2204
  if (process.env.PHANTOM_DEBUG_XHR === "1") {
2162
- console.log("[XHR send]", this.method, this.url, {
2205
+ log("[XHR send]", this.method, this.url, {
2163
2206
  hasBody: body != null,
2164
2207
  headers
2165
2208
  });
@@ -2326,7 +2369,9 @@ async function prefetchModuleGraph(rootUrls, cache, pageUrl, opts) {
2326
2369
  }
2327
2370
  for (let round = 0; round < maxRounds && toFetch.length > 0; round++) {
2328
2371
  if (process.env.PHANTOM_DEBUG_MODULES === "1") {
2329
- console.log(`[prefetch] Round ${round}: ${toFetch.length} modules`);
2372
+ if (process.env.DYNAFETCH_DEBUG === "1") {
2373
+ console.log(`[prefetch] Round ${round}: ${toFetch.length} modules`);
2374
+ }
2330
2375
  }
2331
2376
  const payloads = toFetch.map((u) => ({
2332
2377
  method: "GET",
@@ -2429,7 +2474,7 @@ var Harvester = class {
2429
2474
  const location = data.headers["Location"] || data.headers["location"];
2430
2475
  if (location) {
2431
2476
  currentUrl = new URL(location, currentUrl).toString();
2432
- console.log(`[Harvest] Following redirect to: ${currentUrl}`);
2477
+ log(`[Harvest] Following redirect to: ${currentUrl}`);
2433
2478
  redirectCount++;
2434
2479
  if (data.status === 302 || data.status === 303) {
2435
2480
  currentMethod = "GET";
@@ -2500,10 +2545,10 @@ var Harvester = class {
2500
2545
  return b.includes("just a moment") || b.includes("challenge-platform") || b.includes("__cf_chl") || b.includes("cf-browser-verification") || b.includes("enable javascript and cookies to continue") || b.includes("security verification") || b.includes("captcha") || b.includes("trkcode=") || b.includes("trkinfo=");
2501
2546
  }
2502
2547
  async harvest() {
2503
- console.log(`[Harvest] Fetching ${this.targetUrl} via TLS Proxy...`);
2548
+ log(`[Harvest] Fetching ${this.targetUrl} via TLS Proxy...`);
2504
2549
  let response = await this.fetchViaProxy(this.targetUrl, { ...DEFAULT_HEADERS, ...this.requestHeaders }, true);
2505
2550
  if (response.status >= 400) {
2506
- console.log(`[Harvest] Response Body on Error:`, response.body.substring(0, 500));
2551
+ log(`[Harvest] Response Body on Error:`, response.body.substring(0, 500));
2507
2552
  if (this.looksBlocked(response.status, response.body || "")) {
2508
2553
  throw new BlockedByBotProtectionError(
2509
2554
  this.targetUrl,
@@ -2515,7 +2560,7 @@ var Harvester = class {
2515
2560
  let finalUrl = response.finalUrl;
2516
2561
  let html = response.body;
2517
2562
  if (this.isConsentWall(finalUrl, html)) {
2518
- console.log(`[Harvest] Consent wall detected at ${finalUrl}, attempting bypass...`);
2563
+ log(`[Harvest] Consent wall detected at ${finalUrl}, attempting bypass...`);
2519
2564
  const form = this.parseConsentForm(html, finalUrl);
2520
2565
  if (form) {
2521
2566
  try {
@@ -2529,32 +2574,32 @@ var Harvester = class {
2529
2574
  };
2530
2575
  const consentResp = await this.fetchViaProxy(form.action, postHeaders, true, 10, "POST", formBody);
2531
2576
  if (consentResp.status < 400) {
2532
- console.log(`[Harvest] Consent POST succeeded (${consentResp.status}), final URL: ${consentResp.finalUrl}`);
2577
+ log(`[Harvest] Consent POST succeeded (${consentResp.status}), final URL: ${consentResp.finalUrl}`);
2533
2578
  if (!this.isConsentWall(consentResp.finalUrl, consentResp.body)) {
2534
2579
  response = consentResp;
2535
2580
  finalUrl = consentResp.finalUrl;
2536
2581
  html = consentResp.body;
2537
- console.log(`[Harvest] Consent bypass successful (from redirect), got real page at ${finalUrl}`);
2582
+ log(`[Harvest] Consent bypass successful (from redirect), got real page at ${finalUrl}`);
2538
2583
  } else {
2539
- console.log(`[Harvest] Consent redirect still on consent page, re-fetching original URL...`);
2584
+ log(`[Harvest] Consent redirect still on consent page, re-fetching original URL...`);
2540
2585
  const retryResp = await this.fetchViaProxy(this.targetUrl, { ...DEFAULT_HEADERS, ...this.requestHeaders }, true);
2541
2586
  if (retryResp.status < 400 && !this.isConsentWall(retryResp.finalUrl, retryResp.body)) {
2542
2587
  response = retryResp;
2543
2588
  finalUrl = retryResp.finalUrl;
2544
2589
  html = retryResp.body;
2545
- console.log(`[Harvest] Consent bypass successful (re-fetch), got real page at ${finalUrl}`);
2590
+ log(`[Harvest] Consent bypass successful (re-fetch), got real page at ${finalUrl}`);
2546
2591
  } else {
2547
- console.warn(`[Harvest] Re-fetch after consent still returned consent wall, proceeding with original`);
2592
+ warn(`[Harvest] Re-fetch after consent still returned consent wall, proceeding with original`);
2548
2593
  }
2549
2594
  }
2550
2595
  } else {
2551
- console.warn(`[Harvest] Consent POST returned ${consentResp.status}, proceeding with consent page`);
2596
+ warn(`[Harvest] Consent POST returned ${consentResp.status}, proceeding with consent page`);
2552
2597
  }
2553
2598
  } catch (e) {
2554
- console.warn(`[Harvest] Consent bypass failed, proceeding with consent page:`, e);
2599
+ warn(`[Harvest] Consent bypass failed, proceeding with consent page:`, e);
2555
2600
  }
2556
2601
  } else {
2557
- console.warn(`[Harvest] Could not parse consent form, proceeding with consent page`);
2602
+ warn(`[Harvest] Could not parse consent form, proceeding with consent page`);
2558
2603
  }
2559
2604
  }
2560
2605
  const $ = cheerio.load(html);
@@ -2663,7 +2708,7 @@ var Harvester = class {
2663
2708
  }));
2664
2709
  const allPayloads = [...scriptPayloads, ...preloadPayloads];
2665
2710
  if (allPayloads.length > 0) {
2666
- console.log(`[Harvest] Batch-fetching ${scriptPayloads.length} scripts + ${preloadPayloads.length} modulepreloads...`);
2711
+ log(`[Harvest] Batch-fetching ${scriptPayloads.length} scripts + ${preloadPayloads.length} modulepreloads...`);
2667
2712
  const allResponses = await phantomBatchFetch(allPayloads);
2668
2713
  for (let i = 0; i < batchScriptMeta.length; i++) {
2669
2714
  const meta = batchScriptMeta[i];
@@ -2690,7 +2735,7 @@ var Harvester = class {
2690
2735
  execution: meta.execution
2691
2736
  });
2692
2737
  } else {
2693
- console.warn(`[Harvest] Failed to fetch script ${meta.absoluteUrl}: status ${resp.status}`);
2738
+ warn(`[Harvest] Failed to fetch script ${meta.absoluteUrl}: status ${resp.status}`);
2694
2739
  }
2695
2740
  }
2696
2741
  for (let i = 0; i < modulePreloadUrls.length; i++) {
@@ -2933,8 +2978,8 @@ function toWarnings(plan, errors, options) {
2933
2978
  warnings.push("non-critical third-party scripts are skipped on the critical render path");
2934
2979
  }
2935
2980
  if (errors?.length) {
2936
- for (const error of errors.slice(0, 3)) {
2937
- warnings.push(`${error.source}: ${error.message}`);
2981
+ for (const error2 of errors.slice(0, 3)) {
2982
+ warnings.push(`${error2.source}: ${error2.message}`);
2938
2983
  }
2939
2984
  }
2940
2985
  return warnings;