@fanboynz/network-scanner 2.0.66 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,9 @@
4
4
  */
5
5
 
6
6
  const { formatLogMessage, messageColors } = require('./colorize');
7
+ const IS_PAGE_FROM_PREVIOUS_SCAN_TAG = messageColors.processing('[isPageFromPreviousScan]');
8
+ const REALTIME_CLEANUP_TAG = messageColors.processing('[realtime_cleanup]');
9
+ const GROUP_WINDOW_CLEANUP_TAG = messageColors.processing('[group_window_cleanup]');
7
10
  const { execSync, execFile } = require('child_process');
8
11
 
9
12
  // Window cleanup delay constant
@@ -13,11 +16,28 @@ const REALTIME_CLEANUP_BUFFER_MS = 25000; // Additional buffer time after site d
13
16
  const REALTIME_CLEANUP_THRESHOLD = 12; // Default number of pages to keep
14
17
  const REALTIME_CLEANUP_MIN_PAGES = 6; // Minimum pages before cleanup kicks in
15
18
 
16
- // Track page creation order for realtime cleanup
17
- const pageCreationTracker = new Map(); // Maps page -> creation timestamp
19
+ // Page-count thresholds: soft warn vs. hard restart trigger. The two used to
20
+ // live as bare 30/40 literals in different functions with no visible link.
21
+ const PAGE_COUNT_WARN_THRESHOLD = 30; // checkBrowserHealth: warn-only recommendation
22
+ const PAGE_COUNT_RESTART_THRESHOLD = 40; // monitorBrowserHealth: trigger restart
18
23
 
19
- // Track page usage for realtime cleanup safety
20
- const pageUsageTracker = new Map(); // Maps page -> { lastActivity: timestamp, isProcessing: boolean }
24
+ // Browser response-time threshold above which monitorBrowserHealth triggers a restart.
25
+ const SLOW_RESPONSE_RESTART_MS = 6000;
26
+
27
+ // Heuristics for isPageFromPreviousScan: pages older than CREATION_AGE_MS or
28
+ // idle longer than IDLE_AGE_MS are treated as leftover from a prior scan.
29
+ const PREVIOUS_SCAN_CREATION_AGE_MS = 120000; // 2 minutes
30
+ const PREVIOUS_SCAN_IDLE_AGE_MS = 60000; // 60 seconds
31
+
32
+ // Track page creation order for realtime cleanup. WeakMap so closed pages
33
+ // drop out of tracking automatically when Puppeteer releases its internal
34
+ // references — no manual purge needed to prevent the leak class that
35
+ // purgeStaleTrackers() used to mitigate. The only API loss from Map ->
36
+ // WeakMap is iteration (for...of), which only purgeStaleTrackers used.
37
+ const pageCreationTracker = new WeakMap(); // Page -> creation timestamp
38
+
39
+ // Track page usage for realtime cleanup safety. Same WeakMap rationale.
40
+ const pageUsageTracker = new WeakMap(); // Page -> { lastActivity, isProcessing }
21
41
  const PAGE_IDLE_THRESHOLD = 25000; // 25 seconds of inactivity before considering page safe to clean
22
42
 
23
43
  /**
@@ -78,7 +98,7 @@ async function performGroupWindowCleanup(browserInstance, groupDescription, forc
78
98
  };
79
99
  const modeText = cleanupMode === "all" ? "aggressive cleanup of old windows" : "conservative cleanup of extra windows"
80
100
  if (forceDebug) {
81
- console.log(formatLogMessage('debug', `[group_window_cleanup] Waiting ${WINDOW_CLEANUP_DELAY_MS}ms before ${modeText} for group: ${groupDescription}`));
101
+ console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} Waiting ${WINDOW_CLEANUP_DELAY_MS}ms before ${modeText} for group: ${groupDescription}`));
82
102
 
83
103
  }
84
104
  await new Promise(resolve => setTimeout(resolve, WINDOW_CLEANUP_DELAY_MS));
@@ -121,13 +141,13 @@ async function performGroupWindowCleanup(browserInstance, groupDescription, forc
121
141
  if (forceDebug) {
122
142
  // Cache URL call for logging
123
143
  const mainPageUrl = mainPuppeteerPage.url();
124
- console.log(formatLogMessage('debug', `[group_window_cleanup] No blank page found, using first page as main: ${mainPageUrl}`));
144
+ console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} No blank page found, using first page as main: ${mainPageUrl}`));
125
145
  }
126
146
  }
127
147
 
128
148
  if (pagesToClose.length === 0) {
129
149
  if (forceDebug) {
130
- console.log(formatLogMessage('debug', `[group_window_cleanup] No windows to close for group: ${groupDescription}`));
150
+ console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} No windows to close for group: ${groupDescription}`));
131
151
  }
132
152
  result.success = true;
133
153
  result.totalPages = allPages.length;
@@ -170,7 +190,7 @@ async function performGroupWindowCleanup(browserInstance, groupDescription, forc
170
190
  const pageUrl = page.url();
171
191
  if (!isPageClosed) {
172
192
  if (forceDebug) {
173
- console.log(formatLogMessage('debug', `[group_window_cleanup] Closing page: ${pageUrl}`));
193
+ console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} Closing page: ${pageUrl}`));
174
194
  }
175
195
  pageCreationTracker.delete(page);
176
196
  pageUsageTracker.delete(page);
@@ -180,7 +200,7 @@ async function performGroupWindowCleanup(browserInstance, groupDescription, forc
180
200
  return { success: false, reason: 'already_closed', estimatedMemory: 0 };
181
201
  } catch (closeErr) {
182
202
  if (forceDebug) {
183
- console.log(formatLogMessage('debug', `[group_window_cleanup] Failed to close old page ${index + 1}: ${closeErr.message}`));
203
+ console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} Failed to close old page ${index + 1}: ${closeErr.message}`));
184
204
  }
185
205
  return { success: false, error: closeErr.message, estimatedMemory: 0 };
186
206
  }
@@ -198,12 +218,12 @@ async function performGroupWindowCleanup(browserInstance, groupDescription, forc
198
218
  }
199
219
 
200
220
  if (forceDebug) {
201
- console.log(formatLogMessage('debug', `[group_window_cleanup] Closed ${successfulCloses}/${pagesToClose.length} old windows for completed group: ${groupDescription} after ${WINDOW_CLEANUP_DELAY_MS}ms delay`));
202
- console.log(formatLogMessage('debug', `[group_window_cleanup] Estimated memory freed: ${formatMemory(actualMemoryFreed)}`));
221
+ console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} Closed ${successfulCloses}/${pagesToClose.length} old windows for completed group: ${groupDescription} after ${WINDOW_CLEANUP_DELAY_MS}ms delay`));
222
+ console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} Estimated memory freed: ${formatMemory(actualMemoryFreed)}`));
203
223
  if (mainPuppeteerPage) {
204
224
  // Cache URL for final logging
205
225
  const mainPageUrl = mainPuppeteerPage.url();
206
- console.log(formatLogMessage('debug', `[group_window_cleanup] Main Puppeteer window preserved: ${mainPageUrl}`));
226
+ console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} Main Puppeteer window preserved: ${mainPageUrl}`));
207
227
  }
208
228
  }
209
229
 
@@ -219,7 +239,7 @@ async function performGroupWindowCleanup(browserInstance, groupDescription, forc
219
239
  return result;
220
240
  } catch (cleanupErr) {
221
241
  if (forceDebug) {
222
- console.log(formatLogMessage('debug', `[group_window_cleanup] Group cleanup failed for ${groupDescription}: ${cleanupErr.message}`));
242
+ console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} Group cleanup failed for ${groupDescription}: ${cleanupErr.message}`));
223
243
  }
224
244
  // Initialize result object with consistent shape for error case
225
245
  const result = {
@@ -266,7 +286,7 @@ async function isPageSafeToClose(page, forceDebug) {
266
286
  if (usage.isProcessing) {
267
287
  if (forceDebug) {
268
288
  const pageUrl = page.url();
269
- console.log(formatLogMessage('debug', `[realtime_cleanup] Page still processing: ${pageUrl.substring(0, 50)}...`));
289
+ console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Page still processing: ${pageUrl.substring(0, 50)}...`));
270
290
  }
271
291
  return false;
272
292
  }
@@ -275,13 +295,13 @@ async function isPageSafeToClose(page, forceDebug) {
275
295
  const isSafe = idleTime >= PAGE_IDLE_THRESHOLD;
276
296
 
277
297
  if (!isSafe && forceDebug) {
278
- console.log(formatLogMessage('debug', `[realtime_cleanup] Page not idle long enough: ${Math.round(idleTime/1000)}s < ${PAGE_IDLE_THRESHOLD/1000}s`));
298
+ console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Page not idle long enough: ${Math.round(idleTime/1000)}s < ${PAGE_IDLE_THRESHOLD/1000}s`));
279
299
  }
280
300
 
281
301
  return isSafe;
282
302
  } catch (err) {
283
303
  if (forceDebug) {
284
- console.log(formatLogMessage('debug', `[realtime_cleanup] Error checking page safety: ${err.message}`));
304
+ console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Error checking page safety: ${err.message}`));
285
305
  }
286
306
  return true; // Assume safe if we can't check
287
307
  }
@@ -344,7 +364,7 @@ async function performRealtimeWindowCleanup(browserInstance, threshold = REALTIM
344
364
  // Skip cleanup if we don't have enough pages to warrant it
345
365
  if (quickPages.length <= Math.max(threshold, REALTIME_CLEANUP_MIN_PAGES)) {
346
366
  if (forceDebug) {
347
- console.log(formatLogMessage('debug', `[realtime_cleanup] Only ${quickPages.length} pages open, threshold is ${threshold} - no cleanup needed`));
367
+ console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Only ${quickPages.length} pages open, threshold is ${threshold} - no cleanup needed`));
348
368
  }
349
369
  result.success = true;
350
370
  result.totalPages = quickPages.length;
@@ -356,7 +376,7 @@ async function performRealtimeWindowCleanup(browserInstance, threshold = REALTIM
356
376
  const cleanupDelay = totalDelay;
357
377
 
358
378
  if (forceDebug) {
359
- console.log(formatLogMessage('debug', `[realtime_cleanup] Waiting ${cleanupDelay}ms before cleanup (threshold: ${threshold})`));
379
+ console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Waiting ${cleanupDelay}ms before cleanup (threshold: ${threshold})`));
360
380
  }
361
381
  await new Promise(resolve => setTimeout(resolve, cleanupDelay));
362
382
 
@@ -369,7 +389,7 @@ async function performRealtimeWindowCleanup(browserInstance, threshold = REALTIM
369
389
  if (context.isIncognito && context !== browserInstance.defaultBrowserContext()) {
370
390
  const contextPages = await context.pages();
371
391
  if (forceDebug) {
372
- console.log(formatLogMessage('debug', `[realtime_cleanup] Found ${contextPages.length} pages in popup context`));
392
+ console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Found ${contextPages.length} pages in popup context`));
373
393
  }
374
394
  // Close popup context pages
375
395
  for (const page of contextPages) {
@@ -381,7 +401,7 @@ async function performRealtimeWindowCleanup(browserInstance, threshold = REALTIM
381
401
  }
382
402
  } catch (contextErr) {
383
403
  if (forceDebug) {
384
- console.log(formatLogMessage('debug', `[realtime_cleanup] Context cleanup error: ${contextErr.message}`));
404
+ console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Context cleanup error: ${contextErr.message}`));
385
405
  }
386
406
  }
387
407
 
@@ -418,7 +438,7 @@ async function performRealtimeWindowCleanup(browserInstance, threshold = REALTIM
418
438
  const unsafePagesCount = pagesToClose.length - safePagesToClose.length;
419
439
 
420
440
  if (unsafePagesCount > 0 && forceDebug) {
421
- console.log(formatLogMessage('debug', `[realtime_cleanup] Skipping ${unsafePagesCount} active pages for safety`));
441
+ console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Skipping ${unsafePagesCount} active pages for safety`));
422
442
  }
423
443
 
424
444
  if (safePagesToClose.length === 0) {
@@ -426,7 +446,7 @@ async function performRealtimeWindowCleanup(browserInstance, threshold = REALTIM
426
446
  const reason = pagesToClose.length === 0 ?
427
447
  `${sortedPages.length} content pages, keeping ${pagesToKeep}` :
428
448
  `${pagesToClose.length} pages still active`;
429
- console.log(formatLogMessage('debug', `[realtime_cleanup] No pages need closing (${reason})`));
449
+ console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} No pages need closing (${reason})`));
430
450
  }
431
451
  result.success = true;
432
452
  result.totalPages = allPagesAfterDelay.length;
@@ -450,12 +470,12 @@ async function performRealtimeWindowCleanup(browserInstance, threshold = REALTIM
450
470
  closedCount++;
451
471
 
452
472
  if (forceDebug) {
453
- console.log(formatLogMessage('debug', `[realtime_cleanup] Closed old page: ${pageUrl.substring(0, 50)}...`));
473
+ console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Closed old page: ${pageUrl.substring(0, 50)}...`));
454
474
  }
455
475
  }
456
476
  } catch (closeErr) {
457
477
  if (forceDebug) {
458
- console.log(formatLogMessage('debug', `[realtime_cleanup] Failed to close page: ${closeErr.message}`));
478
+ console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Failed to close page: ${closeErr.message}`));
459
479
  }
460
480
  }
461
481
  }
@@ -463,7 +483,7 @@ async function performRealtimeWindowCleanup(browserInstance, threshold = REALTIM
463
483
  const remainingPages = allPagesAfterDelay.length - closedCount;
464
484
 
465
485
  if (forceDebug) {
466
- console.log(formatLogMessage('debug', `[realtime_cleanup] Closed ${closedCount}/${pagesToClose.length} oldest pages (${unsafePagesCount} skipped for safety), ${remainingPages} pages remaining`));
486
+ console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Closed ${closedCount}/${pagesToClose.length} oldest pages (${unsafePagesCount} skipped for safety), ${remainingPages} pages remaining`));
467
487
  }
468
488
 
469
489
  result.success = true;
@@ -476,7 +496,7 @@ async function performRealtimeWindowCleanup(browserInstance, threshold = REALTIM
476
496
  return result;
477
497
  } catch (cleanupErr) {
478
498
  if (forceDebug) {
479
- console.log(formatLogMessage('debug', `[realtime_cleanup] Cleanup failed: ${cleanupErr.message}`));
499
+ console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Cleanup failed: ${cleanupErr.message}`));
480
500
  }
481
501
  // Initialize result object with consistent shape for error case
482
502
  const result = {
@@ -520,15 +540,15 @@ async function isPageFromPreviousScan(page, forceDebug) {
520
540
  // Use tracker timestamp instead of expensive page.title() CDP call
521
541
  const now = Date.now();
522
542
  const createdAt = pageCreationTracker.get(page);
523
- if (createdAt && now - createdAt > 120000) {
524
- // Page is older than 2 minutes -- likely from a previous scan
543
+ if (createdAt && now - createdAt > PREVIOUS_SCAN_CREATION_AGE_MS) {
544
+ // Page older than the creation-age threshold -- likely from a previous scan
525
545
  return true;
526
546
  }
527
-
547
+
528
548
  // Check usage tracker -- idle pages are likely old
529
549
  const usage = pageUsageTracker.get(page);
530
- if (usage && !usage.isProcessing && now - usage.lastActivity > 60000) {
531
- return true; // Idle for over 60 seconds
550
+ if (usage && !usage.isProcessing && now - usage.lastActivity > PREVIOUS_SCAN_IDLE_AGE_MS) {
551
+ return true; // Idle beyond the idle-age threshold
532
552
  }
533
553
 
534
554
  // Fallback: only use page.title() if trackers have no data
@@ -552,9 +572,9 @@ async function isPageFromPreviousScan(page, forceDebug) {
552
572
  try {
553
573
  // Cache URL for error logging - wrap in try-catch as page might be closed
554
574
  const pageUrl = page.url();
555
- console.log(formatLogMessage('debug', `[isPageFromPreviousScan] Error evaluating page ${pageUrl}: ${err.message}`));
575
+ console.log(formatLogMessage('debug', `${IS_PAGE_FROM_PREVIOUS_SCAN_TAG} Error evaluating page ${pageUrl}: ${err.message}`));
556
576
  } catch (urlErr) {
557
- console.log(formatLogMessage('debug', `[isPageFromPreviousScan] Error evaluating page: ${err.message}`));
577
+ console.log(formatLogMessage('debug', `${IS_PAGE_FROM_PREVIOUS_SCAN_TAG} Error evaluating page: ${err.message}`));
558
578
  }
559
579
  }
560
580
  return false; // Conservative - don't close if we can't evaluate
@@ -581,22 +601,13 @@ function untrackPage(page) {
581
601
  }
582
602
 
583
603
  /**
584
- * Purges stale entries from tracking Maps (pages that were closed without cleanup)
585
- * Should be called periodically to prevent memory leaks
604
+ * No-op since the trackers were migrated to WeakMap: GC reclaims dead-page
605
+ * entries automatically when Puppeteer drops its internal references. Kept
606
+ * exported so the ~7 callers in nwss.js continue to compile; safe to delete
607
+ * entirely once those callsites are scrubbed.
586
608
  */
587
609
  function purgeStaleTrackers() {
588
- for (const [page] of pageCreationTracker) {
589
- try {
590
- if (page.isClosed()) {
591
- pageCreationTracker.delete(page);
592
- pageUsageTracker.delete(page);
593
- }
594
- } catch (e) {
595
- // Page reference is invalid, remove it
596
- pageCreationTracker.delete(page);
597
- pageUsageTracker.delete(page);
598
- }
599
- }
610
+ // intentionally empty
600
611
  }
601
612
 
602
613
  /**
@@ -651,8 +662,13 @@ async function testNetworkCapability(browserInstance, timeout = 10000) {
651
662
  'Network.enable test timeout'
652
663
  );
653
664
 
654
- // Turn off interception and close
655
- await testPage.setRequestInterception(false);
665
+ // Turn off interception. Symmetric to the enable above — Network.disable
666
+ // can hang for the same CDP reasons, so it needs the same watchdog.
667
+ await raceWithTimeout(
668
+ testPage.setRequestInterception(false),
669
+ timeout,
670
+ 'Network.disable test timeout'
671
+ );
656
672
  result.capable = true;
657
673
  result.responseTime = Date.now() - startTime;
658
674
 
@@ -718,7 +734,7 @@ async function checkBrowserHealth(browserInstance, timeout = 8000) {
718
734
  healthResult.responseTime = Date.now() - startTime;
719
735
 
720
736
  // Test 3: Check for excessive pages (memory leak indicator)
721
- if (pages.length > 30) {
737
+ if (pages.length > PAGE_COUNT_WARN_THRESHOLD) {
722
738
  healthResult.recommendations.push('Too many open pages - consider browser restart');
723
739
  }
724
740
 
@@ -740,12 +756,18 @@ async function checkBrowserHealth(browserInstance, timeout = 8000) {
740
756
 
741
757
  // Test 5: Network capability test on the same page (avoids creating a second test page)
742
758
  try {
759
+ const netTimeout = Math.min(timeout, 5000);
743
760
  await raceWithTimeout(
744
761
  testPage.setRequestInterception(true),
745
- Math.min(timeout, 5000),
762
+ netTimeout,
746
763
  'Network.enable test timeout'
747
764
  );
748
- await testPage.setRequestInterception(false);
765
+ // Disable can hang for the same CDP reasons enable can; mirror the watchdog.
766
+ await raceWithTimeout(
767
+ testPage.setRequestInterception(false),
768
+ netTimeout,
769
+ 'Network.disable test timeout'
770
+ );
749
771
  healthResult.networkCapable = true;
750
772
  } catch (networkErr) {
751
773
  healthResult.networkCapable = false;
@@ -837,9 +859,9 @@ async function checkBrowserMemory(browserInstance) {
837
859
  else resolve(stdout);
838
860
  });
839
861
  });
840
- const memoryKB = parseInt(memInfo.trim());
841
-
842
- if (!isNaN(memoryKB)) {
862
+ const memoryKB = parseInt(memInfo.trim(), 10);
863
+
864
+ if (!Number.isNaN(memoryKB)) {
843
865
  const memoryMB = Math.round(memoryKB / 1024);
844
866
  memoryResult.available = true;
845
867
  memoryResult.usage = {
@@ -897,7 +919,7 @@ async function testBrowserConnectivity(browserInstance, timeout = 2500) {
897
919
 
898
920
  try {
899
921
  // Test 1: Basic browser connection
900
- const isConnected = browserInstance.isConnected();
922
+ const isConnected = browserInstance.connected;
901
923
  connectivityResult.connected = isConnected;
902
924
 
903
925
  if (!isConnected) {
@@ -994,7 +1016,9 @@ async function performHealthAssessment(browserInstance, options = {}) {
994
1016
  assessment.needsRestart = true;
995
1017
  } else if (assessment.recommendations.length > 0) {
996
1018
  assessment.overall = 'degraded';
997
- assessment.needsRestart = RESTART_RECOMMENDATION_REGEX.test(assessment.recommendations.join('|'));
1019
+ // Test each recommendation independently — avoids allocating a joined
1020
+ // string just to feed one regex test against it.
1021
+ assessment.needsRestart = assessment.recommendations.some(r => RESTART_RECOMMENDATION_REGEX.test(r));
998
1022
  } else {
999
1023
  assessment.overall = 'healthy';
1000
1024
  assessment.needsRestart = false;
@@ -1058,10 +1082,10 @@ async function monitorBrowserHealth(browserInstance, context = {}, options = {})
1058
1082
  } else if (urlsSinceCleanup >= cleanupInterval) {
1059
1083
  result.shouldRestart = true;
1060
1084
  result.reason = `Scheduled cleanup after ${urlsSinceCleanup} URLs`;
1061
- } else if (assessment.browser.responseTime > 6000) {
1085
+ } else if (assessment.browser.responseTime > SLOW_RESPONSE_RESTART_MS) {
1062
1086
  result.shouldRestart = true;
1063
- result.reason = `Slow browser response: ${assessment.browser.responseTime}ms (threshold: 6000ms)`;
1064
- } else if (assessment.browser.pageCount > 40) {
1087
+ result.reason = `Slow browser response: ${assessment.browser.responseTime}ms (threshold: ${SLOW_RESPONSE_RESTART_MS}ms)`;
1088
+ } else if (assessment.browser.pageCount > PAGE_COUNT_RESTART_THRESHOLD) {
1065
1089
  // More aggressive page count monitoring for Puppeteer 23.x
1066
1090
  result.shouldRestart = true;
1067
1091
  result.reason = `Too many open pages: ${assessment.browser.pageCount} (memory leak protection)`;
@@ -1070,7 +1094,7 @@ async function monitorBrowserHealth(browserInstance, context = {}, options = {})
1070
1094
  // Logging
1071
1095
  if (!silentMode && result.shouldRestart) {
1072
1096
  const progress = totalSites > 0 ? ` (${siteIndex + 1}/${totalSites})` : '';
1073
- console.log(`\n${messageColors.fileOp('?? Browser restart needed')} before site${progress}: ${result.reason}`);
1097
+ console.log(`\n${messageColors.fileOp('Browser restart needed')} before site${progress}: ${result.reason}`);
1074
1098
  }
1075
1099
 
1076
1100
  if (forceDebug && !result.shouldRestart) {
package/lib/cdp.js CHANGED
@@ -25,7 +25,11 @@
25
25
  // - Compatible with headless and headful modes
26
26
  // - Tested with Puppeteer 13+ but should work with older versions
27
27
 
28
- const { formatLogMessage } = require('./colorize');
28
+ const { formatLogMessage, messageColors } = require('./colorize');
29
+
30
+ // Precomputed colored '[cdp]' subsystem prefix. formatLogMessage only colors
31
+ // the [severity] tag; '[cdp]' was sitting plain inside the message string.
32
+ const CDP_TAG = messageColors.processing('[cdp]');
29
33
 
30
34
  /**
31
35
  * Race a promise against a timeout, clearing the timer when the promise settles.
@@ -43,6 +47,33 @@ function raceWithTimeout(promise, ms, message) {
43
47
  return Promise.race([promise, timeoutPromise]).finally(() => clearTimeout(timeoutId));
44
48
  }
45
49
 
50
+ // Shared no-op cleanup used by every no-CDP / CDP-failed return path. Hoisted
51
+ // so createSessionResult() doesn't allocate a fresh `async () => {}` per call.
52
+ const NOOP_CLEANUP = async () => {};
53
+
54
+ /**
55
+ * Safely extract a hostname from a URL string with a fallback for malformed URLs.
56
+ * Used in logs where 'unknown' or a truncated URL is acceptable on parse failure.
57
+ */
58
+ function safeHostname(url, fallback = 'unknown') {
59
+ try { return new URL(url).hostname; } catch { return fallback; }
60
+ }
61
+
62
+ /**
63
+ * Recognize CDP errors that mean the browser is broken and needs restarting.
64
+ * Centralized so setRequestInterceptionWithTimeout and createCDPSession's catch
65
+ * stay in sync — previously each had its own slightly-different pattern list.
66
+ */
67
+ function isCriticalCDPError(message) {
68
+ if (!message) return false;
69
+ return message.includes('Network.enable timed out') ||
70
+ message.includes('Protocol error') ||
71
+ message.includes('ProtocolError') ||
72
+ message.includes('Session closed') ||
73
+ message.includes('Target closed') ||
74
+ message.includes('Browser has been closed');
75
+ }
76
+
46
77
  /**
47
78
  * Creates a standardized session result object for consistent V8 optimization
48
79
  * @param {object|null} session - CDP session or null
@@ -50,7 +81,7 @@ function raceWithTimeout(promise, ms, message) {
50
81
  * @param {boolean} isEnhanced - Whether enhanced features are active
51
82
  * @returns {object} Standardized session object
52
83
  */
53
- const createSessionResult = (session = null, cleanup = async () => {}, isEnhanced = false) => ({
84
+ const createSessionResult = (session = null, cleanup = NOOP_CLEANUP, isEnhanced = false) => ({
54
85
  session,
55
86
  cleanup,
56
87
  isEnhanced
@@ -76,10 +107,8 @@ async function setRequestInterceptionWithTimeout(page, timeout = 15000) {
76
107
  try {
77
108
  await raceWithTimeout(page.setRequestInterception(true), timeout, 'Request interception timeout - first attempt');
78
109
  } catch (firstError) {
79
- // Check for immediate critical failures
80
- if (firstError.message.includes('Target closed') ||
81
- firstError.message.includes('Session closed') ||
82
- firstError.message.includes('Browser has been closed')) {
110
+ // Don't retry if the browser/session is already gone — escalate immediately.
111
+ if (isCriticalCDPError(firstError.message)) {
83
112
  throw new Error('CRITICAL_BROWSER_ERROR: ' + firstError.message);
84
113
  }
85
114
 
@@ -87,8 +116,7 @@ async function setRequestInterceptionWithTimeout(page, timeout = 15000) {
87
116
  try {
88
117
  await raceWithTimeout(page.setRequestInterception(true), timeout * 2, 'Request interception timeout - retry failed');
89
118
  } catch (retryError) {
90
- if (retryError.message.includes('Network.enable timed out') ||
91
- retryError.message.includes('ProtocolError')) {
119
+ if (isCriticalCDPError(retryError.message)) {
92
120
  throw new Error('CRITICAL_NETWORK_ERROR: ' + retryError.message);
93
121
  }
94
122
  throw retryError;
@@ -103,7 +131,7 @@ async function setRequestInterceptionWithTimeout(page, timeout = 15000) {
103
131
  * const cdpManager = await createCDPSession(page, 'https://example.com', {
104
132
  * enableCDP: true, // Global CDP flag
105
133
  * siteSpecificCDP: true, // Site-specific CDP flag
106
- * forceDebug: false // Enable debug logging
134
+ * forceDebug: true // When true, install the Network.requestWillBeSent log listener
107
135
  * });
108
136
  *
109
137
  * // Your page automation code here...
@@ -130,32 +158,32 @@ async function setRequestInterceptionWithTimeout(page, timeout = 15000) {
130
158
  * @param {boolean} options.enableCDP - Global CDP flag (from --cdp command line)
131
159
  * @param {boolean} options.siteSpecificCDP - Site-specific CDP flag (from config)
132
160
  * @param {boolean} options.forceDebug - Debug logging flag
133
- * @param {string} options.currentUrl - Current URL for domain-specific CDP decisions
134
161
  * @returns {Promise<object>} CDP session object with cleanup method
135
162
  */
136
163
  async function createCDPSession(page, currentUrl, options = {}) {
137
164
  const { enableCDP, siteSpecificCDP, forceDebug } = options;
138
-
139
- // Determine if CDP logging is needed for this page
140
- // You can customize this logic for your application's needs
141
- const cdpLoggingNeeded = enableCDP || siteSpecificCDP === true;
142
-
165
+
166
+ // The only thing this function's CDP session does is feed a debug-gated
167
+ // Network.requestWillBeSent listener. With !forceDebug the listener body is
168
+ // a no-op, so setting up CDP (and paying Network.enable's overhead) buys
169
+ // nothing. Skip entirely in that case — same observable behavior as before,
170
+ // minus the wasted protocol traffic.
171
+ const cdpLoggingNeeded = (enableCDP || siteSpecificCDP === true) && forceDebug;
172
+
143
173
  if (!cdpLoggingNeeded) {
144
- // Return a null session with no-op cleanup for consistent API
145
174
  return createSessionResult();
146
175
  }
147
176
 
148
- // Log which CDP mode is being used
149
- if (forceDebug) {
150
- const urlHostname = (() => {
151
- try { return new URL(currentUrl).hostname; } catch { return 'unknown'; }
152
- })();
177
+ // Parse the current URL hostname once and reuse it for the mode-log line,
178
+ // the per-request listener's first-vs-third-party comparison, and (with a
179
+ // different fallback) the catch-block error context.
180
+ const currentHostname = safeHostname(currentUrl);
153
181
 
154
- if (enableCDP) {
155
- console.log(formatLogMessage('debug', `[cdp] Global CDP enabled by --cdp flag for ${urlHostname}`));
156
- } else if (siteSpecificCDP === true) {
157
- console.log(formatLogMessage('debug', `[cdp] Site-specific CDP enabled for ${urlHostname} (via cdp: true or cdp_specific domain match)`));
158
- }
182
+ // Log which CDP mode is being used
183
+ if (enableCDP) {
184
+ console.log(formatLogMessage('debug', `${CDP_TAG} Global CDP enabled by --cdp flag for ${currentHostname}`));
185
+ } else if (siteSpecificCDP === true) {
186
+ console.log(formatLogMessage('debug', `${CDP_TAG} Site-specific CDP enabled for ${currentHostname} (via cdp: true or cdp_specific domain match)`));
159
187
  }
160
188
 
161
189
  let cdpSession = null;
@@ -164,84 +192,78 @@ async function createCDPSession(page, currentUrl, options = {}) {
164
192
  // Create CDP session using modern Puppeteer 20+ API
165
193
  // Add timeout protection for CDP session creation
166
194
  cdpSession = await raceWithTimeout(page.createCDPSession(), 20000, 'CDP session creation timeout');
167
-
168
- // Enable network domain - required for network event monitoring
169
- await cdpSession.send('Network.enable');
170
195
 
171
- // Parse current URL hostname once, reused across all request events
172
- let currentHostname = 'unknown';
173
- try { currentHostname = new URL(currentUrl).hostname; } catch (_) {}
196
+ // Enable network domain - required for network event monitoring. This is
197
+ // the operation the rest of the codebase has learned can hang under
198
+ // overload; race against a watchdog so we don't block the page load.
199
+ await raceWithTimeout(
200
+ cdpSession.send('Network.enable'),
201
+ 15000,
202
+ 'Network.enable timed out'
203
+ );
174
204
 
175
205
  // Set up network request monitoring
176
- // This captures ALL network requests at the browser engine level
206
+ // This captures ALL network requests at the browser engine level.
207
+ // (We've already established forceDebug is true at this point — no inner
208
+ // check needed.)
177
209
  cdpSession.on('Network.requestWillBeSent', (params) => {
178
- if (forceDebug) {
179
- const { url: requestUrl, method } = params.request;
180
- const initiator = params.initiator ? params.initiator.type : 'unknown';
181
- let hostnameForLog = currentHostname;
182
- try {
183
- const requestHostname = new URL(requestUrl).hostname;
184
- if (currentHostname !== requestHostname) {
185
- hostnameForLog = `${currentHostname}?${requestHostname}`;
186
- }
187
- } catch (_) {}
188
- console.log(formatLogMessage('debug', `[cdp][${hostnameForLog}] ${method} ${requestUrl} (initiator: ${initiator})`));
189
- }
210
+ const { url: requestUrl, method } = params.request;
211
+ const initiator = params.initiator?.type ?? 'unknown';
212
+ let hostnameForLog = currentHostname;
213
+ try {
214
+ const requestHostname = new URL(requestUrl).hostname;
215
+ if (currentHostname !== requestHostname) {
216
+ hostnameForLog = `${currentHostname}?${requestHostname}`;
217
+ }
218
+ } catch (_) {}
219
+ console.log(formatLogMessage('debug', `${CDP_TAG}[${hostnameForLog}] ${method} ${requestUrl} (initiator: ${initiator})`));
190
220
  });
191
221
 
192
- if (forceDebug) {
193
- console.log(formatLogMessage('debug', `CDP session created successfully for ${currentUrl}`));
194
- }
222
+ console.log(formatLogMessage('debug', `${CDP_TAG} CDP session created successfully for ${currentUrl}`));
195
223
 
196
- return {
197
- session: cdpSession,
198
- cleanup: async () => {
224
+ return createSessionResult(
225
+ cdpSession,
226
+ async () => {
199
227
  // Safe cleanup that never throws errors
200
228
  if (cdpSession) {
201
229
  try {
202
230
  await cdpSession.detach();
203
- if (forceDebug) {
204
- console.log(formatLogMessage('debug', `CDP session detached for ${currentUrl}`));
205
- }
231
+ console.log(formatLogMessage('debug', `${CDP_TAG} CDP session detached for ${currentUrl}`));
206
232
  } catch (cdpCleanupErr) {
207
233
  // Log cleanup errors but don't throw - cleanup should never fail the calling code
208
- if (forceDebug) {
209
- console.log(formatLogMessage('debug', `Failed to detach CDP session for ${currentUrl}: ${cdpCleanupErr.message}`));
210
- }
234
+ console.log(formatLogMessage('debug', `${CDP_TAG} Failed to detach CDP session for ${currentUrl}: ${cdpCleanupErr.message}`));
211
235
  }
212
236
  }
213
237
  },
214
- isEnhanced: false
215
- };
238
+ false
239
+ );
216
240
 
217
241
  } catch (cdpErr) {
218
- cdpSession = null; // Reset on failure
219
-
220
- // Enhanced error context for CDP domain-specific debugging
221
- const urlContext = (() => {
222
- try {
223
- return new URL(currentUrl).hostname;
224
- } catch {
225
- return `${currentUrl.substring(0, 50)}...`;
242
+ // If the session was created but a subsequent send/wire-up failed, detach
243
+ // it so we don't leak a half-attached session. Previously the code just
244
+ // nulled the local and orphaned the session. We're already past the
245
+ // cdpLoggingNeeded gate here so forceDebug is true — log a failed detach
246
+ // instead of swallowing it, so partial-cleanup failures aren't invisible.
247
+ if (cdpSession) {
248
+ try { await cdpSession.detach(); }
249
+ catch (partialDetachErr) {
250
+ console.log(formatLogMessage('debug', `${CDP_TAG} Partial-session detach failed for ${currentUrl}: ${partialDetachErr.message}`));
226
251
  }
227
- })();
252
+ cdpSession = null;
253
+ }
254
+
255
+ // Enhanced error context for CDP domain-specific debugging
256
+ const urlContext = safeHostname(currentUrl, `${currentUrl.substring(0, 50)}...`);
228
257
 
229
- // Categorize CDP errors for proper handling
230
- // Enhanced error handling for Puppeteer 20+ error patterns
231
- if (cdpErr.message.includes('Network.enable timed out') ||
232
- cdpErr.message.includes('Protocol error') ||
233
- cdpErr.message.includes('Session closed') ||
234
- cdpErr.message.includes('Target closed') ||
235
- cdpErr.message.includes('Browser has been closed')) {
236
- // CRITICAL ERROR: Browser is broken and needs restart
237
- // Re-throw these errors so calling code can handle browser restart
238
- throw new Error(`Browser protocol broken: ${cdpErr.message}`);
258
+ // Critical errors: browser is broken, propagate so the caller can restart.
259
+ if (isCriticalCDPError(cdpErr.message)) {
260
+ throw new Error(`Browser protocol broken (${urlContext}): ${cdpErr.message}`);
239
261
  }
240
-
262
+
241
263
  // NON-CRITICAL ERROR: CDP failed but browser is still usable
242
264
  // Log warning but return working session object
243
- console.warn(formatLogMessage('warn', `[cdp] Failed to attach CDP session for ${currentUrl}: ${cdpErr.message}`));
244
-
265
+ console.warn(formatLogMessage('warn', `${CDP_TAG} Failed to attach CDP session for ${urlContext}: ${cdpErr.message}`));
266
+
245
267
  // Return null session with no-op cleanup for consistent API
246
268
  return createSessionResult();
247
269
  }