@fanboynz/network-scanner 1.0.42 → 1.0.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/cloudflare.js CHANGED
@@ -9,6 +9,38 @@
9
9
  */
10
10
  const CLOUDFLARE_MODULE_VERSION = '2.1.0';
11
11
 
12
/**
 * Timeout constants for the Cloudflare handling helpers (all values in milliseconds).
 *
 * The table is frozen so no code path can silently retune a timeout that every
 * helper in this module shares.
 *
 * NOTE: the *_BUFFER keys are not uniform. CLICK_TIMEOUT_BUFFER and
 * NAVIGATION_TIMEOUT_BUFFER are margins that callers ADD to an operation
 * timeout, while JS_CHALLENGE_BUFFER, TURNSTILE_COMPLETION_BUFFER and
 * SELECTOR_WAIT_BUFFER are ABSOLUTE outer deadlines used as the losing side of
 * a Promise.race. FALLBACK_TIMEOUT is used both as an added margin and as an
 * upper cap for the plain setTimeout fallback.
 */
const TIMEOUTS = Object.freeze({
  // Detection and page evaluation
  QUICK_DETECTION: 3000,               // Quick Cloudflare detection check
  PAGE_EVALUATION: 8000,               // Standard page evaluation timeout
  PAGE_EVALUATION_SAFE: 10000,         // Safe page evaluation with extra buffer
  CHALLENGE_COMPLETION: 3000,          // Challenge completion check

  // Phishing warning handling
  PHISHING_WAIT: 2000,                 // Wait before checking phishing warning
  PHISHING_CLICK: 3000,                // Timeout for clicking phishing continue button
  PHISHING_NAVIGATION: 8000,           // Wait for navigation after phishing bypass

  // Verification challenge handling
  CHALLENGE_WAIT: 1000,                // Wait before checking verification challenge
  CHALLENGE_SOLVING: 20000,            // Overall challenge solving timeout
  JS_CHALLENGE: 15000,                 // JS challenge completion wait
  JS_CHALLENGE_BUFFER: 18000,          // JS challenge outer deadline (absolute)

  // Turnstile handling
  TURNSTILE_OPERATION: 8000,           // Turnstile iframe operations
  TURNSTILE_COMPLETION: 12000,         // Turnstile completion check
  TURNSTILE_COMPLETION_BUFFER: 15000,  // Turnstile completion outer deadline (absolute)

  // Selector and element interaction
  SELECTOR_WAIT: 2000,                 // Wait for selector to appear
  SELECTOR_WAIT_BUFFER: 2500,          // Selector wait outer deadline (absolute)
  ELEMENT_INTERACTION_DELAY: 500,      // Delay before element interactions

  // Click and navigation
  CLICK_TIMEOUT: 5000,                 // Standard click operation timeout
  CLICK_TIMEOUT_BUFFER: 1000,          // Margin added on top of the click timeout
  NAVIGATION_TIMEOUT: 15000,           // Standard navigation timeout
  NAVIGATION_TIMEOUT_BUFFER: 2000,     // Margin added on top of the navigation timeout
  FALLBACK_TIMEOUT: 5000,              // Fallback timeout for failed operations

  // Adaptive overall timeouts
  ADAPTIVE_TIMEOUT_WITH_INDICATORS: 25000,         // Explicit config + indicators found
  ADAPTIVE_TIMEOUT_WITHOUT_INDICATORS: 20000,      // Explicit config only
  ADAPTIVE_TIMEOUT_AUTO_WITH_INDICATORS: 15000,    // Auto-detected, indicators found
  ADAPTIVE_TIMEOUT_AUTO_WITHOUT_INDICATORS: 10000  // Auto-detected, no indicators
});
43
+
12
44
  /**
13
45
  * Gets module version information
14
46
  * @returns {object} Version information object
@@ -29,26 +61,26 @@ async function waitForTimeout(page, timeout) {
29
61
  if (typeof page.waitForTimeout === 'function') {
30
62
  await Promise.race([
31
63
  page.waitForTimeout(timeout),
32
- new Promise((_, reject) => setTimeout(() => reject(new Error('waitForTimeout exceeded')), timeout + 5000))
64
+ new Promise((_, reject) => setTimeout(() => reject(new Error('waitForTimeout exceeded')), timeout + TIMEOUTS.FALLBACK_TIMEOUT))
33
65
  ]);
34
66
  } else if (typeof page.waitFor === 'function') {
35
67
  await Promise.race([
36
68
  page.waitFor(timeout),
37
- new Promise((_, reject) => setTimeout(() => reject(new Error('waitFor exceeded')), timeout + 5000))
69
+ new Promise((_, reject) => setTimeout(() => reject(new Error('waitFor exceeded')), timeout + TIMEOUTS.FALLBACK_TIMEOUT))
38
70
  ]);
39
71
  } else {
40
72
  await new Promise(resolve => setTimeout(resolve, timeout));
41
73
  }
42
74
  } catch (error) {
43
75
  // If all else fails, use setTimeout
44
- await new Promise(resolve => setTimeout(resolve, Math.min(timeout, 5000)));
76
+ await new Promise(resolve => setTimeout(resolve, Math.min(timeout, TIMEOUTS.FALLBACK_TIMEOUT)));
45
77
  }
46
78
  }
47
79
 
48
80
  /**
49
81
  * Safe page evaluation with timeout protection
50
82
  */
51
- async function safePageEvaluate(page, func, timeout = 10000) {
83
+ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_SAFE) {
52
84
  try {
53
85
  return await Promise.race([
54
86
  page.evaluate(func),
@@ -72,12 +104,12 @@ async function safePageEvaluate(page, func, timeout = 10000) {
72
104
  /**
73
105
  * Safe element clicking with timeout protection
74
106
  */
75
- async function safeClick(page, selector, timeout = 5000) {
107
+ async function safeClick(page, selector, timeout = TIMEOUTS.CLICK_TIMEOUT) {
76
108
  try {
77
109
  return await Promise.race([
78
110
  page.click(selector, { timeout: timeout }),
79
111
  new Promise((_, reject) =>
80
- setTimeout(() => reject(new Error('Click timeout')), timeout + 1000)
112
+ setTimeout(() => reject(new Error('Click timeout')), timeout + TIMEOUTS.CLICK_TIMEOUT_BUFFER)
81
113
  )
82
114
  ]);
83
115
  } catch (error) {
@@ -88,12 +120,12 @@ async function safeClick(page, selector, timeout = 5000) {
88
120
  /**
89
121
  * Safe navigation waiting with timeout protection
90
122
  */
91
- async function safeWaitForNavigation(page, timeout = 15000) {
123
+ async function safeWaitForNavigation(page, timeout = TIMEOUTS.NAVIGATION_TIMEOUT) {
92
124
  try {
93
125
  return await Promise.race([
94
126
  page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: timeout }),
95
127
  new Promise((_, reject) =>
96
- setTimeout(() => reject(new Error('Navigation timeout')), timeout + 2000)
128
+ setTimeout(() => reject(new Error('Navigation timeout')), timeout + TIMEOUTS.NAVIGATION_TIMEOUT_BUFFER)
97
129
  )
98
130
  ]);
99
131
  } catch (error) {
@@ -141,7 +173,7 @@ async function quickCloudflareDetection(page, forceDebug = false) {
141
173
  url,
142
174
  bodySnippet: bodyText.substring(0, 200)
143
175
  };
144
- }, 3000); // Quick 3-second timeout
176
+ }, TIMEOUTS.QUICK_DETECTION);
145
177
 
146
178
  if (forceDebug && quickCheck.hasIndicators) {
147
179
  console.log(`[debug][cloudflare] Quick detection found Cloudflare indicators on ${quickCheck.url}`);
@@ -232,7 +264,7 @@ async function analyzeCloudflareChallenge(page) {
232
264
  url: window.location.href,
233
265
  bodySnippet: bodyText.substring(0, 200)
234
266
  };
235
- }, 8000); // Reduced from 10 to 8 seconds
267
+ }, TIMEOUTS.PAGE_EVALUATION);
236
268
  } catch (error) {
237
269
  return {
238
270
  isChallengePresent: false,
@@ -247,6 +279,17 @@ async function analyzeCloudflareChallenge(page) {
247
279
 
248
280
  /**
249
281
  * Handles Cloudflare phishing warnings with timeout protection and enhanced debug logging
282
+ *
283
+ * @param {Object} page - Puppeteer page instance
284
+ * @param {string} currentUrl - URL being processed
285
+ * @param {boolean} forceDebug - Debug logging flag
286
+ * @returns {Promise<Object>} Phishing warning result:
287
+ * {
288
+ * success: boolean, // True if no warning found OR successfully bypassed
289
+ * attempted: boolean, // True if warning was detected and bypass attempted
290
+ * error: string|null, // Error message if bypass failed
291
+ * details: object|null // Analysis details from analyzeCloudflareChallenge()
292
+ * }
250
293
  */
251
294
  async function handlePhishingWarning(page, currentUrl, forceDebug = false) {
252
295
  const result = {
@@ -260,7 +303,7 @@ async function handlePhishingWarning(page, currentUrl, forceDebug = false) {
260
303
  if (forceDebug) console.log(`[debug][cloudflare] Checking for phishing warning on ${currentUrl}`);
261
304
 
262
305
  // Shorter wait with timeout protection
263
- await waitForTimeout(page, 2000);
306
+ await waitForTimeout(page, TIMEOUTS.PHISHING_WAIT);
264
307
 
265
308
  const challengeInfo = await analyzeCloudflareChallenge(page);
266
309
 
@@ -277,8 +320,8 @@ async function handlePhishingWarning(page, currentUrl, forceDebug = false) {
277
320
 
278
321
  try {
279
322
  // Use safe click with shorter timeout
280
- await safeClick(page, 'a[href*="continue"]', 3000);
281
- await safeWaitForNavigation(page, 8000);
323
+ await safeClick(page, 'a[href*="continue"]', TIMEOUTS.PHISHING_CLICK);
324
+ await safeWaitForNavigation(page, TIMEOUTS.PHISHING_NAVIGATION);
282
325
 
283
326
  result.success = true;
284
327
  if (forceDebug) console.log(`[debug][cloudflare] Successfully bypassed phishing warning for ${currentUrl}`);
@@ -300,6 +343,19 @@ async function handlePhishingWarning(page, currentUrl, forceDebug = false) {
300
343
 
301
344
  /**
302
345
  * Attempts to solve Cloudflare challenges with timeout protection and enhanced debug logging
346
+ *
347
+ * @param {Object} page - Puppeteer page instance
348
+ * @param {string} currentUrl - URL being processed
349
+ * @param {boolean} forceDebug - Debug logging flag
350
+ * @returns {Promise<Object>} Challenge verification result:
351
+ * {
352
+ * success: boolean, // True if no challenge found OR successfully solved
353
+ * attempted: boolean, // True if challenge was detected and solving attempted
354
+ * error: string|null, // Error message if solving failed
355
+ * requiresHuman: boolean, // True if CAPTCHA detected (requires manual intervention)
356
+ * method: string|null, // Method that succeeded: 'js_challenge_wait', 'turnstile', 'legacy_checkbox'
357
+ * details: object|null // Analysis details from analyzeCloudflareChallenge()
358
+ * }
303
359
  */
304
360
  async function handleVerificationChallenge(page, currentUrl, forceDebug = false) {
305
361
  const result = {
@@ -315,7 +371,7 @@ async function handleVerificationChallenge(page, currentUrl, forceDebug = false)
315
371
  if (forceDebug) console.log(`[debug][cloudflare] Checking for verification challenge on ${currentUrl}`);
316
372
 
317
373
  // Reduced wait time
318
- await waitForTimeout(page, 1000);
374
+ await waitForTimeout(page, TIMEOUTS.CHALLENGE_WAIT);
319
375
 
320
376
  const challengeInfo = await analyzeCloudflareChallenge(page);
321
377
  result.details = challengeInfo;
@@ -381,7 +437,7 @@ async function attemptChallengeSolveWithTimeout(page, currentUrl, challengeInfo,
381
437
  return await Promise.race([
382
438
  attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug),
383
439
  new Promise((_, reject) =>
384
- setTimeout(() => reject(new Error('Challenge solving timeout')), 20000)
440
+ setTimeout(() => reject(new Error('Challenge solving timeout')), TIMEOUTS.CHALLENGE_SOLVING)
385
441
  )
386
442
  ]);
387
443
  } catch (error) {
@@ -480,10 +536,10 @@ async function waitForJSChallengeCompletion(page, forceDebug = false) {
480
536
  !document.querySelector('.cf-challenge-running') &&
481
537
  !document.querySelector('[data-cf-challenge]');
482
538
  },
483
- { timeout: 15000 }
539
+ { timeout: TIMEOUTS.JS_CHALLENGE }
484
540
  ),
485
541
  new Promise((_, reject) =>
486
- setTimeout(() => reject(new Error('JS challenge timeout')), 18000)
542
+ setTimeout(() => reject(new Error('JS challenge timeout')), TIMEOUTS.JS_CHALLENGE_BUFFER)
487
543
  )
488
544
  ]);
489
545
 
@@ -508,7 +564,7 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
508
564
 
509
565
  try {
510
566
  // Reduced timeout for Turnstile operations
511
- const turnstileTimeout = 8000; // Reduced from 10 to 8 seconds
567
+ const turnstileTimeout = TIMEOUTS.TURNSTILE_OPERATION;
512
568
 
513
569
  const turnstileSelectors = [
514
570
  'iframe[src*="challenges.cloudflare.com"]',
@@ -520,8 +576,8 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
520
576
  for (const selector of turnstileSelectors) {
521
577
  try {
522
578
  await Promise.race([
523
- page.waitForSelector(selector, { timeout: 2000 }),
524
- new Promise((_, reject) => setTimeout(() => reject(new Error('Selector timeout')), 2500))
579
+ page.waitForSelector(selector, { timeout: TIMEOUTS.SELECTOR_WAIT }),
580
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Selector timeout')), TIMEOUTS.SELECTOR_WAIT_BUFFER))
525
581
  ]);
526
582
 
527
583
  const frames = await page.frames();
@@ -554,11 +610,11 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
554
610
  for (const selector of checkboxSelectors) {
555
611
  try {
556
612
  await Promise.race([
557
- turnstileFrame.waitForSelector(selector, { timeout: 2000 }),
558
- new Promise((_, reject) => setTimeout(() => reject(new Error('Checkbox timeout')), 2500))
613
+ turnstileFrame.waitForSelector(selector, { timeout: TIMEOUTS.SELECTOR_WAIT }),
614
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Checkbox timeout')), TIMEOUTS.SELECTOR_WAIT_BUFFER))
559
615
  ]);
560
616
 
561
- await waitForTimeout(page, 500);
617
+ await waitForTimeout(page, TIMEOUTS.ELEMENT_INTERACTION_DELAY);
562
618
  await turnstileFrame.click(selector);
563
619
 
564
620
  if (forceDebug) console.log(`[debug][cloudflare] Clicked Turnstile checkbox: ${selector}`);
@@ -576,9 +632,9 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
576
632
  const responseInput = document.querySelector('input[name="cf-turnstile-response"]');
577
633
  return responseInput && responseInput.value && responseInput.value.length > 0;
578
634
  },
579
- { timeout: 12000 }
635
+ { timeout: TIMEOUTS.TURNSTILE_COMPLETION }
580
636
  ),
581
- new Promise((_, reject) => setTimeout(() => reject(new Error('Turnstile completion timeout')), 15000))
637
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Turnstile completion timeout')), TIMEOUTS.TURNSTILE_COMPLETION_BUFFER))
582
638
  ]);
583
639
 
584
640
  if (forceDebug) console.log(`[debug][cloudflare] Turnstile response token generated successfully`);
@@ -596,11 +652,11 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
596
652
  for (const selector of containerSelectors) {
597
653
  try {
598
654
  await Promise.race([
599
- page.waitForSelector(selector, { timeout: 2000 }),
600
- new Promise((_, reject) => setTimeout(() => reject(new Error('Container timeout')), 2500))
655
+ page.waitForSelector(selector, { timeout: TIMEOUTS.SELECTOR_WAIT }),
656
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Container timeout')), TIMEOUTS.SELECTOR_WAIT_BUFFER))
601
657
  ]);
602
658
 
603
- await waitForTimeout(page, 500);
659
+ await waitForTimeout(page, TIMEOUTS.ELEMENT_INTERACTION_DELAY);
604
660
  await page.click(selector);
605
661
 
606
662
  if (forceDebug) console.log(`[debug][cloudflare] Clicked Turnstile container: ${selector}`);
@@ -652,8 +708,8 @@ async function handleLegacyCheckbox(page, forceDebug = false) {
652
708
  for (const selector of legacySelectors) {
653
709
  try {
654
710
  await Promise.race([
655
- page.waitForSelector(selector, { timeout: 2000 }),
656
- new Promise((_, reject) => setTimeout(() => reject(new Error('Legacy selector timeout')), 2500))
711
+ page.waitForSelector(selector, { timeout: TIMEOUTS.SELECTOR_WAIT }),
712
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Legacy selector timeout')), TIMEOUTS.SELECTOR_WAIT_BUFFER))
657
713
  ]);
658
714
 
659
715
  const checkbox = await page.$(selector);
@@ -705,7 +761,7 @@ async function checkChallengeCompletion(page) {
705
761
  return (noChallengeRunning && noChallengeContainer && noChallengePage) ||
706
762
  hasClearanceCookie ||
707
763
  hasTurnstileResponse;
708
- }, 3000); // Reduced timeout
764
+ }, TIMEOUTS.CHALLENGE_COMPLETION);
709
765
 
710
766
  return { isCompleted };
711
767
  } catch (error) {
@@ -715,6 +771,43 @@ async function checkChallengeCompletion(page) {
715
771
 
716
772
  /**
717
773
  * Main function to handle all Cloudflare challenges with smart detection and adaptive timeouts
774
+ *
775
+ * @param {Object} page - Puppeteer page instance
776
+ * @param {string} currentUrl - URL being processed
777
+ * @param {Object} siteConfig - Configuration object with cloudflare_phish and cloudflare_bypass flags
778
+ * @param {boolean} forceDebug - Enable debug logging
779
+ *
780
+ * @returns {Promise<Object>} Result object with the following structure:
781
+ * {
782
+ * phishingWarning: {
783
+ * attempted: boolean, // Whether phishing bypass was attempted
784
+ * success: boolean, // Whether bypass succeeded (true if no warning or successfully bypassed)
785
+ * error: string|null, // Error message if bypass failed
786
+ * details: object|null // Challenge analysis details from analyzeCloudflareChallenge()
787
+ * },
788
+ * verificationChallenge: {
789
+ * attempted: boolean, // Whether challenge bypass was attempted
790
+ * success: boolean, // Whether challenge was solved (true if no challenge or successfully solved)
791
+ * error: string|null, // Error message if solving failed
792
+ * requiresHuman: boolean, // True if CAPTCHA detected - requires manual intervention
793
+ * method: string|null, // Successful method used: 'js_challenge_wait', 'turnstile', 'legacy_checkbox'
794
+ * details: object|null // Challenge analysis details from analyzeCloudflareChallenge()
795
+ * },
796
+ * overallSuccess: boolean, // True if no critical failures occurred (challenges may be unsolved but didn't error)
797
+ * errors: string[], // Array of error messages from failed operations
798
+ * skippedNoIndicators: boolean, // True if processing was skipped due to no Cloudflare indicators detected
799
+ * timedOut: boolean // True if adaptive timeout was reached (processing continued anyway)
800
+ * }
801
+ *
802
+ * @example
803
+ * const result = await handleCloudflareProtection(page, url, {cloudflare_bypass: true}, false);
804
+ * if (result.verificationChallenge.requiresHuman) {
805
+ * console.log('Manual CAPTCHA solving required');
806
+ * } else if (!result.overallSuccess) {
807
+ * console.error('Critical errors:', result.errors);
808
+ * } else if (result.verificationChallenge.attempted && result.verificationChallenge.success) {
809
+ * console.log(`Challenge solved using: ${result.verificationChallenge.method}`);
810
+ * }
718
811
  */
719
812
  async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDebug = false) {
720
813
  if (forceDebug) {
@@ -722,7 +815,10 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
722
815
  }
723
816
  // Quick detection first - exit early if no Cloudflare detected and no explicit config
724
817
  const quickDetection = await quickCloudflareDetection(page, forceDebug);
725
-
818
+
819
+ // Early return structure when no Cloudflare indicators found
820
+ // Sets attempted: false, success: true for both protection types
821
+
726
822
  // Only proceed if we have indicators OR explicit config enables Cloudflare handling
727
823
  if (!quickDetection.hasIndicators && !siteConfig.cloudflare_phish && !siteConfig.cloudflare_bypass) {
728
824
  if (forceDebug) console.log(`[debug][cloudflare] No Cloudflare indicators found and no explicit config, skipping protection handling for ${currentUrl}`);
@@ -736,6 +832,9 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
736
832
  };
737
833
  }
738
834
 
835
+ // Standard return structure for all processing paths
836
+ // Individual handlers update their respective sections
837
+ // overallSuccess becomes false if any critical errors occur
739
838
  const result = {
740
839
  phishingWarning: { attempted: false, success: false },
741
840
  verificationChallenge: { attempted: false, success: false },
@@ -748,10 +847,10 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
748
847
  let adaptiveTimeout;
749
848
  if (siteConfig.cloudflare_phish || siteConfig.cloudflare_bypass) {
750
849
  // Explicit config - give more time
751
- adaptiveTimeout = quickDetection.hasIndicators ? 25000 : 20000;
850
+ adaptiveTimeout = quickDetection.hasIndicators ? TIMEOUTS.ADAPTIVE_TIMEOUT_WITH_INDICATORS : TIMEOUTS.ADAPTIVE_TIMEOUT_WITHOUT_INDICATORS;
752
851
  } else {
753
852
  // Auto-detected only - shorter timeout
754
- adaptiveTimeout = quickDetection.hasIndicators ? 15000 : 10000;
853
+ adaptiveTimeout = quickDetection.hasIndicators ? TIMEOUTS.ADAPTIVE_TIMEOUT_AUTO_WITH_INDICATORS : TIMEOUTS.ADAPTIVE_TIMEOUT_AUTO_WITHOUT_INDICATORS;
755
854
  }
756
855
 
757
856
  if (forceDebug) {
@@ -783,6 +882,12 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
783
882
 
784
883
  /**
785
884
  * Performs the actual Cloudflare handling with enhanced debug logging
885
+ *
886
+ * @param {Object} page - Puppeteer page instance
887
+ * @param {string} currentUrl - URL being processed
888
+ * @param {Object} siteConfig - Configuration flags
889
+ * @param {boolean} forceDebug - Debug logging flag
890
+ * @returns {Promise<Object>} Same structure as handleCloudflareProtection()
786
891
  */
787
892
  async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebug = false) {
788
893
  const result = {
@@ -794,6 +899,8 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
794
899
 
795
900
  if (forceDebug) console.log(`[debug][cloudflare] Starting Cloudflare protection handling for ${currentUrl}`);
796
901
 
902
+ // Handle phishing warnings first - updates result.phishingWarning
903
+ // Only runs if siteConfig.cloudflare_phish === true
797
904
  // Handle phishing warnings if enabled
798
905
  if (siteConfig.cloudflare_phish === true) {
799
906
  if (forceDebug) console.log(`[debug][cloudflare] Phishing warning bypass enabled for ${currentUrl}`);
@@ -812,6 +919,9 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
812
919
  console.log(`[debug][cloudflare] Phishing warning bypass disabled for ${currentUrl}`);
813
920
  }
814
921
 
922
+ // Handle verification challenges second - updates result.verificationChallenge
923
+ // Only runs if siteConfig.cloudflare_bypass === true
924
+ // Sets requiresHuman: true if CAPTCHA detected (no bypass attempted)
815
925
  // Handle verification challenges if enabled
816
926
  if (siteConfig.cloudflare_bypass === true) {
817
927
  if (forceDebug) console.log(`[debug][cloudflare] Challenge bypass enabled for ${currentUrl}`);
@@ -0,0 +1,296 @@
1
+ /**
2
+ * Domain Cache Module - Tracks detected domains to prevent duplicate processing
3
+ * Provides performance optimization by skipping already detected domains
4
+ */
5
+
6
+ const { formatLogMessage } = require('./colorize');
7
+
8
/**
 * Domain detection cache for tracking processed domains.
 *
 * Domains are stored in a Set. A Set iterates in insertion order, so the
 * "oldest" entries are the first ones the Set yields; eviction relies on that.
 */
class DomainCache {
  /**
   * @param {object} [options] - Cache options
   * @param {boolean} [options.enableLogging=false] - Log cache activity via formatLogMessage
   * @param {string} [options.logPrefix='[domain-cache]'] - Prefix for every log line
   * @param {number} [options.maxCacheSize=10000] - Entry limit before old entries are evicted
   *   (any falsy value, including 0, falls back to 10000)
   */
  constructor(options = {}) {
    this.cache = new Set();
    this.stats = {
      totalDetected: 0,
      totalSkipped: 0,
      cacheHits: 0,
      cacheMisses: 0
    };
    this.options = {
      enableLogging: options.enableLogging || false,
      logPrefix: options.logPrefix || '[domain-cache]',
      maxCacheSize: options.maxCacheSize || 10000 // Prevent memory leaks
    };
  }

  /**
   * Check if a domain was already detected in a previous scan.
   * Updates the hit/miss statistics; use has() for a stats-free lookup.
   * @param {string} domain - Domain to check
   * @returns {boolean} True if domain was already detected
   */
  isDomainAlreadyDetected(domain) {
    if (!domain || typeof domain !== 'string') {
      return false;
    }

    const isDetected = this.cache.has(domain);

    if (isDetected) {
      this.stats.totalSkipped++;
      this.stats.cacheHits++;

      if (this.options.enableLogging) {
        console.log(formatLogMessage('debug', `${this.options.logPrefix} Cache HIT: ${domain} (skipped)`));
      }
    } else {
      this.stats.cacheMisses++;

      if (this.options.enableLogging) {
        console.log(formatLogMessage('debug', `${this.options.logPrefix} Cache MISS: ${domain} (processing)`));
      }
    }

    return isDetected;
  }

  /**
   * Mark a domain as detected for future reference.
   * @param {string} domain - Domain to mark as detected
   * @returns {boolean} True if the domain was newly added, false if it was
   *   invalid or already cached
   */
  markDomainAsDetected(domain) {
    if (!domain || typeof domain !== 'string') {
      return false;
    }

    // A domain that is already cached needs no work. Returning here also keeps
    // a re-mark at capacity from evicting entries (possibly this very domain)
    // and then counting it as a new detection.
    if (this.cache.has(domain)) {
      return false;
    }

    // Make room before inserting. Evict roughly 10% of the limit, but always at
    // least one entry: for limits below 10 the 10% rounds down to 0, which
    // would otherwise let the cache grow past maxCacheSize forever.
    if (this.cache.size >= this.options.maxCacheSize) {
      const evictCount = Math.max(1, Math.floor(this.options.maxCacheSize * 0.1));
      this.clearOldestEntries(evictCount);
    }

    this.cache.add(domain);
    this.stats.totalDetected++;

    if (this.options.enableLogging) {
      console.log(formatLogMessage('debug', `${this.options.logPrefix} Marked as detected: ${domain} (cache size: ${this.cache.size})`));
    }

    return true;
  }

  /**
   * Remove the oldest entries from the cache (FIFO eviction).
   * Walks the Set in insertion order and stops after `count` deletions, so the
   * cost is proportional to `count` rather than to the cache size.
   * @param {number} count - Number of entries to remove; non-positive or NaN is a no-op
   */
  clearOldestEntries(count) {
    // `!(count > 0)` also rejects NaN, which `count <= 0` would let through
    if (!(count > 0)) return;

    const limit = Math.floor(count);
    let removed = 0;

    // Deleting the entry currently being visited is safe while iterating a Set
    for (const domain of this.cache) {
      if (removed >= limit) break;
      this.cache.delete(domain);
      removed++;
    }

    if (this.options.enableLogging) {
      console.log(formatLogMessage('debug', `${this.options.logPrefix} Cleared ${removed} old entries, cache size now: ${this.cache.size}`));
    }
  }

  /**
   * Get cache statistics.
   * @returns {object} The raw counters plus `cacheSize` and a formatted
   *   `hitRate` percentage string ('0%' when there have been no hits)
   */
  getStats() {
    return {
      ...this.stats,
      cacheSize: this.cache.size,
      hitRate: this.stats.cacheHits > 0 ?
        (this.stats.cacheHits / (this.stats.cacheHits + this.stats.cacheMisses) * 100).toFixed(2) + '%' :
        '0%'
    };
  }

  /**
   * Clear all cached domains and reset every statistic to zero.
   */
  clear() {
    const previousSize = this.cache.size;
    this.cache.clear();
    this.stats = {
      totalDetected: 0,
      totalSkipped: 0,
      cacheHits: 0,
      cacheMisses: 0
    };

    if (this.options.enableLogging) {
      console.log(formatLogMessage('debug', `${this.options.logPrefix} Cache cleared (${previousSize} entries removed)`));
    }
  }

  /**
   * Get all cached domains (for debugging).
   * @returns {Array<string>} Cached domains in insertion order
   */
  getAllCachedDomains() {
    return Array.from(this.cache);
  }

  /**
   * Check if cache contains a specific domain (without updating stats).
   * @param {string} domain - Domain to check
   * @returns {boolean} True if domain exists in cache
   */
  has(domain) {
    return this.cache.has(domain);
  }

  /**
   * Remove a specific domain from cache.
   * @param {string} domain - Domain to remove
   * @returns {boolean} True if domain was removed, false if it wasn't in cache
   */
  removeDomain(domain) {
    const wasRemoved = this.cache.delete(domain);

    if (wasRemoved && this.options.enableLogging) {
      console.log(formatLogMessage('debug', `${this.options.logPrefix} Removed from cache: ${domain}`));
    }

    return wasRemoved;
  }

  /**
   * Add multiple domains to cache at once.
   * @param {Array<string>} domains - Array of domains to add
   * @returns {number} Number of domains actually added (excludes duplicates
   *   and invalid entries); 0 when `domains` is not an array
   */
  markMultipleDomainsAsDetected(domains) {
    if (!Array.isArray(domains)) {
      return 0;
    }

    let addedCount = 0;
    for (const domain of domains) {
      if (this.markDomainAsDetected(domain)) {
        addedCount++;
      }
    }

    return addedCount;
  }

  /**
   * Create bound helper functions for easy integration with existing code.
   * @returns {object} Object with helper functions bound to this instance
   */
  createHelpers() {
    return {
      isDomainAlreadyDetected: this.isDomainAlreadyDetected.bind(this),
      markDomainAsDetected: this.markDomainAsDetected.bind(this),
      getSkippedCount: () => this.stats.totalSkipped,
      getCacheSize: () => this.cache.size,
      getStats: this.getStats.bind(this)
    };
  }
}
202
+
203
/**
 * Module-level singleton holding the shared DomainCache (null until first use).
 */
let globalDomainCache = null;

/**
 * Return the shared DomainCache, creating it on first call.
 * @param {object} options - Cache options; only applied when the instance is
 *   created, ignored on every later call
 * @returns {DomainCache} Global cache instance
 */
function getGlobalDomainCache(options = {}) {
  if (globalDomainCache) {
    return globalDomainCache;
  }

  globalDomainCache = new DomainCache(options);
  return globalDomainCache;
}
219
+
220
/**
 * Build the helper function set on top of the shared global cache.
 * @param {object} options - Cache options (only used if global cache doesn't exist yet)
 * @returns {object} Helper functions bound to global cache
 */
function createGlobalHelpers(options = {}) {
  return getGlobalDomainCache(options).createHelpers();
}
229
+
230
/**
 * Empty and discard the global cache so the next getGlobalDomainCache() call
 * builds a fresh instance (useful for testing or manual resets).
 */
function resetGlobalCache() {
  const current = globalDomainCache;

  // Clear first so any holder of the old instance sees an empty cache
  if (current) {
    current.clear();
  }

  globalDomainCache = null;
}
239
+
240
+ /**
241
+ * Legacy wrapper functions for backward compatibility
242
+ * These match the original function signatures from nwss.js
243
+ */
244
+
245
/**
 * Legacy wrapper: ask the global cache whether a domain was already detected.
 * @param {string} domain - Domain to check
 * @returns {boolean} True if domain was already detected
 */
function isDomainAlreadyDetected(domain) {
  return getGlobalDomainCache().isDomainAlreadyDetected(domain);
}
254
+
255
/**
 * Legacy wrapper: record a domain as detected in the global cache.
 * Returns nothing; the underlying cache method's result is discarded.
 * @param {string} domain - Domain to mark as detected
 */
function markDomainAsDetected(domain) {
  getGlobalDomainCache().markDomainAsDetected(domain);
}
263
+
264
/**
 * Legacy wrapper: read the global cache's skipped-domain counter.
 * @returns {number} Number of domains skipped
 */
function getTotalDomainsSkipped() {
  const { stats } = getGlobalDomainCache();
  return stats.totalSkipped;
}
272
+
273
/**
 * Legacy wrapper: report how many domains the global cache currently holds.
 * @returns {number} Size of the detected domains cache
 */
function getDetectedDomainsCount() {
  const { cache: detectedDomains } = getGlobalDomainCache();
  return detectedDomains.size;
}
281
+
282
+ module.exports = {
283
+ // Main class
284
+ DomainCache,
285
+
286
+ // Global cache functions
287
+ getGlobalDomainCache,
288
+ createGlobalHelpers,
289
+ resetGlobalCache,
290
+
291
+ // Legacy wrapper functions for backward compatibility
292
+ isDomainAlreadyDetected,
293
+ markDomainAsDetected,
294
+ getTotalDomainsSkipped,
295
+ getDetectedDomainsCount
296
+ };
package/lib/nettools.js CHANGED
@@ -711,7 +711,7 @@ function createNetToolsHandler(config) {
711
711
  dryRunCallback = null,
712
712
  matchedDomains,
713
713
  addMatchedDomain,
714
- currentUrl,
714
+ isDomainAlreadyDetected,
715
715
  getRootDomain,
716
716
  siteConfig,
717
717
  dumpUrls,
@@ -743,8 +743,19 @@ function createNetToolsHandler(config) {
743
743
  const DIG_CACHE_TTL = 300000; // 5 minutes cache TTL
744
744
  const DIG_MAX_CACHE_SIZE = 400; // Smaller cache for dig due to shorter TTL
745
745
 
746
- return async function handleNetToolsCheck(domain, originalDomain) {
746
+ return async function handleNetToolsCheck(domain, fullSubdomain) {
747
+ // Use fullSubdomain parameter instead of originalDomain to maintain consistency
748
+ // with the domain cache fix approach
749
+ const originalDomain = fullSubdomain;
747
750
  // Helper function to log to BOTH console and debug file
751
+
752
+ // Check if domain was already detected (skip expensive operations)
753
+ if (typeof isDomainAlreadyDetected === 'function' && isDomainAlreadyDetected(fullSubdomain)) {
754
+ if (forceDebug) {
755
+ logToConsoleAndFile(`${messageColors.highlight('[nettools]')} Skipping already detected subdomain: ${fullSubdomain} (output domain: ${domain})`);
756
+ }
757
+ return;
758
+ }
748
759
 
749
760
  // NOTE: The logToConsoleAndFile function needs to be declared INSIDE this function
750
761
  // so it has access to the closure variables (forceDebug, debugLogFile, fs) from the
@@ -1267,13 +1278,13 @@ function createNetToolsHandler(config) {
1267
1278
  // No need to add to matched domains
1268
1279
  } else {
1269
1280
  if (typeof addMatchedDomain === 'function') {
1270
- addMatchedDomain(domain);
1281
+ addMatchedDomain(domain, null, fullSubdomain);
1271
1282
  } else {
1272
1283
  matchedDomains.add(domain);
1273
1284
  }
1274
1285
  }
1275
1286
 
1276
- const simplifiedUrl = currentUrl ? getRootDomain(currentUrl) : 'unknown';
1287
+ const simplifiedUrl = config.currentUrl ? getRootDomain(config.currentUrl) : 'unknown';
1277
1288
 
1278
1289
  if (siteConfig.verbose === 1) {
1279
1290
  const matchType = [];
package/lib/output.js CHANGED
@@ -1,5 +1,7 @@
1
1
  const fs = require('fs');
2
2
  const path = require('path');
3
+ // Import domain cache functions for statistics
4
+ const { getTotalDomainsSkipped } = require('./domain-cache');
3
5
  const { loadComparisonRules, filterUniqueRules } = require('./compare');
4
6
  const { colorize, colors, messageColors, tags, formatLogMessage } = require('./colorize');
5
7
 
@@ -426,7 +428,6 @@ function writeOutput(lines, outputFile = null, silentMode = false) {
426
428
  * Main output handler that combines all output operations
427
429
  * @param {Array} results - Processing results from scanner
428
430
  * @param {object} config - Output configuration
429
- * @param {string[]} config.ignoreDomains - Domains to filter out from final output
430
431
  * @returns {object} Output statistics and file paths
431
432
  */
432
433
  function handleOutput(results, config = {}) {
@@ -440,7 +441,8 @@ function handleOutput(results, config = {}) {
440
441
  dumpUrls = false,
441
442
  adblockRulesLogFile = null,
442
443
  forceDebug = false,
443
- ignoreDomains = []
444
+ ignoreDomains = [],
445
+ totalDomainsSkipped = null // Allow override or get from cache
444
446
  } = config;
445
447
 
446
448
  // Handle append mode
@@ -572,7 +574,11 @@ function handleOutput(results, config = {}) {
572
574
  if (dumpUrls && adblockRulesLogFile) {
573
575
  logSuccess = writeOutput(outputLinesWithTitles, adblockRulesLogFile, silentMode);
574
576
  }
575
-
577
+
578
+ // Get domain skip statistics from cache if not provided
579
+ const finalTotalDomainsSkipped = totalDomainsSkipped !== null ?
580
+ totalDomainsSkipped : getTotalDomainsSkipped();
581
+
576
582
  return {
577
583
  success: mainSuccess && logSuccess,
578
584
  outputFile,
@@ -582,6 +588,8 @@ function handleOutput(results, config = {}) {
582
588
  filteredOutCount,
583
589
  totalLines: filteredOutputLines.length,
584
590
  outputLines: outputFile ? null : filteredOutputLines // Only return lines if not written to file
591
+ // Note: totalDomainsSkipped statistic is now available via getTotalDomainsSkipped()
592
+ // and doesn't need to be passed through the output handler
585
593
  };
586
594
  }
587
595
 
@@ -250,11 +250,19 @@ function createCurlHandler(config) {
250
250
  } = config;
251
251
 
252
252
  return async function curlHandler(requestUrl) {
253
- const respDomain = perSiteSubDomains ? (new URL(requestUrl)).hostname : getRootDomain(requestUrl);
254
253
 
255
254
  // Only process URLs that match our regex patterns
256
255
  const matchesRegex = regexes.some(re => re.test(requestUrl));
257
256
  if (!matchesRegex) return;
257
+
258
+ // Extract domain and check if already detected (skip expensive operations)
259
+ const reqDomain = perSiteSubDomains ? (new URL(requestUrl)).hostname : getRootDomain(requestUrl);
260
+ if (typeof config.isDomainAlreadyDetected === 'function' && config.isDomainAlreadyDetected(reqDomain)) {
261
+ if (forceDebug) {
262
+ console.log(`[debug][curl] Skipping already detected domain: ${reqDomain}`);
263
+ }
264
+ return;
265
+ }
258
266
 
259
267
  // Check if this is a first-party request (same domain as the URL being scanned)
260
268
  const currentUrlHostname = new URL(currentUrl).hostname;
@@ -283,11 +291,11 @@ function createCurlHandler(config) {
283
291
 
284
292
  // If NO searchstring is defined, match immediately (like browser behavior)
285
293
  if (!hasSearchString && !hasSearchStringAnd) {
286
- if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
294
+ if (!reqDomain || matchesIgnoreDomain(reqDomain, ignoreDomains)) {
287
295
  return;
288
296
  }
289
297
 
290
- addDomainToCollection(matchedDomains, addMatchedDomain, respDomain, resourceType);
298
+ addDomainToCollection(matchedDomains, addMatchedDomain, reqDomain, resourceType);
291
299
  const simplifiedUrl = getRootDomain(currentUrl);
292
300
 
293
301
  if (siteConfig.verbose === 1) {
@@ -317,11 +325,11 @@ function createCurlHandler(config) {
317
325
  const { found, matchedString, logicType } = searchContent(content, searchStrings, searchStringsAnd, '');
318
326
 
319
327
  if (found) {
320
- if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
328
+ if (!reqDomain || matchesIgnoreDomain(reqDomain, ignoreDomains)) {
321
329
  return;
322
330
  }
323
331
 
324
- addDomainToCollection(matchedDomains, addMatchedDomain, respDomain, resourceType);
332
+ addDomainToCollection(matchedDomains, addMatchedDomain, reqDomain, resourceType);
325
333
  const simplifiedUrl = getRootDomain(currentUrl);
326
334
 
327
335
  if (siteConfig.verbose === 1) {
@@ -387,6 +395,14 @@ function createResponseHandler(config) {
387
395
  const matchesRegex = regexes.some(re => re.test(respUrl));
388
396
  if (!matchesRegex) return;
389
397
 
398
+ // Extract domain and check if already detected (skip expensive operations)
399
+ if (typeof config.isDomainAlreadyDetected === 'function' && config.isDomainAlreadyDetected(respDomain)) {
400
+ if (forceDebug) {
401
+ console.log(`[debug] Skipping response analysis for already detected domain: ${respDomain}`);
402
+ }
403
+ return;
404
+ }
405
+
390
406
  // Check if this is a first-party response (same domain as the URL being scanned)
391
407
  const currentUrlHostname = new URL(currentUrl).hostname;
392
408
  const responseHostname = new URL(respUrl).hostname;
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v1.0.42 ===
1
+ // === Network scanner script (nwss.js) v1.0.44 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -27,17 +27,23 @@ const { createNetToolsHandler, createEnhancedDryRunCallback, validateWhoisAvaila
27
27
  const { loadComparisonRules, filterUniqueRules } = require('./lib/compare');
28
28
  // Colorize various text when used
29
29
  const { colorize, colors, messageColors, tags, formatLogMessage } = require('./lib/colorize');
30
+ // Domain detection cache for performance optimization
31
+ const { createGlobalHelpers, getTotalDomainsSkipped, getDetectedDomainsCount } = require('./lib/domain-cache');
30
32
  // Enhanced redirect handling
31
33
  const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/redirect');
32
34
  // Ensure web browser is working correctly
33
35
  const { monitorBrowserHealth, isBrowserHealthy } = require('./lib/browserhealth');
34
36
 
35
37
  // --- Script Configuration & Constants ---
36
- const VERSION = '1.0.42'; // Script version
38
+ const VERSION = '1.0.44'; // Script version
37
39
 
38
40
  // get startTime
39
41
  const startTime = Date.now();
40
42
 
43
+ // Initialize domain cache helpers with debug logging if enabled
44
+ const domainCacheOptions = { enableLogging: false }; // Set to true for cache debug logs
45
+ const { isDomainAlreadyDetected, markDomainAsDetected } = createGlobalHelpers(domainCacheOptions);
46
+
41
47
  // --- Command-Line Argument Parsing ---
42
48
  const args = process.argv.slice(2);
43
49
 
@@ -1475,9 +1481,12 @@ function setupFrameHandling(page, forceDebug) {
1475
1481
  /**
1476
1482
  * Helper function to add domain to matched collection
1477
1483
  * @param {string} domain - Domain to add
1484
+ * @param {string} fullSubdomain - Full subdomain for cache tracking
1478
1485
  * @param {string} resourceType - Resource type (for --adblock-rules mode)
1479
1486
  */
1480
- function addMatchedDomain(domain, resourceType = null) {
1487
+ function addMatchedDomain(domain, resourceType = null, fullSubdomain = null) {
1488
+ // Use fullSubdomain for cache tracking if provided, otherwise fall back to domain
1489
+ const cacheKey = fullSubdomain || domain;
1481
1490
  // Check if we should ignore similar domains
1482
1491
  const ignoreSimilarEnabled = siteConfig.ignore_similar !== undefined ? siteConfig.ignore_similar : ignore_similar;
1483
1492
  const similarityThreshold = siteConfig.ignore_similar_threshold || ignore_similar_threshold;
@@ -1517,6 +1526,9 @@ function setupFrameHandling(page, forceDebug) {
1517
1526
  return; // Skip adding this domain
1518
1527
  }
1519
1528
  }
1529
+
1530
+ // Mark full subdomain as detected for future reference
1531
+ markDomainAsDetected(cacheKey);
1520
1532
 
1521
1533
  if (matchedDomains instanceof Map) {
1522
1534
  if (!matchedDomains.has(domain)) {
@@ -1583,6 +1595,10 @@ function setupFrameHandling(page, forceDebug) {
1583
1595
  }
1584
1596
  }
1585
1597
  const reqUrl = request.url();
1598
+
1599
+ // ALWAYS extract the FULL subdomain for cache checking to preserve unique subdomains
1600
+ const fullSubdomain = safeGetDomain(reqUrl, true); // Always get full subdomain for cache
1601
+ const reqDomain = safeGetDomain(reqUrl, perSiteSubDomains); // Output domain based on config
1586
1602
 
1587
1603
  if (allBlockedRegexes.some(re => re.test(reqUrl))) {
1588
1604
  if (forceDebug) {
@@ -1606,7 +1622,7 @@ function setupFrameHandling(page, forceDebug) {
1606
1622
 
1607
1623
  // NEW: Check if even_blocked is enabled and this URL matches filter regex
1608
1624
  if (evenBlocked) {
1609
- const reqDomain = safeGetDomain(reqUrl, perSiteSubDomains);
1625
+ // reqDomain already defined above
1610
1626
  if (reqDomain && !matchesIgnoreDomain(reqDomain, ignoreDomains)) {
1611
1627
  for (const re of regexes) {
1612
1628
  if (re.test(reqUrl)) {
@@ -1625,7 +1641,7 @@ function setupFrameHandling(page, forceDebug) {
1625
1641
  wasBlocked: true
1626
1642
  });
1627
1643
  } else {
1628
- addMatchedDomain(reqDomain, resourceType);
1644
+ addMatchedDomain(reqDomain, resourceType, fullSubdomain);
1629
1645
  }
1630
1646
 
1631
1647
  const simplifiedUrl = getRootDomain(currentUrl);
@@ -1649,8 +1665,7 @@ function setupFrameHandling(page, forceDebug) {
1649
1665
  return;
1650
1666
  }
1651
1667
 
1652
- const reqDomain = safeGetDomain(reqUrl, perSiteSubDomains);
1653
-
1668
+
1654
1669
  if (!reqDomain) {
1655
1670
  if (forceDebug) {
1656
1671
  console.log(formatLogMessage('debug', `Skipping request with unparseable URL: ${reqUrl}`));
@@ -1659,8 +1674,8 @@ function setupFrameHandling(page, forceDebug) {
1659
1674
  return;
1660
1675
  }
1661
1676
 
1662
- // Skip matching if this domain is one of the redirect intermediaries
1663
- if (redirectDomainsToExclude && redirectDomainsToExclude.includes(reqDomain)) {
1677
+ // Skip matching if this full subdomain is one of the redirect intermediaries
1678
+ if (redirectDomainsToExclude && redirectDomainsToExclude.includes(fullSubdomain)) {
1664
1679
  if (forceDebug) {
1665
1680
  console.log(formatLogMessage('debug', `Skipping redirect intermediary domain: ${reqDomain}`));
1666
1681
  }
@@ -1699,9 +1714,9 @@ function setupFrameHandling(page, forceDebug) {
1699
1714
  }
1700
1715
 
1701
1716
  // Check ignoreDomains AFTER regex match but BEFORE domain processing
1702
- if (matchesIgnoreDomain(reqDomain, ignoreDomains)) {
1717
+ if (matchesIgnoreDomain(fullSubdomain, ignoreDomains)) {
1703
1718
  if (forceDebug) {
1704
- console.log(formatLogMessage('debug', `Ignoring domain ${reqDomain} (matches ignoreDomains pattern)`));
1719
+ console.log(formatLogMessage('debug', `Ignoring domain ${fullSubdomain} (matches ignoreDomains pattern)`));
1705
1720
  }
1706
1721
  break; // Skip this URL - domain is in ignore list
1707
1722
  }
@@ -1734,6 +1749,14 @@ function setupFrameHandling(page, forceDebug) {
1734
1749
  }
1735
1750
  } else if (hasNetTools && !hasSearchString && !hasSearchStringAnd) {
1736
1751
  // If nettools are configured (whois/dig), perform checks on the domain
1752
+ // Skip nettools check if full subdomain was already detected
1753
+ if (isDomainAlreadyDetected(fullSubdomain)) {
1754
+ if (forceDebug) {
1755
+ console.log(formatLogMessage('debug', `Skipping nettools check for already detected subdomain: ${fullSubdomain}`));
1756
+ }
1757
+ break; // Skip to next URL
1758
+ }
1759
+
1737
1760
  if (forceDebug) {
1738
1761
  console.log(formatLogMessage('debug', `${reqUrl} matched regex ${re} and resourceType ${resourceType}, queued for nettools check`));
1739
1762
  }
@@ -1767,6 +1790,7 @@ function setupFrameHandling(page, forceDebug) {
1767
1790
  dryRunCallback: dryRunMode ? createEnhancedDryRunCallback(matchedDomains, forceDebug) : null,
1768
1791
  matchedDomains,
1769
1792
  addMatchedDomain,
1793
+ isDomainAlreadyDetected,
1770
1794
  currentUrl,
1771
1795
  getRootDomain,
1772
1796
  siteConfig,
@@ -1777,10 +1801,17 @@ function setupFrameHandling(page, forceDebug) {
1777
1801
  });
1778
1802
 
1779
1803
  // Execute nettools check asynchronously
1780
- const originalDomain = (new URL(reqUrl)).hostname;
1804
+ const originalDomain = fullSubdomain; // Use full subdomain for nettools
1781
1805
  setImmediate(() => netToolsHandler(reqDomain, originalDomain));
1782
1806
  } else {
1783
1807
  // If searchstring or searchstring_and IS defined (with or without nettools), queue for content checking
1808
+ // Skip searchstring check if full subdomain was already detected
1809
+ if (isDomainAlreadyDetected(fullSubdomain)) {
1810
+ if (forceDebug) {
1811
+ console.log(formatLogMessage('debug', `Skipping searchstring check for already detected subdomain: ${fullSubdomain}`));
1812
+ }
1813
+ break; // Skip to next URL
1814
+ }
1784
1815
  if (forceDebug) {
1785
1816
  const searchType = hasSearchStringAnd ? 'searchstring_and' : 'searchstring';
1786
1817
  console.log(formatLogMessage('debug', `${reqUrl} matched regex ${re} and resourceType ${resourceType}, queued for ${searchType} content search`));
@@ -1808,6 +1839,7 @@ function setupFrameHandling(page, forceDebug) {
1808
1839
  regexes,
1809
1840
  matchedDomains,
1810
1841
  addMatchedDomain, // Pass the helper function
1842
+ isDomainAlreadyDetected,
1811
1843
  currentUrl,
1812
1844
  perSiteSubDomains,
1813
1845
  ignoreDomains,
@@ -1838,6 +1870,7 @@ function setupFrameHandling(page, forceDebug) {
1838
1870
  regexes,
1839
1871
  matchedDomains,
1840
1872
  addMatchedDomain, // Pass the helper function
1873
+ isDomainAlreadyDetected,
1841
1874
  currentUrl,
1842
1875
  perSiteSubDomains,
1843
1876
  ignoreDomains,
@@ -1876,6 +1909,7 @@ function setupFrameHandling(page, forceDebug) {
1876
1909
  regexes,
1877
1910
  matchedDomains,
1878
1911
  addMatchedDomain, // Pass the helper function
1912
+ isDomainAlreadyDetected,
1879
1913
  currentUrl,
1880
1914
  perSiteSubDomains,
1881
1915
  ignoreDomains,
@@ -2462,6 +2496,8 @@ function setupFrameHandling(page, forceDebug) {
2462
2496
  const totalMatches = results.reduce((sum, r) => sum + (r.rules ? r.rules.length : 0), 0);
2463
2497
 
2464
2498
  // Debug: Show output format being used
2499
+ const totalDomainsSkipped = getTotalDomainsSkipped();
2500
+ const detectedDomainsCount = getDetectedDomainsCount();
2465
2501
  if (forceDebug) {
2466
2502
  const globalOptions = {
2467
2503
  localhostMode,
@@ -2476,6 +2512,7 @@ function setupFrameHandling(page, forceDebug) {
2476
2512
  };
2477
2513
  console.log(formatLogMessage('debug', `Output format: ${getFormatDescription(globalOptions)}`));
2478
2514
  console.log(formatLogMessage('debug', `Generated ${outputResult.totalRules} rules from ${outputResult.successfulPageLoads} successful page loads`));
2515
+ console.log(formatLogMessage('debug', `Performance: ${totalDomainsSkipped} domains skipped (already detected), ${detectedDomainsCount} unique domains cached`));
2479
2516
  }
2480
2517
 
2481
2518
  // Compress log files if --compress-logs is enabled
@@ -2567,6 +2604,9 @@ function setupFrameHandling(page, forceDebug) {
2567
2604
  } else if (outputResult.totalRules > 0 && dryRunMode) {
2568
2605
  console.log(messageColors.success('Found') + ` ${outputResult.totalRules} total matches across all URLs`);
2569
2606
  }
2607
+ if (totalDomainsSkipped > 0) {
2608
+ console.log(messageColors.info('Performance:') + ` ${totalDomainsSkipped} domains skipped (already detected)`);
2609
+ }
2570
2610
  }
2571
2611
 
2572
2612
  // Clean process termination
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "1.0.42",
3
+ "version": "1.0.44",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {