@fanboynz/network-scanner 1.0.42 → 1.0.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cloudflare.js +144 -34
- package/lib/domain-cache.js +296 -0
- package/lib/nettools.js +15 -4
- package/lib/output.js +11 -3
- package/lib/searchstring.js +21 -5
- package/nwss.js +52 -12
- package/package.json +1 -1
package/lib/cloudflare.js
CHANGED
|
@@ -9,6 +9,38 @@
|
|
|
9
9
|
*/
|
|
10
10
|
const CLOUDFLARE_MODULE_VERSION = '2.1.0';
|
|
11
11
|
|
|
12
|
+
/**
 * Timeout constants for the Cloudflare handling routines (all values in milliseconds).
 *
 * The table is frozen: it is shared by every helper in this module, so an
 * accidental write would silently change the timing of unrelated operations.
 */
const TIMEOUTS = Object.freeze({
  QUICK_DETECTION: 3000,              // Quick Cloudflare detection check
  PAGE_EVALUATION: 8000,              // Standard page evaluation timeout
  PAGE_EVALUATION_SAFE: 10000,        // Safe page evaluation with extra buffer
  CHALLENGE_COMPLETION: 3000,         // Challenge completion check
  PHISHING_WAIT: 2000,                // Wait before checking phishing warning
  PHISHING_CLICK: 3000,               // Timeout for clicking phishing continue button
  PHISHING_NAVIGATION: 8000,          // Wait for navigation after phishing bypass
  CHALLENGE_WAIT: 1000,               // Wait before checking verification challenge
  CHALLENGE_SOLVING: 20000,           // Overall challenge solving timeout
  JS_CHALLENGE: 15000,                // JS challenge completion wait
  JS_CHALLENGE_BUFFER: 18000,         // JS challenge with safety buffer
  TURNSTILE_OPERATION: 8000,          // Turnstile iframe operations
  TURNSTILE_COMPLETION: 12000,        // Turnstile completion check
  TURNSTILE_COMPLETION_BUFFER: 15000, // Turnstile completion with buffer
  SELECTOR_WAIT: 2000,                // Wait for selector to appear
  SELECTOR_WAIT_BUFFER: 2500,         // Selector wait with safety buffer
  ELEMENT_INTERACTION_DELAY: 500,     // Delay before element interactions
  CLICK_TIMEOUT: 5000,                // Standard click operation timeout
  CLICK_TIMEOUT_BUFFER: 1000,         // Click timeout safety buffer
  NAVIGATION_TIMEOUT: 15000,          // Standard navigation timeout
  NAVIGATION_TIMEOUT_BUFFER: 2000,    // Navigation timeout safety buffer
  FALLBACK_TIMEOUT: 5000,             // Fallback timeout for failed operations
  ADAPTIVE_TIMEOUT_WITH_INDICATORS: 25000,         // Adaptive timeout when indicators found + explicit config
  ADAPTIVE_TIMEOUT_WITHOUT_INDICATORS: 20000,      // Adaptive timeout with explicit config only
  ADAPTIVE_TIMEOUT_AUTO_WITH_INDICATORS: 15000,    // Adaptive timeout for auto-detected with indicators
  ADAPTIVE_TIMEOUT_AUTO_WITHOUT_INDICATORS: 10000  // Adaptive timeout for auto-detected without indicators
});
|
|
43
|
+
|
|
12
44
|
/**
|
|
13
45
|
* Gets module version information
|
|
14
46
|
* @returns {object} Version information object
|
|
@@ -29,26 +61,26 @@ async function waitForTimeout(page, timeout) {
|
|
|
29
61
|
if (typeof page.waitForTimeout === 'function') {
|
|
30
62
|
await Promise.race([
|
|
31
63
|
page.waitForTimeout(timeout),
|
|
32
|
-
new Promise((_, reject) => setTimeout(() => reject(new Error('waitForTimeout exceeded')), timeout +
|
|
64
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error('waitForTimeout exceeded')), timeout + TIMEOUTS.FALLBACK_TIMEOUT))
|
|
33
65
|
]);
|
|
34
66
|
} else if (typeof page.waitFor === 'function') {
|
|
35
67
|
await Promise.race([
|
|
36
68
|
page.waitFor(timeout),
|
|
37
|
-
new Promise((_, reject) => setTimeout(() => reject(new Error('waitFor exceeded')), timeout +
|
|
69
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error('waitFor exceeded')), timeout + TIMEOUTS.FALLBACK_TIMEOUT))
|
|
38
70
|
]);
|
|
39
71
|
} else {
|
|
40
72
|
await new Promise(resolve => setTimeout(resolve, timeout));
|
|
41
73
|
}
|
|
42
74
|
} catch (error) {
|
|
43
75
|
// If all else fails, use setTimeout
|
|
44
|
-
await new Promise(resolve => setTimeout(resolve, Math.min(timeout,
|
|
76
|
+
await new Promise(resolve => setTimeout(resolve, Math.min(timeout, TIMEOUTS.FALLBACK_TIMEOUT)));
|
|
45
77
|
}
|
|
46
78
|
}
|
|
47
79
|
|
|
48
80
|
/**
|
|
49
81
|
* Safe page evaluation with timeout protection
|
|
50
82
|
*/
|
|
51
|
-
async function safePageEvaluate(page, func, timeout =
|
|
83
|
+
async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_SAFE) {
|
|
52
84
|
try {
|
|
53
85
|
return await Promise.race([
|
|
54
86
|
page.evaluate(func),
|
|
@@ -72,12 +104,12 @@ async function safePageEvaluate(page, func, timeout = 10000) {
|
|
|
72
104
|
/**
|
|
73
105
|
* Safe element clicking with timeout protection
|
|
74
106
|
*/
|
|
75
|
-
async function safeClick(page, selector, timeout =
|
|
107
|
+
async function safeClick(page, selector, timeout = TIMEOUTS.CLICK_TIMEOUT) {
|
|
76
108
|
try {
|
|
77
109
|
return await Promise.race([
|
|
78
110
|
page.click(selector, { timeout: timeout }),
|
|
79
111
|
new Promise((_, reject) =>
|
|
80
|
-
setTimeout(() => reject(new Error('Click timeout')), timeout +
|
|
112
|
+
setTimeout(() => reject(new Error('Click timeout')), timeout + TIMEOUTS.CLICK_TIMEOUT_BUFFER)
|
|
81
113
|
)
|
|
82
114
|
]);
|
|
83
115
|
} catch (error) {
|
|
@@ -88,12 +120,12 @@ async function safeClick(page, selector, timeout = 5000) {
|
|
|
88
120
|
/**
|
|
89
121
|
* Safe navigation waiting with timeout protection
|
|
90
122
|
*/
|
|
91
|
-
async function safeWaitForNavigation(page, timeout =
|
|
123
|
+
async function safeWaitForNavigation(page, timeout = TIMEOUTS.NAVIGATION_TIMEOUT) {
|
|
92
124
|
try {
|
|
93
125
|
return await Promise.race([
|
|
94
126
|
page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: timeout }),
|
|
95
127
|
new Promise((_, reject) =>
|
|
96
|
-
setTimeout(() => reject(new Error('Navigation timeout')), timeout +
|
|
128
|
+
setTimeout(() => reject(new Error('Navigation timeout')), timeout + TIMEOUTS.NAVIGATION_TIMEOUT_BUFFER)
|
|
97
129
|
)
|
|
98
130
|
]);
|
|
99
131
|
} catch (error) {
|
|
@@ -141,7 +173,7 @@ async function quickCloudflareDetection(page, forceDebug = false) {
|
|
|
141
173
|
url,
|
|
142
174
|
bodySnippet: bodyText.substring(0, 200)
|
|
143
175
|
};
|
|
144
|
-
},
|
|
176
|
+
}, TIMEOUTS.QUICK_DETECTION);
|
|
145
177
|
|
|
146
178
|
if (forceDebug && quickCheck.hasIndicators) {
|
|
147
179
|
console.log(`[debug][cloudflare] Quick detection found Cloudflare indicators on ${quickCheck.url}`);
|
|
@@ -232,7 +264,7 @@ async function analyzeCloudflareChallenge(page) {
|
|
|
232
264
|
url: window.location.href,
|
|
233
265
|
bodySnippet: bodyText.substring(0, 200)
|
|
234
266
|
};
|
|
235
|
-
},
|
|
267
|
+
}, TIMEOUTS.PAGE_EVALUATION);
|
|
236
268
|
} catch (error) {
|
|
237
269
|
return {
|
|
238
270
|
isChallengePresent: false,
|
|
@@ -247,6 +279,17 @@ async function analyzeCloudflareChallenge(page) {
|
|
|
247
279
|
|
|
248
280
|
/**
|
|
249
281
|
* Handles Cloudflare phishing warnings with timeout protection and enhanced debug logging
|
|
282
|
+
*
|
|
283
|
+
* @param {Object} page - Puppeteer page instance
|
|
284
|
+
* @param {string} currentUrl - URL being processed
|
|
285
|
+
* @param {boolean} forceDebug - Debug logging flag
|
|
286
|
+
* @returns {Promise<Object>} Phishing warning result:
|
|
287
|
+
* {
|
|
288
|
+
* success: boolean, // True if no warning found OR successfully bypassed
|
|
289
|
+
* attempted: boolean, // True if warning was detected and bypass attempted
|
|
290
|
+
* error: string|null, // Error message if bypass failed
|
|
291
|
+
* details: object|null // Analysis details from analyzeCloudflareChallenge()
|
|
292
|
+
* }
|
|
250
293
|
*/
|
|
251
294
|
async function handlePhishingWarning(page, currentUrl, forceDebug = false) {
|
|
252
295
|
const result = {
|
|
@@ -260,7 +303,7 @@ async function handlePhishingWarning(page, currentUrl, forceDebug = false) {
|
|
|
260
303
|
if (forceDebug) console.log(`[debug][cloudflare] Checking for phishing warning on ${currentUrl}`);
|
|
261
304
|
|
|
262
305
|
// Shorter wait with timeout protection
|
|
263
|
-
await waitForTimeout(page,
|
|
306
|
+
await waitForTimeout(page, TIMEOUTS.PHISHING_WAIT);
|
|
264
307
|
|
|
265
308
|
const challengeInfo = await analyzeCloudflareChallenge(page);
|
|
266
309
|
|
|
@@ -277,8 +320,8 @@ async function handlePhishingWarning(page, currentUrl, forceDebug = false) {
|
|
|
277
320
|
|
|
278
321
|
try {
|
|
279
322
|
// Use safe click with shorter timeout
|
|
280
|
-
await safeClick(page, 'a[href*="continue"]',
|
|
281
|
-
await safeWaitForNavigation(page,
|
|
323
|
+
await safeClick(page, 'a[href*="continue"]', TIMEOUTS.PHISHING_CLICK);
|
|
324
|
+
await safeWaitForNavigation(page, TIMEOUTS.PHISHING_NAVIGATION);
|
|
282
325
|
|
|
283
326
|
result.success = true;
|
|
284
327
|
if (forceDebug) console.log(`[debug][cloudflare] Successfully bypassed phishing warning for ${currentUrl}`);
|
|
@@ -300,6 +343,19 @@ async function handlePhishingWarning(page, currentUrl, forceDebug = false) {
|
|
|
300
343
|
|
|
301
344
|
/**
|
|
302
345
|
* Attempts to solve Cloudflare challenges with timeout protection and enhanced debug logging
|
|
346
|
+
*
|
|
347
|
+
* @param {Object} page - Puppeteer page instance
|
|
348
|
+
* @param {string} currentUrl - URL being processed
|
|
349
|
+
* @param {boolean} forceDebug - Debug logging flag
|
|
350
|
+
* @returns {Promise<Object>} Challenge verification result:
|
|
351
|
+
* {
|
|
352
|
+
* success: boolean, // True if no challenge found OR successfully solved
|
|
353
|
+
* attempted: boolean, // True if challenge was detected and solving attempted
|
|
354
|
+
* error: string|null, // Error message if solving failed
|
|
355
|
+
* requiresHuman: boolean, // True if CAPTCHA detected (requires manual intervention)
|
|
356
|
+
* method: string|null, // Method that succeeded: 'js_challenge_wait', 'turnstile', 'legacy_checkbox'
|
|
357
|
+
* details: object|null // Analysis details from analyzeCloudflareChallenge()
|
|
358
|
+
* }
|
|
303
359
|
*/
|
|
304
360
|
async function handleVerificationChallenge(page, currentUrl, forceDebug = false) {
|
|
305
361
|
const result = {
|
|
@@ -315,7 +371,7 @@ async function handleVerificationChallenge(page, currentUrl, forceDebug = false)
|
|
|
315
371
|
if (forceDebug) console.log(`[debug][cloudflare] Checking for verification challenge on ${currentUrl}`);
|
|
316
372
|
|
|
317
373
|
// Reduced wait time
|
|
318
|
-
await waitForTimeout(page,
|
|
374
|
+
await waitForTimeout(page, TIMEOUTS.CHALLENGE_WAIT);
|
|
319
375
|
|
|
320
376
|
const challengeInfo = await analyzeCloudflareChallenge(page);
|
|
321
377
|
result.details = challengeInfo;
|
|
@@ -381,7 +437,7 @@ async function attemptChallengeSolveWithTimeout(page, currentUrl, challengeInfo,
|
|
|
381
437
|
return await Promise.race([
|
|
382
438
|
attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug),
|
|
383
439
|
new Promise((_, reject) =>
|
|
384
|
-
setTimeout(() => reject(new Error('Challenge solving timeout')),
|
|
440
|
+
setTimeout(() => reject(new Error('Challenge solving timeout')), TIMEOUTS.CHALLENGE_SOLVING)
|
|
385
441
|
)
|
|
386
442
|
]);
|
|
387
443
|
} catch (error) {
|
|
@@ -480,10 +536,10 @@ async function waitForJSChallengeCompletion(page, forceDebug = false) {
|
|
|
480
536
|
!document.querySelector('.cf-challenge-running') &&
|
|
481
537
|
!document.querySelector('[data-cf-challenge]');
|
|
482
538
|
},
|
|
483
|
-
{ timeout:
|
|
539
|
+
{ timeout: TIMEOUTS.JS_CHALLENGE }
|
|
484
540
|
),
|
|
485
541
|
new Promise((_, reject) =>
|
|
486
|
-
setTimeout(() => reject(new Error('JS challenge timeout')),
|
|
542
|
+
setTimeout(() => reject(new Error('JS challenge timeout')), TIMEOUTS.JS_CHALLENGE_BUFFER)
|
|
487
543
|
)
|
|
488
544
|
]);
|
|
489
545
|
|
|
@@ -508,7 +564,7 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
|
|
|
508
564
|
|
|
509
565
|
try {
|
|
510
566
|
// Reduced timeout for Turnstile operations
|
|
511
|
-
const turnstileTimeout =
|
|
567
|
+
const turnstileTimeout = TIMEOUTS.TURNSTILE_OPERATION;
|
|
512
568
|
|
|
513
569
|
const turnstileSelectors = [
|
|
514
570
|
'iframe[src*="challenges.cloudflare.com"]',
|
|
@@ -520,8 +576,8 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
|
|
|
520
576
|
for (const selector of turnstileSelectors) {
|
|
521
577
|
try {
|
|
522
578
|
await Promise.race([
|
|
523
|
-
page.waitForSelector(selector, { timeout:
|
|
524
|
-
new Promise((_, reject) => setTimeout(() => reject(new Error('Selector timeout')),
|
|
579
|
+
page.waitForSelector(selector, { timeout: TIMEOUTS.SELECTOR_WAIT }),
|
|
580
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error('Selector timeout')), TIMEOUTS.SELECTOR_WAIT_BUFFER))
|
|
525
581
|
]);
|
|
526
582
|
|
|
527
583
|
const frames = await page.frames();
|
|
@@ -554,11 +610,11 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
|
|
|
554
610
|
for (const selector of checkboxSelectors) {
|
|
555
611
|
try {
|
|
556
612
|
await Promise.race([
|
|
557
|
-
turnstileFrame.waitForSelector(selector, { timeout:
|
|
558
|
-
new Promise((_, reject) => setTimeout(() => reject(new Error('Checkbox timeout')),
|
|
613
|
+
turnstileFrame.waitForSelector(selector, { timeout: TIMEOUTS.SELECTOR_WAIT }),
|
|
614
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error('Checkbox timeout')), TIMEOUTS.SELECTOR_WAIT_BUFFER))
|
|
559
615
|
]);
|
|
560
616
|
|
|
561
|
-
await waitForTimeout(page,
|
|
617
|
+
await waitForTimeout(page, TIMEOUTS.ELEMENT_INTERACTION_DELAY);
|
|
562
618
|
await turnstileFrame.click(selector);
|
|
563
619
|
|
|
564
620
|
if (forceDebug) console.log(`[debug][cloudflare] Clicked Turnstile checkbox: ${selector}`);
|
|
@@ -576,9 +632,9 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
|
|
|
576
632
|
const responseInput = document.querySelector('input[name="cf-turnstile-response"]');
|
|
577
633
|
return responseInput && responseInput.value && responseInput.value.length > 0;
|
|
578
634
|
},
|
|
579
|
-
{ timeout:
|
|
635
|
+
{ timeout: TIMEOUTS.TURNSTILE_COMPLETION }
|
|
580
636
|
),
|
|
581
|
-
new Promise((_, reject) => setTimeout(() => reject(new Error('Turnstile completion timeout')),
|
|
637
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error('Turnstile completion timeout')), TIMEOUTS.TURNSTILE_COMPLETION_BUFFER))
|
|
582
638
|
]);
|
|
583
639
|
|
|
584
640
|
if (forceDebug) console.log(`[debug][cloudflare] Turnstile response token generated successfully`);
|
|
@@ -596,11 +652,11 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
|
|
|
596
652
|
for (const selector of containerSelectors) {
|
|
597
653
|
try {
|
|
598
654
|
await Promise.race([
|
|
599
|
-
page.waitForSelector(selector, { timeout:
|
|
600
|
-
new Promise((_, reject) => setTimeout(() => reject(new Error('Container timeout')),
|
|
655
|
+
page.waitForSelector(selector, { timeout: TIMEOUTS.SELECTOR_WAIT }),
|
|
656
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error('Container timeout')), TIMEOUTS.SELECTOR_WAIT_BUFFER))
|
|
601
657
|
]);
|
|
602
658
|
|
|
603
|
-
await waitForTimeout(page,
|
|
659
|
+
await waitForTimeout(page, TIMEOUTS.ELEMENT_INTERACTION_DELAY);
|
|
604
660
|
await page.click(selector);
|
|
605
661
|
|
|
606
662
|
if (forceDebug) console.log(`[debug][cloudflare] Clicked Turnstile container: ${selector}`);
|
|
@@ -652,8 +708,8 @@ async function handleLegacyCheckbox(page, forceDebug = false) {
|
|
|
652
708
|
for (const selector of legacySelectors) {
|
|
653
709
|
try {
|
|
654
710
|
await Promise.race([
|
|
655
|
-
page.waitForSelector(selector, { timeout:
|
|
656
|
-
new Promise((_, reject) => setTimeout(() => reject(new Error('Legacy selector timeout')),
|
|
711
|
+
page.waitForSelector(selector, { timeout: TIMEOUTS.SELECTOR_WAIT }),
|
|
712
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error('Legacy selector timeout')), TIMEOUTS.SELECTOR_WAIT_BUFFER))
|
|
657
713
|
]);
|
|
658
714
|
|
|
659
715
|
const checkbox = await page.$(selector);
|
|
@@ -705,7 +761,7 @@ async function checkChallengeCompletion(page) {
|
|
|
705
761
|
return (noChallengeRunning && noChallengeContainer && noChallengePage) ||
|
|
706
762
|
hasClearanceCookie ||
|
|
707
763
|
hasTurnstileResponse;
|
|
708
|
-
},
|
|
764
|
+
}, TIMEOUTS.CHALLENGE_COMPLETION);
|
|
709
765
|
|
|
710
766
|
return { isCompleted };
|
|
711
767
|
} catch (error) {
|
|
@@ -715,6 +771,43 @@ async function checkChallengeCompletion(page) {
|
|
|
715
771
|
|
|
716
772
|
/**
|
|
717
773
|
* Main function to handle all Cloudflare challenges with smart detection and adaptive timeouts
|
|
774
|
+
*
|
|
775
|
+
* @param {Object} page - Puppeteer page instance
|
|
776
|
+
* @param {string} currentUrl - URL being processed
|
|
777
|
+
* @param {Object} siteConfig - Configuration object with cloudflare_phish and cloudflare_bypass flags
|
|
778
|
+
* @param {boolean} forceDebug - Enable debug logging
|
|
779
|
+
*
|
|
780
|
+
* @returns {Promise<Object>} Result object with the following structure:
|
|
781
|
+
* {
|
|
782
|
+
* phishingWarning: {
|
|
783
|
+
* attempted: boolean, // Whether phishing bypass was attempted
|
|
784
|
+
* success: boolean, // Whether bypass succeeded (true if no warning or successfully bypassed)
|
|
785
|
+
* error: string|null, // Error message if bypass failed
|
|
786
|
+
* details: object|null // Challenge analysis details from analyzeCloudflareChallenge()
|
|
787
|
+
* },
|
|
788
|
+
* verificationChallenge: {
|
|
789
|
+
* attempted: boolean, // Whether challenge bypass was attempted
|
|
790
|
+
* success: boolean, // Whether challenge was solved (true if no challenge or successfully solved)
|
|
791
|
+
* error: string|null, // Error message if solving failed
|
|
792
|
+
* requiresHuman: boolean, // True if CAPTCHA detected - requires manual intervention
|
|
793
|
+
* method: string|null, // Successful method used: 'js_challenge_wait', 'turnstile', 'legacy_checkbox'
|
|
794
|
+
* details: object|null // Challenge analysis details from analyzeCloudflareChallenge()
|
|
795
|
+
* },
|
|
796
|
+
* overallSuccess: boolean, // True if no critical failures occurred (challenges may be unsolved but didn't error)
|
|
797
|
+
* errors: string[], // Array of error messages from failed operations
|
|
798
|
+
* skippedNoIndicators: boolean, // True if processing was skipped due to no Cloudflare indicators detected
|
|
799
|
+
* timedOut: boolean // True if adaptive timeout was reached (processing continued anyway)
|
|
800
|
+
* }
|
|
801
|
+
*
|
|
802
|
+
* @example
|
|
803
|
+
* const result = await handleCloudflareProtection(page, url, {cloudflare_bypass: true}, false);
|
|
804
|
+
* if (result.verificationChallenge.requiresHuman) {
|
|
805
|
+
* console.log('Manual CAPTCHA solving required');
|
|
806
|
+
* } else if (!result.overallSuccess) {
|
|
807
|
+
* console.error('Critical errors:', result.errors);
|
|
808
|
+
* } else if (result.verificationChallenge.attempted && result.verificationChallenge.success) {
|
|
809
|
+
* console.log(`Challenge solved using: ${result.verificationChallenge.method}`);
|
|
810
|
+
* }
|
|
718
811
|
*/
|
|
719
812
|
async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDebug = false) {
|
|
720
813
|
if (forceDebug) {
|
|
@@ -722,7 +815,10 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
|
|
|
722
815
|
}
|
|
723
816
|
// Quick detection first - exit early if no Cloudflare detected and no explicit config
|
|
724
817
|
const quickDetection = await quickCloudflareDetection(page, forceDebug);
|
|
725
|
-
|
|
818
|
+
|
|
819
|
+
// Early return structure when no Cloudflare indicators found
|
|
820
|
+
// Sets attempted: false, success: true for both protection types
|
|
821
|
+
|
|
726
822
|
// Only proceed if we have indicators OR explicit config enables Cloudflare handling
|
|
727
823
|
if (!quickDetection.hasIndicators && !siteConfig.cloudflare_phish && !siteConfig.cloudflare_bypass) {
|
|
728
824
|
if (forceDebug) console.log(`[debug][cloudflare] No Cloudflare indicators found and no explicit config, skipping protection handling for ${currentUrl}`);
|
|
@@ -736,6 +832,9 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
|
|
|
736
832
|
};
|
|
737
833
|
}
|
|
738
834
|
|
|
835
|
+
// Standard return structure for all processing paths
|
|
836
|
+
// Individual handlers update their respective sections
|
|
837
|
+
// overallSuccess becomes false if any critical errors occur
|
|
739
838
|
const result = {
|
|
740
839
|
phishingWarning: { attempted: false, success: false },
|
|
741
840
|
verificationChallenge: { attempted: false, success: false },
|
|
@@ -748,10 +847,10 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
|
|
|
748
847
|
let adaptiveTimeout;
|
|
749
848
|
if (siteConfig.cloudflare_phish || siteConfig.cloudflare_bypass) {
|
|
750
849
|
// Explicit config - give more time
|
|
751
|
-
adaptiveTimeout = quickDetection.hasIndicators ?
|
|
850
|
+
adaptiveTimeout = quickDetection.hasIndicators ? TIMEOUTS.ADAPTIVE_TIMEOUT_WITH_INDICATORS : TIMEOUTS.ADAPTIVE_TIMEOUT_WITHOUT_INDICATORS;
|
|
752
851
|
} else {
|
|
753
852
|
// Auto-detected only - shorter timeout
|
|
754
|
-
adaptiveTimeout = quickDetection.hasIndicators ?
|
|
853
|
+
adaptiveTimeout = quickDetection.hasIndicators ? TIMEOUTS.ADAPTIVE_TIMEOUT_AUTO_WITH_INDICATORS : TIMEOUTS.ADAPTIVE_TIMEOUT_AUTO_WITHOUT_INDICATORS;
|
|
755
854
|
}
|
|
756
855
|
|
|
757
856
|
if (forceDebug) {
|
|
@@ -783,6 +882,12 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
|
|
|
783
882
|
|
|
784
883
|
/**
|
|
785
884
|
* Performs the actual Cloudflare handling with enhanced debug logging
|
|
885
|
+
*
|
|
886
|
+
* @param {Object} page - Puppeteer page instance
|
|
887
|
+
* @param {string} currentUrl - URL being processed
|
|
888
|
+
* @param {Object} siteConfig - Configuration flags
|
|
889
|
+
* @param {boolean} forceDebug - Debug logging flag
|
|
890
|
+
* @returns {Promise<Object>} Same structure as handleCloudflareProtection()
|
|
786
891
|
*/
|
|
787
892
|
async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebug = false) {
|
|
788
893
|
const result = {
|
|
@@ -794,6 +899,8 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
|
|
|
794
899
|
|
|
795
900
|
if (forceDebug) console.log(`[debug][cloudflare] Starting Cloudflare protection handling for ${currentUrl}`);
|
|
796
901
|
|
|
902
|
+
// Handle phishing warnings first - updates result.phishingWarning
|
|
903
|
+
// Only runs if siteConfig.cloudflare_phish === true
|
|
797
904
|
// Handle phishing warnings if enabled
|
|
798
905
|
if (siteConfig.cloudflare_phish === true) {
|
|
799
906
|
if (forceDebug) console.log(`[debug][cloudflare] Phishing warning bypass enabled for ${currentUrl}`);
|
|
@@ -812,6 +919,9 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
|
|
|
812
919
|
console.log(`[debug][cloudflare] Phishing warning bypass disabled for ${currentUrl}`);
|
|
813
920
|
}
|
|
814
921
|
|
|
922
|
+
// Handle verification challenges second - updates result.verificationChallenge
|
|
923
|
+
// Only runs if siteConfig.cloudflare_bypass === true
|
|
924
|
+
// Sets requiresHuman: true if CAPTCHA detected (no bypass attempted)
|
|
815
925
|
// Handle verification challenges if enabled
|
|
816
926
|
if (siteConfig.cloudflare_bypass === true) {
|
|
817
927
|
if (forceDebug) console.log(`[debug][cloudflare] Challenge bypass enabled for ${currentUrl}`);
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Domain Cache Module - Tracks detected domains to prevent duplicate processing
|
|
3
|
+
* Provides performance optimization by skipping already detected domains
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
const { formatLogMessage } = require('./colorize');
|
|
7
|
+
|
|
8
|
+
/**
 * In-memory registry of domains that have already been detected, so callers
 * can skip processing the same domain more than once.
 *
 * Entries are kept in a Set. A Set iterates in insertion order, so once
 * `maxCacheSize` is reached the entries that were added first are evicted first.
 * Lookup and eviction counters are tracked in `stats`.
 */
class DomainCache {
  /**
   * @param {object} [options] - Cache configuration
   * @param {boolean} [options.enableLogging=false] - Log cache activity through formatLogMessage
   * @param {string} [options.logPrefix='[domain-cache]'] - Prefix placed on every log line
   * @param {number} [options.maxCacheSize=10000] - Entry count at which old entries are evicted
   */
  constructor(options = {}) {
    this.cache = new Set();
    this.stats = {
      totalDetected: 0,
      totalSkipped: 0,
      cacheHits: 0,
      cacheMisses: 0
    };
    this.options = {
      enableLogging: options.enableLogging || false,
      logPrefix: options.logPrefix || '[domain-cache]',
      maxCacheSize: options.maxCacheSize || 10000 // Bounds memory use
    };
  }

  /**
   * Check whether a domain has already been detected, updating hit/miss stats.
   * @param {string} domain - Domain to check
   * @returns {boolean} True if the domain is in the cache; false for a miss
   *   or for a non-string/empty argument (which does not touch the stats)
   */
  isDomainAlreadyDetected(domain) {
    if (!domain || typeof domain !== 'string') {
      return false;
    }

    const isDetected = this.cache.has(domain);

    if (isDetected) {
      this.stats.totalSkipped++;
      this.stats.cacheHits++;

      if (this.options.enableLogging) {
        console.log(formatLogMessage('debug', `${this.options.logPrefix} Cache HIT: ${domain} (skipped)`));
      }
    } else {
      this.stats.cacheMisses++;

      if (this.options.enableLogging) {
        console.log(formatLogMessage('debug', `${this.options.logPrefix} Cache MISS: ${domain} (processing)`));
      }
    }

    return isDetected;
  }

  /**
   * Record a domain as detected.
   * @param {string} domain - Domain to mark as detected
   * @returns {boolean} True if the domain was newly added; false if it was
   *   already cached or the argument is not a non-empty string
   */
  markDomainAsDetected(domain) {
    if (!domain || typeof domain !== 'string') {
      return false;
    }

    // A known domain changes nothing. Returning before the size check keeps a
    // repeat call on a full cache from evicting entries (possibly this very
    // domain, which would then be re-added and miscounted as new).
    if (this.cache.has(domain)) {
      return false;
    }

    if (this.cache.size >= this.options.maxCacheSize) {
      // Evict roughly 10% of the limit, but always at least one entry so that
      // limits below 10 still keep the cache bounded.
      const evictCount = Math.max(1, Math.floor(this.options.maxCacheSize * 0.1));
      this.clearOldestEntries(evictCount);
    }

    this.cache.add(domain);
    this.stats.totalDetected++;

    if (this.options.enableLogging) {
      console.log(formatLogMessage('debug', `${this.options.logPrefix} Marked as detected: ${domain} (cache size: ${this.cache.size})`));
    }

    return true;
  }

  /**
   * Remove the oldest entries from the cache (first-in, first-out).
   * Relies on Set iterating in insertion order.
   * @param {number} count - Number of entries to remove; fractional values are
   *   rounded down, non-positive or NaN values remove nothing
   */
  clearOldestEntries(count) {
    if (!(count > 0)) return;

    const limit = Math.floor(count);
    let removed = 0;

    // Deleting the entry currently being visited is safe while iterating a Set.
    for (const domain of this.cache) {
      if (removed >= limit) break;
      this.cache.delete(domain);
      removed++;
    }

    if (this.options.enableLogging) {
      console.log(formatLogMessage('debug', `${this.options.logPrefix} Cleared ${removed} old entries, cache size now: ${this.cache.size}`));
    }
  }

  /**
   * Get cache statistics.
   * @returns {object} Copy of the counters plus `cacheSize` and `hitRate`
   *   (a percentage string with two decimals, or '0%' when there are no hits)
   */
  getStats() {
    const { cacheHits, cacheMisses } = this.stats;
    const hitRate = cacheHits > 0
      ? (cacheHits / (cacheHits + cacheMisses) * 100).toFixed(2) + '%'
      : '0%';

    return {
      ...this.stats,
      cacheSize: this.cache.size,
      hitRate
    };
  }

  /**
   * Remove every cached domain and reset all counters to zero.
   */
  clear() {
    const previousSize = this.cache.size;
    this.cache.clear();
    this.stats = {
      totalDetected: 0,
      totalSkipped: 0,
      cacheHits: 0,
      cacheMisses: 0
    };

    if (this.options.enableLogging) {
      console.log(formatLogMessage('debug', `${this.options.logPrefix} Cache cleared (${previousSize} entries removed)`));
    }
  }

  /**
   * Get all cached domains (for debugging).
   * @returns {Array<string>} Cached domains, oldest first
   */
  getAllCachedDomains() {
    return Array.from(this.cache);
  }

  /**
   * Check if the cache contains a domain without updating any stats.
   * @param {string} domain - Domain to check
   * @returns {boolean} True if the domain exists in the cache
   */
  has(domain) {
    return this.cache.has(domain);
  }

  /**
   * Remove a specific domain from the cache.
   * @param {string} domain - Domain to remove
   * @returns {boolean} True if the domain was removed, false if it wasn't cached
   */
  removeDomain(domain) {
    const wasRemoved = this.cache.delete(domain);

    if (wasRemoved && this.options.enableLogging) {
      console.log(formatLogMessage('debug', `${this.options.logPrefix} Removed from cache: ${domain}`));
    }

    return wasRemoved;
  }

  /**
   * Add multiple domains to the cache at once.
   * @param {Array<string>} domains - Domains to add
   * @returns {number} Number of domains actually added (duplicates and invalid
   *   values are not counted); 0 if the argument is not an array
   */
  markMultipleDomainsAsDetected(domains) {
    if (!Array.isArray(domains)) {
      return 0;
    }

    let addedCount = 0;
    for (const domain of domains) {
      if (this.markDomainAsDetected(domain)) {
        addedCount++;
      }
    }

    return addedCount;
  }

  /**
   * Create helper functions bound to this instance, for call sites that want
   * plain functions rather than the cache object.
   * @returns {object} Object with bound helper functions
   */
  createHelpers() {
    return {
      isDomainAlreadyDetected: this.isDomainAlreadyDetected.bind(this),
      markDomainAsDetected: this.markDomainAsDetected.bind(this),
      getSkippedCount: () => this.stats.totalSkipped,
      getCacheSize: () => this.cache.size,
      getStats: this.getStats.bind(this)
    };
  }
}
|
|
202
|
+
|
|
203
|
+
/**
 * Module-wide DomainCache singleton; stays null until it is first requested.
 */
let globalDomainCache = null;

/**
 * Return the shared DomainCache, constructing it on first use.
 * @param {object} options - Cache options; only applied by the call that
 *   actually creates the instance, ignored once one exists
 * @returns {DomainCache} Global cache instance
 */
function getGlobalDomainCache(options = {}) {
  globalDomainCache = globalDomainCache || new DomainCache(options);
  return globalDomainCache;
}
|
|
219
|
+
|
|
220
|
+
/**
 * Build the bound helper functions on top of the global cache.
 * @param {object} options - Cache options (only used if the global cache doesn't exist yet)
 * @returns {object} Helper functions bound to the global cache
 */
function createGlobalHelpers(options = {}) {
  return getGlobalDomainCache(options).createHelpers();
}
|
|
229
|
+
|
|
230
|
+
/**
 * Drop the shared cache: clear the current instance (if any) and forget it,
 * so the next getGlobalDomainCache() call builds a fresh one.
 */
function resetGlobalCache() {
  const active = globalDomainCache;

  // Empty the existing instance before releasing the reference to it.
  if (active) {
    active.clear();
  }

  globalDomainCache = null;
}
|
|
239
|
+
|
|
240
|
+
/**
|
|
241
|
+
* Legacy wrapper functions for backward compatibility
|
|
242
|
+
* These match the original function signatures from nwss.js
|
|
243
|
+
*/
|
|
244
|
+
|
|
245
|
+
/**
 * Legacy wrapper: ask the shared cache whether a domain was already detected.
 * @param {string} domain - Domain to look up
 * @returns {boolean} True if domain was already detected
 */
function isDomainAlreadyDetected(domain) {
  return getGlobalDomainCache().isDomainAlreadyDetected(domain);
}
|
|
254
|
+
|
|
255
|
+
/**
 * Mark a domain as detected (legacy wrapper around the shared cache).
 *
 * The cache's own result is passed through so this wrapper agrees with the
 * bound helper returned by createHelpers(); callers that ignore the return
 * value are unaffected.
 * @param {string} domain - Domain to mark as detected
 * @returns {boolean} Result reported by the cache's markDomainAsDetected()
 *   (truthy when the domain was newly added)
 */
function markDomainAsDetected(domain) {
  const cache = getGlobalDomainCache();
  return cache.markDomainAsDetected(domain);
}
|
|
263
|
+
|
|
264
|
+
/**
 * Legacy wrapper: read the skip counter from the shared cache's statistics.
 * @returns {number} Number of domains skipped
 */
function getTotalDomainsSkipped() {
  const { stats } = getGlobalDomainCache();
  return stats.totalSkipped;
}
|
|
272
|
+
|
|
273
|
+
/**
 * Legacy wrapper: report how many entries the shared cache currently holds.
 * @returns {number} Size of the detected domains cache
 */
function getDetectedDomainsCount() {
  const sharedCache = getGlobalDomainCache();
  const entries = sharedCache.cache;
  return entries.size;
}
|
|
281
|
+
|
|
282
|
+
module.exports = {
|
|
283
|
+
// Main class
|
|
284
|
+
DomainCache,
|
|
285
|
+
|
|
286
|
+
// Global cache functions
|
|
287
|
+
getGlobalDomainCache,
|
|
288
|
+
createGlobalHelpers,
|
|
289
|
+
resetGlobalCache,
|
|
290
|
+
|
|
291
|
+
// Legacy wrapper functions for backward compatibility
|
|
292
|
+
isDomainAlreadyDetected,
|
|
293
|
+
markDomainAsDetected,
|
|
294
|
+
getTotalDomainsSkipped,
|
|
295
|
+
getDetectedDomainsCount
|
|
296
|
+
};
|
package/lib/nettools.js
CHANGED
|
@@ -711,7 +711,7 @@ function createNetToolsHandler(config) {
|
|
|
711
711
|
dryRunCallback = null,
|
|
712
712
|
matchedDomains,
|
|
713
713
|
addMatchedDomain,
|
|
714
|
-
|
|
714
|
+
isDomainAlreadyDetected,
|
|
715
715
|
getRootDomain,
|
|
716
716
|
siteConfig,
|
|
717
717
|
dumpUrls,
|
|
@@ -743,8 +743,19 @@ function createNetToolsHandler(config) {
|
|
|
743
743
|
const DIG_CACHE_TTL = 300000; // 5 minutes cache TTL
|
|
744
744
|
const DIG_MAX_CACHE_SIZE = 400; // Smaller cache for dig due to shorter TTL
|
|
745
745
|
|
|
746
|
-
return async function handleNetToolsCheck(domain,
|
|
746
|
+
return async function handleNetToolsCheck(domain, fullSubdomain) {
|
|
747
|
+
// Use fullSubdomain parameter instead of originalDomain to maintain consistency
|
|
748
|
+
// with the domain cache fix approach
|
|
749
|
+
const originalDomain = fullSubdomain;
|
|
747
750
|
// Helper function to log to BOTH console and debug file
|
|
751
|
+
|
|
752
|
+
// Check if domain was already detected (skip expensive operations)
|
|
753
|
+
if (typeof isDomainAlreadyDetected === 'function' && isDomainAlreadyDetected(fullSubdomain)) {
|
|
754
|
+
if (forceDebug) {
|
|
755
|
+
logToConsoleAndFile(`${messageColors.highlight('[nettools]')} Skipping already detected subdomain: ${fullSubdomain} (output domain: ${domain})`);
|
|
756
|
+
}
|
|
757
|
+
return;
|
|
758
|
+
}
|
|
748
759
|
|
|
749
760
|
// NOTE: The logToConsoleAndFile function needs to be declared INSIDE this function
|
|
750
761
|
// so it has access to the closure variables (forceDebug, debugLogFile, fs) from the
|
|
@@ -1267,13 +1278,13 @@ function createNetToolsHandler(config) {
|
|
|
1267
1278
|
// No need to add to matched domains
|
|
1268
1279
|
} else {
|
|
1269
1280
|
if (typeof addMatchedDomain === 'function') {
|
|
1270
|
-
addMatchedDomain(domain);
|
|
1281
|
+
addMatchedDomain(domain, null, fullSubdomain);
|
|
1271
1282
|
} else {
|
|
1272
1283
|
matchedDomains.add(domain);
|
|
1273
1284
|
}
|
|
1274
1285
|
}
|
|
1275
1286
|
|
|
1276
|
-
const simplifiedUrl = currentUrl ? getRootDomain(currentUrl) : 'unknown';
|
|
1287
|
+
const simplifiedUrl = config.currentUrl ? getRootDomain(config.currentUrl) : 'unknown';
|
|
1277
1288
|
|
|
1278
1289
|
if (siteConfig.verbose === 1) {
|
|
1279
1290
|
const matchType = [];
|
package/lib/output.js
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
const fs = require('fs');
|
|
2
2
|
const path = require('path');
|
|
3
|
+
// Import domain cache functions for statistics
|
|
4
|
+
const { getTotalDomainsSkipped } = require('./domain-cache');
|
|
3
5
|
const { loadComparisonRules, filterUniqueRules } = require('./compare');
|
|
4
6
|
const { colorize, colors, messageColors, tags, formatLogMessage } = require('./colorize');
|
|
5
7
|
|
|
@@ -426,7 +428,6 @@ function writeOutput(lines, outputFile = null, silentMode = false) {
|
|
|
426
428
|
* Main output handler that combines all output operations
|
|
427
429
|
* @param {Array} results - Processing results from scanner
|
|
428
430
|
* @param {object} config - Output configuration
|
|
429
|
-
* @param {string[]} config.ignoreDomains - Domains to filter out from final output
|
|
430
431
|
* @returns {object} Output statistics and file paths
|
|
431
432
|
*/
|
|
432
433
|
function handleOutput(results, config = {}) {
|
|
@@ -440,7 +441,8 @@ function handleOutput(results, config = {}) {
|
|
|
440
441
|
dumpUrls = false,
|
|
441
442
|
adblockRulesLogFile = null,
|
|
442
443
|
forceDebug = false,
|
|
443
|
-
ignoreDomains = []
|
|
444
|
+
ignoreDomains = [],
|
|
445
|
+
totalDomainsSkipped = null // Allow override or get from cache
|
|
444
446
|
} = config;
|
|
445
447
|
|
|
446
448
|
// Handle append mode
|
|
@@ -572,7 +574,11 @@ function handleOutput(results, config = {}) {
|
|
|
572
574
|
if (dumpUrls && adblockRulesLogFile) {
|
|
573
575
|
logSuccess = writeOutput(outputLinesWithTitles, adblockRulesLogFile, silentMode);
|
|
574
576
|
}
|
|
575
|
-
|
|
577
|
+
|
|
578
|
+
// Get domain skip statistics from cache if not provided
|
|
579
|
+
const finalTotalDomainsSkipped = totalDomainsSkipped !== null ?
|
|
580
|
+
totalDomainsSkipped : getTotalDomainsSkipped();
|
|
581
|
+
|
|
576
582
|
return {
|
|
577
583
|
success: mainSuccess && logSuccess,
|
|
578
584
|
outputFile,
|
|
@@ -582,6 +588,8 @@ function handleOutput(results, config = {}) {
|
|
|
582
588
|
filteredOutCount,
|
|
583
589
|
totalLines: filteredOutputLines.length,
|
|
584
590
|
outputLines: outputFile ? null : filteredOutputLines // Only return lines if not written to file
|
|
591
|
+
// Note: totalDomainsSkipped statistic is now available via getTotalDomainsSkipped()
|
|
592
|
+
// and doesn't need to be passed through the output handler
|
|
585
593
|
};
|
|
586
594
|
}
|
|
587
595
|
|
package/lib/searchstring.js
CHANGED
|
@@ -250,11 +250,19 @@ function createCurlHandler(config) {
|
|
|
250
250
|
} = config;
|
|
251
251
|
|
|
252
252
|
return async function curlHandler(requestUrl) {
|
|
253
|
-
const respDomain = perSiteSubDomains ? (new URL(requestUrl)).hostname : getRootDomain(requestUrl);
|
|
254
253
|
|
|
255
254
|
// Only process URLs that match our regex patterns
|
|
256
255
|
const matchesRegex = regexes.some(re => re.test(requestUrl));
|
|
257
256
|
if (!matchesRegex) return;
|
|
257
|
+
|
|
258
|
+
// Extract domain and check if already detected (skip expensive operations)
|
|
259
|
+
const reqDomain = perSiteSubDomains ? (new URL(requestUrl)).hostname : getRootDomain(requestUrl);
|
|
260
|
+
if (typeof config.isDomainAlreadyDetected === 'function' && config.isDomainAlreadyDetected(reqDomain)) {
|
|
261
|
+
if (forceDebug) {
|
|
262
|
+
console.log(`[debug][curl] Skipping already detected domain: ${reqDomain}`);
|
|
263
|
+
}
|
|
264
|
+
return;
|
|
265
|
+
}
|
|
258
266
|
|
|
259
267
|
// Check if this is a first-party request (same domain as the URL being scanned)
|
|
260
268
|
const currentUrlHostname = new URL(currentUrl).hostname;
|
|
@@ -283,11 +291,11 @@ function createCurlHandler(config) {
|
|
|
283
291
|
|
|
284
292
|
// If NO searchstring is defined, match immediately (like browser behavior)
|
|
285
293
|
if (!hasSearchString && !hasSearchStringAnd) {
|
|
286
|
-
if (!
|
|
294
|
+
if (!reqDomain || matchesIgnoreDomain(reqDomain, ignoreDomains)) {
|
|
287
295
|
return;
|
|
288
296
|
}
|
|
289
297
|
|
|
290
|
-
addDomainToCollection(matchedDomains, addMatchedDomain,
|
|
298
|
+
addDomainToCollection(matchedDomains, addMatchedDomain, reqDomain, resourceType);
|
|
291
299
|
const simplifiedUrl = getRootDomain(currentUrl);
|
|
292
300
|
|
|
293
301
|
if (siteConfig.verbose === 1) {
|
|
@@ -317,11 +325,11 @@ function createCurlHandler(config) {
|
|
|
317
325
|
const { found, matchedString, logicType } = searchContent(content, searchStrings, searchStringsAnd, '');
|
|
318
326
|
|
|
319
327
|
if (found) {
|
|
320
|
-
if (!
|
|
328
|
+
if (!reqDomain || matchesIgnoreDomain(reqDomain, ignoreDomains)) {
|
|
321
329
|
return;
|
|
322
330
|
}
|
|
323
331
|
|
|
324
|
-
addDomainToCollection(matchedDomains, addMatchedDomain,
|
|
332
|
+
addDomainToCollection(matchedDomains, addMatchedDomain, reqDomain, resourceType);
|
|
325
333
|
const simplifiedUrl = getRootDomain(currentUrl);
|
|
326
334
|
|
|
327
335
|
if (siteConfig.verbose === 1) {
|
|
@@ -387,6 +395,14 @@ function createResponseHandler(config) {
|
|
|
387
395
|
const matchesRegex = regexes.some(re => re.test(respUrl));
|
|
388
396
|
if (!matchesRegex) return;
|
|
389
397
|
|
|
398
|
+
// Extract domain and check if already detected (skip expensive operations)
|
|
399
|
+
if (typeof config.isDomainAlreadyDetected === 'function' && config.isDomainAlreadyDetected(respDomain)) {
|
|
400
|
+
if (forceDebug) {
|
|
401
|
+
console.log(`[debug] Skipping response analysis for already detected domain: ${respDomain}`);
|
|
402
|
+
}
|
|
403
|
+
return;
|
|
404
|
+
}
|
|
405
|
+
|
|
390
406
|
// Check if this is a first-party response (same domain as the URL being scanned)
|
|
391
407
|
const currentUrlHostname = new URL(currentUrl).hostname;
|
|
392
408
|
const responseHostname = new URL(respUrl).hostname;
|
package/nwss.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// === Network scanner script (nwss.js) v1.0.
|
|
1
|
+
// === Network scanner script (nwss.js) v1.0.44 ===
|
|
2
2
|
|
|
3
3
|
// puppeteer for browser automation, fs for file system operations, psl for domain parsing.
|
|
4
4
|
// const pLimit = require('p-limit'); // Will be dynamically imported
|
|
@@ -27,17 +27,23 @@ const { createNetToolsHandler, createEnhancedDryRunCallback, validateWhoisAvaila
|
|
|
27
27
|
const { loadComparisonRules, filterUniqueRules } = require('./lib/compare');
|
|
28
28
|
// Colorize various text when used
|
|
29
29
|
const { colorize, colors, messageColors, tags, formatLogMessage } = require('./lib/colorize');
|
|
30
|
+
// Domain detection cache for performance optimization
|
|
31
|
+
const { createGlobalHelpers, getTotalDomainsSkipped, getDetectedDomainsCount } = require('./lib/domain-cache');
|
|
30
32
|
// Enhanced redirect handling
|
|
31
33
|
const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/redirect');
|
|
32
34
|
// Ensure web browser is working correctly
|
|
33
35
|
const { monitorBrowserHealth, isBrowserHealthy } = require('./lib/browserhealth');
|
|
34
36
|
|
|
35
37
|
// --- Script Configuration & Constants ---
|
|
36
|
-
const VERSION = '1.0.
|
|
38
|
+
const VERSION = '1.0.44'; // Script version
|
|
37
39
|
|
|
38
40
|
// get startTime
|
|
39
41
|
const startTime = Date.now();
|
|
40
42
|
|
|
43
|
+
// Initialize domain cache helpers with debug logging if enabled
|
|
44
|
+
const domainCacheOptions = { enableLogging: false }; // Set to true for cache debug logs
|
|
45
|
+
const { isDomainAlreadyDetected, markDomainAsDetected } = createGlobalHelpers(domainCacheOptions);
|
|
46
|
+
|
|
41
47
|
// --- Command-Line Argument Parsing ---
|
|
42
48
|
const args = process.argv.slice(2);
|
|
43
49
|
|
|
@@ -1475,9 +1481,12 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1475
1481
|
/**
|
|
1476
1482
|
* Helper function to add domain to matched collection
|
|
1477
1483
|
* @param {string} domain - Domain to add
|
|
1484
|
+
* @param {string} fullSubdomain - Full subdomain for cache tracking
|
|
1478
1485
|
* @param {string} resourceType - Resource type (for --adblock-rules mode)
|
|
1479
1486
|
*/
|
|
1480
|
-
function addMatchedDomain(domain, resourceType = null) {
|
|
1487
|
+
function addMatchedDomain(domain, resourceType = null, fullSubdomain = null) {
|
|
1488
|
+
// Use fullSubdomain for cache tracking if provided, otherwise fall back to domain
|
|
1489
|
+
const cacheKey = fullSubdomain || domain;
|
|
1481
1490
|
// Check if we should ignore similar domains
|
|
1482
1491
|
const ignoreSimilarEnabled = siteConfig.ignore_similar !== undefined ? siteConfig.ignore_similar : ignore_similar;
|
|
1483
1492
|
const similarityThreshold = siteConfig.ignore_similar_threshold || ignore_similar_threshold;
|
|
@@ -1517,6 +1526,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1517
1526
|
return; // Skip adding this domain
|
|
1518
1527
|
}
|
|
1519
1528
|
}
|
|
1529
|
+
|
|
1530
|
+
// Mark full subdomain as detected for future reference
|
|
1531
|
+
markDomainAsDetected(cacheKey);
|
|
1520
1532
|
|
|
1521
1533
|
if (matchedDomains instanceof Map) {
|
|
1522
1534
|
if (!matchedDomains.has(domain)) {
|
|
@@ -1583,6 +1595,10 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1583
1595
|
}
|
|
1584
1596
|
}
|
|
1585
1597
|
const reqUrl = request.url();
|
|
1598
|
+
|
|
1599
|
+
// ALWAYS extract the FULL subdomain for cache checking to preserve unique subdomains
|
|
1600
|
+
const fullSubdomain = safeGetDomain(reqUrl, true); // Always get full subdomain for cache
|
|
1601
|
+
const reqDomain = safeGetDomain(reqUrl, perSiteSubDomains); // Output domain based on config
|
|
1586
1602
|
|
|
1587
1603
|
if (allBlockedRegexes.some(re => re.test(reqUrl))) {
|
|
1588
1604
|
if (forceDebug) {
|
|
@@ -1606,7 +1622,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1606
1622
|
|
|
1607
1623
|
// NEW: Check if even_blocked is enabled and this URL matches filter regex
|
|
1608
1624
|
if (evenBlocked) {
|
|
1609
|
-
|
|
1625
|
+
// reqDomain already defined above
|
|
1610
1626
|
if (reqDomain && !matchesIgnoreDomain(reqDomain, ignoreDomains)) {
|
|
1611
1627
|
for (const re of regexes) {
|
|
1612
1628
|
if (re.test(reqUrl)) {
|
|
@@ -1625,7 +1641,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1625
1641
|
wasBlocked: true
|
|
1626
1642
|
});
|
|
1627
1643
|
} else {
|
|
1628
|
-
addMatchedDomain(reqDomain, resourceType);
|
|
1644
|
+
addMatchedDomain(reqDomain, resourceType, fullSubdomain);
|
|
1629
1645
|
}
|
|
1630
1646
|
|
|
1631
1647
|
const simplifiedUrl = getRootDomain(currentUrl);
|
|
@@ -1649,8 +1665,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1649
1665
|
return;
|
|
1650
1666
|
}
|
|
1651
1667
|
|
|
1652
|
-
|
|
1653
|
-
|
|
1668
|
+
|
|
1654
1669
|
if (!reqDomain) {
|
|
1655
1670
|
if (forceDebug) {
|
|
1656
1671
|
console.log(formatLogMessage('debug', `Skipping request with unparseable URL: ${reqUrl}`));
|
|
@@ -1659,8 +1674,8 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1659
1674
|
return;
|
|
1660
1675
|
}
|
|
1661
1676
|
|
|
1662
|
-
|
|
1663
|
-
|
|
1677
|
+
// Skip matching if this full subdomain is one of the redirect intermediaries
|
|
1678
|
+
if (redirectDomainsToExclude && redirectDomainsToExclude.includes(fullSubdomain)) {
|
|
1664
1679
|
if (forceDebug) {
|
|
1665
1680
|
console.log(formatLogMessage('debug', `Skipping redirect intermediary domain: ${reqDomain}`));
|
|
1666
1681
|
}
|
|
@@ -1699,9 +1714,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1699
1714
|
}
|
|
1700
1715
|
|
|
1701
1716
|
// Check ignoreDomains AFTER regex match but BEFORE domain processing
|
|
1702
|
-
if (matchesIgnoreDomain(
|
|
1717
|
+
if (matchesIgnoreDomain(fullSubdomain, ignoreDomains)) {
|
|
1703
1718
|
if (forceDebug) {
|
|
1704
|
-
console.log(formatLogMessage('debug', `Ignoring domain ${
|
|
1719
|
+
console.log(formatLogMessage('debug', `Ignoring domain ${fullSubdomain} (matches ignoreDomains pattern)`));
|
|
1705
1720
|
}
|
|
1706
1721
|
break; // Skip this URL - domain is in ignore list
|
|
1707
1722
|
}
|
|
@@ -1734,6 +1749,14 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1734
1749
|
}
|
|
1735
1750
|
} else if (hasNetTools && !hasSearchString && !hasSearchStringAnd) {
|
|
1736
1751
|
// If nettools are configured (whois/dig), perform checks on the domain
|
|
1752
|
+
// Skip nettools check if full subdomain was already detected
|
|
1753
|
+
if (isDomainAlreadyDetected(fullSubdomain)) {
|
|
1754
|
+
if (forceDebug) {
|
|
1755
|
+
console.log(formatLogMessage('debug', `Skipping nettools check for already detected subdomain: ${fullSubdomain}`));
|
|
1756
|
+
}
|
|
1757
|
+
break; // Skip to next URL
|
|
1758
|
+
}
|
|
1759
|
+
|
|
1737
1760
|
if (forceDebug) {
|
|
1738
1761
|
console.log(formatLogMessage('debug', `${reqUrl} matched regex ${re} and resourceType ${resourceType}, queued for nettools check`));
|
|
1739
1762
|
}
|
|
@@ -1767,6 +1790,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1767
1790
|
dryRunCallback: dryRunMode ? createEnhancedDryRunCallback(matchedDomains, forceDebug) : null,
|
|
1768
1791
|
matchedDomains,
|
|
1769
1792
|
addMatchedDomain,
|
|
1793
|
+
isDomainAlreadyDetected,
|
|
1770
1794
|
currentUrl,
|
|
1771
1795
|
getRootDomain,
|
|
1772
1796
|
siteConfig,
|
|
@@ -1777,10 +1801,17 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1777
1801
|
});
|
|
1778
1802
|
|
|
1779
1803
|
// Execute nettools check asynchronously
|
|
1780
|
-
const originalDomain =
|
|
1804
|
+
const originalDomain = fullSubdomain; // Use full subdomain for nettools
|
|
1781
1805
|
setImmediate(() => netToolsHandler(reqDomain, originalDomain));
|
|
1782
1806
|
} else {
|
|
1783
1807
|
// If searchstring or searchstring_and IS defined (with or without nettools), queue for content checking
|
|
1808
|
+
// Skip searchstring check if full subdomain was already detected
|
|
1809
|
+
if (isDomainAlreadyDetected(fullSubdomain)) {
|
|
1810
|
+
if (forceDebug) {
|
|
1811
|
+
console.log(formatLogMessage('debug', `Skipping searchstring check for already detected subdomain: ${fullSubdomain}`));
|
|
1812
|
+
}
|
|
1813
|
+
break; // Skip to next URL
|
|
1814
|
+
}
|
|
1784
1815
|
if (forceDebug) {
|
|
1785
1816
|
const searchType = hasSearchStringAnd ? 'searchstring_and' : 'searchstring';
|
|
1786
1817
|
console.log(formatLogMessage('debug', `${reqUrl} matched regex ${re} and resourceType ${resourceType}, queued for ${searchType} content search`));
|
|
@@ -1808,6 +1839,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1808
1839
|
regexes,
|
|
1809
1840
|
matchedDomains,
|
|
1810
1841
|
addMatchedDomain, // Pass the helper function
|
|
1842
|
+
isDomainAlreadyDetected,
|
|
1811
1843
|
currentUrl,
|
|
1812
1844
|
perSiteSubDomains,
|
|
1813
1845
|
ignoreDomains,
|
|
@@ -1838,6 +1870,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1838
1870
|
regexes,
|
|
1839
1871
|
matchedDomains,
|
|
1840
1872
|
addMatchedDomain, // Pass the helper function
|
|
1873
|
+
isDomainAlreadyDetected,
|
|
1841
1874
|
currentUrl,
|
|
1842
1875
|
perSiteSubDomains,
|
|
1843
1876
|
ignoreDomains,
|
|
@@ -1876,6 +1909,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1876
1909
|
regexes,
|
|
1877
1910
|
matchedDomains,
|
|
1878
1911
|
addMatchedDomain, // Pass the helper function
|
|
1912
|
+
isDomainAlreadyDetected,
|
|
1879
1913
|
currentUrl,
|
|
1880
1914
|
perSiteSubDomains,
|
|
1881
1915
|
ignoreDomains,
|
|
@@ -2462,6 +2496,8 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2462
2496
|
const totalMatches = results.reduce((sum, r) => sum + (r.rules ? r.rules.length : 0), 0);
|
|
2463
2497
|
|
|
2464
2498
|
// Debug: Show output format being used
|
|
2499
|
+
const totalDomainsSkipped = getTotalDomainsSkipped();
|
|
2500
|
+
const detectedDomainsCount = getDetectedDomainsCount();
|
|
2465
2501
|
if (forceDebug) {
|
|
2466
2502
|
const globalOptions = {
|
|
2467
2503
|
localhostMode,
|
|
@@ -2476,6 +2512,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2476
2512
|
};
|
|
2477
2513
|
console.log(formatLogMessage('debug', `Output format: ${getFormatDescription(globalOptions)}`));
|
|
2478
2514
|
console.log(formatLogMessage('debug', `Generated ${outputResult.totalRules} rules from ${outputResult.successfulPageLoads} successful page loads`));
|
|
2515
|
+
console.log(formatLogMessage('debug', `Performance: ${totalDomainsSkipped} domains skipped (already detected), ${detectedDomainsCount} unique domains cached`));
|
|
2479
2516
|
}
|
|
2480
2517
|
|
|
2481
2518
|
// Compress log files if --compress-logs is enabled
|
|
@@ -2567,6 +2604,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2567
2604
|
} else if (outputResult.totalRules > 0 && dryRunMode) {
|
|
2568
2605
|
console.log(messageColors.success('Found') + ` ${outputResult.totalRules} total matches across all URLs`);
|
|
2569
2606
|
}
|
|
2607
|
+
if (totalDomainsSkipped > 0) {
|
|
2608
|
+
console.log(messageColors.info('Performance:') + ` ${totalDomainsSkipped} domains skipped (already detected)`);
|
|
2609
|
+
}
|
|
2570
2610
|
}
|
|
2571
2611
|
|
|
2572
2612
|
// Clean process termination
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "1.0.42",
|
|
3
|
+
"version": "1.0.44",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|