@fanboynz/network-scanner 2.0.24 → 2.0.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/lib/fingerprint.js +18 -5
- package/lib/searchstring.js +76 -19
- package/nwss.js +131 -75
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -223,7 +223,7 @@ When a page redirects to a new domain, first-party/third-party detection is base
|
|
|
223
223
|
| `whois_timeout_multiplier` | Number | `1.5` | Timeout increase multiplier per retry |
|
|
224
224
|
| `whois_use_fallback` | Boolean | `true` | Add TLD-specific fallback servers |
|
|
225
225
|
| `whois_retry_on_timeout` | Boolean | `true` | Retry on timeout errors |
|
|
226
|
-
| `whois_retry_on_error` | Boolean | `
|
|
226
|
+
| `whois_retry_on_error` | Boolean | `true` | Retry on connection/other errors |
|
|
227
227
|
| `dig` | Array | - | Check dig output for ALL specified terms (AND logic) |
|
|
228
228
|
| `dig-or` | Array | - | Check dig output for ANY specified term (OR logic) |
|
|
229
229
|
| `dig_subdomain` | Boolean | `false` | Use subdomain for dig lookup instead of root domain |
|
package/lib/fingerprint.js
CHANGED
|
@@ -59,7 +59,7 @@ const PRECOMPILED_MOCKS = Object.freeze({
|
|
|
59
59
|
postMessage: () => {},
|
|
60
60
|
disconnect: () => {}
|
|
61
61
|
}),
|
|
62
|
-
getManifest: () => Object.freeze({ name: "Chrome", version: "
|
|
62
|
+
getManifest: () => Object.freeze({ name: "Chrome", version: "141.0.0.0" }),
|
|
63
63
|
getURL: (path) => `chrome-extension://invalid/${path}`,
|
|
64
64
|
id: undefined
|
|
65
65
|
}),
|
|
@@ -91,9 +91,9 @@ const BUILT_IN_PROPERTIES = new Set([
|
|
|
91
91
|
// User agent collections with latest versions
|
|
92
92
|
const USER_AGENT_COLLECTIONS = {
|
|
93
93
|
chrome: [
|
|
94
|
-
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
95
|
-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
96
|
-
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
94
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
|
|
95
|
+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
|
|
96
|
+
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"
|
|
97
97
|
],
|
|
98
98
|
firefox: [
|
|
99
99
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:143.0) Gecko/20100101 Firefox/143.0",
|
|
@@ -535,7 +535,7 @@ async function applyUserAgentSpoofing(page, siteConfig, forceDebug, currentUrl)
|
|
|
535
535
|
onMessage: { addListener: () => {}, removeListener: () => {} },
|
|
536
536
|
sendMessage: () => {},
|
|
537
537
|
connect: () => ({ onMessage: { addListener: () => {}, removeListener: () => {} }, postMessage: () => {}, disconnect: () => {} }),
|
|
538
|
-
getManifest: () => ({ name: "Chrome", version: "
|
|
538
|
+
getManifest: () => ({ name: "Chrome", version: "141.0.0.0" }),
|
|
539
539
|
getURL: (path) => `chrome-extension://invalid/${path}`,
|
|
540
540
|
id: undefined
|
|
541
541
|
},
|
|
@@ -1347,6 +1347,19 @@ async function applyFingerprintProtection(page, siteConfig, forceDebug, currentU
|
|
|
1347
1347
|
|
|
1348
1348
|
try {
|
|
1349
1349
|
await page.evaluateOnNewDocument(({ spoof, debugEnabled }) => {
|
|
1350
|
+
|
|
1351
|
+
// Define helper functions FIRST in this context
|
|
1352
|
+
function spoofNavigatorProperties(navigator, properties) {
|
|
1353
|
+
for (const [prop, descriptor] of Object.entries(properties)) {
|
|
1354
|
+
safeDefinePropertyLocal(navigator, prop, descriptor);
|
|
1355
|
+
}
|
|
1356
|
+
}
|
|
1357
|
+
|
|
1358
|
+
function spoofScreenProperties(screen, properties) {
|
|
1359
|
+
for (const [prop, descriptor] of Object.entries(properties)) {
|
|
1360
|
+
safeDefinePropertyLocal(screen, prop, descriptor);
|
|
1361
|
+
}
|
|
1362
|
+
}
|
|
1350
1363
|
|
|
1351
1364
|
function safeDefinePropertyLocal(target, property, descriptor) {
|
|
1352
1365
|
try {
|
package/lib/searchstring.js
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
const fs = require('fs');
|
|
5
5
|
const { spawnSync } = require('child_process');
|
|
6
|
+
const { grepContent } = require('./grep');
|
|
6
7
|
|
|
7
8
|
// Configuration constants for search logic
|
|
8
9
|
const SEARCH_CONFIG = {
|
|
@@ -51,11 +52,12 @@ function parseSearchStrings(searchstring, searchstringAnd) {
|
|
|
51
52
|
* @param {Function} addMatchedDomain - Optional helper function for adding domains
|
|
52
53
|
* @param {string} domain - Domain to add
|
|
53
54
|
* @param {string} resourceType - Resource type (for --adblock-rules mode)
|
|
55
|
+
* @param {string} fullSubdomain - Full subdomain for cache tracking (optional)
|
|
54
56
|
*/
|
|
55
|
-
function addDomainToCollection(matchedDomains, addMatchedDomain, domain, resourceType = null) {
|
|
57
|
+
function addDomainToCollection(matchedDomains, addMatchedDomain, domain, resourceType = null, fullSubdomain = null) {
|
|
56
58
|
// Use helper function if provided (preferred method)
|
|
57
59
|
if (typeof addMatchedDomain === 'function') {
|
|
58
|
-
addMatchedDomain(domain, resourceType);
|
|
60
|
+
addMatchedDomain(domain, resourceType, fullSubdomain);
|
|
59
61
|
return;
|
|
60
62
|
}
|
|
61
63
|
|
|
@@ -575,6 +577,7 @@ function createResponseHandler(config) {
|
|
|
575
577
|
siteConfig,
|
|
576
578
|
dumpUrls,
|
|
577
579
|
matchedUrlsLogFile,
|
|
580
|
+
useGrep = false,
|
|
578
581
|
forceDebug,
|
|
579
582
|
resourceType // Will be null for response handler
|
|
580
583
|
} = config;
|
|
@@ -584,22 +587,16 @@ function createResponseHandler(config) {
|
|
|
584
587
|
const respDomain = perSiteSubDomains ? (new URL(respUrl)).hostname : getRootDomain(respUrl);
|
|
585
588
|
|
|
586
589
|
// Only process responses that match our regex patterns
|
|
587
|
-
const
|
|
588
|
-
if (!matchesRegex) return;
|
|
590
|
+
const fullSubdomain = (new URL(respUrl)).hostname; // Always get full subdomain for cache tracking
|
|
589
591
|
|
|
590
|
-
//
|
|
591
|
-
if (typeof config.isDomainAlreadyDetected === 'function' && config.isDomainAlreadyDetected(
|
|
592
|
-
if (forceDebug) {
|
|
593
|
-
console.log(`[debug] Skipping response analysis for already detected domain: ${respDomain}`);
|
|
594
|
-
}
|
|
592
|
+
// Skip if already detected to avoid duplicates
|
|
593
|
+
if (typeof config.isDomainAlreadyDetected === 'function' && config.isDomainAlreadyDetected(fullSubdomain)) {
|
|
595
594
|
return;
|
|
596
595
|
}
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
const currentUrlHostname = new URL(currentUrl).hostname;
|
|
600
|
-
const responseHostname = new URL(respUrl).hostname;
|
|
601
|
-
const isFirstParty = currentUrlHostname === responseHostname;
|
|
596
|
+
const matchesRegex = regexes.some(re => re.test(respUrl));
|
|
597
|
+
if (!matchesRegex) return;
|
|
602
598
|
|
|
599
|
+
// Extract domain and check if already detected (skip expensive operations)
|
|
603
600
|
// The main request handler already filtered first-party/third-party requests
|
|
604
601
|
// This response handler only runs for requests that passed that filter
|
|
605
602
|
// However, we need to apply the same first-party/third-party logic here for searchstring analysis
|
|
@@ -607,6 +604,10 @@ function createResponseHandler(config) {
|
|
|
607
604
|
|
|
608
605
|
// Apply first-party/third-party filtering for searchstring analysis
|
|
609
606
|
// Use the exact same logic as the main request handler
|
|
607
|
+
|
|
608
|
+
const currentUrlHostname = new URL(currentUrl).hostname;
|
|
609
|
+
const responseHostname = new URL(respUrl).hostname;
|
|
610
|
+
const isFirstParty = currentUrlHostname === responseHostname;
|
|
610
611
|
if (isFirstParty && siteConfig.firstParty === false) {
|
|
611
612
|
if (forceDebug) {
|
|
612
613
|
console.log(`[debug] Skipping first-party response for searchstring analysis (firstParty=false): ${respUrl}`);
|
|
@@ -632,9 +633,61 @@ function createResponseHandler(config) {
|
|
|
632
633
|
}
|
|
633
634
|
|
|
634
635
|
const content = await response.text();
|
|
636
|
+
|
|
637
|
+
// Cache the fetched content if callback provided
|
|
638
|
+
if (config.onContentFetched) {
|
|
639
|
+
try {
|
|
640
|
+
config.onContentFetched(respUrl, content);
|
|
641
|
+
} catch (cacheErr) {
|
|
642
|
+
if (forceDebug) {
|
|
643
|
+
console.log(`[debug] Content caching failed: ${cacheErr.message}`);
|
|
644
|
+
}
|
|
645
|
+
}
|
|
646
|
+
}
|
|
635
647
|
|
|
636
648
|
// Check if content contains search strings (OR or AND logic)
|
|
637
|
-
|
|
649
|
+
let searchResult;
|
|
650
|
+
|
|
651
|
+
if (useGrep && (searchStrings.length > 0 || searchStringsAnd.length > 0)) {
|
|
652
|
+
// Use grep for pattern matching
|
|
653
|
+
try {
|
|
654
|
+
const allPatterns = [...(searchStrings || []), ...(searchStringsAnd || [])];
|
|
655
|
+
const grepResult = await grepContent(content, allPatterns, {
|
|
656
|
+
ignoreCase: true,
|
|
657
|
+
wholeWord: false,
|
|
658
|
+
regex: false
|
|
659
|
+
});
|
|
660
|
+
|
|
661
|
+
if (hasSearchStringAnd && searchStringsAnd.length > 0) {
|
|
662
|
+
// For AND logic, check that all patterns were found
|
|
663
|
+
const foundPatterns = grepResult.allMatches.map(match => match.pattern);
|
|
664
|
+
const allFound = searchStringsAnd.every(pattern => foundPatterns.includes(pattern));
|
|
665
|
+
searchResult = {
|
|
666
|
+
found: allFound,
|
|
667
|
+
matchedString: allFound ? foundPatterns.join(' AND ') : null,
|
|
668
|
+
logicType: 'AND'
|
|
669
|
+
};
|
|
670
|
+
} else {
|
|
671
|
+
// For OR logic, any match is sufficient
|
|
672
|
+
searchResult = {
|
|
673
|
+
found: grepResult.found,
|
|
674
|
+
matchedString: grepResult.matchedPattern,
|
|
675
|
+
logicType: 'OR'
|
|
676
|
+
};
|
|
677
|
+
}
|
|
678
|
+
} catch (grepErr) {
|
|
679
|
+
if (forceDebug) {
|
|
680
|
+
console.log(`[debug] Grep failed for ${respUrl}, falling back to JavaScript: ${grepErr.message}`);
|
|
681
|
+
}
|
|
682
|
+
// Fallback to JavaScript search
|
|
683
|
+
searchResult = searchContent(content, searchStrings, searchStringsAnd, contentType, respUrl);
|
|
684
|
+
}
|
|
685
|
+
} else {
|
|
686
|
+
// Use JavaScript search
|
|
687
|
+
searchResult = searchContent(content, searchStrings, searchStringsAnd, contentType, respUrl);
|
|
688
|
+
}
|
|
689
|
+
|
|
690
|
+
const { found, matchedString, logicType, error } = searchResult;
|
|
638
691
|
|
|
639
692
|
if (found) {
|
|
640
693
|
if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
|
|
@@ -642,27 +695,31 @@ function createResponseHandler(config) {
|
|
|
642
695
|
}
|
|
643
696
|
|
|
644
697
|
// Response handler doesn't have access to specific resource type
|
|
645
|
-
|
|
698
|
+
// Use the addMatchedDomain helper which handles fullSubdomain properly
|
|
699
|
+
addMatchedDomain(respDomain, null, fullSubdomain);
|
|
646
700
|
const simplifiedUrl = getRootDomain(currentUrl);
|
|
647
701
|
|
|
648
702
|
if (siteConfig.verbose === 1) {
|
|
649
703
|
const partyType = isFirstParty ? 'first-party' : 'third-party';
|
|
650
|
-
|
|
704
|
+
const searchMethod = useGrep ? 'grep' : 'js';
|
|
705
|
+
console.log(`[match][${simplifiedUrl}] ${respUrl} (${partyType}, ${searchMethod}) contains searchstring (${logicType}): "${matchedString}"`);
|
|
651
706
|
}
|
|
652
707
|
|
|
653
708
|
if (dumpUrls) {
|
|
654
709
|
const timestamp = new Date().toISOString();
|
|
655
710
|
const partyType = isFirstParty ? 'first-party' : 'third-party';
|
|
711
|
+
const searchMethod = useGrep ? 'grep' : 'js';
|
|
656
712
|
try {
|
|
657
713
|
fs.appendFileSync(matchedUrlsLogFile,
|
|
658
|
-
`${timestamp} [match][${simplifiedUrl}] ${respUrl} (${partyType}, searchstring (${logicType}): "${matchedString}")\n`);
|
|
714
|
+
`${timestamp} [match][${simplifiedUrl}] ${respUrl} (${partyType}, ${searchMethod}, searchstring (${logicType}): "${matchedString}")\n`);
|
|
659
715
|
} catch (logErr) {
|
|
660
716
|
console.warn(`[warn] Failed to write to matched URLs log: ${logErr.message}`);
|
|
661
717
|
}
|
|
662
718
|
}
|
|
663
719
|
} else if (forceDebug) {
|
|
664
720
|
const partyType = isFirstParty ? 'first-party' : 'third-party';
|
|
665
|
-
|
|
721
|
+
const searchMethod = useGrep ? 'grep' : 'js';
|
|
722
|
+
console.log(`[debug] ${respUrl} (${partyType}, ${searchMethod}) matched regex but no searchstring found`);
|
|
666
723
|
if (error) {
|
|
667
724
|
console.log(`[debug] Search error: ${error}`);
|
|
668
725
|
}
|
package/nwss.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// === Network scanner script (nwss.js) v2.0.
|
|
1
|
+
// === Network scanner script (nwss.js) v2.0.26 ===
|
|
2
2
|
|
|
3
3
|
// puppeteer for browser automation, fs for file system operations, psl for domain parsing.
|
|
4
4
|
// const pLimit = require('p-limit'); // Will be dynamically imported
|
|
@@ -56,7 +56,7 @@ function fastTimeout(ms) {
|
|
|
56
56
|
}
|
|
57
57
|
|
|
58
58
|
// --- Configuration Constants ---
|
|
59
|
-
const TIMEOUTS = {
|
|
59
|
+
const TIMEOUTS = Object.freeze({
|
|
60
60
|
DEFAULT_PAGE: 35000, // Standard page load timeout (35s)
|
|
61
61
|
DEFAULT_NAVIGATION: 25000, // Navigation operation timeout
|
|
62
62
|
DEFAULT_NAVIGATION_REDUCED: 20000, // Reduced timeout for faster failures
|
|
@@ -71,21 +71,32 @@ const TIMEOUTS = {
|
|
|
71
71
|
CURL_HANDLER_DELAY: 3000, // Wait for async curl operations
|
|
72
72
|
PROTOCOL_TIMEOUT: 180000, // Chrome DevTools Protocol timeout
|
|
73
73
|
REDIRECT_JS_TIMEOUT: 5000 // JavaScript redirect detection timeout
|
|
74
|
-
};
|
|
74
|
+
});
|
|
75
75
|
|
|
76
|
-
const CACHE_LIMITS = {
|
|
76
|
+
const CACHE_LIMITS = Object.freeze({
|
|
77
77
|
DISK_CACHE_SIZE: 52428800, // 50MB
|
|
78
78
|
MEDIA_CACHE_SIZE: 52428800, // 50MB
|
|
79
79
|
DEFAULT_CACHE_PATH: '.cache',
|
|
80
80
|
DEFAULT_MAX_SIZE: 5000
|
|
81
|
-
};
|
|
81
|
+
});
|
|
82
82
|
|
|
83
|
-
const CONCURRENCY_LIMITS = {
|
|
83
|
+
const CONCURRENCY_LIMITS = Object.freeze({
|
|
84
84
|
MIN: 1,
|
|
85
85
|
MAX: 50,
|
|
86
86
|
DEFAULT: 6,
|
|
87
87
|
HIGH_CONCURRENCY_THRESHOLD: 12 // Auto-enable aggressive caching above this
|
|
88
|
-
};
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
// V8 Optimization: Use Map for user agent lookups instead of object
|
|
91
|
+
const USER_AGENTS = Object.freeze(new Map([
|
|
92
|
+
['chrome', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"],
|
|
93
|
+
['chrome_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"],
|
|
94
|
+
['chrome_linux', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"],
|
|
95
|
+
['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/143.0"],
|
|
96
|
+
['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:142.0) Gecko/20100101 Firefox/143.0"],
|
|
97
|
+
['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:142.0) Gecko/20100101 Firefox/143.0"],
|
|
98
|
+
['safari', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"]
|
|
99
|
+
]));
|
|
89
100
|
|
|
90
101
|
const REALTIME_CLEANUP_THRESHOLD = 8; // Default pages to keep for realtime cleanup
|
|
91
102
|
|
|
@@ -132,7 +143,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
|
|
|
132
143
|
const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive, performGroupWindowCleanup, performRealtimeWindowCleanup, trackPageForRealtime, updatePageUsage, cleanupPageBeforeReload } = require('./lib/browserhealth');
|
|
133
144
|
|
|
134
145
|
// --- Script Configuration & Constants ---
|
|
135
|
-
const VERSION = '2.0.
|
|
146
|
+
const VERSION = '2.0.26'; // Script version
|
|
136
147
|
|
|
137
148
|
// get startTime
|
|
138
149
|
const startTime = Date.now();
|
|
@@ -997,7 +1008,7 @@ function matchesIgnoreDomain(domain, ignorePatterns) {
|
|
|
997
1008
|
|
|
998
1009
|
function setupFrameHandling(page, forceDebug) {
|
|
999
1010
|
// Track active frames and clear on navigation to prevent detached frame access
|
|
1000
|
-
let activeFrames = new
|
|
1011
|
+
let activeFrames = new Set(); // Use Set to track frame references
|
|
1001
1012
|
|
|
1002
1013
|
// Clear frame tracking on navigation to prevent stale references
|
|
1003
1014
|
page.on('framenavigated', (frame) => {
|
|
@@ -1031,7 +1042,6 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1031
1042
|
// Enhanced frame validation with multiple safety checks
|
|
1032
1043
|
let frameUrl;
|
|
1033
1044
|
try {
|
|
1034
|
-
// Test frame accessibility first
|
|
1035
1045
|
frameUrl = frame.url();
|
|
1036
1046
|
|
|
1037
1047
|
// Check if frame is detached (if method exists)
|
|
@@ -1041,12 +1051,17 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1041
1051
|
}
|
|
1042
1052
|
return;
|
|
1043
1053
|
}
|
|
1054
|
+
|
|
1055
|
+
activeFrames.add(frame);
|
|
1056
|
+
|
|
1057
|
+
if (forceDebug) {
|
|
1058
|
+
console.log(formatLogMessage('debug', `New frame attached: ${frameUrl || 'about:blank'}`));
|
|
1059
|
+
}
|
|
1044
1060
|
} catch (frameAccessError) {
|
|
1045
1061
|
// Frame is not accessible (likely detached)
|
|
1046
1062
|
return;
|
|
1047
1063
|
}
|
|
1048
|
-
|
|
1049
|
-
activeFrames.add(frame);
|
|
1064
|
+
|
|
1050
1065
|
} catch (detachError) {
|
|
1051
1066
|
// Frame state checking can throw in 23.x, handle gracefully
|
|
1052
1067
|
if (forceDebug) {
|
|
@@ -1055,14 +1070,10 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1055
1070
|
return;
|
|
1056
1071
|
}
|
|
1057
1072
|
|
|
1058
|
-
// Store frame with timestamp for tracking
|
|
1059
|
-
activeFrames.set(frame, Date.now());
|
|
1060
1073
|
|
|
1061
1074
|
if (frame !== page.mainFrame() && frame.parentFrame()) { // Only handle child frames
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
console.log(formatLogMessage('debug', `New frame attached: ${frameUrl || 'about:blank'}`));
|
|
1065
|
-
}
|
|
1075
|
+
let frameUrl;
|
|
1076
|
+
frameUrl = frame.url();
|
|
1066
1077
|
|
|
1067
1078
|
// Don't try to navigate to frames with invalid/empty URLs
|
|
1068
1079
|
if (!frameUrl ||
|
|
@@ -1100,6 +1111,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1100
1111
|
// Let frames load naturally - manual navigation often causes Protocol errors
|
|
1101
1112
|
// await frame.goto(frame.url(), { waitUntil: 'domcontentloaded', timeout: 5000 });
|
|
1102
1113
|
|
|
1114
|
+
try {
|
|
1103
1115
|
if (forceDebug) {
|
|
1104
1116
|
console.log(formatLogMessage('debug', `Frame will load naturally: ${frameUrl}`));
|
|
1105
1117
|
}
|
|
@@ -1117,11 +1129,11 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1117
1129
|
});
|
|
1118
1130
|
// Handle frame navigations (keep this for monitoring)
|
|
1119
1131
|
page.on('framenavigated', (frame) => {
|
|
1120
|
-
let frameUrl;
|
|
1121
1132
|
|
|
1122
1133
|
// Skip if frame is not in our active set
|
|
1123
1134
|
if (!activeFrames.has(frame)) return;
|
|
1124
1135
|
|
|
1136
|
+
let frameUrl;
|
|
1125
1137
|
try {
|
|
1126
1138
|
frameUrl = frame.url();
|
|
1127
1139
|
} catch (urlErr) {
|
|
@@ -1143,17 +1155,14 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1143
1155
|
// Optional: Handle frame detachment for cleanup
|
|
1144
1156
|
page.on('framedetached', (frame) => {
|
|
1145
1157
|
// Remove from active tracking
|
|
1146
|
-
activeFrames.delete(frame);
|
|
1158
|
+
activeFrames.delete(frame); // This works for both Map and Set
|
|
1147
1159
|
|
|
1148
|
-
|
|
1149
|
-
let frameUrl;
|
|
1160
|
+
|
|
1150
1161
|
if (forceDebug) {
|
|
1162
|
+
let frameUrl;
|
|
1151
1163
|
try {
|
|
1152
1164
|
frameUrl = frame.url();
|
|
1153
|
-
|
|
1154
|
-
// Frame already detached, can't get URL
|
|
1155
|
-
return;
|
|
1156
|
-
}
|
|
1165
|
+
|
|
1157
1166
|
if (frameUrl &&
|
|
1158
1167
|
frameUrl !== 'about:blank' &&
|
|
1159
1168
|
frameUrl !== 'about:srcdoc' &&
|
|
@@ -1162,6 +1171,11 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1162
1171
|
!frameUrl.startsWith('chrome-extension://')) {
|
|
1163
1172
|
console.log(formatLogMessage('debug', `Frame detached: ${frameUrl}`));
|
|
1164
1173
|
}
|
|
1174
|
+
} catch (urlErr) {
|
|
1175
|
+
// Frame already detached, can't get URL - this is expected
|
|
1176
|
+
return;
|
|
1177
|
+
}
|
|
1178
|
+
|
|
1165
1179
|
}
|
|
1166
1180
|
});
|
|
1167
1181
|
}
|
|
@@ -1208,6 +1222,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1208
1222
|
'/usr/bin/chromium',
|
|
1209
1223
|
'/snap/bin/chromium'
|
|
1210
1224
|
];
|
|
1225
|
+
// V8 Optimization: Freeze the Chrome paths array since it's constant
|
|
1226
|
+
Object.freeze(systemChromePaths);
|
|
1227
|
+
|
|
1211
1228
|
|
|
1212
1229
|
let executablePath = null;
|
|
1213
1230
|
for (const chromePath of systemChromePaths) {
|
|
@@ -1384,12 +1401,28 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1384
1401
|
* @returns {Promise<object>} A promise that resolves to an object containing scan results.
|
|
1385
1402
|
*/
|
|
1386
1403
|
async function processUrl(currentUrl, siteConfig, browserInstance) {
|
|
1387
|
-
|
|
1388
|
-
const
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1404
|
+
// V8 Optimization: Single destructuring to avoid multiple property lookups
|
|
1405
|
+
const {
|
|
1406
|
+
firstParty,
|
|
1407
|
+
thirdParty,
|
|
1408
|
+
subDomains,
|
|
1409
|
+
localhost,
|
|
1410
|
+
cloudflare_phish,
|
|
1411
|
+
cloudflare_bypass,
|
|
1412
|
+
flowproxy_detection,
|
|
1413
|
+
privoxy,
|
|
1414
|
+
pihole,
|
|
1415
|
+
even_blocked,
|
|
1416
|
+
comments,
|
|
1417
|
+
bypass_cache
|
|
1418
|
+
} = siteConfig;
|
|
1419
|
+
|
|
1420
|
+
const allowFirstParty = firstParty === true || firstParty === 1;
|
|
1421
|
+
const allowThirdParty = thirdParty === undefined || thirdParty === true || thirdParty === 1;
|
|
1422
|
+
const perSiteSubDomains = subDomains === 1 ? true : subDomainsMode;
|
|
1423
|
+
const siteLocalhostIP = localhost || null;
|
|
1424
|
+
const cloudflarePhishBypass = cloudflare_phish === true;
|
|
1425
|
+
const cloudflareBypass = cloudflare_bypass === true;
|
|
1393
1426
|
// Add redirect and same-page loop protection
|
|
1394
1427
|
const MAX_REDIRECT_DEPTH = siteConfig.max_redirects || 10;
|
|
1395
1428
|
const redirectHistory = new Set();
|
|
@@ -1398,14 +1431,14 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1398
1431
|
const MAX_SAME_PAGE_LOADS = 3;
|
|
1399
1432
|
let currentPageUrl = currentUrl;
|
|
1400
1433
|
|
|
1401
|
-
const sitePrivoxy =
|
|
1402
|
-
const sitePihole =
|
|
1403
|
-
const flowproxyDetection =
|
|
1434
|
+
const sitePrivoxy = privoxy === true;
|
|
1435
|
+
const sitePihole = pihole === true;
|
|
1436
|
+
const flowproxyDetection = flowproxy_detection === true;
|
|
1404
1437
|
|
|
1405
|
-
const evenBlocked =
|
|
1438
|
+
const evenBlocked = even_blocked === true;
|
|
1406
1439
|
// Log site-level comments if debug mode is enabled
|
|
1407
|
-
if (forceDebug &&
|
|
1408
|
-
const siteComments = Array.isArray(
|
|
1440
|
+
if (forceDebug && comments) {
|
|
1441
|
+
const siteComments = Array.isArray(comments) ? comments : [comments];
|
|
1409
1442
|
console.log(formatLogMessage('debug', `Site comments for ${currentUrl}: ${siteComments.length} item(s)`));
|
|
1410
1443
|
siteComments.forEach((comment, idx) =>
|
|
1411
1444
|
console.log(formatLogMessage('debug', ` Site comment ${idx + 1}: ${comment}`))
|
|
@@ -1413,11 +1446,11 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1413
1446
|
}
|
|
1414
1447
|
|
|
1415
1448
|
// Log bypass_cache setting if enabled
|
|
1416
|
-
if (forceDebug &&
|
|
1449
|
+
if (forceDebug && bypass_cache === true) {
|
|
1417
1450
|
console.log(formatLogMessage('debug', `Cache bypass enabled for all URLs in site: ${currentUrl}`));
|
|
1418
1451
|
}
|
|
1419
1452
|
|
|
1420
|
-
if (
|
|
1453
|
+
if (firstParty === 0 && thirdParty === 0) {
|
|
1421
1454
|
console.warn(`⚠ Skipping ${currentUrl} because both firstParty and thirdParty are disabled.`);
|
|
1422
1455
|
return { url: currentUrl, rules: [], success: false, skipped: true };
|
|
1423
1456
|
}
|
|
@@ -1472,14 +1505,14 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1472
1505
|
let finalUrlAfterRedirect = null;
|
|
1473
1506
|
|
|
1474
1507
|
// Enhanced error types for Puppeteer 23.x compatibility
|
|
1475
|
-
const CRITICAL_BROWSER_ERRORS = [
|
|
1508
|
+
const CRITICAL_BROWSER_ERRORS = Object.freeze([
|
|
1476
1509
|
'Protocol error',
|
|
1477
1510
|
'Target closed',
|
|
1478
1511
|
'Browser has been closed',
|
|
1479
1512
|
'Browser protocol broken',
|
|
1480
1513
|
'Browser process exited',
|
|
1481
1514
|
'Browser disconnected'
|
|
1482
|
-
];
|
|
1515
|
+
]);
|
|
1483
1516
|
|
|
1484
1517
|
try {
|
|
1485
1518
|
|
|
@@ -1904,29 +1937,30 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1904
1937
|
|
|
1905
1938
|
// Client Hints protection for Chrome user agents
|
|
1906
1939
|
if (siteConfig.userAgent && siteConfig.userAgent.toLowerCase().includes('chrome')) {
|
|
1940
|
+
const userAgentKey = siteConfig.userAgent.toLowerCase();
|
|
1907
1941
|
let platform = 'Windows';
|
|
1908
1942
|
let platformVersion = '15.0.0';
|
|
1909
1943
|
let arch = 'x86';
|
|
1910
1944
|
|
|
1911
|
-
if (
|
|
1945
|
+
if (userAgentKey === 'chrome_mac') {
|
|
1912
1946
|
platform = 'macOS';
|
|
1913
|
-
platformVersion = '13.5.0';
|
|
1947
|
+
platformVersion = '13.5.0';
|
|
1914
1948
|
arch = 'arm';
|
|
1915
|
-
} else if (
|
|
1949
|
+
} else if (userAgentKey === 'chrome_linux') {
|
|
1916
1950
|
platform = 'Linux';
|
|
1917
1951
|
platformVersion = '6.5.0';
|
|
1918
1952
|
arch = 'x86';
|
|
1919
1953
|
}
|
|
1920
|
-
|
|
1954
|
+
|
|
1921
1955
|
await page.setExtraHTTPHeaders({
|
|
1922
|
-
'Sec-CH-UA': '"Chromium";v="
|
|
1956
|
+
'Sec-CH-UA': '"Chromium";v="141", "Not=A?Brand";v="24", "Google Chrome";v="141"',
|
|
1923
1957
|
'Sec-CH-UA-Platform': `"${platform}"`,
|
|
1924
1958
|
'Sec-CH-UA-Platform-Version': `"${platformVersion}"`,
|
|
1925
1959
|
'Sec-CH-UA-Mobile': '?0',
|
|
1926
1960
|
'Sec-CH-UA-Arch': `"${arch}"`,
|
|
1927
1961
|
'Sec-CH-UA-Bitness': '"64"',
|
|
1928
|
-
'Sec-CH-UA-Full-Version': '"
|
|
1929
|
-
'Sec-CH-UA-Full-Version-List': '"Chromium";v="
|
|
1962
|
+
'Sec-CH-UA-Full-Version': '"141.0.7390.55"',
|
|
1963
|
+
'Sec-CH-UA-Full-Version-List': '"Chromium";v="141.0.7390.55", "Not=A?Brand";v="24.0.0.0", "Google Chrome";v="141.0.7390.55"'
|
|
1930
1964
|
});
|
|
1931
1965
|
}
|
|
1932
1966
|
} catch (fingerprintErr) {
|
|
@@ -1951,21 +1985,12 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1951
1985
|
// Parse searchstring patterns using module
|
|
1952
1986
|
const { searchStrings, searchStringsAnd, hasSearchString, hasSearchStringAnd } = parseSearchStrings(siteConfig.searchstring, siteConfig.searchstring_and);
|
|
1953
1987
|
const useCurl = siteConfig.curl === true; // Use curl if enabled, regardless of searchstring
|
|
1954
|
-
let useGrep = siteConfig.grep === true
|
|
1988
|
+
let useGrep = siteConfig.grep === true; // Grep can work independently
|
|
1955
1989
|
|
|
1956
1990
|
// Get user agent for curl if needed
|
|
1957
1991
|
let curlUserAgent = '';
|
|
1958
1992
|
if (useCurl && siteConfig.userAgent) {
|
|
1959
|
-
|
|
1960
|
-
chrome: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
|
|
1961
|
-
chrome_mac: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
|
|
1962
|
-
chrome_linux: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
|
|
1963
|
-
firefox: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/143.0",
|
|
1964
|
-
firefox_mac: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:142.0) Gecko/20100101 Firefox/143.0",
|
|
1965
|
-
firefox_linux: "Mozilla/5.0 (X11; Linux x86_64; rv:142.0) Gecko/20100101 Firefox/143.0",
|
|
1966
|
-
safari: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"
|
|
1967
|
-
};
|
|
1968
|
-
curlUserAgent = userAgents[siteConfig.userAgent.toLowerCase()] || '';
|
|
1993
|
+
curlUserAgent = USER_AGENTS.get(siteConfig.userAgent.toLowerCase()) || '';
|
|
1969
1994
|
}
|
|
1970
1995
|
|
|
1971
1996
|
if (useCurl && forceDebug) {
|
|
@@ -1973,7 +1998,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1973
1998
|
}
|
|
1974
1999
|
|
|
1975
2000
|
if (useGrep && forceDebug) {
|
|
1976
|
-
console.log(formatLogMessage('debug', `Grep-based pattern matching enabled for ${currentUrl}`));
|
|
2001
|
+
console.log(formatLogMessage('debug', `Grep-based pattern matching enabled for ${currentUrl}${useCurl ? ' (with curl)' : ' (with response handler)'}`));
|
|
1977
2002
|
}
|
|
1978
2003
|
|
|
1979
2004
|
// Validate grep availability if needed
|
|
@@ -1993,7 +2018,6 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1993
2018
|
if (!curlCheck.isAvailable) {
|
|
1994
2019
|
console.warn(formatLogMessage('warn', `Curl not available for ${currentUrl}: ${curlCheck.error}. Skipping curl-based analysis.`));
|
|
1995
2020
|
useCurl = false;
|
|
1996
|
-
useGrep = false; // Grep requires curl
|
|
1997
2021
|
} else if (forceDebug) {
|
|
1998
2022
|
console.log(formatLogMessage('debug', `Using curl: ${curlCheck.version}`));
|
|
1999
2023
|
}
|
|
@@ -2643,7 +2667,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2643
2667
|
|
|
2644
2668
|
// If curl is enabled, download and analyze content immediately
|
|
2645
2669
|
if (useCurl) {
|
|
2646
|
-
// Check bypass_cache before attempting cache lookup
|
|
2670
|
+
// Check bypass_cache before attempting cache lookup (curl mode)
|
|
2647
2671
|
let cachedContent = null;
|
|
2648
2672
|
if (!shouldBypassCacheForUrl(reqUrl, siteConfig)) {
|
|
2649
2673
|
// Check request cache first if smart cache is available and caching is enabled
|
|
@@ -2732,8 +2756,30 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2732
2756
|
}
|
|
2733
2757
|
}
|
|
2734
2758
|
}
|
|
2759
|
+
} else if (useGrep && (hasSearchString || hasSearchStringAnd)) {
|
|
2760
|
+
// Use grep with response handler (no curl)
|
|
2761
|
+
if (forceDebug) {
|
|
2762
|
+
console.log(formatLogMessage('debug', `[grep-response] Queuing ${reqUrl} for grep analysis via response handler`));
|
|
2763
|
+
}
|
|
2764
|
+
|
|
2765
|
+
// Queue for grep processing via response handler
|
|
2766
|
+
// The response handler will download content and call grep
|
|
2767
|
+
if (dryRunMode) {
|
|
2768
|
+
matchedDomains.get('dryRunMatches').push({
|
|
2769
|
+
regex: matchedRegexPattern,
|
|
2770
|
+
domain: reqDomain,
|
|
2771
|
+
resourceType: resourceType,
|
|
2772
|
+
fullUrl: reqUrl,
|
|
2773
|
+
isFirstParty: isFirstParty,
|
|
2774
|
+
needsGrepCheck: true
|
|
2775
|
+
});
|
|
2776
|
+
}
|
|
2777
|
+
|
|
2778
|
+
// Don't process immediately - let response handler do the work
|
|
2779
|
+
if (forceDebug) {
|
|
2780
|
+
console.log(formatLogMessage('debug', `URL ${reqUrl} queued for grep analysis via response handler`));
|
|
2781
|
+
}
|
|
2735
2782
|
}
|
|
2736
|
-
|
|
2737
2783
|
// No break needed since we've already determined if regex matched
|
|
2738
2784
|
}
|
|
2739
2785
|
request.continue();
|
|
@@ -2742,8 +2788,8 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2742
2788
|
// Mark page as actively processing network requests
|
|
2743
2789
|
updatePageUsage(page, true);
|
|
2744
2790
|
|
|
2745
|
-
// Add response handler
|
|
2746
|
-
if ((hasSearchString || hasSearchStringAnd) && !useCurl && !
|
|
2791
|
+
// Add response handler if searchstring is defined and either no curl, or grep without curl
|
|
2792
|
+
if ((hasSearchString || hasSearchStringAnd) && (!useCurl || (useGrep && !useCurl))) {
|
|
2747
2793
|
const responseHandler = createResponseHandler({
|
|
2748
2794
|
searchStrings,
|
|
2749
2795
|
searchStringsAnd,
|
|
@@ -2761,6 +2807,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2761
2807
|
} : undefined,
|
|
2762
2808
|
currentUrl,
|
|
2763
2809
|
perSiteSubDomains,
|
|
2810
|
+
useGrep, // Pass grep flag to response handler
|
|
2764
2811
|
ignoreDomains,
|
|
2765
2812
|
matchesIgnoreDomain,
|
|
2766
2813
|
getRootDomain,
|
|
@@ -3462,20 +3509,29 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3462
3509
|
// Temporarily store the pLimit function
|
|
3463
3510
|
const originalLimit = limit;
|
|
3464
3511
|
|
|
3465
|
-
//
|
|
3466
|
-
|
|
3512
|
+
// V8 Optimization: Calculate total URLs first to pre-allocate array
|
|
3513
|
+
let totalUrls = 0;
|
|
3467
3514
|
for (const site of sites) {
|
|
3468
3515
|
const urlsToProcess = Array.isArray(site.url) ? site.url : [site.url];
|
|
3469
|
-
urlsToProcess.
|
|
3470
|
-
|
|
3516
|
+
totalUrls += urlsToProcess.length;
|
|
3517
|
+
}
|
|
3518
|
+
|
|
3519
|
+
// Pre-allocate array with exact size to prevent multiple reallocations
|
|
3520
|
+
const allTasks = new Array(totalUrls);
|
|
3521
|
+
let taskIndex = 0;
|
|
3522
|
+
|
|
3523
|
+
// Populate the pre-allocated array
|
|
3524
|
+
for (const site of sites) {
|
|
3525
|
+
const urlsToProcess = Array.isArray(site.url) ? site.url : [site.url];
|
|
3526
|
+
for (const url of urlsToProcess) {
|
|
3527
|
+
allTasks[taskIndex++] = {
|
|
3471
3528
|
url,
|
|
3472
3529
|
config: { ...site, _originalUrl: url }, // Preserve original URL for CDP domain checking
|
|
3473
|
-
taskId:
|
|
3474
|
-
}
|
|
3475
|
-
}
|
|
3530
|
+
taskId: taskIndex - 1 // For tracking
|
|
3531
|
+
};
|
|
3532
|
+
}
|
|
3476
3533
|
}
|
|
3477
|
-
|
|
3478
|
-
const totalUrls = allTasks.length;
|
|
3534
|
+
|
|
3479
3535
|
|
|
3480
3536
|
let results = [];
|
|
3481
3537
|
let processedUrlCount = 0;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.26",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|