@fanboynz/network-scanner 1.0.74 → 1.0.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/smart-cache.js +0 -0
- package/nwss.js +152 -6
- package/package.json +1 -1
package/lib/smart-cache.js
CHANGED
|
Binary file
|
package/nwss.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// === Network scanner script (nwss.js) v1.0.74 ===
|
|
1
|
+
// === Network scanner script (nwss.js) v1.0.76 ===
|
|
2
2
|
|
|
3
3
|
// puppeteer for browser automation, fs for file system operations, psl for domain parsing.
|
|
4
4
|
// const pLimit = require('p-limit'); // Will be dynamically imported
|
|
@@ -123,7 +123,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
|
|
|
123
123
|
const { monitorBrowserHealth, isBrowserHealthy } = require('./lib/browserhealth');
|
|
124
124
|
|
|
125
125
|
// --- Script Configuration & Constants ---
|
|
126
|
-
const VERSION = '1.0.74'; // Script version
|
|
126
|
+
const VERSION = '1.0.76'; // Script version
|
|
127
127
|
|
|
128
128
|
// get startTime
|
|
129
129
|
const startTime = Date.now();
|
|
@@ -187,6 +187,7 @@ const testValidation = args.includes('--test-validation');
|
|
|
187
187
|
let cleanRules = args.includes('--clean-rules');
|
|
188
188
|
const clearCache = args.includes('--clear-cache');
|
|
189
189
|
const ignoreCache = args.includes('--ignore-cache');
|
|
190
|
+
const cacheRequests = args.includes('--cache-requests');
|
|
190
191
|
|
|
191
192
|
let validateRulesFile = null;
|
|
192
193
|
const validateRulesIndex = args.findIndex(arg => arg === '--validate-rules');
|
|
@@ -454,6 +455,7 @@ General Options:
|
|
|
454
455
|
--remove-tempfiles Remove Chrome/Puppeteer temporary files before exit
|
|
455
456
|
|
|
456
457
|
Validation Options:
|
|
458
|
+
--cache-requests Cache HTTP requests to avoid re-requesting same URLs within scan
|
|
457
459
|
--validate-config Validate config.json file and exit
|
|
458
460
|
--validate-rules [file] Validate rule file format (uses --output/--compare files if no file specified)
|
|
459
461
|
--clean-rules [file] Clean rule files by removing invalid lines and optionally duplicates (uses --output/--compare files if no file specified)
|
|
@@ -522,6 +524,7 @@ Redirect Handling Options:
|
|
|
522
524
|
adblock_rules: true/false Generate adblock filter rules with resource types for this site
|
|
523
525
|
even_blocked: true/false Add matching rules even if requests are blocked (default: false)
|
|
524
526
|
|
|
527
|
+
bypass_cache: true/false Skip all caching for this site's URLs (default: false)
|
|
525
528
|
referrer_headers: "url" or ["url1", "url2"] Set referrer header for realistic traffic sources
|
|
526
529
|
custom_headers: {"Header": "value"} Add custom HTTP headers to requests
|
|
527
530
|
|
|
@@ -682,6 +685,7 @@ if (ignoreCache) {
|
|
|
682
685
|
} else {
|
|
683
686
|
smartCache = createSmartCache({
|
|
684
687
|
...config,
|
|
688
|
+
cache_requests: cacheRequests, // NEW: Pass request caching flag
|
|
685
689
|
forceDebug,
|
|
686
690
|
max_concurrent_sites: MAX_CONCURRENT_SITES, // Pass concurrency info
|
|
687
691
|
cache_aggressive_mode: MAX_CONCURRENT_SITES > CONCURRENCY_LIMITS.HIGH_CONCURRENCY_THRESHOLD, // Auto-enable for high concurrency
|
|
@@ -692,6 +696,24 @@ smartCache = createSmartCache({
|
|
|
692
696
|
});
|
|
693
697
|
}
|
|
694
698
|
|
|
699
|
+
// Add safe domain processing helper after smartCache initialization
|
|
700
|
+
function safeMarkDomainProcessed(domain, context, metadata) {
|
|
701
|
+
if (smartCache) {
|
|
702
|
+
try {
|
|
703
|
+
if (typeof smartCache.markDomainProcessed === 'function') {
|
|
704
|
+
smartCache.markDomainProcessed(domain, context, metadata);
|
|
705
|
+
} else {
|
|
706
|
+
// Fallback: trigger cache via shouldSkipDomain
|
|
707
|
+
smartCache.shouldSkipDomain(domain, context);
|
|
708
|
+
}
|
|
709
|
+
} catch (cacheErr) {
|
|
710
|
+
if (forceDebug) {
|
|
711
|
+
console.log(formatLogMessage('debug', `[SmartCache] Error marking domain: ${cacheErr.message}`));
|
|
712
|
+
}
|
|
713
|
+
}
|
|
714
|
+
}
|
|
715
|
+
}
|
|
716
|
+
|
|
695
717
|
// Handle --clean-rules after config is loaded (so we have access to sites)
|
|
696
718
|
if (cleanRules || cleanRulesFile) {
|
|
697
719
|
const filesToClean = cleanRulesFile ? [cleanRulesFile] : [outputFile, compareFile].filter(Boolean);
|
|
@@ -1000,6 +1022,16 @@ function shouldProcessUrl(url, forceDebug) {
|
|
|
1000
1022
|
}
|
|
1001
1023
|
}
|
|
1002
1024
|
|
|
1025
|
+
/**
|
|
1026
|
+
* Check if URL should bypass all caching for this site
|
|
1027
|
+
* @param {string} url - URL to check
|
|
1028
|
+
* @param {Object} siteConfig - Site configuration
|
|
1029
|
+
* @returns {boolean} True if should bypass cache
|
|
1030
|
+
*/
|
|
1031
|
+
function shouldBypassCacheForUrl(url, siteConfig) {
|
|
1032
|
+
return siteConfig.bypass_cache === true;
|
|
1033
|
+
}
|
|
1034
|
+
|
|
1003
1035
|
// ability to use wildcards in ignoreDomains
|
|
1004
1036
|
function matchesIgnoreDomain(domain, ignorePatterns) {
|
|
1005
1037
|
return ignorePatterns.some(pattern => {
|
|
@@ -1365,6 +1397,11 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1365
1397
|
);
|
|
1366
1398
|
}
|
|
1367
1399
|
|
|
1400
|
+
// Log bypass_cache setting if enabled
|
|
1401
|
+
if (forceDebug && siteConfig.bypass_cache === true) {
|
|
1402
|
+
console.log(formatLogMessage('debug', `Cache bypass enabled for all URLs in site: ${currentUrl}`));
|
|
1403
|
+
}
|
|
1404
|
+
|
|
1368
1405
|
if (siteConfig.firstParty === 0 && siteConfig.thirdParty === 0) {
|
|
1369
1406
|
console.warn(`⚠ Skipping ${currentUrl} because both firstParty and thirdParty are disabled.`);
|
|
1370
1407
|
return { url: currentUrl, rules: [], success: false, skipped: true };
|
|
@@ -1891,7 +1928,18 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1891
1928
|
|
|
1892
1929
|
// Also mark in smart cache with context (if cache is enabled)
|
|
1893
1930
|
if (smartCache) {
|
|
1894
|
-
|
|
1931
|
+
try {
|
|
1932
|
+
if (smartCache.markDomainProcessed) {
|
|
1933
|
+
safeMarkDomainProcessed(domain, context, { resourceType, fullSubdomain });
|
|
1934
|
+
} else {
|
|
1935
|
+
// Fallback: use shouldSkipDomain to indirectly cache
|
|
1936
|
+
smartCache.shouldSkipDomain(domain, context);
|
|
1937
|
+
}
|
|
1938
|
+
} catch (cacheErr) {
|
|
1939
|
+
if (forceDebug) {
|
|
1940
|
+
console.log(formatLogMessage('debug', `[SmartCache] Error marking domain: ${cacheErr.message}`));
|
|
1941
|
+
}
|
|
1942
|
+
}
|
|
1895
1943
|
}
|
|
1896
1944
|
|
|
1897
1945
|
if (matchedDomains instanceof Map) {
|
|
@@ -2114,6 +2162,42 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2114
2162
|
|
|
2115
2163
|
// Check if nettools validation is required - if so, NEVER add domains immediately
|
|
2116
2164
|
if (hasNetTools) {
|
|
2165
|
+
// Call nettools handler BEFORE exiting
|
|
2166
|
+
if (hasNetTools && !hasSearchString && !hasSearchStringAnd) {
|
|
2167
|
+
// Create and execute nettools handler
|
|
2168
|
+
const netToolsHandler = createNetToolsHandler({
|
|
2169
|
+
whoisTerms,
|
|
2170
|
+
whoisOrTerms,
|
|
2171
|
+
whoisDelay: siteConfig.whois_delay || whois_delay,
|
|
2172
|
+
whoisServer,
|
|
2173
|
+
whoisServerMode: siteConfig.whois_server_mode || whois_server_mode,
|
|
2174
|
+
debugLogFile,
|
|
2175
|
+
fs,
|
|
2176
|
+
digTerms,
|
|
2177
|
+
digOrTerms,
|
|
2178
|
+
digRecordType,
|
|
2179
|
+
digSubdomain: siteConfig.dig_subdomain === true,
|
|
2180
|
+
dryRunCallback: dryRunMode ? createEnhancedDryRunCallback(matchedDomains, forceDebug) : null,
|
|
2181
|
+
matchedDomains,
|
|
2182
|
+
addMatchedDomain,
|
|
2183
|
+
isDomainAlreadyDetected,
|
|
2184
|
+
onWhoisResult: smartCache ? (domain, result) => smartCache.cacheNetTools(domain, 'whois', result) : undefined,
|
|
2185
|
+
onDigResult: smartCache ? (domain, result, recordType) => smartCache.cacheNetTools(domain, 'dig', result, recordType) : undefined,
|
|
2186
|
+
cachedWhois: smartCache ? smartCache.getCachedNetTools(reqDomain, 'whois') : null,
|
|
2187
|
+
cachedDig: smartCache ? smartCache.getCachedNetTools(reqDomain, 'dig', digRecordType) : null,
|
|
2188
|
+
currentUrl,
|
|
2189
|
+
getRootDomain,
|
|
2190
|
+
siteConfig,
|
|
2191
|
+
dumpUrls,
|
|
2192
|
+
matchedUrlsLogFile,
|
|
2193
|
+
forceDebug,
|
|
2194
|
+
fs
|
|
2195
|
+
});
|
|
2196
|
+
|
|
2197
|
+
// Execute nettools check asynchronously
|
|
2198
|
+
const originalDomain = fullSubdomain;
|
|
2199
|
+
setImmediate(() => netToolsHandler(reqDomain, originalDomain));
|
|
2200
|
+
}
|
|
2117
2201
|
if (forceDebug) {
|
|
2118
2202
|
console.log(formatLogMessage('debug', `${reqUrl} has nettools validation required - skipping immediate add`));
|
|
2119
2203
|
}
|
|
@@ -2267,8 +2351,16 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2267
2351
|
|
|
2268
2352
|
// If curl is enabled, download and analyze content immediately
|
|
2269
2353
|
if (useCurl) {
|
|
2270
|
-
// Check
|
|
2271
|
-
|
|
2354
|
+
// Check bypass_cache before attempting cache lookup
|
|
2355
|
+
let cachedContent = null;
|
|
2356
|
+
if (!shouldBypassCacheForUrl(reqUrl, siteConfig)) {
|
|
2357
|
+
// Check request cache first if smart cache is available and caching is enabled
|
|
2358
|
+
cachedContent = smartCache ? smartCache.getCachedRequest(reqUrl, {
|
|
2359
|
+
method: 'GET',
|
|
2360
|
+
headers: { 'user-agent': curlUserAgent },
|
|
2361
|
+
siteConfig: siteConfig
|
|
2362
|
+
}) : null;
|
|
2363
|
+
}
|
|
2272
2364
|
|
|
2273
2365
|
if (cachedContent && forceDebug) {
|
|
2274
2366
|
console.log(formatLogMessage('debug', `[SmartCache] Using cached response content for ${reqUrl.substring(0, 50)}...`));
|
|
@@ -2285,7 +2377,10 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2285
2377
|
addMatchedDomain, // Pass the helper function
|
|
2286
2378
|
isDomainAlreadyDetected,
|
|
2287
2379
|
onContentFetched: smartCache && !ignoreCache ? (url, content) => {
|
|
2288
|
-
|
|
2380
|
+
// Only cache if not bypassing cache
|
|
2381
|
+
if (!shouldBypassCacheForUrl(url, siteConfig)) {
|
|
2382
|
+
smartCache.cacheRequest(url, { method: 'GET', siteConfig }, { body: content, status: 200 });
|
|
2383
|
+
}
|
|
2289
2384
|
} : undefined,
|
|
2290
2385
|
currentUrl,
|
|
2291
2386
|
perSiteSubDomains,
|
|
@@ -2318,6 +2413,12 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2318
2413
|
matchedDomains,
|
|
2319
2414
|
addMatchedDomain, // Pass the helper function
|
|
2320
2415
|
isDomainAlreadyDetected,
|
|
2416
|
+
onContentFetched: smartCache && !ignoreCache ? (url, content) => {
|
|
2417
|
+
// Only cache if not bypassing cache
|
|
2418
|
+
if (!shouldBypassCacheForUrl(url, siteConfig)) {
|
|
2419
|
+
smartCache.cacheRequest(url, { method: 'GET', siteConfig }, { body: content, status: 200 });
|
|
2420
|
+
}
|
|
2421
|
+
} : undefined,
|
|
2321
2422
|
currentUrl,
|
|
2322
2423
|
perSiteSubDomains,
|
|
2323
2424
|
ignoreDomains,
|
|
@@ -2356,7 +2457,14 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2356
2457
|
regexes,
|
|
2357
2458
|
matchedDomains,
|
|
2358
2459
|
addMatchedDomain, // Pass the helper function
|
|
2460
|
+
bypassCache: (url) => shouldBypassCacheForUrl(url, siteConfig),
|
|
2359
2461
|
isDomainAlreadyDetected,
|
|
2462
|
+
onContentFetched: smartCache && !ignoreCache ? (url, content) => {
|
|
2463
|
+
// Only cache if not bypassing cache
|
|
2464
|
+
if (!shouldBypassCacheForUrl(url, siteConfig)) {
|
|
2465
|
+
smartCache.cacheRequest(url, { method: 'GET', siteConfig }, { body: content, status: 200 });
|
|
2466
|
+
}
|
|
2467
|
+
} : undefined,
|
|
2360
2468
|
currentUrl,
|
|
2361
2469
|
perSiteSubDomains,
|
|
2362
2470
|
ignoreDomains,
|
|
@@ -2874,6 +2982,17 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2874
2982
|
console.log(`\n${messageColors.fileOp('🔄 Browser restart triggered:')} ${restartReason}`);
|
|
2875
2983
|
}
|
|
2876
2984
|
|
|
2985
|
+
// NEW: Clear request cache during browser restart to ensure fresh session
|
|
2986
|
+
if (smartCache && cacheRequests) {
|
|
2987
|
+
const requestCacheStats = smartCache.getRequestCacheStats();
|
|
2988
|
+
if (requestCacheStats.enabled && requestCacheStats.size > 0) {
|
|
2989
|
+
const clearedCount = smartCache.clearRequestCache();
|
|
2990
|
+
if (forceDebug) {
|
|
2991
|
+
console.log(formatLogMessage('debug', `[SmartCache] Cleared ${clearedCount} request cache entries during browser restart`));
|
|
2992
|
+
}
|
|
2993
|
+
}
|
|
2994
|
+
}
|
|
2995
|
+
|
|
2877
2996
|
try {
|
|
2878
2997
|
await handleBrowserExit(browser, {
|
|
2879
2998
|
forceDebug,
|
|
@@ -2950,6 +3069,17 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2950
3069
|
console.log(`\n${messageColors.fileOp('🔄 Emergency browser restart:')} Critical browser errors detected`);
|
|
2951
3070
|
}
|
|
2952
3071
|
|
|
3072
|
+
// NEW: Clear request cache during emergency restart
|
|
3073
|
+
if (smartCache && cacheRequests) {
|
|
3074
|
+
const requestCacheStats = smartCache.getRequestCacheStats();
|
|
3075
|
+
if (requestCacheStats.enabled && requestCacheStats.size > 0) {
|
|
3076
|
+
const clearedCount = smartCache.clearRequestCache();
|
|
3077
|
+
if (forceDebug) {
|
|
3078
|
+
console.log(formatLogMessage('debug', `[SmartCache] Cleared ${clearedCount} request cache entries during emergency restart`));
|
|
3079
|
+
}
|
|
3080
|
+
}
|
|
3081
|
+
}
|
|
3082
|
+
|
|
2953
3083
|
// Force browser restart immediately
|
|
2954
3084
|
try {
|
|
2955
3085
|
// Enhanced emergency restart for Puppeteer 23.x
|
|
@@ -3012,6 +3142,22 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3012
3142
|
|
|
3013
3143
|
let outputResult;
|
|
3014
3144
|
|
|
3145
|
+
// NEW: Clear request cache after processing all sites in the JSON config
|
|
3146
|
+
if (smartCache && cacheRequests) {
|
|
3147
|
+
const requestCacheStats = smartCache.getRequestCacheStats();
|
|
3148
|
+
if (requestCacheStats.enabled && requestCacheStats.size > 0) {
|
|
3149
|
+
const clearedCount = smartCache.clearRequestCache();
|
|
3150
|
+
if (!silentMode && clearedCount > 0) {
|
|
3151
|
+
console.log(`\n🗑️ Cleared request cache: ${clearedCount} entries after JSON processing`);
|
|
3152
|
+
}
|
|
3153
|
+
if (forceDebug) {
|
|
3154
|
+
console.log(formatLogMessage('debug',
|
|
3155
|
+
`[SmartCache] Request cache cleared after JSON scan completion (hit rate: ${requestCacheStats.hitRate})`
|
|
3156
|
+
));
|
|
3157
|
+
}
|
|
3158
|
+
}
|
|
3159
|
+
}
|
|
3160
|
+
|
|
3015
3161
|
if (!dryRunMode) {
|
|
3016
3162
|
// Handle all output using the output module
|
|
3017
3163
|
const outputConfig = {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "1.0.74",
|
|
3
|
+
"version": "1.0.76",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|