@fanboynz/network-scanner 1.0.74 → 1.0.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v1.0.74 ===
1
+ // === Network scanner script (nwss.js) v1.0.76 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -123,7 +123,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
123
123
  const { monitorBrowserHealth, isBrowserHealthy } = require('./lib/browserhealth');
124
124
 
125
125
  // --- Script Configuration & Constants ---
126
- const VERSION = '1.0.74'; // Script version
126
+ const VERSION = '1.0.76'; // Script version
127
127
 
128
128
  // get startTime
129
129
  const startTime = Date.now();
@@ -187,6 +187,7 @@ const testValidation = args.includes('--test-validation');
187
187
  let cleanRules = args.includes('--clean-rules');
188
188
  const clearCache = args.includes('--clear-cache');
189
189
  const ignoreCache = args.includes('--ignore-cache');
190
+ const cacheRequests = args.includes('--cache-requests');
190
191
 
191
192
  let validateRulesFile = null;
192
193
  const validateRulesIndex = args.findIndex(arg => arg === '--validate-rules');
@@ -454,6 +455,7 @@ General Options:
454
455
  --remove-tempfiles Remove Chrome/Puppeteer temporary files before exit
455
456
 
456
457
  Validation Options:
458
+ --cache-requests Cache HTTP requests to avoid re-requesting same URLs within scan
457
459
  --validate-config Validate config.json file and exit
458
460
  --validate-rules [file] Validate rule file format (uses --output/--compare files if no file specified)
459
461
  --clean-rules [file] Clean rule files by removing invalid lines and optionally duplicates (uses --output/--compare files if no file specified)
@@ -522,6 +524,7 @@ Redirect Handling Options:
522
524
  adblock_rules: true/false Generate adblock filter rules with resource types for this site
523
525
  even_blocked: true/false Add matching rules even if requests are blocked (default: false)
524
526
 
527
+ bypass_cache: true/false Skip all caching for this site's URLs (default: false)
525
528
  referrer_headers: "url" or ["url1", "url2"] Set referrer header for realistic traffic sources
526
529
  custom_headers: {"Header": "value"} Add custom HTTP headers to requests
527
530
 
@@ -682,6 +685,7 @@ if (ignoreCache) {
682
685
  } else {
683
686
  smartCache = createSmartCache({
684
687
  ...config,
688
+ cache_requests: cacheRequests, // NEW: Pass request caching flag
685
689
  forceDebug,
686
690
  max_concurrent_sites: MAX_CONCURRENT_SITES, // Pass concurrency info
687
691
  cache_aggressive_mode: MAX_CONCURRENT_SITES > CONCURRENCY_LIMITS.HIGH_CONCURRENCY_THRESHOLD, // Auto-enable for high concurrency
@@ -692,6 +696,24 @@ smartCache = createSmartCache({
692
696
  });
693
697
  }
694
698
 
699
+ // Add safe domain processing helper after smartCache initialization
700
+ function safeMarkDomainProcessed(domain, context, metadata) {
701
+ if (smartCache) {
702
+ try {
703
+ if (typeof smartCache.markDomainProcessed === 'function') {
704
+ smartCache.markDomainProcessed(domain, context, metadata);
705
+ } else {
706
+ // Fallback: trigger cache via shouldSkipDomain
707
+ smartCache.shouldSkipDomain(domain, context);
708
+ }
709
+ } catch (cacheErr) {
710
+ if (forceDebug) {
711
+ console.log(formatLogMessage('debug', `[SmartCache] Error marking domain: ${cacheErr.message}`));
712
+ }
713
+ }
714
+ }
715
+ }
716
+
695
717
  // Handle --clean-rules after config is loaded (so we have access to sites)
696
718
  if (cleanRules || cleanRulesFile) {
697
719
  const filesToClean = cleanRulesFile ? [cleanRulesFile] : [outputFile, compareFile].filter(Boolean);
@@ -1000,6 +1022,16 @@ function shouldProcessUrl(url, forceDebug) {
1000
1022
  }
1001
1023
  }
1002
1024
 
1025
+ /**
1026
+ * Check if URL should bypass all caching for this site
1027
+ * @param {string} url - URL to check
1028
+ * @param {Object} siteConfig - Site configuration
1029
+ * @returns {boolean} True if should bypass cache
1030
+ */
1031
+ function shouldBypassCacheForUrl(url, siteConfig) {
1032
+ return siteConfig.bypass_cache === true;
1033
+ }
1034
+
1003
1035
  // ability to use widcards in ignoreDomains
1004
1036
  function matchesIgnoreDomain(domain, ignorePatterns) {
1005
1037
  return ignorePatterns.some(pattern => {
@@ -1365,6 +1397,11 @@ function setupFrameHandling(page, forceDebug) {
1365
1397
  );
1366
1398
  }
1367
1399
 
1400
+ // Log bypass_cache setting if enabled
1401
+ if (forceDebug && siteConfig.bypass_cache === true) {
1402
+ console.log(formatLogMessage('debug', `Cache bypass enabled for all URLs in site: ${currentUrl}`));
1403
+ }
1404
+
1368
1405
  if (siteConfig.firstParty === 0 && siteConfig.thirdParty === 0) {
1369
1406
  console.warn(`⚠ Skipping ${currentUrl} because both firstParty and thirdParty are disabled.`);
1370
1407
  return { url: currentUrl, rules: [], success: false, skipped: true };
@@ -1891,7 +1928,18 @@ function setupFrameHandling(page, forceDebug) {
1891
1928
 
1892
1929
  // Also mark in smart cache with context (if cache is enabled)
1893
1930
  if (smartCache) {
1894
- smartCache.markDomainProcessed(domain, context, { resourceType, fullSubdomain });
1931
+ try {
1932
+ if (smartCache.markDomainProcessed) {
1933
+ safeMarkDomainProcessed(domain, context, { resourceType, fullSubdomain });
1934
+ } else {
1935
+ // Fallback: use shouldSkipDomain to indirectly cache
1936
+ smartCache.shouldSkipDomain(domain, context);
1937
+ }
1938
+ } catch (cacheErr) {
1939
+ if (forceDebug) {
1940
+ console.log(formatLogMessage('debug', `[SmartCache] Error marking domain: ${cacheErr.message}`));
1941
+ }
1942
+ }
1895
1943
  }
1896
1944
 
1897
1945
  if (matchedDomains instanceof Map) {
@@ -2114,6 +2162,42 @@ function setupFrameHandling(page, forceDebug) {
2114
2162
 
2115
2163
  // Check if nettools validation is required - if so, NEVER add domains immediately
2116
2164
  if (hasNetTools) {
2165
+ // Call nettools handler BEFORE exiting
2166
+ if (hasNetTools && !hasSearchString && !hasSearchStringAnd) {
2167
+ // Create and execute nettools handler
2168
+ const netToolsHandler = createNetToolsHandler({
2169
+ whoisTerms,
2170
+ whoisOrTerms,
2171
+ whoisDelay: siteConfig.whois_delay || whois_delay,
2172
+ whoisServer,
2173
+ whoisServerMode: siteConfig.whois_server_mode || whois_server_mode,
2174
+ debugLogFile,
2175
+ fs,
2176
+ digTerms,
2177
+ digOrTerms,
2178
+ digRecordType,
2179
+ digSubdomain: siteConfig.dig_subdomain === true,
2180
+ dryRunCallback: dryRunMode ? createEnhancedDryRunCallback(matchedDomains, forceDebug) : null,
2181
+ matchedDomains,
2182
+ addMatchedDomain,
2183
+ isDomainAlreadyDetected,
2184
+ onWhoisResult: smartCache ? (domain, result) => smartCache.cacheNetTools(domain, 'whois', result) : undefined,
2185
+ onDigResult: smartCache ? (domain, result, recordType) => smartCache.cacheNetTools(domain, 'dig', result, recordType) : undefined,
2186
+ cachedWhois: smartCache ? smartCache.getCachedNetTools(reqDomain, 'whois') : null,
2187
+ cachedDig: smartCache ? smartCache.getCachedNetTools(reqDomain, 'dig', digRecordType) : null,
2188
+ currentUrl,
2189
+ getRootDomain,
2190
+ siteConfig,
2191
+ dumpUrls,
2192
+ matchedUrlsLogFile,
2193
+ forceDebug,
2194
+ fs
2195
+ });
2196
+
2197
+ // Execute nettools check asynchronously
2198
+ const originalDomain = fullSubdomain;
2199
+ setImmediate(() => netToolsHandler(reqDomain, originalDomain));
2200
+ }
2117
2201
  if (forceDebug) {
2118
2202
  console.log(formatLogMessage('debug', `${reqUrl} has nettools validation required - skipping immediate add`));
2119
2203
  }
@@ -2267,8 +2351,16 @@ function setupFrameHandling(page, forceDebug) {
2267
2351
 
2268
2352
  // If curl is enabled, download and analyze content immediately
2269
2353
  if (useCurl) {
2270
- // Check response cache first if smart cache is available and caching is enabled
2271
- const cachedContent = smartCache ? smartCache.getCachedResponse(reqUrl) : null;
2354
+ // Check bypass_cache before attempting cache lookup
2355
+ let cachedContent = null;
2356
+ if (!shouldBypassCacheForUrl(reqUrl, siteConfig)) {
2357
+ // Check request cache first if smart cache is available and caching is enabled
2358
+ cachedContent = smartCache ? smartCache.getCachedRequest(reqUrl, {
2359
+ method: 'GET',
2360
+ headers: { 'user-agent': curlUserAgent },
2361
+ siteConfig: siteConfig
2362
+ }) : null;
2363
+ }
2272
2364
 
2273
2365
  if (cachedContent && forceDebug) {
2274
2366
  console.log(formatLogMessage('debug', `[SmartCache] Using cached response content for ${reqUrl.substring(0, 50)}...`));
@@ -2285,7 +2377,10 @@ function setupFrameHandling(page, forceDebug) {
2285
2377
  addMatchedDomain, // Pass the helper function
2286
2378
  isDomainAlreadyDetected,
2287
2379
  onContentFetched: smartCache && !ignoreCache ? (url, content) => {
2288
- smartCache.cacheResponse(url, content);
2380
+ // Only cache if not bypassing cache
2381
+ if (!shouldBypassCacheForUrl(url, siteConfig)) {
2382
+ smartCache.cacheRequest(url, { method: 'GET', siteConfig }, { body: content, status: 200 });
2383
+ }
2289
2384
  } : undefined,
2290
2385
  currentUrl,
2291
2386
  perSiteSubDomains,
@@ -2318,6 +2413,12 @@ function setupFrameHandling(page, forceDebug) {
2318
2413
  matchedDomains,
2319
2414
  addMatchedDomain, // Pass the helper function
2320
2415
  isDomainAlreadyDetected,
2416
+ onContentFetched: smartCache && !ignoreCache ? (url, content) => {
2417
+ // Only cache if not bypassing cache
2418
+ if (!shouldBypassCacheForUrl(url, siteConfig)) {
2419
+ smartCache.cacheRequest(url, { method: 'GET', siteConfig }, { body: content, status: 200 });
2420
+ }
2421
+ } : undefined,
2321
2422
  currentUrl,
2322
2423
  perSiteSubDomains,
2323
2424
  ignoreDomains,
@@ -2356,7 +2457,14 @@ function setupFrameHandling(page, forceDebug) {
2356
2457
  regexes,
2357
2458
  matchedDomains,
2358
2459
  addMatchedDomain, // Pass the helper function
2460
+ bypassCache: (url) => shouldBypassCacheForUrl(url, siteConfig),
2359
2461
  isDomainAlreadyDetected,
2462
+ onContentFetched: smartCache && !ignoreCache ? (url, content) => {
2463
+ // Only cache if not bypassing cache
2464
+ if (!shouldBypassCacheForUrl(url, siteConfig)) {
2465
+ smartCache.cacheRequest(url, { method: 'GET', siteConfig }, { body: content, status: 200 });
2466
+ }
2467
+ } : undefined,
2360
2468
  currentUrl,
2361
2469
  perSiteSubDomains,
2362
2470
  ignoreDomains,
@@ -2874,6 +2982,17 @@ function setupFrameHandling(page, forceDebug) {
2874
2982
  console.log(`\n${messageColors.fileOp('🔄 Browser restart triggered:')} ${restartReason}`);
2875
2983
  }
2876
2984
 
2985
+ // NEW: Clear request cache during browser restart to ensure fresh session
2986
+ if (smartCache && cacheRequests) {
2987
+ const requestCacheStats = smartCache.getRequestCacheStats();
2988
+ if (requestCacheStats.enabled && requestCacheStats.size > 0) {
2989
+ const clearedCount = smartCache.clearRequestCache();
2990
+ if (forceDebug) {
2991
+ console.log(formatLogMessage('debug', `[SmartCache] Cleared ${clearedCount} request cache entries during browser restart`));
2992
+ }
2993
+ }
2994
+ }
2995
+
2877
2996
  try {
2878
2997
  await handleBrowserExit(browser, {
2879
2998
  forceDebug,
@@ -2950,6 +3069,17 @@ function setupFrameHandling(page, forceDebug) {
2950
3069
  console.log(`\n${messageColors.fileOp('🔄 Emergency browser restart:')} Critical browser errors detected`);
2951
3070
  }
2952
3071
 
3072
+ // NEW: Clear request cache during emergency restart
3073
+ if (smartCache && cacheRequests) {
3074
+ const requestCacheStats = smartCache.getRequestCacheStats();
3075
+ if (requestCacheStats.enabled && requestCacheStats.size > 0) {
3076
+ const clearedCount = smartCache.clearRequestCache();
3077
+ if (forceDebug) {
3078
+ console.log(formatLogMessage('debug', `[SmartCache] Cleared ${clearedCount} request cache entries during emergency restart`));
3079
+ }
3080
+ }
3081
+ }
3082
+
2953
3083
  // Force browser restart immediately
2954
3084
  try {
2955
3085
  // Enhanced emergency restart for Puppeteer 23.x
@@ -3012,6 +3142,22 @@ function setupFrameHandling(page, forceDebug) {
3012
3142
 
3013
3143
  let outputResult;
3014
3144
 
3145
+ // NEW: Clear request cache after processing all sites in the JSON config
3146
+ if (smartCache && cacheRequests) {
3147
+ const requestCacheStats = smartCache.getRequestCacheStats();
3148
+ if (requestCacheStats.enabled && requestCacheStats.size > 0) {
3149
+ const clearedCount = smartCache.clearRequestCache();
3150
+ if (!silentMode && clearedCount > 0) {
3151
+ console.log(`\n🗑️ Cleared request cache: ${clearedCount} entries after JSON processing`);
3152
+ }
3153
+ if (forceDebug) {
3154
+ console.log(formatLogMessage('debug',
3155
+ `[SmartCache] Request cache cleared after JSON scan completion (hit rate: ${requestCacheStats.hitRate})`
3156
+ ));
3157
+ }
3158
+ }
3159
+ }
3160
+
3015
3161
  if (!dryRunMode) {
3016
3162
  // Handle all output using the output module
3017
3163
  const outputConfig = {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "1.0.74",
3
+ "version": "1.0.76",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {