@fanboynz/network-scanner 1.0.75 → 1.0.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v1.0.75 ===
1
+ // === Network scanner script (nwss.js) v1.0.76 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -123,7 +123,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
123
123
  const { monitorBrowserHealth, isBrowserHealthy } = require('./lib/browserhealth');
124
124
 
125
125
  // --- Script Configuration & Constants ---
126
- const VERSION = '1.0.75'; // Script version
126
+ const VERSION = '1.0.76'; // Script version
127
127
 
128
128
  // get startTime
129
129
  const startTime = Date.now();
@@ -187,6 +187,7 @@ const testValidation = args.includes('--test-validation');
187
187
  let cleanRules = args.includes('--clean-rules');
188
188
  const clearCache = args.includes('--clear-cache');
189
189
  const ignoreCache = args.includes('--ignore-cache');
190
+ const cacheRequests = args.includes('--cache-requests');
190
191
 
191
192
  let validateRulesFile = null;
192
193
  const validateRulesIndex = args.findIndex(arg => arg === '--validate-rules');
@@ -454,6 +455,7 @@ General Options:
454
455
  --remove-tempfiles Remove Chrome/Puppeteer temporary files before exit
455
456
 
456
457
  Validation Options:
458
+ --cache-requests Cache HTTP requests to avoid re-requesting same URLs within scan
457
459
  --validate-config Validate config.json file and exit
458
460
  --validate-rules [file] Validate rule file format (uses --output/--compare files if no file specified)
459
461
  --clean-rules [file] Clean rule files by removing invalid lines and optionally duplicates (uses --output/--compare files if no file specified)
@@ -522,6 +524,7 @@ Redirect Handling Options:
522
524
  adblock_rules: true/false Generate adblock filter rules with resource types for this site
523
525
  even_blocked: true/false Add matching rules even if requests are blocked (default: false)
524
526
 
527
+ bypass_cache: true/false Skip all caching for this site's URLs (default: false)
525
528
  referrer_headers: "url" or ["url1", "url2"] Set referrer header for realistic traffic sources
526
529
  custom_headers: {"Header": "value"} Add custom HTTP headers to requests
527
530
 
@@ -682,6 +685,7 @@ if (ignoreCache) {
682
685
  } else {
683
686
  smartCache = createSmartCache({
684
687
  ...config,
688
+ cache_requests: cacheRequests, // NEW: Pass request caching flag
685
689
  forceDebug,
686
690
  max_concurrent_sites: MAX_CONCURRENT_SITES, // Pass concurrency info
687
691
  cache_aggressive_mode: MAX_CONCURRENT_SITES > CONCURRENCY_LIMITS.HIGH_CONCURRENCY_THRESHOLD, // Auto-enable for high concurrency
@@ -692,6 +696,24 @@ smartCache = createSmartCache({
692
696
  });
693
697
  }
694
698
 
699
+ // Add safe domain processing helper after smartCache initialization
700
+ function safeMarkDomainProcessed(domain, context, metadata) {
701
+ if (smartCache) {
702
+ try {
703
+ if (typeof smartCache.markDomainProcessed === 'function') {
704
+ smartCache.markDomainProcessed(domain, context, metadata);
705
+ } else {
706
+ // Fallback: trigger cache via shouldSkipDomain
707
+ smartCache.shouldSkipDomain(domain, context);
708
+ }
709
+ } catch (cacheErr) {
710
+ if (forceDebug) {
711
+ console.log(formatLogMessage('debug', `[SmartCache] Error marking domain: ${cacheErr.message}`));
712
+ }
713
+ }
714
+ }
715
+ }
716
+
695
717
  // Handle --clean-rules after config is loaded (so we have access to sites)
696
718
  if (cleanRules || cleanRulesFile) {
697
719
  const filesToClean = cleanRulesFile ? [cleanRulesFile] : [outputFile, compareFile].filter(Boolean);
@@ -1000,6 +1022,16 @@ function shouldProcessUrl(url, forceDebug) {
1000
1022
  }
1001
1023
  }
1002
1024
 
1025
+ /**
1026
+ * Check if URL should bypass all caching for this site
1027
+ * @param {string} url - URL to check
1028
+ * @param {Object} siteConfig - Site configuration
1029
+ * @returns {boolean} True if should bypass cache
1030
+ */
1031
+ function shouldBypassCacheForUrl(url, siteConfig) {
1032
+ return siteConfig.bypass_cache === true;
1033
+ }
1034
+
1003
1035
  // ability to use wildcards in ignoreDomains
1004
1036
  function matchesIgnoreDomain(domain, ignorePatterns) {
1005
1037
  return ignorePatterns.some(pattern => {
@@ -1365,6 +1397,11 @@ function setupFrameHandling(page, forceDebug) {
1365
1397
  );
1366
1398
  }
1367
1399
 
1400
+ // Log bypass_cache setting if enabled
1401
+ if (forceDebug && siteConfig.bypass_cache === true) {
1402
+ console.log(formatLogMessage('debug', `Cache bypass enabled for all URLs in site: ${currentUrl}`));
1403
+ }
1404
+
1368
1405
  if (siteConfig.firstParty === 0 && siteConfig.thirdParty === 0) {
1369
1406
  console.warn(`⚠ Skipping ${currentUrl} because both firstParty and thirdParty are disabled.`);
1370
1407
  return { url: currentUrl, rules: [], success: false, skipped: true };
@@ -1891,7 +1928,18 @@ function setupFrameHandling(page, forceDebug) {
1891
1928
 
1892
1929
  // Also mark in smart cache with context (if cache is enabled)
1893
1930
  if (smartCache) {
1894
- smartCache.markDomainProcessed(domain, context, { resourceType, fullSubdomain });
1931
+ try {
1932
+ if (smartCache.markDomainProcessed) {
1933
+ safeMarkDomainProcessed(domain, context, { resourceType, fullSubdomain });
1934
+ } else {
1935
+ // Fallback: use shouldSkipDomain to indirectly cache
1936
+ smartCache.shouldSkipDomain(domain, context);
1937
+ }
1938
+ } catch (cacheErr) {
1939
+ if (forceDebug) {
1940
+ console.log(formatLogMessage('debug', `[SmartCache] Error marking domain: ${cacheErr.message}`));
1941
+ }
1942
+ }
1895
1943
  }
1896
1944
 
1897
1945
  if (matchedDomains instanceof Map) {
@@ -2303,8 +2351,16 @@ function setupFrameHandling(page, forceDebug) {
2303
2351
 
2304
2352
  // If curl is enabled, download and analyze content immediately
2305
2353
  if (useCurl) {
2306
- // Check response cache first if smart cache is available and caching is enabled
2307
- const cachedContent = smartCache ? smartCache.getCachedResponse(reqUrl) : null;
2354
+ // Check bypass_cache before attempting cache lookup
2355
+ let cachedContent = null;
2356
+ if (!shouldBypassCacheForUrl(reqUrl, siteConfig)) {
2357
+ // Check request cache first if smart cache is available and caching is enabled
2358
+ cachedContent = smartCache ? smartCache.getCachedRequest(reqUrl, {
2359
+ method: 'GET',
2360
+ headers: { 'user-agent': curlUserAgent },
2361
+ siteConfig: siteConfig
2362
+ }) : null;
2363
+ }
2308
2364
 
2309
2365
  if (cachedContent && forceDebug) {
2310
2366
  console.log(formatLogMessage('debug', `[SmartCache] Using cached response content for ${reqUrl.substring(0, 50)}...`));
@@ -2321,7 +2377,10 @@ function setupFrameHandling(page, forceDebug) {
2321
2377
  addMatchedDomain, // Pass the helper function
2322
2378
  isDomainAlreadyDetected,
2323
2379
  onContentFetched: smartCache && !ignoreCache ? (url, content) => {
2324
- smartCache.cacheResponse(url, content);
2380
+ // Only cache if not bypassing cache
2381
+ if (!shouldBypassCacheForUrl(url, siteConfig)) {
2382
+ smartCache.cacheRequest(url, { method: 'GET', siteConfig }, { body: content, status: 200 });
2383
+ }
2325
2384
  } : undefined,
2326
2385
  currentUrl,
2327
2386
  perSiteSubDomains,
@@ -2354,6 +2413,12 @@ function setupFrameHandling(page, forceDebug) {
2354
2413
  matchedDomains,
2355
2414
  addMatchedDomain, // Pass the helper function
2356
2415
  isDomainAlreadyDetected,
2416
+ onContentFetched: smartCache && !ignoreCache ? (url, content) => {
2417
+ // Only cache if not bypassing cache
2418
+ if (!shouldBypassCacheForUrl(url, siteConfig)) {
2419
+ smartCache.cacheRequest(url, { method: 'GET', siteConfig }, { body: content, status: 200 });
2420
+ }
2421
+ } : undefined,
2357
2422
  currentUrl,
2358
2423
  perSiteSubDomains,
2359
2424
  ignoreDomains,
@@ -2392,7 +2457,14 @@ function setupFrameHandling(page, forceDebug) {
2392
2457
  regexes,
2393
2458
  matchedDomains,
2394
2459
  addMatchedDomain, // Pass the helper function
2460
+ bypassCache: (url) => shouldBypassCacheForUrl(url, siteConfig),
2395
2461
  isDomainAlreadyDetected,
2462
+ onContentFetched: smartCache && !ignoreCache ? (url, content) => {
2463
+ // Only cache if not bypassing cache
2464
+ if (!shouldBypassCacheForUrl(url, siteConfig)) {
2465
+ smartCache.cacheRequest(url, { method: 'GET', siteConfig }, { body: content, status: 200 });
2466
+ }
2467
+ } : undefined,
2396
2468
  currentUrl,
2397
2469
  perSiteSubDomains,
2398
2470
  ignoreDomains,
@@ -2910,6 +2982,17 @@ function setupFrameHandling(page, forceDebug) {
2910
2982
  console.log(`\n${messageColors.fileOp('🔄 Browser restart triggered:')} ${restartReason}`);
2911
2983
  }
2912
2984
 
2985
+ // NEW: Clear request cache during browser restart to ensure fresh session
2986
+ if (smartCache && cacheRequests) {
2987
+ const requestCacheStats = smartCache.getRequestCacheStats();
2988
+ if (requestCacheStats.enabled && requestCacheStats.size > 0) {
2989
+ const clearedCount = smartCache.clearRequestCache();
2990
+ if (forceDebug) {
2991
+ console.log(formatLogMessage('debug', `[SmartCache] Cleared ${clearedCount} request cache entries during browser restart`));
2992
+ }
2993
+ }
2994
+ }
2995
+
2913
2996
  try {
2914
2997
  await handleBrowserExit(browser, {
2915
2998
  forceDebug,
@@ -2986,6 +3069,17 @@ function setupFrameHandling(page, forceDebug) {
2986
3069
  console.log(`\n${messageColors.fileOp('🔄 Emergency browser restart:')} Critical browser errors detected`);
2987
3070
  }
2988
3071
 
3072
+ // NEW: Clear request cache during emergency restart
3073
+ if (smartCache && cacheRequests) {
3074
+ const requestCacheStats = smartCache.getRequestCacheStats();
3075
+ if (requestCacheStats.enabled && requestCacheStats.size > 0) {
3076
+ const clearedCount = smartCache.clearRequestCache();
3077
+ if (forceDebug) {
3078
+ console.log(formatLogMessage('debug', `[SmartCache] Cleared ${clearedCount} request cache entries during emergency restart`));
3079
+ }
3080
+ }
3081
+ }
3082
+
2989
3083
  // Force browser restart immediately
2990
3084
  try {
2991
3085
  // Enhanced emergency restart for Puppeteer 23.x
@@ -3048,6 +3142,22 @@ function setupFrameHandling(page, forceDebug) {
3048
3142
 
3049
3143
  let outputResult;
3050
3144
 
3145
+ // NEW: Clear request cache after processing all sites in the JSON config
3146
+ if (smartCache && cacheRequests) {
3147
+ const requestCacheStats = smartCache.getRequestCacheStats();
3148
+ if (requestCacheStats.enabled && requestCacheStats.size > 0) {
3149
+ const clearedCount = smartCache.clearRequestCache();
3150
+ if (!silentMode && clearedCount > 0) {
3151
+ console.log(`\n🗑️ Cleared request cache: ${clearedCount} entries after JSON processing`);
3152
+ }
3153
+ if (forceDebug) {
3154
+ console.log(formatLogMessage('debug',
3155
+ `[SmartCache] Request cache cleared after JSON scan completion (hit rate: ${requestCacheStats.hitRate})`
3156
+ ));
3157
+ }
3158
+ }
3159
+ }
3160
+
3051
3161
  if (!dryRunMode) {
3052
3162
  // Handle all output using the output module
3053
3163
  const outputConfig = {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "1.0.75",
3
+ "version": "1.0.76",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {