@fanboynz/network-scanner 2.0.33 → 2.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/nwss.js +107 -68
  2. package/package.json +1 -1
package/nwss.js CHANGED
@@ -210,7 +210,7 @@ const dryRunMode = args.includes('--dry-run');
210
210
  const compressLogs = args.includes('--compress-logs');
211
211
  const removeTempFiles = args.includes('--remove-tempfiles');
212
212
  const validateConfig = args.includes('--validate-config');
213
- const validateRules = args.includes('--validate-rules');
213
+ let validateRules = args.includes('--validate-rules');
214
214
  const testValidation = args.includes('--test-validation');
215
215
  let cleanRules = args.includes('--clean-rules');
216
216
  const clearCache = args.includes('--clear-cache');
@@ -694,6 +694,11 @@ const {
694
694
  ...otherGlobalConfig
695
695
  } = config;
696
696
 
697
+ // Pre-compile global blocked regexes ONCE (used in every processUrl call)
698
+ const globalBlockedRegexes = Array.isArray(globalBlocked)
699
+ ? globalBlocked.map(pattern => new RegExp(pattern))
700
+ : [];
701
+
697
702
  // Apply global configuration overrides with validation
698
703
  // Priority: Command line args > config.json > defaults
699
704
  const MAX_CONCURRENT_SITES = (() => {
@@ -943,6 +948,39 @@ if (dumpUrls) {
943
948
  }
944
949
  }
945
950
 
951
+ // --- Buffered Log Writer ---
952
+ // Avoids blocking I/O on every intercepted request in debug/dumpurls mode
953
+ const _logBuffers = new Map(); // filePath -> string[]
954
+ const LOG_FLUSH_INTERVAL = 2000; // Flush every 2 seconds
955
+ let _logFlushTimer = null;
956
+
957
+ function bufferedLogWrite(filePath, entry) {
958
+ if (!filePath) return;
959
+ if (!_logBuffers.has(filePath)) {
960
+ _logBuffers.set(filePath, []);
961
+ }
962
+ _logBuffers.get(filePath).push(entry);
963
+ }
964
+
965
+ function flushLogBuffers() {
966
+ for (const [filePath, entries] of _logBuffers) {
967
+ if (entries.length > 0) {
968
+ try {
969
+ fs.appendFileSync(filePath, entries.join(''));
970
+ } catch (err) {
971
+ console.warn(formatLogMessage('warn', `Failed to flush log buffer to ${filePath}: ${err.message}`));
972
+ }
973
+ entries.length = 0; // Clear buffer
974
+ }
975
+ }
976
+ }
977
+
978
+ // Start periodic flush if any logging is enabled
979
+ if (forceDebug || dumpUrls) {
980
+ _logFlushTimer = setInterval(flushLogBuffers, LOG_FLUSH_INTERVAL);
981
+ _logFlushTimer.unref(); // Don't keep process alive just for flushing
982
+ }
983
+
946
984
  // Log comments if debug mode is enabled and comments exist
947
985
  if (forceDebug && globalComments) {
948
986
  const commentList = Array.isArray(globalComments) ? globalComments : [globalComments];
@@ -1047,15 +1085,21 @@ function shouldBypassCacheForUrl(url, siteConfig) {
1047
1085
  return siteConfig.bypass_cache === true;
1048
1086
  }
1049
1087
 
1050
- // ability to use widcards in ignoreDomains
1088
+ // ability to use wildcards in ignoreDomains
1089
+ // Cache compiled wildcard regexes to avoid recompilation on every request
1090
+ const _wildcardRegexCache = new Map();
1051
1091
  function matchesIgnoreDomain(domain, ignorePatterns) {
1052
1092
  return ignorePatterns.some(pattern => {
1053
1093
  if (pattern.includes('*')) {
1054
- // Convert wildcard pattern to regex
1055
- const regexPattern = pattern
1056
- .replace(/\./g, '\\.') // Escape dots
1057
- .replace(/\*/g, '.*'); // Convert * to .*
1058
- return new RegExp(`^${regexPattern}$`).test(domain);
1094
+ let compiled = _wildcardRegexCache.get(pattern);
1095
+ if (!compiled) {
1096
+ const regexPattern = pattern
1097
+ .replace(/[.*+?^${}()|[\]\\]/g, '\\$&') // Escape all regex specials including *
1098
+ .replace(/\\\*/g, '.*'); // Convert escaped \* back to .*
1099
+ compiled = new RegExp(`^${regexPattern}$`);
1100
+ _wildcardRegexCache.set(pattern, compiled);
1101
+ }
1102
+ return compiled.test(domain);
1059
1103
  }
1060
1104
  return domain.endsWith(pattern);
1061
1105
  });
@@ -1065,14 +1109,6 @@ function setupFrameHandling(page, forceDebug) {
1065
1109
  // Track active frames and clear on navigation to prevent detached frame access
1066
1110
  let activeFrames = new Set(); // Use Set to track frame references
1067
1111
 
1068
- // Clear frame tracking on navigation to prevent stale references
1069
- page.on('framenavigated', (frame) => {
1070
- if (frame === page.mainFrame()) {
1071
- // Main frame navigated - clear all tracked frames
1072
- activeFrames.clear();
1073
- }
1074
- });
1075
-
1076
1112
  // Handle frame creation with error suppression
1077
1113
  page.on('frameattached', async (frame) => {
1078
1114
  // Enhanced frame handling with detached frame protection
@@ -1182,10 +1218,16 @@ function setupFrameHandling(page, forceDebug) {
1182
1218
  }
1183
1219
  }
1184
1220
  });
1185
- // Handle frame navigations (keep this for monitoring)
1221
+ // Handle frame navigations - clear stale tracking and monitor activity
1186
1222
  page.on('framenavigated', (frame) => {
1187
1223
 
1188
- // Skip if frame is not in our active set
1224
+ // Main frame navigated - clear all tracked frames to prevent stale references
1225
+ if (frame === page.mainFrame()) {
1226
+ activeFrames.clear();
1227
+ return;
1228
+ }
1229
+
1230
+ // Skip child frames not in our active set
1189
1231
  if (!activeFrames.has(frame)) return;
1190
1232
 
1191
1233
  let frameUrl;
@@ -1311,7 +1353,6 @@ function setupFrameHandling(page, forceDebug) {
1311
1353
  '--disable-client-side-phishing-detection',
1312
1354
  '--enable-features=NetworkService',
1313
1355
  // Disk space controls - 50MB cache limits
1314
- '--disable-features=VizDisplayCompositor',
1315
1356
  `--disk-cache-size=${CACHE_LIMITS.DISK_CACHE_SIZE}`, // 50MB disk cache
1316
1357
  `--media-cache-size=${CACHE_LIMITS.MEDIA_CACHE_SIZE}`, // 50MB media cache
1317
1358
  '--disable-application-cache',
@@ -1324,7 +1365,7 @@ function setupFrameHandling(page, forceDebug) {
1324
1365
  '--disable-ipc-flooding-protection',
1325
1366
  '--aggressive-cache-discard',
1326
1367
  '--memory-pressure-off',
1327
- '--max_old_space_size=2048',
1368
+ '--max_old_space_size=2048', // V8 heap limit
1328
1369
  '--disable-prompt-on-repost', // Fixes form popup on page reload
1329
1370
  '--disable-background-networking',
1330
1371
  '--no-sandbox',
@@ -1339,7 +1380,6 @@ function setupFrameHandling(page, forceDebug) {
1339
1380
  '--disable-extensions',
1340
1381
  '--no-default-browser-check',
1341
1382
  '--safebrowsing-disable-auto-update',
1342
- '--max_old_space_size=1024',
1343
1383
  '--ignore-ssl-errors',
1344
1384
  '--ignore-certificate-errors',
1345
1385
  '--ignore-certificate-errors-spki-list',
@@ -1401,12 +1441,16 @@ function setupFrameHandling(page, forceDebug) {
1401
1441
  // Set up cleanup on process termination
1402
1442
  process.on('SIGINT', async () => {
1403
1443
  if (forceDebug) console.log(formatLogMessage('debug', 'SIGINT received, performing cleanup...'));
1444
+ flushLogBuffers();
1445
+ if (_logFlushTimer) clearInterval(_logFlushTimer);
1404
1446
  await performEmergencyCleanup();
1405
1447
  process.exit(0);
1406
1448
  });
1407
1449
 
1408
1450
  process.on('SIGTERM', async () => {
1409
1451
  if (forceDebug) console.log(formatLogMessage('debug', 'SIGTERM received, performing cleanup...'));
1452
+ flushLogBuffers();
1453
+ if (_logFlushTimer) clearInterval(_logFlushTimer);
1410
1454
  await performEmergencyCleanup();
1411
1455
  process.exit(0);
1412
1456
  });
@@ -2197,11 +2241,10 @@ function setupFrameHandling(page, forceDebug) {
2197
2241
  ? siteConfig.blocked.map(pattern => new RegExp(pattern))
2198
2242
  : [];
2199
2243
 
2200
- // Add global blocked patterns
2201
- const globalBlockedRegexes = Array.isArray(globalBlocked)
2202
- ? globalBlocked.map(pattern => new RegExp(pattern))
2203
- : [];
2204
- const allBlockedRegexes = [...blockedRegexes, ...globalBlockedRegexes];
2244
+ // Combine site-specific with pre-compiled global blocked patterns
2245
+ const allBlockedRegexes = blockedRegexes.length > 0
2246
+ ? [...blockedRegexes, ...globalBlockedRegexes]
2247
+ : globalBlockedRegexes; // Avoid spread when no site-specific patterns
2205
2248
 
2206
2249
  /**
2207
2250
  * Helper function to add domain to matched collection
@@ -2331,12 +2374,14 @@ function setupFrameHandling(page, forceDebug) {
2331
2374
  // - URL matching against blocklists (`blockedRegexes`).
2332
2375
  // - URL matching against filter patterns (`regexes`) for domain extraction.
2333
2376
  // - Global `ignoreDomains` list.
2377
+ // Pre-compute values that are constant for this URL
2378
+ const simplifiedCurrentUrl = getRootDomain(currentUrl);
2379
+
2334
2380
  page.on('request', request => {
2335
2381
  const checkedUrl = request.url();
2336
- const checkedHostname = safeGetDomain(checkedUrl, true);
2337
- const checkedRootDomain = safeGetDomain(checkedUrl, false); // Root domain for first-party detection
2382
+ const fullSubdomain = safeGetDomain(checkedUrl, true); // Full hostname for cache
2383
+ const checkedRootDomain = safeGetDomain(checkedUrl, false);
2338
2384
  // Check against ALL first-party domains (original + all redirects)
2339
- // This prevents redirect destinations from being marked as third-party
2340
2385
  const isFirstParty = checkedRootDomain && firstPartyDomains.has(checkedRootDomain);
2341
2386
 
2342
2387
  // Block infinite iframe loops - safely access frame URL
@@ -2349,9 +2394,9 @@ function setupFrameHandling(page, forceDebug) {
2349
2394
  }
2350
2395
  })();
2351
2396
  if (frameUrl && frameUrl.includes('creative.dmzjmp.com') &&
2352
- request.url().includes('go.dmzjmp.com/api/models')) {
2397
+ checkedUrl.includes('go.dmzjmp.com/api/models')) {
2353
2398
  if (forceDebug) {
2354
- console.log(formatLogMessage('debug', `Blocking potential infinite iframe loop: ${request.url()}`));
2399
+ console.log(formatLogMessage('debug', `Blocking potential infinite iframe loop: ${checkedUrl}`));
2355
2400
  }
2356
2401
  request.abort();
2357
2402
  return;
@@ -2359,19 +2404,19 @@ function setupFrameHandling(page, forceDebug) {
2359
2404
 
2360
2405
  // Enhanced debug logging to show which frame the request came from
2361
2406
  if (forceDebug) {
2362
- let frameUrl = 'unknown-frame';
2407
+ let debugFrameUrl = 'unknown-frame';
2363
2408
  let isMainFrame = false;
2364
2409
 
2365
2410
  try {
2366
2411
  const frame = request.frame();
2367
2412
  if (frame) {
2368
- frameUrl = frame.url();
2413
+ debugFrameUrl = frame.url();
2369
2414
  isMainFrame = frame === page.mainFrame();
2370
2415
  }
2371
2416
  } catch (frameErr) {
2372
- frameUrl = 'detached-frame';
2417
+ debugFrameUrl = 'detached-frame';
2373
2418
  }
2374
- console.log(formatLogMessage('debug', `${messageColors.highlight('[req]')}[frame: ${isMainFrame ? 'main' : 'iframe'}] ${frameUrl} → ${request.url()}`));
2419
+ console.log(formatLogMessage('debug', `${messageColors.highlight('[req]')}[frame: ${isMainFrame ? 'main' : 'iframe'}] ${debugFrameUrl} → ${checkedUrl}`));
2375
2420
  }
2376
2421
 
2377
2422
  // Apply adblock rules BEFORE expensive regex checks for better performance
@@ -2397,46 +2442,36 @@ function setupFrameHandling(page, forceDebug) {
2397
2442
 
2398
2443
  // Show --debug output and the url while its scanning
2399
2444
  if (forceDebug) {
2400
- const simplifiedUrl = getRootDomain(currentUrl);
2401
2445
  const timestamp = new Date().toISOString();
2402
- const logEntry = `${timestamp} [debug req][${simplifiedUrl}] ${request.url()}`;
2446
+ const logEntry = `${timestamp} [debug req][${simplifiedCurrentUrl}] ${checkedUrl}\n`;
2403
2447
 
2404
2448
  // Output to console
2405
- console.log(formatLogMessage('debug', `${messageColors.highlight('[req]')}[${simplifiedUrl}] ${request.url()}`));
2449
+ console.log(formatLogMessage('debug', `${messageColors.highlight('[req]')}[${simplifiedCurrentUrl}] ${checkedUrl}`));
2406
2450
 
2407
- // Output to file
2408
- if (debugLogFile) {
2409
- try {
2410
- fs.appendFileSync(debugLogFile, logEntry + '\n');
2411
- } catch (logErr) {
2412
- console.warn(formatLogMessage('warn', `Failed to write to debug log file: ${logErr.message}`));
2413
- }
2414
- }
2451
+ // Output to file (buffered)
2452
+ bufferedLogWrite(debugLogFile, logEntry);
2415
2453
  }
2416
- const reqUrl = request.url();
2454
+ const reqUrl = checkedUrl;
2417
2455
 
2418
- // ALWAYS extract the FULL subdomain for cache checking to preserve unique subdomains
2419
- const fullSubdomain = safeGetDomain(reqUrl, true); // Always get full subdomain for cache
2420
2456
  const reqDomain = safeGetDomain(reqUrl, perSiteSubDomains); // Output domain based on config
2421
2457
 
2422
2458
  if (allBlockedRegexes.some(re => re.test(reqUrl))) {
2423
2459
  if (forceDebug) {
2424
- // Find which specific pattern matched for debug logging
2425
- const allPatterns = [...(siteConfig.blocked || []), ...globalBlocked];
2426
- const matchedPattern = allPatterns.find(pattern => new RegExp(pattern).test(reqUrl));
2427
- const patternSource = siteConfig.blocked && siteConfig.blocked.includes(matchedPattern) ? 'site' : 'global';
2428
- const simplifiedUrl = getRootDomain(currentUrl);
2429
- console.log(formatLogMessage('debug', `${messageColors.blocked('[blocked]')}[${simplifiedUrl}] ${reqUrl} blocked by ${patternSource} pattern: ${matchedPattern}`));
2430
-
2431
- // Also log to file if debug logging is enabled
2432
- if (debugLogFile) {
2433
- try {
2434
- const timestamp = new Date().toISOString();
2435
- fs.appendFileSync(debugLogFile, `${timestamp} [blocked][${simplifiedUrl}] ${reqUrl} (${patternSource} pattern: ${matchedPattern})\n`);
2436
- } catch (logErr) {
2437
- console.warn(formatLogMessage('warn', `Failed to write blocked domain to debug log: ${logErr.message}`));
2460
+ // Find which specific pattern matched using already-compiled regexes
2461
+ let matchedPattern = '(unknown)';
2462
+ let patternSource = 'global';
2463
+ for (let i = 0; i < allBlockedRegexes.length; i++) {
2464
+ if (allBlockedRegexes[i].test(reqUrl)) {
2465
+ matchedPattern = allBlockedRegexes[i].source;
2466
+ patternSource = i < blockedRegexes.length ? 'site' : 'global';
2467
+ break;
2438
2468
  }
2439
2469
  }
2470
+ console.log(formatLogMessage('debug', `${messageColors.blocked('[blocked]')}[${simplifiedCurrentUrl}] ${reqUrl} blocked by ${patternSource} pattern: ${matchedPattern}`));
2471
+
2472
+ // Also log to file (buffered)
2473
+ const timestamp = new Date().toISOString();
2474
+ bufferedLogWrite(debugLogFile, `${timestamp} [blocked][${simplifiedCurrentUrl}] ${reqUrl} (${patternSource} pattern: ${matchedPattern})\n`);
2440
2475
  }
2441
2476
 
2442
2477
  // NEW: Check if even_blocked is enabled and this URL matches filter regex
@@ -2463,15 +2498,14 @@ function setupFrameHandling(page, forceDebug) {
2463
2498
  addMatchedDomain(reqDomain, resourceType, fullSubdomain);
2464
2499
  }
2465
2500
 
2466
- const simplifiedUrl = getRootDomain(currentUrl);
2467
2501
  if (siteConfig.verbose === 1) {
2468
2502
  const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
2469
- console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
2503
+ console.log(formatLogMessage('match', `[${simplifiedCurrentUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
2470
2504
  }
2471
2505
  if (dumpUrls) {
2472
2506
  const timestamp = new Date().toISOString();
2473
2507
  const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
2474
- fs.appendFileSync(matchedUrlsLogFile, `${timestamp} [match][${simplifiedUrl}] ${reqUrl} (resourceType: ${resourceType})${resourceInfo} [BLOCKED BUT ADDED]\n`);
2508
+ bufferedLogWrite(matchedUrlsLogFile, `${timestamp} [match][${simplifiedCurrentUrl}] ${reqUrl} (resourceType: ${resourceType})${resourceInfo} [BLOCKED BUT ADDED]\n`);
2475
2509
  }
2476
2510
  break; // Only match once per URL
2477
2511
  }
@@ -2627,15 +2661,14 @@ function setupFrameHandling(page, forceDebug) {
2627
2661
  } else {
2628
2662
  addMatchedDomain(reqDomain, resourceType);
2629
2663
  }
2630
- const simplifiedUrl = getRootDomain(currentUrl);
2631
2664
  if (siteConfig.verbose === 1) {
2632
2665
  const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
2633
- console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
2666
+ console.log(formatLogMessage('match', `[${simplifiedCurrentUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
2634
2667
  }
2635
2668
  if (dumpUrls) {
2636
2669
  const timestamp = new Date().toISOString();
2637
2670
  const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
2638
- fs.appendFileSync(matchedUrlsLogFile, `${timestamp} [match][${simplifiedUrl}] ${reqUrl} (resourceType: ${resourceType})${resourceInfo}\n`);
2671
+ bufferedLogWrite(matchedUrlsLogFile, `${timestamp} [match][${simplifiedCurrentUrl}] ${reqUrl} (resourceType: ${resourceType})${resourceInfo}\n`);
2639
2672
  }
2640
2673
  } else if (hasNetTools && !hasSearchString && !hasSearchStringAnd) {
2641
2674
  // If nettools are configured (whois/dig), perform checks on the domain
@@ -4119,6 +4152,12 @@ function setupFrameHandling(page, forceDebug) {
4119
4152
  }
4120
4153
  }
4121
4154
 
4155
+ // Flush any remaining buffered log entries before compression/exit
4156
+ flushLogBuffers();
4157
+ if (_logFlushTimer) {
4158
+ clearInterval(_logFlushTimer);
4159
+ }
4160
+
4122
4161
  // Compress log files if --compress-logs is enabled
4123
4162
  if (compressLogs && dumpUrls && !dryRunMode) {
4124
4163
  // Collect all existing log files for compression
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "2.0.33",
3
+ "version": "2.0.35",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {