@fanboynz/network-scanner 2.0.24 → 2.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -223,7 +223,7 @@ When a page redirects to a new domain, first-party/third-party detection is base
223
223
  | `whois_timeout_multiplier` | Number | `1.5` | Timeout increase multiplier per retry |
224
224
  | `whois_use_fallback` | Boolean | `true` | Add TLD-specific fallback servers |
225
225
  | `whois_retry_on_timeout` | Boolean | `true` | Retry on timeout errors |
226
- | `whois_retry_on_error` | Boolean | `false` | Retry on connection/other errors |
226
+ | `whois_retry_on_error` | Boolean | `true` | Retry on connection/other errors |
227
227
  | `dig` | Array | - | Check dig output for ALL specified terms (AND logic) |
228
228
  | `dig-or` | Array | - | Check dig output for ANY specified term (OR logic) |
229
229
  | `dig_subdomain` | Boolean | `false` | Use subdomain for dig lookup instead of root domain |
@@ -59,7 +59,7 @@ const PRECOMPILED_MOCKS = Object.freeze({
59
59
  postMessage: () => {},
60
60
  disconnect: () => {}
61
61
  }),
62
- getManifest: () => Object.freeze({ name: "Chrome", version: "140.0.0.0" }),
62
+ getManifest: () => Object.freeze({ name: "Chrome", version: "141.0.0.0" }),
63
63
  getURL: (path) => `chrome-extension://invalid/${path}`,
64
64
  id: undefined
65
65
  }),
@@ -91,9 +91,9 @@ const BUILT_IN_PROPERTIES = new Set([
91
91
  // User agent collections with latest versions
92
92
  const USER_AGENT_COLLECTIONS = {
93
93
  chrome: [
94
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
95
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
96
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36"
94
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
95
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36",
96
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"
97
97
  ],
98
98
  firefox: [
99
99
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:143.0) Gecko/20100101 Firefox/143.0",
@@ -535,7 +535,7 @@ async function applyUserAgentSpoofing(page, siteConfig, forceDebug, currentUrl)
535
535
  onMessage: { addListener: () => {}, removeListener: () => {} },
536
536
  sendMessage: () => {},
537
537
  connect: () => ({ onMessage: { addListener: () => {}, removeListener: () => {} }, postMessage: () => {}, disconnect: () => {} }),
538
- getManifest: () => ({ name: "Chrome", version: "140.0.0.0" }),
538
+ getManifest: () => ({ name: "Chrome", version: "141.0.0.0" }),
539
539
  getURL: (path) => `chrome-extension://invalid/${path}`,
540
540
  id: undefined
541
541
  },
@@ -1347,6 +1347,19 @@ async function applyFingerprintProtection(page, siteConfig, forceDebug, currentU
1347
1347
 
1348
1348
  try {
1349
1349
  await page.evaluateOnNewDocument(({ spoof, debugEnabled }) => {
1350
+
1351
+ // Define helper functions FIRST in this context
1352
+ function spoofNavigatorProperties(navigator, properties) {
1353
+ for (const [prop, descriptor] of Object.entries(properties)) {
1354
+ safeDefinePropertyLocal(navigator, prop, descriptor);
1355
+ }
1356
+ }
1357
+
1358
+ function spoofScreenProperties(screen, properties) {
1359
+ for (const [prop, descriptor] of Object.entries(properties)) {
1360
+ safeDefinePropertyLocal(screen, prop, descriptor);
1361
+ }
1362
+ }
1350
1363
 
1351
1364
  function safeDefinePropertyLocal(target, property, descriptor) {
1352
1365
  try {
@@ -3,6 +3,7 @@
3
3
 
4
4
  const fs = require('fs');
5
5
  const { spawnSync } = require('child_process');
6
+ const { grepContent } = require('./grep');
6
7
 
7
8
  // Configuration constants for search logic
8
9
  const SEARCH_CONFIG = {
@@ -51,11 +52,12 @@ function parseSearchStrings(searchstring, searchstringAnd) {
51
52
  * @param {Function} addMatchedDomain - Optional helper function for adding domains
52
53
  * @param {string} domain - Domain to add
53
54
  * @param {string} resourceType - Resource type (for --adblock-rules mode)
55
+ * @param {string|null} [fullSubdomain=null] - Full subdomain, forwarded to addMatchedDomain for cache tracking
54
56
  */
55
- function addDomainToCollection(matchedDomains, addMatchedDomain, domain, resourceType = null) {
57
+ function addDomainToCollection(matchedDomains, addMatchedDomain, domain, resourceType = null, fullSubdomain = null) {
56
58
  // Use helper function if provided (preferred method)
57
59
  if (typeof addMatchedDomain === 'function') {
58
- addMatchedDomain(domain, resourceType);
60
+ addMatchedDomain(domain, resourceType, fullSubdomain);
59
61
  return;
60
62
  }
61
63
 
@@ -575,6 +577,7 @@ function createResponseHandler(config) {
575
577
  siteConfig,
576
578
  dumpUrls,
577
579
  matchedUrlsLogFile,
580
+ useGrep = false,
578
581
  forceDebug,
579
582
  resourceType // Will be null for response handler
580
583
  } = config;
@@ -584,22 +587,16 @@ function createResponseHandler(config) {
584
587
  const respDomain = perSiteSubDomains ? (new URL(respUrl)).hostname : getRootDomain(respUrl);
585
588
 
586
589
  // Only process responses that match our regex patterns
587
- const matchesRegex = regexes.some(re => re.test(respUrl));
588
- if (!matchesRegex) return;
590
+ const fullSubdomain = (new URL(respUrl)).hostname; // Always get full subdomain for cache tracking
589
591
 
590
- // Extract domain and check if already detected (skip expensive operations)
591
- if (typeof config.isDomainAlreadyDetected === 'function' && config.isDomainAlreadyDetected(respDomain)) {
592
- if (forceDebug) {
593
- console.log(`[debug] Skipping response analysis for already detected domain: ${respDomain}`);
594
- }
592
+ // Skip if already detected to avoid duplicates
593
+ if (typeof config.isDomainAlreadyDetected === 'function' && config.isDomainAlreadyDetected(fullSubdomain)) {
595
594
  return;
596
595
  }
597
-
598
- // Check if this is a first-party response (same domain as the URL being scanned)
599
- const currentUrlHostname = new URL(currentUrl).hostname;
600
- const responseHostname = new URL(respUrl).hostname;
601
- const isFirstParty = currentUrlHostname === responseHostname;
596
+ const matchesRegex = regexes.some(re => re.test(respUrl));
597
+ if (!matchesRegex) return;
602
598
 
599
+ // Not yet detected and regex matched: continue to party filtering and content analysis
603
600
  // The main request handler already filtered first-party/third-party requests
604
601
  // This response handler only runs for requests that passed that filter
605
602
  // However, we need to apply the same first-party/third-party logic here for searchstring analysis
@@ -607,6 +604,10 @@ function createResponseHandler(config) {
607
604
 
608
605
  // Apply first-party/third-party filtering for searchstring analysis
609
606
  // Use the exact same logic as the main request handler
607
+
608
+ const currentUrlHostname = new URL(currentUrl).hostname;
609
+ const responseHostname = new URL(respUrl).hostname;
610
+ const isFirstParty = currentUrlHostname === responseHostname;
610
611
  if (isFirstParty && siteConfig.firstParty === false) {
611
612
  if (forceDebug) {
612
613
  console.log(`[debug] Skipping first-party response for searchstring analysis (firstParty=false): ${respUrl}`);
@@ -632,9 +633,61 @@ function createResponseHandler(config) {
632
633
  }
633
634
 
634
635
  const content = await response.text();
636
+
637
+ // Cache the fetched content if callback provided
638
+ if (config.onContentFetched) {
639
+ try {
640
+ config.onContentFetched(respUrl, content);
641
+ } catch (cacheErr) {
642
+ if (forceDebug) {
643
+ console.log(`[debug] Content caching failed: ${cacheErr.message}`);
644
+ }
645
+ }
646
+ }
635
647
 
636
648
  // Check if content contains search strings (OR or AND logic)
637
- const { found, matchedString, logicType, error } = searchContent(content, searchStrings, searchStringsAnd, contentType, respUrl);
649
+ let searchResult;
650
+
651
+ if (useGrep && (searchStrings.length > 0 || searchStringsAnd.length > 0)) {
652
+ // Use grep for pattern matching
653
+ try {
654
+ const allPatterns = [...(searchStrings || []), ...(searchStringsAnd || [])];
655
+ const grepResult = await grepContent(content, allPatterns, {
656
+ ignoreCase: true,
657
+ wholeWord: false,
658
+ regex: false
659
+ });
660
+
661
+ if (hasSearchStringAnd && searchStringsAnd.length > 0) {
662
+ // For AND logic, check that all patterns were found
663
+ const foundPatterns = grepResult.allMatches.map(match => match.pattern);
664
+ const allFound = searchStringsAnd.every(pattern => foundPatterns.includes(pattern));
665
+ searchResult = {
666
+ found: allFound,
667
+ matchedString: allFound ? foundPatterns.join(' AND ') : null,
668
+ logicType: 'AND'
669
+ };
670
+ } else {
671
+ // For OR logic, any match is sufficient
672
+ searchResult = {
673
+ found: grepResult.found,
674
+ matchedString: grepResult.matchedPattern,
675
+ logicType: 'OR'
676
+ };
677
+ }
678
+ } catch (grepErr) {
679
+ if (forceDebug) {
680
+ console.log(`[debug] Grep failed for ${respUrl}, falling back to JavaScript: ${grepErr.message}`);
681
+ }
682
+ // Fallback to JavaScript search
683
+ searchResult = searchContent(content, searchStrings, searchStringsAnd, contentType, respUrl);
684
+ }
685
+ } else {
686
+ // Use JavaScript search
687
+ searchResult = searchContent(content, searchStrings, searchStringsAnd, contentType, respUrl);
688
+ }
689
+
690
+ const { found, matchedString, logicType, error } = searchResult;
638
691
 
639
692
  if (found) {
640
693
  if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
@@ -642,27 +695,31 @@ function createResponseHandler(config) {
642
695
  }
643
696
 
644
697
  // Response handler doesn't have access to specific resource type
645
- addDomainToCollection(matchedDomains, addMatchedDomain, respDomain, null);
698
+ // Use the addMatchedDomain helper which handles fullSubdomain properly
699
+ addMatchedDomain(respDomain, null, fullSubdomain);
646
700
  const simplifiedUrl = getRootDomain(currentUrl);
647
701
 
648
702
  if (siteConfig.verbose === 1) {
649
703
  const partyType = isFirstParty ? 'first-party' : 'third-party';
650
- console.log(`[match][${simplifiedUrl}] ${respUrl} (${partyType}) contains searchstring (${logicType}): "${matchedString}"`);
704
+ const searchMethod = useGrep ? 'grep' : 'js';
705
+ console.log(`[match][${simplifiedUrl}] ${respUrl} (${partyType}, ${searchMethod}) contains searchstring (${logicType}): "${matchedString}"`);
651
706
  }
652
707
 
653
708
  if (dumpUrls) {
654
709
  const timestamp = new Date().toISOString();
655
710
  const partyType = isFirstParty ? 'first-party' : 'third-party';
711
+ const searchMethod = useGrep ? 'grep' : 'js';
656
712
  try {
657
713
  fs.appendFileSync(matchedUrlsLogFile,
658
- `${timestamp} [match][${simplifiedUrl}] ${respUrl} (${partyType}, searchstring (${logicType}): "${matchedString}")\n`);
714
+ `${timestamp} [match][${simplifiedUrl}] ${respUrl} (${partyType}, ${searchMethod}, searchstring (${logicType}): "${matchedString}")\n`);
659
715
  } catch (logErr) {
660
716
  console.warn(`[warn] Failed to write to matched URLs log: ${logErr.message}`);
661
717
  }
662
718
  }
663
719
  } else if (forceDebug) {
664
720
  const partyType = isFirstParty ? 'first-party' : 'third-party';
665
- console.log(`[debug] ${respUrl} (${partyType}) matched regex but no searchstring found`);
721
+ const searchMethod = useGrep ? 'grep' : 'js';
722
+ console.log(`[debug] ${respUrl} (${partyType}, ${searchMethod}) matched regex but no searchstring found`);
666
723
  if (error) {
667
724
  console.log(`[debug] Search error: ${error}`);
668
725
  }
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v2.0.24 ===
1
+ // === Network scanner script (nwss.js) v2.0.26 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -56,7 +56,7 @@ function fastTimeout(ms) {
56
56
  }
57
57
 
58
58
  // --- Configuration Constants ---
59
- const TIMEOUTS = {
59
+ const TIMEOUTS = Object.freeze({
60
60
  DEFAULT_PAGE: 35000, // Standard page load timeout (35s)
61
61
  DEFAULT_NAVIGATION: 25000, // Navigation operation timeout
62
62
  DEFAULT_NAVIGATION_REDUCED: 20000, // Reduced timeout for faster failures
@@ -71,21 +71,32 @@ const TIMEOUTS = {
71
71
  CURL_HANDLER_DELAY: 3000, // Wait for async curl operations
72
72
  PROTOCOL_TIMEOUT: 180000, // Chrome DevTools Protocol timeout
73
73
  REDIRECT_JS_TIMEOUT: 5000 // JavaScript redirect detection timeout
74
- };
74
+ });
75
75
 
76
- const CACHE_LIMITS = {
76
+ const CACHE_LIMITS = Object.freeze({
77
77
  DISK_CACHE_SIZE: 52428800, // 50MB
78
78
  MEDIA_CACHE_SIZE: 52428800, // 50MB
79
79
  DEFAULT_CACHE_PATH: '.cache',
80
80
  DEFAULT_MAX_SIZE: 5000
81
- };
81
+ });
82
82
 
83
- const CONCURRENCY_LIMITS = {
83
+ const CONCURRENCY_LIMITS = Object.freeze({
84
84
  MIN: 1,
85
85
  MAX: 50,
86
86
  DEFAULT: 6,
87
87
  HIGH_CONCURRENCY_THRESHOLD: 12 // Auto-enable aggressive caching above this
88
- };
88
+ });
89
+
90
+ // V8 Optimization: Use Map for user agent lookups instead of object
91
+ const USER_AGENTS = Object.freeze(new Map([
92
+ ['chrome', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"],
93
+ ['chrome_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"],
94
+ ['chrome_linux', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"],
95
+ ['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/143.0"],
96
+ ['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:142.0) Gecko/20100101 Firefox/143.0"],
97
+ ['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:142.0) Gecko/20100101 Firefox/143.0"],
98
+ ['safari', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"]
99
+ ]));
89
100
 
90
101
  const REALTIME_CLEANUP_THRESHOLD = 8; // Default pages to keep for realtime cleanup
91
102
 
@@ -132,7 +143,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
132
143
  const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive, performGroupWindowCleanup, performRealtimeWindowCleanup, trackPageForRealtime, updatePageUsage, cleanupPageBeforeReload } = require('./lib/browserhealth');
133
144
 
134
145
  // --- Script Configuration & Constants ---
135
- const VERSION = '2.0.24'; // Script version
146
+ const VERSION = '2.0.26'; // Script version
136
147
 
137
148
  // get startTime
138
149
  const startTime = Date.now();
@@ -997,7 +1008,7 @@ function matchesIgnoreDomain(domain, ignorePatterns) {
997
1008
 
998
1009
  function setupFrameHandling(page, forceDebug) {
999
1010
  // Track active frames and clear on navigation to prevent detached frame access
1000
- let activeFrames = new Map(); // Use Map to track frame state
1011
+ let activeFrames = new Set(); // Use Set to track frame references
1001
1012
 
1002
1013
  // Clear frame tracking on navigation to prevent stale references
1003
1014
  page.on('framenavigated', (frame) => {
@@ -1031,7 +1042,6 @@ function setupFrameHandling(page, forceDebug) {
1031
1042
  // Enhanced frame validation with multiple safety checks
1032
1043
  let frameUrl;
1033
1044
  try {
1034
- // Test frame accessibility first
1035
1045
  frameUrl = frame.url();
1036
1046
 
1037
1047
  // Check if frame is detached (if method exists)
@@ -1041,12 +1051,17 @@ function setupFrameHandling(page, forceDebug) {
1041
1051
  }
1042
1052
  return;
1043
1053
  }
1054
+
1055
+ activeFrames.add(frame);
1056
+
1057
+ if (forceDebug) {
1058
+ console.log(formatLogMessage('debug', `New frame attached: ${frameUrl || 'about:blank'}`));
1059
+ }
1044
1060
  } catch (frameAccessError) {
1045
1061
  // Frame is not accessible (likely detached)
1046
1062
  return;
1047
1063
  }
1048
-
1049
- activeFrames.add(frame);
1064
+
1050
1065
  } catch (detachError) {
1051
1066
  // Frame state checking can throw in 23.x, handle gracefully
1052
1067
  if (forceDebug) {
@@ -1055,14 +1070,10 @@ function setupFrameHandling(page, forceDebug) {
1055
1070
  return;
1056
1071
  }
1057
1072
 
1058
- // Store frame with timestamp for tracking
1059
- activeFrames.set(frame, Date.now());
1060
1073
 
1061
1074
  if (frame !== page.mainFrame() && frame.parentFrame()) { // Only handle child frames
1062
- try {
1063
- if (forceDebug) {
1064
- console.log(formatLogMessage('debug', `New frame attached: ${frameUrl || 'about:blank'}`));
1065
- }
1075
+ let frameUrl;
1076
+ frameUrl = frame.url();
1066
1077
 
1067
1078
  // Don't try to navigate to frames with invalid/empty URLs
1068
1079
  if (!frameUrl ||
@@ -1100,6 +1111,7 @@ function setupFrameHandling(page, forceDebug) {
1100
1111
  // Let frames load naturally - manual navigation often causes Protocol errors
1101
1112
  // await frame.goto(frame.url(), { waitUntil: 'domcontentloaded', timeout: 5000 });
1102
1113
 
1114
+ try {
1103
1115
  if (forceDebug) {
1104
1116
  console.log(formatLogMessage('debug', `Frame will load naturally: ${frameUrl}`));
1105
1117
  }
@@ -1117,11 +1129,11 @@ function setupFrameHandling(page, forceDebug) {
1117
1129
  });
1118
1130
  // Handle frame navigations (keep this for monitoring)
1119
1131
  page.on('framenavigated', (frame) => {
1120
- let frameUrl;
1121
1132
 
1122
1133
  // Skip if frame is not in our active set
1123
1134
  if (!activeFrames.has(frame)) return;
1124
1135
 
1136
+ let frameUrl;
1125
1137
  try {
1126
1138
  frameUrl = frame.url();
1127
1139
  } catch (urlErr) {
@@ -1143,17 +1155,14 @@ function setupFrameHandling(page, forceDebug) {
1143
1155
  // Optional: Handle frame detachment for cleanup
1144
1156
  page.on('framedetached', (frame) => {
1145
1157
  // Remove from active tracking
1146
- activeFrames.delete(frame);
1158
+ activeFrames.delete(frame); // This works for both Map and Set
1147
1159
 
1148
- // Skip logging if we can't access frame URL
1149
- let frameUrl;
1160
+
1150
1161
  if (forceDebug) {
1162
+ let frameUrl;
1151
1163
  try {
1152
1164
  frameUrl = frame.url();
1153
- } catch (urlErr) {
1154
- // Frame already detached, can't get URL
1155
- return;
1156
- }
1165
+
1157
1166
  if (frameUrl &&
1158
1167
  frameUrl !== 'about:blank' &&
1159
1168
  frameUrl !== 'about:srcdoc' &&
@@ -1162,6 +1171,11 @@ function setupFrameHandling(page, forceDebug) {
1162
1171
  !frameUrl.startsWith('chrome-extension://')) {
1163
1172
  console.log(formatLogMessage('debug', `Frame detached: ${frameUrl}`));
1164
1173
  }
1174
+ } catch (urlErr) {
1175
+ // Frame already detached, can't get URL - this is expected
1176
+ return;
1177
+ }
1178
+
1165
1179
  }
1166
1180
  });
1167
1181
  }
@@ -1208,6 +1222,9 @@ function setupFrameHandling(page, forceDebug) {
1208
1222
  '/usr/bin/chromium',
1209
1223
  '/snap/bin/chromium'
1210
1224
  ];
1225
+ // V8 Optimization: Freeze the Chrome paths array since it's constant
1226
+ Object.freeze(systemChromePaths);
1227
+
1211
1228
 
1212
1229
  let executablePath = null;
1213
1230
  for (const chromePath of systemChromePaths) {
@@ -1384,12 +1401,28 @@ function setupFrameHandling(page, forceDebug) {
1384
1401
  * @returns {Promise<object>} A promise that resolves to an object containing scan results.
1385
1402
  */
1386
1403
  async function processUrl(currentUrl, siteConfig, browserInstance) {
1387
- const allowFirstParty = siteConfig.firstParty === true || siteConfig.firstParty === 1;
1388
- const allowThirdParty = siteConfig.thirdParty === undefined || siteConfig.thirdParty === true || siteConfig.thirdParty === 1;
1389
- const perSiteSubDomains = siteConfig.subDomains === 1 ? true : subDomainsMode;
1390
- const siteLocalhostIP = siteConfig.localhost || null;
1391
- const cloudflarePhishBypass = siteConfig.cloudflare_phish === true;
1392
- const cloudflareBypass = siteConfig.cloudflare_bypass === true;
1404
+ // V8 Optimization: Single destructuring to avoid multiple property lookups
1405
+ const {
1406
+ firstParty,
1407
+ thirdParty,
1408
+ subDomains,
1409
+ localhost,
1410
+ cloudflare_phish,
1411
+ cloudflare_bypass,
1412
+ flowproxy_detection,
1413
+ privoxy,
1414
+ pihole,
1415
+ even_blocked,
1416
+ comments,
1417
+ bypass_cache
1418
+ } = siteConfig;
1419
+
1420
+ const allowFirstParty = firstParty === true || firstParty === 1;
1421
+ const allowThirdParty = thirdParty === undefined || thirdParty === true || thirdParty === 1;
1422
+ const perSiteSubDomains = subDomains === 1 ? true : subDomainsMode;
1423
+ const siteLocalhostIP = localhost || null;
1424
+ const cloudflarePhishBypass = cloudflare_phish === true;
1425
+ const cloudflareBypass = cloudflare_bypass === true;
1393
1426
  // Add redirect and same-page loop protection
1394
1427
  const MAX_REDIRECT_DEPTH = siteConfig.max_redirects || 10;
1395
1428
  const redirectHistory = new Set();
@@ -1398,14 +1431,14 @@ function setupFrameHandling(page, forceDebug) {
1398
1431
  const MAX_SAME_PAGE_LOADS = 3;
1399
1432
  let currentPageUrl = currentUrl;
1400
1433
 
1401
- const sitePrivoxy = siteConfig.privoxy === true;
1402
- const sitePihole = siteConfig.pihole === true;
1403
- const flowproxyDetection = siteConfig.flowproxy_detection === true;
1434
+ const sitePrivoxy = privoxy === true;
1435
+ const sitePihole = pihole === true;
1436
+ const flowproxyDetection = flowproxy_detection === true;
1404
1437
 
1405
- const evenBlocked = siteConfig.even_blocked === true;
1438
+ const evenBlocked = even_blocked === true;
1406
1439
  // Log site-level comments if debug mode is enabled
1407
- if (forceDebug && siteConfig.comments) {
1408
- const siteComments = Array.isArray(siteConfig.comments) ? siteConfig.comments : [siteConfig.comments];
1440
+ if (forceDebug && comments) {
1441
+ const siteComments = Array.isArray(comments) ? comments : [comments];
1409
1442
  console.log(formatLogMessage('debug', `Site comments for ${currentUrl}: ${siteComments.length} item(s)`));
1410
1443
  siteComments.forEach((comment, idx) =>
1411
1444
  console.log(formatLogMessage('debug', ` Site comment ${idx + 1}: ${comment}`))
@@ -1413,11 +1446,11 @@ function setupFrameHandling(page, forceDebug) {
1413
1446
  }
1414
1447
 
1415
1448
  // Log bypass_cache setting if enabled
1416
- if (forceDebug && siteConfig.bypass_cache === true) {
1449
+ if (forceDebug && bypass_cache === true) {
1417
1450
  console.log(formatLogMessage('debug', `Cache bypass enabled for all URLs in site: ${currentUrl}`));
1418
1451
  }
1419
1452
 
1420
- if (siteConfig.firstParty === 0 && siteConfig.thirdParty === 0) {
1453
+ if (firstParty === 0 && thirdParty === 0) {
1421
1454
  console.warn(`⚠ Skipping ${currentUrl} because both firstParty and thirdParty are disabled.`);
1422
1455
  return { url: currentUrl, rules: [], success: false, skipped: true };
1423
1456
  }
@@ -1472,14 +1505,14 @@ function setupFrameHandling(page, forceDebug) {
1472
1505
  let finalUrlAfterRedirect = null;
1473
1506
 
1474
1507
  // Enhanced error types for Puppeteer 23.x compatibility
1475
- const CRITICAL_BROWSER_ERRORS = [
1508
+ const CRITICAL_BROWSER_ERRORS = Object.freeze([
1476
1509
  'Protocol error',
1477
1510
  'Target closed',
1478
1511
  'Browser has been closed',
1479
1512
  'Browser protocol broken',
1480
1513
  'Browser process exited',
1481
1514
  'Browser disconnected'
1482
- ];
1515
+ ]);
1483
1516
 
1484
1517
  try {
1485
1518
 
@@ -1904,29 +1937,30 @@ function setupFrameHandling(page, forceDebug) {
1904
1937
 
1905
1938
  // Client Hints protection for Chrome user agents
1906
1939
  if (siteConfig.userAgent && siteConfig.userAgent.toLowerCase().includes('chrome')) {
1940
+ const userAgentKey = siteConfig.userAgent.toLowerCase();
1907
1941
  let platform = 'Windows';
1908
1942
  let platformVersion = '15.0.0';
1909
1943
  let arch = 'x86';
1910
1944
 
1911
- if (siteConfig.userAgent.toLowerCase() === 'chrome_mac') {
1945
+ if (userAgentKey === 'chrome_mac') {
1912
1946
  platform = 'macOS';
1913
- platformVersion = '13.5.0';
1947
+ platformVersion = '13.5.0';
1914
1948
  arch = 'arm';
1915
- } else if (siteConfig.userAgent.toLowerCase() === 'chrome_linux') {
1949
+ } else if (userAgentKey === 'chrome_linux') {
1916
1950
  platform = 'Linux';
1917
1951
  platformVersion = '6.5.0';
1918
1952
  arch = 'x86';
1919
1953
  }
1920
-
1954
+
1921
1955
  await page.setExtraHTTPHeaders({
1922
- 'Sec-CH-UA': '"Chromium";v="140", "Not=A?Brand";v="24", "Google Chrome";v="140"',
1956
+ 'Sec-CH-UA': '"Chromium";v="141", "Not=A?Brand";v="24", "Google Chrome";v="141"',
1923
1957
  'Sec-CH-UA-Platform': `"${platform}"`,
1924
1958
  'Sec-CH-UA-Platform-Version': `"${platformVersion}"`,
1925
1959
  'Sec-CH-UA-Mobile': '?0',
1926
1960
  'Sec-CH-UA-Arch': `"${arch}"`,
1927
1961
  'Sec-CH-UA-Bitness': '"64"',
1928
- 'Sec-CH-UA-Full-Version': '"140.0.7339.208"',
1929
- 'Sec-CH-UA-Full-Version-List': '"Chromium";v="140.0.7339.208", "Not=A?Brand";v="24.0.0.0", "Google Chrome";v="140.0.7339.208"'
1962
+ 'Sec-CH-UA-Full-Version': '"141.0.7390.55"',
1963
+ 'Sec-CH-UA-Full-Version-List': '"Chromium";v="141.0.7390.55", "Not=A?Brand";v="24.0.0.0", "Google Chrome";v="141.0.7390.55"'
1930
1964
  });
1931
1965
  }
1932
1966
  } catch (fingerprintErr) {
@@ -1951,21 +1985,12 @@ function setupFrameHandling(page, forceDebug) {
1951
1985
  // Parse searchstring patterns using module
1952
1986
  const { searchStrings, searchStringsAnd, hasSearchString, hasSearchStringAnd } = parseSearchStrings(siteConfig.searchstring, siteConfig.searchstring_and);
1953
1987
  const useCurl = siteConfig.curl === true; // Use curl if enabled, regardless of searchstring
1954
- let useGrep = siteConfig.grep === true && useCurl; // Grep requires curl to be enabled
1988
+ let useGrep = siteConfig.grep === true; // Grep can work independently
1955
1989
 
1956
1990
  // Get user agent for curl if needed
1957
1991
  let curlUserAgent = '';
1958
1992
  if (useCurl && siteConfig.userAgent) {
1959
- const userAgents = {
1960
- chrome: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
1961
- chrome_mac: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
1962
- chrome_linux: "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36",
1963
- firefox: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/143.0",
1964
- firefox_mac: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:142.0) Gecko/20100101 Firefox/143.0",
1965
- firefox_linux: "Mozilla/5.0 (X11; Linux x86_64; rv:142.0) Gecko/20100101 Firefox/143.0",
1966
- safari: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"
1967
- };
1968
- curlUserAgent = userAgents[siteConfig.userAgent.toLowerCase()] || '';
1993
+ curlUserAgent = USER_AGENTS.get(siteConfig.userAgent.toLowerCase()) || '';
1969
1994
  }
1970
1995
 
1971
1996
  if (useCurl && forceDebug) {
@@ -1973,7 +1998,7 @@ function setupFrameHandling(page, forceDebug) {
1973
1998
  }
1974
1999
 
1975
2000
  if (useGrep && forceDebug) {
1976
- console.log(formatLogMessage('debug', `Grep-based pattern matching enabled for ${currentUrl}`));
2001
+ console.log(formatLogMessage('debug', `Grep-based pattern matching enabled for ${currentUrl}${useCurl ? ' (with curl)' : ' (with response handler)'}`));
1977
2002
  }
1978
2003
 
1979
2004
  // Validate grep availability if needed
@@ -1993,7 +2018,6 @@ function setupFrameHandling(page, forceDebug) {
1993
2018
  if (!curlCheck.isAvailable) {
1994
2019
  console.warn(formatLogMessage('warn', `Curl not available for ${currentUrl}: ${curlCheck.error}. Skipping curl-based analysis.`));
1995
2020
  useCurl = false;
1996
- useGrep = false; // Grep requires curl
1997
2021
  } else if (forceDebug) {
1998
2022
  console.log(formatLogMessage('debug', `Using curl: ${curlCheck.version}`));
1999
2023
  }
@@ -2643,7 +2667,7 @@ function setupFrameHandling(page, forceDebug) {
2643
2667
 
2644
2668
  // If curl is enabled, download and analyze content immediately
2645
2669
  if (useCurl) {
2646
- // Check bypass_cache before attempting cache lookup
2670
+ // Check bypass_cache before attempting cache lookup (curl mode)
2647
2671
  let cachedContent = null;
2648
2672
  if (!shouldBypassCacheForUrl(reqUrl, siteConfig)) {
2649
2673
  // Check request cache first if smart cache is available and caching is enabled
@@ -2732,8 +2756,30 @@ function setupFrameHandling(page, forceDebug) {
2732
2756
  }
2733
2757
  }
2734
2758
  }
2759
+ } else if (useGrep && (hasSearchString || hasSearchStringAnd)) {
2760
+ // Use grep with response handler (no curl)
2761
+ if (forceDebug) {
2762
+ console.log(formatLogMessage('debug', `[grep-response] Queuing ${reqUrl} for grep analysis via response handler`));
2763
+ }
2764
+
2765
+ // Queue for grep processing via response handler
2766
+ // The response handler will download content and call grep
2767
+ if (dryRunMode) {
2768
+ matchedDomains.get('dryRunMatches').push({
2769
+ regex: matchedRegexPattern,
2770
+ domain: reqDomain,
2771
+ resourceType: resourceType,
2772
+ fullUrl: reqUrl,
2773
+ isFirstParty: isFirstParty,
2774
+ needsGrepCheck: true
2775
+ });
2776
+ }
2777
+
2778
+ // Don't process immediately - let response handler do the work
2779
+ if (forceDebug) {
2780
+ console.log(formatLogMessage('debug', `URL ${reqUrl} queued for grep analysis via response handler`));
2781
+ }
2735
2782
  }
2736
-
2737
2783
  // No break needed since we've already determined if regex matched
2738
2784
  }
2739
2785
  request.continue();
@@ -2742,8 +2788,8 @@ function setupFrameHandling(page, forceDebug) {
2742
2788
  // Mark page as actively processing network requests
2743
2789
  updatePageUsage(page, true);
2744
2790
 
2745
- // Add response handler ONLY if searchstring/searchstring_and is defined AND neither curl nor grep is enabled
2746
- if ((hasSearchString || hasSearchStringAnd) && !useCurl && !useGrep) {
2791
+ // Add response handler whenever searchstring is defined and curl is not used; with grep enabled, the handler runs grep itself
2792
+ if ((hasSearchString || hasSearchStringAnd) && (!useCurl || (useGrep && !useCurl))) {
2747
2793
  const responseHandler = createResponseHandler({
2748
2794
  searchStrings,
2749
2795
  searchStringsAnd,
@@ -2761,6 +2807,7 @@ function setupFrameHandling(page, forceDebug) {
2761
2807
  } : undefined,
2762
2808
  currentUrl,
2763
2809
  perSiteSubDomains,
2810
+ useGrep, // Pass grep flag to response handler
2764
2811
  ignoreDomains,
2765
2812
  matchesIgnoreDomain,
2766
2813
  getRootDomain,
@@ -3462,20 +3509,29 @@ function setupFrameHandling(page, forceDebug) {
3462
3509
  // Temporarily store the pLimit function
3463
3510
  const originalLimit = limit;
3464
3511
 
3465
- // Create a flat list of all URL tasks with their site configs for true concurrency
3466
- const allTasks = [];
3512
+ // V8 Optimization: Calculate total URLs first to pre-allocate array
3513
+ let totalUrls = 0;
3467
3514
  for (const site of sites) {
3468
3515
  const urlsToProcess = Array.isArray(site.url) ? site.url : [site.url];
3469
- urlsToProcess.forEach(url => {
3470
- allTasks.push({
3516
+ totalUrls += urlsToProcess.length;
3517
+ }
3518
+
3519
+ // Pre-allocate array with exact size to prevent multiple reallocations
3520
+ const allTasks = new Array(totalUrls);
3521
+ let taskIndex = 0;
3522
+
3523
+ // Populate the pre-allocated array
3524
+ for (const site of sites) {
3525
+ const urlsToProcess = Array.isArray(site.url) ? site.url : [site.url];
3526
+ for (const url of urlsToProcess) {
3527
+ allTasks[taskIndex++] = {
3471
3528
  url,
3472
3529
  config: { ...site, _originalUrl: url }, // Preserve original URL for CDP domain checking
3473
- taskId: allTasks.length // For tracking
3474
- });
3475
- });
3530
+ taskId: taskIndex - 1 // For tracking
3531
+ };
3532
+ }
3476
3533
  }
3477
-
3478
- const totalUrls = allTasks.length;
3534
+
3479
3535
 
3480
3536
  let results = [];
3481
3537
  let processedUrlCount = 0;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "2.0.24",
3
+ "version": "2.0.26",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {