@fanboynz/network-scanner 2.0.24 → 2.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1347,6 +1347,19 @@ async function applyFingerprintProtection(page, siteConfig, forceDebug, currentU
1347
1347
 
1348
1348
  try {
1349
1349
  await page.evaluateOnNewDocument(({ spoof, debugEnabled }) => {
1350
+
1351
+ // Define helper functions FIRST in this context
1352
+ function spoofNavigatorProperties(navigator, properties) {
1353
+ for (const [prop, descriptor] of Object.entries(properties)) {
1354
+ safeDefinePropertyLocal(navigator, prop, descriptor);
1355
+ }
1356
+ }
1357
+
1358
+ function spoofScreenProperties(screen, properties) {
1359
+ for (const [prop, descriptor] of Object.entries(properties)) {
1360
+ safeDefinePropertyLocal(screen, prop, descriptor);
1361
+ }
1362
+ }
1350
1363
 
1351
1364
  function safeDefinePropertyLocal(target, property, descriptor) {
1352
1365
  try {
@@ -3,6 +3,7 @@
3
3
 
4
4
  const fs = require('fs');
5
5
  const { spawnSync } = require('child_process');
6
+ const { grepContent } = require('./grep');
6
7
 
7
8
  // Configuration constants for search logic
8
9
  const SEARCH_CONFIG = {
@@ -51,11 +52,12 @@ function parseSearchStrings(searchstring, searchstringAnd) {
51
52
  * @param {Function} addMatchedDomain - Optional helper function for adding domains
52
53
  * @param {string} domain - Domain to add
53
54
  * @param {string} resourceType - Resource type (for --adblock-rules mode)
55
+ * @param {string} fullSubdomain - Full subdomain for cache tracking (optional)
54
56
  */
55
- function addDomainToCollection(matchedDomains, addMatchedDomain, domain, resourceType = null) {
57
+ function addDomainToCollection(matchedDomains, addMatchedDomain, domain, resourceType = null, fullSubdomain = null) {
56
58
  // Use helper function if provided (preferred method)
57
59
  if (typeof addMatchedDomain === 'function') {
58
- addMatchedDomain(domain, resourceType);
60
+ addMatchedDomain(domain, resourceType, fullSubdomain);
59
61
  return;
60
62
  }
61
63
 
@@ -575,6 +577,7 @@ function createResponseHandler(config) {
575
577
  siteConfig,
576
578
  dumpUrls,
577
579
  matchedUrlsLogFile,
580
+ useGrep = false,
578
581
  forceDebug,
579
582
  resourceType // Will be null for response handler
580
583
  } = config;
@@ -584,22 +587,16 @@ function createResponseHandler(config) {
584
587
  const respDomain = perSiteSubDomains ? (new URL(respUrl)).hostname : getRootDomain(respUrl);
585
588
 
586
589
  // Only process responses that match our regex patterns
587
- const matchesRegex = regexes.some(re => re.test(respUrl));
588
- if (!matchesRegex) return;
590
+ const fullSubdomain = (new URL(respUrl)).hostname; // Always get full subdomain for cache tracking
589
591
 
590
- // Extract domain and check if already detected (skip expensive operations)
591
- if (typeof config.isDomainAlreadyDetected === 'function' && config.isDomainAlreadyDetected(respDomain)) {
592
- if (forceDebug) {
593
- console.log(`[debug] Skipping response analysis for already detected domain: ${respDomain}`);
594
- }
592
+ // Skip if already detected to avoid duplicates
593
+ if (typeof config.isDomainAlreadyDetected === 'function' && config.isDomainAlreadyDetected(fullSubdomain)) {
595
594
  return;
596
595
  }
597
-
598
- // Check if this is a first-party response (same domain as the URL being scanned)
599
- const currentUrlHostname = new URL(currentUrl).hostname;
600
- const responseHostname = new URL(respUrl).hostname;
601
- const isFirstParty = currentUrlHostname === responseHostname;
596
+ const matchesRegex = regexes.some(re => re.test(respUrl));
597
+ if (!matchesRegex) return;
602
598
 
599
+ // Domain extraction and the already-detected check were done above; party filtering follows
603
600
  // The main request handler already filtered first-party/third-party requests
604
601
  // This response handler only runs for requests that passed that filter
605
602
  // However, we need to apply the same first-party/third-party logic here for searchstring analysis
@@ -607,6 +604,10 @@ function createResponseHandler(config) {
607
604
 
608
605
  // Apply first-party/third-party filtering for searchstring analysis
609
606
  // Use the exact same logic as the main request handler
607
+
608
+ const currentUrlHostname = new URL(currentUrl).hostname;
609
+ const responseHostname = new URL(respUrl).hostname;
610
+ const isFirstParty = currentUrlHostname === responseHostname;
610
611
  if (isFirstParty && siteConfig.firstParty === false) {
611
612
  if (forceDebug) {
612
613
  console.log(`[debug] Skipping first-party response for searchstring analysis (firstParty=false): ${respUrl}`);
@@ -632,9 +633,61 @@ function createResponseHandler(config) {
632
633
  }
633
634
 
634
635
  const content = await response.text();
636
+
637
+ // Cache the fetched content if callback provided
638
+ if (config.onContentFetched) {
639
+ try {
640
+ config.onContentFetched(respUrl, content);
641
+ } catch (cacheErr) {
642
+ if (forceDebug) {
643
+ console.log(`[debug] Content caching failed: ${cacheErr.message}`);
644
+ }
645
+ }
646
+ }
635
647
 
636
648
  // Check if content contains search strings (OR or AND logic)
637
- const { found, matchedString, logicType, error } = searchContent(content, searchStrings, searchStringsAnd, contentType, respUrl);
649
+ let searchResult;
650
+
651
+ if (useGrep && (searchStrings.length > 0 || searchStringsAnd.length > 0)) {
652
+ // Use grep for pattern matching
653
+ try {
654
+ const allPatterns = [...(searchStrings || []), ...(searchStringsAnd || [])];
655
+ const grepResult = await grepContent(content, allPatterns, {
656
+ ignoreCase: true,
657
+ wholeWord: false,
658
+ regex: false
659
+ });
660
+
661
+ if (hasSearchStringAnd && searchStringsAnd.length > 0) {
662
+ // For AND logic, check that all patterns were found
663
+ const foundPatterns = grepResult.allMatches.map(match => match.pattern);
664
+ const allFound = searchStringsAnd.every(pattern => foundPatterns.includes(pattern));
665
+ searchResult = {
666
+ found: allFound,
667
+ matchedString: allFound ? foundPatterns.join(' AND ') : null,
668
+ logicType: 'AND'
669
+ };
670
+ } else {
671
+ // For OR logic, any match is sufficient
672
+ searchResult = {
673
+ found: grepResult.found,
674
+ matchedString: grepResult.matchedPattern,
675
+ logicType: 'OR'
676
+ };
677
+ }
678
+ } catch (grepErr) {
679
+ if (forceDebug) {
680
+ console.log(`[debug] Grep failed for ${respUrl}, falling back to JavaScript: ${grepErr.message}`);
681
+ }
682
+ // Fallback to JavaScript search
683
+ searchResult = searchContent(content, searchStrings, searchStringsAnd, contentType, respUrl);
684
+ }
685
+ } else {
686
+ // Use JavaScript search
687
+ searchResult = searchContent(content, searchStrings, searchStringsAnd, contentType, respUrl);
688
+ }
689
+
690
+ const { found, matchedString, logicType, error } = searchResult;
638
691
 
639
692
  if (found) {
640
693
  if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
@@ -642,27 +695,31 @@ function createResponseHandler(config) {
642
695
  }
643
696
 
644
697
  // Response handler doesn't have access to specific resource type
645
- addDomainToCollection(matchedDomains, addMatchedDomain, respDomain, null);
698
+ // Use the addMatchedDomain helper which handles fullSubdomain properly
699
+ addMatchedDomain(respDomain, null, fullSubdomain);
646
700
  const simplifiedUrl = getRootDomain(currentUrl);
647
701
 
648
702
  if (siteConfig.verbose === 1) {
649
703
  const partyType = isFirstParty ? 'first-party' : 'third-party';
650
- console.log(`[match][${simplifiedUrl}] ${respUrl} (${partyType}) contains searchstring (${logicType}): "${matchedString}"`);
704
+ const searchMethod = useGrep ? 'grep' : 'js';
705
+ console.log(`[match][${simplifiedUrl}] ${respUrl} (${partyType}, ${searchMethod}) contains searchstring (${logicType}): "${matchedString}"`);
651
706
  }
652
707
 
653
708
  if (dumpUrls) {
654
709
  const timestamp = new Date().toISOString();
655
710
  const partyType = isFirstParty ? 'first-party' : 'third-party';
711
+ const searchMethod = useGrep ? 'grep' : 'js';
656
712
  try {
657
713
  fs.appendFileSync(matchedUrlsLogFile,
658
- `${timestamp} [match][${simplifiedUrl}] ${respUrl} (${partyType}, searchstring (${logicType}): "${matchedString}")\n`);
714
+ `${timestamp} [match][${simplifiedUrl}] ${respUrl} (${partyType}, ${searchMethod}, searchstring (${logicType}): "${matchedString}")\n`);
659
715
  } catch (logErr) {
660
716
  console.warn(`[warn] Failed to write to matched URLs log: ${logErr.message}`);
661
717
  }
662
718
  }
663
719
  } else if (forceDebug) {
664
720
  const partyType = isFirstParty ? 'first-party' : 'third-party';
665
- console.log(`[debug] ${respUrl} (${partyType}) matched regex but no searchstring found`);
721
+ const searchMethod = useGrep ? 'grep' : 'js';
722
+ console.log(`[debug] ${respUrl} (${partyType}, ${searchMethod}) matched regex but no searchstring found`);
666
723
  if (error) {
667
724
  console.log(`[debug] Search error: ${error}`);
668
725
  }
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v2.0.24 ===
1
+ // === Network scanner script (nwss.js) v2.0.25 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -132,7 +132,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
132
132
  const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive, performGroupWindowCleanup, performRealtimeWindowCleanup, trackPageForRealtime, updatePageUsage, cleanupPageBeforeReload } = require('./lib/browserhealth');
133
133
 
134
134
  // --- Script Configuration & Constants ---
135
- const VERSION = '2.0.24'; // Script version
135
+ const VERSION = '2.0.25'; // Script version
136
136
 
137
137
  // get startTime
138
138
  const startTime = Date.now();
@@ -997,7 +997,7 @@ function matchesIgnoreDomain(domain, ignorePatterns) {
997
997
 
998
998
  function setupFrameHandling(page, forceDebug) {
999
999
  // Track active frames and clear on navigation to prevent detached frame access
1000
- let activeFrames = new Map(); // Use Map to track frame state
1000
+ let activeFrames = new Set(); // Use Set to track frame references
1001
1001
 
1002
1002
  // Clear frame tracking on navigation to prevent stale references
1003
1003
  page.on('framenavigated', (frame) => {
@@ -1031,7 +1031,6 @@ function setupFrameHandling(page, forceDebug) {
1031
1031
  // Enhanced frame validation with multiple safety checks
1032
1032
  let frameUrl;
1033
1033
  try {
1034
- // Test frame accessibility first
1035
1034
  frameUrl = frame.url();
1036
1035
 
1037
1036
  // Check if frame is detached (if method exists)
@@ -1041,12 +1040,17 @@ function setupFrameHandling(page, forceDebug) {
1041
1040
  }
1042
1041
  return;
1043
1042
  }
1043
+
1044
+ activeFrames.add(frame);
1045
+
1046
+ if (forceDebug) {
1047
+ console.log(formatLogMessage('debug', `New frame attached: ${frameUrl || 'about:blank'}`));
1048
+ }
1044
1049
  } catch (frameAccessError) {
1045
1050
  // Frame is not accessible (likely detached)
1046
1051
  return;
1047
1052
  }
1048
-
1049
- activeFrames.add(frame);
1053
+
1050
1054
  } catch (detachError) {
1051
1055
  // Frame state checking can throw in 23.x, handle gracefully
1052
1056
  if (forceDebug) {
@@ -1055,14 +1059,10 @@ function setupFrameHandling(page, forceDebug) {
1055
1059
  return;
1056
1060
  }
1057
1061
 
1058
- // Store frame with timestamp for tracking
1059
- activeFrames.set(frame, Date.now());
1060
1062
 
1061
1063
  if (frame !== page.mainFrame() && frame.parentFrame()) { // Only handle child frames
1062
- try {
1063
- if (forceDebug) {
1064
- console.log(formatLogMessage('debug', `New frame attached: ${frameUrl || 'about:blank'}`));
1065
- }
1064
+ let frameUrl;
1065
+ frameUrl = frame.url();
1066
1066
 
1067
1067
  // Don't try to navigate to frames with invalid/empty URLs
1068
1068
  if (!frameUrl ||
@@ -1100,6 +1100,7 @@ function setupFrameHandling(page, forceDebug) {
1100
1100
  // Let frames load naturally - manual navigation often causes Protocol errors
1101
1101
  // await frame.goto(frame.url(), { waitUntil: 'domcontentloaded', timeout: 5000 });
1102
1102
 
1103
+ try {
1103
1104
  if (forceDebug) {
1104
1105
  console.log(formatLogMessage('debug', `Frame will load naturally: ${frameUrl}`));
1105
1106
  }
@@ -1117,11 +1118,11 @@ function setupFrameHandling(page, forceDebug) {
1117
1118
  });
1118
1119
  // Handle frame navigations (keep this for monitoring)
1119
1120
  page.on('framenavigated', (frame) => {
1120
- let frameUrl;
1121
1121
 
1122
1122
  // Skip if frame is not in our active set
1123
1123
  if (!activeFrames.has(frame)) return;
1124
1124
 
1125
+ let frameUrl;
1125
1126
  try {
1126
1127
  frameUrl = frame.url();
1127
1128
  } catch (urlErr) {
@@ -1143,17 +1144,14 @@ function setupFrameHandling(page, forceDebug) {
1143
1144
  // Optional: Handle frame detachment for cleanup
1144
1145
  page.on('framedetached', (frame) => {
1145
1146
  // Remove from active tracking
1146
- activeFrames.delete(frame);
1147
+ activeFrames.delete(frame); // This works for both Map and Set
1147
1148
 
1148
- // Skip logging if we can't access frame URL
1149
- let frameUrl;
1149
+
1150
1150
  if (forceDebug) {
1151
+ let frameUrl;
1151
1152
  try {
1152
1153
  frameUrl = frame.url();
1153
- } catch (urlErr) {
1154
- // Frame already detached, can't get URL
1155
- return;
1156
- }
1154
+
1157
1155
  if (frameUrl &&
1158
1156
  frameUrl !== 'about:blank' &&
1159
1157
  frameUrl !== 'about:srcdoc' &&
@@ -1162,6 +1160,11 @@ function setupFrameHandling(page, forceDebug) {
1162
1160
  !frameUrl.startsWith('chrome-extension://')) {
1163
1161
  console.log(formatLogMessage('debug', `Frame detached: ${frameUrl}`));
1164
1162
  }
1163
+ } catch (urlErr) {
1164
+ // Frame already detached, can't get URL - this is expected
1165
+ return;
1166
+ }
1167
+
1165
1168
  }
1166
1169
  });
1167
1170
  }
@@ -1951,7 +1954,7 @@ function setupFrameHandling(page, forceDebug) {
1951
1954
  // Parse searchstring patterns using module
1952
1955
  const { searchStrings, searchStringsAnd, hasSearchString, hasSearchStringAnd } = parseSearchStrings(siteConfig.searchstring, siteConfig.searchstring_and);
1953
1956
  const useCurl = siteConfig.curl === true; // Use curl if enabled, regardless of searchstring
1954
- let useGrep = siteConfig.grep === true && useCurl; // Grep requires curl to be enabled
1957
+ let useGrep = siteConfig.grep === true; // Grep can work independently
1955
1958
 
1956
1959
  // Get user agent for curl if needed
1957
1960
  let curlUserAgent = '';
@@ -1973,7 +1976,7 @@ function setupFrameHandling(page, forceDebug) {
1973
1976
  }
1974
1977
 
1975
1978
  if (useGrep && forceDebug) {
1976
- console.log(formatLogMessage('debug', `Grep-based pattern matching enabled for ${currentUrl}`));
1979
+ console.log(formatLogMessage('debug', `Grep-based pattern matching enabled for ${currentUrl}${useCurl ? ' (with curl)' : ' (with response handler)'}`));
1977
1980
  }
1978
1981
 
1979
1982
  // Validate grep availability if needed
@@ -1993,7 +1996,6 @@ function setupFrameHandling(page, forceDebug) {
1993
1996
  if (!curlCheck.isAvailable) {
1994
1997
  console.warn(formatLogMessage('warn', `Curl not available for ${currentUrl}: ${curlCheck.error}. Skipping curl-based analysis.`));
1995
1998
  useCurl = false;
1996
- useGrep = false; // Grep requires curl
1997
1999
  } else if (forceDebug) {
1998
2000
  console.log(formatLogMessage('debug', `Using curl: ${curlCheck.version}`));
1999
2001
  }
@@ -2643,7 +2645,7 @@ function setupFrameHandling(page, forceDebug) {
2643
2645
 
2644
2646
  // If curl is enabled, download and analyze content immediately
2645
2647
  if (useCurl) {
2646
- // Check bypass_cache before attempting cache lookup
2648
+ // Check bypass_cache before attempting cache lookup (curl mode)
2647
2649
  let cachedContent = null;
2648
2650
  if (!shouldBypassCacheForUrl(reqUrl, siteConfig)) {
2649
2651
  // Check request cache first if smart cache is available and caching is enabled
@@ -2732,8 +2734,30 @@ function setupFrameHandling(page, forceDebug) {
2732
2734
  }
2733
2735
  }
2734
2736
  }
2737
+ } else if (useGrep && (hasSearchString || hasSearchStringAnd)) {
2738
+ // Use grep with response handler (no curl)
2739
+ if (forceDebug) {
2740
+ console.log(formatLogMessage('debug', `[grep-response] Queuing ${reqUrl} for grep analysis via response handler`));
2741
+ }
2742
+
2743
+ // Queue for grep processing via response handler
2744
+ // The response handler will download content and call grep
2745
+ if (dryRunMode) {
2746
+ matchedDomains.get('dryRunMatches').push({
2747
+ regex: matchedRegexPattern,
2748
+ domain: reqDomain,
2749
+ resourceType: resourceType,
2750
+ fullUrl: reqUrl,
2751
+ isFirstParty: isFirstParty,
2752
+ needsGrepCheck: true
2753
+ });
2754
+ }
2755
+
2756
+ // Don't process immediately - let response handler do the work
2757
+ if (forceDebug) {
2758
+ console.log(formatLogMessage('debug', `URL ${reqUrl} queued for grep analysis via response handler`));
2759
+ }
2735
2760
  }
2736
-
2737
2761
  // No break needed since we've already determined if regex matched
2738
2762
  }
2739
2763
  request.continue();
@@ -2742,8 +2766,8 @@ function setupFrameHandling(page, forceDebug) {
2742
2766
  // Mark page as actively processing network requests
2743
2767
  updatePageUsage(page, true);
2744
2768
 
2745
- // Add response handler ONLY if searchstring/searchstring_and is defined AND neither curl nor grep is enabled
2746
- if ((hasSearchString || hasSearchStringAnd) && !useCurl && !useGrep) {
2769
+ // Add response handler if searchstring is defined and curl is not in use (grep, when enabled, runs inside the response handler)
2770
+ if ((hasSearchString || hasSearchStringAnd) && (!useCurl || (useGrep && !useCurl))) {
2747
2771
  const responseHandler = createResponseHandler({
2748
2772
  searchStrings,
2749
2773
  searchStringsAnd,
@@ -2761,6 +2785,7 @@ function setupFrameHandling(page, forceDebug) {
2761
2785
  } : undefined,
2762
2786
  currentUrl,
2763
2787
  perSiteSubDomains,
2788
+ useGrep, // Pass grep flag to response handler
2764
2789
  ignoreDomains,
2765
2790
  matchesIgnoreDomain,
2766
2791
  getRootDomain,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "2.0.24",
3
+ "version": "2.0.25",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {