@fanboynz/network-scanner 1.0.72 → 1.0.73

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/nwss.js +64 -19
  2. package/package.json +1 -1
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v1.0.72 ===
1
+ // === Network scanner script (nwss.js) v1.0.73 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -123,7 +123,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
123
123
  const { monitorBrowserHealth, isBrowserHealthy } = require('./lib/browserhealth');
124
124
 
125
125
  // --- Script Configuration & Constants ---
126
- const VERSION = '1.0.72'; // Script version
126
+ const VERSION = '1.0.73'; // Script version
127
127
 
128
128
  // get startTime
129
129
  const startTime = Date.now();
@@ -474,6 +474,8 @@ Global config.json options:
474
474
  Per-site config.json options:
475
475
  url: "site" or ["site1", "site2"] Single URL or list of URLs
476
476
  filterRegex: "regex" or ["regex1", "regex2"] Patterns to match requests
477
+ regex_and: true/false Use AND logic for multiple filterRegex patterns (default: false)
478
+ When true, ALL regex patterns must match the same URL
477
479
 
478
480
  Redirect Handling Options:
479
481
  follow_redirects: true/false Follow redirects to new domains (default: true)
@@ -1646,6 +1648,9 @@ function setupFrameHandling(page, forceDebug) {
1646
1648
  ? [new RegExp(siteConfig.filterRegex.replace(/^\/(.*)\/$/, '$1'))]
1647
1649
  : [];
1648
1650
 
1651
+ // NEW: Get regex_and setting (defaults to false for backward compatibility)
1652
+ const useRegexAnd = siteConfig.regex_and === true;
1653
+
1649
1654
  // Parse searchstring patterns using module
1650
1655
  const { searchStrings, searchStringsAnd, hasSearchString, hasSearchStringAnd } = parseSearchStrings(siteConfig.searchstring, siteConfig.searchstring_and);
1651
1656
  const useCurl = siteConfig.curl === true; // Use curl if enabled, regardless of searchstring
@@ -1721,6 +1726,11 @@ function setupFrameHandling(page, forceDebug) {
1721
1726
  patterns.forEach((pattern, idx) => {
1722
1727
  console.log(` [${idx + 1}] ${pattern}`);
1723
1728
  });
1729
+ if (useRegexAnd && patterns.length > 1) {
1730
+ console.log(formatLogMessage('info', ` Logic: AND (all patterns must match same URL)`));
1731
+ } else if (patterns.length > 1) {
1732
+ console.log(formatLogMessage('info', ` Logic: OR (any pattern can match)`));
1733
+ }
1724
1734
  }
1725
1735
 
1726
1736
  if (siteConfig.verbose === 1 && (hasSearchString || hasSearchStringAnd)) {
@@ -1987,7 +1997,7 @@ function setupFrameHandling(page, forceDebug) {
1987
1997
  if (!allowedResourceTypes || !Array.isArray(allowedResourceTypes) || allowedResourceTypes.includes(resourceType)) {
1988
1998
  if (dryRunMode) {
1989
1999
  matchedDomains.get('dryRunMatches').push({
1990
- regex: re.source,
2000
+ regex: matchedRegexPattern,
1991
2001
  domain: reqDomain,
1992
2002
  resourceType: resourceType,
1993
2003
  fullUrl: reqUrl,
@@ -2001,7 +2011,7 @@ function setupFrameHandling(page, forceDebug) {
2001
2011
  const simplifiedUrl = getRootDomain(currentUrl);
2002
2012
  if (siteConfig.verbose === 1) {
2003
2013
  const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
2004
- console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${re} and resourceType: ${resourceType}${resourceInfo} [BLOCKED BUT ADDED]`));
2014
+ console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
2005
2015
  }
2006
2016
  if (dumpUrls) {
2007
2017
  const timestamp = new Date().toISOString();
@@ -2037,8 +2047,34 @@ function setupFrameHandling(page, forceDebug) {
2037
2047
  return;
2038
2048
  }
2039
2049
 
2040
- for (const re of regexes) {
2041
- if (re.test(reqUrl)) {
2050
+ // === ENHANCED REGEX MATCHING WITH AND/OR LOGIC ===
2051
+ let regexMatched = false;
2052
+ let matchedRegexPattern = null;
2053
+
2054
+ if (regexes.length > 0) {
2055
+ if (useRegexAnd) {
2056
+ // AND logic: ALL regex patterns must match the same URL
2057
+ const allMatch = regexes.every(re => re.test(reqUrl));
2058
+ if (allMatch) {
2059
+ regexMatched = true;
2060
+ matchedRegexPattern = regexes.map(re => re.source).join(' AND ');
2061
+ if (forceDebug) {
2062
+ console.log(formatLogMessage('debug', `URL ${reqUrl} matched ALL regex patterns (AND logic)`));
2063
+ }
2064
+ }
2065
+ } else {
2066
+ // OR logic: ANY regex pattern can match (original behavior)
2067
+ for (const re of regexes) {
2068
+ if (re.test(reqUrl)) {
2069
+ regexMatched = true;
2070
+ matchedRegexPattern = re.source;
2071
+ break;
2072
+ }
2073
+ }
2074
+ }
2075
+ }
2076
+
2077
+ if (regexMatched) {
2042
2078
  const resourceType = request.resourceType();
2043
2079
 
2044
2080
  // *** UNIVERSAL RESOURCE TYPE FILTER ***
@@ -2049,7 +2085,9 @@ function setupFrameHandling(page, forceDebug) {
2049
2085
  if (forceDebug) {
2050
2086
  console.log(formatLogMessage('debug', `URL ${reqUrl} matches regex but resourceType '${resourceType}' not in allowed types [${allowedResourceTypes.join(', ')}]. Skipping ALL processing.`));
2051
2087
  }
2052
- break; // Skip this URL entirely - doesn't match required resource types
2088
+ // Skip this URL entirely - doesn't match required resource types
2089
+ request.continue();
2090
+ return;
2053
2091
  }
2054
2092
  }
2055
2093
 
@@ -2058,13 +2096,17 @@ function setupFrameHandling(page, forceDebug) {
2058
2096
  if (forceDebug) {
2059
2097
  console.log(formatLogMessage('debug', `Skipping first-party match: ${reqUrl} (firstParty disabled)`));
2060
2098
  }
2061
- break; // Skip this URL - it's first-party but firstParty is disabled
2099
+ // Skip this URL - it's first-party but firstParty is disabled
2100
+ request.continue();
2101
+ return;
2062
2102
  }
2063
2103
  if (!isFirstParty && siteConfig.thirdParty === false) {
2064
2104
  if (forceDebug) {
2065
2105
  console.log(formatLogMessage('debug', `Skipping third-party match: ${reqUrl} (thirdParty disabled)`));
2066
2106
  }
2067
- break; // Skip this URL - it's third-party but thirdParty is disabled
2107
+ // Skip this URL - it's third-party but thirdParty is disabled
2108
+ request.continue();
2109
+ return;
2068
2110
  }
2069
2111
 
2070
2112
  // REMOVED: Check if this URL matches any blocked patterns - if so, skip detection but still continue browser blocking
@@ -2075,7 +2117,7 @@ function setupFrameHandling(page, forceDebug) {
2075
2117
  if (!hasSearchString && !hasSearchStringAnd && !hasNetTools) {
2076
2118
  if (dryRunMode) {
2077
2119
  matchedDomains.get('dryRunMatches').push({
2078
- regex: re.source,
2120
+ regex: matchedRegexPattern,
2079
2121
  domain: reqDomain,
2080
2122
  resourceType: resourceType,
2081
2123
  fullUrl: reqUrl,
@@ -2087,7 +2129,7 @@ function setupFrameHandling(page, forceDebug) {
2087
2129
  const simplifiedUrl = getRootDomain(currentUrl);
2088
2130
  if (siteConfig.verbose === 1) {
2089
2131
  const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
2090
- console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${re} and resourceType: ${resourceType}${resourceInfo}`));
2132
+ console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
2091
2133
  }
2092
2134
  if (dumpUrls) {
2093
2135
  const timestamp = new Date().toISOString();
@@ -2101,17 +2143,19 @@ function setupFrameHandling(page, forceDebug) {
2101
2143
  if (forceDebug) {
2102
2144
  console.log(formatLogMessage('debug', `Skipping nettools check for already detected subdomain: ${fullSubdomain}`));
2103
2145
  }
2104
- break; // Skip to next URL
2146
+ // Skip to next URL
2147
+ request.continue();
2148
+ return;
2105
2149
  }
2106
2150
 
2107
2151
  if (forceDebug) {
2108
- console.log(formatLogMessage('debug', `${reqUrl} matched regex ${re} and resourceType ${resourceType}, queued for nettools check`));
2152
+ console.log(formatLogMessage('debug', `${reqUrl} matched regex ${matchedRegexPattern} and resourceType ${resourceType}, queued for nettools check`));
2109
2153
  }
2110
2154
 
2111
2155
  if (dryRunMode) {
2112
2156
  // For dry run, we'll collect the domain for nettools checking
2113
2157
  matchedDomains.get('dryRunMatches').push({
2114
- regex: re.source,
2158
+ regex: matchedRegexPattern,
2115
2159
  domain: reqDomain,
2116
2160
  resourceType: resourceType,
2117
2161
  fullUrl: reqUrl,
@@ -2175,15 +2219,17 @@ function setupFrameHandling(page, forceDebug) {
2175
2219
  if (forceDebug) {
2176
2220
  console.log(formatLogMessage('debug', `Skipping searchstring check for already detected subdomain: ${fullSubdomain}`));
2177
2221
  }
2178
- break; // Skip to next URL
2222
+ // Skip to next URL
2223
+ request.continue();
2224
+ return;
2179
2225
  }
2180
2226
  if (forceDebug) {
2181
2227
  const searchType = hasSearchStringAnd ? 'searchstring_and' : 'searchstring';
2182
- console.log(formatLogMessage('debug', `${reqUrl} matched regex ${re} and resourceType ${resourceType}, queued for ${searchType} content search`));
2228
+ console.log(formatLogMessage('debug', `${reqUrl} matched regex ${matchedRegexPattern} and resourceType ${resourceType}, queued for ${searchType} content search`));
2183
2229
  }
2184
2230
  if (dryRunMode) {
2185
2231
  matchedDomains.get('dryRunMatches').push({
2186
- regex: re.source,
2232
+ regex: matchedRegexPattern,
2187
2233
  domain: reqDomain,
2188
2234
  resourceType: resourceType,
2189
2235
  fullUrl: reqUrl,
@@ -2270,8 +2316,7 @@ function setupFrameHandling(page, forceDebug) {
2270
2316
  }
2271
2317
  }
2272
2318
 
2273
- break;
2274
- }
2319
+ // No break needed since we've already determined if regex matched
2275
2320
  }
2276
2321
  request.continue();
2277
2322
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "1.0.72",
3
+ "version": "1.0.73",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {