@fanboynz/network-scanner 1.0.72 → 1.0.74

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/nwss.js +91 -20
  2. package/package.json +1 -1
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v1.0.72 ===
1
+ // === Network scanner script (nwss.js) v1.0.74 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -123,7 +123,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
123
123
  const { monitorBrowserHealth, isBrowserHealthy } = require('./lib/browserhealth');
124
124
 
125
125
  // --- Script Configuration & Constants ---
126
- const VERSION = '1.0.72'; // Script version
126
+ const VERSION = '1.0.74'; // Script version
127
127
 
128
128
  // get startTime
129
129
  const startTime = Date.now();
@@ -474,6 +474,8 @@ Global config.json options:
474
474
  Per-site config.json options:
475
475
  url: "site" or ["site1", "site2"] Single URL or list of URLs
476
476
  filterRegex: "regex" or ["regex1", "regex2"] Patterns to match requests
477
+ regex_and: true/false Use AND logic for multiple filterRegex patterns (default: false)
478
+ When true, ALL regex patterns must match the same URL
477
479
 
478
480
  Redirect Handling Options:
479
481
  follow_redirects: true/false Follow redirects to new domains (default: true)
@@ -1646,6 +1648,9 @@ function setupFrameHandling(page, forceDebug) {
1646
1648
  ? [new RegExp(siteConfig.filterRegex.replace(/^\/(.*)\/$/, '$1'))]
1647
1649
  : [];
1648
1650
 
1651
+ // NEW: Get regex_and setting (defaults to false for backward compatibility)
1652
+ const useRegexAnd = siteConfig.regex_and === true;
1653
+
1649
1654
  // Parse searchstring patterns using module
1650
1655
  const { searchStrings, searchStringsAnd, hasSearchString, hasSearchStringAnd } = parseSearchStrings(siteConfig.searchstring, siteConfig.searchstring_and);
1651
1656
  const useCurl = siteConfig.curl === true; // Use curl if enabled, regardless of searchstring
@@ -1721,6 +1726,11 @@ function setupFrameHandling(page, forceDebug) {
1721
1726
  patterns.forEach((pattern, idx) => {
1722
1727
  console.log(` [${idx + 1}] ${pattern}`);
1723
1728
  });
1729
+ if (useRegexAnd && patterns.length > 1) {
1730
+ console.log(formatLogMessage('info', ` Logic: AND (all patterns must match same URL)`));
1731
+ } else if (patterns.length > 1) {
1732
+ console.log(formatLogMessage('info', ` Logic: OR (any pattern can match)`));
1733
+ }
1724
1734
  }
1725
1735
 
1726
1736
  if (siteConfig.verbose === 1 && (hasSearchString || hasSearchStringAnd)) {
@@ -1987,7 +1997,7 @@ function setupFrameHandling(page, forceDebug) {
1987
1997
  if (!allowedResourceTypes || !Array.isArray(allowedResourceTypes) || allowedResourceTypes.includes(resourceType)) {
1988
1998
  if (dryRunMode) {
1989
1999
  matchedDomains.get('dryRunMatches').push({
1990
- regex: re.source,
2000
+ regex: matchedRegexPattern,
1991
2001
  domain: reqDomain,
1992
2002
  resourceType: resourceType,
1993
2003
  fullUrl: reqUrl,
@@ -2001,7 +2011,7 @@ function setupFrameHandling(page, forceDebug) {
2001
2011
  const simplifiedUrl = getRootDomain(currentUrl);
2002
2012
  if (siteConfig.verbose === 1) {
2003
2013
  const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
2004
- console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${re} and resourceType: ${resourceType}${resourceInfo} [BLOCKED BUT ADDED]`));
2014
+ console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
2005
2015
  }
2006
2016
  if (dumpUrls) {
2007
2017
  const timestamp = new Date().toISOString();
@@ -2037,8 +2047,34 @@ function setupFrameHandling(page, forceDebug) {
2037
2047
  return;
2038
2048
  }
2039
2049
 
2040
- for (const re of regexes) {
2041
- if (re.test(reqUrl)) {
2050
+ // === ENHANCED REGEX MATCHING WITH AND/OR LOGIC ===
2051
+ let regexMatched = false;
2052
+ let matchedRegexPattern = null;
2053
+
2054
+ if (regexes.length > 0) {
2055
+ if (useRegexAnd) {
2056
+ // AND logic: ALL regex patterns must match the same URL
2057
+ const allMatch = regexes.every(re => re.test(reqUrl));
2058
+ if (allMatch) {
2059
+ regexMatched = true;
2060
+ matchedRegexPattern = regexes.map(re => re.source).join(' AND ');
2061
+ if (forceDebug) {
2062
+ console.log(formatLogMessage('debug', `URL ${reqUrl} matched ALL regex patterns (AND logic)`));
2063
+ }
2064
+ }
2065
+ } else {
2066
+ // OR logic: ANY regex pattern can match (original behavior)
2067
+ for (const re of regexes) {
2068
+ if (re.test(reqUrl)) {
2069
+ regexMatched = true;
2070
+ matchedRegexPattern = re.source;
2071
+ break;
2072
+ }
2073
+ }
2074
+ }
2075
+ }
2076
+
2077
+ if (regexMatched) {
2042
2078
  const resourceType = request.resourceType();
2043
2079
 
2044
2080
  // *** UNIVERSAL RESOURCE TYPE FILTER ***
@@ -2049,7 +2085,9 @@ function setupFrameHandling(page, forceDebug) {
2049
2085
  if (forceDebug) {
2050
2086
  console.log(formatLogMessage('debug', `URL ${reqUrl} matches regex but resourceType '${resourceType}' not in allowed types [${allowedResourceTypes.join(', ')}]. Skipping ALL processing.`));
2051
2087
  }
2052
- break; // Skip this URL entirely - doesn't match required resource types
2088
+ // Skip this URL entirely - doesn't match required resource types
2089
+ request.continue();
2090
+ return;
2053
2091
  }
2054
2092
  }
2055
2093
 
@@ -2058,24 +2096,36 @@ function setupFrameHandling(page, forceDebug) {
2058
2096
  if (forceDebug) {
2059
2097
  console.log(formatLogMessage('debug', `Skipping first-party match: ${reqUrl} (firstParty disabled)`));
2060
2098
  }
2061
- break; // Skip this URL - it's first-party but firstParty is disabled
2099
+ // Skip this URL - it's first-party but firstParty is disabled
2100
+ request.continue();
2101
+ return;
2062
2102
  }
2063
2103
  if (!isFirstParty && siteConfig.thirdParty === false) {
2064
2104
  if (forceDebug) {
2065
2105
  console.log(formatLogMessage('debug', `Skipping third-party match: ${reqUrl} (thirdParty disabled)`));
2066
2106
  }
2067
- break; // Skip this URL - it's third-party but thirdParty is disabled
2107
+ // Skip this URL - it's third-party but thirdParty is disabled
2108
+ request.continue();
2109
+ return;
2068
2110
  }
2069
2111
 
2070
2112
  // REMOVED: Check if this URL matches any blocked patterns - if so, skip detection but still continue browser blocking
2071
2113
  // This check is no longer needed here since even_blocked handles it above
2072
-
2114
+
2115
+ // Check if nettools validation is required - if so, NEVER add domains immediately
2116
+ if (hasNetTools) {
2117
+ if (forceDebug) {
2118
+ console.log(formatLogMessage('debug', `${reqUrl} has nettools validation required - skipping immediate add`));
2119
+ }
2120
+ request.continue();
2121
+ return;
2122
+ }
2073
2123
 
2074
2124
  // If NO searchstring AND NO nettools are defined, match immediately (existing behavior)
2075
2125
  if (!hasSearchString && !hasSearchStringAnd && !hasNetTools) {
2076
2126
  if (dryRunMode) {
2077
2127
  matchedDomains.get('dryRunMatches').push({
2078
- regex: re.source,
2128
+ regex: matchedRegexPattern,
2079
2129
  domain: reqDomain,
2080
2130
  resourceType: resourceType,
2081
2131
  fullUrl: reqUrl,
@@ -2087,7 +2137,7 @@ function setupFrameHandling(page, forceDebug) {
2087
2137
  const simplifiedUrl = getRootDomain(currentUrl);
2088
2138
  if (siteConfig.verbose === 1) {
2089
2139
  const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
2090
- console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${re} and resourceType: ${resourceType}${resourceInfo}`));
2140
+ console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
2091
2141
  }
2092
2142
  if (dumpUrls) {
2093
2143
  const timestamp = new Date().toISOString();
@@ -2101,17 +2151,25 @@ function setupFrameHandling(page, forceDebug) {
2101
2151
  if (forceDebug) {
2102
2152
  console.log(formatLogMessage('debug', `Skipping nettools check for already detected subdomain: ${fullSubdomain}`));
2103
2153
  }
2104
- break; // Skip to next URL
2154
+ // Skip to next URL
2155
+ request.continue();
2156
+ return;
2105
2157
  }
2106
2158
 
2107
2159
  if (forceDebug) {
2108
- console.log(formatLogMessage('debug', `${reqUrl} matched regex ${re} and resourceType ${resourceType}, queued for nettools check`));
2160
+ console.log(formatLogMessage('debug', `${reqUrl} matched regex ${matchedRegexPattern} and resourceType ${resourceType}, queued for nettools check`));
2161
+ }
2162
+
2163
+ // IMPORTANT: Do NOT add domain immediately when nettools validation is required
2164
+ // The nettools handler will add the domain only if validation passes
2165
+ if (forceDebug) {
2166
+ console.log(formatLogMessage('debug', `Domain ${reqDomain} queued for mandatory nettools validation (dig: ${JSON.stringify(siteConfig.dig)})`));
2109
2167
  }
2110
2168
 
2111
2169
  if (dryRunMode) {
2112
2170
  // For dry run, we'll collect the domain for nettools checking
2113
2171
  matchedDomains.get('dryRunMatches').push({
2114
- regex: re.source,
2172
+ regex: matchedRegexPattern,
2115
2173
  domain: reqDomain,
2116
2174
  resourceType: resourceType,
2117
2175
  fullUrl: reqUrl,
@@ -2168,6 +2226,12 @@ function setupFrameHandling(page, forceDebug) {
2168
2226
  // Execute nettools check asynchronously
2169
2227
  const originalDomain = fullSubdomain; // Use full subdomain for nettools
2170
2228
  setImmediate(() => netToolsHandler(reqDomain, originalDomain));
2229
+
2230
+ // Do NOT continue processing this request for immediate domain addition
2231
+ // The nettools handler is responsible for adding the domain if validation passes
2232
+ if (forceDebug) {
2233
+ console.log(formatLogMessage('debug', `Request processing halted for ${reqUrl} - awaiting nettools validation`));
2234
+ }
2171
2235
  } else {
2172
2236
  // If searchstring or searchstring_and IS defined (with or without nettools), queue for content checking
2173
2237
  // Skip searchstring check if full subdomain was already detected
@@ -2175,15 +2239,17 @@ function setupFrameHandling(page, forceDebug) {
2175
2239
  if (forceDebug) {
2176
2240
  console.log(formatLogMessage('debug', `Skipping searchstring check for already detected subdomain: ${fullSubdomain}`));
2177
2241
  }
2178
- break; // Skip to next URL
2242
+ // Skip to next URL
2243
+ request.continue();
2244
+ return;
2179
2245
  }
2180
2246
  if (forceDebug) {
2181
2247
  const searchType = hasSearchStringAnd ? 'searchstring_and' : 'searchstring';
2182
- console.log(formatLogMessage('debug', `${reqUrl} matched regex ${re} and resourceType ${resourceType}, queued for ${searchType} content search`));
2248
+ console.log(formatLogMessage('debug', `${reqUrl} matched regex ${matchedRegexPattern} and resourceType ${resourceType}, queued for ${searchType} content search`));
2183
2249
  }
2184
2250
  if (dryRunMode) {
2185
2251
  matchedDomains.get('dryRunMatches').push({
2186
- regex: re.source,
2252
+ regex: matchedRegexPattern,
2187
2253
  domain: reqDomain,
2188
2254
  resourceType: resourceType,
2189
2255
  fullUrl: reqUrl,
@@ -2191,6 +2257,12 @@ function setupFrameHandling(page, forceDebug) {
2191
2257
  needsSearchStringCheck: true
2192
2258
  });
2193
2259
  }
2260
+ // If we have BOTH searchstring AND nettools, ensure nettools validation still happens
2261
+ if (hasNetTools) {
2262
+ if (forceDebug) {
2263
+ console.log(formatLogMessage('debug', `${reqUrl} requires both content and nettools validation`));
2264
+ }
2265
+ }
2194
2266
  }
2195
2267
 
2196
2268
  // If curl is enabled, download and analyze content immediately
@@ -2270,8 +2342,7 @@ function setupFrameHandling(page, forceDebug) {
2270
2342
  }
2271
2343
  }
2272
2344
 
2273
- break;
2274
- }
2345
+ // No break needed since we've already determined if regex matched
2275
2346
  }
2276
2347
  request.continue();
2277
2348
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "1.0.72",
3
+ "version": "1.0.74",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {