@fanboynz/network-scanner 1.0.72 → 1.0.74
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/nwss.js +91 -20
- package/package.json +1 -1
package/nwss.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// === Network scanner script (nwss.js) v1.0.72 ===
|
|
1
|
+
// === Network scanner script (nwss.js) v1.0.74 ===
|
|
2
2
|
|
|
3
3
|
// puppeteer for browser automation, fs for file system operations, psl for domain parsing.
|
|
4
4
|
// const pLimit = require('p-limit'); // Will be dynamically imported
|
|
@@ -123,7 +123,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
|
|
|
123
123
|
const { monitorBrowserHealth, isBrowserHealthy } = require('./lib/browserhealth');
|
|
124
124
|
|
|
125
125
|
// --- Script Configuration & Constants ---
|
|
126
|
-
const VERSION = '1.0.72'; // Script version
|
|
126
|
+
const VERSION = '1.0.74'; // Script version
|
|
127
127
|
|
|
128
128
|
// get startTime
|
|
129
129
|
const startTime = Date.now();
|
|
@@ -474,6 +474,8 @@ Global config.json options:
|
|
|
474
474
|
Per-site config.json options:
|
|
475
475
|
url: "site" or ["site1", "site2"] Single URL or list of URLs
|
|
476
476
|
filterRegex: "regex" or ["regex1", "regex2"] Patterns to match requests
|
|
477
|
+
regex_and: true/false Use AND logic for multiple filterRegex patterns (default: false)
|
|
478
|
+
When true, ALL regex patterns must match the same URL
|
|
477
479
|
|
|
478
480
|
Redirect Handling Options:
|
|
479
481
|
follow_redirects: true/false Follow redirects to new domains (default: true)
|
|
@@ -1646,6 +1648,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1646
1648
|
? [new RegExp(siteConfig.filterRegex.replace(/^\/(.*)\/$/, '$1'))]
|
|
1647
1649
|
: [];
|
|
1648
1650
|
|
|
1651
|
+
// NEW: Get regex_and setting (defaults to false for backward compatibility)
|
|
1652
|
+
const useRegexAnd = siteConfig.regex_and === true;
|
|
1653
|
+
|
|
1649
1654
|
// Parse searchstring patterns using module
|
|
1650
1655
|
const { searchStrings, searchStringsAnd, hasSearchString, hasSearchStringAnd } = parseSearchStrings(siteConfig.searchstring, siteConfig.searchstring_and);
|
|
1651
1656
|
const useCurl = siteConfig.curl === true; // Use curl if enabled, regardless of searchstring
|
|
@@ -1721,6 +1726,11 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1721
1726
|
patterns.forEach((pattern, idx) => {
|
|
1722
1727
|
console.log(` [${idx + 1}] ${pattern}`);
|
|
1723
1728
|
});
|
|
1729
|
+
if (useRegexAnd && patterns.length > 1) {
|
|
1730
|
+
console.log(formatLogMessage('info', ` Logic: AND (all patterns must match same URL)`));
|
|
1731
|
+
} else if (patterns.length > 1) {
|
|
1732
|
+
console.log(formatLogMessage('info', ` Logic: OR (any pattern can match)`));
|
|
1733
|
+
}
|
|
1724
1734
|
}
|
|
1725
1735
|
|
|
1726
1736
|
if (siteConfig.verbose === 1 && (hasSearchString || hasSearchStringAnd)) {
|
|
@@ -1987,7 +1997,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1987
1997
|
if (!allowedResourceTypes || !Array.isArray(allowedResourceTypes) || allowedResourceTypes.includes(resourceType)) {
|
|
1988
1998
|
if (dryRunMode) {
|
|
1989
1999
|
matchedDomains.get('dryRunMatches').push({
|
|
1990
|
-
regex:
|
|
2000
|
+
regex: matchedRegexPattern,
|
|
1991
2001
|
domain: reqDomain,
|
|
1992
2002
|
resourceType: resourceType,
|
|
1993
2003
|
fullUrl: reqUrl,
|
|
@@ -2001,7 +2011,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2001
2011
|
const simplifiedUrl = getRootDomain(currentUrl);
|
|
2002
2012
|
if (siteConfig.verbose === 1) {
|
|
2003
2013
|
const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
|
|
2004
|
-
console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${
|
|
2014
|
+
console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
|
|
2005
2015
|
}
|
|
2006
2016
|
if (dumpUrls) {
|
|
2007
2017
|
const timestamp = new Date().toISOString();
|
|
@@ -2037,8 +2047,34 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2037
2047
|
return;
|
|
2038
2048
|
}
|
|
2039
2049
|
|
|
2040
|
-
|
|
2041
|
-
|
|
2050
|
+
// === ENHANCED REGEX MATCHING WITH AND/OR LOGIC ===
|
|
2051
|
+
let regexMatched = false;
|
|
2052
|
+
let matchedRegexPattern = null;
|
|
2053
|
+
|
|
2054
|
+
if (regexes.length > 0) {
|
|
2055
|
+
if (useRegexAnd) {
|
|
2056
|
+
// AND logic: ALL regex patterns must match the same URL
|
|
2057
|
+
const allMatch = regexes.every(re => re.test(reqUrl));
|
|
2058
|
+
if (allMatch) {
|
|
2059
|
+
regexMatched = true;
|
|
2060
|
+
matchedRegexPattern = regexes.map(re => re.source).join(' AND ');
|
|
2061
|
+
if (forceDebug) {
|
|
2062
|
+
console.log(formatLogMessage('debug', `URL ${reqUrl} matched ALL regex patterns (AND logic)`));
|
|
2063
|
+
}
|
|
2064
|
+
}
|
|
2065
|
+
} else {
|
|
2066
|
+
// OR logic: ANY regex pattern can match (original behavior)
|
|
2067
|
+
for (const re of regexes) {
|
|
2068
|
+
if (re.test(reqUrl)) {
|
|
2069
|
+
regexMatched = true;
|
|
2070
|
+
matchedRegexPattern = re.source;
|
|
2071
|
+
break;
|
|
2072
|
+
}
|
|
2073
|
+
}
|
|
2074
|
+
}
|
|
2075
|
+
}
|
|
2076
|
+
|
|
2077
|
+
if (regexMatched) {
|
|
2042
2078
|
const resourceType = request.resourceType();
|
|
2043
2079
|
|
|
2044
2080
|
// *** UNIVERSAL RESOURCE TYPE FILTER ***
|
|
@@ -2049,7 +2085,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2049
2085
|
if (forceDebug) {
|
|
2050
2086
|
console.log(formatLogMessage('debug', `URL ${reqUrl} matches regex but resourceType '${resourceType}' not in allowed types [${allowedResourceTypes.join(', ')}]. Skipping ALL processing.`));
|
|
2051
2087
|
}
|
|
2052
|
-
|
|
2088
|
+
// Skip this URL entirely - doesn't match required resource types
|
|
2089
|
+
request.continue();
|
|
2090
|
+
return;
|
|
2053
2091
|
}
|
|
2054
2092
|
}
|
|
2055
2093
|
|
|
@@ -2058,24 +2096,36 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2058
2096
|
if (forceDebug) {
|
|
2059
2097
|
console.log(formatLogMessage('debug', `Skipping first-party match: ${reqUrl} (firstParty disabled)`));
|
|
2060
2098
|
}
|
|
2061
|
-
|
|
2099
|
+
// Skip this URL - it's first-party but firstParty is disabled
|
|
2100
|
+
request.continue();
|
|
2101
|
+
return;
|
|
2062
2102
|
}
|
|
2063
2103
|
if (!isFirstParty && siteConfig.thirdParty === false) {
|
|
2064
2104
|
if (forceDebug) {
|
|
2065
2105
|
console.log(formatLogMessage('debug', `Skipping third-party match: ${reqUrl} (thirdParty disabled)`));
|
|
2066
2106
|
}
|
|
2067
|
-
|
|
2107
|
+
// Skip this URL - it's third-party but thirdParty is disabled
|
|
2108
|
+
request.continue();
|
|
2109
|
+
return;
|
|
2068
2110
|
}
|
|
2069
2111
|
|
|
2070
2112
|
// REMOVED: Check if this URL matches any blocked patterns - if so, skip detection but still continue browser blocking
|
|
2071
2113
|
// This check is no longer needed here since even_blocked handles it above
|
|
2072
|
-
|
|
2114
|
+
|
|
2115
|
+
// Check if nettools validation is required - if so, NEVER add domains immediately
|
|
2116
|
+
if (hasNetTools) {
|
|
2117
|
+
if (forceDebug) {
|
|
2118
|
+
console.log(formatLogMessage('debug', `${reqUrl} has nettools validation required - skipping immediate add`));
|
|
2119
|
+
}
|
|
2120
|
+
request.continue();
|
|
2121
|
+
return;
|
|
2122
|
+
}
|
|
2073
2123
|
|
|
2074
2124
|
// If NO searchstring AND NO nettools are defined, match immediately (existing behavior)
|
|
2075
2125
|
if (!hasSearchString && !hasSearchStringAnd && !hasNetTools) {
|
|
2076
2126
|
if (dryRunMode) {
|
|
2077
2127
|
matchedDomains.get('dryRunMatches').push({
|
|
2078
|
-
regex:
|
|
2128
|
+
regex: matchedRegexPattern,
|
|
2079
2129
|
domain: reqDomain,
|
|
2080
2130
|
resourceType: resourceType,
|
|
2081
2131
|
fullUrl: reqUrl,
|
|
@@ -2087,7 +2137,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2087
2137
|
const simplifiedUrl = getRootDomain(currentUrl);
|
|
2088
2138
|
if (siteConfig.verbose === 1) {
|
|
2089
2139
|
const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
|
|
2090
|
-
console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${
|
|
2140
|
+
console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
|
|
2091
2141
|
}
|
|
2092
2142
|
if (dumpUrls) {
|
|
2093
2143
|
const timestamp = new Date().toISOString();
|
|
@@ -2101,17 +2151,25 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2101
2151
|
if (forceDebug) {
|
|
2102
2152
|
console.log(formatLogMessage('debug', `Skipping nettools check for already detected subdomain: ${fullSubdomain}`));
|
|
2103
2153
|
}
|
|
2104
|
-
|
|
2154
|
+
// Skip to next URL
|
|
2155
|
+
request.continue();
|
|
2156
|
+
return;
|
|
2105
2157
|
}
|
|
2106
2158
|
|
|
2107
2159
|
if (forceDebug) {
|
|
2108
|
-
console.log(formatLogMessage('debug', `${reqUrl} matched regex ${
|
|
2160
|
+
console.log(formatLogMessage('debug', `${reqUrl} matched regex ${matchedRegexPattern} and resourceType ${resourceType}, queued for nettools check`));
|
|
2161
|
+
}
|
|
2162
|
+
|
|
2163
|
+
// IMPORTANT: Do NOT add domain immediately when nettools validation is required
|
|
2164
|
+
// The nettools handler will add the domain only if validation passes
|
|
2165
|
+
if (forceDebug) {
|
|
2166
|
+
console.log(formatLogMessage('debug', `Domain ${reqDomain} queued for mandatory nettools validation (dig: ${JSON.stringify(siteConfig.dig)})`));
|
|
2109
2167
|
}
|
|
2110
2168
|
|
|
2111
2169
|
if (dryRunMode) {
|
|
2112
2170
|
// For dry run, we'll collect the domain for nettools checking
|
|
2113
2171
|
matchedDomains.get('dryRunMatches').push({
|
|
2114
|
-
regex:
|
|
2172
|
+
regex: matchedRegexPattern,
|
|
2115
2173
|
domain: reqDomain,
|
|
2116
2174
|
resourceType: resourceType,
|
|
2117
2175
|
fullUrl: reqUrl,
|
|
@@ -2168,6 +2226,12 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2168
2226
|
// Execute nettools check asynchronously
|
|
2169
2227
|
const originalDomain = fullSubdomain; // Use full subdomain for nettools
|
|
2170
2228
|
setImmediate(() => netToolsHandler(reqDomain, originalDomain));
|
|
2229
|
+
|
|
2230
|
+
// Do NOT continue processing this request for immediate domain addition
|
|
2231
|
+
// The nettools handler is responsible for adding the domain if validation passes
|
|
2232
|
+
if (forceDebug) {
|
|
2233
|
+
console.log(formatLogMessage('debug', `Request processing halted for ${reqUrl} - awaiting nettools validation`));
|
|
2234
|
+
}
|
|
2171
2235
|
} else {
|
|
2172
2236
|
// If searchstring or searchstring_and IS defined (with or without nettools), queue for content checking
|
|
2173
2237
|
// Skip searchstring check if full subdomain was already detected
|
|
@@ -2175,15 +2239,17 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2175
2239
|
if (forceDebug) {
|
|
2176
2240
|
console.log(formatLogMessage('debug', `Skipping searchstring check for already detected subdomain: ${fullSubdomain}`));
|
|
2177
2241
|
}
|
|
2178
|
-
|
|
2242
|
+
// Skip to next URL
|
|
2243
|
+
request.continue();
|
|
2244
|
+
return;
|
|
2179
2245
|
}
|
|
2180
2246
|
if (forceDebug) {
|
|
2181
2247
|
const searchType = hasSearchStringAnd ? 'searchstring_and' : 'searchstring';
|
|
2182
|
-
console.log(formatLogMessage('debug', `${reqUrl} matched regex ${
|
|
2248
|
+
console.log(formatLogMessage('debug', `${reqUrl} matched regex ${matchedRegexPattern} and resourceType ${resourceType}, queued for ${searchType} content search`));
|
|
2183
2249
|
}
|
|
2184
2250
|
if (dryRunMode) {
|
|
2185
2251
|
matchedDomains.get('dryRunMatches').push({
|
|
2186
|
-
regex:
|
|
2252
|
+
regex: matchedRegexPattern,
|
|
2187
2253
|
domain: reqDomain,
|
|
2188
2254
|
resourceType: resourceType,
|
|
2189
2255
|
fullUrl: reqUrl,
|
|
@@ -2191,6 +2257,12 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2191
2257
|
needsSearchStringCheck: true
|
|
2192
2258
|
});
|
|
2193
2259
|
}
|
|
2260
|
+
// If we have BOTH searchstring AND nettools, ensure nettools validation still happens
|
|
2261
|
+
if (hasNetTools) {
|
|
2262
|
+
if (forceDebug) {
|
|
2263
|
+
console.log(formatLogMessage('debug', `${reqUrl} requires both content and nettools validation`));
|
|
2264
|
+
}
|
|
2265
|
+
}
|
|
2194
2266
|
}
|
|
2195
2267
|
|
|
2196
2268
|
// If curl is enabled, download and analyze content immediately
|
|
@@ -2270,8 +2342,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2270
2342
|
}
|
|
2271
2343
|
}
|
|
2272
2344
|
|
|
2273
|
-
break
|
|
2274
|
-
}
|
|
2345
|
+
// No break needed since we've already determined if regex matched
|
|
2275
2346
|
}
|
|
2276
2347
|
request.continue();
|
|
2277
2348
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "1.0.72",
|
|
3
|
+
"version": "1.0.74",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|