@fanboynz/network-scanner 1.0.72 → 1.0.73
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/nwss.js +64 -19
- package/package.json +1 -1
package/nwss.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// === Network scanner script (nwss.js) v1.0.72 ===
|
|
1
|
+
// === Network scanner script (nwss.js) v1.0.73 ===
|
|
2
2
|
|
|
3
3
|
// puppeteer for browser automation, fs for file system operations, psl for domain parsing.
|
|
4
4
|
// const pLimit = require('p-limit'); // Will be dynamically imported
|
|
@@ -123,7 +123,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
|
|
|
123
123
|
const { monitorBrowserHealth, isBrowserHealthy } = require('./lib/browserhealth');
|
|
124
124
|
|
|
125
125
|
// --- Script Configuration & Constants ---
|
|
126
|
-
const VERSION = '1.0.72'; // Script version
|
|
126
|
+
const VERSION = '1.0.73'; // Script version
|
|
127
127
|
|
|
128
128
|
// get startTime
|
|
129
129
|
const startTime = Date.now();
|
|
@@ -474,6 +474,8 @@ Global config.json options:
|
|
|
474
474
|
Per-site config.json options:
|
|
475
475
|
url: "site" or ["site1", "site2"] Single URL or list of URLs
|
|
476
476
|
filterRegex: "regex" or ["regex1", "regex2"] Patterns to match requests
|
|
477
|
+
regex_and: true/false Use AND logic for multiple filterRegex patterns (default: false)
|
|
478
|
+
When true, ALL regex patterns must match the same URL
|
|
477
479
|
|
|
478
480
|
Redirect Handling Options:
|
|
479
481
|
follow_redirects: true/false Follow redirects to new domains (default: true)
|
|
@@ -1646,6 +1648,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1646
1648
|
? [new RegExp(siteConfig.filterRegex.replace(/^\/(.*)\/$/, '$1'))]
|
|
1647
1649
|
: [];
|
|
1648
1650
|
|
|
1651
|
+
// NEW: Get regex_and setting (defaults to false for backward compatibility)
|
|
1652
|
+
const useRegexAnd = siteConfig.regex_and === true;
|
|
1653
|
+
|
|
1649
1654
|
// Parse searchstring patterns using module
|
|
1650
1655
|
const { searchStrings, searchStringsAnd, hasSearchString, hasSearchStringAnd } = parseSearchStrings(siteConfig.searchstring, siteConfig.searchstring_and);
|
|
1651
1656
|
const useCurl = siteConfig.curl === true; // Use curl if enabled, regardless of searchstring
|
|
@@ -1721,6 +1726,11 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1721
1726
|
patterns.forEach((pattern, idx) => {
|
|
1722
1727
|
console.log(` [${idx + 1}] ${pattern}`);
|
|
1723
1728
|
});
|
|
1729
|
+
if (useRegexAnd && patterns.length > 1) {
|
|
1730
|
+
console.log(formatLogMessage('info', ` Logic: AND (all patterns must match same URL)`));
|
|
1731
|
+
} else if (patterns.length > 1) {
|
|
1732
|
+
console.log(formatLogMessage('info', ` Logic: OR (any pattern can match)`));
|
|
1733
|
+
}
|
|
1724
1734
|
}
|
|
1725
1735
|
|
|
1726
1736
|
if (siteConfig.verbose === 1 && (hasSearchString || hasSearchStringAnd)) {
|
|
@@ -1987,7 +1997,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1987
1997
|
if (!allowedResourceTypes || !Array.isArray(allowedResourceTypes) || allowedResourceTypes.includes(resourceType)) {
|
|
1988
1998
|
if (dryRunMode) {
|
|
1989
1999
|
matchedDomains.get('dryRunMatches').push({
|
|
1990
|
-
regex:
|
|
2000
|
+
regex: matchedRegexPattern,
|
|
1991
2001
|
domain: reqDomain,
|
|
1992
2002
|
resourceType: resourceType,
|
|
1993
2003
|
fullUrl: reqUrl,
|
|
@@ -2001,7 +2011,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2001
2011
|
const simplifiedUrl = getRootDomain(currentUrl);
|
|
2002
2012
|
if (siteConfig.verbose === 1) {
|
|
2003
2013
|
const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
|
|
2004
|
-
console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${
|
|
2014
|
+
console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
|
|
2005
2015
|
}
|
|
2006
2016
|
if (dumpUrls) {
|
|
2007
2017
|
const timestamp = new Date().toISOString();
|
|
@@ -2037,8 +2047,34 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2037
2047
|
return;
|
|
2038
2048
|
}
|
|
2039
2049
|
|
|
2040
|
-
|
|
2041
|
-
|
|
2050
|
+
// === ENHANCED REGEX MATCHING WITH AND/OR LOGIC ===
|
|
2051
|
+
let regexMatched = false;
|
|
2052
|
+
let matchedRegexPattern = null;
|
|
2053
|
+
|
|
2054
|
+
if (regexes.length > 0) {
|
|
2055
|
+
if (useRegexAnd) {
|
|
2056
|
+
// AND logic: ALL regex patterns must match the same URL
|
|
2057
|
+
const allMatch = regexes.every(re => re.test(reqUrl));
|
|
2058
|
+
if (allMatch) {
|
|
2059
|
+
regexMatched = true;
|
|
2060
|
+
matchedRegexPattern = regexes.map(re => re.source).join(' AND ');
|
|
2061
|
+
if (forceDebug) {
|
|
2062
|
+
console.log(formatLogMessage('debug', `URL ${reqUrl} matched ALL regex patterns (AND logic)`));
|
|
2063
|
+
}
|
|
2064
|
+
}
|
|
2065
|
+
} else {
|
|
2066
|
+
// OR logic: ANY regex pattern can match (original behavior)
|
|
2067
|
+
for (const re of regexes) {
|
|
2068
|
+
if (re.test(reqUrl)) {
|
|
2069
|
+
regexMatched = true;
|
|
2070
|
+
matchedRegexPattern = re.source;
|
|
2071
|
+
break;
|
|
2072
|
+
}
|
|
2073
|
+
}
|
|
2074
|
+
}
|
|
2075
|
+
}
|
|
2076
|
+
|
|
2077
|
+
if (regexMatched) {
|
|
2042
2078
|
const resourceType = request.resourceType();
|
|
2043
2079
|
|
|
2044
2080
|
// *** UNIVERSAL RESOURCE TYPE FILTER ***
|
|
@@ -2049,7 +2085,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2049
2085
|
if (forceDebug) {
|
|
2050
2086
|
console.log(formatLogMessage('debug', `URL ${reqUrl} matches regex but resourceType '${resourceType}' not in allowed types [${allowedResourceTypes.join(', ')}]. Skipping ALL processing.`));
|
|
2051
2087
|
}
|
|
2052
|
-
|
|
2088
|
+
// Skip this URL entirely - doesn't match required resource types
|
|
2089
|
+
request.continue();
|
|
2090
|
+
return;
|
|
2053
2091
|
}
|
|
2054
2092
|
}
|
|
2055
2093
|
|
|
@@ -2058,13 +2096,17 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2058
2096
|
if (forceDebug) {
|
|
2059
2097
|
console.log(formatLogMessage('debug', `Skipping first-party match: ${reqUrl} (firstParty disabled)`));
|
|
2060
2098
|
}
|
|
2061
|
-
|
|
2099
|
+
// Skip this URL - it's first-party but firstParty is disabled
|
|
2100
|
+
request.continue();
|
|
2101
|
+
return;
|
|
2062
2102
|
}
|
|
2063
2103
|
if (!isFirstParty && siteConfig.thirdParty === false) {
|
|
2064
2104
|
if (forceDebug) {
|
|
2065
2105
|
console.log(formatLogMessage('debug', `Skipping third-party match: ${reqUrl} (thirdParty disabled)`));
|
|
2066
2106
|
}
|
|
2067
|
-
|
|
2107
|
+
// Skip this URL - it's third-party but thirdParty is disabled
|
|
2108
|
+
request.continue();
|
|
2109
|
+
return;
|
|
2068
2110
|
}
|
|
2069
2111
|
|
|
2070
2112
|
// REMOVED: Check if this URL matches any blocked patterns - if so, skip detection but still continue browser blocking
|
|
@@ -2075,7 +2117,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2075
2117
|
if (!hasSearchString && !hasSearchStringAnd && !hasNetTools) {
|
|
2076
2118
|
if (dryRunMode) {
|
|
2077
2119
|
matchedDomains.get('dryRunMatches').push({
|
|
2078
|
-
regex:
|
|
2120
|
+
regex: matchedRegexPattern,
|
|
2079
2121
|
domain: reqDomain,
|
|
2080
2122
|
resourceType: resourceType,
|
|
2081
2123
|
fullUrl: reqUrl,
|
|
@@ -2087,7 +2129,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2087
2129
|
const simplifiedUrl = getRootDomain(currentUrl);
|
|
2088
2130
|
if (siteConfig.verbose === 1) {
|
|
2089
2131
|
const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
|
|
2090
|
-
console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${
|
|
2132
|
+
console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
|
|
2091
2133
|
}
|
|
2092
2134
|
if (dumpUrls) {
|
|
2093
2135
|
const timestamp = new Date().toISOString();
|
|
@@ -2101,17 +2143,19 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2101
2143
|
if (forceDebug) {
|
|
2102
2144
|
console.log(formatLogMessage('debug', `Skipping nettools check for already detected subdomain: ${fullSubdomain}`));
|
|
2103
2145
|
}
|
|
2104
|
-
|
|
2146
|
+
// Skip to next URL
|
|
2147
|
+
request.continue();
|
|
2148
|
+
return;
|
|
2105
2149
|
}
|
|
2106
2150
|
|
|
2107
2151
|
if (forceDebug) {
|
|
2108
|
-
console.log(formatLogMessage('debug', `${reqUrl} matched regex ${
|
|
2152
|
+
console.log(formatLogMessage('debug', `${reqUrl} matched regex ${matchedRegexPattern} and resourceType ${resourceType}, queued for nettools check`));
|
|
2109
2153
|
}
|
|
2110
2154
|
|
|
2111
2155
|
if (dryRunMode) {
|
|
2112
2156
|
// For dry run, we'll collect the domain for nettools checking
|
|
2113
2157
|
matchedDomains.get('dryRunMatches').push({
|
|
2114
|
-
regex:
|
|
2158
|
+
regex: matchedRegexPattern,
|
|
2115
2159
|
domain: reqDomain,
|
|
2116
2160
|
resourceType: resourceType,
|
|
2117
2161
|
fullUrl: reqUrl,
|
|
@@ -2175,15 +2219,17 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2175
2219
|
if (forceDebug) {
|
|
2176
2220
|
console.log(formatLogMessage('debug', `Skipping searchstring check for already detected subdomain: ${fullSubdomain}`));
|
|
2177
2221
|
}
|
|
2178
|
-
|
|
2222
|
+
// Skip to next URL
|
|
2223
|
+
request.continue();
|
|
2224
|
+
return;
|
|
2179
2225
|
}
|
|
2180
2226
|
if (forceDebug) {
|
|
2181
2227
|
const searchType = hasSearchStringAnd ? 'searchstring_and' : 'searchstring';
|
|
2182
|
-
console.log(formatLogMessage('debug', `${reqUrl} matched regex ${
|
|
2228
|
+
console.log(formatLogMessage('debug', `${reqUrl} matched regex ${matchedRegexPattern} and resourceType ${resourceType}, queued for ${searchType} content search`));
|
|
2183
2229
|
}
|
|
2184
2230
|
if (dryRunMode) {
|
|
2185
2231
|
matchedDomains.get('dryRunMatches').push({
|
|
2186
|
-
regex:
|
|
2232
|
+
regex: matchedRegexPattern,
|
|
2187
2233
|
domain: reqDomain,
|
|
2188
2234
|
resourceType: resourceType,
|
|
2189
2235
|
fullUrl: reqUrl,
|
|
@@ -2270,8 +2316,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2270
2316
|
}
|
|
2271
2317
|
}
|
|
2272
2318
|
|
|
2273
|
-
break
|
|
2274
|
-
}
|
|
2319
|
+
// No break needed since we've already determined if regex matched
|
|
2275
2320
|
}
|
|
2276
2321
|
request.continue();
|
|
2277
2322
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "1.0.72",
|
|
3
|
+
"version": "1.0.73",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|