@fanboynz/network-scanner 2.0.61 → 2.0.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/config.json +3 -3
- package/lib/output.js +35 -24
- package/lib/smart-cache.js +6 -6
- package/nwss.js +73 -23
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -5,7 +5,7 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
|
|
|
5
5
|
- Scan websites and detect matching third-party or first-party resources
|
|
6
6
|
- Output Adblock-formatted blocking rules
|
|
7
7
|
- Support for multiple filters per site
|
|
8
|
-
- Grouped titles (! <url>) before site matches
|
|
8
|
+
- Grouped titles (! <url>) before site matches, including redirect source and matching regex
|
|
9
9
|
- Ignore unwanted domains (global and per-site)
|
|
10
10
|
- Block unwanted domains during scan (simulate adblock)
|
|
11
11
|
- Support Chrome, Firefox, Safari user agents (desktop or mobile)
|
|
@@ -64,6 +64,7 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
|
|
|
64
64
|
| `--headful` | Launch browser with GUI (not headless) |
|
|
65
65
|
| `--keep-open` | Keep browser and tabs open after scan completes (use with `--headful` for debugging) |
|
|
66
66
|
| `--use-puppeteer-core` | Use `puppeteer-core` with system Chrome instead of bundled Chromium |
|
|
67
|
+
| `--use-obscura` | Connect to running Obscura CDP server (`ws://127.0.0.1:9222` or `OBSCURA_WS` env). Skips fingerprint injection — Obscura provides built-in stealth |
|
|
67
68
|
| `--load-extension <path>` | Load unpacked Chrome extension from directory (can be used multiple times) |
|
|
68
69
|
| `--dns-cache` | Persist dig/whois results to disk between runs (14hr TTL, `.digcache`/`.whoiscache`) |
|
|
69
70
|
| `--block-ads=<files>` | Block ads using EasyList format rules (comma-separated: `easylist.txt,easyprivacy.txt`) |
|
|
@@ -448,7 +449,7 @@ node nwss.js config-clean2.json --debug # .nwssconfig + debug overri
|
|
|
448
449
|
node nwss.js config-other.json --max-concurrent 5 # no match in .nwssconfig, uses CLI flags
|
|
449
450
|
```
|
|
450
451
|
|
|
451
|
-
**Supported settings:** `output`, `max_concurrent`, `dns_cache`, `cache_requests`, `dumpurls`, `remove_tempfiles`, `color`, `remove_dupes`, `compress_logs`, `debug`, `silent`, `verbose`, `headful`, `keep_open`, `dry_run`, `titles`, `sub_domains`, `no_interact`, `ghost_cursor`, `plain`, `cdp`, `dnsmasq`, `unbound`, `privoxy`, `pihole`, `eval_on_doc`, `use_puppeteer_core`, `ignore_cache`, `clear_cache`, `block_ads`, `compare`, `localhost`, `append`.
|
|
452
|
+
**Supported settings:** `output`, `max_concurrent`, `dns_cache`, `cache_requests`, `dumpurls`, `remove_tempfiles`, `color`, `remove_dupes`, `compress_logs`, `debug`, `silent`, `verbose`, `headful`, `keep_open`, `dry_run`, `titles`, `sub_domains`, `no_interact`, `ghost_cursor`, `plain`, `cdp`, `dnsmasq`, `unbound`, `privoxy`, `pihole`, `eval_on_doc`, `use_puppeteer_core`, `use_obscura`, `ignore_cache`, `clear_cache`, `block_ads`, `compare`, `localhost`, `append`.
|
|
452
453
|
|
|
453
454
|
**Priority:** CLI flags > `.nwssconfig` > hardcoded defaults.
|
|
454
455
|
|
|
@@ -461,6 +462,7 @@ These options go at the root level of your config.json:
|
|
|
461
462
|
| Field | Values | Default | Description |
|
|
462
463
|
|:---------------------|:-------|:-------:|:------------|
|
|
463
464
|
| `ignoreDomains` | Array | - | Domains to completely ignore (supports wildcards like `*.ads.com`) |
|
|
465
|
+
| `ignoreDomainsByUrl` | Array | - | Regex patterns; if a request URL matches, the request's root domain is dynamically ignored for the rest of the scan (e.g. `["\\/jwplayer\\/", "\\/build\\/assets\\/"]`) |
|
|
464
466
|
| `blocked` | Array | - | Global regex patterns to block requests (combined with per-site blocked) |
|
|
465
467
|
| `whois_server_mode` | String | `"random"` | Default server selection mode for all sites |
|
|
466
468
|
| `ignore_similar` | Boolean | `true` | Ignore domains similar to already found domains |
|
package/config.json
CHANGED
|
@@ -38,7 +38,7 @@
|
|
|
38
38
|
"sites": [
|
|
39
39
|
{
|
|
40
40
|
"url": "https://www.anandtech.com/",
|
|
41
|
-
"filterRegex": "
|
|
41
|
+
"filterRegex": "teststring",
|
|
42
42
|
"resourceTypes": ["script", "xhr", "document"],
|
|
43
43
|
"reload": 1,
|
|
44
44
|
"timeout": 25000,
|
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
},
|
|
51
51
|
{
|
|
52
52
|
"url": "https://www.tomshardware.com/",
|
|
53
|
-
"filterRegex": "
|
|
53
|
+
"filterRegex": "anotherstrng",
|
|
54
54
|
"resourceTypes": ["all"],
|
|
55
55
|
"reload": 2,
|
|
56
56
|
"timeout": 25000,
|
|
@@ -61,7 +61,7 @@
|
|
|
61
61
|
},
|
|
62
62
|
{
|
|
63
63
|
"url": ["https://www.tomshardware.com/", "https://www.anandtech.com/"],
|
|
64
|
-
"filterRegex": "
|
|
64
|
+
"filterRegex": "morestrings",
|
|
65
65
|
"resourceTypes": ["all"],
|
|
66
66
|
"reload": 2,
|
|
67
67
|
"timeout": 25000,
|
package/lib/output.js
CHANGED
|
@@ -324,35 +324,40 @@ function buildOutputLines(results, options = {}) {
|
|
|
324
324
|
const { showTitles = false, removeDupes = false, ignoreDomains = [], forLogFile = false } = options;
|
|
325
325
|
|
|
326
326
|
// Consolidate rules from all results, handling multiple results for same URL
|
|
327
|
-
const consolidatedRules = new Map(); // URL -> Set
|
|
327
|
+
const consolidatedRules = new Map(); // URL -> { rules: Set, originalUrl, regexes: Set }
|
|
328
328
|
let successfulPageLoads = 0;
|
|
329
|
-
|
|
329
|
+
|
|
330
330
|
results.forEach(result => {
|
|
331
331
|
if (result) {
|
|
332
332
|
if (result.success) {
|
|
333
333
|
successfulPageLoads++;
|
|
334
334
|
}
|
|
335
335
|
if (result.rules && result.rules.length > 0) {
|
|
336
|
-
// Consolidate rules by URL to handle multiple site entries for same URL
|
|
337
336
|
if (!consolidatedRules.has(result.url)) {
|
|
338
|
-
consolidatedRules.set(result.url, new Set());
|
|
337
|
+
consolidatedRules.set(result.url, { rules: new Set(), originalUrl: result.originalUrl || result.url, regexes: new Set() });
|
|
338
|
+
}
|
|
339
|
+
const entry = consolidatedRules.get(result.url);
|
|
340
|
+
result.rules.forEach(rule => entry.rules.add(rule));
|
|
341
|
+
if (Array.isArray(result.matchedRegexes)) {
|
|
342
|
+
result.matchedRegexes.forEach(rx => entry.regexes.add(rx));
|
|
343
|
+
}
|
|
344
|
+
// Prefer the original URL from any result entry that has one different from final
|
|
345
|
+
if (result.originalUrl && result.originalUrl !== result.url) {
|
|
346
|
+
entry.originalUrl = result.originalUrl;
|
|
339
347
|
}
|
|
340
|
-
|
|
341
|
-
// Add all rules from this result to the consolidated set
|
|
342
|
-
result.rules.forEach(rule => {
|
|
343
|
-
consolidatedRules.get(result.url).add(rule);
|
|
344
|
-
});
|
|
345
348
|
}
|
|
346
349
|
}
|
|
347
350
|
});
|
|
348
351
|
|
|
349
352
|
// Convert consolidated rules back to array format
|
|
350
353
|
const finalSiteRules = [];
|
|
351
|
-
consolidatedRules.forEach((
|
|
352
|
-
if (
|
|
353
|
-
finalSiteRules.push({
|
|
354
|
-
url: url,
|
|
355
|
-
|
|
354
|
+
consolidatedRules.forEach((entry, url) => {
|
|
355
|
+
if (entry.rules.size > 0) {
|
|
356
|
+
finalSiteRules.push({
|
|
357
|
+
url: url,
|
|
358
|
+
originalUrl: entry.originalUrl,
|
|
359
|
+
regexes: Array.from(entry.regexes),
|
|
360
|
+
rules: Array.from(entry.rules)
|
|
356
361
|
});
|
|
357
362
|
}
|
|
358
363
|
});
|
|
@@ -362,35 +367,41 @@ function buildOutputLines(results, options = {}) {
|
|
|
362
367
|
const outputLinesWithTitles = [];
|
|
363
368
|
let filteredOutCount = 0;
|
|
364
369
|
|
|
365
|
-
for (const { url, rules } of finalSiteRules) {
|
|
370
|
+
for (const { url, originalUrl, regexes, rules } of finalSiteRules) {
|
|
366
371
|
if (rules.length > 0) {
|
|
372
|
+
// Build title comments — include redirect source if URL changed and matched regex(es)
|
|
373
|
+
const titleLines = [`! ${url}`];
|
|
374
|
+
if (originalUrl && originalUrl !== url) {
|
|
375
|
+
titleLines.push(`! Redirected from: ${originalUrl}`);
|
|
376
|
+
}
|
|
377
|
+
if (regexes && regexes.length > 0) {
|
|
378
|
+
titleLines.push(`! Regex: ${regexes.join(', ')}`);
|
|
379
|
+
}
|
|
380
|
+
|
|
367
381
|
// Regular output (for -o files and console) - only add titles if --titles flag used
|
|
368
382
|
if (showTitles) {
|
|
369
|
-
outputLines.push(
|
|
383
|
+
outputLines.push(...titleLines);
|
|
370
384
|
}
|
|
371
|
-
|
|
385
|
+
|
|
372
386
|
// Filter out ignored domains from rules
|
|
373
387
|
const filteredRules = rules.filter(rule => {
|
|
374
388
|
const domain = extractDomainFromRule(rule);
|
|
375
389
|
if (domain && matchesIgnoreDomain(domain, ignoreDomains)) {
|
|
376
390
|
filteredOutCount++;
|
|
377
|
-
|
|
378
|
-
// Log each filtered domain
|
|
379
391
|
if (options.forceDebug) {
|
|
380
392
|
console.log(formatLogMessage('debug', `[output-filter] Removed rule matching ignoreDomains: ${rule} (domain: ${domain})`));
|
|
381
393
|
} else if (!options.silentMode) {
|
|
382
394
|
console.log(formatLogMessage('info', `Filtered out: ${domain}`));
|
|
383
|
-
|
|
384
|
-
|
|
395
|
+
}
|
|
385
396
|
return false;
|
|
386
397
|
}
|
|
387
398
|
return true;
|
|
388
399
|
});
|
|
389
|
-
|
|
400
|
+
|
|
390
401
|
outputLines.push(...filteredRules);
|
|
391
|
-
|
|
402
|
+
|
|
392
403
|
// Output with titles (for auto-saved log files) - always add titles
|
|
393
|
-
outputLinesWithTitles.push(
|
|
404
|
+
outputLinesWithTitles.push(...titleLines);
|
|
394
405
|
outputLinesWithTitles.push(...filteredRules);
|
|
395
406
|
}
|
|
396
407
|
}
|
package/lib/smart-cache.js
CHANGED
|
@@ -867,14 +867,14 @@ class SmartCache {
|
|
|
867
867
|
netToolsCacheSize: this.netToolsCache.size,
|
|
868
868
|
similarityCacheSize: this.similarityCache.size,
|
|
869
869
|
regexCacheSize: this.regexCache.size,
|
|
870
|
-
requestHitRate: this._enableRequest ?
|
|
870
|
+
requestHitRate: (this._enableRequest && this.requestCache) ?
|
|
871
871
|
(requestHitRate * 100).toFixed(2) + '%' : '0% (disabled)',
|
|
872
|
-
requestCacheSize: this._enableRequest ? this.requestCache.size : 0,
|
|
873
|
-
requestCacheMemoryMB: this._enableRequest ?
|
|
872
|
+
requestCacheSize: (this._enableRequest && this.requestCache) ? this.requestCache.size : 0,
|
|
873
|
+
requestCacheMemoryMB: (this._enableRequest && this.requestCache) ?
|
|
874
874
|
Math.round((this.requestCache.calculatedSize || 0) / 1048576) : 0,
|
|
875
|
-
totalCacheEntries: this.domainCache.size + this.patternCache.size +
|
|
876
|
-
this.responseCache.size + this.netToolsCache.size +
|
|
877
|
-
this.similarityCache.size + this.regexCache.size + (this._enableRequest ? this.requestCache.size : 0),
|
|
875
|
+
totalCacheEntries: this.domainCache.size + this.patternCache.size +
|
|
876
|
+
this.responseCache.size + this.netToolsCache.size +
|
|
877
|
+
this.similarityCache.size + this.regexCache.size + ((this._enableRequest && this.requestCache) ? this.requestCache.size : 0),
|
|
878
878
|
memoryUsageMB: Math.round(heapUsed / 1048576),
|
|
879
879
|
memoryMaxMB: Math.round(maxHeap / 1048576),
|
|
880
880
|
memoryUsagePercent: ((heapUsed / maxHeap) * 100).toFixed(1) + '%',
|
package/nwss.js
CHANGED
|
@@ -185,9 +185,19 @@ if (fs.existsSync(NWSSCONFIG_PATH)) {
|
|
|
185
185
|
const nwssConfig = JSON.parse(fs.readFileSync(NWSSCONFIG_PATH, 'utf-8'));
|
|
186
186
|
// Find which config file is being used (--custom-json <file> or positional .json arg)
|
|
187
187
|
const customJsonIdx = args.findIndex(arg => arg === '--custom-json');
|
|
188
|
+
const positionalJson = (customJsonIdx === -1)
|
|
189
|
+
? args.find(a => a.endsWith('.json') && !a.startsWith('--'))
|
|
190
|
+
: null;
|
|
188
191
|
const configFilename = (customJsonIdx !== -1 && args[customJsonIdx + 1])
|
|
189
192
|
? args[customJsonIdx + 1]
|
|
190
|
-
:
|
|
193
|
+
: positionalJson;
|
|
194
|
+
|
|
195
|
+
// If a positional .json was used (not --custom-json), wire it to --custom-json
|
|
196
|
+
// so the real config loader picks it up instead of defaulting to config.json
|
|
197
|
+
if (positionalJson && customJsonIdx === -1) {
|
|
198
|
+
args.push('--custom-json', positionalJson);
|
|
199
|
+
process.argv.push('--custom-json', positionalJson);
|
|
200
|
+
}
|
|
191
201
|
|
|
192
202
|
if (configFilename && nwssConfig.configs && nwssConfig.configs[configFilename]) {
|
|
193
203
|
const settings = nwssConfig.configs[configFilename];
|
|
@@ -687,6 +697,7 @@ Validation Options:
|
|
|
687
697
|
|
|
688
698
|
Global config.json options:
|
|
689
699
|
ignoreDomains: ["domain.com", "*.ads.com"] Domains to completely ignore (supports wildcards)
|
|
700
|
+
ignoreDomainsByUrl: ["regex1", "regex2"] Regex patterns; if any request URL matches, the request's root domain is ignored for the rest of the scan
|
|
690
701
|
blocked: ["regex1", "regex2"] Global regex patterns to block requests (combined with per-site blocked)
|
|
691
702
|
whois_server_mode: "random" or "cycle" Default server selection mode for all sites (default: random)
|
|
692
703
|
ignore_similar: true/false Ignore domains similar to already found domains (default: true)
|
|
@@ -854,8 +865,9 @@ try {
|
|
|
854
865
|
// Extract config values while ignoring 'comments' field at global and site levels
|
|
855
866
|
const {
|
|
856
867
|
sites = [],
|
|
857
|
-
ignoreDomains = [],
|
|
858
|
-
|
|
868
|
+
ignoreDomains = [],
|
|
869
|
+
ignoreDomainsByUrl = [],
|
|
870
|
+
blocked: globalBlocked = [],
|
|
859
871
|
whois_delay = 3000,
|
|
860
872
|
whois_server_mode = 'random',
|
|
861
873
|
ignore_similar = true,
|
|
@@ -901,6 +913,15 @@ for (const pattern of ignoreDomains) {
|
|
|
901
913
|
}
|
|
902
914
|
}
|
|
903
915
|
|
|
916
|
+
// Compile ignoreDomainsByUrl patterns once — match request URLs to dynamically ignore domains
|
|
917
|
+
const _ignoreDomainsByUrlRegexes = Array.isArray(ignoreDomainsByUrl)
|
|
918
|
+
? ignoreDomainsByUrl.map(p => {
|
|
919
|
+
try { return getCompiledRegex(p); } catch { return null; }
|
|
920
|
+
}).filter(r => r)
|
|
921
|
+
: [];
|
|
922
|
+
// Runtime Set of domains marked ignored by URL pattern matches — shared across all sites in this scan
|
|
923
|
+
const _dynamicallyIgnoredDomains = new Set();
|
|
924
|
+
|
|
904
925
|
// Apply global configuration overrides with validation
|
|
905
926
|
// Priority: Command line args > config.json > defaults
|
|
906
927
|
const MAX_CONCURRENT_SITES = (() => {
|
|
@@ -1312,6 +1333,8 @@ function shouldBypassCacheForUrl(url, siteConfig) {
|
|
|
1312
1333
|
// Cache compiled wildcard regexes to avoid recompilation on every request
|
|
1313
1334
|
const _wildcardRegexCache = new Map();
|
|
1314
1335
|
function matchesIgnoreDomain(domain, ignorePatterns) {
|
|
1336
|
+
// Dynamically ignored domains (from URL pattern matches via ignoreDomainsByUrl)
|
|
1337
|
+
if (_dynamicallyIgnoredDomains.has(domain)) return true;
|
|
1315
1338
|
// Fast path: exact match or suffix match against Set (O(n) for parts, but no regex)
|
|
1316
1339
|
if (_ignoreDomainsExact.size > 0) {
|
|
1317
1340
|
if (_ignoreDomainsExact.has(domain)) return true;
|
|
@@ -1789,6 +1812,10 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1789
1812
|
* @returns {Promise<object>} A promise that resolves to an object containing scan results.
|
|
1790
1813
|
*/
|
|
1791
1814
|
async function processUrl(currentUrl, siteConfig, browserInstance) {
|
|
1815
|
+
// Preserve the original URL (before any redirect) for output display
|
|
1816
|
+
const originalRequestedUrl = currentUrl;
|
|
1817
|
+
// Track regex patterns that produced matches (for title comments in output)
|
|
1818
|
+
const matchedRegexPatterns = new Set();
|
|
1792
1819
|
// V8 Optimization: Single destructuring to avoid multiple property lookups
|
|
1793
1820
|
const {
|
|
1794
1821
|
firstParty,
|
|
@@ -2553,6 +2580,11 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2553
2580
|
const blockedRegexes = Array.isArray(siteConfig.blocked)
|
|
2554
2581
|
? siteConfig.blocked.map(pattern => getCompiledRegex(pattern))
|
|
2555
2582
|
: [];
|
|
2583
|
+
|
|
2584
|
+
// Pre-build Set for O(1) resourceType lookups (fired per request)
|
|
2585
|
+
const allowedResourceTypesSet = Array.isArray(siteConfig.resourceTypes)
|
|
2586
|
+
? new Set(siteConfig.resourceTypes)
|
|
2587
|
+
: null;
|
|
2556
2588
|
|
|
2557
2589
|
// Combine site-specific with pre-compiled global blocked patterns
|
|
2558
2590
|
const allBlockedRegexes = blockedRegexes.length > 0
|
|
@@ -2774,9 +2806,22 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2774
2806
|
bufferedLogWrite(debugLogFile, logEntry);
|
|
2775
2807
|
}
|
|
2776
2808
|
const reqUrl = checkedUrl;
|
|
2777
|
-
|
|
2809
|
+
|
|
2778
2810
|
const reqDomain = perSiteSubDomains ? fullSubdomain : checkedRootDomain;
|
|
2779
2811
|
|
|
2812
|
+
// ignoreDomainsByUrl — if any pattern matches this URL, mark the root domain as ignored for the rest of the scan
|
|
2813
|
+
if (_ignoreDomainsByUrlRegexes.length > 0 && checkedRootDomain && !_dynamicallyIgnoredDomains.has(checkedRootDomain)) {
|
|
2814
|
+
for (let i = 0; i < _ignoreDomainsByUrlRegexes.length; i++) {
|
|
2815
|
+
if (_ignoreDomainsByUrlRegexes[i].test(reqUrl)) {
|
|
2816
|
+
_dynamicallyIgnoredDomains.add(checkedRootDomain);
|
|
2817
|
+
if (forceDebug) {
|
|
2818
|
+
console.log(formatLogMessage('debug', `[ignoreDomainsByUrl] ${checkedRootDomain} ignored — matched pattern: ${_ignoreDomainsByUrlRegexes[i].source}`));
|
|
2819
|
+
}
|
|
2820
|
+
break;
|
|
2821
|
+
}
|
|
2822
|
+
}
|
|
2823
|
+
}
|
|
2824
|
+
|
|
2780
2825
|
let blockedMatchIndex = -1;
|
|
2781
2826
|
for (let i = 0; i < allBlockedRegexes.length; i++) {
|
|
2782
2827
|
if (allBlockedRegexes[i].test(reqUrl)) {
|
|
@@ -2801,14 +2846,14 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2801
2846
|
if (reqDomain && !matchesIgnoreDomain(reqDomain, ignoreDomains)) {
|
|
2802
2847
|
for (const re of regexes) {
|
|
2803
2848
|
if (re.test(reqUrl)) {
|
|
2849
|
+
const evenBlockedRegexPattern = re.source;
|
|
2804
2850
|
const resourceType = request.resourceType();
|
|
2805
|
-
|
|
2851
|
+
|
|
2806
2852
|
// Apply same filtering logic as unblocked requests
|
|
2807
|
-
|
|
2808
|
-
if (!allowedResourceTypes || !Array.isArray(allowedResourceTypes) || allowedResourceTypes.includes(resourceType)) {
|
|
2853
|
+
if (!allowedResourceTypesSet || allowedResourceTypesSet.has(resourceType)) {
|
|
2809
2854
|
if (dryRunMode) {
|
|
2810
2855
|
addDryRunMatch(matchedDomains, {
|
|
2811
|
-
regex:
|
|
2856
|
+
regex: evenBlockedRegexPattern,
|
|
2812
2857
|
domain: reqDomain,
|
|
2813
2858
|
resourceType: resourceType,
|
|
2814
2859
|
fullUrl: reqUrl,
|
|
@@ -2818,10 +2863,11 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2818
2863
|
} else {
|
|
2819
2864
|
addMatchedDomain(reqDomain, resourceType, fullSubdomain);
|
|
2820
2865
|
}
|
|
2821
|
-
|
|
2866
|
+
matchedRegexPatterns.add(evenBlockedRegexPattern);
|
|
2867
|
+
|
|
2822
2868
|
if (siteConfig.verbose === 1) {
|
|
2823
2869
|
const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
|
|
2824
|
-
console.log(formatLogMessage('match', `[${simplifiedCurrentUrl}] ${reqUrl} matched regex: ${
|
|
2870
|
+
console.log(formatLogMessage('match', `[${simplifiedCurrentUrl}] ${reqUrl} matched regex: ${evenBlockedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
|
|
2825
2871
|
}
|
|
2826
2872
|
if (dumpUrls) {
|
|
2827
2873
|
const timestamp = new Date().toISOString();
|
|
@@ -2889,11 +2935,10 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2889
2935
|
|
|
2890
2936
|
// *** UNIVERSAL RESOURCE TYPE FILTER ***
|
|
2891
2937
|
// Check resourceTypes filter FIRST, before ANY processing (nettools, searchstring, immediate matching)
|
|
2892
|
-
|
|
2893
|
-
|
|
2894
|
-
if (!allowedResourceTypes.includes(resourceType)) {
|
|
2938
|
+
if (allowedResourceTypesSet && allowedResourceTypesSet.size > 0) {
|
|
2939
|
+
if (!allowedResourceTypesSet.has(resourceType)) {
|
|
2895
2940
|
if (forceDebug) {
|
|
2896
|
-
console.log(formatLogMessage('debug', `URL ${reqUrl} matches regex but resourceType '${resourceType}' not in allowed types [${
|
|
2941
|
+
console.log(formatLogMessage('debug', `URL ${reqUrl} matches regex but resourceType '${resourceType}' not in allowed types [${Array.from(allowedResourceTypesSet).join(', ')}]. Skipping ALL processing.`));
|
|
2897
2942
|
}
|
|
2898
2943
|
// Skip this URL entirely - doesn't match required resource types
|
|
2899
2944
|
request.continue();
|
|
@@ -2981,6 +3026,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2981
3026
|
} else {
|
|
2982
3027
|
addMatchedDomain(reqDomain, resourceType);
|
|
2983
3028
|
}
|
|
3029
|
+
if (matchedRegexPattern) matchedRegexPatterns.add(matchedRegexPattern);
|
|
2984
3030
|
if (siteConfig.verbose === 1) {
|
|
2985
3031
|
const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
|
|
2986
3032
|
console.log(formatLogMessage('match', `[${simplifiedCurrentUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
|
|
@@ -4011,12 +4057,14 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4011
4057
|
};
|
|
4012
4058
|
const formattedRules = formatRules(matchedDomains, siteConfig, globalOptions);
|
|
4013
4059
|
|
|
4014
|
-
return {
|
|
4015
|
-
url: currentUrl,
|
|
4016
|
-
|
|
4060
|
+
return {
|
|
4061
|
+
url: currentUrl,
|
|
4062
|
+
originalUrl: originalRequestedUrl,
|
|
4063
|
+
rules: formattedRules,
|
|
4017
4064
|
success: true,
|
|
4018
4065
|
finalUrl: finalUrlAfterRedirect || currentUrl,
|
|
4019
|
-
redirectDomains: redirectDomainsToExclude
|
|
4066
|
+
redirectDomains: redirectDomainsToExclude,
|
|
4067
|
+
matchedRegexes: Array.from(matchedRegexPatterns)
|
|
4020
4068
|
};
|
|
4021
4069
|
}
|
|
4022
4070
|
|
|
@@ -4072,13 +4120,15 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4072
4120
|
};
|
|
4073
4121
|
const formattedRules = formatRules(matchedDomains, siteConfig, globalOptions);
|
|
4074
4122
|
if (forceDebug) console.log(formatLogMessage('debug', `Saving ${formattedRules.length} rules despite page load failure`));
|
|
4075
|
-
return {
|
|
4076
|
-
url: currentUrl,
|
|
4077
|
-
|
|
4078
|
-
|
|
4123
|
+
return {
|
|
4124
|
+
url: currentUrl,
|
|
4125
|
+
originalUrl: originalRequestedUrl,
|
|
4126
|
+
rules: formattedRules,
|
|
4127
|
+
success: false,
|
|
4079
4128
|
hasMatches: true,
|
|
4080
4129
|
finalUrl: finalUrlAfterRedirect || currentUrl,
|
|
4081
|
-
redirectDomains: redirectDomainsToExclude
|
|
4130
|
+
redirectDomains: redirectDomainsToExclude,
|
|
4131
|
+
matchedRegexes: Array.from(matchedRegexPatterns)
|
|
4082
4132
|
};
|
|
4083
4133
|
}
|
|
4084
4134
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "2.0.61",
|
|
3
|
+
"version": "2.0.63",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|