@fanboynz/network-scanner 2.0.60 → 2.0.61
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/README.md +51 -1
- package/lib/colorize.js +3 -1
- package/lib/fingerprint.js +41 -11
- package/nwss.js +215 -39
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,27 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to the Network Scanner (nwss.js) project.
|
|
4
4
|
|
|
5
|
+
## [2.0.61] - 2026-03-17
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- `.nwssconfig` file for per-config-file CLI settings — define output, concurrency, flags per JSON config
|
|
9
|
+
- `--no-color` / `--no-colour` flag to disable colors (colors now enabled by default)
|
|
10
|
+
- Navigation timeout fallback — retries with `waitUntil: networkidle2` on timeout, 10s cap
|
|
11
|
+
- Skip domains after 3 timeouts (counted per hostname, not necessarily consecutive) in the same scan to avoid wasting time on down sites
|
|
12
|
+
- Fingerprint cache capped at 500 entries; when full, the oldest-inserted entry is evicted before a new one is stored
|
|
13
|
+
|
|
14
|
+
### Fixed
|
|
15
|
+
- `chrome-error://` popup redirects no longer throw errors — continue processing captured requests
|
|
16
|
+
- Suppressed noisy `about:blank` and `chrome-error://` redirect warnings (visible with `--debug` only)
|
|
17
|
+
- Fallback retry skipped for `chrome-error://` redirects (instant failure, not genuine timeout)
|
|
18
|
+
- Page URL checked before fallback retry to detect already-failed state
|
|
19
|
+
- `.nwssconfig` keys support both hyphens and underscores (`dns-cache` and `dns_cache` both work)
|
|
20
|
+
|
|
21
|
+
### Improved
|
|
22
|
+
- Colors enabled by default — no need for `--color` flag or `color: true` in `.nwssconfig`
|
|
23
|
+
- Chrome UA bumped to 146, Firefox UA bumped to 148
|
|
24
|
+
- Sec-CH-UA headers updated to match Chrome 146
|
|
25
|
+
|
|
5
26
|
## [2.0.60] - 2026-03-16
|
|
6
27
|
|
|
7
28
|
### Added
|
package/README.md
CHANGED
|
@@ -29,7 +29,7 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
|
|
|
29
29
|
|:---------------------------|:------------|
|
|
30
30
|
| `-o, --output <file>` | Output file for rules. If omitted, prints to console |
|
|
31
31
|
| `--compare <file>` | Remove rules that already exist in this file before output |
|
|
32
|
-
| `--color, --colour`
|
|
32
|
+
| `--no-color, --no-colour` | Disable colored console output (colors enabled by default) |
|
|
33
33
|
| `--append` | Append new rules to output file instead of overwriting (requires `-o`) |
|
|
34
34
|
|
|
35
35
|
### Output Format Options
|
|
@@ -64,6 +64,9 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
|
|
|
64
64
|
| `--headful` | Launch browser with GUI (not headless) |
|
|
65
65
|
| `--keep-open` | Keep browser and tabs open after scan completes (use with `--headful` for debugging) |
|
|
66
66
|
| `--use-puppeteer-core` | Use `puppeteer-core` with system Chrome instead of bundled Chromium |
|
|
67
|
+
| `--load-extension <path>` | Load unpacked Chrome extension from directory (can be used multiple times) |
|
|
68
|
+
| `--dns-cache` | Persist dig/whois results to disk between runs (14hr TTL, `.digcache`/`.whoiscache`) |
|
|
69
|
+
| `--block-ads=<files>` | Block ads using EasyList format rules (comma-separated: `easylist.txt,easyprivacy.txt`) |
|
|
67
70
|
| `--cdp` | Enable Chrome DevTools Protocol logging (now per-page if enabled) |
|
|
68
71
|
| `--remove-dupes` | Remove duplicate domains from output (only with `-o`) |
|
|
69
72
|
| `--dry-run` | Console output only: show matching regex, titles, whois/dig/searchstring results, and adblock rules |
|
|
@@ -404,6 +407,53 @@ If a proxy fails mid-scan, Chromium's error code is detected and diagnosed:
|
|
|
404
407
|
|
|
405
408
|
Detected error codes: `ERR_PROXY_CONNECTION_FAILED`, `ERR_SOCKS_CONNECTION_FAILED`, `ERR_TUNNEL_CONNECTION_FAILED`, `ERR_PROXY_AUTH_UNSUPPORTED`, `ERR_PROXY_AUTH_REQUESTED`, `ERR_SOCKS_CONNECTION_HOST_UNREACHABLE`, `ERR_PROXY_CERTIFICATE_INVALID`, `ERR_NO_SUPPORTED_PROXIES`.
|
|
406
409
|
|
|
410
|
+
---
|
|
411
|
+
|
|
412
|
+
## .nwssconfig — Per-Config Settings
|
|
413
|
+
|
|
414
|
+
Create a `.nwssconfig` file in the project root (the directory that contains `nwss.js`) to define CLI settings per config file. When the config filename, exactly as given on the command line, matches a key under `configs`, those settings are automatically applied. CLI flags merge with and override `.nwssconfig` settings.
|
|
415
|
+
|
|
416
|
+
```json
|
|
417
|
+
{
|
|
418
|
+
"configs": {
|
|
419
|
+
"config-clean1.json": {
|
|
420
|
+
"output": "outputfile.txt",
|
|
421
|
+
"max_concurrent": 30,
|
|
422
|
+
"dns_cache": true,
|
|
423
|
+
"cache_requests": true,
|
|
424
|
+
"dumpurls": true,
|
|
425
|
+
"remove_tempfiles": true,
|
|
426
|
+
"color": true
|
|
427
|
+
},
|
|
428
|
+
"config-clean2.json": {
|
|
429
|
+
"output": "outputfile.txt",
|
|
430
|
+
"max_concurrent": 15,
|
|
431
|
+
"dns_cache": true,
|
|
432
|
+
"cache_requests": true,
|
|
433
|
+
"dumpurls": true,
|
|
434
|
+
"remove_tempfiles": true,
|
|
435
|
+
"color": true,
|
|
436
|
+
"debug": true,
|
|
437
|
+
"block_ads": "easylist.txt,easyprivacy.txt"
|
|
438
|
+
}
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
```
|
|
442
|
+
|
|
443
|
+
**Usage:**
|
|
444
|
+
|
|
445
|
+
```bash
|
|
446
|
+
node nwss.js config-clean1.json # uses .nwssconfig settings
|
|
447
|
+
node nwss.js config-clean2.json --debug # .nwssconfig + debug override
|
|
448
|
+
node nwss.js config-other.json --max-concurrent 5 # no match in .nwssconfig, uses CLI flags
|
|
449
|
+
```
|
|
450
|
+
|
|
451
|
+
**Supported settings:** `output`, `max_concurrent`, `dns_cache`, `cache_requests`, `dumpurls`, `remove_tempfiles`, `color`, `remove_dupes`, `compress_logs`, `debug`, `silent`, `verbose`, `headful`, `keep_open`, `dry_run`, `titles`, `sub_domains`, `no_interact`, `ghost_cursor`, `plain`, `cdp`, `dnsmasq`, `unbound`, `privoxy`, `pihole`, `eval_on_doc`, `use_puppeteer_core`, `ignore_cache`, `clear_cache`, `block_ads`, `compare`, `localhost`, `append`.
|
|
452
|
+
|
|
453
|
+
**Priority:** CLI flags > `.nwssconfig` > hardcoded defaults.
|
|
454
|
+
|
|
455
|
+
---
|
|
456
|
+
|
|
407
457
|
### Global Configuration Options
|
|
408
458
|
|
|
409
459
|
These options go at the root level of your config.json:
|
package/lib/colorize.js
CHANGED
|
@@ -7,7 +7,9 @@
|
|
|
7
7
|
* @returns {boolean} False if the --no-color or --no-colour flag is present; true otherwise (colors are enabled by default)
|
|
8
8
|
*/
|
|
9
9
|
function shouldEnableColors() {
|
|
10
|
-
|
|
10
|
+
// Colors enabled by default, use --no-color to disable
|
|
11
|
+
if (process.argv.includes('--no-color') || process.argv.includes('--no-colour')) return false;
|
|
12
|
+
return true;
|
|
11
13
|
}
|
|
12
14
|
|
|
13
15
|
// Initialize color support based on command line flags
|
package/lib/fingerprint.js
CHANGED
|
@@ -22,6 +22,7 @@ function seededRandom(seed) {
|
|
|
22
22
|
|
|
23
23
|
// Cache fingerprints per domain so reloads and multi-page visits stay consistent
|
|
24
24
|
const _fingerprintCache = new Map();
|
|
25
|
+
const FINGERPRINT_CACHE_MAX = 500;
|
|
25
26
|
|
|
26
27
|
// Type-specific property spoofing functions for monomorphic optimization
|
|
27
28
|
// Built-in properties that should not be modified
|
|
@@ -32,12 +33,12 @@ const BUILT_IN_PROPERTIES = new Set([
|
|
|
32
33
|
|
|
33
34
|
// User agent collections with latest versions
|
|
34
35
|
const USER_AGENT_COLLECTIONS = Object.freeze(new Map([
|
|
35
|
-
['chrome', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
36
|
-
['chrome_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
37
|
-
['chrome_linux', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
38
|
-
['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:
|
|
39
|
-
['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:
|
|
40
|
-
['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:
|
|
36
|
+
['chrome', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"],
|
|
37
|
+
['chrome_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"],
|
|
38
|
+
['chrome_linux', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"],
|
|
39
|
+
['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:148.0) Gecko/20100101 Firefox/148.0"],
|
|
40
|
+
['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:148.0) Gecko/20100101 Firefox/148.0"],
|
|
41
|
+
['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:148.0) Gecko/20100101 Firefox/148.0"],
|
|
41
42
|
['safari', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"]
|
|
42
43
|
]));
|
|
43
44
|
|
|
@@ -237,7 +238,12 @@ function generateRealisticFingerprint(userAgent, domain = '') {
|
|
|
237
238
|
};
|
|
238
239
|
|
|
239
240
|
// Cache for this domain
|
|
240
|
-
if (domain)
|
|
241
|
+
if (domain) {
|
|
242
|
+
if (_fingerprintCache.size >= FINGERPRINT_CACHE_MAX) {
|
|
243
|
+
_fingerprintCache.delete(_fingerprintCache.keys().next().value);
|
|
244
|
+
}
|
|
245
|
+
_fingerprintCache.set(domain, fingerprint);
|
|
246
|
+
}
|
|
241
247
|
|
|
242
248
|
return fingerprint;
|
|
243
249
|
}
|
|
@@ -495,7 +501,7 @@ async function applyUserAgentSpoofing(page, siteConfig, forceDebug, currentUrl)
|
|
|
495
501
|
}),
|
|
496
502
|
getManifest: () => ({
|
|
497
503
|
name: "Chrome",
|
|
498
|
-
version: "
|
|
504
|
+
version: "146.0.0.0",
|
|
499
505
|
manifest_version: 3,
|
|
500
506
|
description: "Chrome Browser"
|
|
501
507
|
}),
|
|
@@ -1609,18 +1615,41 @@ async function applyUserAgentSpoofing(page, siteConfig, forceDebug, currentUrl)
|
|
|
1609
1615
|
}, 'enhanced mouse/pointer spoofing');
|
|
1610
1616
|
|
|
1611
1617
|
safeExecute(() => {
|
|
1612
|
-
//
|
|
1618
|
+
// Neutralize CDP fingerprinting traps and filter DevTools traces
|
|
1619
|
+
// CDP's Runtime.enable causes the inspector to read properties on console-logged objects.
|
|
1620
|
+
// Detection scripts exploit this via console.debug with Error objects (custom .stack getters)
|
|
1621
|
+
// or objects with Proxy prototypes. Only override console.debug — safest, minimal footprint.
|
|
1622
|
+
|
|
1613
1623
|
const originalConsoleDebug = console.debug;
|
|
1614
1624
|
console.debug = function(...args) {
|
|
1625
|
+
// Filter DevTools-related messages
|
|
1615
1626
|
const message = args.join(' ');
|
|
1616
1627
|
if (typeof message === 'string' && (
|
|
1617
1628
|
message.includes('DevTools') ||
|
|
1618
1629
|
message.includes('Runtime.evaluate') ||
|
|
1619
1630
|
message.includes('Page.addScriptToEvaluateOnNewDocument') ||
|
|
1620
1631
|
message.includes('Protocol error'))) {
|
|
1621
|
-
return;
|
|
1632
|
+
return;
|
|
1622
1633
|
}
|
|
1623
|
-
|
|
1634
|
+
// Sanitize args to neutralize CDP fingerprinting traps
|
|
1635
|
+
const sanitized = args.map(arg => {
|
|
1636
|
+
// Strip Error objects with custom .stack getters (CDP inspector reads .stack)
|
|
1637
|
+
if (arg instanceof Error) {
|
|
1638
|
+
const desc = Object.getOwnPropertyDescriptor(arg, 'stack');
|
|
1639
|
+
if (desc && desc.get) return `${arg.name}: ${arg.message}`;
|
|
1640
|
+
}
|
|
1641
|
+
// Neutralize Proxy prototype traps (CDP inspector walks prototype chain)
|
|
1642
|
+
if (arg !== null && typeof arg === 'object') {
|
|
1643
|
+
try {
|
|
1644
|
+
const proto = Object.getPrototypeOf(arg);
|
|
1645
|
+
if (proto && proto !== Object.prototype && proto !== Array.prototype) {
|
|
1646
|
+
try { Object.keys(proto); } catch { return '[object Object]'; }
|
|
1647
|
+
}
|
|
1648
|
+
} catch { return '[object Object]'; }
|
|
1649
|
+
}
|
|
1650
|
+
return arg;
|
|
1651
|
+
});
|
|
1652
|
+
return originalConsoleDebug.apply(this, sanitized);
|
|
1624
1653
|
};
|
|
1625
1654
|
|
|
1626
1655
|
}, 'console error suppression');
|
|
@@ -1670,6 +1699,7 @@ async function applyUserAgentSpoofing(page, siteConfig, forceDebug, currentUrl)
|
|
|
1670
1699
|
if (typeof window.Image === 'function') maskAsNative(window.Image, 'Image');
|
|
1671
1700
|
if (typeof window.fetch === 'function') maskAsNative(window.fetch, 'fetch');
|
|
1672
1701
|
if (typeof window.PointerEvent === 'function') maskAsNative(window.PointerEvent, 'PointerEvent');
|
|
1702
|
+
if (typeof console.debug === 'function') maskAsNative(console.debug, 'debug');
|
|
1673
1703
|
|
|
1674
1704
|
// Mask property getters on navigator
|
|
1675
1705
|
const navProps = ['userAgentData', 'connection', 'pdfViewerEnabled', 'webdriver',
|
package/nwss.js
CHANGED
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
// puppeteer for browser automation, fs for file system operations, psl for domain parsing.
|
|
4
4
|
// const pLimit = require('p-limit'); // Will be dynamically imported
|
|
5
|
-
const
|
|
5
|
+
const useObscura = process.argv.includes('--use-obscura');
|
|
6
|
+
const usePuppeteerCore = process.argv.includes('--use-puppeteer-core') || useObscura;
|
|
6
7
|
const puppeteer = usePuppeteerCore ? require('puppeteer-core') : require('puppeteer');
|
|
7
8
|
const fs = require('fs');
|
|
8
9
|
const os = require('os');
|
|
@@ -104,12 +105,12 @@ const CONCURRENCY_LIMITS = Object.freeze({
|
|
|
104
105
|
|
|
105
106
|
// V8 Optimization: Use Map for user agent lookups instead of object
|
|
106
107
|
const USER_AGENTS = Object.freeze(new Map([
|
|
107
|
-
['chrome', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
108
|
-
['chrome_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
109
|
-
['chrome_linux', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
110
|
-
['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:
|
|
111
|
-
['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:
|
|
112
|
-
['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:
|
|
108
|
+
['chrome', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"],
|
|
109
|
+
['chrome_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"],
|
|
110
|
+
['chrome_linux', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"],
|
|
111
|
+
['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:148.0) Gecko/20100101 Firefox/148.0"],
|
|
112
|
+
['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:148.0) Gecko/20100101 Firefox/148.0"],
|
|
113
|
+
['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:148.0) Gecko/20100101 Firefox/148.0"],
|
|
113
114
|
['safari', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"]
|
|
114
115
|
]));
|
|
115
116
|
|
|
@@ -177,6 +178,82 @@ if (args.length === 0) {
|
|
|
177
178
|
args.push('--help');
|
|
178
179
|
}
|
|
179
180
|
|
|
181
|
+
// --- .nwssconfig support: inject per-config settings into args ---
|
|
182
|
+
const NWSSCONFIG_PATH = path.join(__dirname, '.nwssconfig');
|
|
183
|
+
if (fs.existsSync(NWSSCONFIG_PATH)) {
|
|
184
|
+
try {
|
|
185
|
+
const nwssConfig = JSON.parse(fs.readFileSync(NWSSCONFIG_PATH, 'utf-8'));
|
|
186
|
+
// Find which config file is being used (--custom-json <file> or positional .json arg)
|
|
187
|
+
const customJsonIdx = args.findIndex(arg => arg === '--custom-json');
|
|
188
|
+
const configFilename = (customJsonIdx !== -1 && args[customJsonIdx + 1])
|
|
189
|
+
? args[customJsonIdx + 1]
|
|
190
|
+
: args.find(a => a.endsWith('.json') && !a.startsWith('--'));
|
|
191
|
+
|
|
192
|
+
if (configFilename && nwssConfig.configs && nwssConfig.configs[configFilename]) {
|
|
193
|
+
const settings = nwssConfig.configs[configFilename];
|
|
194
|
+
const originalArgs = args.join(' ');
|
|
195
|
+
|
|
196
|
+
// Map settings keys to CLI flags — only inject if not already in args
|
|
197
|
+
const settingsMap = {
|
|
198
|
+
output: ['-o', '--output'],
|
|
199
|
+
max_concurrent: ['--max-concurrent'],
|
|
200
|
+
dns_cache: ['--dns-cache'],
|
|
201
|
+
cache_requests: ['--cache-requests'],
|
|
202
|
+
dumpurls: ['--dumpurls'],
|
|
203
|
+
remove_tempfiles: ['--remove-tempfiles'],
|
|
204
|
+
color: ['--color'],
|
|
205
|
+
remove_dupes: ['--remove-dupes', '--remove-dubes'],
|
|
206
|
+
'remove-dupes': ['--remove-dupes', '--remove-dubes'],
|
|
207
|
+
'remove-dubes': ['--remove-dupes', '--remove-dubes'],
|
|
208
|
+
compress_logs: ['--compress-logs'],
|
|
209
|
+
debug: ['--debug'],
|
|
210
|
+
silent: ['--silent'],
|
|
211
|
+
verbose: ['--verbose'],
|
|
212
|
+
headful: ['--headful'],
|
|
213
|
+
keep_open: ['--keep-open'],
|
|
214
|
+
dry_run: ['--dry-run'],
|
|
215
|
+
titles: ['--titles'],
|
|
216
|
+
sub_domains: ['--sub-domains'],
|
|
217
|
+
no_interact: ['--no-interact'],
|
|
218
|
+
ghost_cursor: ['--ghost-cursor'],
|
|
219
|
+
plain: ['--plain'],
|
|
220
|
+
cdp: ['--cdp'],
|
|
221
|
+
dnsmasq: ['--dnsmasq'],
|
|
222
|
+
unbound: ['--unbound'],
|
|
223
|
+
privoxy: ['--privoxy'],
|
|
224
|
+
pihole: ['--pihole'],
|
|
225
|
+
eval_on_doc: ['--eval-on-doc'],
|
|
226
|
+
use_puppeteer_core: ['--use-puppeteer-core'],
|
|
227
|
+
ignore_cache: ['--ignore-cache'],
|
|
228
|
+
clear_cache: ['--clear-cache'],
|
|
229
|
+
block_ads: ['--block-ads'],
|
|
230
|
+
compare: ['--compare'],
|
|
231
|
+
localhost: ['--localhost'],
|
|
232
|
+
append: ['--append']
|
|
233
|
+
};
|
|
234
|
+
|
|
235
|
+
for (const [key, flags] of Object.entries(settingsMap)) {
|
|
236
|
+
// Support both underscore and hyphen variants (e.g. dns_cache or dns-cache)
|
|
237
|
+
const value = settings[key] !== undefined ? settings[key]
|
|
238
|
+
: settings[key.replace(/_/g, '-')] !== undefined ? settings[key.replace(/_/g, '-')]
|
|
239
|
+
: settings[key.replace(/-/g, '_')] !== undefined ? settings[key.replace(/-/g, '_')]
|
|
240
|
+
: undefined;
|
|
241
|
+
if (value === undefined) continue;
|
|
242
|
+
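// Skip if any variant of the flag is already in CLI args. NOTE(review): this is a substring test against the space-joined args string, so a short flag such as -o also matches inside longer flags (e.g. --keep-open) — confirm whether an exact per-argument match is intended.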
|
|
243
|
+
if (flags.some(f => originalArgs.includes(f))) continue;
|
|
244
|
+
|
|
245
|
+
if (typeof value === 'boolean') {
|
|
246
|
+
if (value) args.push(flags[flags.length - 1]);
|
|
247
|
+
} else if (typeof value === 'string' || typeof value === 'number') {
|
|
248
|
+
args.push(flags[flags.length - 1], String(value));
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
} catch (e) {
|
|
253
|
+
console.error(`Warning: Failed to parse .nwssconfig: ${e.message}`);
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
|
|
180
257
|
const headfulMode = args.includes('--headful');
|
|
181
258
|
const SOURCES_FOLDER = 'sources';
|
|
182
259
|
|
|
@@ -565,6 +642,12 @@ Request Blocking:
|
|
|
565
642
|
--block-ads=<file> Block ads/trackers using EasyList format rules (||domain.com^, /ads/*, etc)
|
|
566
643
|
Works at request-level for maximum performance
|
|
567
644
|
|
|
645
|
+
Per-config settings file (.nwssconfig):
|
|
646
|
+
Place a .nwssconfig file in the project root to define per-config settings.
|
|
647
|
+
When a config filename matches a key in .nwssconfig, those settings are used.
|
|
648
|
+
CLI flags merge with and override .nwssconfig settings.
|
|
649
|
+
See README.md for format details.
|
|
650
|
+
|
|
568
651
|
General Options:
|
|
569
652
|
--verbose Force verbose mode globally
|
|
570
653
|
--debug Force debug mode globally
|
|
@@ -580,6 +663,8 @@ General Options:
|
|
|
580
663
|
--headful Launch browser with GUI (not headless)
|
|
581
664
|
--keep-open Keep browser open after scan completes (use with --headful)
|
|
582
665
|
--use-puppeteer-core Use puppeteer-core with system Chrome instead of bundled Chromium
|
|
666
|
+
--use-obscura Connect to running Obscura CDP server (ws://127.0.0.1:9222 or OBSCURA_WS env)
|
|
667
|
+
Skips fingerprint injection — Obscura provides built-in stealth
|
|
583
668
|
--load-extension <path> Load unpacked Chrome extension from directory
|
|
584
669
|
--cdp Enable Chrome DevTools Protocol logging (now per-page if enabled)
|
|
585
670
|
--remove-dupes Remove duplicate domains from output (only with -o)
|
|
@@ -788,6 +873,23 @@ const globalBlockedRegexes = Array.isArray(globalBlocked)
|
|
|
788
873
|
? globalBlocked.map(pattern => new RegExp(pattern))
|
|
789
874
|
: [];
|
|
790
875
|
|
|
876
|
+
// Cache compiled regexes by pattern string — avoids recompiling same patterns across URLs
|
|
877
|
+
const _compiledRegexCache = new Map();
|
|
878
|
+
function getCompiledRegex(pattern) {
|
|
879
|
+
let compiled = _compiledRegexCache.get(pattern);
|
|
880
|
+
if (!compiled) {
|
|
881
|
+
compiled = new RegExp(pattern.replace(/^\/(.*)\/$/, '$1'));
|
|
882
|
+
if (_compiledRegexCache.size > 2000) _compiledRegexCache.clear();
|
|
883
|
+
_compiledRegexCache.set(pattern, compiled);
|
|
884
|
+
}
|
|
885
|
+
return compiled;
|
|
886
|
+
}
|
|
887
|
+
function getCompiledRegexes(patterns) {
|
|
888
|
+
if (!patterns) return [];
|
|
889
|
+
const arr = Array.isArray(patterns) ? patterns : [patterns];
|
|
890
|
+
return arr.map(p => getCompiledRegex(p));
|
|
891
|
+
}
|
|
892
|
+
|
|
791
893
|
// Pre-split ignoreDomains into exact Set (O(1) lookup) and wildcard array
|
|
792
894
|
const _ignoreDomainsExact = new Set();
|
|
793
895
|
const _ignoreDomainsWildcard = [];
|
|
@@ -1116,12 +1218,19 @@ if (forceDebug && globalComments) {
|
|
|
1116
1218
|
* @param {string} url - The URL string to parse.
|
|
1117
1219
|
* @returns {string} The root domain, or the original hostname if parsing fails (e.g., for IP addresses or invalid URLs), or an empty string on error.
|
|
1118
1220
|
*/
|
|
1221
|
+
const _rootDomainCache = new Map();
|
|
1119
1222
|
function getRootDomain(url) {
|
|
1223
|
+
const cached = _rootDomainCache.get(url);
|
|
1224
|
+
if (cached !== undefined) return cached;
|
|
1120
1225
|
try {
|
|
1121
1226
|
const { hostname } = new URL(url);
|
|
1122
1227
|
const parsed = psl.parse(hostname);
|
|
1123
|
-
|
|
1228
|
+
const result = parsed.domain || hostname;
|
|
1229
|
+
if (_rootDomainCache.size > 5000) _rootDomainCache.clear();
|
|
1230
|
+
_rootDomainCache.set(url, result);
|
|
1231
|
+
return result;
|
|
1124
1232
|
} catch {
|
|
1233
|
+
_rootDomainCache.set(url, '');
|
|
1125
1234
|
return '';
|
|
1126
1235
|
}
|
|
1127
1236
|
}
|
|
@@ -1416,6 +1525,23 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1416
1525
|
* @returns {Promise<import('puppeteer').Browser>} Browser instance
|
|
1417
1526
|
*/
|
|
1418
1527
|
async function createBrowser(extraArgs = []) {
|
|
1528
|
+
// Obscura mode: connect to a running Obscura CDP server instead of launching Chrome
|
|
1529
|
+
if (useObscura) {
|
|
1530
|
+
const obscuraEndpoint = process.env.OBSCURA_WS || 'ws://127.0.0.1:9222/devtools/browser';
|
|
1531
|
+
if (forceDebug) console.log(formatLogMessage('debug', `Connecting to Obscura at ${obscuraEndpoint}`));
|
|
1532
|
+
try {
|
|
1533
|
+
const browser = await puppeteer.connect({ browserWSEndpoint: obscuraEndpoint });
|
|
1534
|
+
if (!silentMode) console.log(messageColors.success(`Connected to Obscura CDP at ${obscuraEndpoint}`));
|
|
1535
|
+
browser._nwssUserDataDir = null; // No temp dir to clean
|
|
1536
|
+
browser._nwssIsObscura = true;
|
|
1537
|
+
return browser;
|
|
1538
|
+
} catch (err) {
|
|
1539
|
+
console.error(formatLogMessage('error', `Failed to connect to Obscura: ${err.message}`));
|
|
1540
|
+
console.error(formatLogMessage('error', `Start Obscura first: obscura serve --port 9222 --stealth`));
|
|
1541
|
+
process.exit(1);
|
|
1542
|
+
}
|
|
1543
|
+
}
|
|
1544
|
+
|
|
1419
1545
|
// Create temporary user data directory that we can fully control and clean up
|
|
1420
1546
|
const tempUserDataDir = path.join(os.tmpdir(), `puppeteer-${Date.now()}-${Math.random().toString(36).substring(7)}`);
|
|
1421
1547
|
userDataDir = tempUserDataDir; // Store for cleanup tracking (use outer scope variable)
|
|
@@ -2231,11 +2357,16 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2231
2357
|
}
|
|
2232
2358
|
|
|
2233
2359
|
// --- Apply all fingerprint spoofing (user agent, Brave, fingerprint protection) ---
|
|
2360
|
+
// Skip when using Obscura — it has built-in stealth that conflicts with our injection
|
|
2234
2361
|
try {
|
|
2235
|
-
|
|
2362
|
+
if (!useObscura) {
|
|
2363
|
+
await applyAllFingerprintSpoofing(page, siteConfig, forceDebug, currentUrl);
|
|
2364
|
+
} else if (forceDebug) {
|
|
2365
|
+
console.log(formatLogMessage('debug', `Skipping fingerprint injection — Obscura provides built-in stealth`));
|
|
2366
|
+
}
|
|
2236
2367
|
|
|
2237
|
-
// Client Hints protection for Chrome user agents
|
|
2238
|
-
if (siteConfig.userAgent && siteConfig.userAgent.toLowerCase().includes('chrome')) {
|
|
2368
|
+
// Client Hints protection for Chrome user agents (skipped under Obscura — it sets its own)
|
|
2369
|
+
if (!useObscura && siteConfig.userAgent && siteConfig.userAgent.toLowerCase().includes('chrome')) {
|
|
2239
2370
|
const userAgentKey = siteConfig.userAgent.toLowerCase();
|
|
2240
2371
|
let platform = 'Windows';
|
|
2241
2372
|
let platformVersion = '15.0.0';
|
|
@@ -2252,14 +2383,14 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2252
2383
|
}
|
|
2253
2384
|
|
|
2254
2385
|
await page.setExtraHTTPHeaders({
|
|
2255
|
-
'Sec-CH-UA': '"Not:A-Brand";v="99", "Google Chrome";v="
|
|
2386
|
+
'Sec-CH-UA': '"Not:A-Brand";v="99", "Google Chrome";v="146", "Chromium";v="146"',
|
|
2256
2387
|
'Sec-CH-UA-Platform': `"${platform}"`,
|
|
2257
2388
|
'Sec-CH-UA-Platform-Version': `"${platformVersion}"`,
|
|
2258
2389
|
'Sec-CH-UA-Mobile': '?0',
|
|
2259
2390
|
'Sec-CH-UA-Arch': `"${arch}"`,
|
|
2260
2391
|
'Sec-CH-UA-Bitness': '"64"',
|
|
2261
|
-
'Sec-CH-UA-Full-Version': '"
|
|
2262
|
-
'Sec-CH-UA-Full-Version-List': '"Not:A-Brand";v="99.0.0.0", "Google Chrome";v="
|
|
2392
|
+
'Sec-CH-UA-Full-Version': '"146.0.0.0"',
|
|
2393
|
+
'Sec-CH-UA-Full-Version-List': '"Not:A-Brand";v="99.0.0.0", "Google Chrome";v="146.0.0.0", "Chromium";v="146.0.0.0"'
|
|
2263
2394
|
});
|
|
2264
2395
|
}
|
|
2265
2396
|
} catch (fingerprintErr) {
|
|
@@ -2272,11 +2403,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2272
2403
|
}
|
|
2273
2404
|
}
|
|
2274
2405
|
|
|
2275
|
-
const regexes =
|
|
2276
|
-
? siteConfig.filterRegex.map(r => new RegExp(r.replace(/^\/(.*)\/$/, '$1')))
|
|
2277
|
-
: siteConfig.filterRegex
|
|
2278
|
-
? [new RegExp(siteConfig.filterRegex.replace(/^\/(.*)\/$/, '$1'))]
|
|
2279
|
-
: [];
|
|
2406
|
+
const regexes = getCompiledRegexes(siteConfig.filterRegex);
|
|
2280
2407
|
|
|
2281
2408
|
// NEW: Get regex_and setting (defaults to false for backward compatibility)
|
|
2282
2409
|
const useRegexAnd = siteConfig.regex_and === true;
|
|
@@ -2424,7 +2551,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2424
2551
|
}
|
|
2425
2552
|
|
|
2426
2553
|
const blockedRegexes = Array.isArray(siteConfig.blocked)
|
|
2427
|
-
? siteConfig.blocked.map(pattern =>
|
|
2554
|
+
? siteConfig.blocked.map(pattern => getCompiledRegex(pattern))
|
|
2428
2555
|
: [];
|
|
2429
2556
|
|
|
2430
2557
|
// Combine site-specific with pre-compiled global blocked patterns
|
|
@@ -3201,7 +3328,23 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3201
3328
|
? { ...defaultGotoOptions, ...siteConfig.goto_options } : defaultGotoOptions;
|
|
3202
3329
|
|
|
3203
3330
|
// Enhanced navigation with redirect handling - passes existing gotoOptions
|
|
3204
|
-
|
|
3331
|
+
let navigationResult;
|
|
3332
|
+
try {
|
|
3333
|
+
navigationResult = await navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOptions, forceDebug, formatLogMessage);
|
|
3334
|
+
} catch (navErr) {
|
|
3335
|
+
// Only retry on genuine timeouts, not chrome-error:// redirects
|
|
3336
|
+
let pageUrl = '';
|
|
3337
|
+
try { if (!page.isClosed()) pageUrl = page.url(); } catch {}
|
|
3338
|
+
const isPopupFailure = navErr.message.includes('chrome-error://') || navErr.message.includes('invalid URL') ||
|
|
3339
|
+
pageUrl.startsWith('chrome-error://') || pageUrl === 'about:blank';
|
|
3340
|
+
if ((navErr.message.includes('timeout') || navErr.message.includes('Timeout')) && !isPopupFailure) {
|
|
3341
|
+
if (forceDebug) console.log(formatLogMessage('debug', `Navigation timeout, retrying with waitUntil:networkidle2 for ${currentUrl}`));
|
|
3342
|
+
const fallbackOptions = { ...gotoOptions, waitUntil: 'networkidle2', timeout: Math.min(timeout, 10000) };
|
|
3343
|
+
navigationResult = await navigateWithRedirectHandling(page, currentUrl, siteConfig, fallbackOptions, forceDebug, formatLogMessage);
|
|
3344
|
+
} else {
|
|
3345
|
+
throw navErr;
|
|
3346
|
+
}
|
|
3347
|
+
}
|
|
3205
3348
|
|
|
3206
3349
|
const { finalUrl, redirected, redirectChain, originalUrl, redirectDomains } = navigationResult;
|
|
3207
3350
|
|
|
@@ -3257,7 +3400,8 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3257
3400
|
}
|
|
3258
3401
|
|
|
3259
3402
|
if (originalDomain !== finalDomain) {
|
|
3260
|
-
|
|
3403
|
+
const isPopupRedirect = !finalUrl || finalUrl === 'about:blank' || finalUrl.startsWith('chrome-error://');
|
|
3404
|
+
if (!silentMode && !isPopupRedirect) {
|
|
3261
3405
|
console.log(`🔄 Redirect detected: ${originalDomain} → ${finalDomain}`);
|
|
3262
3406
|
}
|
|
3263
3407
|
|
|
@@ -3284,13 +3428,11 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3284
3428
|
}
|
|
3285
3429
|
}
|
|
3286
3430
|
} else {
|
|
3287
|
-
// Invalid final URL
|
|
3288
|
-
console.warn(`⚠ Redirect to invalid URL ignored: ${originalDomain} → ${finalUrl}`);
|
|
3431
|
+
// Invalid final URL (ad popup redirect) - continue with original URL
|
|
3289
3432
|
if (forceDebug) {
|
|
3290
|
-
console.log(formatLogMessage('debug', `
|
|
3433
|
+
console.log(formatLogMessage('debug', `Popup redirect ignored: ${originalDomain} → ${finalUrl}, keeping original: ${originalUrl}`));
|
|
3291
3434
|
}
|
|
3292
|
-
//
|
|
3293
|
-
throw new Error(`Redirect resulted in invalid URL: ${finalUrl}`);
|
|
3435
|
+
// Continue with original URL — requests captured before the redirect are still valid
|
|
3294
3436
|
}
|
|
3295
3437
|
}
|
|
3296
3438
|
}
|
|
@@ -3431,7 +3573,10 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3431
3573
|
const timeoutResult = await handleRedirectTimeout(page, currentUrl, err, safeGetDomain, forceDebug, formatLogMessage);
|
|
3432
3574
|
|
|
3433
3575
|
if (timeoutResult.success) {
|
|
3434
|
-
|
|
3576
|
+
const isPopupRedirect = timeoutResult.finalUrl && (timeoutResult.finalUrl === 'about:blank' || timeoutResult.finalUrl.startsWith('chrome-error://'));
|
|
3577
|
+
if (!isPopupRedirect) {
|
|
3578
|
+
console.log(`⚠ Partial redirect timeout recovered: ${safeGetDomain(currentUrl)} → ${safeGetDomain(timeoutResult.finalUrl)}`);
|
|
3579
|
+
}
|
|
3435
3580
|
currentUrl = timeoutResult.finalUrl; // Use the partial redirect URL
|
|
3436
3581
|
siteCounter++;
|
|
3437
3582
|
// Continue processing with the redirected URL instead of throwing error
|
|
@@ -4060,6 +4205,10 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4060
4205
|
}
|
|
4061
4206
|
}
|
|
4062
4207
|
|
|
4208
|
+
// Track domain timeout counts — skip domain after 3 failures
|
|
4209
|
+
const domainTimeoutCounts = new Map();
|
|
4210
|
+
const DOMAIN_TIMEOUT_THRESHOLD = 3;
|
|
4211
|
+
|
|
4063
4212
|
// Enhanced hang detection with browser restart recovery
|
|
4064
4213
|
let currentBatchInfo = { batchStart: 0, batchSize: 0 };
|
|
4065
4214
|
let lastProcessedCount = 0;
|
|
@@ -4285,8 +4434,17 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4285
4434
|
console.log(formatLogMessage('debug', `[CONCURRENCY] Starting ${batchSize} concurrent tasks with limit ${MAX_CONCURRENT_SITES}`));
|
|
4286
4435
|
}
|
|
4287
4436
|
|
|
4288
|
-
// Create tasks with timeout protection
|
|
4289
|
-
const batchTasks = currentBatch.map(task => originalLimit(() =>
|
|
4437
|
+
// Create tasks with timeout protection — skip domains that repeatedly timed out
|
|
4438
|
+
const batchTasks = currentBatch.map(task => originalLimit(() => {
|
|
4439
|
+
try {
|
|
4440
|
+
const taskDomain = new URL(task.url).hostname;
|
|
4441
|
+
if ((domainTimeoutCounts.get(taskDomain) || 0) >= DOMAIN_TIMEOUT_THRESHOLD) {
|
|
4442
|
+
if (!silentMode) console.log(formatLogMessage('info', `Skipping ${task.url} — ${taskDomain} timed out ${DOMAIN_TIMEOUT_THRESHOLD} times`));
|
|
4443
|
+
return { url: task.url, rules: [], success: false, error: 'Domain repeatedly timed out', skipped: true };
|
|
4444
|
+
}
|
|
4445
|
+
} catch {}
|
|
4446
|
+
return processUrl(task.url, task.config, browser);
|
|
4447
|
+
}));
|
|
4290
4448
|
|
|
4291
4449
|
let batchResults;
|
|
4292
4450
|
try {
|
|
@@ -4316,6 +4474,16 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4316
4474
|
}
|
|
4317
4475
|
}
|
|
4318
4476
|
|
|
4477
|
+
// Track domain timeout counts — skip after threshold
|
|
4478
|
+
for (const result of batchResults) {
|
|
4479
|
+
if (!result.success && !result.skipped && result.error && result.error.includes('timeout')) {
|
|
4480
|
+
try {
|
|
4481
|
+
const domain = new URL(result.url).hostname;
|
|
4482
|
+
domainTimeoutCounts.set(domain, (domainTimeoutCounts.get(domain) || 0) + 1);
|
|
4483
|
+
} catch {}
|
|
4484
|
+
}
|
|
4485
|
+
}
|
|
4486
|
+
|
|
4319
4487
|
// IMPROVED: Much more conservative emergency restart logic
|
|
4320
4488
|
const criticalRestartCount = batchResults.filter(r => r.needsImmediateRestart).length;
|
|
4321
4489
|
// Require either:
|
|
@@ -4651,15 +4819,23 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4651
4819
|
if (forceDebug) console.log(formatLogMessage('debug', `Browser connection check failed: ${connErr.message}`));
|
|
4652
4820
|
}
|
|
4653
4821
|
|
|
4654
|
-
|
|
4655
|
-
|
|
4656
|
-
|
|
4657
|
-
|
|
4658
|
-
|
|
4659
|
-
|
|
4660
|
-
|
|
4661
|
-
|
|
4662
|
-
|
|
4822
|
+
// Obscura: just disconnect, don't kill — we don't own the browser process
|
|
4823
|
+
let cleanupResult;
|
|
4824
|
+
if (browser._nwssIsObscura) {
|
|
4825
|
+
try { await browser.disconnect(); } catch {}
|
|
4826
|
+
cleanupResult = { success: true, browserClosed: true, tempFilesCleanedCount: 0, userDataCleaned: false, errors: [] };
|
|
4827
|
+
if (forceDebug) console.log(formatLogMessage('debug', `Disconnected from Obscura (process left running)`));
|
|
4828
|
+
} else {
|
|
4829
|
+
cleanupResult = await handleBrowserExit(browser, {
|
|
4830
|
+
forceDebug,
|
|
4831
|
+
timeout: 10000,
|
|
4832
|
+
exitOnFailure: true,
|
|
4833
|
+
cleanTempFiles: true,
|
|
4834
|
+
comprehensiveCleanup: removeTempFiles,
|
|
4835
|
+
userDataDir: browser._nwssUserDataDir,
|
|
4836
|
+
verbose: !silentMode && removeTempFiles
|
|
4837
|
+
});
|
|
4838
|
+
}
|
|
4663
4839
|
|
|
4664
4840
|
if (forceDebug) {
|
|
4665
4841
|
console.log(formatLogMessage('debug', `Final cleanup results: ${cleanupResult.success ? 'success' : 'failed'}`));
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.61",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|