@fanboynz/network-scanner 2.0.60 → 2.0.62

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,27 @@
2
2
 
3
3
  All notable changes to the Network Scanner (nwss.js) project.
4
4
 
5
+ ## [2.0.61] - 2026-03-17
6
+
7
+ ### Added
8
+ - `.nwssconfig` file for per-config-file CLI settings — define output, concurrency, flags per JSON config
9
+ - `--no-color` / `--no-colour` flag to disable colors (colors now enabled by default)
10
+ - Navigation timeout fallback — retries with `waitUntil: networkidle2` on timeout, 10s cap
11
+ - Skip domains after 3 consecutive timeouts in the same scan to avoid wasting time on down sites
12
+ - Fingerprint cache capped at 500 entries with LRU eviction
13
+
14
+ ### Fixed
15
+ - `chrome-error://` popup redirects no longer throw errors — continue processing captured requests
16
+ - Suppressed noisy `about:blank` and `chrome-error://` redirect warnings (visible with `--debug` only)
17
+ - Fallback retry skipped for `chrome-error://` redirects (instant failure, not genuine timeout)
18
+ - Page URL checked before fallback retry to detect already-failed state
19
+ - `.nwssconfig` keys support both hyphens and underscores (`dns-cache` and `dns_cache` both work)
20
+
21
+ ### Improved
22
+ - Colors enabled by default — no need for `--color` flag or `color: true` in `.nwssconfig`
23
+ - Chrome UA bumped to 146, Firefox UA bumped to 148
24
+ - Sec-CH-UA headers updated to match Chrome 146
25
+
5
26
  ## [2.0.60] - 2026-03-16
6
27
 
7
28
  ### Added
package/README.md CHANGED
@@ -29,7 +29,7 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
29
29
  |:---------------------------|:------------|
30
30
  | `-o, --output <file>` | Output file for rules. If omitted, prints to console |
31
31
  | `--compare <file>` | Remove rules that already exist in this file before output |
32
- | `--color, --colour` | Enable colored console output for status messages |
32
+ | `--no-color, --no-colour` | Disable colored console output (colors enabled by default) |
33
33
  | `--append` | Append new rules to output file instead of overwriting (requires `-o`) |
34
34
 
35
35
  ### Output Format Options
@@ -64,6 +64,9 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
64
64
  | `--headful` | Launch browser with GUI (not headless) |
65
65
  | `--keep-open` | Keep browser and tabs open after scan completes (use with `--headful` for debugging) |
66
66
  | `--use-puppeteer-core` | Use `puppeteer-core` with system Chrome instead of bundled Chromium |
67
+ | `--load-extension <path>` | Load unpacked Chrome extension from directory (can be used multiple times) |
68
+ | `--dns-cache` | Persist dig/whois results to disk between runs (14hr TTL, `.digcache`/`.whoiscache`) |
69
+ | `--block-ads=<files>` | Block ads using EasyList format rules (comma-separated: `easylist.txt,easyprivacy.txt`) |
67
70
  | `--cdp` | Enable Chrome DevTools Protocol logging (now per-page if enabled) |
68
71
  | `--remove-dupes` | Remove duplicate domains from output (only with `-o`) |
69
72
  | `--dry-run` | Console output only: show matching regex, titles, whois/dig/searchstring results, and adblock rules |
@@ -404,6 +407,53 @@ If a proxy fails mid-scan, Chromium's error code is detected and diagnosed:
404
407
 
405
408
  Detected error codes: `ERR_PROXY_CONNECTION_FAILED`, `ERR_SOCKS_CONNECTION_FAILED`, `ERR_TUNNEL_CONNECTION_FAILED`, `ERR_PROXY_AUTH_UNSUPPORTED`, `ERR_PROXY_AUTH_REQUESTED`, `ERR_SOCKS_CONNECTION_HOST_UNREACHABLE`, `ERR_PROXY_CERTIFICATE_INVALID`, `ERR_NO_SUPPORTED_PROXIES`.
406
409
 
410
+ ---
411
+
412
+ ## .nwssconfig — Per-Config Settings
413
+
414
+ Create a `.nwssconfig` file in the project root to define CLI settings per config file. When a config filename matches a key, those settings are automatically applied. CLI flags merge with and override `.nwssconfig` settings.
415
+
416
+ ```json
417
+ {
418
+ "configs": {
419
+ "config-clean1.json": {
420
+ "output": "outputfile.txt",
421
+ "max_concurrent": 30,
422
+ "dns_cache": true,
423
+ "cache_requests": true,
424
+ "dumpurls": true,
425
+ "remove_tempfiles": true,
426
+ "color": true
427
+ },
428
+ "config-clean2.json": {
429
+ "output": "outputfile.txt",
430
+ "max_concurrent": 15,
431
+ "dns_cache": true,
432
+ "cache_requests": true,
433
+ "dumpurls": true,
434
+ "remove_tempfiles": true,
435
+ "color": true,
436
+ "debug": true,
437
+ "block_ads": "easylist.txt,easyprivacy.txt"
438
+ }
439
+ }
440
+ }
441
+ ```
442
+
443
+ **Usage:**
444
+
445
+ ```bash
446
+ node nwss.js config-clean1.json # uses .nwssconfig settings
447
+ node nwss.js config-clean2.json --debug # .nwssconfig + debug override
448
+ node nwss.js config-other.json --max-concurrent 5 # no match in .nwssconfig, uses CLI flags
449
+ ```
450
+
451
+ **Supported settings:** `output`, `max_concurrent`, `dns_cache`, `cache_requests`, `dumpurls`, `remove_tempfiles`, `color`, `remove_dupes`, `compress_logs`, `debug`, `silent`, `verbose`, `headful`, `keep_open`, `dry_run`, `titles`, `sub_domains`, `no_interact`, `ghost_cursor`, `plain`, `cdp`, `dnsmasq`, `unbound`, `privoxy`, `pihole`, `eval_on_doc`, `use_puppeteer_core`, `ignore_cache`, `clear_cache`, `block_ads`, `compare`, `localhost`, `append`.
452
+
453
+ **Priority:** CLI flags > `.nwssconfig` > hardcoded defaults.
454
+
455
+ ---
456
+
407
457
  ### Global Configuration Options
408
458
 
409
459
  These options go at the root level of your config.json:
package/lib/colorize.js CHANGED
@@ -7,7 +7,9 @@
7
7
  * @returns {boolean} False if --no-color or --no-colour flag is present; true otherwise (colors are enabled by default)
8
8
  */
9
9
  function shouldEnableColors() {
10
- return process.argv.includes('--color') || process.argv.includes('--colour');
10
+ // Colors enabled by default, use --no-color to disable
11
+ if (process.argv.includes('--no-color') || process.argv.includes('--no-colour')) return false;
12
+ return true;
11
13
  }
12
14
 
13
15
  // Initialize color support based on command line flags
@@ -22,6 +22,7 @@ function seededRandom(seed) {
22
22
 
23
23
  // Cache fingerprints per domain so reloads and multi-page visits stay consistent
24
24
  const _fingerprintCache = new Map();
25
+ const FINGERPRINT_CACHE_MAX = 500;
25
26
 
26
27
  // Type-specific property spoofing functions for monomorphic optimization
27
28
  // Built-in properties that should not be modified
@@ -32,12 +33,12 @@ const BUILT_IN_PROPERTIES = new Set([
32
33
 
33
34
  // User agent collections with latest versions
34
35
  const USER_AGENT_COLLECTIONS = Object.freeze(new Map([
35
- ['chrome', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"],
36
- ['chrome_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"],
37
- ['chrome_linux', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"],
38
- ['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:145.0) Gecko/20100101 Firefox/145.0"],
39
- ['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:145.0) Gecko/20100101 Firefox/145.0"],
40
- ['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Gecko/20100101 Firefox/145.0"],
36
+ ['chrome', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"],
37
+ ['chrome_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"],
38
+ ['chrome_linux', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"],
39
+ ['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:148.0) Gecko/20100101 Firefox/148.0"],
40
+ ['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:148.0) Gecko/20100101 Firefox/148.0"],
41
+ ['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:148.0) Gecko/20100101 Firefox/148.0"],
41
42
  ['safari', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"]
42
43
  ]));
43
44
 
@@ -237,7 +238,12 @@ function generateRealisticFingerprint(userAgent, domain = '') {
237
238
  };
238
239
 
239
240
  // Cache for this domain
240
- if (domain) _fingerprintCache.set(domain, fingerprint);
241
+ if (domain) {
242
+ if (_fingerprintCache.size >= FINGERPRINT_CACHE_MAX) {
243
+ _fingerprintCache.delete(_fingerprintCache.keys().next().value);
244
+ }
245
+ _fingerprintCache.set(domain, fingerprint);
246
+ }
241
247
 
242
248
  return fingerprint;
243
249
  }
@@ -495,7 +501,7 @@ async function applyUserAgentSpoofing(page, siteConfig, forceDebug, currentUrl)
495
501
  }),
496
502
  getManifest: () => ({
497
503
  name: "Chrome",
498
- version: "145.0.0.0",
504
+ version: "146.0.0.0",
499
505
  manifest_version: 3,
500
506
  description: "Chrome Browser"
501
507
  }),
@@ -1609,18 +1615,41 @@ async function applyUserAgentSpoofing(page, siteConfig, forceDebug, currentUrl)
1609
1615
  }, 'enhanced mouse/pointer spoofing');
1610
1616
 
1611
1617
  safeExecute(() => {
1612
- // Filter DevTools/automation traces from console.debug
1618
+ // Neutralize CDP fingerprinting traps and filter DevTools traces
1619
+ // CDP's Runtime.enable causes the inspector to read properties on console-logged objects.
1620
+ // Detection scripts exploit this via console.debug with Error objects (custom .stack getters)
1621
+ // or objects with Proxy prototypes. Only override console.debug — safest, minimal footprint.
1622
+
1613
1623
  const originalConsoleDebug = console.debug;
1614
1624
  console.debug = function(...args) {
1625
+ // Filter DevTools-related messages
1615
1626
  const message = args.join(' ');
1616
1627
  if (typeof message === 'string' && (
1617
1628
  message.includes('DevTools') ||
1618
1629
  message.includes('Runtime.evaluate') ||
1619
1630
  message.includes('Page.addScriptToEvaluateOnNewDocument') ||
1620
1631
  message.includes('Protocol error'))) {
1621
- return; // Silently drop DevTools-related debug messages
1632
+ return;
1622
1633
  }
1623
- return originalConsoleDebug.apply(this, args);
1634
+ // Sanitize args to neutralize CDP fingerprinting traps
1635
+ const sanitized = args.map(arg => {
1636
+ // Strip Error objects with custom .stack getters (CDP inspector reads .stack)
1637
+ if (arg instanceof Error) {
1638
+ const desc = Object.getOwnPropertyDescriptor(arg, 'stack');
1639
+ if (desc && desc.get) return `${arg.name}: ${arg.message}`;
1640
+ }
1641
+ // Neutralize Proxy prototype traps (CDP inspector walks prototype chain)
1642
+ if (arg !== null && typeof arg === 'object') {
1643
+ try {
1644
+ const proto = Object.getPrototypeOf(arg);
1645
+ if (proto && proto !== Object.prototype && proto !== Array.prototype) {
1646
+ try { Object.keys(proto); } catch { return '[object Object]'; }
1647
+ }
1648
+ } catch { return '[object Object]'; }
1649
+ }
1650
+ return arg;
1651
+ });
1652
+ return originalConsoleDebug.apply(this, sanitized);
1624
1653
  };
1625
1654
 
1626
1655
  }, 'console error suppression');
@@ -1670,6 +1699,7 @@ async function applyUserAgentSpoofing(page, siteConfig, forceDebug, currentUrl)
1670
1699
  if (typeof window.Image === 'function') maskAsNative(window.Image, 'Image');
1671
1700
  if (typeof window.fetch === 'function') maskAsNative(window.fetch, 'fetch');
1672
1701
  if (typeof window.PointerEvent === 'function') maskAsNative(window.PointerEvent, 'PointerEvent');
1702
+ if (typeof console.debug === 'function') maskAsNative(console.debug, 'debug');
1673
1703
 
1674
1704
  // Mask property getters on navigator
1675
1705
  const navProps = ['userAgentData', 'connection', 'pdfViewerEnabled', 'webdriver',
@@ -867,14 +867,14 @@ class SmartCache {
867
867
  netToolsCacheSize: this.netToolsCache.size,
868
868
  similarityCacheSize: this.similarityCache.size,
869
869
  regexCacheSize: this.regexCache.size,
870
- requestHitRate: this._enableRequest ?
870
+ requestHitRate: (this._enableRequest && this.requestCache) ?
871
871
  (requestHitRate * 100).toFixed(2) + '%' : '0% (disabled)',
872
- requestCacheSize: this._enableRequest ? this.requestCache.size : 0,
873
- requestCacheMemoryMB: this._enableRequest ?
872
+ requestCacheSize: (this._enableRequest && this.requestCache) ? this.requestCache.size : 0,
873
+ requestCacheMemoryMB: (this._enableRequest && this.requestCache) ?
874
874
  Math.round((this.requestCache.calculatedSize || 0) / 1048576) : 0,
875
- totalCacheEntries: this.domainCache.size + this.patternCache.size +
876
- this.responseCache.size + this.netToolsCache.size +
877
- this.similarityCache.size + this.regexCache.size + (this._enableRequest ? this.requestCache.size : 0),
875
+ totalCacheEntries: this.domainCache.size + this.patternCache.size +
876
+ this.responseCache.size + this.netToolsCache.size +
877
+ this.similarityCache.size + this.regexCache.size + ((this._enableRequest && this.requestCache) ? this.requestCache.size : 0),
878
878
  memoryUsageMB: Math.round(heapUsed / 1048576),
879
879
  memoryMaxMB: Math.round(maxHeap / 1048576),
880
880
  memoryUsagePercent: ((heapUsed / maxHeap) * 100).toFixed(1) + '%',
package/nwss.js CHANGED
@@ -2,7 +2,8 @@
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
5
- const usePuppeteerCore = process.argv.includes('--use-puppeteer-core');
5
+ const useObscura = process.argv.includes('--use-obscura');
6
+ const usePuppeteerCore = process.argv.includes('--use-puppeteer-core') || useObscura;
6
7
  const puppeteer = usePuppeteerCore ? require('puppeteer-core') : require('puppeteer');
7
8
  const fs = require('fs');
8
9
  const os = require('os');
@@ -104,12 +105,12 @@ const CONCURRENCY_LIMITS = Object.freeze({
104
105
 
105
106
  // V8 Optimization: Use Map for user agent lookups instead of object
106
107
  const USER_AGENTS = Object.freeze(new Map([
107
- ['chrome', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"],
108
- ['chrome_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"],
109
- ['chrome_linux', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"],
110
- ['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:145.0) Gecko/20100101 Firefox/145.0"],
111
- ['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:145.0) Gecko/20100101 Firefox/145.0"],
112
- ['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Gecko/20100101 Firefox/145.0"],
108
+ ['chrome', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"],
109
+ ['chrome_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"],
110
+ ['chrome_linux', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"],
111
+ ['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:148.0) Gecko/20100101 Firefox/148.0"],
112
+ ['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:148.0) Gecko/20100101 Firefox/148.0"],
113
+ ['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:148.0) Gecko/20100101 Firefox/148.0"],
113
114
  ['safari', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"]
114
115
  ]));
115
116
 
@@ -177,6 +178,82 @@ if (args.length === 0) {
177
178
  args.push('--help');
178
179
  }
179
180
 
181
+ // --- .nwssconfig support: inject per-config settings into args ---
182
+ const NWSSCONFIG_PATH = path.join(__dirname, '.nwssconfig');
183
+ if (fs.existsSync(NWSSCONFIG_PATH)) {
184
+ try {
185
+ const nwssConfig = JSON.parse(fs.readFileSync(NWSSCONFIG_PATH, 'utf-8'));
186
+ // Find which config file is being used (--custom-json <file> or positional .json arg)
187
+ const customJsonIdx = args.findIndex(arg => arg === '--custom-json');
188
+ const configFilename = (customJsonIdx !== -1 && args[customJsonIdx + 1])
189
+ ? args[customJsonIdx + 1]
190
+ : args.find(a => a.endsWith('.json') && !a.startsWith('--'));
191
+
192
+ if (configFilename && nwssConfig.configs && nwssConfig.configs[configFilename]) {
193
+ const settings = nwssConfig.configs[configFilename];
194
+ const originalArgs = args.join(' ');
195
+
196
+ // Map settings keys to CLI flags — only inject if not already in args
197
+ const settingsMap = {
198
+ output: ['-o', '--output'],
199
+ max_concurrent: ['--max-concurrent'],
200
+ dns_cache: ['--dns-cache'],
201
+ cache_requests: ['--cache-requests'],
202
+ dumpurls: ['--dumpurls'],
203
+ remove_tempfiles: ['--remove-tempfiles'],
204
+ color: ['--color'],
205
+ remove_dupes: ['--remove-dupes', '--remove-dubes'],
206
+ 'remove-dupes': ['--remove-dupes', '--remove-dubes'],
207
+ 'remove-dubes': ['--remove-dupes', '--remove-dubes'],
208
+ compress_logs: ['--compress-logs'],
209
+ debug: ['--debug'],
210
+ silent: ['--silent'],
211
+ verbose: ['--verbose'],
212
+ headful: ['--headful'],
213
+ keep_open: ['--keep-open'],
214
+ dry_run: ['--dry-run'],
215
+ titles: ['--titles'],
216
+ sub_domains: ['--sub-domains'],
217
+ no_interact: ['--no-interact'],
218
+ ghost_cursor: ['--ghost-cursor'],
219
+ plain: ['--plain'],
220
+ cdp: ['--cdp'],
221
+ dnsmasq: ['--dnsmasq'],
222
+ unbound: ['--unbound'],
223
+ privoxy: ['--privoxy'],
224
+ pihole: ['--pihole'],
225
+ eval_on_doc: ['--eval-on-doc'],
226
+ use_puppeteer_core: ['--use-puppeteer-core'],
227
+ ignore_cache: ['--ignore-cache'],
228
+ clear_cache: ['--clear-cache'],
229
+ block_ads: ['--block-ads'],
230
+ compare: ['--compare'],
231
+ localhost: ['--localhost'],
232
+ append: ['--append']
233
+ };
234
+
235
+ for (const [key, flags] of Object.entries(settingsMap)) {
236
+ // Support both underscore and hyphen variants (e.g. dns_cache or dns-cache)
237
+ const value = settings[key] !== undefined ? settings[key]
238
+ : settings[key.replace(/_/g, '-')] !== undefined ? settings[key.replace(/_/g, '-')]
239
+ : settings[key.replace(/-/g, '_')] !== undefined ? settings[key.replace(/-/g, '_')]
240
+ : undefined;
241
+ if (value === undefined) continue;
242
+ // Skip if any variant of the flag is already in CLI args
243
+ if (flags.some(f => originalArgs.includes(f))) continue;
244
+
245
+ if (typeof value === 'boolean') {
246
+ if (value) args.push(flags[flags.length - 1]);
247
+ } else if (typeof value === 'string' || typeof value === 'number') {
248
+ args.push(flags[flags.length - 1], String(value));
249
+ }
250
+ }
251
+ }
252
+ } catch (e) {
253
+ console.error(`Warning: Failed to parse .nwssconfig: ${e.message}`);
254
+ }
255
+ }
256
+
180
257
  const headfulMode = args.includes('--headful');
181
258
  const SOURCES_FOLDER = 'sources';
182
259
 
@@ -565,6 +642,12 @@ Request Blocking:
565
642
  --block-ads=<file> Block ads/trackers using EasyList format rules (||domain.com^, /ads/*, etc)
566
643
  Works at request-level for maximum performance
567
644
 
645
+ Per-config settings file (.nwssconfig):
646
+ Place a .nwssconfig file in the project root to define per-config settings.
647
+ When a config filename matches a key in .nwssconfig, those settings are used.
648
+ CLI flags merge with and override .nwssconfig settings.
649
+ See README.md for format details.
650
+
568
651
  General Options:
569
652
  --verbose Force verbose mode globally
570
653
  --debug Force debug mode globally
@@ -580,6 +663,8 @@ General Options:
580
663
  --headful Launch browser with GUI (not headless)
581
664
  --keep-open Keep browser open after scan completes (use with --headful)
582
665
  --use-puppeteer-core Use puppeteer-core with system Chrome instead of bundled Chromium
666
+ --use-obscura Connect to running Obscura CDP server (ws://127.0.0.1:9222 or OBSCURA_WS env)
667
+ Skips fingerprint injection — Obscura provides built-in stealth
583
668
  --load-extension <path> Load unpacked Chrome extension from directory
584
669
  --cdp Enable Chrome DevTools Protocol logging (now per-page if enabled)
585
670
  --remove-dupes Remove duplicate domains from output (only with -o)
@@ -788,6 +873,23 @@ const globalBlockedRegexes = Array.isArray(globalBlocked)
788
873
  ? globalBlocked.map(pattern => new RegExp(pattern))
789
874
  : [];
790
875
 
876
+ // Cache compiled regexes by pattern string — avoids recompiling same patterns across URLs
877
+ const _compiledRegexCache = new Map();
878
+ function getCompiledRegex(pattern) {
879
+ let compiled = _compiledRegexCache.get(pattern);
880
+ if (!compiled) {
881
+ compiled = new RegExp(pattern.replace(/^\/(.*)\/$/, '$1'));
882
+ if (_compiledRegexCache.size > 2000) _compiledRegexCache.clear();
883
+ _compiledRegexCache.set(pattern, compiled);
884
+ }
885
+ return compiled;
886
+ }
887
+ function getCompiledRegexes(patterns) {
888
+ if (!patterns) return [];
889
+ const arr = Array.isArray(patterns) ? patterns : [patterns];
890
+ return arr.map(p => getCompiledRegex(p));
891
+ }
892
+
791
893
  // Pre-split ignoreDomains into exact Set (O(1) lookup) and wildcard array
792
894
  const _ignoreDomainsExact = new Set();
793
895
  const _ignoreDomainsWildcard = [];
@@ -1116,12 +1218,19 @@ if (forceDebug && globalComments) {
1116
1218
  * @param {string} url - The URL string to parse.
1117
1219
  * @returns {string} The root domain, or the original hostname if parsing fails (e.g., for IP addresses or invalid URLs), or an empty string on error.
1118
1220
  */
1221
+ const _rootDomainCache = new Map();
1119
1222
  function getRootDomain(url) {
1223
+ const cached = _rootDomainCache.get(url);
1224
+ if (cached !== undefined) return cached;
1120
1225
  try {
1121
1226
  const { hostname } = new URL(url);
1122
1227
  const parsed = psl.parse(hostname);
1123
- return parsed.domain || hostname;
1228
+ const result = parsed.domain || hostname;
1229
+ if (_rootDomainCache.size > 5000) _rootDomainCache.clear();
1230
+ _rootDomainCache.set(url, result);
1231
+ return result;
1124
1232
  } catch {
1233
+ _rootDomainCache.set(url, '');
1125
1234
  return '';
1126
1235
  }
1127
1236
  }
@@ -1416,6 +1525,23 @@ function setupFrameHandling(page, forceDebug) {
1416
1525
  * @returns {Promise<import('puppeteer').Browser>} Browser instance
1417
1526
  */
1418
1527
  async function createBrowser(extraArgs = []) {
1528
+ // Obscura mode: connect to a running Obscura CDP server instead of launching Chrome
1529
+ if (useObscura) {
1530
+ const obscuraEndpoint = process.env.OBSCURA_WS || 'ws://127.0.0.1:9222/devtools/browser';
1531
+ if (forceDebug) console.log(formatLogMessage('debug', `Connecting to Obscura at ${obscuraEndpoint}`));
1532
+ try {
1533
+ const browser = await puppeteer.connect({ browserWSEndpoint: obscuraEndpoint });
1534
+ if (!silentMode) console.log(messageColors.success(`Connected to Obscura CDP at ${obscuraEndpoint}`));
1535
+ browser._nwssUserDataDir = null; // No temp dir to clean
1536
+ browser._nwssIsObscura = true;
1537
+ return browser;
1538
+ } catch (err) {
1539
+ console.error(formatLogMessage('error', `Failed to connect to Obscura: ${err.message}`));
1540
+ console.error(formatLogMessage('error', `Start Obscura first: obscura serve --port 9222 --stealth`));
1541
+ process.exit(1);
1542
+ }
1543
+ }
1544
+
1419
1545
  // Create temporary user data directory that we can fully control and clean up
1420
1546
  const tempUserDataDir = path.join(os.tmpdir(), `puppeteer-${Date.now()}-${Math.random().toString(36).substring(7)}`);
1421
1547
  userDataDir = tempUserDataDir; // Store for cleanup tracking (use outer scope variable)
@@ -2231,11 +2357,16 @@ function setupFrameHandling(page, forceDebug) {
2231
2357
  }
2232
2358
 
2233
2359
  // --- Apply all fingerprint spoofing (user agent, Brave, fingerprint protection) ---
2360
+ // Skip when using Obscura — it has built-in stealth that conflicts with our injection
2234
2361
  try {
2235
- await applyAllFingerprintSpoofing(page, siteConfig, forceDebug, currentUrl);
2362
+ if (!useObscura) {
2363
+ await applyAllFingerprintSpoofing(page, siteConfig, forceDebug, currentUrl);
2364
+ } else if (forceDebug) {
2365
+ console.log(formatLogMessage('debug', `Skipping fingerprint injection — Obscura provides built-in stealth`));
2366
+ }
2236
2367
 
2237
- // Client Hints protection for Chrome user agents
2238
- if (siteConfig.userAgent && siteConfig.userAgent.toLowerCase().includes('chrome')) {
2368
+ // Client Hints protection for Chrome user agents (skipped under Obscura — it sets its own)
2369
+ if (!useObscura && siteConfig.userAgent && siteConfig.userAgent.toLowerCase().includes('chrome')) {
2239
2370
  const userAgentKey = siteConfig.userAgent.toLowerCase();
2240
2371
  let platform = 'Windows';
2241
2372
  let platformVersion = '15.0.0';
@@ -2252,14 +2383,14 @@ function setupFrameHandling(page, forceDebug) {
2252
2383
  }
2253
2384
 
2254
2385
  await page.setExtraHTTPHeaders({
2255
- 'Sec-CH-UA': '"Not:A-Brand";v="99", "Google Chrome";v="145", "Chromium";v="145"',
2386
+ 'Sec-CH-UA': '"Not:A-Brand";v="99", "Google Chrome";v="146", "Chromium";v="146"',
2256
2387
  'Sec-CH-UA-Platform': `"${platform}"`,
2257
2388
  'Sec-CH-UA-Platform-Version': `"${platformVersion}"`,
2258
2389
  'Sec-CH-UA-Mobile': '?0',
2259
2390
  'Sec-CH-UA-Arch': `"${arch}"`,
2260
2391
  'Sec-CH-UA-Bitness': '"64"',
2261
- 'Sec-CH-UA-Full-Version': '"145.0.7632.160"',
2262
- 'Sec-CH-UA-Full-Version-List': '"Not:A-Brand";v="99.0.0.0", "Google Chrome";v="145.0.7632.160", "Chromium";v="145.0.7632.160"'
2392
+ 'Sec-CH-UA-Full-Version': '"146.0.0.0"',
2393
+ 'Sec-CH-UA-Full-Version-List': '"Not:A-Brand";v="99.0.0.0", "Google Chrome";v="146.0.0.0", "Chromium";v="146.0.0.0"'
2263
2394
  });
2264
2395
  }
2265
2396
  } catch (fingerprintErr) {
@@ -2272,11 +2403,7 @@ function setupFrameHandling(page, forceDebug) {
2272
2403
  }
2273
2404
  }
2274
2405
 
2275
- const regexes = Array.isArray(siteConfig.filterRegex)
2276
- ? siteConfig.filterRegex.map(r => new RegExp(r.replace(/^\/(.*)\/$/, '$1')))
2277
- : siteConfig.filterRegex
2278
- ? [new RegExp(siteConfig.filterRegex.replace(/^\/(.*)\/$/, '$1'))]
2279
- : [];
2406
+ const regexes = getCompiledRegexes(siteConfig.filterRegex);
2280
2407
 
2281
2408
  // NEW: Get regex_and setting (defaults to false for backward compatibility)
2282
2409
  const useRegexAnd = siteConfig.regex_and === true;
@@ -2424,7 +2551,7 @@ function setupFrameHandling(page, forceDebug) {
2424
2551
  }
2425
2552
 
2426
2553
  const blockedRegexes = Array.isArray(siteConfig.blocked)
2427
- ? siteConfig.blocked.map(pattern => new RegExp(pattern))
2554
+ ? siteConfig.blocked.map(pattern => getCompiledRegex(pattern))
2428
2555
  : [];
2429
2556
 
2430
2557
  // Combine site-specific with pre-compiled global blocked patterns
@@ -3201,7 +3328,23 @@ function setupFrameHandling(page, forceDebug) {
3201
3328
  ? { ...defaultGotoOptions, ...siteConfig.goto_options } : defaultGotoOptions;
3202
3329
 
3203
3330
  // Enhanced navigation with redirect handling - passes existing gotoOptions
3204
- const navigationResult = await navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOptions, forceDebug, formatLogMessage);
3331
+ let navigationResult;
3332
+ try {
3333
+ navigationResult = await navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOptions, forceDebug, formatLogMessage);
3334
+ } catch (navErr) {
3335
+ // Only retry on genuine timeouts, not chrome-error:// redirects
3336
+ let pageUrl = '';
3337
+ try { if (!page.isClosed()) pageUrl = page.url(); } catch {}
3338
+ const isPopupFailure = navErr.message.includes('chrome-error://') || navErr.message.includes('invalid URL') ||
3339
+ pageUrl.startsWith('chrome-error://') || pageUrl === 'about:blank';
3340
+ if ((navErr.message.includes('timeout') || navErr.message.includes('Timeout')) && !isPopupFailure) {
3341
+ if (forceDebug) console.log(formatLogMessage('debug', `Navigation timeout, retrying with waitUntil:networkidle2 for ${currentUrl}`));
3342
+ const fallbackOptions = { ...gotoOptions, waitUntil: 'networkidle2', timeout: Math.min(timeout, 10000) };
3343
+ navigationResult = await navigateWithRedirectHandling(page, currentUrl, siteConfig, fallbackOptions, forceDebug, formatLogMessage);
3344
+ } else {
3345
+ throw navErr;
3346
+ }
3347
+ }
3205
3348
 
3206
3349
  const { finalUrl, redirected, redirectChain, originalUrl, redirectDomains } = navigationResult;
3207
3350
 
@@ -3257,7 +3400,8 @@ function setupFrameHandling(page, forceDebug) {
3257
3400
  }
3258
3401
 
3259
3402
  if (originalDomain !== finalDomain) {
3260
- if (!silentMode) {
3403
+ const isPopupRedirect = !finalUrl || finalUrl === 'about:blank' || finalUrl.startsWith('chrome-error://');
3404
+ if (!silentMode && !isPopupRedirect) {
3261
3405
  console.log(`🔄 Redirect detected: ${originalDomain} → ${finalDomain}`);
3262
3406
  }
3263
3407
 
@@ -3284,13 +3428,11 @@ function setupFrameHandling(page, forceDebug) {
3284
3428
  }
3285
3429
  }
3286
3430
  } else {
3287
- // Invalid final URL - don't update currentUrl, treat as failed redirect
3288
- console.warn(`⚠ Redirect to invalid URL ignored: ${originalDomain} → ${finalUrl}`);
3431
+ // Invalid final URL (ad popup redirect) - continue with original URL
3289
3432
  if (forceDebug) {
3290
- console.log(formatLogMessage('debug', `Redirect chain ended with invalid URL, keeping original: ${originalUrl}`));
3433
+ console.log(formatLogMessage('debug', `Popup redirect ignored: ${originalDomain} ${finalUrl}, keeping original: ${originalUrl}`));
3291
3434
  }
3292
- // Keep processing with the original URL or throw an error
3293
- throw new Error(`Redirect resulted in invalid URL: ${finalUrl}`);
3435
+ // Continue with the original URL — requests captured before the redirect are still valid
3294
3436
  }
3295
3437
  }
3296
3438
  }
@@ -3431,7 +3573,10 @@ function setupFrameHandling(page, forceDebug) {
3431
3573
  const timeoutResult = await handleRedirectTimeout(page, currentUrl, err, safeGetDomain, forceDebug, formatLogMessage);
3432
3574
 
3433
3575
  if (timeoutResult.success) {
3434
- console.log(`⚠ Partial redirect timeout recovered: ${safeGetDomain(currentUrl)} → ${safeGetDomain(timeoutResult.finalUrl)}`);
3576
+ const isPopupRedirect = timeoutResult.finalUrl && (timeoutResult.finalUrl === 'about:blank' || timeoutResult.finalUrl.startsWith('chrome-error://'));
3577
+ if (!isPopupRedirect) {
3578
+ console.log(`⚠ Partial redirect timeout recovered: ${safeGetDomain(currentUrl)} → ${safeGetDomain(timeoutResult.finalUrl)}`);
3579
+ }
3435
3580
  currentUrl = timeoutResult.finalUrl; // Use the partial redirect URL
3436
3581
  siteCounter++;
3437
3582
  // Continue processing with the redirected URL instead of throwing error
@@ -4060,6 +4205,10 @@ function setupFrameHandling(page, forceDebug) {
4060
4205
  }
4061
4206
  }
4062
4207
 
4208
+ // Track domain timeout counts — skip domain after 3 failures
4209
+ const domainTimeoutCounts = new Map();
4210
+ const DOMAIN_TIMEOUT_THRESHOLD = 3;
4211
+
4063
4212
  // Enhanced hang detection with browser restart recovery
4064
4213
  let currentBatchInfo = { batchStart: 0, batchSize: 0 };
4065
4214
  let lastProcessedCount = 0;
@@ -4285,8 +4434,17 @@ function setupFrameHandling(page, forceDebug) {
4285
4434
  console.log(formatLogMessage('debug', `[CONCURRENCY] Starting ${batchSize} concurrent tasks with limit ${MAX_CONCURRENT_SITES}`));
4286
4435
  }
4287
4436
 
4288
- // Create tasks with timeout protection
4289
- const batchTasks = currentBatch.map(task => originalLimit(() => processUrl(task.url, task.config, browser)));
4437
+ // Create tasks with timeout protection — skip domains that repeatedly timed out
4438
+ const batchTasks = currentBatch.map(task => originalLimit(() => {
4439
+ try {
4440
+ const taskDomain = new URL(task.url).hostname;
4441
+ if ((domainTimeoutCounts.get(taskDomain) || 0) >= DOMAIN_TIMEOUT_THRESHOLD) {
4442
+ if (!silentMode) console.log(formatLogMessage('info', `Skipping ${task.url} — ${taskDomain} timed out ${DOMAIN_TIMEOUT_THRESHOLD} times`));
4443
+ return { url: task.url, rules: [], success: false, error: 'Domain repeatedly timed out', skipped: true };
4444
+ }
4445
+ } catch {}
4446
+ return processUrl(task.url, task.config, browser);
4447
+ }));
4290
4448
 
4291
4449
  let batchResults;
4292
4450
  try {
@@ -4316,6 +4474,16 @@ function setupFrameHandling(page, forceDebug) {
4316
4474
  }
4317
4475
  }
4318
4476
 
4477
+ // Track domain timeout counts — skip after threshold
4478
+ for (const result of batchResults) {
4479
+ if (!result.success && !result.skipped && result.error && result.error.includes('timeout')) {
4480
+ try {
4481
+ const domain = new URL(result.url).hostname;
4482
+ domainTimeoutCounts.set(domain, (domainTimeoutCounts.get(domain) || 0) + 1);
4483
+ } catch {}
4484
+ }
4485
+ }
4486
+
4319
4487
  // IMPROVED: Much more conservative emergency restart logic
4320
4488
  const criticalRestartCount = batchResults.filter(r => r.needsImmediateRestart).length;
4321
4489
  // Require either:
@@ -4651,15 +4819,23 @@ function setupFrameHandling(page, forceDebug) {
4651
4819
  if (forceDebug) console.log(formatLogMessage('debug', `Browser connection check failed: ${connErr.message}`));
4652
4820
  }
4653
4821
 
4654
- const cleanupResult = await handleBrowserExit(browser, {
4655
- forceDebug,
4656
- timeout: 10000,
4657
- exitOnFailure: true,
4658
- cleanTempFiles: true,
4659
- comprehensiveCleanup: removeTempFiles, // Use --remove-tempfiles flag
4660
- userDataDir: browser._nwssUserDataDir,
4661
- verbose: !silentMode && removeTempFiles // Show verbose output only if removing temp files and not silent
4662
- });
4822
+ // Obscura: just disconnect, don't kill — we don't own the browser process
4823
+ let cleanupResult;
4824
+ if (browser._nwssIsObscura) {
4825
+ try { await browser.disconnect(); } catch {}
4826
+ cleanupResult = { success: true, browserClosed: true, tempFilesCleanedCount: 0, userDataCleaned: false, errors: [] };
4827
+ if (forceDebug) console.log(formatLogMessage('debug', `Disconnected from Obscura (process left running)`));
4828
+ } else {
4829
+ cleanupResult = await handleBrowserExit(browser, {
4830
+ forceDebug,
4831
+ timeout: 10000,
4832
+ exitOnFailure: true,
4833
+ cleanTempFiles: true,
4834
+ comprehensiveCleanup: removeTempFiles,
4835
+ userDataDir: browser._nwssUserDataDir,
4836
+ verbose: !silentMode && removeTempFiles
4837
+ });
4838
+ }
4663
4839
 
4664
4840
  if (forceDebug) {
4665
4841
  console.log(formatLogMessage('debug', `Final cleanup results: ${cleanupResult.success ? 'success' : 'failed'}`));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "2.0.60",
3
+ "version": "2.0.62",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {