@fanboynz/network-scanner 2.0.66 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,41 +1,170 @@
1
1
  name: Publish to NPM
2
+
2
3
  on:
3
4
  workflow_dispatch:
5
+ inputs:
6
+ version_bump:
7
+ description: 'Version bump type'
8
+ required: true
9
+ default: 'patch'
10
+ type: choice
11
+ options:
12
+ - patch
13
+ - minor
14
+ - major
15
+ - explicit
16
+ explicit_version:
17
+ description: 'Explicit version (required if version_bump = explicit, e.g. 3.0.0)'
18
+ required: false
19
+ default: ''
20
+ release_notes_source:
21
+ description: 'GitHub Release body source'
22
+ required: true
23
+ default: 'changelog'
24
+ type: choice
25
+ options:
26
+ - changelog
27
+ - manual
28
+ manual_release_notes:
29
+ description: 'Manual release notes (used if release_notes_source = manual; supports markdown)'
30
+ required: false
31
+ default: ''
32
+ prerelease:
33
+ description: 'Mark GitHub Release as pre-release'
34
+ required: false
35
+ default: false
36
+ type: boolean
4
37
 
5
38
  jobs:
6
39
  publish:
7
40
  runs-on: ubuntu-latest
8
41
  permissions:
9
42
  contents: write
10
-
43
+
11
44
  steps:
12
45
  - uses: actions/checkout@v5
13
46
  with:
14
47
  token: ${{ secrets.GITHUB_TOKEN }}
15
48
  fetch-depth: 0
16
-
49
+
17
50
  - name: Setup Node.js
51
+ # Must be >=22.12.0 to satisfy package.json engines.node and to
52
+ # support require()-of-ESM for puppeteer 25 (the project's
53
+ # current floor). Dropping below this re-introduces the EBADENGINE
54
+ # warnings seen on 20.x runners and will fail npm publish for
55
+ # consumers on the same Node version.
18
56
  uses: actions/setup-node@v5
19
57
  with:
20
- node-version: '20'
58
+ node-version: '22'
21
59
  registry-url: 'https://registry.npmjs.org'
22
-
60
+
23
61
  - run: npm ci
24
62
  - run: npm run lint
25
-
63
+
26
64
  - name: Configure git
27
65
  run: |
28
66
  git config user.name "github-actions[bot]"
29
67
  git config user.email "github-actions[bot]@users.noreply.github.com"
30
-
31
- - name: Version and publish
68
+
69
+ - name: Validate explicit-version input
70
+ if: inputs.version_bump == 'explicit'
71
+ run: |
72
+ if [ -z "${{ inputs.explicit_version }}" ]; then
73
+ echo "::error::version_bump=explicit but explicit_version is empty"
74
+ exit 1
75
+ fi
76
+ # Loose semver shape check -- npm version will do the strict validation
77
+ if ! echo "${{ inputs.explicit_version }}" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+([+-].+)?$'; then
78
+ echo "::error::explicit_version '${{ inputs.explicit_version }}' does not look like semver (e.g. 3.0.0)"
79
+ exit 1
80
+ fi
81
+
82
+ - name: Bump version + promote CHANGELOG Unreleased
83
+ id: version
84
+ run: |
85
+ # Use --no-git-tag-version so we can fold the CHANGELOG promotion
86
+ # into the same commit + tag. Otherwise the auto-commit npm creates
87
+ # would not include the changelog edit (and amending afterwards
88
+ # would orphan the tag).
89
+ if [ "${{ inputs.version_bump }}" = "explicit" ]; then
90
+ npm version "${{ inputs.explicit_version }}" --no-git-tag-version
91
+ else
92
+ npm version "${{ inputs.version_bump }}" --no-git-tag-version
93
+ fi
94
+ NEW_VERSION=$(node -p "require('./package.json').version")
95
+ echo "version=$NEW_VERSION" >> "$GITHUB_OUTPUT"
96
+ DATE=$(date -u +%Y-%m-%d)
97
+ echo "date=$DATE" >> "$GITHUB_OUTPUT"
98
+
99
+ # If CHANGELOG has an [Unreleased] section, rename its header to
100
+ # the new version + today's date. Idempotent: skips if the header
101
+ # is already a versioned one (e.g. for re-runs after a failure).
102
+ if grep -q '^## \[Unreleased\]$' CHANGELOG.md; then
103
+ sed -i "s/^## \[Unreleased\]\$/## [$NEW_VERSION] - $DATE/" CHANGELOG.md
104
+ echo "Promoted [Unreleased] -> [$NEW_VERSION] - $DATE"
105
+ else
106
+ echo "::warning::No [Unreleased] section in CHANGELOG.md; skipping promotion. Release notes will use whatever already exists at [$NEW_VERSION]."
107
+ fi
108
+
109
+ # Single combined commit + tag so the tag points at HEAD containing
110
+ # BOTH the package.json bump AND the changelog promotion.
111
+ git add package.json package-lock.json CHANGELOG.md
112
+ git commit -m "$NEW_VERSION"
113
+ git tag "v$NEW_VERSION"
114
+
115
+ - name: Extract release notes from CHANGELOG
116
+ id: changelog_notes
117
+ if: inputs.release_notes_source == 'changelog'
118
+ run: |
119
+ NEW_VERSION="${{ steps.version.outputs.version }}"
120
+ # Pull every line between `## [VERSION]` and the next `## [` header.
121
+ # awk: flag=1 after the start line; flag=0 at the next ## [ header.
122
+ NOTES=$(awk -v v="$NEW_VERSION" '
123
+ $0 ~ "^## \\[" v "\\]" { flag = 1; next }
124
+ $0 ~ "^## \\[" { flag = 0 }
125
+ flag { print }
126
+ ' CHANGELOG.md)
127
+
128
+ if [ -z "$(echo "$NOTES" | tr -d '[:space:]')" ]; then
129
+ echo "::warning::No CHANGELOG section found for $NEW_VERSION -- release body will be empty"
130
+ fi
131
+
132
+ # Multi-line GITHUB_OUTPUT requires a delimited heredoc
133
+ {
134
+ echo "notes<<NOTES_EOF"
135
+ echo "$NOTES"
136
+ echo "NOTES_EOF"
137
+ } >> "$GITHUB_OUTPUT"
138
+
139
+ - name: Use manual release notes
140
+ id: manual_notes
141
+ if: inputs.release_notes_source == 'manual'
32
142
  run: |
33
- npm version patch
34
- npm publish
143
+ {
144
+ echo "notes<<NOTES_EOF"
145
+ echo "${{ inputs.manual_release_notes }}"
146
+ echo "NOTES_EOF"
147
+ } >> "$GITHUB_OUTPUT"
148
+
149
+ - name: Publish to npm
150
+ # Publish BEFORE pushing to GitHub: if npm rejects (auth/network/
151
+ # name-collision), nothing reaches GitHub and a re-run is clean.
152
+ run: npm publish
35
153
  env:
36
154
  NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
37
-
38
- - name: Push changes
155
+
156
+ - name: Push commits and tag
39
157
  run: git push --follow-tags
40
158
  env:
41
159
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
160
+
161
+ - name: Create GitHub Release
162
+ uses: softprops/action-gh-release@v2
163
+ with:
164
+ tag_name: v${{ steps.version.outputs.version }}
165
+ name: v${{ steps.version.outputs.version }}
166
+ body: ${{ inputs.release_notes_source == 'changelog' && steps.changelog_notes.outputs.notes || steps.manual_notes.outputs.notes }}
167
+ draft: false
168
+ prerelease: ${{ inputs.prerelease }}
169
+ env:
170
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
package/CHANGELOG.md CHANGED
@@ -2,6 +2,170 @@
2
2
 
3
3
  All notable changes to the Network Scanner (nwss.js) project.
4
4
 
5
+ ## [3.0.1] - 2026-05-24
6
+
7
+ ### Security
8
+ - **Proxy credentials redacted in debug logs** — `lib/proxy.js` `getProxyInfo()` now replaces the `username:password@` segment with `[redacted]@` before logging; `lib/socks-relay.js` strips the username from both the relay-startup log (`auth: [redacted]` / `no auth`) and the close log (regex-trims the `:username` suffix from the relay key, IPv6-safe). Prior output exposed SOCKS5 credentials to anyone the user shared a debug dump, screenshot, or support ticket with.
9
+
10
+ ### Added
11
+ - `scripts/test-stealth.js` — stealth smoke-test harness. Launches Puppeteer with `applyAllFingerprintSpoofing` applied and reports what bot.sannysoft.com / creepjs / browserleaks.com/javascript concluded. Flags: `--headful`, `--no-spoof` (baseline), `--ua=<family>` (validated against `USER_AGENT_COLLECTIONS`), `--format=json` (stable schema for diff/jq A/B), `--help`, positional target filtering. `PUPPETEER_NO_SANDBOX=1` env-var opt-in for CI/root containers (sandbox is on by default). Caught 3 real bugs that 5 rounds of static review missed.
12
+ - `USER_AGENT_COLLECTIONS` exported from `lib/fingerprint.js` — single source of truth for valid UA families, consumed by the test harness so the list isn't duplicated.
13
+
14
+ ### Fixed
15
+ - **Puppeteer 25 compatibility** — `browser.isConnected()` (removed in Puppeteer 25 per [puppeteer#14910](https://github.com/puppeteer/puppeteer/pull/14910)) replaced with the `browser.connected` property at 14 call sites across 6 files. Compatible with both Puppeteer 24 and 25.
16
+ - **Fingerprint own-goal — PHANTOM_PROPERTIES + SELENIUM_DRIVER** — spoofing did `delete window[prop]` followed by `defineProperty(prop, { get: () => undefined })`. The undefined-returning getters left the properties detectable via the `in` operator, defeating the delete. Now only deletes. (caught by `scripts/test-stealth.js` sannysoft)
17
+ - **`navigator.plugins instanceof PluginArray` failed** — the spoof returned a plain array. Now `Object.setPrototypeOf(pluginsArray, PluginArray.prototype)` with fallback to `Object.getPrototypeOf(navigator.plugins)` for environments where `PluginArray` isn't a global.
18
+ - **`navigator.plugins[0].toString() === '[object Plugin]'` failed** — plain plugin objects returned `[object Object]`. Each plugin now wraps via `Object.create(Plugin.prototype)` with `Symbol.toStringTag` fallback.
19
+ - **`window.chrome` descriptor was a fingerprinting tell** — had `writable: false, enumerable: false`; real Chrome has both `true`. Aligned.
20
+ - **`_fingerprintCache` cross-UA poisoning** — was keyed by domain only, so the same domain visited under a different UA returned cached values from the wrong OS. Now keyed by `${domain}|${userAgent}`.
21
+ - **7 broken regex patterns** in the fingerprint error-suppression list — double backslashes (`\\.X`) parsed as literal-backslash + wildcard and never matched real errors. All 7 repaired.
22
+ - Constructor `.name` / `.length` preserved through 5 wrapper sites (Error, Image, RTCPeerConnection, PointerEvent, WheelEvent) — wrapped ctors had `.name = ''` and `.length = 0`, a fingerprinting tell.
23
+ - `Error` static properties (`stackTraceLimit`, `captureStackTrace`, `prepareStackTrace`) forward to the OriginalError via live getter/setter instead of snapshot-copy (snapshot diverged once any caller mutated the wrapped Error).
24
+ - `navigator.connection` fallback returns a closure-captured stable object — was re-allocating per call, so object identity changed every access.
25
+ - `chrome.runtime.getManifest()` derives version from the spoofed UA instead of returning a hardcoded older version.
26
+
27
+ ### Improved
28
+ - `isBrowserDead` helper extracted — deduped 3 spoof sites that hand-rolled the same `isConnected`/`closed` check.
29
+ - `preserveCtorIdentity` helper added — applied at the 5 wrapper sites above.
30
+ - GPU pool seeded by `domain + ':gpu'` (was just `domain`) — keeps per-domain GPU stable while decoupling it from any other per-domain seed we might add.
31
+ - 10 dead module-level exports trimmed from `lib/fingerprint.js`.
32
+ - `safeDefinePropertyLocal` forces `configurable: true` instead of merging it from the caller's descriptor (caller-side opt-in was unreliable).
33
+
34
+ ## [3.0.0] - 2026-05-23
35
+
36
+ ### Changed
37
+ - **Engines floor bumped**: `engines.node` from `>=22.0.0` to `>=22.12.0` to match Puppeteer 25's stable `require()`-of-ESM requirement. Anyone running on Node 22.0–22.11 will see an npm engine warning and should upgrade.
38
+ - **Puppeteer dependency floor bumped**: `puppeteer` and `puppeteer-core` from `>=20.0.0` to `>=24.0.0`. Range still permits both v24 and v25 — pick via `npm install puppeteer@24` or `npm install puppeteer@25` according to taste. Dev lockfile moved to `puppeteer@25.0.4`.
39
+ - Audit confirms no breaking-change impact from Puppeteer 25's `executablePath`/`defaultArgs` Promise return — neither is called in this codebase. `require('puppeteer')` continues to work on the now-ESM-only package thanks to Node 22.12+'s stable require-of-ESM.
40
+
41
+ ### Added
42
+ - `blockDomainsByUrl` config key (top-level) — regex patterns mirroring `ignoreDomainsByUrl` but for active blocking. A matching request URL aborts the triggering request via Puppeteer `request.abort()`, adds the request's root domain to a dynamic block set, and aborts all subsequent requests to that domain or its subdomains for the rest of the scan
43
+ - Cloudflare aggregate stats accessible via `getAggregateStats({reset})` — returns `byOutcome`, `bySolveMethod`, `maxDurationMs`, `avgDurationMs`, `failures`, `timedOut` counts; bumped on every URL regardless of debug mode
44
+ - Cloudflare per-stage timing breakdown in outcome lines: `q=Xms p=Xms c=Xms` (zero-stage suffixes omitted)
45
+ - Production-level Cloudflare outcome logs: `warn` severity for `!overallSuccess || timedOut`, `info` for 5xx origin-error pages, debug-only on success
46
+ - DNS pre-check positive-resolution shortcut — hosts already proven live by dig or whois within the cache TTL skip the c-ares pre-check via a `knownResolvedHostnames` index (also warmed at startup from disk-loaded dig/whois caches)
47
+ - DNS pre-check skip summary now reports both NXDOMAIN-cache and positive-cache savings: `DNS pre-check skipped: N URL(s) via M unresolvable host(s), N URL(s) via M resolved host(s)`
48
+ - `[blocked-stats]` per-pattern hit counters reported at scan end — surfaces which `blocked` patterns are doing work vs. which are stale
49
+ - `disable_adblock` per-site config flag to escape global ad-blocking layers
50
+ - `capture_popups` now runs whois/dig validation on matched popup URLs
51
+ - `lib/spawn-async.js` shared async-spawn helper module — consolidates 4 near-identical Promise wrappers across curl/grep/searchstring
52
+
53
+ ### Fixed
54
+ - **Security**: nettools shell-injection vector closed — `exec(string)` replaced with `execFile(cmd, args)` (no shell); config-supplied `whois_server` and `recordType` values can no longer execute commands via `$()`/backticks/etc.
55
+ - Cloudflare `detectChallengeLoop` off-by-one bug — counted the current URL against itself, tripping `>= 2` threshold one iteration early
56
+ - Cloudflare `detectChallengeLoop` threshold was unreachable with default `cloudflare_max_retries = 2`; new exact-match path catches reload-to-same-URL loops at attempt 2
57
+ - Cloudflare outcome cache namespace collision — now stored in a separate Map (was sharing keys with the detection cache, getting evicted by detection-cache pressure)
58
+ - `ignoreDomains` dynamic Set didn't cascade to subdomains — `ignoreDomainsByUrl` dynamic adds now apply parent-walk just like static config (e.g. dynamically-ignored `example.com` now also catches `cdn.example.com`)
59
+ - `blocked` / `blockDomainsByUrl` / `ignoreDomainsByUrl` regex compile failures unified — was silent-drop for *byUrl and hard-throw for blocked; now all warn loudly with `[config] X pattern dropped (compile error): "..." -- regex msg` and continue
60
+ - adblock pattern-cache key mismatch — anchored patterns (`||example.com`) were missing their own cache because get/set used different keys
61
+ - grep AND-logic silently dropped non-matching rules; ENOBUFS silently truncated output on large pages
62
+ - Cloudflare debug logs rendered literal `"undefined"` when detection short-circuited on non-HTTP pages (popup → about:blank case)
63
+ - Outcome label `no_indicators` was lying when detection short-circuited on non-HTTP page URL; now correctly reports `skipped(non-http)`
64
+ - Cloudflare `handleLegacyCheckbox` selector list aligned with detection — dropped orphan `.cf-turnstile input[type="checkbox"]` selector that had no matching detection entry
65
+ - Cloudflare `safeWaitForNavigation` warn was unconditional; now `forceDebug`-gated (was spamming stderr on phishing-bypass nav failures in production)
66
+ - Cloudflare `enhancedParallelChallengeDetection` had zero callers — deleted
67
+ - `analyzeCloudflareChallenge` ignored managed-challenge signals (`.cf-managed-challenge`, `[data-cf-managed]`); now folded into `isChallengePresent`
68
+ - `isChallengeCompleted` double-queried the same DOM element; cached once
69
+ - Various correctness fixes across compare (inline hosts-comment stripping), curl, dry-run, flowproxy (error-path bug, cookie parsing), referrer, searchstring, validate_rules modules
70
+ - 30+ dead exports trimmed across nettools (11), cloudflare (18 → then re-trimmed after refactor), adblock, adblock-rust, compare, dry-run
71
+
72
+ ### Improved
73
+ - Dig/whois cache TTL 14h → 20h, capacity 1000 → 2000 entries each — covers overnight scan-then-rescan cadence without forcing fresh lookups
74
+ - nettools disk-cache writes now atomic (tmp + rename) — surviving SIGKILL/OOM/power-loss mid-write no longer leaves a truncated file that wipes the cache on next load
75
+ - Corrupt `.digcache`/`.whoiscache` files surface a `[dns-cache] X was unreadable (...); starting fresh` warn instead of silently resetting
76
+ - `dnsCacheStats.freshDig`/`freshWhois` arrays capped at 1000 entries (FIFO) — no more unbounded growth on scans with thousands of unique fresh lookups
77
+ - nettools `enableDiskCache` made idempotent (uses the previously-dead `diskCacheEnabled` flag); also warms the resolved-hostnames index from loaded entries
78
+ - 200+ log sites unified through `formatLogMessage` + subsystem tags across cloudflare, adblock, adblock-rust, compare, ignore_similar, validate_rules, wireguard_vpn, dry-run, smart-cache, flowproxy, browserexit, redirect, post-processing, cdp, output, interaction modules
79
+ - Cloudflare `runWithRetries` helper extracted — verification-challenge and phishing-warning retry harnesses collapsed from ~150 lines of duplication to thin hook-driven wrappers
80
+ - Cloudflare 14-line debug block in `handleVerificationChallenge` collapsed to one structured line: `Challenge detected: turnstile=t js=f ... title="..."`
81
+ - Cloudflare timing constants pruned (4 dead, 1 dead local var); `waitForTimeout(page, ms)` renamed to `fastTimeout(ms)`, unused `page` arg dropped
82
+ - Cloudflare `attemptChallengeSolve` post-failure diagnostic + `JS challenge` body.textContent now capped (2KB) per poll — was materializing MB on content-heavy pages
83
+ - adblock-rust: zero-copy deserialize, eager buffer release, FIFOCache rename for honest naming
84
+ - `interaction.js` performance: ~350ms saved per no-click interaction, ~750ms per with-click
85
+ - nwss per-URL timeout 120s → 75s for faster hang recovery
86
+ - Popup handler honors both `ignoreDomainsByUrl` and `blockDomainsByUrl`
87
+ - Early `ignoreDomains` gate added at main request handler — skips dig/whois/regex cycles on ignored hostnames
88
+ - `--dns-cache` help text refreshed (was stale "3hr/4hr TTL"; now "20h TTL, 2000-entry cap each")
89
+
90
+ ## [2.0.66] - 2026-05-20
91
+
92
+ ### Added
93
+ - DNS pre-check before `page.goto()` to skip unresolvable hosts fast — `--no-dns-precheck` to disable
94
+ - In-process SOCKS5 auth relay so `socks5://user:pass@host` URLs work end-to-end
95
+ - socks-relay handshake-phase watchdog so stalled clients can't sit forever
96
+ - DNS pre-check EAI_AGAIN retry-once + FIFO cap on negative cache
97
+
98
+ ### Fixed
99
+ - proxy.js: SOCKS auth false-success + SOCKS4 remote-DNS footgun
100
+ - DNS pre-check was starving under scan load (`dns.lookup` queued behind Puppeteer's libuv threadpool); switched to `dns.resolve` (c-ares, no threadpool contention)
101
+ - DNS pre-check: clear the timeout timer when lookup wins the race
102
+ - Bumped `ws` override to >=8.20.1 (CVE-2026-45736, GHSA-58qx-3vcg-4xpx)
103
+
104
+ ### Improved
105
+ - Neutralize Fullscreen API so sites can't hijack the window in `--headful` mode
106
+ - socks-relay: disable Nagle + reject unoffered no-auth selection
107
+
108
+ ## [2.0.65] - 2026-05-15
109
+
110
+ ### Added
111
+ - Cloudflare 5xx origin-error page detection — recognizes `<domain> | 5xx: <reason>` titles, marks as `error_page(522)` etc. instead of treating as a bypass target
112
+ - Per-URL Cloudflare outcome summary log with cookie state + error-code signal
113
+ - HTTP status + cf-ray captured at `page.goto()` time and threaded through to the Cloudflare outcome line
114
+ - Surface Cloudflare 5xx origin-error page count in scan stats
115
+ - HANG CHECK: per-URL progress counter + per-URL timeout + short-circuit queued URLs on restart flag
116
+ - Surface adblock-rust engine stats in debug exit output
117
+
118
+ ### Fixed
119
+ - HANG CHECK detection logic was debug-gated and never fired in production
120
+ - `--validate-config` TDZ crash by moving block below config load
121
+ - Scan-exit hang: cleanups now run on normal completion (was relying on `process.exit(0)` to skip them)
122
+ - nettools: pending-lookup leak + signal-handler conflict with nwss.js cleanup
123
+ - cloudflare: null-safe error categorization, unref'd cache timer, body.textContent reuse
124
+ - Suppressed contradictory "no indicators / error page detected" log pair
125
+
126
+ ### Improved
127
+ - cloudflare: precompile skip-proto regex, combine within-category selectors, rename outcome key
128
+ - redirect.js: skip `detectCommonJSRedirects` in production, cap `outerHTML`, filter `chrome-error://`
129
+ - Cloudflare module banner + "no indicators" log deduped (was firing once per URL)
130
+ - npm update: adblock-rs, lru-cache, puppeteer patch bumps
131
+ - Removed dead `scanner-script-org.js` prototype
132
+
133
+ ## [2.0.64] - 2026-05-02
134
+
135
+ ### Added
136
+ - `--adblock-engine=rust` option using Brave's adblock-rs (faster on large filter lists; requires `npm install adblock-rs`)
137
+ - Cache hygiene: atomic write, version key, 30-day prune, JSDoc
138
+
139
+ ### Fixed
140
+ - adblock-rs always returning `no_match` (4th arg to `engine.check` was missing — caused silent total-block-failure)
141
+ - Drop existsSync before readFileSync in cache load path (avoids redundant stat + TOCTOU)
142
+
143
+ ### Improved
144
+ - Reduce wrapper memory: zero-copy deserialize, eager buffer release
145
+ - Bumped `engines.node` floor to >=22
146
+ - npm update: `p-limit` 4.0 → 7.x (ESM API unchanged), `lru-cache` 10.4 → 11.3 (drop-in), `globals` 16.5 → 17.6 (dev-dep), `eslint` patch bump
147
+ - V8 micro-opts in adblock-rs hot path (null-proto resource-type map, bound engine.check)
148
+
149
+ ## [2.0.63] - 2026-04-25
150
+
151
+ ### Added
152
+ - `ignoreDomainsByUrl` config (top-level) — regex patterns; if any request URL matches, the request's root domain is dynamically ignored for the rest of the scan
153
+ - Redirect source and matching regex now included in `adblock_rules` log titles
154
+
155
+ ### Fixed
156
+ - Positional `.json` arg was ignored by config loader (always defaulted to `config.json`)
157
+ - ReferenceError on `allowedResourceTypes` in debug log
158
+ - ReferenceError on `matchedRegexPattern` in even_blocked path
159
+
160
+ ### Improved
161
+ - Convert resourceTypes filter to Set for O(1) lookups in hot path
162
+ - Sample `config.json` filterRegex values updated
163
+
164
+ ## [2.0.62] - 2026-04-25
165
+
166
+ ### Fixed
167
+ - TypeError in `SmartCache.getStats` when `requestCache` fails to initialize
168
+
5
169
  ## [2.0.61] - 2026-03-17
6
170
 
7
171
  ### Added
package/CLAUDE.md CHANGED
@@ -4,46 +4,57 @@ Puppeteer-based network scanner for analyzing web traffic, generating adblock fi
4
4
 
5
5
  ## Project Structure
6
6
 
7
- - `nwss.js` — Main entry point (~4,600 lines). CLI args, URL processing, orchestration.
7
+ - `nwss.js` — Main entry point (~5,800 lines). CLI args, URL processing, orchestration.
8
8
  - `config.json` — Default scan configuration (sites, filters, options).
9
- - `lib/` — 28 focused, single-purpose modules:
9
+ - `lib/` — 32 focused, single-purpose modules:
10
10
  - `fingerprint.js` — Bot detection evasion (device/GPU/timezone spoofing)
11
11
  - `cloudflare.js` — Cloudflare challenge detection and solving
12
12
  - `browserhealth.js` — Memory management and browser lifecycle
13
13
  - `interaction.js` — Human-like mouse/scroll/typing simulation
14
+ - `ghost-cursor.js` — Bezier-curve cursor pathing for human-like mouse movement
14
15
  - `smart-cache.js` — Multi-layer caching with persistence
15
16
  - `nettools.js` — WHOIS/dig integration
16
17
  - `output.js` — Multi-format rule output (adblock, dnsmasq, unbound, pihole, etc.)
17
18
  - `proxy.js` — SOCKS5/HTTP proxy support
19
+ - `socks-relay.js` — Local SOCKS proxy relay/chain helper
18
20
  - `wireguard_vpn.js` / `openvpn_vpn.js` — VPN routing
19
- - `adblock.js` — Adblock filter parsing and validation
21
+ - `adblock.js` — Adblock filter parsing and validation (native JS engine)
22
+ - `adblock-rust.js` — Drop-in adblock.js replacement backed by Brave's `adblock-rs` Rust engine; same matcher shape (`shouldBlock`, `getStats`, `rules`) so callers swap with one `require()`
20
23
  - `validate_rules.js` — Domain and rule format validation
21
24
  - `colorize.js` — Console output formatting and colors
22
25
  - `domain-cache.js` — Domain detection cache for performance
23
26
  - `post-processing.js` — Result cleanup and deduplication
27
+ - `spawn-async.js` — Shared `runProcess(cmd, args, opts)` helper used by curl/grep/searchstring; resolves (never rejects) with `{code, signal, stdout, stderr, truncated, error}`, enforces timeout + stdout caps
24
28
  - `redirect.js`, `referrer.js`, `cdp.js`, `curl.js`, `grep.js`, `compare.js`, `compress.js`, `dry-run.js`, `browserexit.js`, `clear_sitedata.js`, `flowproxy.js`, `ignore_similar.js`, `searchstring.js`
25
29
  - `.github/workflows/npm-publish.yml` — Automated npm publishing
26
30
  - `nwss.1` — Man page
27
31
 
28
32
  ## Tech Stack
29
33
 
30
- - **Node.js** >=22.0.0
31
- - **puppeteer** >=20.0.0 — Headless browser automation
32
- - **psl** — Public Suffix List for domain parsing
34
+ - **Node.js** >=22.12.0 (required for stable `require()` of ESM-only puppeteer 25)
35
+ - **puppeteer** >=24.0.0 — Headless browser automation. Range permits both v24 and v25; dev lockfile is on v25.
36
+ - **psl** — Public Suffix List for domain parsing (prefer this over hand-curated TLD lists)
33
37
  - **lru-cache** — LRU cache implementation
34
38
  - **p-limit** — Concurrency limiting (dynamically imported)
39
+ - **adblock-rs** — Optional native Rust filter engine, used by `lib/adblock-rust.js`. Install with `npm install adblock-rs` (requires Rust toolchain). Not a hard dep — `lib/adblock.js` is the default.
35
40
  - **eslint** — Linting (`npm run lint`)
36
41
 
37
42
  ## Conventions
38
43
 
39
44
  - Store modular functionality in `./lib/` with focused, single-purpose modules
40
45
  - Use `messageColors` and `formatLogMessage` from `./lib/colorize` for consistent console output
46
+ - Prefix every log line with a subsystem tag, e.g. `const TAG = messageColors.processing('[adblock]');` then `formatLogMessage('warn', `${TAG} ...`)`. Keeps mixed-module output attributable; every module in `lib/` follows this — match it when adding new ones.
47
+ - Pick severities deliberately: `warn` for actual errors/failures (cache write fail, native exception), `debug` for diagnostic chatter (cache misses, parse summaries, per-match traces)
41
48
  - Implement timeout protection for all Puppeteer operations using `Promise.race` patterns
42
49
  - Handle browser lifecycle with comprehensive cleanup in try-finally blocks
43
50
  - Validate all external tool availability before use (grep, curl, whois, dig)
44
51
  - Use `forceDebug` flag for detailed logging, `silentMode` for minimal output
45
52
  - Use `Object.freeze` for constant configuration objects (TIMEOUTS, CACHE_LIMITS, CONCURRENCY_LIMITS)
46
- - Use `fastTimeout(ms)` helper instead of `node:timers/promises` for Puppeteer 22.x compatibility
53
+ - Use `fastTimeout(ms)` helper instead of `node:timers/promises` for delays — project convention since the Puppeteer 22.x `page.waitForTimeout` removal, retained as the standard for all Promise-based sleeps
54
+ - Prefer `runProcess` from `./lib/spawn-async` over bare `child_process.spawn`/`spawnSync` for new external-tool calls. It resolves (never rejects), enforces a SIGKILL timeout + stdout cap, and returns a uniform result object. `lib/wireguard_vpn.js` intentionally stays on `spawnSync` — startup-only validation paths where sync is simpler. Don't follow that exception unless you have the same justification.
55
+ - Prefer `net.isIP()` over hand-rolled IPv4/IPv6 regexes for IP validation
56
+ - For disk-cache writes use the atomic `tmpPath = path + '.' + pid + '.tmp'` + `fs.renameSync` pattern (see `lib/adblock-rust.js`) so a killed process never leaves a half-written cache file
57
+ - Keep `module.exports` minimal — trim helpers that have no external consumers (grep the repo before deciding); internal-only functions stay as functions but leave the exports surface
47
58
 
48
59
  ## Running
49
60
 
@@ -55,6 +66,28 @@ node nwss.js --dry-run # Preview without network calls
55
66
  node nwss.js --headful # Launch with browser GUI
56
67
  ```
57
68
 
69
+ ## Stealth Testing
70
+
71
+ `scripts/test-stealth.js` is a smoke-test harness for the fingerprint spoofing
72
+ stack. Launches Puppeteer with `applyAllFingerprintSpoofing` applied (same
73
+ call shape nwss.js uses), navigates to public bot-detection pages, and
74
+ reports what they concluded. Use it to A/B a stealth change — run before the
75
+ edit, run after, diff. Found 3 real bugs that 5 rounds of static review
76
+ missed (PHANTOM/SELENIUM own-goal, PluginArray instanceof, Plugin toString).
77
+
78
+ ```bash
79
+ node scripts/test-stealth.js # all targets, human-readable
80
+ node scripts/test-stealth.js sannysoft # one target
81
+ node scripts/test-stealth.js --no-spoof # baseline (spoof disabled)
82
+ node scripts/test-stealth.js --format=json # machine-readable for diff/jq
83
+ node scripts/test-stealth.js --help # full flag list
84
+ ```
85
+
86
+ Set `PUPPETEER_NO_SANDBOX=1` when running as root (CI containers). The opt-out is off by
87
+ default (the sandbox stays enabled) so local dev doesn't silently drop it. The harness depends
88
+ on `USER_AGENT_COLLECTIONS` exported from `lib/fingerprint.js` — keep that
89
+ export in sync if the UA list changes.
90
+
58
91
  ## Files to Ignore
59
92
 
60
93
  - `node_modules/**`
package/README.md CHANGED
@@ -66,7 +66,8 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
66
66
  | `--use-puppeteer-core` | Use `puppeteer-core` with system Chrome instead of bundled Chromium |
67
67
  | `--use-obscura` | Connect to running Obscura CDP server (`ws://127.0.0.1:9222` or `OBSCURA_WS` env). Skips fingerprint injection — Obscura provides built-in stealth |
68
68
  | `--load-extension <path>` | Load unpacked Chrome extension from directory (can be used multiple times) |
69
- | `--dns-cache` | Persist dig/whois results to disk between runs (14hr TTL, `.digcache`/`.whoiscache`) |
69
+ | `--dns-cache` | Persist dig/whois results to disk between runs (20hr TTL, 2000-entry cap each, `.digcache`/`.whoiscache`). Disk writes are atomic (tmp + rename); corrupt cache files are detected on load with a `[dns-cache]` warn line and reset cleanly. |
70
+ | `--no-dns-precheck` | Disable per-URL DNS resolution check before page navigation. By default, hosts that dig/whois have already proven live (within the 20hr cache TTL) skip their c-ares pre-check via a positive-resolution index. |
70
71
  | `--block-ads=<files>` | Block ads using EasyList format rules (comma-separated: `easylist.txt,easyprivacy.txt`) |
71
72
  | `--cdp` | Enable Chrome DevTools Protocol logging (now per-page if enabled) |
72
73
  | `--remove-dupes` | Remove duplicate domains from output (only with `-o`) |
@@ -101,6 +102,12 @@ Example:
101
102
  "googleapis.com",
102
103
  "googletagmanager.com"
103
104
  ],
105
+ "ignoreDomainsByUrl": [
106
+ "\\/jwplayer\\/"
107
+ ],
108
+ "blockDomainsByUrl": [
109
+ "\\/tracker\\/"
110
+ ],
104
111
  "sites": [
105
112
  {
106
113
  "url": "https://example.com/",
@@ -461,9 +468,10 @@ These options go at the root level of your config.json:
461
468
 
462
469
  | Field | Values | Default | Description |
463
470
  |:---------------------|:-------|:-------:|:------------|
464
- | `ignoreDomains` | Array | - | Domains to completely ignore (supports wildcards like `*.ads.com`) |
465
- | `ignoreDomainsByUrl` | Array | - | Regex patterns; if a request URL matches, the request's root domain is dynamically ignored for the rest of the scan (e.g. `["\\/jwplayer\\/", "\\/build\\/assets\\/"]`) |
466
- | `blocked` | Array | - | Global regex patterns to block requests (combined with per-site blocked) |
471
+ | `ignoreDomains` | Array | - | Domains to completely ignore (supports wildcards like `*.ads.com`). Subdomains of any listed entry are also ignored via parent-walk (e.g. `example.com` ignores `cdn.example.com` and `a.b.example.com`). |
472
+ | `ignoreDomainsByUrl` | Array | - | Regex patterns; if a request URL matches, the request's root domain is dynamically ignored for the rest of the scan, as is any subsequent request to its subdomains (the cascade matches the static `ignoreDomains` semantics). Example: `["\\/jwplayer\\/", "\\/build\\/assets\\/"]` |
473
+ | `blockDomainsByUrl` | Array | - | Symmetric to `ignoreDomainsByUrl` but for active blocking. Regex patterns; if a request URL matches, the request's root domain is added to a dynamic block set and ALL subsequent requests on that root (and subdomains) are aborted via Puppeteer for the rest of the scan. The triggering request itself is also aborted. Use this when a single matching URL is sufficient evidence that the whole host is hostile. |
474
+ | `blocked` | Array | - | Global regex patterns to block requests (combined with per-site blocked). Patterns that fail to compile are warned about at scan start (`[config] blocked (global) pattern dropped (compile error): ...`) instead of crashing startup or silently disappearing. Per-pattern hit counts are reported at scan end via `[blocked-stats]` lines so stale patterns are easy to spot. |
467
475
  | `whois_server_mode` | String | `"random"` | Default server selection mode for all sites |
468
476
  | `ignore_similar` | Boolean | `true` | Ignore domains similar to already found domains |
469
477
  | `ignore_similar_threshold` | Integer | `80` | Similarity threshold percentage for ignore_similar |
@@ -782,4 +790,21 @@ your_username ALL=(root) NOPASSWD: /usr/bin/wg-quick, /usr/bin/wg
782
790
  - Ghost-cursor (`cursor_mode: "ghost"`) is optional — install with `npm i ghost-cursor`. Falls back to built-in mouse if not installed
783
791
  - Ghost-cursor duration defaults to `interact_duration` (or 2000ms), capped by the 15s hard timeout
784
792
 
793
+ ## Stealth Testing
794
+
795
+ `scripts/test-stealth.js` is a developer-facing smoke test for the fingerprint spoofing stack in `lib/fingerprint.js`. It launches Puppeteer with the same `applyAllFingerprintSpoofing` call that `nwss.js` uses, navigates to public bot-detection pages, and reports what they concluded — pass/warn/fail counts from sannysoft, trust score from creepjs, raw navigator values from browserleaks.
796
+
797
+ Use it to A/B a stealth change: run before the edit, run after, diff the output. Not a unit test — it doesn't assert; it reports.
798
+
799
+ ```bash
800
+ node scripts/test-stealth.js # all targets, human-readable
801
+ node scripts/test-stealth.js sannysoft # one target only
802
+ node scripts/test-stealth.js --no-spoof # baseline (spoof disabled)
803
+ node scripts/test-stealth.js --ua=firefox # spoof a different UA family
804
+ node scripts/test-stealth.js --format=json # machine-readable for diff/jq
805
+ node scripts/test-stealth.js --help # full flag list
806
+ ```
807
+
808
+ Set `PUPPETEER_NO_SANDBOX=1` when running as root (CI containers, some Docker setups). Off by default so local dev doesn't silently drop the Chromium sandbox.
809
+
785
810
  ---
@@ -10,6 +10,10 @@ const fs = require('fs');
10
10
  const path = require('path');
11
11
  const os = require('os');
12
12
  const crypto = require('crypto');
13
+ const { formatLogMessage, messageColors } = require('./colorize');
14
+ // Subsystem tag matches the project convention used by other modules
15
+ // (lib/adblock.js, flowproxy, cloudflare, curl, grep, etc.).
16
+ const ADBLOCK_RUST_TAG = messageColors.processing('[adblock-rust]');
13
17
 
14
18
  let adblockRust = null;
15
19
  let adblockRustVersion = null;
@@ -77,18 +81,19 @@ const RESOURCE_TYPE_MAP = Object.assign(Object.create(null), {
77
81
  '': ''
78
82
  });
79
83
 
80
- function normalizeResourceType(type) {
81
- if (!type) return '';
82
- return RESOURCE_TYPE_MAP[type] || 'other';
83
- }
84
+ // Removed: normalizeResourceType() helper. The hot path in shouldBlock
85
+ // inlines the (RESOURCE_TYPE_MAP[rt] || 'other') lookup directly to skip
86
+ // the function-call frame; the standalone helper had zero callers.
84
87
 
85
- // Small FIFO cache keyed on (url \0 sourceUrl \0 resourceType). Despite the
86
- // class name, eviction is insertion-order, not access-order — `get()` does not
87
- // promote. For this workload (per-page request bursts whose working set fits
88
- // in maxSize) FIFO and true LRU produce the same evictions, so the simpler
89
- // path wins. If cache effectiveness becomes a concern with larger working
90
- // sets, promote on hit by re-inserting (delete + set).
91
- class ResultLRU {
88
+ // Small FIFO cache keyed on (url \0 sourceUrl \0 resourceType). Eviction
89
+ // is insertion-order — `get()` does not promote. For this workload
90
+ // (per-page request bursts whose working set fits in maxSize) FIFO and
91
+ // true LRU produce the same evictions, so the simpler path wins. If
92
+ // cache effectiveness becomes a concern with larger working sets,
93
+ // promote on hit by re-inserting (delete + set). Renamed from ResultLRU
94
+ // since the previous name lied about the eviction policy — matches
95
+ // the FIFOCache rename in lib/adblock.js.
96
+ class FIFOCache {
92
97
  constructor(maxSize) {
93
98
  this.cache = new Map();
94
99
  this.maxSize = maxSize;
@@ -172,7 +177,7 @@ function parseAdblockRules(filePathOrArray, options = {}) {
172
177
  compiled = fs.readFileSync(cachePath);
173
178
  } catch (err) {
174
179
  if (err.code !== 'ENOENT' && enableLogging) {
175
- console.log(`[Adblock-Rust] Cache read failed (${err.message}); reparsing`);
180
+ console.log(formatLogMessage('debug', `${ADBLOCK_RUST_TAG} Cache read failed (${err.message}); reparsing`));
176
181
  }
177
182
  }
178
183
  if (compiled) {
@@ -196,7 +201,7 @@ function parseAdblockRules(filePathOrArray, options = {}) {
196
201
  // Corrupt cache or version mismatch — fall through to a fresh parse.
197
202
  engine = null;
198
203
  if (enableLogging) {
199
- console.log(`[Adblock-Rust] Cache deserialize failed (${err.message}); reparsing`);
204
+ console.log(formatLogMessage('debug', `${ADBLOCK_RUST_TAG} Cache deserialize failed (${err.message}); reparsing`));
200
205
  }
201
206
  }
202
207
  }
@@ -240,7 +245,7 @@ function parseAdblockRules(filePathOrArray, options = {}) {
240
245
  pruneOldCacheFiles(cacheDir, cacheTtlMs);
241
246
  } catch (err) {
242
247
  if (enableLogging) {
243
- console.log(`[Adblock-Rust] Cache write failed (${err.message}); continuing`);
248
+ console.log(formatLogMessage('warn', `${ADBLOCK_RUST_TAG} Cache write failed (${err.message}); continuing`));
244
249
  }
245
250
  }
246
251
  }
@@ -262,7 +267,7 @@ function parseAdblockRules(filePathOrArray, options = {}) {
262
267
  cacheMisses: 0
263
268
  };
264
269
 
265
- const resultCache = new ResultLRU(resultCacheSize);
270
+ const resultCache = new FIFOCache(resultCacheSize);
266
271
  // Hot-path optimization: shared "no_match" object — most checks return this,
267
272
  // skip per-call object allocation. Safe because callers only read fields.
268
273
  const NO_MATCH = Object.freeze({ blocked: false, rule: null, reason: 'no_match' });
@@ -275,9 +280,9 @@ function parseAdblockRules(filePathOrArray, options = {}) {
275
280
 
276
281
  if (enableLogging) {
277
282
  if (cacheHit) {
278
- console.log(`[Adblock-Rust] Restored compiled engine from ${cachePath} (${(totalBytes/1024/1024).toFixed(2)}MB source, ${filePaths.length} list${filePaths.length>1?'s':''})`);
283
+ console.log(formatLogMessage('debug', `${ADBLOCK_RUST_TAG} Restored compiled engine from ${cachePath} (${(totalBytes/1024/1024).toFixed(2)}MB source, ${filePaths.length} list${filePaths.length>1?'s':''})`));
279
284
  } else {
280
- console.log(`[Adblock-Rust] Compiled ${ruleCount} rules from ${filePaths.length} list${filePaths.length>1?'s':''} (${(totalBytes/1024/1024).toFixed(2)}MB)`);
285
+ console.log(formatLogMessage('debug', `${ADBLOCK_RUST_TAG} Compiled ${ruleCount} rules from ${filePaths.length} list${filePaths.length>1?'s':''} (${(totalBytes/1024/1024).toFixed(2)}MB)`));
281
286
  }
282
287
  }
283
288
 
@@ -315,7 +320,7 @@ function parseAdblockRules(filePathOrArray, options = {}) {
315
320
  } catch (err) {
316
321
  stats.errors++;
317
322
  if (enableLogging) {
318
- console.log(`[Adblock-Rust] Error checking ${url}: ${err.message}`);
323
+ console.log(formatLogMessage('warn', `${ADBLOCK_RUST_TAG} Error checking ${url}: ${err.message}`));
319
324
  }
320
325
  // Don't cache errors — next call may succeed (transient native panic).
321
326
  return { blocked: false, rule: null, reason: 'error' };