@fanboynz/network-scanner 2.0.66 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm-publish.yml +134 -10
- package/CHANGELOG.md +135 -0
- package/CLAUDE.md +18 -7
- package/README.md +12 -4
- package/lib/adblock-rust.js +23 -18
- package/lib/adblock.js +127 -82
- package/lib/browserexit.js +210 -200
- package/lib/browserhealth.js +84 -60
- package/lib/cdp.js +103 -81
- package/lib/clear_sitedata.js +61 -159
- package/lib/cloudflare.js +579 -409
- package/lib/colorize.js +29 -12
- package/lib/compare.js +16 -8
- package/lib/compress.js +2 -1
- package/lib/curl.js +287 -220
- package/lib/domain-cache.js +87 -40
- package/lib/dry-run.js +137 -194
- package/lib/fingerprint.js +20 -18
- package/lib/flowproxy.js +391 -188
- package/lib/ghost-cursor.js +8 -7
- package/lib/grep.js +248 -171
- package/lib/ignore_similar.js +70 -124
- package/lib/interaction.js +132 -235
- package/lib/nettools.js +309 -87
- package/lib/openvpn_vpn.js +12 -11
- package/lib/output.js +92 -59
- package/lib/post-processing.js +216 -162
- package/lib/redirect.js +46 -30
- package/lib/referrer.js +158 -165
- package/lib/searchstring.js +290 -381
- package/lib/smart-cache.js +141 -91
- package/lib/socks-relay.js +8 -7
- package/lib/spawn-async.js +137 -0
- package/lib/validate_rules.js +188 -176
- package/lib/wireguard_vpn.js +111 -117
- package/nwss.js +740 -156
- package/package.json +4 -4
|
@@ -1,41 +1,165 @@
|
|
|
1
1
|
name: Publish to NPM
|
|
2
|
+
|
|
2
3
|
on:
|
|
3
4
|
workflow_dispatch:
|
|
5
|
+
inputs:
|
|
6
|
+
version_bump:
|
|
7
|
+
description: 'Version bump type'
|
|
8
|
+
required: true
|
|
9
|
+
default: 'patch'
|
|
10
|
+
type: choice
|
|
11
|
+
options:
|
|
12
|
+
- patch
|
|
13
|
+
- minor
|
|
14
|
+
- major
|
|
15
|
+
- explicit
|
|
16
|
+
explicit_version:
|
|
17
|
+
description: 'Explicit version (required if version_bump = explicit, e.g. 3.0.0)'
|
|
18
|
+
required: false
|
|
19
|
+
default: ''
|
|
20
|
+
release_notes_source:
|
|
21
|
+
description: 'GitHub Release body source'
|
|
22
|
+
required: true
|
|
23
|
+
default: 'changelog'
|
|
24
|
+
type: choice
|
|
25
|
+
options:
|
|
26
|
+
- changelog
|
|
27
|
+
- manual
|
|
28
|
+
manual_release_notes:
|
|
29
|
+
description: 'Manual release notes (used if release_notes_source = manual; supports markdown)'
|
|
30
|
+
required: false
|
|
31
|
+
default: ''
|
|
32
|
+
prerelease:
|
|
33
|
+
description: 'Mark GitHub Release as pre-release'
|
|
34
|
+
required: false
|
|
35
|
+
default: false
|
|
36
|
+
type: boolean
|
|
4
37
|
|
|
5
38
|
jobs:
|
|
6
39
|
publish:
|
|
7
40
|
runs-on: ubuntu-latest
|
|
8
41
|
permissions:
|
|
9
42
|
contents: write
|
|
10
|
-
|
|
43
|
+
|
|
11
44
|
steps:
|
|
12
45
|
- uses: actions/checkout@v5
|
|
13
46
|
with:
|
|
14
47
|
token: ${{ secrets.GITHUB_TOKEN }}
|
|
15
48
|
fetch-depth: 0
|
|
16
|
-
|
|
49
|
+
|
|
17
50
|
- name: Setup Node.js
|
|
18
51
|
uses: actions/setup-node@v5
|
|
19
52
|
with:
|
|
20
53
|
node-version: '20'
|
|
21
54
|
registry-url: 'https://registry.npmjs.org'
|
|
22
|
-
|
|
55
|
+
|
|
23
56
|
- run: npm ci
|
|
24
57
|
- run: npm run lint
|
|
25
|
-
|
|
58
|
+
|
|
26
59
|
- name: Configure git
|
|
27
60
|
run: |
|
|
28
61
|
git config user.name "github-actions[bot]"
|
|
29
62
|
git config user.email "github-actions[bot]@users.noreply.github.com"
|
|
30
|
-
|
|
31
|
-
- name:
|
|
63
|
+
|
|
64
|
+
- name: Validate explicit-version input
  # Runs only for version_bump=explicit; fails the job before anything is
  # bumped, committed, or published when the supplied version is unusable.
  if: inputs.version_bump == 'explicit'
  env:
    # The free-text input is handed to the script through the environment
    # rather than being expanded into the script text. Workflow expressions
    # are substituted before the shell parses the script, so quotes, $() or
    # backticks in the input would otherwise be interpreted as shell code.
    EXPLICIT_VERSION: ${{ inputs.explicit_version }}
  run: |
    if [ -z "$EXPLICIT_VERSION" ]; then
      echo "::error::version_bump=explicit but explicit_version is empty"
      exit 1
    fi
    # Loose semver shape check -- npm version will do the strict validation
    if ! printf '%s\n' "$EXPLICIT_VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+([+-].+)?$'; then
      echo "::error::explicit_version '$EXPLICIT_VERSION' does not look like semver (e.g. 3.0.0)"
      exit 1
    fi
|
|
76
|
+
|
|
77
|
+
- name: Bump version + promote CHANGELOG Unreleased
  # Bumps package.json, renames the CHANGELOG [Unreleased] header to the new
  # version, then records both in one commit + tag.
  # Step outputs: `version` (new package.json version), `date` (UTC, YYYY-MM-DD).
  id: version
  env:
    # Inputs go through the environment so their content is never parsed
    # as part of the shell script itself.
    VERSION_BUMP: ${{ inputs.version_bump }}
    EXPLICIT_VERSION: ${{ inputs.explicit_version }}
  run: |
    # Use --no-git-tag-version so we can fold the CHANGELOG promotion
    # into the same commit + tag. Otherwise the auto-commit npm creates
    # would not include the changelog edit (and amending afterwards
    # would orphan the tag).
    if [ "$VERSION_BUMP" = "explicit" ]; then
      npm version "$EXPLICIT_VERSION" --no-git-tag-version
    else
      npm version "$VERSION_BUMP" --no-git-tag-version
    fi
    NEW_VERSION=$(node -p "require('./package.json').version")
    echo "version=$NEW_VERSION" >> "$GITHUB_OUTPUT"
    DATE=$(date -u +%Y-%m-%d)
    echo "date=$DATE" >> "$GITHUB_OUTPUT"

    # If CHANGELOG has an [Unreleased] section, rename its header to
    # the new version + today's date. Idempotent: skips if the header
    # is already a versioned one (e.g. for re-runs after a failure).
    if grep -q '^## \[Unreleased\]$' CHANGELOG.md; then
      sed -i "s/^## \[Unreleased\]\$/## [$NEW_VERSION] - $DATE/" CHANGELOG.md
      echo "Promoted [Unreleased] -> [$NEW_VERSION] - $DATE"
    else
      echo "::warning::No [Unreleased] section in CHANGELOG.md; skipping promotion. Release notes will use whatever already exists at [$NEW_VERSION]."
    fi

    # Single combined commit + tag so the tag points at HEAD containing
    # BOTH the package.json bump AND the changelog promotion.
    git add package.json package-lock.json CHANGELOG.md
    git commit -m "$NEW_VERSION"
    # Annotated (-a) on purpose: `git push --follow-tags` pushes only
    # annotated tags, so a lightweight tag would stay behind on the runner.
    git tag -a "v$NEW_VERSION" -m "v$NEW_VERSION"
|
|
109
|
+
|
|
110
|
+
- name: Extract release notes from CHANGELOG
  # Copies the CHANGELOG section for the freshly bumped version into the
  # `notes` step output (used as the GitHub Release body).
  id: changelog_notes
  if: inputs.release_notes_source == 'changelog'
  env:
    # Step output passed via the environment instead of being expanded
    # into the script text.
    NEW_VERSION: ${{ steps.version.outputs.version }}
  run: |
    # Pull every line between `## [VERSION]` and the next `## [` header.
    # The start header is matched as a literal prefix with index() rather
    # than a dynamic regex, so '.' and '+' in the version string are not
    # treated as regex metacharacters.
    # awk: flag=1 after the start line; flag=0 at the next ## [ header.
    NOTES=$(awk -v v="$NEW_VERSION" '
      index($0, "## [" v "]") == 1 { flag = 1; next }
      /^## \[/ { flag = 0 }
      flag { print }
    ' CHANGELOG.md)

    if [ -z "$(echo "$NOTES" | tr -d '[:space:]')" ]; then
      echo "::warning::No CHANGELOG section found for $NEW_VERSION -- release body will be empty"
    fi

    # Multi-line GITHUB_OUTPUT requires a delimited heredoc
    {
      echo "notes<<NOTES_EOF"
      echo "$NOTES"
      echo "NOTES_EOF"
    } >> "$GITHUB_OUTPUT"
|
|
133
|
+
|
|
134
|
+
- name: Use manual release notes
  # Forwards the operator-supplied notes into the `notes` step output
  # (used as the GitHub Release body when release_notes_source=manual).
  id: manual_notes
  if: inputs.release_notes_source == 'manual'
  env:
    # Markdown notes routinely contain backticks, double quotes and `$`.
    # Expanding the input straight into the script would run backtick
    # spans as command substitutions and let a `"` terminate the string,
    # so the text is handed over as an environment variable instead.
    MANUAL_RELEASE_NOTES: ${{ inputs.manual_release_notes }}
  run: |
    # Multi-line GITHUB_OUTPUT requires a delimited heredoc
    {
      echo "notes<<NOTES_EOF"
      # printf writes the notes verbatim (echo may interpret leading
      # option-like text or backslash escapes, depending on the shell)
      printf '%s\n' "$MANUAL_RELEASE_NOTES"
      echo "NOTES_EOF"
    } >> "$GITHUB_OUTPUT"
|
|
143
|
+
|
|
144
|
+
- name: Publish to npm
|
|
145
|
+
# Publish BEFORE pushing to GitHub: if npm rejects (auth/network/
|
|
146
|
+
# name-collision), nothing reaches GitHub and a re-run is clean.
|
|
147
|
+
run: npm publish
|
|
35
148
|
env:
|
|
36
149
|
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
|
37
|
-
|
|
38
|
-
- name: Push
|
|
150
|
+
|
|
151
|
+
- name: Push commits and tag
|
|
39
152
|
run: git push --follow-tags
|
|
40
153
|
env:
|
|
41
154
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
155
|
+
|
|
156
|
+
- name: Create GitHub Release
|
|
157
|
+
uses: softprops/action-gh-release@v2
|
|
158
|
+
with:
|
|
159
|
+
tag_name: v${{ steps.version.outputs.version }}
|
|
160
|
+
name: v${{ steps.version.outputs.version }}
|
|
161
|
+
body: ${{ inputs.release_notes_source == 'changelog' && steps.changelog_notes.outputs.notes || steps.manual_notes.outputs.notes }}
|
|
162
|
+
draft: false
|
|
163
|
+
prerelease: ${{ inputs.prerelease }}
|
|
164
|
+
env:
|
|
165
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,141 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to the Network Scanner (nwss.js) project.
|
|
4
4
|
|
|
5
|
+
## [3.0.0] - 2026-05-23
|
|
6
|
+
|
|
7
|
+
### Changed
|
|
8
|
+
- **Engines floor bumped**: `engines.node` from `>=22.0.0` to `>=22.12.0` to match Puppeteer 25's stable `require()`-of-ESM requirement. Anyone running on Node 22.0–22.11 will see an npm engine warning and should upgrade.
|
|
9
|
+
- **Puppeteer dependency floor bumped**: `puppeteer` and `puppeteer-core` from `>=20.0.0` to `>=24.0.0`. Range still permits both v24 and v25 — pick via `npm install puppeteer@24` or `npm install puppeteer@25` according to taste. Dev lockfile moved to `puppeteer@25.0.4`.
|
|
10
|
+
- Audit confirms no breaking-change impact from Puppeteer 25's `executablePath`/`defaultArgs` Promise return — neither is called in this codebase. `require('puppeteer')` continues to work on the now-ESM-only package thanks to Node 22.12+'s stable require-of-ESM.
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- `blockDomainsByUrl` config key (top-level) — regex patterns mirroring `ignoreDomainsByUrl` but for active blocking. A matching request URL triggers Puppeteer `request.abort()` on the triggering request, the request's root domain, and all subsequent requests to that domain or its subdomains for the rest of the scan
|
|
14
|
+
- Cloudflare aggregate stats accessible via `getAggregateStats({reset})` — returns `byOutcome`, `bySolveMethod`, `maxDurationMs`, `avgDurationMs`, `failures`, `timedOut` counts; bumped on every URL regardless of debug mode
|
|
15
|
+
- Cloudflare per-stage timing breakdown in outcome lines: `q=Xms p=Xms c=Xms` (zero-stage suffixes omitted)
|
|
16
|
+
- Production-level Cloudflare outcome logs: `warn` severity for `!overallSuccess || timedOut`, `info` for 5xx origin-error pages, debug-only on success
|
|
17
|
+
- DNS pre-check positive-resolution shortcut — hosts already proven live by dig or whois within the cache TTL skip the c-ares pre-check via a `knownResolvedHostnames` index (also warmed at startup from disk-loaded dig/whois caches)
|
|
18
|
+
- DNS pre-check skip summary now reports both NXDOMAIN-cache and positive-cache savings: `DNS pre-check skipped: N URL(s) via M unresolvable host(s), N URL(s) via M resolved host(s)`
|
|
19
|
+
- `[blocked-stats]` per-pattern hit counters reported at scan end — surfaces which `blocked` patterns are doing work vs. which are stale
|
|
20
|
+
- `disable_adblock` per-site config flag to escape global ad-blocking layers
|
|
21
|
+
- `capture_popups` now runs whois/dig validation on matched popup URLs
|
|
22
|
+
- `lib/spawn-async.js` shared async-spawn helper module — consolidates 4 near-identical Promise wrappers across curl/grep/searchstring
|
|
23
|
+
|
|
24
|
+
### Fixed
|
|
25
|
+
- **Security**: nettools shell-injection vector closed — `exec(string)` replaced with `execFile(cmd, args)` (no shell); config-supplied `whois_server` and `recordType` values can no longer execute commands via `$()`/backticks/etc.
|
|
26
|
+
- Cloudflare `detectChallengeLoop` off-by-one bug — counted the current URL against itself, tripping `>= 2` threshold one iteration early
|
|
27
|
+
- Cloudflare `detectChallengeLoop` threshold was unreachable with default `cloudflare_max_retries = 2`; new exact-match path catches reload-to-same-URL loops at attempt 2
|
|
28
|
+
- Cloudflare outcome cache namespace collision — now stored in a separate Map (was sharing keys with the detection cache, getting evicted by detection-cache pressure)
|
|
29
|
+
- `ignoreDomains` dynamic Set didn't cascade to subdomains — `ignoreDomainsByUrl` dynamic adds now apply parent-walk just like static config (e.g. dynamically-ignored `example.com` now also catches `cdn.example.com`)
|
|
30
|
+
- `blocked` / `blockDomainsByUrl` / `ignoreDomainsByUrl` regex compile failures unified — previously the two `*ByUrl` keys silently dropped a bad pattern while `blocked` hard-threw; now all warn loudly with `[config] X pattern dropped (compile error): "..." -- regex msg` and continue
|
|
31
|
+
- adblock pattern-cache key mismatch — anchored patterns (`||example.com`) were missing their own cache because get/set used different keys
|
|
32
|
+
- grep AND-logic silently dropped non-matching rules; ENOBUFS silently truncated output on large pages
|
|
33
|
+
- Cloudflare debug logs rendered literal `"undefined"` when detection short-circuited on non-HTTP pages (popup → about:blank case)
|
|
34
|
+
- Outcome label `no_indicators` was lying when detection short-circuited on non-HTTP page URL; now correctly reports `skipped(non-http)`
|
|
35
|
+
- Cloudflare `handleLegacyCheckbox` selector list aligned with detection — dropped orphan `.cf-turnstile input[type="checkbox"]` selector that had no matching detection entry
|
|
36
|
+
- Cloudflare `safeWaitForNavigation` warn was unconditional; now `forceDebug`-gated (was spamming stderr on phishing-bypass nav failures in production)
|
|
37
|
+
- Cloudflare `enhancedParallelChallengeDetection` had zero callers — deleted
|
|
38
|
+
- `analyzeCloudflareChallenge` ignored managed-challenge signals (`.cf-managed-challenge`, `[data-cf-managed]`); now folded into `isChallengePresent`
|
|
39
|
+
- `isChallengeCompleted` double-queried the same DOM element; cached once
|
|
40
|
+
- Various correctness fixes across compare (inline hosts-comment stripping), curl, dry-run, flowproxy (error-path bug, cookie parsing), referrer, searchstring, validate_rules modules
|
|
41
|
+
- 30+ dead exports trimmed across nettools (11), cloudflare (18, then trimmed again after the refactor), adblock, adblock-rust, compare, dry-run
|
|
42
|
+
|
|
43
|
+
### Improved
|
|
44
|
+
- Dig/whois cache TTL 14h → 20h, capacity 1000 → 2000 entries each — covers overnight scan-then-rescan cadence without forcing fresh lookups
|
|
45
|
+
- nettools disk-cache writes now atomic (tmp + rename) — surviving SIGKILL/OOM/power-loss mid-write no longer leaves a truncated file that wipes the cache on next load
|
|
46
|
+
- Corrupt `.digcache`/`.whoiscache` files surface a `[dns-cache] X was unreadable (...); starting fresh` warn instead of silently resetting
|
|
47
|
+
- `dnsCacheStats.freshDig`/`freshWhois` arrays capped at 1000 entries (FIFO) — no more unbounded growth on scans with thousands of unique fresh lookups
|
|
48
|
+
- nettools `enableDiskCache` made idempotent (uses the previously-dead `diskCacheEnabled` flag); also warms the resolved-hostnames index from loaded entries
|
|
49
|
+
- 200+ log sites unified through `formatLogMessage` + subsystem tags across cloudflare, adblock, adblock-rust, compare, ignore_similar, validate_rules, wireguard_vpn, dry-run, smart-cache, flowproxy, browserexit, redirect, post-processing, cdp, output, interaction modules
|
|
50
|
+
- Cloudflare `runWithRetries` helper extracted — verification-challenge and phishing-warning retry harnesses collapsed from ~150 lines of duplication to thin hook-driven wrappers
|
|
51
|
+
- Cloudflare 14-line debug block in `handleVerificationChallenge` collapsed to one structured line: `Challenge detected: turnstile=t js=f ... title="..."`
|
|
52
|
+
- Cloudflare timing constants pruned (4 dead, 1 dead local var); `waitForTimeout(page, ms)` renamed to `fastTimeout(ms)`, unused `page` arg dropped
|
|
53
|
+
- Cloudflare `attemptChallengeSolve` post-failure diagnostic + `JS challenge` body.textContent now capped (2KB) per poll — was materializing MB on content-heavy pages
|
|
54
|
+
- adblock-rust: zero-copy deserialize, eager buffer release, FIFOCache rename for honest naming
|
|
55
|
+
- `interaction.js` performance: ~350ms saved per no-click interaction, ~750ms per with-click
|
|
56
|
+
- nwss per-URL timeout 120s → 75s for faster hang recovery
|
|
57
|
+
- Popup handler honors both `ignoreDomainsByUrl` and `blockDomainsByUrl`
|
|
58
|
+
- Early `ignoreDomains` gate added at main request handler — skips dig/whois/regex cycles on ignored hostnames
|
|
59
|
+
- `--dns-cache` help text refreshed (was stale "3hr/4hr TTL"; now "20h TTL, 2000-entry cap each")
|
|
60
|
+
|
|
61
|
+
## [2.0.66] - 2026-05-20
|
|
62
|
+
|
|
63
|
+
### Added
|
|
64
|
+
- DNS pre-check before `page.goto()` to skip unresolvable hosts fast — `--no-dns-precheck` to disable
|
|
65
|
+
- In-process SOCKS5 auth relay so `socks5://user:pass@host` URLs work end-to-end
|
|
66
|
+
- socks-relay handshake-phase watchdog so stalled clients can't sit forever
|
|
67
|
+
- DNS pre-check EAI_AGAIN retry-once + FIFO cap on negative cache
|
|
68
|
+
|
|
69
|
+
### Fixed
|
|
70
|
+
- proxy.js: SOCKS auth false-success + SOCKS4 remote-DNS footgun
|
|
71
|
+
- DNS pre-check was starving under scan load (`dns.lookup` queued behind Puppeteer's libuv threadpool); switched to `dns.resolve` (c-ares, no threadpool contention)
|
|
72
|
+
- DNS pre-check: clear the timeout timer when lookup wins the race
|
|
73
|
+
- Bumped `ws` override to >=8.20.1 (CVE-2026-45736, GHSA-58qx-3vcg-4xpx)
|
|
74
|
+
|
|
75
|
+
### Improved
|
|
76
|
+
- Neutralize Fullscreen API so sites can't hijack the window in `--headful` mode
|
|
77
|
+
- socks-relay: disable Nagle + reject unoffered no-auth selection
|
|
78
|
+
|
|
79
|
+
## [2.0.65] - 2026-05-15
|
|
80
|
+
|
|
81
|
+
### Added
|
|
82
|
+
- Cloudflare 5xx origin-error page detection — recognizes `<domain> | 5xx: <reason>` titles, marks as `error_page(522)` etc. instead of treating as a bypass target
|
|
83
|
+
- Per-URL Cloudflare outcome summary log with cookie state + error-code signal
|
|
84
|
+
- HTTP status + cf-ray captured at `page.goto()` time and threaded through to the Cloudflare outcome line
|
|
85
|
+
- Surface Cloudflare 5xx origin-error page count in scan stats
|
|
86
|
+
- HANG CHECK: per-URL progress counter + per-URL timeout + short-circuit queued URLs on restart flag
|
|
87
|
+
- Surface adblock-rust engine stats in debug exit output
|
|
88
|
+
|
|
89
|
+
### Fixed
|
|
90
|
+
- HANG CHECK detection logic was debug-gated and never fired in production
|
|
91
|
+
- `--validate-config` TDZ crash by moving block below config load
|
|
92
|
+
- Scan-exit hang: cleanups now run on normal completion (was relying on `process.exit(0)` to skip them)
|
|
93
|
+
- nettools: pending-lookup leak + signal-handler conflict with nwss.js cleanup
|
|
94
|
+
- cloudflare: null-safe error categorization, unref'd cache timer, body.textContent reuse
|
|
95
|
+
- Suppressed contradictory "no indicators / error page detected" log pair
|
|
96
|
+
|
|
97
|
+
### Improved
|
|
98
|
+
- cloudflare: precompile skip-proto regex, combine within-category selectors, rename outcome key
|
|
99
|
+
- redirect.js: skip `detectCommonJSRedirects` in production, cap `outerHTML`, filter `chrome-error://`
|
|
100
|
+
- Cloudflare module banner + "no indicators" log deduped (was firing once per URL)
|
|
101
|
+
- npm update: adblock-rs, lru-cache, puppeteer patch bumps
|
|
102
|
+
- Removed dead `scanner-script-org.js` prototype
|
|
103
|
+
|
|
104
|
+
## [2.0.64] - 2026-05-02
|
|
105
|
+
|
|
106
|
+
### Added
|
|
107
|
+
- `--adblock-engine=rust` option using Brave's adblock-rs (faster on large filter lists; requires `npm install adblock-rs`)
|
|
108
|
+
- Cache hygiene: atomic write, version key, 30-day prune, JSDoc
|
|
109
|
+
|
|
110
|
+
### Fixed
|
|
111
|
+
- adblock-rs always returning `no_match` (4th arg to `engine.check` was missing — caused silent total-block-failure)
|
|
112
|
+
- Drop existsSync before readFileSync in cache load path (avoids redundant stat + TOCTOU)
|
|
113
|
+
|
|
114
|
+
### Improved
|
|
115
|
+
- Reduce wrapper memory: zero-copy deserialize, eager buffer release
|
|
116
|
+
- Bumped `engines.node` floor to >=22
|
|
117
|
+
- npm update: `p-limit` 4.0 → 7.x (ESM API unchanged), `lru-cache` 10.4 → 11.3 (drop-in), `globals` 16.5 → 17.6 (dev-dep), `eslint` patch bump
|
|
118
|
+
- V8 micro-opts in adblock-rs hot path (null-proto resource-type map, bound engine.check)
|
|
119
|
+
|
|
120
|
+
## [2.0.63] - 2026-04-25
|
|
121
|
+
|
|
122
|
+
### Added
|
|
123
|
+
- `ignoreDomainsByUrl` config (top-level) — regex patterns; if any request URL matches, the request's root domain is dynamically ignored for the rest of the scan
|
|
124
|
+
- Redirect source and matching regex now included in `adblock_rules` log titles
|
|
125
|
+
|
|
126
|
+
### Fixed
|
|
127
|
+
- Positional `.json` arg was ignored by config loader (always defaulted to `config.json`)
|
|
128
|
+
- ReferenceError on `allowedResourceTypes` in debug log
|
|
129
|
+
- ReferenceError on `matchedRegexPattern` in even_blocked path
|
|
130
|
+
|
|
131
|
+
### Improved
|
|
132
|
+
- Convert resourceTypes filter to Set for O(1) lookups in hot path
|
|
133
|
+
- Sample `config.json` filterRegex values updated
|
|
134
|
+
|
|
135
|
+
## [2.0.62] - 2026-04-25
|
|
136
|
+
|
|
137
|
+
### Fixed
|
|
138
|
+
- TypeError in `SmartCache.getStats` when `requestCache` fails to initialize
|
|
139
|
+
|
|
5
140
|
## [2.0.61] - 2026-03-17
|
|
6
141
|
|
|
7
142
|
### Added
|
package/CLAUDE.md
CHANGED
|
@@ -4,46 +4,57 @@ Puppeteer-based network scanner for analyzing web traffic, generating adblock fi
|
|
|
4
4
|
|
|
5
5
|
## Project Structure
|
|
6
6
|
|
|
7
|
-
- `nwss.js` — Main entry point (~
|
|
7
|
+
- `nwss.js` — Main entry point (~5,800 lines). CLI args, URL processing, orchestration.
|
|
8
8
|
- `config.json` — Default scan configuration (sites, filters, options).
|
|
9
|
-
- `lib/` —
|
|
9
|
+
- `lib/` — 32 focused, single-purpose modules:
|
|
10
10
|
- `fingerprint.js` — Bot detection evasion (device/GPU/timezone spoofing)
|
|
11
11
|
- `cloudflare.js` — Cloudflare challenge detection and solving
|
|
12
12
|
- `browserhealth.js` — Memory management and browser lifecycle
|
|
13
13
|
- `interaction.js` — Human-like mouse/scroll/typing simulation
|
|
14
|
+
- `ghost-cursor.js` — Bezier-curve cursor pathing for human-like mouse movement
|
|
14
15
|
- `smart-cache.js` — Multi-layer caching with persistence
|
|
15
16
|
- `nettools.js` — WHOIS/dig integration
|
|
16
17
|
- `output.js` — Multi-format rule output (adblock, dnsmasq, unbound, pihole, etc.)
|
|
17
18
|
- `proxy.js` — SOCKS5/HTTP proxy support
|
|
19
|
+
- `socks-relay.js` — Local SOCKS proxy relay/chain helper
|
|
18
20
|
- `wireguard_vpn.js` / `openvpn_vpn.js` — VPN routing
|
|
19
|
-
- `adblock.js` — Adblock filter parsing and validation
|
|
21
|
+
- `adblock.js` — Adblock filter parsing and validation (native JS engine)
|
|
22
|
+
- `adblock-rust.js` — Drop-in adblock.js replacement backed by Brave's `adblock-rs` Rust engine; same matcher shape (`shouldBlock`, `getStats`, `rules`) so callers swap with one `require()`
|
|
20
23
|
- `validate_rules.js` — Domain and rule format validation
|
|
21
24
|
- `colorize.js` — Console output formatting and colors
|
|
22
25
|
- `domain-cache.js` — Domain detection cache for performance
|
|
23
26
|
- `post-processing.js` — Result cleanup and deduplication
|
|
27
|
+
- `spawn-async.js` — Shared `runProcess(cmd, args, opts)` helper used by curl/grep/searchstring; resolves (never rejects) with `{code, signal, stdout, stderr, truncated, error}`, enforces timeout + stdout caps
|
|
24
28
|
- `redirect.js`, `referrer.js`, `cdp.js`, `curl.js`, `grep.js`, `compare.js`, `compress.js`, `dry-run.js`, `browserexit.js`, `clear_sitedata.js`, `flowproxy.js`, `ignore_similar.js`, `searchstring.js`
|
|
25
29
|
- `.github/workflows/npm-publish.yml` — Automated npm publishing
|
|
26
30
|
- `nwss.1` — Man page
|
|
27
31
|
|
|
28
32
|
## Tech Stack
|
|
29
33
|
|
|
30
|
-
- **Node.js** >=22.
|
|
31
|
-
- **puppeteer** >=
|
|
32
|
-
- **psl** — Public Suffix List for domain parsing
|
|
34
|
+
- **Node.js** >=22.12.0 (required for stable `require()` of ESM-only puppeteer 25)
|
|
35
|
+
- **puppeteer** >=24.0.0 — Headless browser automation. Range permits both v24 and v25; dev lockfile is on v25.
|
|
36
|
+
- **psl** — Public Suffix List for domain parsing (prefer this over hand-curated TLD lists)
|
|
33
37
|
- **lru-cache** — LRU cache implementation
|
|
34
38
|
- **p-limit** — Concurrency limiting (dynamically imported)
|
|
39
|
+
- **adblock-rs** — Optional native Rust filter engine, used by `lib/adblock-rust.js`. Install with `npm install adblock-rs` (requires Rust toolchain). Not a hard dep — `lib/adblock.js` is the default.
|
|
35
40
|
- **eslint** — Linting (`npm run lint`)
|
|
36
41
|
|
|
37
42
|
## Conventions
|
|
38
43
|
|
|
39
44
|
- Store modular functionality in `./lib/` with focused, single-purpose modules
|
|
40
45
|
- Use `messageColors` and `formatLogMessage` from `./lib/colorize` for consistent console output
|
|
46
|
+
- Prefix every log line with a subsystem tag, e.g. `const TAG = messageColors.processing('[adblock]');` then ``formatLogMessage('warn', `${TAG} ...`)``. Keeps mixed-module output attributable; every module in `lib/` follows this — match it when adding new ones.
|
|
47
|
+
- Pick severities deliberately: `warn` for actual errors/failures (cache write fail, native exception), `debug` for diagnostic chatter (cache misses, parse summaries, per-match traces)
|
|
41
48
|
- Implement timeout protection for all Puppeteer operations using `Promise.race` patterns
|
|
42
49
|
- Handle browser lifecycle with comprehensive cleanup in try-finally blocks
|
|
43
50
|
- Validate all external tool availability before use (grep, curl, whois, dig)
|
|
44
51
|
- Use `forceDebug` flag for detailed logging, `silentMode` for minimal output
|
|
45
52
|
- Use `Object.freeze` for constant configuration objects (TIMEOUTS, CACHE_LIMITS, CONCURRENCY_LIMITS)
|
|
46
|
-
- Use `fastTimeout(ms)` helper instead of `node:timers/promises` for Puppeteer 22.x
|
|
53
|
+
- Use `fastTimeout(ms)` helper instead of `node:timers/promises` for delays — project convention since the Puppeteer 22.x `page.waitForTimeout` removal, retained as the standard for all Promise-based sleeps
|
|
54
|
+
- Prefer `runProcess` from `./lib/spawn-async` over bare `child_process.spawn`/`spawnSync` for new external-tool calls. It resolves (never rejects), enforces a SIGKILL timeout + stdout cap, and returns a uniform result object. `lib/wireguard_vpn.js` intentionally stays on `spawnSync` — startup-only validation paths where sync is simpler. Don't follow that exception unless you have the same justification.
|
|
55
|
+
- Prefer `net.isIP()` over hand-rolled IPv4/IPv6 regexes for IP validation
|
|
56
|
+
- For disk-cache writes use the atomic `tmpPath = path + '.' + pid + '.tmp'` + `fs.renameSync` pattern (see `lib/adblock-rust.js`) so a killed process never leaves a half-written cache file
|
|
57
|
+
- Keep `module.exports` minimal — trim helpers that have no external consumers (grep the repo before deciding); internal-only functions stay as functions but leave the exports surface
|
|
47
58
|
|
|
48
59
|
## Running
|
|
49
60
|
|
package/README.md
CHANGED
|
@@ -66,7 +66,8 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
|
|
|
66
66
|
| `--use-puppeteer-core` | Use `puppeteer-core` with system Chrome instead of bundled Chromium |
|
|
67
67
|
| `--use-obscura` | Connect to running Obscura CDP server (`ws://127.0.0.1:9222` or `OBSCURA_WS` env). Skips fingerprint injection — Obscura provides built-in stealth |
|
|
68
68
|
| `--load-extension <path>` | Load unpacked Chrome extension from directory (can be used multiple times) |
|
|
69
|
-
| `--dns-cache` | Persist dig/whois results to disk between runs (
|
|
69
|
+
| `--dns-cache` | Persist dig/whois results to disk between runs (20hr TTL, 2000-entry cap each, `.digcache`/`.whoiscache`). Disk writes are atomic (tmp + rename); corrupt cache files are detected on load with a `[dns-cache]` warn line and reset cleanly. |
|
|
70
|
+
| `--no-dns-precheck` | Disable per-URL DNS resolution check before page navigation. By default, hosts that dig/whois have already proven live (within the 20hr cache TTL) skip their c-ares pre-check via a positive-resolution index. |
|
|
70
71
|
| `--block-ads=<files>` | Block ads using EasyList format rules (comma-separated: `easylist.txt,easyprivacy.txt`) |
|
|
71
72
|
| `--cdp` | Enable Chrome DevTools Protocol logging (now per-page if enabled) |
|
|
72
73
|
| `--remove-dupes` | Remove duplicate domains from output (only with `-o`) |
|
|
@@ -101,6 +102,12 @@ Example:
|
|
|
101
102
|
"googleapis.com",
|
|
102
103
|
"googletagmanager.com"
|
|
103
104
|
],
|
|
105
|
+
"ignoreDomainsByUrl": [
|
|
106
|
+
"\\/jwplayer\\/"
|
|
107
|
+
],
|
|
108
|
+
"blockDomainsByUrl": [
|
|
109
|
+
"\\/tracker\\/"
|
|
110
|
+
],
|
|
104
111
|
"sites": [
|
|
105
112
|
{
|
|
106
113
|
"url": "https://example.com/",
|
|
@@ -461,9 +468,10 @@ These options go at the root level of your config.json:
|
|
|
461
468
|
|
|
462
469
|
| Field | Values | Default | Description |
|
|
463
470
|
|:---------------------|:-------|:-------:|:------------|
|
|
464
|
-
| `ignoreDomains` | Array | - | Domains to completely ignore (supports wildcards like `*.ads.com`) |
|
|
465
|
-
| `ignoreDomainsByUrl` | Array | - | Regex patterns; if a request URL matches, the request's root domain is dynamically ignored for the rest of the scan (
|
|
466
|
-
| `
|
|
471
|
+
| `ignoreDomains` | Array | - | Domains to completely ignore (supports wildcards like `*.ads.com`). Subdomains of any listed entry are also ignored via parent-walk (e.g. `example.com` ignores `cdn.example.com` and `a.b.example.com`). |
|
|
472
|
+
| `ignoreDomainsByUrl` | Array | - | Regex patterns; if a request URL matches, the request's root domain is dynamically ignored for the rest of the scan AND any subsequent request to its subdomains (cascade matches the static `ignoreDomains` semantic). Example: `["\\/jwplayer\\/", "\\/build\\/assets\\/"]` |
|
|
473
|
+
| `blockDomainsByUrl` | Array | - | Symmetric to `ignoreDomainsByUrl` but for active blocking. Regex patterns; if a request URL matches, the request's root domain is added to a dynamic block set and ALL subsequent requests on that root (and subdomains) are aborted via Puppeteer for the rest of the scan. The triggering request itself is also aborted. Use when seeing a trigger URL is sufficient evidence the whole host is hostile. |
|
|
474
|
+
| `blocked` | Array | - | Global regex patterns to block requests (combined with per-site blocked). Patterns that fail to compile are warned about at scan start (`[config] blocked (global) pattern dropped (compile error): ...`) instead of crashing startup or silently disappearing. Per-pattern hit counts are reported at scan end via `[blocked-stats]` lines so stale patterns are easy to spot. |
|
|
467
475
|
| `whois_server_mode` | String | `"random"` | Default server selection mode for all sites |
|
|
468
476
|
| `ignore_similar` | Boolean | `true` | Ignore domains similar to already found domains |
|
|
469
477
|
| `ignore_similar_threshold` | Integer | `80` | Similarity threshold percentage for ignore_similar |
|
package/lib/adblock-rust.js
CHANGED
|
@@ -10,6 +10,10 @@ const fs = require('fs');
|
|
|
10
10
|
const path = require('path');
|
|
11
11
|
const os = require('os');
|
|
12
12
|
const crypto = require('crypto');
|
|
13
|
+
const { formatLogMessage, messageColors } = require('./colorize');
|
|
14
|
+
// Subsystem tag matches the project convention used by other modules
|
|
15
|
+
// (lib/adblock.js, flowproxy, cloudflare, curl, grep, etc.).
|
|
16
|
+
const ADBLOCK_RUST_TAG = messageColors.processing('[adblock-rust]');
|
|
13
17
|
|
|
14
18
|
let adblockRust = null;
|
|
15
19
|
let adblockRustVersion = null;
|
|
@@ -77,18 +81,19 @@ const RESOURCE_TYPE_MAP = Object.assign(Object.create(null), {
|
|
|
77
81
|
'': ''
|
|
78
82
|
});
|
|
79
83
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
}
|
|
84
|
+
// Removed: normalizeResourceType() helper. The hot path in shouldBlock
|
|
85
|
+
// inlines the (RESOURCE_TYPE_MAP[rt] || 'other') lookup directly to skip
|
|
86
|
+
// the function-call frame; the standalone helper had zero callers.
|
|
84
87
|
|
|
85
|
-
// Small FIFO cache keyed on (url \0 sourceUrl \0 resourceType).
|
|
86
|
-
//
|
|
87
|
-
//
|
|
88
|
-
//
|
|
89
|
-
//
|
|
90
|
-
//
|
|
91
|
-
|
|
88
|
+
// Small FIFO cache keyed on (url \0 sourceUrl \0 resourceType). Eviction
|
|
89
|
+
// is insertion-order — `get()` does not promote. For this workload
|
|
90
|
+
// (per-page request bursts whose working set fits in maxSize) FIFO and
|
|
91
|
+
// true LRU produce the same evictions, so the simpler path wins. If
|
|
92
|
+
// cache effectiveness becomes a concern with larger working sets,
|
|
93
|
+
// promote on hit by re-inserting (delete + set). Renamed from ResultLRU
|
|
94
|
+
// since the previous name lied about the eviction policy — matches
|
|
95
|
+
// the FIFOCache rename in lib/adblock.js.
|
|
96
|
+
class FIFOCache {
|
|
92
97
|
constructor(maxSize) {
|
|
93
98
|
this.cache = new Map();
|
|
94
99
|
this.maxSize = maxSize;
|
|
@@ -172,7 +177,7 @@ function parseAdblockRules(filePathOrArray, options = {}) {
|
|
|
172
177
|
compiled = fs.readFileSync(cachePath);
|
|
173
178
|
} catch (err) {
|
|
174
179
|
if (err.code !== 'ENOENT' && enableLogging) {
|
|
175
|
-
console.log(
|
|
180
|
+
console.log(formatLogMessage('debug', `${ADBLOCK_RUST_TAG} Cache read failed (${err.message}); reparsing`));
|
|
176
181
|
}
|
|
177
182
|
}
|
|
178
183
|
if (compiled) {
|
|
@@ -196,7 +201,7 @@ function parseAdblockRules(filePathOrArray, options = {}) {
|
|
|
196
201
|
// Corrupt cache or version mismatch — fall through to a fresh parse.
|
|
197
202
|
engine = null;
|
|
198
203
|
if (enableLogging) {
|
|
199
|
-
console.log(
|
|
204
|
+
console.log(formatLogMessage('debug', `${ADBLOCK_RUST_TAG} Cache deserialize failed (${err.message}); reparsing`));
|
|
200
205
|
}
|
|
201
206
|
}
|
|
202
207
|
}
|
|
@@ -240,7 +245,7 @@ function parseAdblockRules(filePathOrArray, options = {}) {
|
|
|
240
245
|
pruneOldCacheFiles(cacheDir, cacheTtlMs);
|
|
241
246
|
} catch (err) {
|
|
242
247
|
if (enableLogging) {
|
|
243
|
-
console.log(
|
|
248
|
+
console.log(formatLogMessage('warn', `${ADBLOCK_RUST_TAG} Cache write failed (${err.message}); continuing`));
|
|
244
249
|
}
|
|
245
250
|
}
|
|
246
251
|
}
|
|
@@ -262,7 +267,7 @@ function parseAdblockRules(filePathOrArray, options = {}) {
|
|
|
262
267
|
cacheMisses: 0
|
|
263
268
|
};
|
|
264
269
|
|
|
265
|
-
const resultCache = new
|
|
270
|
+
const resultCache = new FIFOCache(resultCacheSize);
|
|
266
271
|
// Hot-path optimization: shared "no_match" object — most checks return this,
|
|
267
272
|
// skip per-call object allocation. Safe because callers only read fields.
|
|
268
273
|
const NO_MATCH = Object.freeze({ blocked: false, rule: null, reason: 'no_match' });
|
|
@@ -275,9 +280,9 @@ function parseAdblockRules(filePathOrArray, options = {}) {
|
|
|
275
280
|
|
|
276
281
|
if (enableLogging) {
|
|
277
282
|
if (cacheHit) {
|
|
278
|
-
console.log(
|
|
283
|
+
console.log(formatLogMessage('debug', `${ADBLOCK_RUST_TAG} Restored compiled engine from ${cachePath} (${(totalBytes/1024/1024).toFixed(2)}MB source, ${filePaths.length} list${filePaths.length>1?'s':''})`));
|
|
279
284
|
} else {
|
|
280
|
-
console.log(
|
|
285
|
+
console.log(formatLogMessage('debug', `${ADBLOCK_RUST_TAG} Compiled ${ruleCount} rules from ${filePaths.length} list${filePaths.length>1?'s':''} (${(totalBytes/1024/1024).toFixed(2)}MB)`));
|
|
281
286
|
}
|
|
282
287
|
}
|
|
283
288
|
|
|
@@ -315,7 +320,7 @@ function parseAdblockRules(filePathOrArray, options = {}) {
|
|
|
315
320
|
} catch (err) {
|
|
316
321
|
stats.errors++;
|
|
317
322
|
if (enableLogging) {
|
|
318
|
-
console.log(
|
|
323
|
+
console.log(formatLogMessage('warn', `${ADBLOCK_RUST_TAG} Error checking ${url}: ${err.message}`));
|
|
319
324
|
}
|
|
320
325
|
// Don't cache errors — next call may succeed (transient native panic).
|
|
321
326
|
return { blocked: false, rule: null, reason: 'error' };
|