@fanboynz/network-scanner 2.0.63 → 2.0.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CLAUDE.md CHANGED
@@ -27,7 +27,7 @@ Puppeteer-based network scanner for analyzing web traffic, generating adblock fi
27
27
 
28
28
  ## Tech Stack
29
29
 
30
- - **Node.js** >=20.0.0
30
+ - **Node.js** >=22.0.0
31
31
  - **puppeteer** >=20.0.0 — Headless browser automation
32
32
  - **psl** — Public Suffix List for domain parsing
33
33
  - **lru-cache** — LRU cache implementation
@@ -0,0 +1,368 @@
1
+ // === Adblock Rust Engine Wrapper (adblock-rust.js) ===
2
+ // Drop-in replacement for ./lib/adblock that delegates matching to Brave's
3
+ // adblock-rust engine (npm: adblock-rs) for higher throughput on large lists.
4
+ //
5
+ // Exposes the same parseAdblockRules(filePath, options) factory and the same
6
+ // matcher shape ({ shouldBlock, getStats, rules }) so nwss.js can switch
7
+ // engines with a single require() swap.
8
+
9
+ const fs = require('fs');
10
+ const path = require('path');
11
+ const os = require('os');
12
+ const crypto = require('crypto');
13
+
14
+ let adblockRust = null;
15
+ let adblockRustVersion = null;
16
+ function loadAdblockRust() {
17
+ if (adblockRust) return adblockRust;
18
+ try {
19
+ adblockRust = require('adblock-rs');
20
+ // Read once for the cache key — serialized engine format is not promised
21
+ // stable across versions, so partitioning cache files by version means
22
+ // upgrades cleanly invalidate without producing confusing deserialize
23
+ // failures on the warm path.
24
+ adblockRustVersion = require('adblock-rs/package.json').version;
25
+ } catch (err) {
26
+ throw new Error(
27
+ "adblock-rs is not installed. Install with: npm install adblock-rs " +
28
+ "(requires Rust toolchain for native build). Original error: " + err.message
29
+ );
30
+ }
31
+ return adblockRust;
32
+ }
33
+
34
/**
 * Best-effort removal of stale serialized-engine files.
 *
 * Deletes every `*.bin` and `*.tmp` entry directly inside `cacheDir` whose
 * modification time is older than `maxAgeMs`. Other file names are left alone
 * in case the directory is shared. All filesystem errors are swallowed:
 * cleanup failure must never block a scan.
 *
 * @param {string} cacheDir - Directory holding the compiled-engine cache.
 * @param {number} maxAgeMs - Maximum age in milliseconds before a file is removed.
 * @returns {void}
 */
function pruneOldCacheFiles(cacheDir, maxAgeMs) {
  let candidates;
  let oldestAllowedMs;
  try {
    oldestAllowedMs = Date.now() - maxAgeMs;
    // `.tmp` covers stray writes left behind by killed processes.
    candidates = fs
      .readdirSync(cacheDir)
      .filter((entry) => entry.endsWith('.bin') || entry.endsWith('.tmp'));
  } catch (_) {
    // Directory missing or unreadable — nothing to prune.
    return;
  }

  for (const entry of candidates) {
    try {
      const target = path.join(cacheDir, entry);
      const { mtimeMs } = fs.statSync(target);
      if (mtimeMs < oldestAllowedMs) {
        fs.unlinkSync(target);
      }
    } catch (_) {
      // File vanished mid-walk — fine.
    }
  }
}
53
+
54
// Translation table from Puppeteer/CDP resource type names to the request
// type strings handed to adblock-rust. Built on a null-prototype object so a
// lookup never walks the prototype chain (names such as 'toString' simply
// miss) — a free win on a lookup that runs once per network request.
const RESOURCE_TYPE_MAP = Object.create(null);
for (const [cdpType, engineType] of [
  ['document', 'main_frame'],
  ['subdocument', 'sub_frame'],
  ['stylesheet', 'stylesheet'],
  ['script', 'script'],
  ['image', 'image'],
  ['font', 'font'],
  ['media', 'media'],
  ['texttrack', 'media'],
  ['xhr', 'xmlhttprequest'],
  ['fetch', 'xmlhttprequest'],
  ['xmlhttprequest', 'xmlhttprequest'],
  ['eventsource', 'other'],
  ['websocket', 'websocket'],
  ['manifest', 'other'],
  ['signedexchange', 'other'],
  ['ping', 'ping'],
  ['cspviolationreport', 'other'],
  ['preflight', 'other'],
  ['other', 'other'],
  ['', '']
]) {
  RESOURCE_TYPE_MAP[cdpType] = engineType;
}

/**
 * Convert a Puppeteer/CDP resource type into the adblock-rust request type.
 *
 * @param {string} [type] - Resource type name; may be empty or undefined.
 * @returns {string} The mapped type, 'other' for unrecognised names, or ''
 *   when no type was supplied.
 */
function normalizeResourceType(type) {
  return type ? (RESOURCE_TYPE_MAP[type] || 'other') : '';
}
84
+
85
// Small FIFO cache keyed on (url \0 sourceUrl \0 resourceType). Despite the
// class name, eviction is insertion-order, not access-order — `get()` does not
// promote. For this workload (per-page request bursts whose working set fits
// in maxSize) FIFO and true LRU produce the same evictions, so the simpler
// path wins. If cache effectiveness becomes a concern with larger working
// sets, promote on hit by re-inserting (delete + set).
class ResultLRU {
  /**
   * @param {number} maxSize - Entry count at which inserting a new key evicts
   *   the oldest one.
   */
  constructor(maxSize) {
    this.cache = new Map();
    this.maxSize = maxSize;
  }

  /**
   * @param {string} k - Cache key.
   * @returns {*} The stored value, or undefined when the key is absent.
   */
  get(k) { return this.cache.get(k); }

  /**
   * Store a value, evicting the oldest entry only when a NEW key would grow
   * the cache past maxSize. Overwriting an existing key does not change the
   * entry count, so it must not push another entry out.
   *
   * @param {string} k - Cache key.
   * @param {*} v - Value to store.
   * @returns {void}
   */
  set(k, v) {
    if (!this.cache.has(k) && this.cache.size >= this.maxSize) {
      // Map iterates in insertion order, so the first key is the oldest.
      this.cache.delete(this.cache.keys().next().value);
    }
    this.cache.set(k, v);
  }
}
104
+
105
// Read every filter list into memory and derive the disk-cache key.
// The key hashes the raw bytes in order, so it reflects both content changes
// and list-order changes. The adblock-rs version is mixed in so a library
// upgrade (which may change the serialized format) doesn't try to deserialize
// an incompatible blob.
function readFilterLists(filePaths) {
  const buffers = [];
  const hash = crypto.createHash('sha256');
  hash.update('adblock-rs:' + adblockRustVersion + '\0');
  let totalBytes = 0;
  for (const fp of filePaths) {
    let buf;
    try {
      buf = fs.readFileSync(fp);
    } catch (err) {
      // Only claim "not found" when that is what happened; permission and
      // is-a-directory failures keep their real reason. The original error is
      // preserved as `cause` either way.
      if (err.code === 'ENOENT') {
        throw new Error(`Adblock rules file not found: ${fp}`, { cause: err });
      }
      throw new Error(`Adblock rules file could not be read: ${fp} (${err.message})`, { cause: err });
    }
    buffers.push(buf);
    hash.update(buf);
    hash.update('\0');
    totalBytes += buf.length;
  }
  return { buffers, totalBytes, cacheKey: hash.digest('hex') };
}

// Fast path: restore a previously serialized engine from cachePath.
// Returns the engine, or null on any miss (absent file, read error, or
// deserialize failure). There is deliberately no existsSync pre-check: letting
// readFileSync throw ENOENT avoids a redundant stat() and the TOCTOU race where
// the file is removed between the check and the read.
function tryLoadCachedEngine(rust, cachePath, enableLogging) {
  let compiled;
  try {
    compiled = fs.readFileSync(cachePath);
  } catch (err) {
    if (err.code !== 'ENOENT' && enableLogging) {
      console.log(`[Adblock-Rust] Cache read failed (${err.message}); reparsing`);
    }
    return null;
  }

  try {
    const engine = new rust.Engine(new rust.FilterSet(enableLogging), true);
    // Hand over the Buffer's own ArrayBuffer without copying when the Buffer
    // spans it exactly. Otherwise the Buffer is a view into a larger backing
    // store, so slice out just our bytes rather than passing unrelated data.
    const spansWholeBuffer =
      compiled.byteOffset === 0 && compiled.byteLength === compiled.buffer.byteLength;
    const ab = spansWholeBuffer
      ? compiled.buffer
      : compiled.buffer.slice(compiled.byteOffset, compiled.byteOffset + compiled.byteLength);
    engine.deserialize(ab);
    return engine;
  } catch (err) {
    // Corrupt cache or version mismatch — caller falls through to a fresh parse.
    if (enableLogging) {
      console.log(`[Adblock-Rust] Cache deserialize failed (${err.message}); reparsing`);
    }
    return null;
  }
}

// Slow path: feed every list to a FilterSet and build an engine from it.
// addFilters is called per file so the per-list handling stays independent.
// Each buffer reference is dropped as soon as it is consumed so GC can reclaim
// the file bytes mid-loop instead of keeping every input alive until return.
function compileEngine(rust, buffers, enableLogging) {
  const filterSet = new rust.FilterSet(enableLogging);
  let ruleCount = 0;
  for (let i = 0; i < buffers.length; i++) {
    const buf = buffers[i];
    buffers[i] = null;
    // Split on LF or CRLF: with a plain '\n' split, CRLF lists leave a
    // trailing '\r' on every line and blank lines get counted as rules.
    const lines = buf.toString('utf-8').split(/\r?\n/);
    for (const line of lines) {
      // Skip blank lines and '!' comment lines when counting.
      if (line.length === 0 || line.charCodeAt(0) === 0x21) continue;
      ruleCount++;
    }
    filterSet.addFilters(lines);
  }
  return { engine: new rust.Engine(filterSet, true), ruleCount };
}

// Persist a compiled engine to cachePath, then prune stale cache files.
// Failures are logged (when enabled) and otherwise ignored — caching is an
// optimization and must not fail the parse.
function tryWriteEngineCache(engine, cacheDir, cachePath, cacheTtlMs, enableLogging) {
  // Atomic write: write to a per-pid tmp path then rename, so the final
  // cachePath is either complete or absent — never half-written.
  const tmpPath = cachePath + '.' + process.pid + '.tmp';
  try {
    fs.mkdirSync(cacheDir, { recursive: true });
    const serialized = engine.serialize();
    fs.writeFileSync(tmpPath, Buffer.from(serialized));
    fs.renameSync(tmpPath, cachePath);
    // Prune after our own write so we never delete the entry we just created.
    pruneOldCacheFiles(cacheDir, cacheTtlMs);
  } catch (err) {
    // Don't leave a partial tmp file behind when the write or rename failed.
    try { fs.unlinkSync(tmpPath); } catch (_) { /* nothing to clean up */ }
    if (enableLogging) {
      console.log(`[Adblock-Rust] Cache write failed (${err.message}); continuing`);
    }
  }
}

/**
 * Build a request-blocking matcher backed by Brave's adblock-rs engine.
 *
 * @param {string|string[]} filePathOrArray - One filter list path, or an array
 *   of paths to load in order. Order is significant: it affects rule
 *   precedence and the cache key.
 * @param {object} [options]
 * @param {boolean} [options.enableLogging=false] - Print parse + cache events.
 *   Also forwarded to the adblock-rs FilterSet constructor.
 * @param {number} [options.resultCacheSize=32000] - Max entries in the
 *   per-matcher result cache (FIFO eviction).
 * @param {boolean} [options.useDiskCache=true] - Persist the compiled engine
 *   to disk and reload on next run with the same input lists + library version.
 * @param {string} [options.cacheDir] - Directory for compiled-engine cache
 *   files. Defaults to a folder under the OS temp dir.
 * @param {number} [options.cacheTtlMs=2592000000] - Files in cacheDir older
 *   than this are pruned during cold parse. Default 30 days.
 * @returns {{shouldBlock: Function, getStats: Function, rules: object}}
 * @throws {Error} When adblock-rs cannot be loaded or a filter list cannot be
 *   read.
 */
function parseAdblockRules(filePathOrArray, options = {}) {
  const {
    enableLogging = false,
    resultCacheSize = 32000,
    useDiskCache = true,
    cacheDir = path.join(os.tmpdir(), 'nwss-adblock-rs-cache'),
    cacheTtlMs = 30 * 24 * 60 * 60 * 1000
  } = options;
  // Must run before readFilterLists(): loading also records the adblock-rs
  // version that is mixed into the cache key.
  const rust = loadAdblockRust();

  // Accept a single path or an array of paths — caller no longer needs to
  // materialize a temp concatenation file for multi-list scans.
  const filePaths = Array.isArray(filePathOrArray) ? filePathOrArray : [filePathOrArray];

  const { buffers, totalBytes, cacheKey } = readFilterLists(filePaths);
  const cachePath = path.join(cacheDir, cacheKey + '.bin');

  let engine = useDiskCache ? tryLoadCachedEngine(rust, cachePath, enableLogging) : null;
  const cacheHit = engine !== null;
  let ruleCount = 0;

  if (!engine) {
    ({ engine, ruleCount } = compileEngine(rust, buffers, enableLogging));
    if (useDiskCache) {
      tryWriteEngineCache(engine, cacheDir, cachePath, cacheTtlMs, enableLogging);
    }
  }

  const stats = {
    // When deserialized from cache we don't see the rules; report bytes instead
    // so the startup banner remains informative.
    total: cacheHit ? null : ruleCount,
    bytes: totalBytes,
    engine: 'adblock-rust',
    fromDiskCache: cacheHit,
    listCount: filePaths.length,
    blocked: 0,
    allowed: 0,
    exceptions: 0,
    errors: 0,
    cacheHits: 0,
    cacheMisses: 0
  };

  const resultCache = new ResultLRU(resultCacheSize);
  // Hot-path optimization: shared "no_match" object — most checks return this,
  // skip per-call object allocation. Safe because callers only read fields.
  const NO_MATCH = Object.freeze({ blocked: false, rule: null, reason: 'no_match' });
  // Bind once so the hot path calls a plain function instead of looking up
  // `engine.check` on every request.
  const engineCheck = engine.check.bind(engine);

  if (enableLogging) {
    if (cacheHit) {
      console.log(`[Adblock-Rust] Restored compiled engine from ${cachePath} (${(totalBytes/1024/1024).toFixed(2)}MB source, ${filePaths.length} list${filePaths.length>1?'s':''})`);
    } else {
      console.log(`[Adblock-Rust] Compiled ${ruleCount} rules from ${filePaths.length} list${filePaths.length>1?'s':''} (${(totalBytes/1024/1024).toFixed(2)}MB)`);
    }
  }

  return {
    rules: { stats },

    /**
     * Decide whether a request should be blocked.
     *
     * @param {string} url - Request URL.
     * @param {string} [sourceUrl] - URL of the page issuing the request.
     * @param {string} [resourceType] - Puppeteer/CDP resource type name.
     * @returns {{blocked: boolean, rule: (string|null), reason: string}}
     */
    shouldBlock(url, sourceUrl, resourceType) {
      // Avoid default-parameter syntax in the hot path; explicit fallbacks
      // also turn null into ''.
      const src = sourceUrl || '';
      const rt = resourceType || '';
      // Single null-proto object lookup; falls back to 'other' for unknown types.
      const normType = rt ? (RESOURCE_TYPE_MAP[rt] || 'other') : '';
      const key = url + '\0' + src + '\0' + normType;
      const cached = resultCache.get(key);
      if (cached !== undefined) {
        stats.cacheHits++;
        return cached;
      }
      stats.cacheMisses++;

      // Narrow try/catch to the native call only, so stats updates and Map
      // operations stay outside the exception handler.
      let result;
      try {
        // Pass empty string (not the request URL) when source is unknown, so
        // the request is not presented to the engine as same-origin to itself.
        // The 4th arg MUST be true: the object form
        // {matched, exception, filter, important} is what is read below.
        result = engineCheck(url, src, normType, true);
      } catch (err) {
        stats.errors++;
        if (enableLogging) {
          console.log(`[Adblock-Rust] Error checking ${url}: ${err.message}`);
        }
        // Don't cache errors — next call may succeed.
        return { blocked: false, rule: null, reason: 'error' };
      }

      // NOTE(review): this assumes adblock-rs reports matched=true together
      // with a truthy `exception` for whitelisted requests. If it reports
      // matched=false on exception hits, the 'whitelisted' branch is never
      // taken and those requests count as 'no_match' — confirm against the
      // adblock-rs documentation.
      let r;
      if (result.matched) {
        const exception = result.exception;
        if (exception) {
          stats.exceptions++;
          r = { blocked: false, rule: exception, reason: 'whitelisted' };
        } else {
          stats.blocked++;
          r = {
            blocked: true,
            rule: result.filter || null,
            reason: result.important ? 'important_rule' : 'adblock_rust'
          };
        }
      } else {
        stats.allowed++;
        r = NO_MATCH;
      }

      resultCache.set(key, r);
      return r;
    },

    /**
     * Snapshot of the matcher counters plus result-cache figures.
     *
     * @returns {object} A copy of the stats fields with an added `cache`
     *   object ({hits, misses, hitRate, size, maxSize}).
     */
    getStats() {
      const total = stats.cacheHits + stats.cacheMisses;
      const hitRate = total > 0 ? ((stats.cacheHits / total) * 100).toFixed(1) + '%' : '0%';
      return {
        ...stats,
        cache: {
          hits: stats.cacheHits,
          misses: stats.cacheMisses,
          hitRate,
          size: resultCache.cache.size,
          maxSize: resultCache.maxSize
        }
      };
    }
  };
}
365
+
366
+ module.exports = {
367
+ parseAdblockRules
368
+ };
package/nwss.js CHANGED
@@ -58,7 +58,8 @@ const { clearSiteData } = require('./lib/clear_sitedata');
58
58
  // Referrer header generation
59
59
  const { getReferrerForUrl, validateReferrerConfig, validateReferrerDisable } = require('./lib/referrer');
60
60
  // Adblock rules parser
61
- const { parseAdblockRules } = require('./lib/adblock');
61
+ const adblockJs = require('./lib/adblock');
62
+ const adblockRust = require('./lib/adblock-rust');
62
63
  // WireGuard VPN
63
64
  const { connectForSite: wgConnect, disconnectForSite: wgDisconnect, disconnectAll: wgDisconnectAll, validateVpnConfig, normalizeVpnConfig } = require('./lib/wireguard_vpn');
64
65
  // OpenVPN
@@ -594,6 +595,22 @@ if (validateRules || validateRulesFile) {
594
595
  }
595
596
  }
596
597
 
598
// Parse --adblock-engine=<js|rust> (default: js). Selects the matcher backend
// used by --block-ads. The rust engine requires the optional adblock-rs package.
// Accepts both `--adblock-engine=rust` and `--adblock-engine rust`. Only the
// exact flag (or the flag followed by '=') is matched, so a different option
// that merely shares the prefix is not mistaken for this one.
const adblockEngineIndex = args.findIndex(
  arg => arg === '--adblock-engine' || arg.startsWith('--adblock-engine=')
);
let adblockEngineName = 'js';
if (adblockEngineIndex !== -1) {
  const flagArg = args[adblockEngineIndex];
  // Take everything after the first '=' so a malformed value such as
  // `rust=x` is rejected below instead of being silently truncated to `rust`.
  const engineArg = flagArg === '--adblock-engine'
    ? args[adblockEngineIndex + 1]
    : flagArg.slice('--adblock-engine='.length);
  if (engineArg === 'rust' || engineArg === 'js') {
    adblockEngineName = engineArg;
  } else {
    console.log(`Error: --adblock-engine must be 'js' or 'rust' (got: ${engineArg})`);
    process.exit(1);
  }
}
613
+
597
614
  // Parse --block-ads argument for request-level ad blocking (supports comma-separated lists)
598
615
  const blockAdsIndex = args.findIndex(arg => arg.startsWith('--block-ads'));
599
616
  if (blockAdsIndex !== -1) {
@@ -614,18 +631,31 @@ if (blockAdsIndex !== -1) {
614
631
  }
615
632
  }
616
633
 
617
- // Concatenate multiple lists into a single temp file for the parser
618
- let rulesFile = rulesFiles[0];
619
- if (rulesFiles.length > 1) {
620
- rulesFile = path.join(os.tmpdir(), `nwss-adblock-combined-${Date.now()}.txt`);
621
- const combined = rulesFiles.map(f => fs.readFileSync(f, 'utf-8')).join('\n');
622
- fs.writeFileSync(rulesFile, combined);
623
- }
624
-
625
634
  adblockEnabled = true;
626
- adblockMatcher = parseAdblockRules(rulesFile, { enableLogging: forceDebug });
635
+ const engine = adblockEngineName === 'rust' ? adblockRust : adblockJs;
636
+ try {
637
+ if (engine === adblockRust) {
638
+ // Rust wrapper accepts an array directly — no temp file needed.
639
+ adblockMatcher = engine.parseAdblockRules(rulesFiles, { enableLogging: forceDebug });
640
+ } else {
641
+ // JS engine takes a single path; concat to a temp file when multiple lists.
642
+ let rulesFile = rulesFiles[0];
643
+ if (rulesFiles.length > 1) {
644
+ rulesFile = path.join(os.tmpdir(), `nwss-adblock-combined-${Date.now()}.txt`);
645
+ const combined = rulesFiles.map(f => fs.readFileSync(f, 'utf-8')).join('\n');
646
+ fs.writeFileSync(rulesFile, combined);
647
+ }
648
+ adblockMatcher = engine.parseAdblockRules(rulesFile, { enableLogging: forceDebug });
649
+ }
650
+ } catch (err) {
651
+ console.log(`Error: Failed to load adblock engine '${adblockEngineName}': ${err.message}`);
652
+ process.exit(1);
653
+ }
627
654
  const stats = adblockMatcher.getStats();
628
- if (!silentMode) console.log(messageColors.success(`Adblock enabled: Loaded ${stats.total} blocking rules from ${rulesFiles.length} list${rulesFiles.length > 1 ? 's' : ''}`));
655
+ const ruleDesc = stats.total != null
656
+ ? `${stats.total} blocking rules`
657
+ : `compiled engine (cached)`;
658
+ if (!silentMode) console.log(messageColors.success(`Adblock enabled (${adblockEngineName}): Loaded ${ruleDesc} from ${rulesFiles.length} list${rulesFiles.length > 1 ? 's' : ''}`));
629
659
  }
630
660
 
631
661
  if (args.includes('--help') || args.includes('-h')) {
@@ -651,6 +681,9 @@ Output Format Options:
651
681
  Request Blocking:
652
682
  --block-ads=<file> Block ads/trackers using EasyList format rules (||domain.com^, /ads/*, etc)
653
683
  Works at request-level for maximum performance
684
+ Supports comma-separated lists: --block-ads=easylist.txt,easyprivacy.txt
685
+ --adblock-engine=<js|rust> Matcher backend for --block-ads (default: js)
686
+ 'rust' uses Brave's adblock-rs (faster on large lists; needs: npm i adblock-rs)
654
687
 
655
688
  Per-config settings file (.nwssconfig):
656
689
  Place a .nwssconfig file in the project root to define per-config settings.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "2.0.63",
3
+ "version": "2.0.64",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {
@@ -11,8 +11,8 @@
11
11
  },
12
12
  "dependencies": {
13
13
  "ghost-cursor": "^1.4.2",
14
- "lru-cache": "^10.4.3",
15
- "p-limit": "^4.0.0",
14
+ "lru-cache": "^11.3.5",
15
+ "p-limit": "^7.3.0",
16
16
  "psl": "^1.15.0",
17
17
  "puppeteer": ">=20.0.0"
18
18
  },
@@ -36,7 +36,7 @@
36
36
  "author": "FanboyNZ",
37
37
  "license": "GPL-3.0",
38
38
  "engines": {
39
- "node": ">=20.0.0"
39
+ "node": ">=22.0.0"
40
40
  },
41
41
  "repository": {
42
42
  "type": "git",
@@ -50,10 +50,11 @@
50
50
  },
51
51
  "homepage": "https://github.com/ryanbr/network-scanner",
52
52
  "optionalDependencies": {
53
+ "adblock-rs": "^0.12.3",
53
54
  "puppeteer-core": ">=20.0.0"
54
55
  },
55
56
  "devDependencies": {
56
57
  "eslint": "^10.0.2",
57
- "globals": "^16.3.0"
58
+ "globals": "^17.6.0"
58
59
  }
59
60
  }