@fanboynz/network-scanner 2.0.63 → 2.0.64
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +1 -1
- package/lib/adblock-rust.js +368 -0
- package/nwss.js +44 -11
- package/package.json +6 -5
package/CLAUDE.md
CHANGED
|
@@ -27,7 +27,7 @@ Puppeteer-based network scanner for analyzing web traffic, generating adblock fi
|
|
|
27
27
|
|
|
28
28
|
## Tech Stack
|
|
29
29
|
|
|
30
|
-
- **Node.js** >=
|
|
30
|
+
- **Node.js** >=22.0.0
|
|
31
31
|
- **puppeteer** >=20.0.0 — Headless browser automation
|
|
32
32
|
- **psl** — Public Suffix List for domain parsing
|
|
33
33
|
- **lru-cache** — LRU cache implementation
|
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
// === Adblock Rust Engine Wrapper (adblock-rust.js) ===
|
|
2
|
+
// Drop-in replacement for ./lib/adblock that delegates matching to Brave's
|
|
3
|
+
// adblock-rust engine (npm: adblock-rs) for higher throughput on large lists.
|
|
4
|
+
//
|
|
5
|
+
// Exposes the same parseAdblockRules(filePath, options) factory and the same
|
|
6
|
+
// matcher shape ({ shouldBlock, getStats, rules }) so nwss.js can switch
|
|
7
|
+
// engines with a single require() swap.
|
|
8
|
+
|
|
9
|
+
const fs = require('fs');
|
|
10
|
+
const path = require('path');
|
|
11
|
+
const os = require('os');
|
|
12
|
+
const crypto = require('crypto');
|
|
13
|
+
|
|
14
|
+
let adblockRust = null;
|
|
15
|
+
let adblockRustVersion = null;
|
|
16
|
+
function loadAdblockRust() {
|
|
17
|
+
if (adblockRust) return adblockRust;
|
|
18
|
+
try {
|
|
19
|
+
adblockRust = require('adblock-rs');
|
|
20
|
+
// Read once for the cache key — serialized engine format is not promised
|
|
21
|
+
// stable across versions, so partitioning cache files by version means
|
|
22
|
+
// upgrades cleanly invalidate without producing confusing deserialize
|
|
23
|
+
// failures on the warm path.
|
|
24
|
+
adblockRustVersion = require('adblock-rs/package.json').version;
|
|
25
|
+
} catch (err) {
|
|
26
|
+
throw new Error(
|
|
27
|
+
"adblock-rs is not installed. Install with: npm install adblock-rs " +
|
|
28
|
+
"(requires Rust toolchain for native build). Original error: " + err.message
|
|
29
|
+
);
|
|
30
|
+
}
|
|
31
|
+
return adblockRust;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
// Best-effort cleanup of stale serialized engines. Filter lists change roughly
|
|
35
|
+
// monthly; cache files older than this are unlikely to be reused and only cost
|
|
36
|
+
// disk space. Runs once per cold parse and swallows all errors — cleanup
|
|
37
|
+
// failure must never block a scan.
|
|
38
|
+
/**
 * Delete stale compiled-engine cache files from `cacheDir`.
 *
 * Only names this module itself produces are considered:
 *   - `<64 hex chars>.bin`            finished cache entry (sha256 cache key)
 *   - `<64 hex chars>.bin.<pid>.tmp`  in-flight write left by a killed process
 * Any other file, including unrelated `*.bin` / `*.tmp` files in a
 * caller-supplied directory, is left untouched.
 *
 * Best-effort: every filesystem error is swallowed so that cleanup can never
 * fail the caller.
 *
 * @param {string} cacheDir - Directory to scan.
 * @param {number} maxAgeMs - Files whose mtime is older than this are removed.
 * @returns {void}
 */
function pruneOldCacheFiles(cacheDir, maxAgeMs) {
  const ownFilePattern = /^[0-9a-f]{64}\.bin(?:\.\d+\.tmp)?$/;
  try {
    const cutoff = Date.now() - maxAgeMs;
    for (const name of fs.readdirSync(cacheDir)) {
      if (!ownFilePattern.test(name)) continue;
      const full = path.join(cacheDir, name);
      try {
        if (fs.statSync(full).mtimeMs < cutoff) fs.unlinkSync(full);
      } catch (_) { /* file vanished mid-walk — fine */ }
    }
  } catch (_) { /* dir doesn't exist or unreadable — fine */ }
}
|
|
53
|
+
|
|
54
|
+
// Map Puppeteer/CDP resource type names to adblock-rust request types.
|
|
55
|
+
// Uses a null-prototype object so lookups skip the prototype chain — small but
|
|
56
|
+
// free win on a hot-path lookup that runs once per network request.
|
|
57
|
+
// Lookup table from Puppeteer/CDP resource type names to the request type
// strings handed to adblock-rs. Created without a prototype so names such as
// "toString" or "constructor" can never resolve to inherited members.
const RESOURCE_TYPE_MAP = Object.create(null);
for (const [cdpType, engineType] of [
  ['document', 'main_frame'],
  ['subdocument', 'sub_frame'],
  ['stylesheet', 'stylesheet'],
  ['script', 'script'],
  ['image', 'image'],
  ['font', 'font'],
  ['media', 'media'],
  ['texttrack', 'media'],
  ['xhr', 'xmlhttprequest'],
  ['fetch', 'xmlhttprequest'],
  ['xmlhttprequest', 'xmlhttprequest'],
  ['eventsource', 'other'],
  ['websocket', 'websocket'],
  ['manifest', 'other'],
  ['signedexchange', 'other'],
  ['ping', 'ping'],
  ['cspviolationreport', 'other'],
  ['preflight', 'other'],
  ['other', 'other'],
  ['', '']
]) {
  RESOURCE_TYPE_MAP[cdpType] = engineType;
}

/**
 * Translate a Puppeteer/CDP resource type into an adblock-rs request type.
 *
 * @param {string} [type] - Resource type name as reported by the browser.
 * @returns {string} '' when no type was given, the mapped name for a known
 *   type, and 'other' for anything unrecognised.
 */
function normalizeResourceType(type) {
  if (type) {
    const mapped = RESOURCE_TYPE_MAP[type];
    return mapped ? mapped : 'other';
  }
  return '';
}
|
|
84
|
+
|
|
85
|
+
// Small FIFO cache keyed on (url \0 sourceUrl \0 resourceType). Despite the
|
|
86
|
+
// class name, eviction is insertion-order, not access-order — `get()` does not
|
|
87
|
+
// promote. For this workload (per-page request bursts whose working set fits
|
|
88
|
+
// in maxSize) FIFO and true LRU produce the same evictions, so the simpler
|
|
89
|
+
// path wins. If cache effectiveness becomes a concern with larger working
|
|
90
|
+
// sets, promote on hit by re-inserting (delete + set).
|
|
91
|
+
/**
 * Bounded result cache with insertion-order (FIFO) eviction.
 *
 * Despite the name, `get()` never promotes an entry; the oldest inserted key
 * is the one dropped when room is needed. `cache` and `maxSize` are read
 * directly by the matcher's stats reporting, so they stay public fields.
 */
class ResultLRU {
  /**
   * @param {number} maxSize - Maximum number of entries to retain.
   */
  constructor(maxSize) {
    this.cache = new Map();
    this.maxSize = maxSize;
  }

  /**
   * @param {string} k - Cache key.
   * @returns {*} The stored value, or undefined when the key is absent.
   */
  get(k) { return this.cache.get(k); }

  /**
   * Store `v` under `k`, evicting the oldest entry only when a NEW key would
   * push the cache past `maxSize`. Overwriting an existing key does not grow
   * the map, so it must not cost an unrelated entry its slot.
   *
   * @param {string} k - Cache key.
   * @param {*} v - Value to store.
   */
  set(k, v) {
    // `has()` is only consulted once the cache is full (short-circuit), so
    // the common below-capacity insert pays nothing extra.
    if (this.cache.size >= this.maxSize && !this.cache.has(k)) {
      this.cache.delete(this.cache.keys().next().value);
    }
    this.cache.set(k, v);
  }
}
|
|
104
|
+
|
|
105
|
+
/**
 * Build a request-blocking matcher backed by Brave's adblock-rs engine.
 *
 * The compiled engine is optionally persisted to disk, keyed by a sha256 over
 * the adblock-rs version plus the raw bytes of every list in order, so an
 * unchanged set of lists is restored by deserialization instead of re-parsed.
 *
 * @param {string|string[]} filePathOrArray - One filter list path, or an array
 *   of paths to load in order. Order is significant: it affects rule
 *   precedence and the cache key.
 * @param {object} [options]
 * @param {boolean} [options.enableLogging=false] - Print parse + cache events.
 * @param {number} [options.resultCacheSize=32000] - Max entries in the
 *   per-matcher result cache (FIFO eviction).
 * @param {boolean} [options.useDiskCache=true] - Persist the compiled engine
 *   to disk and reload on next run with the same input lists + library version.
 * @param {string} [options.cacheDir] - Directory for compiled-engine cache
 *   files. Defaults to a folder under the OS temp dir.
 * @param {number} [options.cacheTtlMs=2592000000] - Files in cacheDir older
 *   than this are pruned during cold parse. Default 30 days.
 * @returns {{shouldBlock: Function, getStats: Function, rules: object}}
 *   `shouldBlock(url, sourceUrl, resourceType)` returns
 *   `{ blocked, rule, reason }`; `getStats()` returns a snapshot of counters;
 *   `rules.stats` is the live counters object.
 * @throws {Error} When adblock-rs cannot be loaded, or when any list file
 *   cannot be read (the filesystem error is attached as `cause`).
 */
function parseAdblockRules(filePathOrArray, options = {}) {
  const {
    enableLogging = false,
    resultCacheSize = 32000,
    useDiskCache = true,
    cacheDir = path.join(os.tmpdir(), 'nwss-adblock-rs-cache'),
    cacheTtlMs = 30 * 24 * 60 * 60 * 1000
  } = options;
  const rust = loadAdblockRust();

  // Accept a single path or an array of paths.
  const filePaths = Array.isArray(filePathOrArray) ? filePathOrArray : [filePathOrArray];

  // Read every list up front and hash the raw bytes, so the cache key reflects
  // content changes, list-order changes, and the adblock-rs version.
  const buffers = [];
  const hash = crypto.createHash('sha256');
  hash.update('adblock-rs:' + adblockRustVersion + '\0');
  let totalBytes = 0;
  for (const fp of filePaths) {
    let buf;
    try {
      buf = fs.readFileSync(fp);
    } catch (err) {
      // Message kept as-is for callers; the real reason (ENOENT, EACCES,
      // EISDIR, ...) travels along as `cause` instead of being discarded.
      throw new Error(`Adblock rules file not found: ${fp}`, { cause: err });
    }
    buffers.push(buf);
    hash.update(buf);
    hash.update('\0');
    totalBytes += buf.length;
  }
  const cacheKey = hash.digest('hex');
  const cachePath = path.join(cacheDir, cacheKey + '.bin');

  let engine = null;
  let ruleCount = 0;
  let cacheHit = false;

  // Fast path: deserialize a previously compiled engine. A missing file
  // surfaces as ENOENT from readFileSync and is treated as a plain cache miss,
  // which avoids a separate exists-check and its race with the read.
  if (useDiskCache) {
    let compiled;
    try {
      compiled = fs.readFileSync(cachePath);
    } catch (err) {
      if (err.code !== 'ENOENT' && enableLogging) {
        console.log(`[Adblock-Rust] Cache read failed (${err.message}); reparsing`);
      }
    }
    if (compiled) {
      try {
        engine = new rust.Engine(new rust.FilterSet(enableLogging), true);
        // Hand over the backing ArrayBuffer directly when the Buffer spans all
        // of it; otherwise copy out just this Buffer's window so bytes that
        // belong to other views of the same backing store are not passed on.
        const ab = (compiled.byteOffset === 0 &&
                    compiled.byteLength === compiled.buffer.byteLength)
          ? compiled.buffer
          : compiled.buffer.slice(
              compiled.byteOffset,
              compiled.byteOffset + compiled.byteLength
            );
        engine.deserialize(ab);
        cacheHit = true;
      } catch (err) {
        // Corrupt cache or incompatible format — fall through to a fresh parse.
        engine = null;
        if (enableLogging) {
          console.log(`[Adblock-Rust] Cache deserialize failed (${err.message}); reparsing`);
        }
      }
    }
  }

  if (!engine) {
    // Slow path: parse every list, one addFilters() call per file. Each
    // buffer reference is dropped as soon as it has been consumed so the raw
    // file bytes do not have to stay alive until the function returns.
    const filterSet = new rust.FilterSet(enableLogging);
    for (let i = 0; i < buffers.length; i++) {
      const buf = buffers[i];
      buffers[i] = null;
      // Split on LF or CRLF: with a bare '\n' split, blank lines in a
      // CRLF-encoded list are "\r" (length 1) and were counted as rules.
      const lines = buf.toString('utf-8').split(/\r?\n/);
      for (let j = 0; j < lines.length; j++) {
        const line = lines[j];
        if (line.length === 0) continue;
        if (line.charCodeAt(0) === 0x21) continue; // '!' starts a comment line
        ruleCount++;
      }
      filterSet.addFilters(lines);
    }
    engine = new rust.Engine(filterSet, true);

    if (useDiskCache) {
      // Atomic publish: write to a per-pid temp path, then rename. The final
      // cachePath is therefore either complete or absent, never half-written.
      const tmpPath = cachePath + '.' + process.pid + '.tmp';
      try {
        fs.mkdirSync(cacheDir, { recursive: true });
        const serialized = engine.serialize();
        fs.writeFileSync(tmpPath, Buffer.from(serialized));
        fs.renameSync(tmpPath, cachePath);
        // Prune after our own write so the fresh entry is never a candidate.
        pruneOldCacheFiles(cacheDir, cacheTtlMs);
      } catch (err) {
        // A failed write/rename would otherwise leave the temp file behind
        // until a later TTL prune; remove it now, best-effort.
        try { fs.unlinkSync(tmpPath); } catch (_) { /* never created or already gone */ }
        if (enableLogging) {
          console.log(`[Adblock-Rust] Cache write failed (${err.message}); continuing`);
        }
      }
    }
  }

  const stats = {
    // The rule count is only known after a fresh parse; on a cache hit it is
    // reported as null and callers fall back to the byte count.
    total: cacheHit ? null : ruleCount,
    bytes: totalBytes,
    engine: 'adblock-rust',
    fromDiskCache: cacheHit,
    listCount: filePaths.length,
    blocked: 0,
    allowed: 0,
    exceptions: 0,
    errors: 0,
    cacheHits: 0,
    cacheMisses: 0
  };

  const resultCache = new ResultLRU(resultCacheSize);
  // Shared, frozen "no match" result: the common outcome needs no per-call
  // allocation, and freezing guards against a caller mutating the shared object.
  const NO_MATCH = Object.freeze({ blocked: false, rule: null, reason: 'no_match' });
  // Bound once so the hot path does not re-resolve `engine.check` per call.
  const engineCheck = engine.check.bind(engine);

  if (enableLogging) {
    if (cacheHit) {
      console.log(`[Adblock-Rust] Restored compiled engine from ${cachePath} (${(totalBytes/1024/1024).toFixed(2)}MB source, ${filePaths.length} list${filePaths.length>1?'s':''})`);
    } else {
      console.log(`[Adblock-Rust] Compiled ${ruleCount} rules from ${filePaths.length} list${filePaths.length>1?'s':''} (${(totalBytes/1024/1024).toFixed(2)}MB)`);
    }
  }

  return {
    rules: { stats },

    /**
     * Decide whether a request should be blocked.
     *
     * @param {string} url - Request URL.
     * @param {string} [sourceUrl] - URL of the page/frame issuing the request.
     * @param {string} [resourceType] - Puppeteer/CDP resource type name.
     * @returns {{blocked: boolean, rule: (string|null), reason: string}}
     *   `reason` is one of 'no_match', 'whitelisted', 'adblock_rust',
     *   'important_rule' or 'error'. Results other than 'error' are cached.
     */
    shouldBlock(url, sourceUrl, resourceType) {
      const src = sourceUrl || '';
      const rt = resourceType || '';
      // Unknown types fall back to 'other'; a missing type stays ''.
      const normType = rt ? (RESOURCE_TYPE_MAP[rt] || 'other') : '';
      const key = url + '\0' + src + '\0' + normType;
      const cached = resultCache.get(key);
      if (cached !== undefined) {
        stats.cacheHits++;
        return cached;
      }
      stats.cacheMisses++;

      let result;
      try {
        // An unknown source is passed as '' rather than the request URL, so
        // the request is not presented to the engine as first-party to itself.
        // The 4th argument must be true: the fields read below
        // (matched / exception / filter / important) are only present on the
        // detailed result object.
        result = engineCheck(url, src, normType, true);
      } catch (err) {
        stats.errors++;
        if (enableLogging) {
          console.log(`[Adblock-Rust] Error checking ${url}: ${err.message}`);
        }
        // Errors are deliberately not cached so a later call can retry.
        return { blocked: false, rule: null, reason: 'error' };
      }

      let r;
      const exception = result.exception;
      if (result.matched) {
        if (exception) {
          stats.exceptions++;
          r = { blocked: false, rule: exception, reason: 'whitelisted' };
        } else {
          stats.blocked++;
          r = {
            blocked: true,
            rule: result.filter || null,
            reason: result.important ? 'important_rule' : 'adblock_rust'
          };
        }
      } else if (exception) {
        // NOTE(review): adblock-rust appears to report an exception hit with
        // `matched` false (the exception cancels the match), in which case the
        // branch above never sees it — confirm against the installed
        // adblock-rs. Handling it here keeps `blocked: false` while still
        // surfacing the exception rule and counting it.
        stats.exceptions++;
        r = { blocked: false, rule: exception, reason: 'whitelisted' };
      } else {
        stats.allowed++;
        r = NO_MATCH;
      }

      resultCache.set(key, r);
      return r;
    },

    /**
     * Snapshot of matcher counters plus derived result-cache figures.
     *
     * @returns {object} Copy of the stats counters with an added `cache`
     *   object: `{ hits, misses, hitRate, size, maxSize }`.
     */
    getStats() {
      const total = stats.cacheHits + stats.cacheMisses;
      const hitRate = total > 0 ? ((stats.cacheHits / total) * 100).toFixed(1) + '%' : '0%';
      return {
        ...stats,
        cache: {
          hits: stats.cacheHits,
          misses: stats.cacheMisses,
          hitRate,
          size: resultCache.cache.size,
          maxSize: resultCache.maxSize
        }
      };
    }
  };
}
|
|
365
|
+
|
|
366
|
+
// Public API: only the matcher factory is exported. The loader, cache pruning,
// resource-type map and result cache above remain module-private.
module.exports = {
  parseAdblockRules
};
|
package/nwss.js
CHANGED
|
@@ -58,7 +58,8 @@ const { clearSiteData } = require('./lib/clear_sitedata');
|
|
|
58
58
|
// Referrer header generation
|
|
59
59
|
const { getReferrerForUrl, validateReferrerConfig, validateReferrerDisable } = require('./lib/referrer');
|
|
60
60
|
// Adblock rules parser
|
|
61
|
-
const
|
|
61
|
+
const adblockJs = require('./lib/adblock');
|
|
62
|
+
const adblockRust = require('./lib/adblock-rust');
|
|
62
63
|
// WireGuard VPN
|
|
63
64
|
const { connectForSite: wgConnect, disconnectForSite: wgDisconnect, disconnectAll: wgDisconnectAll, validateVpnConfig, normalizeVpnConfig } = require('./lib/wireguard_vpn');
|
|
64
65
|
// OpenVPN
|
|
@@ -594,6 +595,22 @@ if (validateRules || validateRulesFile) {
|
|
|
594
595
|
}
|
|
595
596
|
}
|
|
596
597
|
|
|
598
|
+
// --adblock-engine=<js|rust> picks the matcher backend used by --block-ads.
// Both "--adblock-engine=rust" and "--adblock-engine rust" are accepted; when
// the flag is absent the JS engine is used. The rust backend depends on the
// optional adblock-rs package.
const adblockEngineIndex = args.findIndex((arg) => arg.startsWith('--adblock-engine'));
let adblockEngineName = 'js';
if (adblockEngineIndex !== -1) {
  const engineFlag = args[adblockEngineIndex];
  let engineArg;
  if (engineFlag.includes('=')) {
    engineArg = engineFlag.split('=')[1];
  } else {
    // Space-separated form: the value is the next argv entry.
    engineArg = args[adblockEngineIndex + 1];
  }
  const isKnownEngine = engineArg === 'rust' || engineArg === 'js';
  if (!isKnownEngine) {
    console.log(`Error: --adblock-engine must be 'js' or 'rust' (got: ${engineArg})`);
    process.exit(1);
  } else {
    adblockEngineName = engineArg;
  }
}
|
|
613
|
+
|
|
597
614
|
// Parse --block-ads argument for request-level ad blocking (supports comma-separated lists)
|
|
598
615
|
const blockAdsIndex = args.findIndex(arg => arg.startsWith('--block-ads'));
|
|
599
616
|
if (blockAdsIndex !== -1) {
|
|
@@ -614,18 +631,31 @@ if (blockAdsIndex !== -1) {
|
|
|
614
631
|
}
|
|
615
632
|
}
|
|
616
633
|
|
|
617
|
-
// Concatenate multiple lists into a single temp file for the parser
|
|
618
|
-
let rulesFile = rulesFiles[0];
|
|
619
|
-
if (rulesFiles.length > 1) {
|
|
620
|
-
rulesFile = path.join(os.tmpdir(), `nwss-adblock-combined-${Date.now()}.txt`);
|
|
621
|
-
const combined = rulesFiles.map(f => fs.readFileSync(f, 'utf-8')).join('\n');
|
|
622
|
-
fs.writeFileSync(rulesFile, combined);
|
|
623
|
-
}
|
|
624
|
-
|
|
625
634
|
adblockEnabled = true;
|
|
626
|
-
|
|
635
|
+
const engine = adblockEngineName === 'rust' ? adblockRust : adblockJs;
|
|
636
|
+
try {
|
|
637
|
+
if (engine === adblockRust) {
|
|
638
|
+
// Rust wrapper accepts an array directly — no temp file needed.
|
|
639
|
+
adblockMatcher = engine.parseAdblockRules(rulesFiles, { enableLogging: forceDebug });
|
|
640
|
+
} else {
|
|
641
|
+
// JS engine takes a single path; concat to a temp file when multiple lists.
|
|
642
|
+
let rulesFile = rulesFiles[0];
|
|
643
|
+
if (rulesFiles.length > 1) {
|
|
644
|
+
rulesFile = path.join(os.tmpdir(), `nwss-adblock-combined-${Date.now()}.txt`);
|
|
645
|
+
const combined = rulesFiles.map(f => fs.readFileSync(f, 'utf-8')).join('\n');
|
|
646
|
+
fs.writeFileSync(rulesFile, combined);
|
|
647
|
+
}
|
|
648
|
+
adblockMatcher = engine.parseAdblockRules(rulesFile, { enableLogging: forceDebug });
|
|
649
|
+
}
|
|
650
|
+
} catch (err) {
|
|
651
|
+
console.log(`Error: Failed to load adblock engine '${adblockEngineName}': ${err.message}`);
|
|
652
|
+
process.exit(1);
|
|
653
|
+
}
|
|
627
654
|
const stats = adblockMatcher.getStats();
|
|
628
|
-
|
|
655
|
+
const ruleDesc = stats.total != null
|
|
656
|
+
? `${stats.total} blocking rules`
|
|
657
|
+
: `compiled engine (cached)`;
|
|
658
|
+
if (!silentMode) console.log(messageColors.success(`Adblock enabled (${adblockEngineName}): Loaded ${ruleDesc} from ${rulesFiles.length} list${rulesFiles.length > 1 ? 's' : ''}`));
|
|
629
659
|
}
|
|
630
660
|
|
|
631
661
|
if (args.includes('--help') || args.includes('-h')) {
|
|
@@ -651,6 +681,9 @@ Output Format Options:
|
|
|
651
681
|
Request Blocking:
|
|
652
682
|
--block-ads=<file> Block ads/trackers using EasyList format rules (||domain.com^, /ads/*, etc)
|
|
653
683
|
Works at request-level for maximum performance
|
|
684
|
+
Supports comma-separated lists: --block-ads=easylist.txt,easyprivacy.txt
|
|
685
|
+
--adblock-engine=<js|rust> Matcher backend for --block-ads (default: js)
|
|
686
|
+
'rust' uses Brave's adblock-rs (faster on large lists; needs: npm i adblock-rs)
|
|
654
687
|
|
|
655
688
|
Per-config settings file (.nwssconfig):
|
|
656
689
|
Place a .nwssconfig file in the project root to define per-config settings.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.64",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|
|
@@ -11,8 +11,8 @@
|
|
|
11
11
|
},
|
|
12
12
|
"dependencies": {
|
|
13
13
|
"ghost-cursor": "^1.4.2",
|
|
14
|
-
"lru-cache": "^
|
|
15
|
-
"p-limit": "^
|
|
14
|
+
"lru-cache": "^11.3.5",
|
|
15
|
+
"p-limit": "^7.3.0",
|
|
16
16
|
"psl": "^1.15.0",
|
|
17
17
|
"puppeteer": ">=20.0.0"
|
|
18
18
|
},
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
"author": "FanboyNZ",
|
|
37
37
|
"license": "GPL-3.0",
|
|
38
38
|
"engines": {
|
|
39
|
-
"node": ">=
|
|
39
|
+
"node": ">=22.0.0"
|
|
40
40
|
},
|
|
41
41
|
"repository": {
|
|
42
42
|
"type": "git",
|
|
@@ -50,10 +50,11 @@
|
|
|
50
50
|
},
|
|
51
51
|
"homepage": "https://github.com/ryanbr/network-scanner",
|
|
52
52
|
"optionalDependencies": {
|
|
53
|
+
"adblock-rs": "^0.12.3",
|
|
53
54
|
"puppeteer-core": ">=20.0.0"
|
|
54
55
|
},
|
|
55
56
|
"devDependencies": {
|
|
56
57
|
"eslint": "^10.0.2",
|
|
57
|
-
"globals": "^
|
|
58
|
+
"globals": "^17.6.0"
|
|
58
59
|
}
|
|
59
60
|
}
|