@fanboynz/network-scanner 2.0.32 → 2.0.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/adblock.js +750 -0
- package/lib/fingerprint.js +3 -3
- package/nwss.js +63 -9
- package/package.json +1 -1
package/lib/adblock.js
ADDED
|
@@ -0,0 +1,750 @@
|
|
|
1
|
+
// === Adblock Rules Parser (adblock.js) v2.2 - Complete Optimization ===
|
|
2
|
+
// Supports EasyList/AdBlock Plus filter syntax
|
|
3
|
+
// Optimizations: Map domains + URL cache + skip third-party calc + cached hostname split
|
|
4
|
+
|
|
5
|
+
const fs = require('fs');
|
|
6
|
+
|
|
7
|
+
/**
 * Bounded least-recently-used (LRU) cache for URL parsing results.
 *
 * Backed by a Map, whose iteration order is insertion order: the first key is
 * always the least recently used entry because every read or write re-inserts
 * the touched key at the end.
 */
class URLCache {
  /**
   * @param {number} [maxSize=1000] - Maximum number of entries kept before eviction
   */
  constructor(maxSize = 1000) {
    this.cache = new Map();
    this.maxSize = maxSize;
  }

  /**
   * Look up a cached value and mark it as most recently used.
   * @param {string} url - Cache key
   * @returns {*} Cached value, or undefined when the key is absent
   */
  get(url) {
    if (!this.cache.has(url)) {
      return undefined;
    }
    const value = this.cache.get(url);
    // Re-insert so the key moves to the "most recent" end of the Map
    this.cache.delete(url);
    this.cache.set(url, value);
    return value;
  }

  /**
   * Store a value, evicting the least recently used entry when full.
   * @param {string} url - Cache key
   * @param {*} value - Value to cache
   */
  set(url, value) {
    if (this.cache.has(url)) {
      // Overwriting an existing key must not evict an unrelated entry
      this.cache.delete(url);
    } else if (this.cache.size >= this.maxSize) {
      const oldestKey = this.cache.keys().next().value;
      this.cache.delete(oldestKey);
    }
    this.cache.set(url, value);
  }

  /** Remove every cached entry. */
  clear() {
    this.cache.clear();
  }

  /**
   * @returns {{size: number, maxSize: number}} Current and maximum entry counts
   */
  getStats() {
    return {
      size: this.cache.size,
      maxSize: this.maxSize
    };
  }
}
|
|
41
|
+
|
|
42
|
+
/**
 * Reads an EasyList/AdBlock Plus style filter list from disk, parses every
 * network rule and returns a matcher built by createMatcher().
 *
 * Comment lines (`!`, `#`), cosmetic rules (`##`, `#@#`) and rules carrying a
 * cosmetic-only option are counted but not parsed. Rules that fail to parse
 * are counted in `stats.invalid` and skipped.
 *
 * @param {string} filePath - Path to filter list file
 * @param {Object} [options] - Parser options
 * @param {boolean} [options.enableLogging=false] - Log load summary and parse failures
 * @param {boolean} [options.caseSensitive=false] - Forwarded to createMatcher()
 * @returns {Object} Rule matcher with matching functions
 * @throws {Error} When the file does not exist
 */
function parseAdblockRules(filePath, options = {}) {
  const {
    enableLogging = false,
    caseSensitive = false
  } = options;

  if (!fs.existsSync(filePath)) {
    throw new Error(`Adblock rules file not found: ${filePath}`);
  }

  const fileContent = fs.readFileSync(filePath, 'utf-8');
  const lines = fileContent.split(/\r?\n/);

  const rules = {
    domainMap: new Map(),        // ||domain.com^ - Exact domains for O(1) lookup
    domainRules: [],             // Domain rules that need per-rule evaluation (fallback)
    thirdPartyRules: [],         // ||domain.com^$third-party
    pathRules: [],               // /ads/*
    scriptRules: [],             // .js$script
    regexRules: [],              // /regex/
    whitelist: [],               // @@ rules that need per-rule evaluation
    whitelistMap: new Map(),     // Exact whitelist domains for O(1) lookup
    elementHiding: [],           // ##.ad-class (not used for network blocking)
    stats: {
      total: 0,
      domain: 0,
      domainMapEntries: 0,       // Exact domain matches in Map
      thirdParty: 0,
      path: 0,
      script: 0,
      regex: 0,
      whitelist: 0,
      elementHiding: 0,
      comments: 0,
      invalid: 0
    }
  };

  // Options that only affect element hiding, never network requests
  const cosmeticOnlyOptions = ['generichide', 'elemhide', 'specifichide'];

  // Tries to store an exact-domain rule in a Map. Returns false when the rule
  // must be evaluated individually instead:
  //  - wildcard or non-domain rules cannot be keyed by hostname;
  //  - $script / $third-party rules carry conditions a bare Map lookup cannot check;
  //  - a second rule for the same domain would overwrite the first one.
  const storeExactDomain = (map, parsedRule) => {
    if (!parsedRule.isDomain || !parsedRule.domain || parsedRule.domain.includes('*')) {
      return false;
    }
    if (parsedRule.isScript || parsedRule.isThirdParty) {
      return false;
    }
    const key = parsedRule.domain.toLowerCase();
    if (map.has(key)) {
      return false;
    }
    map.set(key, parsedRule);
    return true;
  };

  for (let line of lines) {
    line = line.trim();

    // Skip empty lines
    if (!line) continue;

    // Skip comments
    if (line.startsWith('!') || line.startsWith('#')) {
      rules.stats.comments++;
      continue;
    }

    // Skip element hiding rules (cosmetic filters)
    if (line.includes('##') || line.includes('#@#')) {
      rules.stats.elementHiding++;
      continue;
    }

    // Skip rules with cosmetic-only options (not for network blocking)
    const hasCosmeticOption = cosmeticOnlyOptions.some(opt =>
      line.includes(`$${opt}`) || line.includes(`,${opt}`)
    );
    if (hasCosmeticOption) {
      rules.stats.elementHiding++;
      continue;
    }

    rules.stats.total++;

    try {
      // Whitelist rules (exception rules)
      if (line.startsWith('@@')) {
        const parsedRule = parseRule(line.substring(2), true);

        if (!storeExactDomain(rules.whitelistMap, parsedRule)) {
          rules.whitelist.push(parsedRule);
        }
        rules.stats.whitelist++;
        continue;
      }

      // Regular blocking rules
      const parsedRule = parseRule(line, false);

      // Categorize based on rule type
      if (parsedRule.isThirdParty) {
        rules.thirdPartyRules.push(parsedRule);
        rules.stats.thirdParty++;
      } else if (parsedRule.isDomain) {
        if (storeExactDomain(rules.domainMap, parsedRule)) {
          rules.stats.domainMapEntries++;
        } else {
          rules.domainRules.push(parsedRule);
        }
        rules.stats.domain++;
      } else if (parsedRule.isScript) {
        rules.scriptRules.push(parsedRule);
        rules.stats.script++;
      } else if (parsedRule.isRegex) {
        rules.regexRules.push(parsedRule);
        rules.stats.regex++;
      } else {
        rules.pathRules.push(parsedRule);
        rules.stats.path++;
      }
    } catch (err) {
      rules.stats.invalid++;
      if (enableLogging) {
        console.log(`[Adblock] Failed to parse rule: ${line} - ${err.message}`);
      }
    }
  }

  if (enableLogging) {
    console.log(`[Adblock] Loaded ${rules.stats.total} rules:`);
    console.log(` - Domain rules: ${rules.stats.domain}`);
    console.log(` • Exact matches (Map): ${rules.stats.domainMapEntries}`);
    console.log(` • Wildcard patterns (Array): ${rules.domainRules.length}`);
    console.log(` - Third-party rules: ${rules.stats.thirdParty}`);
    console.log(` - Path rules: ${rules.stats.path}`);
    console.log(` - Script rules: ${rules.stats.script}`);
    console.log(` - Regex rules: ${rules.stats.regex}`);
    console.log(` - Whitelist rules: ${rules.stats.whitelist}`);
    console.log(` - Comments/Element hiding: ${rules.stats.comments + rules.stats.elementHiding}`);
    console.log(` - Invalid rules: ${rules.stats.invalid}`);
  }

  return createMatcher(rules, { enableLogging, caseSensitive });
}
|
|
183
|
+
|
|
184
|
+
/**
 * Parses one adblock network rule (without any leading `@@`) into a rule object.
 *
 * The returned object always exposes `matcher` as a function: domain rules take
 * `(url, hostname)`, every other rule type takes `(url)`.
 *
 * @param {string} rule - Raw rule string
 * @param {boolean} isWhitelist - Whether this is a whitelist rule
 * @returns {Object} Parsed rule object
 * @throws {SyntaxError} When a /regex/ rule is not a valid regular expression
 */
function parseRule(rule, isWhitelist) {
  const parsed = {
    raw: rule,
    isWhitelist,
    isDomain: false,
    isThirdParty: false,
    isScript: false,
    isRegex: false,
    domainRestrictions: null, // { include: ['site.com'] | null, exclude: ['other.com'] | null }
    pattern: '',
    options: {},
    matcher: null
  };

  // Locate the `$` that separates the pattern from its options ($option1,option2).
  // A /regex/ rule may itself contain `$` (e.g. /ads\.js$/), so for rules that
  // start with `/` the separator is the `$` that directly follows the closing
  // slash, and a rule that is entirely `/.../` has no options at all.
  let splitAt = rule.indexOf('$');
  if (rule.length > 1 && rule.startsWith('/')) {
    if (rule.endsWith('/')) {
      splitAt = -1;
    } else {
      const lastDollar = rule.lastIndexOf('$');
      if (lastDollar > 0 && rule[lastDollar - 1] === '/') {
        splitAt = lastDollar;
      }
    }
  }
  const pattern = splitAt === -1 ? rule : rule.slice(0, splitAt);
  const optionsStr = splitAt === -1 ? '' : rule.slice(splitAt + 1);
  parsed.pattern = pattern;

  // Parse options
  if (optionsStr) {
    // Cosmetic filtering options do not affect network blocking and are dropped
    const cosmeticOptions = ['generichide', 'elemhide', 'specifichide', 'genericblock'];
    const networkOptions = optionsStr
      .split(',')
      .filter(opt => !cosmeticOptions.includes(opt.split('=')[0].trim()));

    for (const opt of networkOptions) {
      const [key, value] = opt.split('=');
      parsed.options[key.trim()] = value ? value.trim() : true;
    }

    // Check for third-party option
    if (parsed.options['third-party'] || parsed.options['3p']) {
      parsed.isThirdParty = true;
    }

    // Check for script option
    if (parsed.options['script']) {
      parsed.isScript = true;
    }

    // Parse domain option: $domain=site1.com|site2.com|~excluded.com
    if (parsed.options['domain']) {
      const domains = parsed.options['domain']
        .split('|')
        .map(d => d.trim())
        .filter(d => d);

      const include = [];
      const exclude = [];

      for (const domain of domains) {
        if (domain.startsWith('~')) {
          // Negation: exclude this domain (stored without the leading '~')
          exclude.push(domain.substring(1).toLowerCase());
        } else {
          // Positive: include this domain
          include.push(domain.toLowerCase());
        }
      }

      parsed.domainRestrictions = {
        include: include.length > 0 ? include : null,
        exclude: exclude.length > 0 ? exclude : null
      };
    }
  }

  // Domain rules: ||domain.com^ or ||domain.com
  if (pattern.startsWith('||')) {
    parsed.isDomain = true;
    // NOTE(review): everything after the first `^` is discarded and any path
    // stays inside `domain`, so rules such as ||host.com^*/path or
    // ||host.com/path are not matched precisely - confirm intended handling.
    const domain = pattern.substring(2).replace(/\^.*$/, '').replace(/\*$/, '');
    parsed.domain = domain;
    parsed.matcher = createDomainMatcher(domain);
  }
  // Regex rules: /pattern/
  else if (pattern.startsWith('/') && pattern.endsWith('/')) {
    parsed.isRegex = true;
    const regex = new RegExp(pattern.substring(1, pattern.length - 1), 'i');
    // Wrap in a function: callers invoke rule.matcher(url) for every rule type
    parsed.matcher = (url) => regex.test(url);
  }
  // Path/wildcard rules: /ads/* or ad.js
  else {
    parsed.matcher = createPatternMatcher(pattern);
  }

  return parsed;
}
|
|
296
|
+
|
|
297
|
+
/**
 * Creates a matcher for a `||domain` rule.
 *
 * A plain domain matches the hostname itself and any of its subdomains.
 * A domain containing `*` is treated as a wildcard pattern: each `*` matches
 * any run of characters, and the pattern must start at a label boundary and
 * run to the end of the hostname.
 *
 * @param {string} domain - Domain to match (case-insensitive, may contain `*`)
 * @returns {Function} Matcher function `(url, hostname) => boolean`; `url` is unused
 */
function createDomainMatcher(domain) {
  const lowerDomain = domain.toLowerCase();

  if (!lowerDomain.includes('*')) {
    const dottedSuffix = `.${lowerDomain}`;
    return (url, hostname) => {
      const lowerHostname = hostname.toLowerCase();
      // Exact match or subdomain match
      return lowerHostname === lowerDomain || lowerHostname.endsWith(dottedSuffix);
    };
  }

  // Wildcard domain: escape each literal piece, then join the pieces with `.*`
  const source = lowerDomain
    .split('*')
    .map(piece => piece.replace(/[.+?^${}()|[\]\\]/g, '\\$&'))
    .join('.*');
  const wildcardRegex = new RegExp(`(?:^|\\.)${source}$`);

  return (url, hostname) => wildcardRegex.test(hostname.toLowerCase());
}
|
|
311
|
+
|
|
312
|
+
/**
 * Creates a matcher for path/wildcard rules by translating the adblock
 * pattern into a case-insensitive regular expression:
 *
 *   `*`  matches any run of characters
 *   `^`  matches a separator: any character other than a letter, digit or
 *        one of `_ - . %`, or the end of the address
 *   `|`  at the very start / very end anchors the pattern to the start / end
 *        of the URL; anywhere else it is a literal character
 *
 * @param {string} pattern - Pattern with wildcards
 * @returns {Function} Matcher function `(url) => boolean`
 */
function createPatternMatcher(pattern) {
  let body = pattern;
  let startAnchor = '';
  let endAnchor = '';

  // Strip the anchors first so the remaining pipes can be escaped as literals
  if (body.startsWith('|')) {
    startAnchor = '^';
    body = body.slice(1);
  }
  if (body.endsWith('|')) {
    endAnchor = '$';
    body = body.slice(0, -1);
  }

  const separator = '(?:[^\\w\\-.%]|$)';
  const source = body
    .replace(/[.+?{}()[\]\\|$]/g, '\\$&')   // Escape regex special chars
    .replace(/\*/g, '.*')                    // * -> .*
    .replace(/\^/g, () => separator);        // ^ -> separator char or end of URL

  const regex = new RegExp(`${startAnchor}${source}${endAnchor}`, 'i');
  return (url) => regex.test(url);
}
|
|
333
|
+
|
|
334
|
+
/**
 * Creates the matcher object returned by parseAdblockRules().
 *
 * Evaluation order inside shouldBlock(): exception (whitelist) rules first,
 * then exact/parent domain rules, remaining domain rules, third-party rules,
 * script rules, path rules and finally regex rules.
 *
 * @param {Object} rules - Parsed rules object
 * @param {Object} [options] - Matcher options
 * @param {boolean} [options.enableLogging=false] - Log every decision
 * @param {boolean} [options.caseSensitive=false] - Accepted for compatibility; not used by the matcher
 * @returns {Object} Matcher exposing `rules`, `shouldBlock()` and `getStats()`
 */
function createMatcher(rules, options = {}) {
  const { enableLogging = false } = options;

  // URL parsing cache (scoped to this matcher instance)
  const urlCache = new URLCache(1000);
  let cacheHits = 0;
  let cacheMisses = 0;

  // Exception rules can carry $third-party as well, so third-party status is
  // needed even when there are no third-party blocking rules.
  const exceptionsUseThirdParty =
    rules.whitelist.some(rule => rule.isThirdParty) ||
    [...rules.whitelistMap.values()].some(rule => rule.isThirdParty);

  // Parses a URL through the cache; throws (like `new URL`) on invalid input.
  const parseHost = (rawUrl) => {
    const cached = urlCache.get(rawUrl);
    if (cached) {
      cacheHits++;
      return cached;
    }
    const { hostname } = new URL(rawUrl);
    const entry = { hostname, lowerHostname: hostname.toLowerCase() };
    urlCache.set(rawUrl, entry);
    cacheMisses++;
    return entry;
  };

  // True when the URL path (ignoring query string and fragment) ends in ".js".
  const hasScriptPath = (rawUrl) => {
    const end = rawUrl.search(/[?#]/);
    return (end === -1 ? rawUrl : rawUrl.slice(0, end)).endsWith('.js');
  };

  // Applies every condition of a rule (domain restrictions, options, matcher).
  const applies = (rule, req) =>
    matchesRule(rule, req.url, req.hostname, req.isThirdParty, req.resourceType, req.sourceDomain);

  // Looks up the hostname and then each parent domain (sub.example.com ->
  // example.com -> com) in an exact-domain Map; returns the first rule that applies.
  const findInMap = (map, req) => {
    const labels = req.lowerHostname.split('.');
    for (let i = 0; i < labels.length; i++) {
      const rule = map.get(labels.slice(i).join('.'));
      if (rule && applies(rule, req)) {
        return rule;
      }
    }
    return null;
  };

  // Returns the first rule in the list that applies to the request.
  const findInList = (list, req) => {
    for (let i = 0; i < list.length; i++) {
      if (applies(list[i], req)) {
        return list[i];
      }
    }
    return null;
  };

  // Builds the result object; logs only once the rule is known to apply.
  const decide = (blocked, rule, reason, label, url) => {
    if (enableLogging) {
      console.log(`[Adblock] ${label}: ${url} (${rule.raw})`);
    }
    return { blocked, rule: rule.raw, reason };
  };

  return {
    rules,

    /**
     * Check if URL should be blocked
     * @param {string} url - URL to check
     * @param {string} sourceUrl - Source page URL (for third-party detection)
     * @param {string} resourceType - Type of resource (script, image, etc)
     * @returns {Object} { blocked: boolean, rule: string|null, reason: string }
     */
    shouldBlock(url, sourceUrl = '', resourceType = '') {
      try {
        const { hostname, lowerHostname } = parseHost(url);

        // Source page hostname for $domain checks; stays null when unknown
        let sourceDomain = null;
        if (sourceUrl) {
          try {
            sourceDomain = parseHost(sourceUrl).lowerHostname;
          } catch (err) {
            // Invalid sourceUrl: treated as "source unknown", not as an error
          }
        }

        // Only computed when some rule actually depends on it; reuses the
        // hostnames parsed above instead of parsing both URLs again.
        const needsThirdParty = rules.thirdPartyRules.length > 0 || exceptionsUseThirdParty;
        const isThirdParty = needsThirdParty && sourceDomain !== null
          ? getBaseDomain(lowerHostname) !== getBaseDomain(sourceDomain)
          : false;

        const req = { url, hostname, lowerHostname, isThirdParty, resourceType, sourceDomain };

        // === WHITELIST CHECK (exception rules take precedence) ===
        const exception = findInMap(rules.whitelistMap, req) || findInList(rules.whitelist, req);
        if (exception) {
          return decide(false, exception, 'whitelisted', 'Whitelisted', url);
        }

        // === DOMAIN BLOCKING CHECK ===
        const domainRule = findInMap(rules.domainMap, req) || findInList(rules.domainRules, req);
        if (domainRule) {
          return decide(true, domainRule, 'domain_rule', 'Blocked domain', url);
        }

        // Check third-party rules
        if (isThirdParty) {
          const thirdPartyRule = findInList(rules.thirdPartyRules, req);
          if (thirdPartyRule) {
            return decide(true, thirdPartyRule, 'third_party_rule', 'Blocked third-party', url);
          }
        }

        // Check script rules
        if (resourceType === 'script' || hasScriptPath(url)) {
          const scriptRule = findInList(rules.scriptRules, req);
          if (scriptRule) {
            return decide(true, scriptRule, 'script_rule', 'Blocked script', url);
          }
        }

        // Check path rules
        const pathRule = findInList(rules.pathRules, req);
        if (pathRule) {
          return decide(true, pathRule, 'path_rule', 'Blocked path', url);
        }

        // Check regex rules (most expensive, check last)
        const regexRule = findInList(rules.regexRules, req);
        if (regexRule) {
          return decide(true, regexRule, 'regex_rule', 'Blocked regex', url);
        }

        // No match - allow request
        return {
          blocked: false,
          rule: null,
          reason: 'no_match'
        };
      } catch (err) {
        if (enableLogging) {
          console.log(`[Adblock] Error checking ${url}: ${err.message}`);
        }
        // On error, allow request
        return {
          blocked: false,
          rule: null,
          reason: 'error'
        };
      }
    },

    /**
     * Get statistics about loaded rules and the URL cache
     * @returns {Object} Rule counters plus `cache: { hits, misses, hitRate, size, maxSize }`
     */
    getStats() {
      const lookups = cacheHits + cacheMisses;
      const hitRate = lookups > 0
        ? ((cacheHits / lookups) * 100).toFixed(1) + '%'
        : '0%';

      return {
        ...rules.stats,
        cache: {
          hits: cacheHits,
          misses: cacheMisses,
          hitRate: hitRate,
          size: urlCache.cache.size,
          maxSize: urlCache.maxSize
        }
      };
    }
  };
}
|
|
608
|
+
|
|
609
|
+
/**
 * Decides whether a rule's `$domain=` restrictions allow it to apply to a
 * request issued from the given source page.
 *
 * A restriction entry covers the source domain when they are equal or when the
 * source domain is a subdomain of the entry. Exclusions win over inclusions.
 *
 * @param {Object} rule - Rule with potential domainRestrictions
 * @param {string|null} sourceDomain - Domain of the page making the request (lowercase)
 * @returns {boolean} True if rule should apply on this source domain
 */
function matchesDomainRestrictions(rule, sourceDomain) {
  const restrictions = rule.domainRestrictions;

  // Unrestricted rule, or unknown source page: the rule is treated as applicable
  if (!restrictions || !sourceDomain) {
    return true;
  }

  const { include, exclude } = restrictions;
  const covers = (entry) => sourceDomain === entry || sourceDomain.endsWith('.' + entry);

  // An explicit exclusion always disables the rule
  if (exclude && exclude.some(covers)) {
    return false;
  }

  // With an include list the source must be on it; without one the rule applies
  const hasIncludeList = Boolean(include && include.length > 0);
  return hasIncludeList ? include.some(covers) : true;
}
|
|
677
|
+
|
|
678
|
+
/**
 * Check if rule matches the given URL.
 *
 * Conditions are evaluated cheapest first: `$domain=` restrictions, the
 * third-party option, the script option, and finally the rule's own matcher.
 *
 * @param {Object} rule - Parsed rule object
 * @param {string} url - URL to check
 * @param {string} hostname - Hostname of URL
 * @param {boolean} isThirdParty - Whether request is third-party
 * @param {string} resourceType - Resource type
 * @param {string|null} sourceDomain - Source page domain (for $domain option)
 * @returns {boolean} True if rule matches
 */
function matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain) {
  // Check domain restrictions first
  if (!matchesDomainRestrictions(rule, sourceDomain)) {
    return false;
  }

  // Check third-party option
  if (rule.isThirdParty && !isThirdParty) {
    return false;
  }

  // Check script option: either the declared resource type is "script" or the
  // URL path ends in ".js" (query string and fragment are ignored, so
  // "app.js?v=2" still counts as a script).
  if (rule.isScript && resourceType !== 'script') {
    const pathEnd = url.search(/[?#]/);
    const path = pathEnd === -1 ? url : url.slice(0, pathEnd);
    if (!path.endsWith('.js')) {
      return false;
    }
  }

  // Regex rules may carry a RegExp object rather than a function
  if (rule.matcher instanceof RegExp) {
    return rule.matcher.test(url);
  }

  // Apply matcher function (domain matchers also need the hostname)
  return rule.isDomain ? rule.matcher(url, hostname) : rule.matcher(url);
}
|
|
710
|
+
|
|
711
|
+
/**
 * Tells whether a request goes to a different site than the page issuing it,
 * by comparing the base domains of both URLs.
 *
 * @param {string} requestUrl - URL being requested
 * @param {string} sourceUrl - URL of the page making the request
 * @returns {boolean} True if third-party request; false when either URL cannot be parsed
 */
function isThirdPartyRequest(requestUrl, sourceUrl) {
  try {
    const [requestBase, sourceBase] = [requestUrl, sourceUrl].map(
      (rawUrl) => getBaseDomain(new URL(rawUrl).hostname)
    );
    return requestBase !== sourceBase;
  } catch (err) {
    // Unparseable URL: treated as first-party
    return false;
  }
}
|
|
731
|
+
|
|
732
|
+
// Second-level labels that commonly act as public suffixes under two-letter
// country-code TLDs (co.uk, com.au, org.nz, ...). Heuristic only - this is not
// a substitute for the full Public Suffix List.
const COMMON_SECOND_LEVEL_LABELS = new Set(['co', 'com', 'org', 'net', 'gov', 'edu', 'ac']);

/**
 * Extract base domain from hostname.
 *
 * Returns the last two labels (example.com from sub.example.com), or the last
 * three when the hostname ends in a common country-code suffix such as
 * `co.uk`, so that `a.example.co.uk` and `b.other.co.uk` are not mistaken for
 * the same site. IPv4 addresses and hostnames with at most two labels are
 * returned unchanged.
 *
 * @param {string} hostname - Full hostname
 * @returns {string} Base domain
 */
function getBaseDomain(hostname) {
  // An IPv4 address has no registrable domain; truncating it would make
  // unrelated addresses look like the same site.
  if (/^\d{1,3}(?:\.\d{1,3}){3}$/.test(hostname)) {
    return hostname;
  }

  const parts = hostname.split('.');
  if (parts.length <= 2) {
    return hostname;
  }

  const topLevel = parts[parts.length - 1];
  const secondLevel = parts[parts.length - 2].toLowerCase();
  const keep = topLevel.length === 2 && COMMON_SECOND_LEVEL_LABELS.has(secondLevel) ? 3 : 2;

  return parts.slice(-keep).join('.');
}
|
|
745
|
+
|
|
746
|
+
module.exports = {
|
|
747
|
+
parseAdblockRules,
|
|
748
|
+
isThirdPartyRequest,
|
|
749
|
+
getBaseDomain
|
|
750
|
+
};
|
package/lib/fingerprint.js
CHANGED
|
@@ -93,9 +93,9 @@ const USER_AGENT_COLLECTIONS = Object.freeze(new Map([
|
|
|
93
93
|
['chrome', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
|
|
94
94
|
['chrome_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
|
|
95
95
|
['chrome_linux', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
|
|
96
|
-
['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:
|
|
97
|
-
['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:
|
|
98
|
-
['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:
|
|
96
|
+
['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:145.0) Gecko/20100101 Firefox/145.0"],
|
|
97
|
+
['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:145.0) Gecko/20100101 Firefox/145.0"],
|
|
98
|
+
['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Gecko/20100101 Firefox/145.0"],
|
|
99
99
|
['safari', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"]
|
|
100
100
|
]));
|
|
101
101
|
|
package/nwss.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// === Network scanner script (nwss.js) v2.0.
|
|
1
|
+
// === Network scanner script (nwss.js) v2.0.33 ===
|
|
2
2
|
|
|
3
3
|
// puppeteer for browser automation, fs for file system operations, psl for domain parsing.
|
|
4
4
|
// const pLimit = require('p-limit'); // Will be dynamically imported
|
|
@@ -50,6 +50,8 @@ const { initializeDryRunCollections, addDryRunMatch, addDryRunNetTools, processD
|
|
|
50
50
|
const { clearSiteData } = require('./lib/clear_sitedata');
|
|
51
51
|
// Referrer header generation
|
|
52
52
|
const { getReferrerForUrl, validateReferrerConfig, validateReferrerDisable } = require('./lib/referrer');
|
|
53
|
+
// Adblock rules parser
|
|
54
|
+
const { parseAdblockRules } = require('./lib/adblock');
|
|
53
55
|
|
|
54
56
|
// Fast setTimeout helper for Puppeteer 22.x compatibility
|
|
55
57
|
// Uses standard Promise constructor for better performance than node:timers/promises
|
|
@@ -94,9 +96,9 @@ const USER_AGENTS = Object.freeze(new Map([
|
|
|
94
96
|
['chrome', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
|
|
95
97
|
['chrome_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
|
|
96
98
|
['chrome_linux', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
|
|
97
|
-
['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:
|
|
98
|
-
['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:
|
|
99
|
-
['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:
|
|
99
|
+
['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:145.0) Gecko/20100101 Firefox/145.0"],
|
|
100
|
+
['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:145.0) Gecko/20100101 Firefox/145.0"],
|
|
101
|
+
['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Gecko/20100101 Firefox/145.0"],
|
|
100
102
|
['safari', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"]
|
|
101
103
|
]));
|
|
102
104
|
|
|
@@ -145,7 +147,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
|
|
|
145
147
|
const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive, performGroupWindowCleanup, performRealtimeWindowCleanup, trackPageForRealtime, updatePageUsage, cleanupPageBeforeReload } = require('./lib/browserhealth');
|
|
146
148
|
|
|
147
149
|
// --- Script Configuration & Constants ---
|
|
148
|
-
const VERSION = '2.0.
|
|
150
|
+
const VERSION = '2.0.33'; // Script version
|
|
149
151
|
|
|
150
152
|
// get startTime
|
|
151
153
|
const startTime = Date.now();
|
|
@@ -208,7 +210,7 @@ const dryRunMode = args.includes('--dry-run');
|
|
|
208
210
|
const compressLogs = args.includes('--compress-logs');
|
|
209
211
|
const removeTempFiles = args.includes('--remove-tempfiles');
|
|
210
212
|
const validateConfig = args.includes('--validate-config');
|
|
211
|
-
|
|
213
|
+
let validateRules = args.includes('--validate-rules');
|
|
212
214
|
const testValidation = args.includes('--test-validation');
|
|
213
215
|
let cleanRules = args.includes('--clean-rules');
|
|
214
216
|
const clearCache = args.includes('--clear-cache');
|
|
@@ -244,6 +246,11 @@ if (cleanupIntervalIndex !== -1 && args[cleanupIntervalIndex + 1]) {
|
|
|
244
246
|
const enableColors = args.includes('--color') || args.includes('--colour');
|
|
245
247
|
let adblockRulesMode = args.includes('--adblock-rules');
|
|
246
248
|
|
|
249
|
+
// Adblock variables (request blocking)
|
|
250
|
+
let adblockEnabled = false;
|
|
251
|
+
let adblockMatcher = null;
|
|
252
|
+
let adblockStats = { blocked: 0, allowed: 0 };
|
|
253
|
+
|
|
247
254
|
// Validate --adblock-rules usage - ignore if used incorrectly instead of erroring
|
|
248
255
|
if (adblockRulesMode) {
|
|
249
256
|
if (!outputFile) {
|
|
@@ -452,6 +459,24 @@ if (validateRules || validateRulesFile) {
|
|
|
452
459
|
}
|
|
453
460
|
}
|
|
454
461
|
|
|
462
|
+
// Parse --block-ads argument for request-level ad blocking
|
|
463
|
+
const blockAdsIndex = args.findIndex(arg => arg === '--block-ads' || arg.startsWith('--block-ads=')); // match only the bare flag or --block-ads=<file>, not other flags that merely share the prefix
|
|
464
|
+
if (blockAdsIndex !== -1) {
|
|
465
|
+
const rulesFile = args[blockAdsIndex].includes('=')
|
|
466
|
+
? args[blockAdsIndex].split('=')[1]
|
|
467
|
+
: args[blockAdsIndex + 1];
|
|
468
|
+
|
|
469
|
+
if (!rulesFile || !fs.existsSync(rulesFile)) {
|
|
470
|
+
console.log(`Error: Adblock rules file not found: ${rulesFile || '(not specified)'}`);
|
|
471
|
+
process.exit(1);
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
adblockEnabled = true;
|
|
475
|
+
adblockMatcher = parseAdblockRules(rulesFile, { enableLogging: forceDebug });
|
|
476
|
+
const stats = adblockMatcher.getStats();
|
|
477
|
+
if (!silentMode) console.log(messageColors.success(`Adblock enabled: Loaded ${stats.total} blocking rules from ${rulesFile}`));
|
|
478
|
+
}
|
|
479
|
+
|
|
455
480
|
if (args.includes('--help') || args.includes('-h')) {
|
|
456
481
|
console.log(`Usage: node nwss.js [options]
|
|
457
482
|
|
|
@@ -471,6 +496,10 @@ Output Format Options:
|
|
|
471
496
|
--privoxy Output as { +block } .domain.com (Privoxy format)
|
|
472
497
|
--pihole Output as (^|\\.)domain\\.com$ (Pi-hole regex format)
|
|
473
498
|
--adblock-rules Generate adblock filter rules with resource type modifiers (requires -o)
|
|
499
|
+
|
|
500
|
+
Request Blocking:
|
|
501
|
+
--block-ads=<file> Block ads/trackers using EasyList format rules (||domain.com^, /ads/*, etc)
|
|
502
|
+
Works at request-level for maximum performance
|
|
474
503
|
|
|
475
504
|
General Options:
|
|
476
505
|
--verbose Force verbose mode globally
|
|
@@ -1282,7 +1311,6 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1282
1311
|
'--disable-client-side-phishing-detection',
|
|
1283
1312
|
'--enable-features=NetworkService',
|
|
1284
1313
|
// Disk space controls - 50MB cache limits
|
|
1285
|
-
'--disable-features=VizDisplayCompositor',
|
|
1286
1314
|
`--disk-cache-size=${CACHE_LIMITS.DISK_CACHE_SIZE}`, // 50MB disk cache
|
|
1287
1315
|
`--media-cache-size=${CACHE_LIMITS.MEDIA_CACHE_SIZE}`, // 50MB media cache
|
|
1288
1316
|
'--disable-application-cache',
|
|
@@ -1295,7 +1323,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1295
1323
|
'--disable-ipc-flooding-protection',
|
|
1296
1324
|
'--aggressive-cache-discard',
|
|
1297
1325
|
'--memory-pressure-off',
|
|
1298
|
-
'--max_old_space_size=2048',
|
|
1326
|
+
'--max_old_space_size=2048', // V8 heap limit
|
|
1299
1327
|
'--disable-prompt-on-repost', // Fixes form popup on page reload
|
|
1300
1328
|
'--disable-background-networking',
|
|
1301
1329
|
'--no-sandbox',
|
|
@@ -1310,7 +1338,6 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1310
1338
|
'--disable-extensions',
|
|
1311
1339
|
'--no-default-browser-check',
|
|
1312
1340
|
'--safebrowsing-disable-auto-update',
|
|
1313
|
-
'--max_old_space_size=1024',
|
|
1314
1341
|
'--ignore-ssl-errors',
|
|
1315
1342
|
'--ignore-certificate-errors',
|
|
1316
1343
|
'--ignore-certificate-errors-spki-list',
|
|
@@ -2345,6 +2372,27 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2345
2372
|
console.log(formatLogMessage('debug', `${messageColors.highlight('[req]')}[frame: ${isMainFrame ? 'main' : 'iframe'}] ${frameUrl} → ${request.url()}`));
|
|
2346
2373
|
}
|
|
2347
2374
|
|
|
2375
|
+
// Apply adblock rules BEFORE expensive regex checks for better performance
|
|
2376
|
+
if (adblockEnabled && adblockMatcher) {
|
|
2377
|
+
try {
|
|
2378
|
+
const result = adblockMatcher.shouldBlock(
|
|
2379
|
+
checkedUrl,
|
|
2380
|
+
currentUrl,
|
|
2381
|
+
request.resourceType()
|
|
2382
|
+
);
|
|
2383
|
+
|
|
2384
|
+
if (result.blocked) {
|
|
2385
|
+
adblockStats.blocked++;
|
|
2386
|
+
if (forceDebug) {
|
|
2387
|
+
console.log(formatLogMessage('debug', `${messageColors.blocked('[adblock]')} ${checkedUrl} (${result.reason})`));
|
|
2388
|
+
}
|
|
2389
|
+
request.abort();
|
|
2390
|
+
return;
|
|
2391
|
+
}
|
|
2392
|
+
adblockStats.allowed++;
|
|
2393
|
+
} catch (err) { /* Silently continue on adblock errors */ }
|
|
2394
|
+
}
|
|
2395
|
+
|
|
2348
2396
|
// Show --debug output and the url while its scanning
|
|
2349
2397
|
if (forceDebug) {
|
|
2350
2398
|
const simplifiedUrl = getRootDomain(currentUrl);
|
|
@@ -4049,6 +4097,12 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4049
4097
|
console.log(formatLogMessage('debug', `Cached detections: ${cloudflareStats.size}`));
|
|
4050
4098
|
}
|
|
4051
4099
|
// Log smart cache statistics (if cache is enabled)
|
|
4100
|
+
// Adblock statistics
|
|
4101
|
+
if (adblockEnabled) {
|
|
4102
|
+
console.log(formatLogMessage('debug', '=== Adblock Statistics ==='));
|
|
4103
|
+
// Guard the zero-request case: 0 / 0 is NaN, which would be logged as "NaN% block rate".
const adblockTotal = adblockStats.blocked + adblockStats.allowed;
const blockRate = (adblockTotal > 0 ? (adblockStats.blocked / adblockTotal) * 100 : 0).toFixed(1);
|
|
4104
|
+
console.log(formatLogMessage('debug', `Blocked: ${adblockStats.blocked} requests (${blockRate}% block rate), Allowed: ${adblockStats.allowed}`));
|
|
4105
|
+
}
|
|
4052
4106
|
if (smartCache) {
|
|
4053
4107
|
const cacheStats = smartCache.getStats();
|
|
4054
4108
|
console.log(formatLogMessage('debug', '=== Smart Cache Statistics ==='));
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.34",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|