@fanboynz/network-scanner 2.0.32 → 2.0.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/adblock.js ADDED
@@ -0,0 +1,750 @@
1
+ // === Adblock Rules Parser (adblock.js) v2.2 - Complete Optimization ===
2
+ // Supports EasyList/AdBlock Plus filter syntax
3
+ // Optimizations: Map domains + URL cache + skip third-party calc + cached hostname split
4
+
5
+ const fs = require('fs');
6
+
7
/**
 * Small least-recently-used (LRU) cache for URL parsing results.
 *
 * Backed by a Map, whose iteration order is insertion order: the first key is
 * always the least recently used entry because every read or write moves the
 * touched key to the end. The fixed size limit prevents unbounded growth.
 */
class URLCache {
  /**
   * @param {number} maxSize - Maximum number of entries kept before eviction
   */
  constructor(maxSize = 1000) {
    this.cache = new Map();
    this.maxSize = maxSize;
  }

  /**
   * Look up a cached value and mark it as most recently used.
   * @param {string} url - Cache key
   * @returns {*} Cached value, or undefined when the key is absent
   */
  get(url) {
    if (!this.cache.has(url)) {
      return undefined;
    }
    const value = this.cache.get(url);
    // Re-insert so the key moves to the "most recent" end of the Map.
    this.cache.delete(url);
    this.cache.set(url, value);
    return value;
  }

  /**
   * Store a value, evicting the least recently used entry when full.
   * @param {string} url - Cache key
   * @param {*} value - Value to cache
   */
  set(url, value) {
    if (this.cache.has(url)) {
      // Overwriting an existing key must not evict an unrelated entry;
      // delete first so the key is re-inserted at the "most recent" end.
      this.cache.delete(url);
    } else if (this.cache.size >= this.maxSize) {
      const oldestKey = this.cache.keys().next().value;
      this.cache.delete(oldestKey);
    }
    this.cache.set(url, value);
  }

  /** Remove every cached entry. */
  clear() {
    this.cache.clear();
  }

  /**
   * @returns {{size: number, maxSize: number}} Current and maximum entry counts
   */
  getStats() {
    return {
      size: this.cache.size,
      maxSize: this.maxSize
    };
  }
}
41
+
42
// Options that only affect element hiding; rules carrying them never block
// network requests, so they are skipped entirely.
const COSMETIC_ONLY_OPTIONS = ['generichide', 'elemhide', 'specifichide'];

/**
 * Parses an adblock filter list file and creates a matcher.
 *
 * Rules are categorized into lookup structures:
 *  - exact, option-free domain rules go into Maps for O(1) hostname lookup;
 *  - everything else goes into per-category arrays that are scanned linearly.
 *
 * A domain rule is only stored in a Map when the Map lookup alone is enough
 * to decide it. Rules carrying `$script` / `$third-party`, and rules whose
 * domain is already present in the Map (for example two rules for the same
 * host with different `$domain=` lists), are kept in the array fallback so
 * that no rule is silently overwritten or applied without its options.
 *
 * @param {string} filePath - Path to filter list file
 * @param {Object} options - Parser options
 * @param {boolean} [options.enableLogging=false] - Log a load summary and parse failures
 * @param {boolean} [options.caseSensitive=false] - Forwarded to createMatcher
 * @returns {Object} Rule matcher with matching functions
 * @throws {Error} When the file does not exist
 */
function parseAdblockRules(filePath, options = {}) {
  const {
    enableLogging = false,
    caseSensitive = false
  } = options;

  if (!fs.existsSync(filePath)) {
    throw new Error(`Adblock rules file not found: ${filePath}`);
  }

  const fileContent = fs.readFileSync(filePath, 'utf-8');
  // Splitting on '\n' is enough: the per-line trim() below removes any '\r'.
  const lines = fileContent.split('\n');

  const rules = {
    domainMap: new Map(),       // ||domain.com^ - Exact domains for O(1) lookup
    domainRules: [],            // Wildcard / option-bearing / colliding domain rules (fallback)
    thirdPartyRules: [],        // ||domain.com^$third-party
    pathRules: [],              // /ads/*
    scriptRules: [],            // .js$script
    regexRules: [],             // /regex/
    whitelist: [],              // @@ rules that need a full match check
    whitelistMap: new Map(),    // Exact whitelist domains for O(1) lookup
    elementHiding: [],          // ##.ad-class (not used for network blocking)
    stats: {
      total: 0,
      domain: 0,
      domainMapEntries: 0,      // Exact domain matches in Map
      thirdParty: 0,
      path: 0,
      script: 0,
      regex: 0,
      whitelist: 0,
      elementHiding: 0,
      comments: 0,
      invalid: 0
    }
  };

  // Decide where a domain rule lives: 'map' (stored now), 'duplicate'
  // (identical rule already stored, nothing to add) or 'array' (fallback).
  const placeDomainRule = (map, rule) => {
    const isExactDomain = Boolean(rule.isDomain && rule.domain && !rule.domain.includes('*'));
    // The Map fast path cannot be trusted to evaluate request-type options.
    if (!isExactDomain || rule.isThirdParty || rule.isScript) {
      return 'array';
    }
    const key = rule.domain.toLowerCase();
    const existing = map.get(key);
    if (!existing) {
      map.set(key, rule);
      return 'map';
    }
    return existing.raw === rule.raw ? 'duplicate' : 'array';
  };

  for (let line of lines) {
    line = line.trim();

    // Skip empty lines
    if (!line) continue;

    // Skip comments and the "[Adblock Plus 2.0]" style list header
    if (line.startsWith('!') || line.startsWith('#') ||
        (line.startsWith('[') && line.endsWith(']'))) {
      rules.stats.comments++;
      continue;
    }

    // Skip element hiding rules (cosmetic filters)
    if (line.includes('##') || line.includes('#@#')) {
      rules.stats.elementHiding++;
      continue;
    }

    // Skip rules with cosmetic-only options (not for network blocking)
    const hasCosmeticOption = COSMETIC_ONLY_OPTIONS.some(opt =>
      line.includes(`$${opt}`) || line.includes(`,${opt}`)
    );
    if (hasCosmeticOption) {
      rules.stats.elementHiding++;
      continue;
    }

    rules.stats.total++;

    try {
      // Whitelist rules (exception rules)
      if (line.startsWith('@@')) {
        const parsedRule = parseRule(line.substring(2), true);
        if (placeDomainRule(rules.whitelistMap, parsedRule) === 'array') {
          rules.whitelist.push(parsedRule);
        }
        rules.stats.whitelist++;
        continue;
      }

      // Regular blocking rules
      const parsedRule = parseRule(line, false);

      // Categorize based on rule type
      if (parsedRule.isThirdParty) {
        rules.thirdPartyRules.push(parsedRule);
        rules.stats.thirdParty++;
      } else if (parsedRule.isDomain) {
        const placement = placeDomainRule(rules.domainMap, parsedRule);
        if (placement === 'map') {
          rules.stats.domainMapEntries++;
        } else if (placement === 'array') {
          rules.domainRules.push(parsedRule);
        }
        rules.stats.domain++;
      } else if (parsedRule.isScript) {
        rules.scriptRules.push(parsedRule);
        rules.stats.script++;
      } else if (parsedRule.isRegex) {
        rules.regexRules.push(parsedRule);
        rules.stats.regex++;
      } else {
        rules.pathRules.push(parsedRule);
        rules.stats.path++;
      }
    } catch (err) {
      // A single malformed rule must not abort loading the whole list.
      rules.stats.invalid++;
      if (enableLogging) {
        console.log(`[Adblock] Failed to parse rule: ${line} - ${err.message}`);
      }
    }
  }

  if (enableLogging) {
    console.log(`[Adblock] Loaded ${rules.stats.total} rules:`);
    console.log(` - Domain rules: ${rules.stats.domain}`);
    console.log(` • Exact matches (Map): ${rules.stats.domainMapEntries}`);
    console.log(` • Wildcard patterns (Array): ${rules.domainRules.length}`);
    console.log(` - Third-party rules: ${rules.stats.thirdParty}`);
    console.log(` - Path rules: ${rules.stats.path}`);
    console.log(` - Script rules: ${rules.stats.script}`);
    console.log(` - Regex rules: ${rules.stats.regex}`);
    console.log(` - Whitelist rules: ${rules.stats.whitelist}`);
    console.log(` - Comments/Element hiding: ${rules.stats.comments + rules.stats.elementHiding}`);
    console.log(` - Invalid rules: ${rules.stats.invalid}`);
  }

  return createMatcher(rules, { enableLogging, caseSensitive });
}
183
+
184
// Options that only influence cosmetic filtering; they are dropped from the
// parsed option set because they do not affect network blocking.
const COSMETIC_RULE_OPTIONS = ['generichide', 'elemhide', 'specifichide', 'genericblock'];

/**
 * Parses an individual adblock rule (without any leading "@@").
 *
 * The returned object always has a `matcher`:
 *  - pure host rules (`||domain.com^`) set `isDomain` and `domain`, and use a
 *    hostname matcher called as `matcher(url, hostname)`;
 *  - host rules with a path (`||domain.com/ads/*`) set `isDomain` but leave
 *    `domain` unset, so callers that key exact-domain lookups on `domain`
 *    do not treat them as "block the whole host"; their matcher requires
 *    both the hostname and the URL pattern to match;
 *  - `/regex/` rules set `isRegex` and store a RegExp as `matcher`;
 *  - everything else gets a URL pattern matcher called as `matcher(url)`.
 *
 * @param {string} rule - Raw rule string
 * @param {boolean} isWhitelist - Whether this is a whitelist rule
 * @returns {Object} Parsed rule object
 * @throws {SyntaxError} When a /regex/ rule is not a valid regular expression
 */
function parseRule(rule, isWhitelist) {
  const parsed = {
    raw: rule,
    isWhitelist,
    isDomain: false,
    isThirdParty: false,
    isScript: false,
    isRegex: false,
    domainRestrictions: null, // { include: ['site.com'], exclude: ['other.com'] }
    pattern: '',
    options: {},
    matcher: null
  };

  // Split pattern from options ($option1,option2). A rule that is entirely a
  // /regex/ literal has no options even if the regex contains '$'; otherwise
  // options start after the LAST '$' so '$' inside the pattern is preserved.
  let pattern = rule;
  let optionsStr = '';
  const isBareRegex = rule.length > 1 && rule.startsWith('/') && rule.endsWith('/');
  if (!isBareRegex) {
    const optionsAt = rule.lastIndexOf('$');
    if (optionsAt !== -1) {
      pattern = rule.slice(0, optionsAt);
      optionsStr = rule.slice(optionsAt + 1);
    }
  }
  parsed.pattern = pattern;

  if (optionsStr) {
    for (const rawOption of optionsStr.split(',')) {
      // Split on the first '=' only, so values containing '=' stay intact.
      const eq = rawOption.indexOf('=');
      const key = (eq === -1 ? rawOption : rawOption.slice(0, eq)).trim();
      if (COSMETIC_RULE_OPTIONS.includes(key)) {
        continue;
      }
      const value = eq === -1 ? '' : rawOption.slice(eq + 1).trim();
      parsed.options[key] = value || true;
    }

    // Check for third-party option
    if (parsed.options['third-party'] || parsed.options['3p']) {
      parsed.isThirdParty = true;
    }

    // Check for script option
    if (parsed.options['script']) {
      parsed.isScript = true;
    }

    // Parse domain option: $domain=site1.com|site2.com|~excluded.com
    // (a bare "$domain" with no value is stored as `true` and ignored here)
    const domainList = parsed.options['domain'];
    if (typeof domainList === 'string') {
      const include = [];
      const exclude = [];

      for (const entry of domainList.split('|')) {
        const domain = entry.trim();
        if (!domain) continue;
        if (domain.startsWith('~')) {
          // Negation: rule must NOT apply on this source domain
          exclude.push(domain.substring(1).toLowerCase());
        } else {
          include.push(domain.toLowerCase());
        }
      }

      parsed.domainRestrictions = {
        include: include.length > 0 ? include : null,
        exclude: exclude.length > 0 ? exclude : null
      };
    }
  }

  // Domain rules: ||domain.com^ or ||domain.com, optionally followed by a path
  if (pattern.startsWith('||')) {
    parsed.isDomain = true;
    const anchored = pattern.slice(2);
    // The host part ends at the first separator, port, path or query marker.
    const hostEnd = anchored.search(/[\/^:?]/);
    const hostPart = (hostEnd === -1 ? anchored : anchored.slice(0, hostEnd)).replace(/\*$/, '');
    const tail = hostEnd === -1 ? '' : anchored.slice(hostEnd);

    if (/^[\^\/]?\*?$/.test(tail)) {
      // Nothing meaningful after the host: the rule covers the whole host.
      parsed.domain = hostPart;
      parsed.matcher = createDomainMatcher(hostPart);
    } else {
      // Host plus path: the hostname must match AND the URL must contain the
      // anchored pattern. `domain` stays unset on purpose (see JSDoc above).
      const hostMatches = createDomainMatcher(hostPart);
      const urlMatches = createPatternMatcher(anchored);
      parsed.matcher = (url, hostname) => hostMatches(url, hostname) && urlMatches(url);
    }
  }
  // Regex rules: /pattern/
  else if (pattern.startsWith('/') && pattern.endsWith('/')) {
    parsed.isRegex = true;
    const regexPattern = pattern.substring(1, pattern.length - 1);
    parsed.matcher = new RegExp(regexPattern, 'i');
  }
  // Path/wildcard rules: /ads/* or ad.js
  else {
    parsed.matcher = createPatternMatcher(pattern);
  }

  return parsed;
}
296
+
297
/**
 * Creates a domain matcher function.
 *
 * Plain domains match the hostname itself and any of its subdomains.
 * Domains containing `*` (e.g. `*.example.com`, `ads*.example.com`) are
 * compiled to a regular expression in which `*` stands for any run of
 * characters; the pattern must start at a label boundary and end at the end
 * of the hostname. Comparison is case-insensitive.
 *
 * @param {string} domain - Domain to match, optionally containing `*`
 * @returns {Function} Matcher `(url, hostname) => boolean`; `url` is unused
 */
function createDomainMatcher(domain) {
  const lowerDomain = domain.toLowerCase();

  if (lowerDomain.includes('*')) {
    // Escape every literal piece, then join the pieces with ".*".
    const source = lowerDomain
      .split('*')
      .map(piece => piece.replace(/[.+?^${}()|[\]\\]/g, '\\$&'))
      .join('.*');
    const wildcard = new RegExp('(?:^|\\.)' + source + '$');
    return (url, hostname) => wildcard.test(hostname.toLowerCase());
  }

  const subdomainSuffix = '.' + lowerDomain;
  return (url, hostname) => {
    const lowerHostname = hostname.toLowerCase();
    // Exact match or subdomain match
    return lowerHostname === lowerDomain ||
      lowerHostname.endsWith(subdomainSuffix);
  };
}
311
+
312
/**
 * Creates a pattern matcher for path/wildcard rules.
 *
 * Conversion from adblock syntax to a case-insensitive regular expression:
 *  - `*` matches any run of characters;
 *  - `^` matches a separator: any character that is not a letter, digit or
 *    one of `_ - . %`, and also the end of the address;
 *  - a leading `|` anchors to the start of the URL, a trailing `|` to its end;
 *  - every other character, including `|` and `$` in the middle of the
 *    pattern, is matched literally.
 *
 * @param {string} pattern - Pattern with wildcards
 * @returns {Function} Matcher `(url) => boolean`
 */
function createPatternMatcher(pattern) {
  const SEPARATOR = '(?:[^\\w.%-]|$)';

  let body = pattern;
  let prefix = '';
  let suffix = '';

  // Peel the anchors off first so the remaining '|' characters can be
  // escaped as literals instead of turning into regex alternation.
  if (body.startsWith('|')) {
    prefix = '^';
    body = body.slice(1);
  }
  if (body.endsWith('|')) {
    suffix = '$';
    body = body.slice(0, -1);
  }

  // Single pass so that text produced for one token is never re-processed.
  const source = body.replace(/[.+?{}()[\]\\|$*^]/g, (ch) => {
    if (ch === '*') return '.*';
    if (ch === '^') return SEPARATOR;
    return '\\' + ch;
  });

  const regex = new RegExp(prefix + source + suffix, 'i');
  return (url) => regex.test(url);
}
333
+
334
/**
 * Creates rule matcher with shouldBlock function.
 *
 * Evaluation order inside shouldBlock: whitelist (exact-domain Map, then
 * array), domain rules (exact-domain Map, then array), third-party rules,
 * script rules, path rules and finally regex rules. The first applicable
 * rule decides the result.
 *
 * Rules found through the exact-domain Maps are run through the same
 * matchesRule() check as array rules, so `$domain=`, `$third-party` and
 * `$script` options are honoured on the fast path as well.
 *
 * @param {Object} rules - Parsed rules object (as built by parseAdblockRules)
 * @param {Object} options - Matcher options
 * @param {boolean} [options.enableLogging=false] - Log every decision and error
 * @param {boolean} [options.caseSensitive=false] - Accepted for compatibility;
 *   not used by the matcher (matching is case-insensitive)
 * @returns {Object} Matcher with `rules`, `shouldBlock` and `getStats`
 */
function createMatcher(rules, options = {}) {
  const { enableLogging = false } = options;

  // URL parsing cache (scoped to this matcher instance)
  const urlCache = new URLCache(1000);
  let cacheHits = 0;
  let cacheMisses = 0;

  // Third-party status is only worth computing when some rule depends on it.
  const whitelistUsesThirdParty =
    rules.whitelist.some(rule => rule.isThirdParty) ||
    Array.from(rules.whitelistMap.values()).some(rule => rule.isThirdParty);

  // Returns { hostname, lowerHostname } for a URL, parsing it at most once
  // while it stays in the cache. Throws when the URL cannot be parsed.
  const lookupHost = (rawUrl) => {
    const cached = urlCache.get(rawUrl);
    if (cached) {
      cacheHits++;
      return cached;
    }
    const { hostname } = new URL(rawUrl);
    const entry = { hostname, lowerHostname: hostname.toLowerCase() };
    urlCache.set(rawUrl, entry);
    cacheMisses++;
    return entry;
  };

  // Looks up the hostname and then each parent domain
  // (sub.example.com -> example.com -> com) in an exact-domain Map and
  // returns the first rule that also passes `applies`.
  const findExactDomainRule = (domainMap, lowerHostname, applies) => {
    let candidate = lowerHostname;
    for (;;) {
      const rule = domainMap.get(candidate);
      if (rule && applies(rule)) {
        return rule;
      }
      const dot = candidate.indexOf('.');
      if (dot === -1) {
        return null;
      }
      candidate = candidate.slice(dot + 1);
    }
  };

  // Linear scan of a rule array; returns the first applicable rule or null.
  const findListRule = (list, applies) => {
    const len = list.length;
    for (let i = 0; i < len; i++) {
      if (applies(list[i])) {
        return list[i];
      }
    }
    return null;
  };

  // Logs a decision only once the rule is known to apply.
  const logDecision = (label, url, rule) => {
    if (enableLogging) {
      console.log(`[Adblock] ${label}: ${url} (${rule.raw})`);
    }
  };

  return {
    rules,

    /**
     * Check if URL should be blocked
     * @param {string} url - URL to check
     * @param {string} sourceUrl - Source page URL (for third-party detection)
     * @param {string} resourceType - Type of resource (script, image, etc)
     * @returns {Object} { blocked: boolean, rule: string|null, reason: string }
     *   where reason is one of 'whitelisted', 'domain_rule',
     *   'third_party_rule', 'script_rule', 'path_rule', 'regex_rule',
     *   'no_match' or 'error'
     */
    shouldBlock(url, sourceUrl = '', resourceType = '') {
      try {
        const { hostname, lowerHostname } = lookupHost(url);

        const needsThirdParty = rules.thirdPartyRules.length > 0 || whitelistUsesThirdParty;
        const isThirdParty = (sourceUrl && needsThirdParty)
          ? isThirdPartyRequest(url, sourceUrl)
          : false;

        // Source page domain for $domain option checking
        let sourceDomain = null;
        if (sourceUrl) {
          try {
            sourceDomain = lookupHost(sourceUrl).lowerHostname;
          } catch (err) {
            // Invalid sourceUrl: leave as null so $domain restrictions are
            // treated as "cannot check" by the rule matcher.
          }
        }

        const applies = (rule) =>
          matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain);

        // Scans one rule array and builds the "blocked" result on a hit.
        const blockedBy = (list, label, reason) => {
          const rule = findListRule(list, applies);
          if (!rule) {
            return null;
          }
          logDecision(label, url, rule);
          return { blocked: true, rule: rule.raw, reason };
        };

        // === WHITELIST CHECK (exception rules take precedence) ===
        const allowRule =
          findExactDomainRule(rules.whitelistMap, lowerHostname, applies) ||
          findListRule(rules.whitelist, applies);
        if (allowRule) {
          logDecision('Whitelisted', url, allowRule);
          return { blocked: false, rule: allowRule.raw, reason: 'whitelisted' };
        }

        // === DOMAIN BLOCKING CHECK ===
        const domainRule =
          findExactDomainRule(rules.domainMap, lowerHostname, applies) ||
          findListRule(rules.domainRules, applies);
        if (domainRule) {
          logDecision('Blocked domain', url, domainRule);
          return { blocked: true, rule: domainRule.raw, reason: 'domain_rule' };
        }

        // === REMAINING CATEGORIES (cheapest first, regex last) ===
        let verdict = null;
        if (isThirdParty) {
          verdict = blockedBy(rules.thirdPartyRules, 'Blocked third-party', 'third_party_rule');
        }
        if (!verdict && (resourceType === 'script' || url.endsWith('.js'))) {
          verdict = blockedBy(rules.scriptRules, 'Blocked script', 'script_rule');
        }
        if (!verdict) {
          verdict = blockedBy(rules.pathRules, 'Blocked path', 'path_rule');
        }
        if (!verdict) {
          verdict = blockedBy(rules.regexRules, 'Blocked regex', 'regex_rule');
        }
        if (verdict) {
          return verdict;
        }

        // No match - allow request
        return {
          blocked: false,
          rule: null,
          reason: 'no_match'
        };

      } catch (err) {
        if (enableLogging) {
          console.log(`[Adblock] Error checking ${url}: ${err.message}`);
        }
        // On error, allow request
        return {
          blocked: false,
          rule: null,
          reason: 'error'
        };
      }
    },

    /**
     * Get statistics about loaded rules
     * @returns {Object} Rule statistics plus URL cache hit/miss counters
     */
    getStats() {
      const lookups = cacheHits + cacheMisses;
      const hitRate = lookups > 0
        ? ((cacheHits / lookups) * 100).toFixed(1) + '%'
        : '0%';

      return {
        ...rules.stats,
        cache: {
          hits: cacheHits,
          misses: cacheMisses,
          hitRate: hitRate,
          size: urlCache.cache.size,
          maxSize: urlCache.maxSize
        }
      };
    }
  };
}
608
+
609
/**
 * Decide whether a rule's `$domain=` restrictions allow it to apply to a
 * request made from the given source page domain.
 *
 * A listed domain covers itself and all of its subdomains. Exclusions are
 * evaluated before inclusions; when an include list exists the source domain
 * must be covered by one of its entries.
 *
 * @param {Object} rule - Rule with potential domainRestrictions
 * @param {string|null} sourceDomain - Domain of the page making the request (lowercase)
 * @returns {boolean} True if rule should apply on this source domain
 */
function matchesDomainRestrictions(rule, sourceDomain) {
  const restrictions = rule.domainRestrictions;

  // Unrestricted rules apply everywhere; without a source domain the
  // restrictions cannot be evaluated, so the rule is allowed to apply.
  if (!restrictions || !sourceDomain) {
    return true;
  }

  const coversSource = (listed) =>
    sourceDomain === listed || sourceDomain.endsWith('.' + listed);

  const { include, exclude } = restrictions;

  // Exclusions have priority over inclusions.
  if (exclude && exclude.some(coversSource)) {
    return false;
  }

  // With an include list, the source must be covered by one of its entries.
  if (include && include.length > 0) {
    return include.some(coversSource);
  }

  // Only exclusions were given and none of them matched.
  return true;
}
677
+
678
/**
 * Evaluate one parsed rule against a request.
 *
 * The rule is rejected when its `$domain=` restrictions exclude the source
 * page, when it is a third-party rule and the request is first-party, or
 * when it is a script rule and the request is neither of type 'script' nor
 * a URL ending in '.js'. Otherwise the rule's own matcher decides.
 *
 * @param {Object} rule - Parsed rule object
 * @param {string} url - URL to check
 * @param {string} hostname - Hostname of URL
 * @param {boolean} isThirdParty - Whether request is third-party
 * @param {string} resourceType - Resource type
 * @param {string|null} sourceDomain - Source page domain (for $domain option)
 * @returns {boolean} True if rule matches
 */
function matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain) {
  // Evaluated left to right, stopping at the first failed precondition.
  const ruledOut =
    !matchesDomainRestrictions(rule, sourceDomain) ||
    (rule.isThirdParty && !isThirdParty) ||
    (rule.isScript && resourceType !== 'script' && !url.endsWith('.js'));

  if (ruledOut) {
    return false;
  }

  // Domain matchers also receive the hostname; all others only see the URL.
  const matcherArgs = rule.isDomain ? [url, hostname] : [url];
  return rule.matcher(...matcherArgs);
}
710
+
711
/**
 * Tell whether a request goes to a different base domain than the page
 * that issued it.
 *
 * Both URLs are reduced to their base domain with getBaseDomain() and the
 * results are compared. If either URL cannot be parsed the request is
 * reported as first-party (false).
 *
 * @param {string} requestUrl - URL being requested
 * @param {string} sourceUrl - URL of the page making the request
 * @returns {boolean} True if third-party request
 */
function isThirdPartyRequest(requestUrl, sourceUrl) {
  try {
    const [requestBase, sourceBase] = [requestUrl, sourceUrl].map(
      (raw) => getBaseDomain(new URL(raw).hostname)
    );
    return requestBase !== sourceBase;
  } catch (err) {
    // Unparseable URL on either side: treat as first-party.
    return false;
  }
}
731
+
732
/**
 * Extract base domain from hostname.
 *
 * The base domain is approximated as the last two labels of the hostname
 * (example.com from sub.example.com). IP address literals are returned
 * unchanged, because their trailing components are not a domain, and a
 * single trailing dot (fully-qualified form) is ignored.
 *
 * NOTE(review): this does not consult a public-suffix list, so hosts under
 * multi-label suffixes such as "co.uk" all reduce to the suffix itself.
 *
 * @param {string} hostname - Full hostname
 * @returns {string} Base domain
 */
function getBaseDomain(hostname) {
  // IPv4 literal or bracketed IPv6 literal: the whole address is the identity.
  if (/^\d{1,3}(?:\.\d{1,3}){3}$/.test(hostname) || hostname.startsWith('[')) {
    return hostname;
  }

  // "example.com." and "example.com" name the same host.
  const normalized = hostname.endsWith('.') ? hostname.slice(0, -1) : hostname;

  const labels = normalized.split('.');
  if (labels.length <= 2) {
    return normalized;
  }
  // Return last two parts (example.com from sub.example.com)
  return labels.slice(-2).join('.');
}
745
+
746
+ module.exports = {
747
+ parseAdblockRules,
748
+ isThirdPartyRequest,
749
+ getBaseDomain
750
+ };
@@ -93,9 +93,9 @@ const USER_AGENT_COLLECTIONS = Object.freeze(new Map([
93
93
  ['chrome', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
94
94
  ['chrome_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
95
95
  ['chrome_linux', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
96
- ['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:144.0) Gecko/20100101 Firefox/144.0"],
97
- ['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:144.0) Gecko/20100101 Firefox/144.0"],
98
- ['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:144.0) Gecko/20100101 Firefox/144.0"],
96
+ ['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:145.0) Gecko/20100101 Firefox/145.0"],
97
+ ['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:145.0) Gecko/20100101 Firefox/145.0"],
98
+ ['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Gecko/20100101 Firefox/145.0"],
99
99
  ['safari', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"]
100
100
  ]));
101
101
 
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v2.0.32 ===
1
+ // === Network scanner script (nwss.js) v2.0.33 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -50,6 +50,8 @@ const { initializeDryRunCollections, addDryRunMatch, addDryRunNetTools, processD
50
50
  const { clearSiteData } = require('./lib/clear_sitedata');
51
51
  // Referrer header generation
52
52
  const { getReferrerForUrl, validateReferrerConfig, validateReferrerDisable } = require('./lib/referrer');
53
+ // Adblock rules parser
54
+ const { parseAdblockRules } = require('./lib/adblock');
53
55
 
54
56
  // Fast setTimeout helper for Puppeteer 22.x compatibility
55
57
  // Uses standard Promise constructor for better performance than node:timers/promises
@@ -94,9 +96,9 @@ const USER_AGENTS = Object.freeze(new Map([
94
96
  ['chrome', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
95
97
  ['chrome_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
96
98
  ['chrome_linux', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
97
- ['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:144.0) Gecko/20100101 Firefox/144.0"],
98
- ['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:144.0) Gecko/20100101 Firefox/144.0"],
99
- ['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:144.0) Gecko/20100101 Firefox/144.0"],
99
+ ['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:145.0) Gecko/20100101 Firefox/145.0"],
100
+ ['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:145.0) Gecko/20100101 Firefox/145.0"],
101
+ ['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Gecko/20100101 Firefox/145.0"],
100
102
  ['safari', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"]
101
103
  ]));
102
104
 
@@ -145,7 +147,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
145
147
  const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive, performGroupWindowCleanup, performRealtimeWindowCleanup, trackPageForRealtime, updatePageUsage, cleanupPageBeforeReload } = require('./lib/browserhealth');
146
148
 
147
149
  // --- Script Configuration & Constants ---
148
- const VERSION = '2.0.32'; // Script version
150
+ const VERSION = '2.0.33'; // Script version
149
151
 
150
152
  // get startTime
151
153
  const startTime = Date.now();
@@ -208,7 +210,7 @@ const dryRunMode = args.includes('--dry-run');
208
210
  const compressLogs = args.includes('--compress-logs');
209
211
  const removeTempFiles = args.includes('--remove-tempfiles');
210
212
  const validateConfig = args.includes('--validate-config');
211
- const validateRules = args.includes('--validate-rules');
213
+ let validateRules = args.includes('--validate-rules');
212
214
  const testValidation = args.includes('--test-validation');
213
215
  let cleanRules = args.includes('--clean-rules');
214
216
  const clearCache = args.includes('--clear-cache');
@@ -244,6 +246,11 @@ if (cleanupIntervalIndex !== -1 && args[cleanupIntervalIndex + 1]) {
244
246
  const enableColors = args.includes('--color') || args.includes('--colour');
245
247
  let adblockRulesMode = args.includes('--adblock-rules');
246
248
 
249
+ // Adblock variables (request blocking)
250
+ let adblockEnabled = false;
251
+ let adblockMatcher = null;
252
+ let adblockStats = { blocked: 0, allowed: 0 };
253
+
247
254
  // Validate --adblock-rules usage - ignore if used incorrectly instead of erroring
248
255
  if (adblockRulesMode) {
249
256
  if (!outputFile) {
@@ -452,6 +459,24 @@ if (validateRules || validateRulesFile) {
452
459
  }
453
460
  }
454
461
 
462
// Parse --block-ads argument for request-level ad blocking.
// Accepted forms: "--block-ads=<file>" and "--block-ads <file>".
// The flag is matched exactly so that other options sharing the same prefix
// are not mistaken for it.
const BLOCK_ADS_FLAG = '--block-ads';
const blockAdsIndex = args.findIndex(
  arg => arg === BLOCK_ADS_FLAG || arg.startsWith(`${BLOCK_ADS_FLAG}=`)
);
if (blockAdsIndex !== -1) {
  const blockAdsArg = args[blockAdsIndex];
  // Take everything after the first '=' so paths containing '=' survive.
  const rulesFile = blockAdsArg.includes('=')
    ? blockAdsArg.slice(blockAdsArg.indexOf('=') + 1)
    : args[blockAdsIndex + 1];

  if (!rulesFile || !fs.existsSync(rulesFile)) {
    console.log(`Error: Adblock rules file not found: ${rulesFile || '(not specified)'}`);
    process.exit(1);
  }

  adblockEnabled = true;
  adblockMatcher = parseAdblockRules(rulesFile, { enableLogging: forceDebug });
  const stats = adblockMatcher.getStats();
  if (!silentMode) console.log(messageColors.success(`Adblock enabled: Loaded ${stats.total} blocking rules from ${rulesFile}`));
}
479
+
455
480
  if (args.includes('--help') || args.includes('-h')) {
456
481
  console.log(`Usage: node nwss.js [options]
457
482
 
@@ -471,6 +496,10 @@ Output Format Options:
471
496
  --privoxy Output as { +block } .domain.com (Privoxy format)
472
497
  --pihole Output as (^|\\.)domain\\.com$ (Pi-hole regex format)
473
498
  --adblock-rules Generate adblock filter rules with resource type modifiers (requires -o)
499
+
500
+ Request Blocking:
501
+ --block-ads=<file> Block ads/trackers using EasyList format rules (||domain.com^, /ads/*, etc)
502
+ Works at request-level for maximum performance
474
503
 
475
504
  General Options:
476
505
  --verbose Force verbose mode globally
@@ -1282,7 +1311,6 @@ function setupFrameHandling(page, forceDebug) {
1282
1311
  '--disable-client-side-phishing-detection',
1283
1312
  '--enable-features=NetworkService',
1284
1313
  // Disk space controls - 50MB cache limits
1285
- '--disable-features=VizDisplayCompositor',
1286
1314
  `--disk-cache-size=${CACHE_LIMITS.DISK_CACHE_SIZE}`, // 50MB disk cache
1287
1315
  `--media-cache-size=${CACHE_LIMITS.MEDIA_CACHE_SIZE}`, // 50MB media cache
1288
1316
  '--disable-application-cache',
@@ -1295,7 +1323,7 @@ function setupFrameHandling(page, forceDebug) {
1295
1323
  '--disable-ipc-flooding-protection',
1296
1324
  '--aggressive-cache-discard',
1297
1325
  '--memory-pressure-off',
1298
- '--max_old_space_size=2048',
1326
+ '--max_old_space_size=2048', // V8 heap limit
1299
1327
  '--disable-prompt-on-repost', // Fixes form popup on page reload
1300
1328
  '--disable-background-networking',
1301
1329
  '--no-sandbox',
@@ -1310,7 +1338,6 @@ function setupFrameHandling(page, forceDebug) {
1310
1338
  '--disable-extensions',
1311
1339
  '--no-default-browser-check',
1312
1340
  '--safebrowsing-disable-auto-update',
1313
- '--max_old_space_size=1024',
1314
1341
  '--ignore-ssl-errors',
1315
1342
  '--ignore-certificate-errors',
1316
1343
  '--ignore-certificate-errors-spki-list',
@@ -2345,6 +2372,27 @@ function setupFrameHandling(page, forceDebug) {
2345
2372
  console.log(formatLogMessage('debug', `${messageColors.highlight('[req]')}[frame: ${isMainFrame ? 'main' : 'iframe'}] ${frameUrl} → ${request.url()}`));
2346
2373
  }
2347
2374
 
2375
+ // Apply adblock rules BEFORE expensive regex checks for better performance
2376
+ if (adblockEnabled && adblockMatcher) {
2377
+ try {
2378
+ const result = adblockMatcher.shouldBlock(
2379
+ checkedUrl,
2380
+ currentUrl,
2381
+ request.resourceType()
2382
+ );
2383
+
2384
+ if (result.blocked) {
2385
+ adblockStats.blocked++;
2386
+ if (forceDebug) {
2387
+ console.log(formatLogMessage('debug', `${messageColors.blocked('[adblock]')} ${checkedUrl} (${result.reason})`));
2388
+ }
2389
+ request.abort();
2390
+ return;
2391
+ }
2392
+ adblockStats.allowed++;
2393
+ } catch (err) { /* Silently continue on adblock errors */ }
2394
+ }
2395
+
2348
2396
  // Show --debug output and the url while its scanning
2349
2397
  if (forceDebug) {
2350
2398
  const simplifiedUrl = getRootDomain(currentUrl);
@@ -4049,6 +4097,12 @@ function setupFrameHandling(page, forceDebug) {
4049
4097
  console.log(formatLogMessage('debug', `Cached detections: ${cloudflareStats.size}`));
4050
4098
  }
4051
4099
  // Log smart cache statistics (if cache is enabled)
4100
// Adblock statistics
if (adblockEnabled) {
  console.log(formatLogMessage('debug', '=== Adblock Statistics ==='));
  // Guard against 0/0 when no request was evaluated, which would otherwise
  // print "NaN% block rate".
  const checkedRequests = adblockStats.blocked + adblockStats.allowed;
  const blockRate = checkedRequests > 0
    ? ((adblockStats.blocked / checkedRequests) * 100).toFixed(1)
    : '0.0';
  console.log(formatLogMessage('debug', `Blocked: ${adblockStats.blocked} requests (${blockRate}% block rate), Allowed: ${adblockStats.allowed}`));
}
4052
4106
  if (smartCache) {
4053
4107
  const cacheStats = smartCache.getStats();
4054
4108
  console.log(formatLogMessage('debug', '=== Smart Cache Statistics ==='));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "2.0.32",
3
+ "version": "2.0.34",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {