@fanboynz/network-scanner 2.0.31 → 2.0.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/adblock.js ADDED
@@ -0,0 +1,750 @@
1
+ // === Adblock Rules Parser (adblock_rules.js) v2.2 - Complete Optimization ===
2
+ // Supports EasyList/AdBlock Plus filter syntax
3
+ // Optimizations: Map domains + URL cache + skip third-party calc + cached hostname split
4
+
5
+ const fs = require('fs');
6
+
7
/**
 * Small least-recently-used (LRU) cache for URL parsing results.
 *
 * Backed by a Map, which iterates in insertion order. Every read or write
 * re-inserts the touched key, so the first key of the Map is always the least
 * recently used entry and is the one evicted when the cache is full.
 */
class URLCache {
  /**
   * @param {number} [maxSize=1000] - Maximum number of entries kept before eviction
   */
  constructor(maxSize = 1000) {
    this.cache = new Map();
    this.maxSize = maxSize;
  }

  /**
   * Look up a cached value and mark it as most recently used.
   * @param {string} url - Cache key
   * @returns {*} Cached value, or undefined on a miss
   */
  get(url) {
    if (!this.cache.has(url)) {
      return undefined;
    }
    const value = this.cache.get(url);
    // Re-insert so the key moves to the "most recent" end of the Map.
    this.cache.delete(url);
    this.cache.set(url, value);
    return value;
  }

  /**
   * Store a value, evicting the least recently used entry when full.
   * @param {string} url - Cache key
   * @param {*} value - Value to cache
   */
  set(url, value) {
    if (this.cache.has(url)) {
      // Updating an existing key must not evict an unrelated entry.
      this.cache.delete(url);
    } else if (this.cache.size >= this.maxSize) {
      const oldestKey = this.cache.keys().next().value;
      this.cache.delete(oldestKey);
    }
    this.cache.set(url, value);
  }

  /** Remove every cached entry. */
  clear() {
    this.cache.clear();
  }

  /**
   * @returns {{size: number, maxSize: number}} Current and maximum entry counts
   */
  getStats() {
    return {
      size: this.cache.size,
      maxSize: this.maxSize
    };
  }
}
41
+
42
// Options that only affect element hiding; a rule carrying one is not a network rule.
const COSMETIC_ONLY_OPTIONS = ['generichide', 'elemhide', 'specifichide'];

// Cosmetic/scriptlet separators: ##, #@#, #?#, #@?#, #$#, #@$#, #$?#, #@$?#, #%#, #@%#
const COSMETIC_SEPARATOR = /#@?(?:\$\?|[?$%])?#/;

/**
 * Parses an adblock filter list file and creates a matcher for it.
 *
 * Lines are classified as comments, cosmetic filters (skipped), exception
 * rules (`@@`) or blocking rules. Exact-domain rules are indexed in a Map for
 * O(1) lookup; everything else goes into per-category arrays.
 *
 * @param {string} filePath - Path to filter list file
 * @param {Object} [options] - Parser options
 * @param {boolean} [options.enableLogging=false] - Log a load summary and parse failures
 * @param {boolean} [options.caseSensitive=false] - Passed through to the matcher
 * @returns {Object} Rule matcher with matching functions
 * @throws {Error} If the file does not exist
 */
function parseAdblockRules(filePath, options = {}) {
  const {
    enableLogging = false,
    caseSensitive = false
  } = options;

  if (!fs.existsSync(filePath)) {
    throw new Error(`Adblock rules file not found: ${filePath}`);
  }

  const fileContent = fs.readFileSync(filePath, 'utf-8');
  const lines = fileContent.split('\n');

  const rules = {
    domainMap: new Map(),     // ||domain.com^ - Exact domains for O(1) lookup
    domainRules: [],          // Wildcard domains and additional rules for an already-indexed domain
    thirdPartyRules: [],      // ||domain.com^$third-party
    pathRules: [],            // /ads/*
    scriptRules: [],          // .js$script
    regexRules: [],           // /regex/
    whitelist: [],            // @@ rules that cannot be indexed by exact domain
    whitelistMap: new Map(),  // Exact whitelist domains for O(1) lookup
    elementHiding: [],        // ##.ad-class (not used for network blocking)
    stats: {
      total: 0,
      domain: 0,
      domainMapEntries: 0,    // Exact domain matches in Map
      thirdParty: 0,
      path: 0,
      script: 0,
      regex: 0,
      whitelist: 0,
      elementHiding: 0,
      comments: 0,
      invalid: 0
    }
  };

  // True for cosmetic filters and for rules that only carry cosmetic options.
  const isCosmeticLine = (line) =>
    COSMETIC_SEPARATOR.test(line) ||
    COSMETIC_ONLY_OPTIONS.some((opt) => line.includes(`$${opt}`) || line.includes(`,${opt}`));

  // Index an exact-domain rule in `exactMap`. A Map holds one rule per domain,
  // so a different rule for an already-indexed domain goes to `fallbackList`
  // instead of silently replacing the first one. Returns true when indexed.
  const storeDomainRule = (exactMap, fallbackList, parsedRule) => {
    const { domain } = parsedRule;
    if (!domain || domain.includes('*')) {
      fallbackList.push(parsedRule);
      return false;
    }
    const key = domain.toLowerCase();
    const existing = exactMap.get(key);
    if (!existing) {
      exactMap.set(key, parsedRule);
      return true;
    }
    if (existing.raw !== parsedRule.raw) {
      fallbackList.push(parsedRule);
    }
    return false;
  };

  for (const rawLine of lines) {
    const line = rawLine.trim();

    // Skip empty lines
    if (!line) continue;

    // Skip comments
    if (line.startsWith('!') || line.startsWith('#')) {
      rules.stats.comments++;
      continue;
    }

    // Skip cosmetic filters and cosmetic-only options (not for network blocking)
    if (isCosmeticLine(line)) {
      rules.stats.elementHiding++;
      continue;
    }

    rules.stats.total++;

    try {
      // Whitelist rules (exception rules)
      if (line.startsWith('@@')) {
        const parsedRule = parseRule(line.substring(2), true);
        if (parsedRule.isDomain) {
          storeDomainRule(rules.whitelistMap, rules.whitelist, parsedRule);
        } else {
          rules.whitelist.push(parsedRule);
        }
        rules.stats.whitelist++;
        continue;
      }

      // Regular blocking rules, categorized by rule type
      const parsedRule = parseRule(line, false);

      if (parsedRule.isThirdParty) {
        rules.thirdPartyRules.push(parsedRule);
        rules.stats.thirdParty++;
      } else if (parsedRule.isDomain) {
        if (storeDomainRule(rules.domainMap, rules.domainRules, parsedRule)) {
          rules.stats.domainMapEntries++;
        }
        rules.stats.domain++;
      } else if (parsedRule.isScript) {
        rules.scriptRules.push(parsedRule);
        rules.stats.script++;
      } else if (parsedRule.isRegex) {
        rules.regexRules.push(parsedRule);
        rules.stats.regex++;
      } else {
        rules.pathRules.push(parsedRule);
        rules.stats.path++;
      }
    } catch (err) {
      // A malformed rule must not abort loading the rest of the list.
      rules.stats.invalid++;
      if (enableLogging) {
        console.log(`[Adblock] Failed to parse rule: ${line} - ${err.message}`);
      }
    }
  }

  if (enableLogging) {
    console.log(`[Adblock] Loaded ${rules.stats.total} rules:`);
    console.log(` - Domain rules: ${rules.stats.domain}`);
    console.log(` • Exact matches (Map): ${rules.stats.domainMapEntries}`);
    console.log(` • Wildcard patterns (Array): ${rules.domainRules.length}`);
    console.log(` - Third-party rules: ${rules.stats.thirdParty}`);
    console.log(` - Path rules: ${rules.stats.path}`);
    console.log(` - Script rules: ${rules.stats.script}`);
    console.log(` - Regex rules: ${rules.stats.regex}`);
    console.log(` - Whitelist rules: ${rules.stats.whitelist}`);
    console.log(` - Comments/Element hiding: ${rules.stats.comments + rules.stats.elementHiding}`);
    console.log(` - Invalid rules: ${rules.stats.invalid}`);
  }

  return createMatcher(rules, { enableLogging, caseSensitive });
}
183
+
184
// Options that only influence cosmetic filtering; ignored when parsing network rules.
const COSMETIC_RULE_OPTIONS = new Set(['generichide', 'elemhide', 'specifichide', 'genericblock']);

// Remainder of a "||" rule that is only a hostname, optionally followed by "^" or "^*".
const HOST_ONLY_RULE = /^([^\s\/?#:^|]*)(?:\^\*?)?$/;

// Regex source for the "||" anchor: scheme, slashes, then any leading subdomain labels.
const DOMAIN_ANCHOR_SOURCE = '^[a-z][a-z0-9+.-]*:/+(?:[^/?#]*\\.)?';

// Regex source for "^": one separator character or the end of the URL.
const SEPARATOR_SOURCE = '(?:[/?&=:]|$)';

/**
 * Escapes every regex metacharacter so the text matches literally.
 * @param {string} text - Literal text
 * @returns {string} Regex source matching exactly that text
 */
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Tells whether a rule body is written as a regex literal (/.../).
 * @param {string} text - Rule or pattern text
 * @returns {boolean} True for a non-empty /regex/ body
 */
function isRegexLiteral(text) {
  return text.length > 2 && text.startsWith('/') && text.endsWith('/');
}

/**
 * Parses individual adblock rule
 * @param {string} rule - Raw rule string (without the leading "@@" for exceptions)
 * @param {boolean} isWhitelist - Whether this is a whitelist rule
 * @returns {Object} Parsed rule object; `matcher` is always a function:
 *   `(url, hostname)` for domain rules, `(url)` for every other rule type
 * @throws {SyntaxError} If a /regex/ rule is not a valid regular expression
 */
function parseRule(rule, isWhitelist) {
  const parsed = {
    raw: rule,
    isWhitelist,
    isDomain: false,
    isThirdParty: false,
    isScript: false,
    isRegex: false,
    domain: null,
    domainRestrictions: null, // { include: ['site.com'], exclude: ['site.com'] }
    pattern: '',
    options: {},
    matcher: null
  };

  // Options follow the LAST "$". A rule that is entirely a /regex/ has no
  // options, so a "$" inside it is part of the expression, not a separator.
  const separatorIndex = isRegexLiteral(rule) ? -1 : rule.lastIndexOf('$');
  const pattern = separatorIndex === -1 ? rule : rule.slice(0, separatorIndex);
  const optionsStr = separatorIndex === -1 ? '' : rule.slice(separatorIndex + 1);
  parsed.pattern = pattern;

  for (const rawOption of optionsStr.split(',')) {
    // Split on the first "=" only, so values containing "=" stay intact.
    const equalsIndex = rawOption.indexOf('=');
    const key = (equalsIndex === -1 ? rawOption : rawOption.slice(0, equalsIndex)).trim();
    if (!key || COSMETIC_RULE_OPTIONS.has(key)) {
      continue;
    }
    const value = equalsIndex === -1 ? '' : rawOption.slice(equalsIndex + 1).trim();
    parsed.options[key] = value || true;
  }

  if (parsed.options['third-party'] || parsed.options['3p']) {
    parsed.isThirdParty = true;
  }

  if (parsed.options['script']) {
    parsed.isScript = true;
  }

  // Parse domain option: $domain=site1.com|site2.com|~excluded.com
  const domainList = parsed.options['domain'];
  if (typeof domainList === 'string') {
    const include = [];
    const exclude = [];

    for (const entry of domainList.split('|')) {
      const domain = entry.trim().toLowerCase();
      if (!domain) {
        continue;
      }
      if (domain.startsWith('~')) {
        exclude.push(domain.substring(1)); // Negation: rule does not apply on this site
      } else {
        include.push(domain);
      }
    }

    parsed.domainRestrictions = {
      include: include.length > 0 ? include : null,
      exclude: exclude.length > 0 ? exclude : null
    };
  }

  if (pattern.startsWith('||')) {
    const hostOnly = HOST_ONLY_RULE.exec(pattern.substring(2));
    if (hostOnly) {
      // Pure domain rule: ||domain.com^ or ||domain.com (a trailing "*" after
      // a label is redundant; "ads.*" and "*.domain.com" keep their wildcard).
      const domain = hostOnly[1].replace(/(^|[^.*])\*$/, '$1');
      parsed.isDomain = true;
      parsed.domain = domain;
      parsed.matcher = createDomainMatcher(domain);
    } else {
      // ||domain.com/path or ||domain.com^*path: the path is part of the rule,
      // so it is matched against the whole URL instead of blocking the domain.
      parsed.matcher = createPatternMatcher(pattern);
    }
  } else if (isRegexLiteral(pattern)) {
    // Regex rules: /pattern/
    parsed.isRegex = true;
    const regex = new RegExp(pattern.slice(1, -1), 'i');
    parsed.matcher = (url) => regex.test(url);
  } else {
    // Path/wildcard rules: /ads/* or ad.js
    parsed.matcher = createPatternMatcher(pattern);
  }

  return parsed;
}

/**
 * Creates a domain matcher function
 * @param {string} domain - Domain to match; may contain "*" wildcards
 * @returns {Function} Matcher `(url, hostname) => boolean`, true for the domain
 *   itself and any of its subdomains
 */
function createDomainMatcher(domain) {
  const lowerDomain = domain.toLowerCase();

  if (lowerDomain.includes('*')) {
    // A literal comparison can never match a "*", so build a hostname regex.
    const source = lowerDomain.split('*').map(escapeRegExp).join('[^/]*');
    const wildcardRegex = new RegExp(`^(?:.*\\.)?${source}$`);
    return (url, hostname) => wildcardRegex.test(hostname.toLowerCase());
  }

  return (url, hostname) => {
    const lowerHostname = hostname.toLowerCase();
    // Exact match or subdomain match
    return lowerHostname === lowerDomain ||
      lowerHostname.endsWith('.' + lowerDomain);
  };
}

/**
 * Creates a pattern matcher for path/wildcard rules
 *
 * Supported syntax:
 *   *   matches anything
 *   ^   matches a separator (/, ?, &, =, :) or the end of the URL
 *   |   at the start/end anchors the pattern to the start/end of the URL
 *   ||  at the start anchors the pattern to the start of a (sub)domain
 * Every other character, including "$" and an inner "|", matches literally.
 *
 * @param {string} pattern - Pattern with wildcards
 * @returns {Function} Matcher `(url) => boolean` (case-insensitive)
 */
function createPatternMatcher(pattern) {
  let body = pattern;
  let prefix = '';
  let suffix = '';

  if (body.startsWith('||')) {
    prefix = DOMAIN_ANCHOR_SOURCE;
    body = body.slice(2);
  } else if (body.startsWith('|')) {
    prefix = '^';
    body = body.slice(1);
  }

  if (body.endsWith('|')) {
    suffix = '$';
    body = body.slice(0, -1);
  }

  // Escape the literal pieces and re-join them with the translated operators.
  const source = body
    .split('*')
    .map((chunk) => chunk.split('^').map(escapeRegExp).join(SEPARATOR_SOURCE))
    .join('.*');

  const regex = new RegExp(prefix + source + suffix, 'i');
  return (url) => regex.test(url);
}
333
+
334
/**
 * Finds the first rule indexed under a hostname or one of its parent domains.
 * Candidates are tried from most to least specific
 * (sub.example.com -> example.com -> com).
 * @param {Map<string, Object>} domainMap - Rules keyed by lowercase domain
 * @param {string} lowerHostname - Lowercase request hostname
 * @param {Function} accepts - Predicate deciding whether a found rule applies
 * @returns {Object|null} First accepted rule, or null
 */
function findDomainMapRule(domainMap, lowerHostname, accepts) {
  if (domainMap.size === 0) {
    return null;
  }
  let candidate = lowerHostname;
  while (candidate) {
    const rule = domainMap.get(candidate);
    if (rule && accepts(rule)) {
      return rule;
    }
    const dotIndex = candidate.indexOf('.');
    if (dotIndex === -1) {
      break;
    }
    candidate = candidate.slice(dotIndex + 1);
  }
  return null;
}

/**
 * Creates rule matcher with shouldBlock function
 * @param {Object} rules - Parsed rules object
 * @param {Object} options - Matcher options
 * @param {boolean} [options.enableLogging=false] - Log every block/whitelist decision
 * @param {boolean} [options.caseSensitive=false] - Accepted for compatibility; matching
 *   is currently always case-insensitive
 * @returns {Object} Matcher with `rules`, `shouldBlock` and `getStats`
 */
function createMatcher(rules, options = {}) {
  const { enableLogging = false } = options;

  // URL parsing cache (scoped to this matcher instance)
  const urlCache = new URLCache(1000);
  let cacheHits = 0;
  let cacheMisses = 0;

  // Exception rules can carry $third-party too, so the third-party status is
  // needed even when the list has no third-party blocking rules.
  const exceptionsUseThirdParty =
    rules.whitelist.some((rule) => rule.isThirdParty) ||
    Array.from(rules.whitelistMap.values()).some((rule) => rule.isThirdParty);

  // Parse a URL once and remember its hostname. Throws on an invalid URL.
  const parseCached = (rawUrl) => {
    const cached = urlCache.get(rawUrl);
    if (cached) {
      cacheHits++;
      return cached;
    }
    const { hostname } = new URL(rawUrl);
    const entry = { hostname, lowerHostname: hostname.toLowerCase() };
    urlCache.set(rawUrl, entry);
    cacheMisses++;
    return entry;
  };

  const logDecision = (label, url, rule) => {
    if (enableLogging) {
      console.log(`[Adblock] ${label}: ${url} (${rule.raw})`);
    }
  };

  return {
    rules,

    /**
     * Check if URL should be blocked
     * @param {string} url - URL to check
     * @param {string} sourceUrl - Source page URL (for third-party detection)
     * @param {string} resourceType - Type of resource (script, image, etc)
     * @returns {Object} { blocked: boolean, rule: string|null, reason: string }
     */
    shouldBlock(url, sourceUrl = '', resourceType = '') {
      try {
        const { hostname, lowerHostname } = parseCached(url);

        // Only pay for third-party detection when some rule depends on it.
        const needsThirdParty = rules.thirdPartyRules.length > 0 || exceptionsUseThirdParty;
        const isThirdParty = (sourceUrl && needsThirdParty)
          ? isThirdPartyRequest(url, sourceUrl)
          : false;

        // Source page domain, used by the $domain option.
        let sourceDomain = null;
        if (sourceUrl) {
          try {
            sourceDomain = parseCached(sourceUrl).lowerHostname;
          } catch (err) {
            // Invalid sourceUrl: leave the source domain unknown.
          }
        }

        const isScriptRequest = resourceType === 'script' || url.endsWith('.js');

        // A rule found through a Map is already known to match the hostname;
        // its options still have to allow this particular request.
        const optionsAllow = (rule) =>
          matchesDomainRestrictions(rule, sourceDomain) &&
          (!rule.isThirdParty || isThirdParty) &&
          (!rule.isScript || isScriptRequest);

        const matches = (rule) =>
          matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain);

        // === WHITELIST CHECK (exception rules take precedence) ===
        const exception =
          findDomainMapRule(rules.whitelistMap, lowerHostname, optionsAllow) ||
          rules.whitelist.find(matches);
        if (exception) {
          logDecision('Whitelisted', url, exception);
          return { blocked: false, rule: exception.raw, reason: 'whitelisted' };
        }

        // === DOMAIN BLOCKING CHECK (Map fast path, then array fallback) ===
        const domainHit =
          findDomainMapRule(rules.domainMap, lowerHostname, optionsAllow) ||
          rules.domainRules.find(matches);
        if (domainHit) {
          logDecision('Blocked domain', url, domainHit);
          return { blocked: true, rule: domainHit.raw, reason: 'domain_rule' };
        }

        // === REMAINING CATEGORIES, cheapest first, regex last ===
        const ruleGroups = [
          { list: isThirdParty ? rules.thirdPartyRules : [], label: 'Blocked third-party', reason: 'third_party_rule' },
          { list: isScriptRequest ? rules.scriptRules : [], label: 'Blocked script', reason: 'script_rule' },
          { list: rules.pathRules, label: 'Blocked path', reason: 'path_rule' },
          { list: rules.regexRules, label: 'Blocked regex', reason: 'regex_rule' }
        ];

        for (const { list, label, reason } of ruleGroups) {
          const hit = list.find(matches);
          if (hit) {
            logDecision(label, url, hit);
            return { blocked: true, rule: hit.raw, reason };
          }
        }

        // No match - allow request
        return {
          blocked: false,
          rule: null,
          reason: 'no_match'
        };
      } catch (err) {
        if (enableLogging) {
          console.log(`[Adblock] Error checking ${url}: ${err.message}`);
        }
        // On error, allow request
        return {
          blocked: false,
          rule: null,
          reason: 'error'
        };
      }
    },

    /**
     * Get statistics about loaded rules
     * @returns {Object} Rule counters plus URL cache statistics
     */
    getStats() {
      const lookups = cacheHits + cacheMisses;
      const hitRate = lookups > 0
        ? ((cacheHits / lookups) * 100).toFixed(1) + '%'
        : '0%';

      return {
        ...rules.stats,
        cache: {
          hits: cacheHits,
          misses: cacheMisses,
          hitRate: hitRate,
          size: urlCache.cache.size,
          maxSize: urlCache.maxSize
        }
      };
    }
  };
}
608
+
609
/**
 * Check if rule's domain restrictions match the source domain
 *
 * A listed domain covers itself and all of its subdomains. Exclusions win over
 * inclusions; when an include list exists, the source must be covered by it.
 *
 * @param {Object} rule - Rule with potential domainRestrictions
 * @param {string|null} sourceDomain - Domain of the page making the request (lowercase)
 * @returns {boolean} True if rule should apply on this source domain
 */
function matchesDomainRestrictions(rule, sourceDomain) {
  // No domain restrictions = applies everywhere
  if (!rule.domainRestrictions) {
    return true;
  }

  // No source domain provided = can't check restrictions, so the rule applies
  if (!sourceDomain) {
    return true;
  }

  const { include, exclude } = rule.domainRestrictions;
  const coversSource = (domain) =>
    sourceDomain === domain || sourceDomain.endsWith('.' + domain);

  if (exclude && exclude.some(coversSource)) {
    return false;
  }

  if (include && include.length > 0) {
    return include.some(coversSource);
  }

  // Only exclusions were listed and none of them covers the source
  return true;
}

/**
 * Check if rule matches the given URL
 * @param {Object} rule - Parsed rule object
 * @param {string} url - URL to check
 * @param {string} hostname - Hostname of URL
 * @param {boolean} isThirdParty - Whether request is third-party
 * @param {string} resourceType - Resource type
 * @param {string|null} sourceDomain - Source page domain (for $domain option)
 * @returns {boolean} True if rule matches
 */
function matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain) {
  if (!matchesDomainRestrictions(rule, sourceDomain)) {
    return false;
  }

  if (rule.isThirdParty && !isThirdParty) {
    return false;
  }

  if (rule.isScript && resourceType !== 'script' && !url.endsWith('.js')) {
    return false;
  }

  const { matcher } = rule;

  // Regex rules may store a RegExp instead of a function; calling it would
  // throw a TypeError, so test it directly.
  if (matcher instanceof RegExp) {
    matcher.lastIndex = 0; // keep /g and /y expressions stateless between calls
    return matcher.test(url);
  }

  return rule.isDomain ? matcher(url, hostname) : matcher(url);
}
710
+
711
/**
 * Determine if request is third-party
 * @param {string} requestUrl - URL being requested
 * @param {string} sourceUrl - URL of the page making the request
 * @returns {boolean} True if third-party request; false when the base domains
 *   are equal or when either URL cannot be parsed
 */
function isThirdPartyRequest(requestUrl, sourceUrl) {
  try {
    const requestHostname = new URL(requestUrl).hostname;
    const sourceHostname = new URL(sourceUrl).hostname;

    // Compare base domains so that subdomains of one site count as first-party
    return getBaseDomain(requestHostname) !== getBaseDomain(sourceHostname);
  } catch (err) {
    // An unparsable URL cannot be classified; treat it as first-party
    return false;
  }
}

/**
 * Extract base domain from hostname
 *
 * Returns the last two labels (example.com from sub.example.com). No public
 * suffix list is used, so multi-label suffixes such as "co.uk" are not
 * recognised. IPv4 addresses are returned whole, because their last two octets
 * are not a domain, and a trailing root dot is ignored.
 *
 * @param {string} hostname - Full hostname
 * @returns {string} Base domain
 */
function getBaseDomain(hostname) {
  // IPv4 literal: every address is its own "site"
  if (/^\d{1,3}(?:\.\d{1,3}){3}$/.test(hostname)) {
    return hostname;
  }

  // "example.com." (fully qualified) names the same host as "example.com"
  const name = hostname.length > 1 && hostname.endsWith('.')
    ? hostname.slice(0, -1)
    : hostname;

  const parts = name.split('.');
  if (parts.length <= 2) {
    return name;
  }
  return parts.slice(-2).join('.');
}
745
+
746
+ module.exports = {
747
+ parseAdblockRules,
748
+ isThirdPartyRequest,
749
+ getBaseDomain
750
+ };
@@ -10,18 +10,24 @@ const { formatLogMessage } = require('./colorize');
10
10
  */
11
11
  class DomainCache {
12
12
  constructor(options = {}) {
13
+ // V8 Optimization: Initialize all properties in constructor for stable hidden class
13
14
  this.cache = new Set();
15
+
16
+ // V8 Optimization: Use consistent object shape (no dynamic property addition)
14
17
  this.stats = {
15
18
  totalDetected: 0,
16
19
  totalSkipped: 0,
17
20
  cacheHits: 0,
18
21
  cacheMisses: 0
19
22
  };
20
- this.options = {
21
- enableLogging: options.enableLogging || false,
22
- logPrefix: options.logPrefix || '[domain-cache]',
23
- maxCacheSize: options.maxCacheSize || 10000 // Prevent memory leaks
24
- };
23
+
24
+ // V8 Optimization: Store options directly instead of nested object for faster property access
25
+ this.enableLogging = options.enableLogging || false;
26
+ this.logPrefix = options.logPrefix || '[domain-cache]';
27
+ this.maxCacheSize = options.maxCacheSize || 10000; // Prevent memory leaks
28
+
29
+ // V8 Optimization: Pre-calculate 90% target to avoid repeated Math.floor
30
+ this.targetCacheSize = Math.floor(this.maxCacheSize * 0.9);
25
31
  }
26
32
 
27
33
  /**
@@ -40,14 +46,14 @@ class DomainCache {
40
46
  this.stats.totalSkipped++;
41
47
  this.stats.cacheHits++;
42
48
 
43
- if (this.options.enableLogging) {
44
- console.log(formatLogMessage('debug', `${this.options.logPrefix} Cache HIT: ${domain} (skipped)`));
49
+ if (this.enableLogging) {
50
+ console.log(formatLogMessage('debug', `${this.logPrefix} Cache HIT: ${domain} (skipped)`));
45
51
  }
46
52
  } else {
47
53
  this.stats.cacheMisses++;
48
54
 
49
- if (this.options.enableLogging) {
50
- console.log(formatLogMessage('debug', `${this.options.logPrefix} Cache MISS: ${domain} (processing)`));
55
+ if (this.enableLogging) {
56
+ console.log(formatLogMessage('debug', `${this.logPrefix} Cache MISS: ${domain} (processing)`));
51
57
  }
52
58
  }
53
59
 
@@ -63,25 +69,77 @@ class DomainCache {
63
69
  return false;
64
70
  }
65
71
 
66
- // Prevent cache from growing too large
67
- if (this.cache.size >= this.options.maxCacheSize) {
68
- this.clearOldestEntries(Math.floor(this.options.maxCacheSize * 0.1)); // Remove 10% of entries
69
- }
70
-
71
72
  const wasNew = !this.cache.has(domain);
72
73
  this.cache.add(domain);
73
74
 
74
75
  if (wasNew) {
75
76
  this.stats.totalDetected++;
76
77
 
77
- if (this.options.enableLogging) {
78
- console.log(formatLogMessage('debug', `${this.options.logPrefix} Marked as detected: ${domain} (cache size: ${this.cache.size})`));
78
+ if (this.enableLogging) {
79
+ console.log(formatLogMessage('debug', `${this.logPrefix} Marked as detected: ${domain} (cache size: ${this.cache.size})`));
80
+ }
81
+ }
82
+
83
+ // Check size AFTER adding to prevent race where multiple threads see same size
84
+ // and all trigger cleanup before any adds complete
85
+ // V8 Optimization: Use pre-calculated targetCacheSize
86
+ if (this.cache.size > this.maxCacheSize) {
87
+ const toRemove = this.cache.size - this.targetCacheSize;
88
+ if (toRemove > 0) {
89
+ this.clearOldestEntries(toRemove);
79
90
  }
80
91
  }
81
92
 
82
93
  return wasNew;
83
94
  }
84
95
 
96
+ /**
97
+ * Check whether a domain was already detected and mark it as detected if it is new.
98
+ * Combines isDomainAlreadyDetected + markDomainAsDetected in one synchronous call (no await between check and add)
99
+ * @param {string} domain - Domain to check and potentially mark
100
+ * @returns {boolean} True if domain was ALREADY detected (should skip), false if NEW or not a non-empty string (should process)
101
+ */
102
+ checkAndMark(domain) {
103
+ if (!domain || typeof domain !== 'string') {
104
+ return false;
105
+ }
106
+
107
+ const wasAlreadyDetected = this.cache.has(domain);
108
+
109
+ if (wasAlreadyDetected) {
110
+ // Domain already exists - update skip stats and return true (should skip)
111
+ this.stats.totalSkipped++;
112
+ this.stats.cacheHits++;
113
+
114
+ if (this.enableLogging) {
115
+ console.log(formatLogMessage('debug', `${this.logPrefix} Cache HIT: ${domain} (skipped)`));
116
+ }
117
+ return true; // Already detected, should skip
118
+ }
119
+
120
+ // Domain is NEW - count the miss, then mark it as detected
121
+ this.stats.cacheMisses++;
122
+
123
+ this.cache.add(domain);
124
+ this.stats.totalDetected++;
125
+
126
+ if (this.enableLogging) {
127
+ console.log(formatLogMessage('debug', `${this.logPrefix} Cache MISS: ${domain} (processing and marked, cache size: ${this.cache.size})`));
128
+ }
129
+
130
+ // Enforce the size limit AFTER adding, so the new domain is counted:
131
+ // once the cache exceeds maxCacheSize it is trimmed back down to
132
+ // targetCacheSize via clearOldestEntries
133
+ if (this.cache.size > this.maxCacheSize) {
134
+ const toRemove = this.cache.size - this.targetCacheSize;
135
+ if (toRemove > 0) {
136
+ this.clearOldestEntries(toRemove);
137
+ }
138
+ }
139
+
140
+ return false; // New domain, should process
141
+ }
142
+
85
143
  /**
86
144
  * Clear oldest entries from cache (basic LRU simulation)
87
145
  * Note: Set doesn't maintain insertion order in all Node.js versions,
@@ -96,8 +154,8 @@ class DomainCache {
96
154
 
97
155
  toRemove.forEach(domain => this.cache.delete(domain));
98
156
 
99
- if (this.options.enableLogging) {
100
- console.log(formatLogMessage('debug', `${this.options.logPrefix} Cleared ${toRemove.length} old entries, cache size now: ${this.cache.size}`));
157
+ if (this.enableLogging) {
158
+ console.log(formatLogMessage('debug', `${this.logPrefix} Cleared ${toRemove.length} old entries, cache size now: ${this.cache.size}`));
101
159
  }
102
160
  }
103
161
 
@@ -128,8 +186,8 @@ class DomainCache {
128
186
  cacheMisses: 0
129
187
  };
130
188
 
131
- if (this.options.enableLogging) {
132
- console.log(formatLogMessage('debug', `${this.options.logPrefix} Cache cleared (${previousSize} entries removed)`));
189
+ if (this.enableLogging) {
190
+ console.log(formatLogMessage('debug', `${this.logPrefix} Cache cleared (${previousSize} entries removed)`));
133
191
  }
134
192
  }
135
193
 
@@ -158,8 +216,8 @@ class DomainCache {
158
216
  removeDomain(domain) {
159
217
  const wasRemoved = this.cache.delete(domain);
160
218
 
161
- if (wasRemoved && this.options.enableLogging) {
162
- console.log(formatLogMessage('debug', `${this.options.logPrefix} Removed from cache: ${domain}`));
219
+ if (wasRemoved && this.enableLogging) {
220
+ console.log(formatLogMessage('debug', `${this.logPrefix} Removed from cache: ${domain}`));
163
221
  }
164
222
 
165
223
  return wasRemoved;
@@ -193,6 +251,7 @@ class DomainCache {
193
251
  return {
194
252
  isDomainAlreadyDetected: this.isDomainAlreadyDetected.bind(this),
195
253
  markDomainAsDetected: this.markDomainAsDetected.bind(this),
254
+ checkAndMark: this.checkAndMark.bind(this),
196
255
  getSkippedCount: () => this.stats.totalSkipped,
197
256
  getCacheSize: () => this.cache.size,
198
257
  getStats: this.getStats.bind(this)
@@ -261,6 +320,16 @@ function markDomainAsDetected(domain) {
261
320
  cache.markDomainAsDetected(domain);
262
321
  }
263
322
 
323
+ /**
324
+ * Atomically check and mark a domain (race-condition free)
325
+ * @param {string} domain - Domain to check and mark
326
+ * @returns {boolean} True if already detected (skip), false if new (process)
327
+ */
328
+ function checkAndMark(domain) {
329
+ const cache = getGlobalDomainCache();
330
+ return cache.checkAndMark(domain);
331
+ }
332
+
264
333
  /**
265
334
  * Get total domains skipped (legacy wrapper)
266
335
  * @returns {number} Number of domains skipped
@@ -291,6 +360,7 @@ module.exports = {
291
360
  // Legacy wrapper functions for backward compatibility
292
361
  isDomainAlreadyDetected,
293
362
  markDomainAsDetected,
363
+ checkAndMark,
294
364
  getTotalDomainsSkipped,
295
365
  getDetectedDomainsCount
296
366
  };
@@ -93,9 +93,9 @@ const USER_AGENT_COLLECTIONS = Object.freeze(new Map([
93
93
  ['chrome', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
94
94
  ['chrome_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
95
95
  ['chrome_linux', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
96
- ['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:144.0) Gecko/20100101 Firefox/144.0"],
97
- ['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:144.0) Gecko/20100101 Firefox/144.0"],
98
- ['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:144.0) Gecko/20100101 Firefox/144.0"],
96
+ ['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:145.0) Gecko/20100101 Firefox/145.0"],
97
+ ['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:145.0) Gecko/20100101 Firefox/145.0"],
98
+ ['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Gecko/20100101 Firefox/145.0"],
99
99
  ['safari', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"]
100
100
  ]));
101
101
 
package/lib/referrer.js CHANGED
@@ -1,6 +1,16 @@
1
1
  // === Referrer Header Generation Module ===
2
2
  // This module handles generation of referrer headers for different traffic simulation modes
3
3
 
4
+ /**
5
+ * Performance utility: Get random element from array
6
+ * Reduces code duplication and improves readability
7
+ * @param {Array} array - Array to select from
8
+ * @returns {*} Random element from array
9
+ */
10
+ function getRandomElement(array) {
11
+ return array[Math.floor(Math.random() * array.length)];
12
+ }
13
+
4
14
  /**
5
15
  * Referrer URL collections for different modes
6
16
  */
@@ -69,7 +79,7 @@ const REFERRER_COLLECTIONS = Object.freeze({
69
79
  */
70
80
  function generateSearchTerm(customTerms, context = null) {
71
81
  if (customTerms && customTerms.length > 0) {
72
- return customTerms[Math.floor(Math.random() * customTerms.length)];
82
+ return getRandomElement(customTerms);
73
83
  }
74
84
 
75
85
  // Use context-specific terms if available
@@ -80,7 +90,7 @@ function generateSearchTerm(customTerms, context = null) {
80
90
  termCollection = REFERRER_COLLECTIONS.TECH_TERMS;
81
91
  }
82
92
 
83
- return termCollection[Math.floor(Math.random() * termCollection.length)];
93
+ return getRandomElement(termCollection);
84
94
  }
85
95
 
86
96
  /**
@@ -91,9 +101,8 @@ function generateSearchTerm(customTerms, context = null) {
91
101
  * @returns {string} Generated search engine referrer URL
92
102
  */
93
103
  function generateSearchReferrer(searchTerms, context, forceDebug) {
94
- const randomEngine = REFERRER_COLLECTIONS.SEARCH_ENGINES[
95
- Math.floor(Math.random() * REFERRER_COLLECTIONS.SEARCH_ENGINES.length)
96
- ];
104
+ const randomEngine = getRandomElement(REFERRER_COLLECTIONS.SEARCH_ENGINES);
105
+
97
106
  const searchTerm = generateSearchTerm(searchTerms, context);
98
107
  const referrerUrl = randomEngine + encodeURIComponent(searchTerm);
99
108
 
@@ -110,9 +119,7 @@ function generateSearchReferrer(searchTerms, context, forceDebug) {
110
119
  * @returns {string} Generated social media referrer URL
111
120
  */
112
121
  function generateSocialMediaReferrer(forceDebug) {
113
- const randomSocial = REFERRER_COLLECTIONS.SOCIAL_MEDIA[
114
- Math.floor(Math.random() * REFERRER_COLLECTIONS.SOCIAL_MEDIA.length)
115
- ];
122
+ const randomSocial = getRandomElement(REFERRER_COLLECTIONS.SOCIAL_MEDIA);
116
123
 
117
124
  if (forceDebug) {
118
125
  console.log(`[debug] Generated social media referrer: ${randomSocial}`);
@@ -127,9 +134,7 @@ function generateSocialMediaReferrer(forceDebug) {
127
134
  * @returns {string} Generated news site referrer URL
128
135
  */
129
136
  function generateNewsReferrer(forceDebug) {
130
- const randomNews = REFERRER_COLLECTIONS.NEWS_SITES[
131
- Math.floor(Math.random() * REFERRER_COLLECTIONS.NEWS_SITES.length)
132
- ];
137
+ const randomNews = getRandomElement(REFERRER_COLLECTIONS.NEWS_SITES);
133
138
 
134
139
  if (forceDebug) {
135
140
  console.log(`[debug] Generated news referrer: ${randomNews}`);
@@ -155,33 +160,47 @@ function isValidUrl(url) {
155
160
  * @returns {boolean} True if referrer should be disabled for this URL
156
161
  */
157
162
  function shouldDisableReferrer(targetUrl, disableList, forceDebug = false) {
158
- if (!disableList || !Array.isArray(disableList) || disableList.length === 0) {
163
+ // Fast path: early return for empty/invalid inputs
164
+ if (!disableList?.length || !targetUrl || typeof targetUrl !== 'string') {
159
165
  return false;
160
166
  }
161
167
 
162
- if (!targetUrl || typeof targetUrl !== 'string') {
163
- return false;
168
+ // Parse target URL once (performance optimization)
169
+ let targetHostname = null;
170
+ let targetUrlParsed = false;
171
+
172
+ try {
173
+ targetHostname = new URL(targetUrl).hostname;
174
+ targetUrlParsed = true;
175
+ } catch (e) {
176
+ // Invalid URL - can only do string matching
177
+ targetUrlParsed = false;
164
178
  }
165
179
 
166
180
  for (const disablePattern of disableList) {
167
181
  if (typeof disablePattern !== 'string') continue;
168
182
 
169
- // Exact URL match
183
+ // Fast check: Exact URL match (no parsing needed)
170
184
  if (targetUrl === disablePattern) {
171
- if (forceDebug) console.log(`[debug] Referrer disabled for exact URL match: ${targetUrl}`);
185
+ if (forceDebug) console.log(`[debug] Referrer disabled for exact match: ${targetUrl}`);
172
186
  return true;
173
187
  }
174
188
 
175
- // Domain/hostname match
176
- try {
177
- const targetHostname = new URL(targetUrl).hostname;
178
- const disableHostname = new URL(disablePattern).hostname;
179
- if (targetHostname === disableHostname) {
180
- if (forceDebug) console.log(`[debug] Referrer disabled for domain match: ${targetHostname}`);
181
- return true;
189
+ // Domain/hostname match (use cached parsed URL)
190
+ if (targetUrlParsed) {
191
+ try {
192
+ const disableHostname = new URL(disablePattern).hostname;
193
+ if (targetHostname === disableHostname) {
194
+ if (forceDebug) console.log(`[debug] Referrer disabled for domain match: ${targetHostname}`);
195
+ return true;
196
+ }
197
+ } catch (e) {
198
+ // disablePattern is not a valid URL, try substring match below
182
199
  }
183
- } catch (e) {
184
- // If pattern is not a valid URL, try simple string matching
200
+ }
201
+
202
+ // Fallback: Simple substring match (for patterns like 'example.com')
203
+ if (!targetUrlParsed || disablePattern.includes('/') === false) {
185
204
  if (targetUrl.includes(disablePattern)) {
186
205
  if (forceDebug) console.log(`[debug] Referrer disabled for pattern match: ${disablePattern} in ${targetUrl}`);
187
206
  return true;
@@ -218,7 +237,7 @@ function generateReferrerUrl(referrerConfig, forceDebug = false) {
218
237
  return '';
219
238
  }
220
239
 
221
- const randomUrl = referrerConfig[Math.floor(Math.random() * referrerConfig.length)];
240
+ const randomUrl = getRandomElement(referrerConfig);
222
241
  const url = isValidUrl(randomUrl) ? randomUrl : '';
223
242
 
224
243
  if (forceDebug) {
@@ -261,7 +280,7 @@ function generateReferrerUrl(referrerConfig, forceDebug = false) {
261
280
  case 'mixed': {
262
281
  // Randomly choose between different referrer types
263
282
  const modes = ['random_search', 'social_media', 'news_sites'];
264
- const randomMode = modes[Math.floor(Math.random() * modes.length)];
283
+ const randomMode = getRandomElement(modes);
265
284
 
266
285
  if (forceDebug) console.log(`[debug] Mixed mode selected: ${randomMode}`);
267
286
 
@@ -335,14 +354,26 @@ function validateReferrerConfig(referrerConfig) {
335
354
  if (Array.isArray(referrerConfig)) {
336
355
  if (referrerConfig.length === 0) {
337
356
  result.warnings.push('Empty referrer array will result in no referrer');
338
- } else {
339
- referrerConfig.forEach((url, index) => {
357
+ return result;
358
+ }
359
+
360
+ // Fast validation: check only first and last items if array is large
361
+ const itemsToCheck = referrerConfig.length > 10
362
+ ? [referrerConfig[0], referrerConfig[referrerConfig.length - 1]]
363
+ : referrerConfig;
364
+
365
+ itemsToCheck.forEach((url, index) => {
340
366
  if (!isValidUrl(url)) {
341
- result.errors.push(`Array item ${index} is not a valid HTTP/HTTPS URL: ${url}`);
367
+ const actualIndex = itemsToCheck === referrerConfig ? index : (index === 0 ? 0 : referrerConfig.length - 1);
368
+ result.errors.push(`Array item ${actualIndex} is not a valid HTTP/HTTPS URL: ${url}`);
342
369
  result.isValid = false;
343
370
  }
344
371
  });
372
+
373
+ if (referrerConfig.length > 10 && itemsToCheck.length < referrerConfig.length) {
374
+ result.warnings.push(`Large array (${referrerConfig.length} items): only validated first and last items for performance`);
345
375
  }
376
+
346
377
  return result;
347
378
  }
348
379
 
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v2.0.31 ===
1
+ // === Network scanner script (nwss.js) v2.0.33 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -50,6 +50,8 @@ const { initializeDryRunCollections, addDryRunMatch, addDryRunNetTools, processD
50
50
  const { clearSiteData } = require('./lib/clear_sitedata');
51
51
  // Referrer header generation
52
52
  const { getReferrerForUrl, validateReferrerConfig, validateReferrerDisable } = require('./lib/referrer');
53
+ // Adblock rules parser
54
+ const { parseAdblockRules } = require('./lib/adblock');
53
55
 
54
56
  // Fast setTimeout helper for Puppeteer 22.x compatibility
55
57
  // Uses standard Promise constructor for better performance than node:timers/promises
@@ -94,9 +96,9 @@ const USER_AGENTS = Object.freeze(new Map([
94
96
  ['chrome', "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
95
97
  ['chrome_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
96
98
  ['chrome_linux', "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36"],
97
- ['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:144.0) Gecko/20100101 Firefox/144.0"],
98
- ['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:144.0) Gecko/20100101 Firefox/144.0"],
99
- ['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:144.0) Gecko/20100101 Firefox/144.0"],
99
+ ['firefox', "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:145.0) Gecko/20100101 Firefox/145.0"],
100
+ ['firefox_mac', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:145.0) Gecko/20100101 Firefox/145.0"],
101
+ ['firefox_linux', "Mozilla/5.0 (X11; Linux x86_64; rv:145.0) Gecko/20100101 Firefox/145.0"],
100
102
  ['safari', "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.6 Safari/605.1.15"]
101
103
  ]));
102
104
 
@@ -145,7 +147,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
145
147
  const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive, performGroupWindowCleanup, performRealtimeWindowCleanup, trackPageForRealtime, updatePageUsage, cleanupPageBeforeReload } = require('./lib/browserhealth');
146
148
 
147
149
  // --- Script Configuration & Constants ---
148
- const VERSION = '2.0.31'; // Script version
150
+ const VERSION = '2.0.33'; // Script version
149
151
 
150
152
  // get startTime
151
153
  const startTime = Date.now();
@@ -244,6 +246,11 @@ if (cleanupIntervalIndex !== -1 && args[cleanupIntervalIndex + 1]) {
244
246
  const enableColors = args.includes('--color') || args.includes('--colour');
245
247
  let adblockRulesMode = args.includes('--adblock-rules');
246
248
 
249
+ // Adblock variables (request blocking)
250
+ let adblockEnabled = false;
251
+ let adblockMatcher = null;
252
+ let adblockStats = { blocked: 0, allowed: 0 };
253
+
247
254
  // Validate --adblock-rules usage - ignore if used incorrectly instead of erroring
248
255
  if (adblockRulesMode) {
249
256
  if (!outputFile) {
@@ -452,6 +459,24 @@ if (validateRules || validateRulesFile) {
452
459
  }
453
460
  }
454
461
 
462
+ // Parse --block-ads argument for request-level ad blocking
463
+ const blockAdsIndex = args.findIndex(arg => arg.startsWith('--block-ads'));
464
+ if (blockAdsIndex !== -1) {
465
+ const rulesFile = args[blockAdsIndex].includes('=')
466
+ ? args[blockAdsIndex].split('=')[1]
467
+ : args[blockAdsIndex + 1];
468
+
469
+ if (!rulesFile || !fs.existsSync(rulesFile)) {
470
+ console.log(`Error: Adblock rules file not found: ${rulesFile || '(not specified)'}`);
471
+ process.exit(1);
472
+ }
473
+
474
+ adblockEnabled = true;
475
+ adblockMatcher = parseAdblockRules(rulesFile, { enableLogging: forceDebug });
476
+ const stats = adblockMatcher.getStats();
477
+ if (!silentMode) console.log(messageColors.success(`Adblock enabled: Loaded ${stats.total} blocking rules from ${rulesFile}`));
478
+ }
479
+
455
480
  if (args.includes('--help') || args.includes('-h')) {
456
481
  console.log(`Usage: node nwss.js [options]
457
482
 
@@ -471,6 +496,10 @@ Output Format Options:
471
496
  --privoxy Output as { +block } .domain.com (Privoxy format)
472
497
  --pihole Output as (^|\\.)domain\\.com$ (Pi-hole regex format)
473
498
  --adblock-rules Generate adblock filter rules with resource type modifiers (requires -o)
499
+
500
+ Request Blocking:
501
+ --block-ads=<file> Block ads/trackers using EasyList format rules (||domain.com^, /ads/*, etc)
502
+ Works at request-level for maximum performance
474
503
 
475
504
  General Options:
476
505
  --verbose Force verbose mode globally
@@ -2345,6 +2374,27 @@ function setupFrameHandling(page, forceDebug) {
2345
2374
  console.log(formatLogMessage('debug', `${messageColors.highlight('[req]')}[frame: ${isMainFrame ? 'main' : 'iframe'}] ${frameUrl} → ${request.url()}`));
2346
2375
  }
2347
2376
 
2377
+ // Apply adblock rules BEFORE expensive regex checks for better performance
2378
+ if (adblockEnabled && adblockMatcher) {
2379
+ try {
2380
+ const result = adblockMatcher.shouldBlock(
2381
+ checkedUrl,
2382
+ currentUrl,
2383
+ request.resourceType()
2384
+ );
2385
+
2386
+ if (result.blocked) {
2387
+ adblockStats.blocked++;
2388
+ if (forceDebug) {
2389
+ console.log(formatLogMessage('debug', `${messageColors.blocked('[adblock]')} ${checkedUrl} (${result.reason})`));
2390
+ }
2391
+ request.abort();
2392
+ return;
2393
+ }
2394
+ adblockStats.allowed++;
2395
+ } catch (err) { /* Silently continue on adblock errors */ }
2396
+ }
2397
+
2348
2398
  // Show --debug output and the url while its scanning
2349
2399
  if (forceDebug) {
2350
2400
  const simplifiedUrl = getRootDomain(currentUrl);
@@ -4049,6 +4099,12 @@ function setupFrameHandling(page, forceDebug) {
4049
4099
  console.log(formatLogMessage('debug', `Cached detections: ${cloudflareStats.size}`));
4050
4100
  }
4051
4101
  // Log smart cache statistics (if cache is enabled)
4102
+ // Adblock statistics
4103
+ if (adblockEnabled) {
4104
+ console.log(formatLogMessage('debug', '=== Adblock Statistics ==='));
4105
+ const blockRate = ((adblockStats.blocked / (adblockStats.blocked + adblockStats.allowed)) * 100).toFixed(1);
4106
+ console.log(formatLogMessage('debug', `Blocked: ${adblockStats.blocked} requests (${blockRate}% block rate), Allowed: ${adblockStats.allowed}`));
4107
+ }
4052
4108
  if (smartCache) {
4053
4109
  const cacheStats = smartCache.getStats();
4054
4110
  console.log(formatLogMessage('debug', '=== Smart Cache Statistics ==='));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "2.0.31",
3
+ "version": "2.0.33",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {