@fanboynz/network-scanner 2.0.59 → 2.0.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/adblock.js CHANGED
@@ -3,6 +3,16 @@
3
3
  // Optimizations: Map domains + URL cache + skip third-party calc + cached hostname split
4
4
 
5
5
  const fs = require('fs');
6
+ const psl = require('psl');
7
+
8
+ // Hoisted constants — avoid recreating per rule (~80K times for EasyList)
9
+ const COSMETIC_OPTIONS = new Set(['generichide', 'elemhide', 'specifichide', 'genericblock']);
10
+ const PARSE_TYPE_MAP = {
11
+ 'script': 'script', 'stylesheet': 'stylesheet', 'css': 'stylesheet',
12
+ 'image': 'image', 'xmlhttprequest': 'xhr', 'xhr': 'xhr', 'font': 'font',
13
+ 'media': 'media', 'websocket': 'websocket', 'subdocument': 'subdocument',
14
+ 'document': 'document', 'ping': 'ping', 'other': 'other'
15
+ };
6
16
 
7
17
  /**
8
18
  * Simple LRU cache for URL parsing results
@@ -106,10 +116,10 @@ function parseAdblockRules(filePath, options = {}) {
106
116
 
107
117
  // Skip rules with cosmetic-only options (not for network blocking)
108
118
  // These options only affect element hiding, not network requests
109
- const cosmeticOnlyOptions = ['generichide', 'elemhide', 'specifichide'];
110
- const hasCosmeticOption = cosmeticOnlyOptions.some(opt =>
111
- line.includes(`$${opt}`) || line.includes(`,${opt}`)
112
- );
119
+ let hasCosmeticOption = false;
120
+ for (const opt of COSMETIC_OPTIONS) {
121
+ if (line.includes(`$${opt}`) || line.includes(`,${opt}`)) { hasCosmeticOption = true; break; }
122
+ }
113
123
  if (hasCosmeticOption) {
114
124
  rules.stats.elementHiding++;
115
125
  continue;
@@ -121,7 +131,7 @@ function parseAdblockRules(filePath, options = {}) {
121
131
  // Whitelist rules (exception rules)
122
132
  if (line.startsWith('@@')) {
123
133
  const cleanLine = line.substring(2);
124
- const parsedRule = parseRule(cleanLine, true);
134
+ const parsedRule = parseRule(cleanLine, true, enableLogging);
125
135
 
126
136
  // Store exact domains in Map for O(1) lookup, wildcards in array
127
137
  if (parsedRule.isDomain && parsedRule.domain && !parsedRule.domain.includes('*')) {
@@ -134,7 +144,7 @@ function parseAdblockRules(filePath, options = {}) {
134
144
  }
135
145
 
136
146
  // Regular blocking rules
137
- const parsedRule = parseRule(line, false);
147
+ const parsedRule = parseRule(line, false, enableLogging);
138
148
 
139
149
  // Categorize based on rule type
140
150
  if (parsedRule.isThirdParty) {
@@ -194,20 +204,19 @@ function parseAdblockRules(filePath, options = {}) {
194
204
  * @param {boolean} isWhitelist - Whether this is a whitelist rule
195
205
  * @returns {Object} Parsed rule object
196
206
  */
197
- function parseRule(rule, isWhitelist) {
207
+ function parseRule(rule, isWhitelist, enableLogging = false) {
198
208
  const parsed = {
199
- raw: rule,
209
+ raw: enableLogging ? rule : null, // Only store for logging — saves memory on large lists
200
210
  isWhitelist,
201
211
  isDomain: false,
202
212
  isThirdParty: false,
203
213
  isFirstParty: false,
204
214
  isScript: false,
205
- resourceTypes: null, // Array of allowed resource types, null = all types
206
- excludedResourceTypes: null, // Array of excluded resource types ($~script, $~image)
215
+ resourceTypes: null, // Set of allowed resource types, null = all types
216
+ excludedResourceTypes: null, // Set of excluded resource types ($~script, $~image)
207
217
  isRegex: false,
208
218
  domainRestrictions: null, // { include: ['site.com'], exclude: ['~site.com'] }
209
219
  pattern: '',
210
- options: {},
211
220
  matcher: null
212
221
  };
213
222
 
@@ -215,105 +224,81 @@ function parseRule(rule, isWhitelist) {
215
224
  let [pattern, optionsStr] = rule.split('$');
216
225
  parsed.pattern = pattern;
217
226
 
218
- // Parse options
227
+ // Parse options into local object (not stored on parsed — freed after this block)
219
228
  if (optionsStr) {
220
229
  const options = optionsStr.split(',');
230
+ const parsedOptions = {};
221
231
 
222
- // Filter out cosmetic-only options that don't affect network blocking
223
- const networkOptions = options.filter(opt => {
224
- const optKey = opt.split('=')[0].trim();
225
- // Skip cosmetic filtering options
226
- const cosmeticOptions = [
227
- 'generichide',
228
- 'elemhide',
229
- 'specifichide',
230
- 'genericblock' // Also cosmetic-related
231
- ];
232
- return !cosmeticOptions.includes(optKey);
233
- });
234
-
235
- // Only process network-related options
236
- for (const opt of networkOptions) {
232
+ for (const opt of options) {
237
233
  const [key, value] = opt.split('=');
238
- parsed.options[key.trim()] = value ? value.trim() : true;
234
+ const trimmedKey = key.trim();
235
+ if (!COSMETIC_OPTIONS.has(trimmedKey)) {
236
+ parsedOptions[trimmedKey] = value ? value.trim() : true;
237
+ }
239
238
  }
240
-
239
+
241
240
  // Check for third-party option
242
- if (parsed.options['third-party'] || parsed.options['3p']) {
241
+ if (parsedOptions['third-party'] || parsedOptions['3p']) {
243
242
  parsed.isThirdParty = true;
244
243
  }
245
-
244
+
246
245
  // Check for first-party option ($first-party, $1p, $~third-party)
247
- if (parsed.options['first-party'] || parsed.options['1p'] || parsed.options['~third-party']) {
246
+ if (parsedOptions['first-party'] || parsedOptions['1p'] || parsedOptions['~third-party']) {
248
247
  parsed.isFirstParty = true;
249
248
  }
250
249
 
251
- // Parse resource type options
252
- const TYPE_MAP = {
253
- 'script': 'script',
254
- 'stylesheet': 'stylesheet',
255
- 'css': 'stylesheet',
256
- 'image': 'image',
257
- 'xmlhttprequest': 'xhr',
258
- 'xhr': 'xhr',
259
- 'font': 'font',
260
- 'media': 'media',
261
- 'websocket': 'websocket',
262
- 'subdocument': 'subdocument',
263
- 'document': 'document',
264
- 'ping': 'ping',
265
- 'other': 'other'
266
- };
267
-
268
- const matchedTypes = Object.keys(parsed.options)
269
- .filter(key => TYPE_MAP[key])
270
- .map(key => TYPE_MAP[key]);
271
-
272
- const excludedTypes = Object.keys(parsed.options)
273
- .filter(key => key.startsWith('~') && TYPE_MAP[key.substring(1)])
274
- .map(key => TYPE_MAP[key.substring(1)]);
250
+ // Parse resource type options using module-level PARSE_TYPE_MAP
251
+ const matchedTypes = Object.keys(parsedOptions)
252
+ .filter(key => PARSE_TYPE_MAP[key])
253
+ .map(key => PARSE_TYPE_MAP[key]);
254
+
255
+ const excludedTypes = Object.keys(parsedOptions)
256
+ .filter(key => key.startsWith('~') && PARSE_TYPE_MAP[key.substring(1)])
257
+ .map(key => PARSE_TYPE_MAP[key.substring(1)]);
275
258
 
276
259
  if (matchedTypes.length > 0) {
277
- parsed.resourceTypes = matchedTypes;
278
- if (parsed.options['script']) {
260
+ // $document rules act as full domain blocks — no resource type restriction
261
+ if (matchedTypes.length === 1 && matchedTypes[0] === 'document') {
262
+ // Don't set resourceTypes — treat as standard block matching all types
263
+ } else {
264
+ parsed.resourceTypes = new Set(matchedTypes);
265
+ }
266
+ if (parsedOptions['script']) {
279
267
  parsed.isScript = true;
280
268
  }
281
269
  }
282
270
 
283
271
  if (excludedTypes.length > 0) {
284
- parsed.excludedResourceTypes = excludedTypes;
272
+ parsed.excludedResourceTypes = new Set(excludedTypes);
285
273
  }
286
274
 
287
275
  // Parse domain option: $domain=site1.com|site2.com|~excluded.com
288
- if (parsed.options['domain']) {
289
- const domainList = parsed.options['domain'];
276
+ if (parsedOptions['domain']) {
277
+ const domainList = parsedOptions['domain'];
290
278
  const domains = domainList.split('|').map(d => d.trim()).filter(d => d);
291
-
279
+
292
280
  const include = [];
293
281
  const exclude = [];
294
-
282
+
295
283
  for (const domain of domains) {
296
284
  if (domain.startsWith('~')) {
297
- // Negation: exclude this domain
298
285
  exclude.push(domain.substring(1).toLowerCase());
299
286
  } else {
300
- // Positive: include this domain
301
287
  include.push(domain.toLowerCase());
302
288
  }
303
289
  }
304
-
305
- // Store parsed domain restrictions
290
+
306
291
  parsed.domainRestrictions = {
307
292
  include: include.length > 0 ? include : null,
308
293
  exclude: exclude.length > 0 ? exclude : null
309
294
  };
310
-
295
+ }
296
+ // parsedOptions goes out of scope here — GC can reclaim
311
297
  }
312
- }
313
298
 
314
299
  // Domain rules: ||domain.com^ or ||domain.com
315
300
  if (pattern.startsWith('||')) {
316
- const domain = pattern.substring(2).replace(/\^.*$/, '').replace(/\*$/, '');
301
+ const domain = pattern.substring(2).replace(/[\^\/\*].*$/, '');
317
302
  const afterDomain = pattern.substring(2 + domain.length);
318
303
  if (!afterDomain || afterDomain === '^') {
319
304
  // Pure domain rule: ||domain.com^ or ||domain.com
@@ -321,16 +306,34 @@ function parseRule(rule, isWhitelist) {
321
306
  parsed.domain = domain;
322
307
  parsed.matcher = createDomainMatcher(domain);
323
308
  } else {
324
- // Domain + path rule: ||domain.com^*path or ||domain.com/path
325
- parsed.matcher = createPatternMatcher(pattern);
309
+ // Domain + path rule: ||domain.com/path or ||domain.com^*path
310
+ // Split into fast domain check + path pattern to avoid full-URL regex
311
+ parsed.isDomain = true;
312
+ parsed.domain = domain;
313
+ const domainMatcher = createDomainMatcher(domain);
314
+ const pathMatcher = createPatternMatcher(afterDomain);
315
+ parsed.matcher = (url, hostname) => {
316
+ if (!domainMatcher(url, hostname)) return false;
317
+ // Extract path portion after hostname for path matching
318
+ const hostIdx = url.indexOf(hostname);
319
+ if (hostIdx === -1) return false;
320
+ const pathPart = url.substring(hostIdx + hostname.length);
321
+ return pathMatcher(pathPart);
322
+ };
326
323
  }
327
324
  }
328
325
  // Regex rules: /pattern/
329
326
  else if (pattern.startsWith('/') && pattern.endsWith('/')) {
330
327
  parsed.isRegex = true;
331
- const regexPattern = pattern.substring(1, pattern.length - 1);
332
- const regex = new RegExp(regexPattern, 'i');
333
- parsed.matcher = (url) => regex.test(url);
328
+ const cached = _regexCache.get(pattern);
329
+ if (cached) {
330
+ parsed.matcher = cached;
331
+ } else {
332
+ const regexPattern = pattern.substring(1, pattern.length - 1);
333
+ const regex = new RegExp(regexPattern, 'i');
334
+ parsed.matcher = (url) => regex.test(url);
335
+ _regexCache.set(pattern, parsed.matcher);
336
+ }
334
337
  }
335
338
  // Path/wildcard rules: /ads/* or ad.js
336
339
  else {
@@ -348,34 +351,58 @@ function parseRule(rule, isWhitelist) {
348
351
  function createDomainMatcher(domain) {
349
352
  const lowerDomain = domain.toLowerCase();
350
353
  const dotDomain = '.' + lowerDomain;
354
+ // hostname is already lowercased by shouldBlock() before being passed here
351
355
  return (url, hostname) => {
352
- const lowerHostname = hostname.toLowerCase();
353
- // Exact match or subdomain match
354
- return lowerHostname === lowerDomain ||
355
- lowerHostname.endsWith(dotDomain);
356
+ return hostname === lowerDomain ||
357
+ hostname.endsWith(dotDomain);
356
358
  };
357
359
  }
358
360
 
361
+ /**
362
+ * Shared regex cache — deduplicates identical compiled patterns across rules
363
+ * Large lists (EasyList ~80K rules) often have thousands of duplicate patterns
364
+ */
365
+ const _regexCache = new Map();
366
+
359
367
  /**
360
368
  * Creates a pattern matcher for path/wildcard rules
361
369
  * @param {string} pattern - Pattern with wildcards
362
370
  * @returns {Function} Matcher function
363
371
  */
364
372
  function createPatternMatcher(pattern) {
373
+ // Check cache for already-compiled identical pattern
374
+ const cached = _regexCache.get(pattern);
375
+ if (cached) return cached;
376
+
365
377
  // Convert adblock pattern to regex
366
378
  // * matches anything
367
379
  // ^ matches separator (/, ?, &, =, :)
368
380
  // | matches start/end of URL
369
-
381
+
382
+ // Handle | anchors before escaping — only at very start/end of pattern
383
+ let anchorStart = false;
384
+ let anchorEnd = false;
385
+ if (pattern.startsWith('|') && !pattern.startsWith('||')) {
386
+ anchorStart = true;
387
+ pattern = pattern.substring(1);
388
+ }
389
+ if (pattern.endsWith('|')) {
390
+ anchorEnd = true;
391
+ pattern = pattern.slice(0, -1);
392
+ }
393
+
370
394
  let regexPattern = pattern
371
- .replace(/[.+?{}()[\]\\]/g, '\\$&') // Escape regex special chars
395
+ .replace(/[.+?{}()[\]\\|]/g, '\\$&') // Escape regex special chars including literal |
372
396
  .replace(/\*/g, '.*') // * -> .*
373
- .replace(/\^/g, '[/?&=:]') // ^ -> separator chars
374
- .replace(/^\|/, '^') // | at start -> ^
375
- .replace(/\|$/, '$'); // | at end -> $
376
-
397
+ .replace(/\^/g, '[/?&=:]'); // ^ -> separator chars
398
+
399
+ if (anchorStart) regexPattern = '^' + regexPattern;
400
+ if (anchorEnd) regexPattern = regexPattern + '$';
401
+
377
402
  const regex = new RegExp(regexPattern, 'i');
378
- return (url) => regex.test(url);
403
+ const matcher = (url) => regex.test(url);
404
+ _regexCache.set(pattern, matcher);
405
+ return matcher;
379
406
  }
380
407
 
381
408
  /**
@@ -387,15 +414,25 @@ function createPatternMatcher(pattern) {
387
414
  function createMatcher(rules, options = {}) {
388
415
  const { enableLogging = false, caseSensitive = false } = options;
389
416
 
390
- const urlCache = new URLCache(8000);
417
+ const urlCache = new URLCache(16000);
391
418
  let cacheHits = 0;
392
419
  let cacheMisses = 0;
393
420
  const hasPartyRules = rules.thirdPartyRules.length > 0 || rules.firstPartyRules.length > 0;
394
- const resultCache = new URLCache(8000); // Cache full shouldBlock results
395
-
421
+ // Result cache with LRU eviction — evicts oldest entries one at a time
422
+ // instead of clearing everything when full
423
+ const resultCache = new URLCache(32000);
424
+
425
+ function resultCacheGet(url, sourceUrl, resourceType) {
426
+ return resultCache.get(url + '\0' + sourceUrl + '\0' + resourceType);
427
+ }
428
+
429
+ function resultCacheSet(url, sourceUrl, resourceType, result) {
430
+ resultCache.set(url + '\0' + sourceUrl + '\0' + resourceType, result);
431
+ }
432
+
396
433
  return {
397
434
  rules,
398
-
435
+
399
436
  /**
400
437
  * Check if URL should be blocked
401
438
  * @param {string} url - URL to check
@@ -406,8 +443,7 @@ function createMatcher(rules, options = {}) {
406
443
  shouldBlock(url, sourceUrl = '', resourceType = '') {
407
444
  try {
408
445
  // Check result cache — same URL+source+type always produces same result
409
- const resultKey = url + '\0' + sourceUrl + '\0' + resourceType;
410
- const cachedResult = resultCache.get(resultKey);
446
+ const cachedResult = resultCacheGet(url, sourceUrl, resourceType);
411
447
  if (cachedResult) {
412
448
  cacheHits++;
413
449
  return cachedResult;
@@ -435,14 +471,16 @@ function createMatcher(rules, options = {}) {
435
471
  cacheMisses++;
436
472
  }
437
473
 
438
- // Calculate hostname parts once and reuse
439
- const hostnameParts = lowerHostname.split('.');
440
-
441
- // Precompute parent domains once, reused for whitelist and block checks
442
- const parentDomains = [];
443
- const partsLen = hostnameParts.length;
444
- for (let i = 1; i < partsLen; i++) {
445
- parentDomains.push(hostnameParts.slice(i).join('.'));
474
+ // Lazy parent domain computation — only built when exact Map lookup misses
475
+ let parentDomains = null;
476
+ function getParentDomains() {
477
+ if (parentDomains) return parentDomains;
478
+ parentDomains = [];
479
+ const hostnameParts = lowerHostname.split('.');
480
+ for (let i = 1; i < hostnameParts.length; i++) {
481
+ parentDomains.push(hostnameParts.slice(i).join('.'));
482
+ }
483
+ return parentDomains;
446
484
  }
447
485
 
448
486
  // Extract and cache source page domain for $domain and third-party checks
@@ -482,26 +520,27 @@ function createMatcher(rules, options = {}) {
482
520
  // Fast path: Check exact domain in Map (O(1))
483
521
  let rule = rules.whitelistMap.get(lowerHostname); // V8: Single Map lookup
484
522
  if (rule) {
485
- if (enableLogging) { // V8: Check after getting rule (inlined)
486
- console.log(`[Adblock] Whitelisted: ${url} (${rule.raw})`);
487
- }
488
- if (matchesDomainRestrictions(rule, sourceDomain)) {
489
- const r = { blocked: false, rule: rule.raw, reason: 'whitelisted' };
490
- resultCache.set(resultKey, r);
523
+ if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
524
+ if (enableLogging) {
525
+ console.log(`[Adblock] Whitelisted: ${url} (${rule.raw || rule.pattern})`);
526
+ }
527
+ const r = { blocked: false, rule: rule.raw || rule.pattern, reason: 'whitelisted' };
528
+ resultCacheSet(url, sourceUrl, resourceType, r);
491
529
  return r;
492
530
  }
493
531
  }
494
-
532
+
495
533
  // Check parent domains for subdomain matches (e.g., sub.example.com -> example.com)
496
- for (let i = 0; i < parentDomains.length; i++) {
497
- rule = rules.whitelistMap.get(parentDomains[i]);
534
+ const parents = getParentDomains();
535
+ for (let i = 0; i < parents.length; i++) {
536
+ rule = rules.whitelistMap.get(parents[i]);
498
537
  if (rule) {
499
- if (enableLogging) {
500
- console.log(`[Adblock] Whitelisted: ${url} (${rule.raw})`);
501
- }
502
- if (matchesDomainRestrictions(rule, sourceDomain)) {
503
- const r = { blocked: false, rule: rule.raw, reason: 'whitelisted' };
504
- resultCache.set(resultKey, r);
538
+ if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
539
+ if (enableLogging) {
540
+ console.log(`[Adblock] Whitelisted: ${url} (${rule.raw || rule.pattern})`);
541
+ }
542
+ const r = { blocked: false, rule: rule.raw || rule.pattern, reason: 'whitelisted' };
543
+ resultCacheSet(url, sourceUrl, resourceType, r);
505
544
  return r;
506
545
  }
507
546
  }
@@ -513,39 +552,39 @@ function createMatcher(rules, options = {}) {
513
552
  const rule = rules.whitelist[i];
514
553
  if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
515
554
  if (enableLogging) {
516
- console.log(`[Adblock] Whitelisted: ${url} (${rule.raw})`);
555
+ console.log(`[Adblock] Whitelisted: ${url} (${rule.raw || rule.pattern})`);
517
556
  }
518
- const r = { blocked: false, rule: rule.raw, reason: 'whitelisted' };
519
- resultCache.set(resultKey, r);
557
+ const r = { blocked: false, rule: rule.raw || rule.pattern, reason: 'whitelisted' };
558
+ resultCacheSet(url, sourceUrl, resourceType, r);
520
559
  return r;
521
560
  }
522
561
  }
523
562
 
524
563
  // === DOMAIN BLOCKING CHECK ===
525
-
564
+
526
565
  // Fast path: Check exact domain in Map (O(1))
527
566
  rule = rules.domainMap.get(lowerHostname); // V8: Single Map lookup
528
567
  if (rule) {
529
- if (enableLogging) {
530
- console.log(`[Adblock] Blocked domain: ${url} (${rule.raw})`);
531
- }
532
- if (matchesDomainRestrictions(rule, sourceDomain)) {
533
- const r = { blocked: true, rule: rule.raw, reason: 'domain_rule' };
534
- resultCache.set(resultKey, r);
568
+ if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
569
+ if (enableLogging) {
570
+ console.log(`[Adblock] Blocked domain: ${url} (${rule.raw || rule.pattern})`);
571
+ }
572
+ const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'domain_rule' };
573
+ resultCacheSet(url, sourceUrl, resourceType, r);
535
574
  return r;
536
575
  }
537
576
  }
538
-
577
+
539
578
  // Check parent domains for subdomain matches (e.g., ads.example.com -> example.com)
540
- for (let i = 0; i < parentDomains.length; i++) {
541
- rule = rules.domainMap.get(parentDomains[i]);
579
+ for (let i = 0; i < parents.length; i++) {
580
+ rule = rules.domainMap.get(parents[i]);
542
581
  if (rule) {
543
- if (enableLogging) {
544
- console.log(`[Adblock] Blocked domain: ${url} (${rule.raw})`);
545
- }
546
- if (matchesDomainRestrictions(rule, sourceDomain)) {
547
- const r = { blocked: true, rule: rule.raw, reason: 'domain_rule' };
548
- resultCache.set(resultKey, r);
582
+ if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
583
+ if (enableLogging) {
584
+ console.log(`[Adblock] Blocked domain: ${url} (${rule.raw || rule.pattern})`);
585
+ }
586
+ const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'domain_rule' };
587
+ resultCacheSet(url, sourceUrl, resourceType, r);
549
588
  return r;
550
589
  }
551
590
  }
@@ -557,10 +596,10 @@ function createMatcher(rules, options = {}) {
557
596
  const rule = rules.domainRules[i];
558
597
  if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
559
598
  if (enableLogging) {
560
- console.log(`[Adblock] Blocked domain: ${url} (${rule.raw})`);
599
+ console.log(`[Adblock] Blocked domain: ${url} (${rule.raw || rule.pattern})`);
561
600
  }
562
- const r = { blocked: true, rule: rule.raw, reason: 'domain_rule' };
563
- resultCache.set(resultKey, r);
601
+ const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'domain_rule' };
602
+ resultCacheSet(url, sourceUrl, resourceType, r);
564
603
  return r;
565
604
  }
566
605
  }
@@ -572,10 +611,10 @@ function createMatcher(rules, options = {}) {
572
611
  const rule = rules.thirdPartyRules[i];
573
612
  if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
574
613
  if (enableLogging) {
575
- console.log(`[Adblock] Blocked third-party: ${url} (${rule.raw})`);
614
+ console.log(`[Adblock] Blocked third-party: ${url} (${rule.raw || rule.pattern})`);
576
615
  }
577
- const r = { blocked: true, rule: rule.raw, reason: 'third_party_rule' };
578
- resultCache.set(resultKey, r);
616
+ const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'third_party_rule' };
617
+ resultCacheSet(url, sourceUrl, resourceType, r);
579
618
  return r;
580
619
  }
581
620
  }
@@ -588,10 +627,10 @@ function createMatcher(rules, options = {}) {
588
627
  const rule = rules.firstPartyRules[i];
589
628
  if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
590
629
  if (enableLogging) {
591
- console.log(`[Adblock] Blocked first-party: ${url} (${rule.raw})`);
630
+ console.log(`[Adblock] Blocked first-party: ${url} (${rule.raw || rule.pattern})`);
592
631
  }
593
- const r = { blocked: true, rule: rule.raw, reason: 'first_party_rule' };
594
- resultCache.set(resultKey, r);
632
+ const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'first_party_rule' };
633
+ resultCacheSet(url, sourceUrl, resourceType, r);
595
634
  return r;
596
635
  }
597
636
  }
@@ -604,10 +643,10 @@ function createMatcher(rules, options = {}) {
604
643
  const rule = rules.scriptRules[i];
605
644
  if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
606
645
  if (enableLogging) {
607
- console.log(`[Adblock] Blocked script: ${url} (${rule.raw})`);
646
+ console.log(`[Adblock] Blocked script: ${url} (${rule.raw || rule.pattern})`);
608
647
  }
609
- const r = { blocked: true, rule: rule.raw, reason: 'script_rule' };
610
- resultCache.set(resultKey, r);
648
+ const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'script_rule' };
649
+ resultCacheSet(url, sourceUrl, resourceType, r);
611
650
  return r;
612
651
  }
613
652
  }
@@ -619,10 +658,10 @@ function createMatcher(rules, options = {}) {
619
658
  const rule = rules.pathRules[i];
620
659
  if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
621
660
  if (enableLogging) {
622
- console.log(`[Adblock] Blocked path: ${url} (${rule.raw})`);
661
+ console.log(`[Adblock] Blocked path: ${url} (${rule.raw || rule.pattern})`);
623
662
  }
624
- const r = { blocked: true, rule: rule.raw, reason: 'path_rule' };
625
- resultCache.set(resultKey, r);
663
+ const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'path_rule' };
664
+ resultCacheSet(url, sourceUrl, resourceType, r);
626
665
  return r;
627
666
  }
628
667
  }
@@ -633,17 +672,17 @@ function createMatcher(rules, options = {}) {
633
672
  const rule = rules.regexRules[i];
634
673
  if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
635
674
  if (enableLogging) {
636
- console.log(`[Adblock] Blocked regex: ${url} (${rule.raw})`);
675
+ console.log(`[Adblock] Blocked regex: ${url} (${rule.raw || rule.pattern})`);
637
676
  }
638
- const r = { blocked: true, rule: rule.raw, reason: 'regex_rule' };
639
- resultCache.set(resultKey, r);
677
+ const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'regex_rule' };
678
+ resultCacheSet(url, sourceUrl, resourceType, r);
640
679
  return r;
641
680
  }
642
681
  }
643
682
 
644
683
  // No match - allow request
645
684
  const r = { blocked: false, rule: null, reason: 'no_match' };
646
- resultCache.set(resultKey, r);
685
+ resultCacheSet(url, sourceUrl, resourceType, r);
647
686
  return r;
648
687
 
649
688
  } catch (err) {
@@ -674,7 +713,8 @@ function createMatcher(rules, options = {}) {
674
713
  hits: cacheHits,
675
714
  misses: cacheMisses,
676
715
  hitRate: hitRate,
677
- size: urlCache.cache.size,
716
+ urlCacheSize: urlCache.cache.size,
717
+ resultCacheSize: resultCache.cache.size,
678
718
  maxSize: urlCache.maxSize
679
719
  }
680
720
  };
@@ -785,26 +825,18 @@ function matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDoma
785
825
  return false;
786
826
  }
787
827
 
788
- // Check resource type restrictions
789
- if (rule.resourceTypes) {
790
- if (!resourceType) {
791
- // No resource type info available — allow match for safety
792
- } else {
793
- // Normalize Puppeteer resource types to match our type names
794
- const normalizedType = RESOURCE_TYPE_ALIASES[resourceType] || resourceType;
795
- if (!rule.resourceTypes.includes(normalizedType)) {
796
- return false;
797
- }
828
+ // Normalize resource type once for both checks
829
+ if (resourceType && (rule.resourceTypes || rule.excludedResourceTypes)) {
830
+ const normalizedType = RESOURCE_TYPE_ALIASES[resourceType] || resourceType;
831
+
832
+ // Check resource type restrictions
833
+ if (rule.resourceTypes && !rule.resourceTypes.has(normalizedType)) {
834
+ return false;
798
835
  }
799
- }
800
836
 
801
- // Check negated resource type restrictions ($~script, $~image, etc.)
802
- if (rule.excludedResourceTypes) {
803
- if (resourceType) {
804
- const normalizedType = RESOURCE_TYPE_ALIASES[resourceType] || resourceType;
805
- if (rule.excludedResourceTypes.includes(normalizedType)) {
806
- return false;
807
- }
837
+ // Check negated resource type restrictions ($~script, $~image, etc.)
838
+ if (rule.excludedResourceTypes && rule.excludedResourceTypes.has(normalizedType)) {
839
+ return false;
808
840
  }
809
841
  }
810
842
 
@@ -817,17 +849,21 @@ function matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDoma
817
849
  }
818
850
 
819
851
  /**
820
- * Extract base domain from hostname
852
+ * Extract base domain from hostname using Public Suffix List
853
+ * Correctly handles multi-part TLDs like .co.uk, .com.au, .com.br
821
854
  * @param {string} hostname - Full hostname
822
855
  * @returns {string} Base domain
823
856
  */
857
+ const _baseDomainCache = new Map();
824
858
  function getBaseDomain(hostname) {
825
- const parts = hostname.split('.');
826
- if (parts.length <= 2) {
827
- return hostname;
828
- }
829
- // Return last two parts (example.com from sub.example.com)
830
- return parts.slice(-2).join('.');
859
+ const cached = _baseDomainCache.get(hostname);
860
+ if (cached) return cached;
861
+ const parsed = psl.parse(hostname);
862
+ const result = (parsed && parsed.domain) ? parsed.domain : hostname;
863
+ // Cap cache size
864
+ if (_baseDomainCache.size > 10000) _baseDomainCache.clear();
865
+ _baseDomainCache.set(hostname, result);
866
+ return result;
831
867
  }
832
868
 
833
869
  module.exports = {