@fanboynz/network-scanner 2.0.59 → 2.0.60
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +30 -0
- package/lib/adblock.js +215 -179
- package/lib/compare.js +19 -32
- package/lib/domain-cache.js +9 -7
- package/lib/grep.js +9 -13
- package/lib/nettools.js +177 -42
- package/lib/output.js +17 -30
- package/nwss.js +75 -23
- package/package.json +1 -1
package/lib/adblock.js
CHANGED
|
@@ -3,6 +3,16 @@
|
|
|
3
3
|
// Optimizations: Map domains + URL cache + skip third-party calc + cached hostname split
|
|
4
4
|
|
|
5
5
|
const fs = require('fs');
|
|
6
|
+
const psl = require('psl');
|
|
7
|
+
|
|
8
|
+
// Hoisted constants — avoid recreating per rule (~80K times for EasyList)
|
|
9
|
+
const COSMETIC_OPTIONS = new Set(['generichide', 'elemhide', 'specifichide', 'genericblock']);
|
|
10
|
+
const PARSE_TYPE_MAP = {
|
|
11
|
+
'script': 'script', 'stylesheet': 'stylesheet', 'css': 'stylesheet',
|
|
12
|
+
'image': 'image', 'xmlhttprequest': 'xhr', 'xhr': 'xhr', 'font': 'font',
|
|
13
|
+
'media': 'media', 'websocket': 'websocket', 'subdocument': 'subdocument',
|
|
14
|
+
'document': 'document', 'ping': 'ping', 'other': 'other'
|
|
15
|
+
};
|
|
6
16
|
|
|
7
17
|
/**
|
|
8
18
|
* Simple LRU cache for URL parsing results
|
|
@@ -106,10 +116,10 @@ function parseAdblockRules(filePath, options = {}) {
|
|
|
106
116
|
|
|
107
117
|
// Skip rules with cosmetic-only options (not for network blocking)
|
|
108
118
|
// These options only affect element hiding, not network requests
|
|
109
|
-
|
|
110
|
-
const
|
|
111
|
-
line.includes(`$${opt}`) || line.includes(`,${opt}`)
|
|
112
|
-
|
|
119
|
+
let hasCosmeticOption = false;
|
|
120
|
+
for (const opt of COSMETIC_OPTIONS) {
|
|
121
|
+
if (line.includes(`$${opt}`) || line.includes(`,${opt}`)) { hasCosmeticOption = true; break; }
|
|
122
|
+
}
|
|
113
123
|
if (hasCosmeticOption) {
|
|
114
124
|
rules.stats.elementHiding++;
|
|
115
125
|
continue;
|
|
@@ -121,7 +131,7 @@ function parseAdblockRules(filePath, options = {}) {
|
|
|
121
131
|
// Whitelist rules (exception rules)
|
|
122
132
|
if (line.startsWith('@@')) {
|
|
123
133
|
const cleanLine = line.substring(2);
|
|
124
|
-
const parsedRule = parseRule(cleanLine, true);
|
|
134
|
+
const parsedRule = parseRule(cleanLine, true, enableLogging);
|
|
125
135
|
|
|
126
136
|
// Store exact domains in Map for O(1) lookup, wildcards in array
|
|
127
137
|
if (parsedRule.isDomain && parsedRule.domain && !parsedRule.domain.includes('*')) {
|
|
@@ -134,7 +144,7 @@ function parseAdblockRules(filePath, options = {}) {
|
|
|
134
144
|
}
|
|
135
145
|
|
|
136
146
|
// Regular blocking rules
|
|
137
|
-
const parsedRule = parseRule(line, false);
|
|
147
|
+
const parsedRule = parseRule(line, false, enableLogging);
|
|
138
148
|
|
|
139
149
|
// Categorize based on rule type
|
|
140
150
|
if (parsedRule.isThirdParty) {
|
|
@@ -194,20 +204,19 @@ function parseAdblockRules(filePath, options = {}) {
|
|
|
194
204
|
* @param {boolean} isWhitelist - Whether this is a whitelist rule
|
|
195
205
|
* @returns {Object} Parsed rule object
|
|
196
206
|
*/
|
|
197
|
-
function parseRule(rule, isWhitelist) {
|
|
207
|
+
function parseRule(rule, isWhitelist, enableLogging = false) {
|
|
198
208
|
const parsed = {
|
|
199
|
-
raw: rule,
|
|
209
|
+
raw: enableLogging ? rule : null, // Only store for logging — saves memory on large lists
|
|
200
210
|
isWhitelist,
|
|
201
211
|
isDomain: false,
|
|
202
212
|
isThirdParty: false,
|
|
203
213
|
isFirstParty: false,
|
|
204
214
|
isScript: false,
|
|
205
|
-
resourceTypes: null, //
|
|
206
|
-
excludedResourceTypes: null, //
|
|
215
|
+
resourceTypes: null, // Set of allowed resource types, null = all types
|
|
216
|
+
excludedResourceTypes: null, // Set of excluded resource types ($~script, $~image)
|
|
207
217
|
isRegex: false,
|
|
208
218
|
domainRestrictions: null, // { include: ['site.com'], exclude: ['~site.com'] }
|
|
209
219
|
pattern: '',
|
|
210
|
-
options: {},
|
|
211
220
|
matcher: null
|
|
212
221
|
};
|
|
213
222
|
|
|
@@ -215,105 +224,81 @@ function parseRule(rule, isWhitelist) {
|
|
|
215
224
|
let [pattern, optionsStr] = rule.split('$');
|
|
216
225
|
parsed.pattern = pattern;
|
|
217
226
|
|
|
218
|
-
// Parse options
|
|
227
|
+
// Parse options into local object (not stored on parsed — freed after this block)
|
|
219
228
|
if (optionsStr) {
|
|
220
229
|
const options = optionsStr.split(',');
|
|
230
|
+
const parsedOptions = {};
|
|
221
231
|
|
|
222
|
-
|
|
223
|
-
const networkOptions = options.filter(opt => {
|
|
224
|
-
const optKey = opt.split('=')[0].trim();
|
|
225
|
-
// Skip cosmetic filtering options
|
|
226
|
-
const cosmeticOptions = [
|
|
227
|
-
'generichide',
|
|
228
|
-
'elemhide',
|
|
229
|
-
'specifichide',
|
|
230
|
-
'genericblock' // Also cosmetic-related
|
|
231
|
-
];
|
|
232
|
-
return !cosmeticOptions.includes(optKey);
|
|
233
|
-
});
|
|
234
|
-
|
|
235
|
-
// Only process network-related options
|
|
236
|
-
for (const opt of networkOptions) {
|
|
232
|
+
for (const opt of options) {
|
|
237
233
|
const [key, value] = opt.split('=');
|
|
238
|
-
|
|
234
|
+
const trimmedKey = key.trim();
|
|
235
|
+
if (!COSMETIC_OPTIONS.has(trimmedKey)) {
|
|
236
|
+
parsedOptions[trimmedKey] = value ? value.trim() : true;
|
|
237
|
+
}
|
|
239
238
|
}
|
|
240
|
-
|
|
239
|
+
|
|
241
240
|
// Check for third-party option
|
|
242
|
-
if (
|
|
241
|
+
if (parsedOptions['third-party'] || parsedOptions['3p']) {
|
|
243
242
|
parsed.isThirdParty = true;
|
|
244
243
|
}
|
|
245
|
-
|
|
244
|
+
|
|
246
245
|
// Check for first-party option ($first-party, $1p, $~third-party)
|
|
247
|
-
if (
|
|
246
|
+
if (parsedOptions['first-party'] || parsedOptions['1p'] || parsedOptions['~third-party']) {
|
|
248
247
|
parsed.isFirstParty = true;
|
|
249
248
|
}
|
|
250
249
|
|
|
251
|
-
// Parse resource type options
|
|
252
|
-
const
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
'
|
|
258
|
-
|
|
259
|
-
'font': 'font',
|
|
260
|
-
'media': 'media',
|
|
261
|
-
'websocket': 'websocket',
|
|
262
|
-
'subdocument': 'subdocument',
|
|
263
|
-
'document': 'document',
|
|
264
|
-
'ping': 'ping',
|
|
265
|
-
'other': 'other'
|
|
266
|
-
};
|
|
267
|
-
|
|
268
|
-
const matchedTypes = Object.keys(parsed.options)
|
|
269
|
-
.filter(key => TYPE_MAP[key])
|
|
270
|
-
.map(key => TYPE_MAP[key]);
|
|
271
|
-
|
|
272
|
-
const excludedTypes = Object.keys(parsed.options)
|
|
273
|
-
.filter(key => key.startsWith('~') && TYPE_MAP[key.substring(1)])
|
|
274
|
-
.map(key => TYPE_MAP[key.substring(1)]);
|
|
250
|
+
// Parse resource type options using module-level PARSE_TYPE_MAP
|
|
251
|
+
const matchedTypes = Object.keys(parsedOptions)
|
|
252
|
+
.filter(key => PARSE_TYPE_MAP[key])
|
|
253
|
+
.map(key => PARSE_TYPE_MAP[key]);
|
|
254
|
+
|
|
255
|
+
const excludedTypes = Object.keys(parsedOptions)
|
|
256
|
+
.filter(key => key.startsWith('~') && PARSE_TYPE_MAP[key.substring(1)])
|
|
257
|
+
.map(key => PARSE_TYPE_MAP[key.substring(1)]);
|
|
275
258
|
|
|
276
259
|
if (matchedTypes.length > 0) {
|
|
277
|
-
|
|
278
|
-
if (
|
|
260
|
+
// $document rules act as full domain blocks — no resource type restriction
|
|
261
|
+
if (matchedTypes.length === 1 && matchedTypes[0] === 'document') {
|
|
262
|
+
// Don't set resourceTypes — treat as standard block matching all types
|
|
263
|
+
} else {
|
|
264
|
+
parsed.resourceTypes = new Set(matchedTypes);
|
|
265
|
+
}
|
|
266
|
+
if (parsedOptions['script']) {
|
|
279
267
|
parsed.isScript = true;
|
|
280
268
|
}
|
|
281
269
|
}
|
|
282
270
|
|
|
283
271
|
if (excludedTypes.length > 0) {
|
|
284
|
-
parsed.excludedResourceTypes = excludedTypes;
|
|
272
|
+
parsed.excludedResourceTypes = new Set(excludedTypes);
|
|
285
273
|
}
|
|
286
274
|
|
|
287
275
|
// Parse domain option: $domain=site1.com|site2.com|~excluded.com
|
|
288
|
-
if (
|
|
289
|
-
const domainList =
|
|
276
|
+
if (parsedOptions['domain']) {
|
|
277
|
+
const domainList = parsedOptions['domain'];
|
|
290
278
|
const domains = domainList.split('|').map(d => d.trim()).filter(d => d);
|
|
291
|
-
|
|
279
|
+
|
|
292
280
|
const include = [];
|
|
293
281
|
const exclude = [];
|
|
294
|
-
|
|
282
|
+
|
|
295
283
|
for (const domain of domains) {
|
|
296
284
|
if (domain.startsWith('~')) {
|
|
297
|
-
// Negation: exclude this domain
|
|
298
285
|
exclude.push(domain.substring(1).toLowerCase());
|
|
299
286
|
} else {
|
|
300
|
-
// Positive: include this domain
|
|
301
287
|
include.push(domain.toLowerCase());
|
|
302
288
|
}
|
|
303
289
|
}
|
|
304
|
-
|
|
305
|
-
// Store parsed domain restrictions
|
|
290
|
+
|
|
306
291
|
parsed.domainRestrictions = {
|
|
307
292
|
include: include.length > 0 ? include : null,
|
|
308
293
|
exclude: exclude.length > 0 ? exclude : null
|
|
309
294
|
};
|
|
310
|
-
|
|
295
|
+
}
|
|
296
|
+
// parsedOptions goes out of scope here — GC can reclaim
|
|
311
297
|
}
|
|
312
|
-
}
|
|
313
298
|
|
|
314
299
|
// Domain rules: ||domain.com^ or ||domain.com
|
|
315
300
|
if (pattern.startsWith('||')) {
|
|
316
|
-
const domain = pattern.substring(2).replace(
|
|
301
|
+
const domain = pattern.substring(2).replace(/[\^\/\*].*$/, '');
|
|
317
302
|
const afterDomain = pattern.substring(2 + domain.length);
|
|
318
303
|
if (!afterDomain || afterDomain === '^') {
|
|
319
304
|
// Pure domain rule: ||domain.com^ or ||domain.com
|
|
@@ -321,16 +306,34 @@ function parseRule(rule, isWhitelist) {
|
|
|
321
306
|
parsed.domain = domain;
|
|
322
307
|
parsed.matcher = createDomainMatcher(domain);
|
|
323
308
|
} else {
|
|
324
|
-
// Domain + path rule: ||domain.com
|
|
325
|
-
|
|
309
|
+
// Domain + path rule: ||domain.com/path or ||domain.com^*path
|
|
310
|
+
// Split into fast domain check + path pattern to avoid full-URL regex
|
|
311
|
+
parsed.isDomain = true;
|
|
312
|
+
parsed.domain = domain;
|
|
313
|
+
const domainMatcher = createDomainMatcher(domain);
|
|
314
|
+
const pathMatcher = createPatternMatcher(afterDomain);
|
|
315
|
+
parsed.matcher = (url, hostname) => {
|
|
316
|
+
if (!domainMatcher(url, hostname)) return false;
|
|
317
|
+
// Extract path portion after hostname for path matching
|
|
318
|
+
const hostIdx = url.indexOf(hostname);
|
|
319
|
+
if (hostIdx === -1) return false;
|
|
320
|
+
const pathPart = url.substring(hostIdx + hostname.length);
|
|
321
|
+
return pathMatcher(pathPart);
|
|
322
|
+
};
|
|
326
323
|
}
|
|
327
324
|
}
|
|
328
325
|
// Regex rules: /pattern/
|
|
329
326
|
else if (pattern.startsWith('/') && pattern.endsWith('/')) {
|
|
330
327
|
parsed.isRegex = true;
|
|
331
|
-
const
|
|
332
|
-
|
|
333
|
-
|
|
328
|
+
const cached = _regexCache.get(pattern);
|
|
329
|
+
if (cached) {
|
|
330
|
+
parsed.matcher = cached;
|
|
331
|
+
} else {
|
|
332
|
+
const regexPattern = pattern.substring(1, pattern.length - 1);
|
|
333
|
+
const regex = new RegExp(regexPattern, 'i');
|
|
334
|
+
parsed.matcher = (url) => regex.test(url);
|
|
335
|
+
_regexCache.set(pattern, parsed.matcher);
|
|
336
|
+
}
|
|
334
337
|
}
|
|
335
338
|
// Path/wildcard rules: /ads/* or ad.js
|
|
336
339
|
else {
|
|
@@ -348,34 +351,58 @@ function parseRule(rule, isWhitelist) {
|
|
|
348
351
|
function createDomainMatcher(domain) {
|
|
349
352
|
const lowerDomain = domain.toLowerCase();
|
|
350
353
|
const dotDomain = '.' + lowerDomain;
|
|
354
|
+
// hostname is already lowercased by shouldBlock() before being passed here
|
|
351
355
|
return (url, hostname) => {
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
return lowerHostname === lowerDomain ||
|
|
355
|
-
lowerHostname.endsWith(dotDomain);
|
|
356
|
+
return hostname === lowerDomain ||
|
|
357
|
+
hostname.endsWith(dotDomain);
|
|
356
358
|
};
|
|
357
359
|
}
|
|
358
360
|
|
|
361
|
+
/**
|
|
362
|
+
* Shared regex cache — deduplicates identical compiled patterns across rules
|
|
363
|
+
* Large lists (EasyList ~80K rules) often have thousands of duplicate patterns
|
|
364
|
+
*/
|
|
365
|
+
const _regexCache = new Map();
|
|
366
|
+
|
|
359
367
|
/**
|
|
360
368
|
* Creates a pattern matcher for path/wildcard rules
|
|
361
369
|
* @param {string} pattern - Pattern with wildcards
|
|
362
370
|
* @returns {Function} Matcher function
|
|
363
371
|
*/
|
|
364
372
|
function createPatternMatcher(pattern) {
|
|
373
|
+
// Check cache for already-compiled identical pattern
|
|
374
|
+
const cached = _regexCache.get(pattern);
|
|
375
|
+
if (cached) return cached;
|
|
376
|
+
|
|
365
377
|
// Convert adblock pattern to regex
|
|
366
378
|
// * matches anything
|
|
367
379
|
// ^ matches separator (/, ?, &, =, :)
|
|
368
380
|
// | matches start/end of URL
|
|
369
|
-
|
|
381
|
+
|
|
382
|
+
// Handle | anchors before escaping — only at very start/end of pattern
|
|
383
|
+
let anchorStart = false;
|
|
384
|
+
let anchorEnd = false;
|
|
385
|
+
if (pattern.startsWith('|') && !pattern.startsWith('||')) {
|
|
386
|
+
anchorStart = true;
|
|
387
|
+
pattern = pattern.substring(1);
|
|
388
|
+
}
|
|
389
|
+
if (pattern.endsWith('|')) {
|
|
390
|
+
anchorEnd = true;
|
|
391
|
+
pattern = pattern.slice(0, -1);
|
|
392
|
+
}
|
|
393
|
+
|
|
370
394
|
let regexPattern = pattern
|
|
371
|
-
.replace(/[.+?{}()[\]
|
|
395
|
+
.replace(/[.+?{}()[\]\\|]/g, '\\$&') // Escape regex special chars including literal |
|
|
372
396
|
.replace(/\*/g, '.*') // * -> .*
|
|
373
|
-
.replace(/\^/g, '[/?&=:]')
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
397
|
+
.replace(/\^/g, '[/?&=:]'); // ^ -> separator chars
|
|
398
|
+
|
|
399
|
+
if (anchorStart) regexPattern = '^' + regexPattern;
|
|
400
|
+
if (anchorEnd) regexPattern = regexPattern + '$';
|
|
401
|
+
|
|
377
402
|
const regex = new RegExp(regexPattern, 'i');
|
|
378
|
-
|
|
403
|
+
const matcher = (url) => regex.test(url);
|
|
404
|
+
_regexCache.set(pattern, matcher);
|
|
405
|
+
return matcher;
|
|
379
406
|
}
|
|
380
407
|
|
|
381
408
|
/**
|
|
@@ -387,15 +414,25 @@ function createPatternMatcher(pattern) {
|
|
|
387
414
|
function createMatcher(rules, options = {}) {
|
|
388
415
|
const { enableLogging = false, caseSensitive = false } = options;
|
|
389
416
|
|
|
390
|
-
const urlCache = new URLCache(
|
|
417
|
+
const urlCache = new URLCache(16000);
|
|
391
418
|
let cacheHits = 0;
|
|
392
419
|
let cacheMisses = 0;
|
|
393
420
|
const hasPartyRules = rules.thirdPartyRules.length > 0 || rules.firstPartyRules.length > 0;
|
|
394
|
-
|
|
395
|
-
|
|
421
|
+
// Result cache with LRU eviction — evicts oldest entries one at a time
|
|
422
|
+
// instead of clearing everything when full
|
|
423
|
+
const resultCache = new URLCache(32000);
|
|
424
|
+
|
|
425
|
+
function resultCacheGet(url, sourceUrl, resourceType) {
|
|
426
|
+
return resultCache.get(url + '\0' + sourceUrl + '\0' + resourceType);
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
function resultCacheSet(url, sourceUrl, resourceType, result) {
|
|
430
|
+
resultCache.set(url + '\0' + sourceUrl + '\0' + resourceType, result);
|
|
431
|
+
}
|
|
432
|
+
|
|
396
433
|
return {
|
|
397
434
|
rules,
|
|
398
|
-
|
|
435
|
+
|
|
399
436
|
/**
|
|
400
437
|
* Check if URL should be blocked
|
|
401
438
|
* @param {string} url - URL to check
|
|
@@ -406,8 +443,7 @@ function createMatcher(rules, options = {}) {
|
|
|
406
443
|
shouldBlock(url, sourceUrl = '', resourceType = '') {
|
|
407
444
|
try {
|
|
408
445
|
// Check result cache — same URL+source+type always produces same result
|
|
409
|
-
const
|
|
410
|
-
const cachedResult = resultCache.get(resultKey);
|
|
446
|
+
const cachedResult = resultCacheGet(url, sourceUrl, resourceType);
|
|
411
447
|
if (cachedResult) {
|
|
412
448
|
cacheHits++;
|
|
413
449
|
return cachedResult;
|
|
@@ -435,14 +471,16 @@ function createMatcher(rules, options = {}) {
|
|
|
435
471
|
cacheMisses++;
|
|
436
472
|
}
|
|
437
473
|
|
|
438
|
-
//
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
474
|
+
// Lazy parent domain computation — only built when exact Map lookup misses
|
|
475
|
+
let parentDomains = null;
|
|
476
|
+
function getParentDomains() {
|
|
477
|
+
if (parentDomains) return parentDomains;
|
|
478
|
+
parentDomains = [];
|
|
479
|
+
const hostnameParts = lowerHostname.split('.');
|
|
480
|
+
for (let i = 1; i < hostnameParts.length; i++) {
|
|
481
|
+
parentDomains.push(hostnameParts.slice(i).join('.'));
|
|
482
|
+
}
|
|
483
|
+
return parentDomains;
|
|
446
484
|
}
|
|
447
485
|
|
|
448
486
|
// Extract and cache source page domain for $domain and third-party checks
|
|
@@ -482,26 +520,27 @@ function createMatcher(rules, options = {}) {
|
|
|
482
520
|
// Fast path: Check exact domain in Map (O(1))
|
|
483
521
|
let rule = rules.whitelistMap.get(lowerHostname); // V8: Single Map lookup
|
|
484
522
|
if (rule) {
|
|
485
|
-
if (
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
const r = { blocked: false, rule: rule.raw, reason: 'whitelisted' };
|
|
490
|
-
|
|
523
|
+
if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
|
|
524
|
+
if (enableLogging) {
|
|
525
|
+
console.log(`[Adblock] Whitelisted: ${url} (${rule.raw || rule.pattern})`);
|
|
526
|
+
}
|
|
527
|
+
const r = { blocked: false, rule: rule.raw || rule.pattern, reason: 'whitelisted' };
|
|
528
|
+
resultCacheSet(url, sourceUrl, resourceType, r);
|
|
491
529
|
return r;
|
|
492
530
|
}
|
|
493
531
|
}
|
|
494
|
-
|
|
532
|
+
|
|
495
533
|
// Check parent domains for subdomain matches (e.g., sub.example.com -> example.com)
|
|
496
|
-
|
|
497
|
-
|
|
534
|
+
const parents = getParentDomains();
|
|
535
|
+
for (let i = 0; i < parents.length; i++) {
|
|
536
|
+
rule = rules.whitelistMap.get(parents[i]);
|
|
498
537
|
if (rule) {
|
|
499
|
-
if (
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
const r = { blocked: false, rule: rule.raw, reason: 'whitelisted' };
|
|
504
|
-
|
|
538
|
+
if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
|
|
539
|
+
if (enableLogging) {
|
|
540
|
+
console.log(`[Adblock] Whitelisted: ${url} (${rule.raw || rule.pattern})`);
|
|
541
|
+
}
|
|
542
|
+
const r = { blocked: false, rule: rule.raw || rule.pattern, reason: 'whitelisted' };
|
|
543
|
+
resultCacheSet(url, sourceUrl, resourceType, r);
|
|
505
544
|
return r;
|
|
506
545
|
}
|
|
507
546
|
}
|
|
@@ -513,39 +552,39 @@ function createMatcher(rules, options = {}) {
|
|
|
513
552
|
const rule = rules.whitelist[i];
|
|
514
553
|
if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
|
|
515
554
|
if (enableLogging) {
|
|
516
|
-
console.log(`[Adblock] Whitelisted: ${url} (${rule.raw})`);
|
|
555
|
+
console.log(`[Adblock] Whitelisted: ${url} (${rule.raw || rule.pattern})`);
|
|
517
556
|
}
|
|
518
|
-
const r = { blocked: false, rule: rule.raw, reason: 'whitelisted' };
|
|
519
|
-
|
|
557
|
+
const r = { blocked: false, rule: rule.raw || rule.pattern, reason: 'whitelisted' };
|
|
558
|
+
resultCacheSet(url, sourceUrl, resourceType, r);
|
|
520
559
|
return r;
|
|
521
560
|
}
|
|
522
561
|
}
|
|
523
562
|
|
|
524
563
|
// === DOMAIN BLOCKING CHECK ===
|
|
525
|
-
|
|
564
|
+
|
|
526
565
|
// Fast path: Check exact domain in Map (O(1))
|
|
527
566
|
rule = rules.domainMap.get(lowerHostname); // V8: Single Map lookup
|
|
528
567
|
if (rule) {
|
|
529
|
-
if (
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
const r = { blocked: true, rule: rule.raw, reason: 'domain_rule' };
|
|
534
|
-
|
|
568
|
+
if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
|
|
569
|
+
if (enableLogging) {
|
|
570
|
+
console.log(`[Adblock] Blocked domain: ${url} (${rule.raw || rule.pattern})`);
|
|
571
|
+
}
|
|
572
|
+
const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'domain_rule' };
|
|
573
|
+
resultCacheSet(url, sourceUrl, resourceType, r);
|
|
535
574
|
return r;
|
|
536
575
|
}
|
|
537
576
|
}
|
|
538
|
-
|
|
577
|
+
|
|
539
578
|
// Check parent domains for subdomain matches (e.g., ads.example.com -> example.com)
|
|
540
|
-
for (let i = 0; i <
|
|
541
|
-
rule = rules.domainMap.get(
|
|
579
|
+
for (let i = 0; i < parents.length; i++) {
|
|
580
|
+
rule = rules.domainMap.get(parents[i]);
|
|
542
581
|
if (rule) {
|
|
543
|
-
if (
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
const r = { blocked: true, rule: rule.raw, reason: 'domain_rule' };
|
|
548
|
-
|
|
582
|
+
if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
|
|
583
|
+
if (enableLogging) {
|
|
584
|
+
console.log(`[Adblock] Blocked domain: ${url} (${rule.raw || rule.pattern})`);
|
|
585
|
+
}
|
|
586
|
+
const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'domain_rule' };
|
|
587
|
+
resultCacheSet(url, sourceUrl, resourceType, r);
|
|
549
588
|
return r;
|
|
550
589
|
}
|
|
551
590
|
}
|
|
@@ -557,10 +596,10 @@ function createMatcher(rules, options = {}) {
|
|
|
557
596
|
const rule = rules.domainRules[i];
|
|
558
597
|
if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
|
|
559
598
|
if (enableLogging) {
|
|
560
|
-
console.log(`[Adblock] Blocked domain: ${url} (${rule.raw})`);
|
|
599
|
+
console.log(`[Adblock] Blocked domain: ${url} (${rule.raw || rule.pattern})`);
|
|
561
600
|
}
|
|
562
|
-
const r = { blocked: true, rule: rule.raw, reason: 'domain_rule' };
|
|
563
|
-
|
|
601
|
+
const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'domain_rule' };
|
|
602
|
+
resultCacheSet(url, sourceUrl, resourceType, r);
|
|
564
603
|
return r;
|
|
565
604
|
}
|
|
566
605
|
}
|
|
@@ -572,10 +611,10 @@ function createMatcher(rules, options = {}) {
|
|
|
572
611
|
const rule = rules.thirdPartyRules[i];
|
|
573
612
|
if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
|
|
574
613
|
if (enableLogging) {
|
|
575
|
-
console.log(`[Adblock] Blocked third-party: ${url} (${rule.raw})`);
|
|
614
|
+
console.log(`[Adblock] Blocked third-party: ${url} (${rule.raw || rule.pattern})`);
|
|
576
615
|
}
|
|
577
|
-
const r = { blocked: true, rule: rule.raw, reason: 'third_party_rule' };
|
|
578
|
-
|
|
616
|
+
const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'third_party_rule' };
|
|
617
|
+
resultCacheSet(url, sourceUrl, resourceType, r);
|
|
579
618
|
return r;
|
|
580
619
|
}
|
|
581
620
|
}
|
|
@@ -588,10 +627,10 @@ function createMatcher(rules, options = {}) {
|
|
|
588
627
|
const rule = rules.firstPartyRules[i];
|
|
589
628
|
if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
|
|
590
629
|
if (enableLogging) {
|
|
591
|
-
console.log(`[Adblock] Blocked first-party: ${url} (${rule.raw})`);
|
|
630
|
+
console.log(`[Adblock] Blocked first-party: ${url} (${rule.raw || rule.pattern})`);
|
|
592
631
|
}
|
|
593
|
-
const r = { blocked: true, rule: rule.raw, reason: 'first_party_rule' };
|
|
594
|
-
|
|
632
|
+
const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'first_party_rule' };
|
|
633
|
+
resultCacheSet(url, sourceUrl, resourceType, r);
|
|
595
634
|
return r;
|
|
596
635
|
}
|
|
597
636
|
}
|
|
@@ -604,10 +643,10 @@ function createMatcher(rules, options = {}) {
|
|
|
604
643
|
const rule = rules.scriptRules[i];
|
|
605
644
|
if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
|
|
606
645
|
if (enableLogging) {
|
|
607
|
-
console.log(`[Adblock] Blocked script: ${url} (${rule.raw})`);
|
|
646
|
+
console.log(`[Adblock] Blocked script: ${url} (${rule.raw || rule.pattern})`);
|
|
608
647
|
}
|
|
609
|
-
const r = { blocked: true, rule: rule.raw, reason: 'script_rule' };
|
|
610
|
-
|
|
648
|
+
const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'script_rule' };
|
|
649
|
+
resultCacheSet(url, sourceUrl, resourceType, r);
|
|
611
650
|
return r;
|
|
612
651
|
}
|
|
613
652
|
}
|
|
@@ -619,10 +658,10 @@ function createMatcher(rules, options = {}) {
|
|
|
619
658
|
const rule = rules.pathRules[i];
|
|
620
659
|
if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
|
|
621
660
|
if (enableLogging) {
|
|
622
|
-
console.log(`[Adblock] Blocked path: ${url} (${rule.raw})`);
|
|
661
|
+
console.log(`[Adblock] Blocked path: ${url} (${rule.raw || rule.pattern})`);
|
|
623
662
|
}
|
|
624
|
-
const r = { blocked: true, rule: rule.raw, reason: 'path_rule' };
|
|
625
|
-
|
|
663
|
+
const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'path_rule' };
|
|
664
|
+
resultCacheSet(url, sourceUrl, resourceType, r);
|
|
626
665
|
return r;
|
|
627
666
|
}
|
|
628
667
|
}
|
|
@@ -633,17 +672,17 @@ function createMatcher(rules, options = {}) {
|
|
|
633
672
|
const rule = rules.regexRules[i];
|
|
634
673
|
if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) {
|
|
635
674
|
if (enableLogging) {
|
|
636
|
-
console.log(`[Adblock] Blocked regex: ${url} (${rule.raw})`);
|
|
675
|
+
console.log(`[Adblock] Blocked regex: ${url} (${rule.raw || rule.pattern})`);
|
|
637
676
|
}
|
|
638
|
-
const r = { blocked: true, rule: rule.raw, reason: 'regex_rule' };
|
|
639
|
-
|
|
677
|
+
const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'regex_rule' };
|
|
678
|
+
resultCacheSet(url, sourceUrl, resourceType, r);
|
|
640
679
|
return r;
|
|
641
680
|
}
|
|
642
681
|
}
|
|
643
682
|
|
|
644
683
|
// No match - allow request
|
|
645
684
|
const r = { blocked: false, rule: null, reason: 'no_match' };
|
|
646
|
-
|
|
685
|
+
resultCacheSet(url, sourceUrl, resourceType, r);
|
|
647
686
|
return r;
|
|
648
687
|
|
|
649
688
|
} catch (err) {
|
|
@@ -674,7 +713,8 @@ function createMatcher(rules, options = {}) {
|
|
|
674
713
|
hits: cacheHits,
|
|
675
714
|
misses: cacheMisses,
|
|
676
715
|
hitRate: hitRate,
|
|
677
|
-
|
|
716
|
+
urlCacheSize: urlCache.cache.size,
|
|
717
|
+
resultCacheSize: resultCache.cache.size,
|
|
678
718
|
maxSize: urlCache.maxSize
|
|
679
719
|
}
|
|
680
720
|
};
|
|
@@ -785,26 +825,18 @@ function matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDoma
|
|
|
785
825
|
return false;
|
|
786
826
|
}
|
|
787
827
|
|
|
788
|
-
//
|
|
789
|
-
if (rule.resourceTypes) {
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
if (!rule.resourceTypes.includes(normalizedType)) {
|
|
796
|
-
return false;
|
|
797
|
-
}
|
|
828
|
+
// Normalize resource type once for both checks
|
|
829
|
+
if (resourceType && (rule.resourceTypes || rule.excludedResourceTypes)) {
|
|
830
|
+
const normalizedType = RESOURCE_TYPE_ALIASES[resourceType] || resourceType;
|
|
831
|
+
|
|
832
|
+
// Check resource type restrictions
|
|
833
|
+
if (rule.resourceTypes && !rule.resourceTypes.has(normalizedType)) {
|
|
834
|
+
return false;
|
|
798
835
|
}
|
|
799
|
-
}
|
|
800
836
|
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
const normalizedType = RESOURCE_TYPE_ALIASES[resourceType] || resourceType;
|
|
805
|
-
if (rule.excludedResourceTypes.includes(normalizedType)) {
|
|
806
|
-
return false;
|
|
807
|
-
}
|
|
837
|
+
// Check negated resource type restrictions ($~script, $~image, etc.)
|
|
838
|
+
if (rule.excludedResourceTypes && rule.excludedResourceTypes.has(normalizedType)) {
|
|
839
|
+
return false;
|
|
808
840
|
}
|
|
809
841
|
}
|
|
810
842
|
|
|
@@ -817,17 +849,21 @@ function matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDoma
|
|
|
817
849
|
}
|
|
818
850
|
|
|
819
851
|
/**
|
|
820
|
-
* Extract base domain from hostname
|
|
852
|
+
* Extract base domain from hostname using Public Suffix List
|
|
853
|
+
* Correctly handles multi-part TLDs like .co.uk, .com.au, .com.br
|
|
821
854
|
* @param {string} hostname - Full hostname
|
|
822
855
|
* @returns {string} Base domain
|
|
823
856
|
*/
|
|
857
|
+
const _baseDomainCache = new Map();
|
|
824
858
|
function getBaseDomain(hostname) {
|
|
825
|
-
const
|
|
826
|
-
if (
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
//
|
|
830
|
-
|
|
859
|
+
const cached = _baseDomainCache.get(hostname);
|
|
860
|
+
if (cached) return cached;
|
|
861
|
+
const parsed = psl.parse(hostname);
|
|
862
|
+
const result = (parsed && parsed.domain) ? parsed.domain : hostname;
|
|
863
|
+
// Cap cache size
|
|
864
|
+
if (_baseDomainCache.size > 10000) _baseDomainCache.clear();
|
|
865
|
+
_baseDomainCache.set(hostname, result);
|
|
866
|
+
return result;
|
|
831
867
|
}
|
|
832
868
|
|
|
833
869
|
module.exports = {
|