@fanboynz/network-scanner 2.0.66 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm-publish.yml +134 -10
- package/CHANGELOG.md +135 -0
- package/CLAUDE.md +18 -7
- package/README.md +12 -4
- package/lib/adblock-rust.js +23 -18
- package/lib/adblock.js +127 -82
- package/lib/browserexit.js +210 -200
- package/lib/browserhealth.js +84 -60
- package/lib/cdp.js +103 -81
- package/lib/clear_sitedata.js +61 -159
- package/lib/cloudflare.js +579 -409
- package/lib/colorize.js +29 -12
- package/lib/compare.js +16 -8
- package/lib/compress.js +2 -1
- package/lib/curl.js +287 -220
- package/lib/domain-cache.js +87 -40
- package/lib/dry-run.js +137 -194
- package/lib/fingerprint.js +20 -18
- package/lib/flowproxy.js +391 -188
- package/lib/ghost-cursor.js +8 -7
- package/lib/grep.js +248 -171
- package/lib/ignore_similar.js +70 -124
- package/lib/interaction.js +132 -235
- package/lib/nettools.js +309 -87
- package/lib/openvpn_vpn.js +12 -11
- package/lib/output.js +92 -59
- package/lib/post-processing.js +216 -162
- package/lib/redirect.js +46 -30
- package/lib/referrer.js +158 -165
- package/lib/searchstring.js +290 -381
- package/lib/smart-cache.js +141 -91
- package/lib/socks-relay.js +8 -7
- package/lib/spawn-async.js +137 -0
- package/lib/validate_rules.js +188 -176
- package/lib/wireguard_vpn.js +111 -117
- package/nwss.js +740 -156
- package/package.json +4 -4
package/lib/post-processing.js
CHANGED
|
@@ -10,23 +10,48 @@ const REGEX_DNSMASQ_LOCAL = /local=\/([^/]+)\//;
|
|
|
10
10
|
const REGEX_DNSMASQ_SERVER = /server=\/([^/]+)\//;
|
|
11
11
|
const REGEX_UNBOUND = /local-zone:\s*"([^"]+)\.?"/;
|
|
12
12
|
const REGEX_PRIVOXY = /\{\s*\+block\s*\}\s*\.?([^\s]+)/;
|
|
13
|
-
|
|
13
|
+
// Pi-hole prefix detect + strip (tolerates optional backslash before the dot,
|
|
14
|
+
// matching how output.js writes both). The old single-regex with a trailing
|
|
15
|
+
// `\.\w+$` was capturing everything up to (but not including) the TLD, so
|
|
16
|
+
// 'example.com' came out as 'example' and downstream filters never matched.
|
|
17
|
+
const REGEX_PIHOLE_PREFIX = /^\(\^\|\\?\.\)/;
|
|
18
|
+
const REGEX_TRAILING_DOLLAR = /\$$/;
|
|
14
19
|
const REGEX_DOMAIN_FALLBACK = /([a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,})/;
|
|
15
20
|
const REGEX_WHITESPACE = /\s+/;
|
|
16
21
|
const REGEX_UNESCAPE_DOT = /\\\./g;
|
|
22
|
+
// Regex meta-chars we must escape in a wildcard pattern before turning '*'
|
|
23
|
+
// into '.*'. Without this, a pattern like 'foo+bar.com' would treat '+' as
|
|
24
|
+
// a quantifier, and 'foo(bar.com' would throw on RegExp construction.
|
|
25
|
+
const REGEX_META_ESCAPE = /[.+?^${}()|[\]\\]/g;
|
|
26
|
+
// Sentinel regex that never matches — used when a pattern is so malformed
|
|
27
|
+
// that even our escaped version fails to compile.
|
|
28
|
+
const NEVER_MATCH = /(?!)/;
|
|
17
29
|
|
|
18
30
|
// Cache for compiled wildcard regex patterns
|
|
19
31
|
const wildcardRegexCache = new Map();
|
|
20
32
|
|
|
21
33
|
/**
|
|
22
|
-
* Get or compile a wildcard pattern regex (cached)
|
|
34
|
+
* Get or compile a wildcard pattern regex (cached). Escapes every regex
|
|
35
|
+
* metacharacter except '*' before turning '*' into '.*'. The previous
|
|
36
|
+
* version only escaped '.', so patterns with '+', '(', '[', etc. would
|
|
37
|
+
* either silently misbehave or throw synchronously out of the caller.
|
|
23
38
|
* @param {string} pattern - Wildcard pattern string
|
|
24
39
|
* @returns {RegExp} Compiled regex
|
|
25
40
|
*/
|
|
26
41
|
function getWildcardRegex(pattern) {
|
|
27
42
|
let regex = wildcardRegexCache.get(pattern);
|
|
28
43
|
if (!regex) {
|
|
29
|
-
|
|
44
|
+
try {
|
|
45
|
+
regex = new RegExp(
|
|
46
|
+
'^' +
|
|
47
|
+
pattern.replace(REGEX_META_ESCAPE, '\\$&').replace(/\*/g, '.*') +
|
|
48
|
+
'$'
|
|
49
|
+
);
|
|
50
|
+
} catch (_) {
|
|
51
|
+
// Defensive belt-and-braces: a still-malformed pattern becomes
|
|
52
|
+
// never-match instead of crashing the calling cleanup loop.
|
|
53
|
+
regex = NEVER_MATCH;
|
|
54
|
+
}
|
|
30
55
|
wildcardRegexCache.set(pattern, regex);
|
|
31
56
|
// Cap cache size
|
|
32
57
|
if (wildcardRegexCache.size > 200) {
|
|
@@ -56,6 +81,24 @@ function safeGetDomain(url, getFullHostname = false) {
|
|
|
56
81
|
}
|
|
57
82
|
}
|
|
58
83
|
|
|
84
|
+
/**
 * Resolve a bare hostname to its registrable root domain using psl directly,
 * avoiding the URL-parse round-trip that safeGetDomain pays. Intended for
 * callers that already hold a plain hostname (no scheme, path, or port).
 * @param {string} hostname - Bare hostname (e.g. 'sub.example.com')
 * @returns {string} The registrable root domain ('example.com'); the input
 *   hostname unchanged when psl reports no domain for it; '' when the input
 *   is empty / not a string or psl throws
 */
function getDomainFromHostname(hostname) {
  const isUsable = typeof hostname === 'string' && hostname.length > 0;
  if (!isUsable) {
    return '';
  }

  let rootDomain;
  try {
    rootDomain = psl.parse(hostname).domain;
  } catch (_) {
    // Any failure inside psl is reported to the caller as "no domain".
    return '';
  }

  // psl yields a falsy domain when it cannot derive one — hand the hostname back.
  return rootDomain || hostname;
}
|
|
101
|
+
|
|
59
102
|
/**
|
|
60
103
|
* Enhanced domain extraction helper - single source of truth for all rule formats
|
|
61
104
|
* (Was duplicated inline in cleanupIgnoreDomains and cleanupFirstPartyDomains)
|
|
@@ -105,10 +148,15 @@ function extractDomainFromRule(rule) {
|
|
|
105
148
|
return match ? match[1] : null;
|
|
106
149
|
}
|
|
107
150
|
|
|
108
|
-
// Pi-hole regex: (^|\.)domain\.com$
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
151
|
+
// Pi-hole regex: (^|\.)domain\.com$
|
|
152
|
+
// Strip the leading '(^|\.)' (or '(^|.)') prefix, unescape '\.' to '.',
|
|
153
|
+
// and drop the trailing '$'. Matches output.js's extractDomainFromRule
|
|
154
|
+
// shape — the old regex-based capture here lost the TLD.
|
|
155
|
+
if (rule.charCodeAt(0) === 40 && REGEX_PIHOLE_PREFIX.test(rule)) { // '('
|
|
156
|
+
return rule
|
|
157
|
+
.replace(REGEX_PIHOLE_PREFIX, '')
|
|
158
|
+
.replace(REGEX_UNESCAPE_DOT, '.')
|
|
159
|
+
.replace(REGEX_TRAILING_DOLLAR, '');
|
|
112
160
|
}
|
|
113
161
|
|
|
114
162
|
// Fallback: any domain-like pattern
|
|
@@ -125,43 +173,57 @@ function extractDomainFromRule(rule) {
|
|
|
125
173
|
*/
|
|
126
174
|
function shouldIgnoreAsIgnoreDomain(domain, ignorePatterns, forceDebug) {
  // Patterns are tried in list order and the first match wins. Shapes:
  //   '*.example.com'  -> match when pattern and domain share a registrable root
  //   'example.*'      -> match when the domain starts with 'example.'
  //   any other '*'    -> match through the cached regex from getWildcardRegex
  //   no '*'           -> match the domain itself or any subdomain of it
  // The result always carries { shouldIgnore, reason, matchType }, where
  // matchType is 'wildcard', 'exact', or null when nothing matched.
  if (!domain || !ignorePatterns || ignorePatterns.length === 0) {
    return { shouldIgnore: false, reason: 'No ignore patterns', matchType: null };
  }

  // domain is loop-invariant, so its registrable root is computed at most
  // once, and only if a '*.' pattern is actually encountered.
  let domainRoot = null;
  let domainRootComputed = false;

  for (let i = 0; i < ignorePatterns.length; i++) {
    const pattern = ignorePatterns[i];

    // Skip malformed entries: a non-string would throw on the string methods
    // below, and an empty string would "match" any domain ending in '.'.
    if (typeof pattern !== 'string' || pattern.length === 0) continue;

    if (pattern.includes('*')) {
      if (pattern.startsWith('*.')) {
        // Pattern: *.example.com — both sides are already bare hostnames,
        // so the root lookup goes straight through getDomainFromHostname.
        const wildcardDomain = pattern.substring(2);
        const wildcardRoot = getDomainFromHostname(wildcardDomain);
        if (!domainRootComputed) {
          domainRoot = getDomainFromHostname(domain);
          domainRootComputed = true;
        }

        // getDomainFromHostname reports failure as ''; two failures must not
        // compare equal and be treated as a match.
        if (wildcardRoot && wildcardRoot === domainRoot) {
          if (forceDebug) console.log(formatLogMessage('debug', '[ignoreDomains] ' + domain + ' matches wildcard pattern ' + pattern + ' (root=' + wildcardRoot + ')'));
          return { shouldIgnore: true, reason: 'Matches wildcard ignore pattern: ' + pattern, matchType: 'wildcard' };
        }
      } else if (pattern.endsWith('.*')) {
        // Pattern: example.*
        const baseDomain = pattern.slice(0, -2);
        if (domain.startsWith(baseDomain + '.')) {
          if (forceDebug) console.log(formatLogMessage('debug', '[ignoreDomains] ' + domain + ' matches TLD-wildcard pattern ' + pattern));
          return { shouldIgnore: true, reason: 'Matches wildcard TLD ignore pattern: ' + pattern, matchType: 'wildcard' };
        }
      } else {
        // Complex wildcard -- use cached regex
        const wildcardRegex = getWildcardRegex(pattern);
        if (wildcardRegex.test(domain)) {
          if (forceDebug) console.log(formatLogMessage('debug', '[ignoreDomains] ' + domain + ' matches complex wildcard pattern ' + pattern));
          return { shouldIgnore: true, reason: 'Matches complex wildcard ignore pattern: ' + pattern, matchType: 'wildcard' };
        }
      }
    } else {
      // Exact pattern matching: the domain itself or any subdomain of it
      if (domain === pattern || domain.endsWith('.' + pattern)) {
        if (forceDebug) console.log(formatLogMessage('debug', '[ignoreDomains] ' + domain + ' matches exact pattern ' + pattern));
        return { shouldIgnore: true, reason: 'Matches exact ignore pattern: ' + pattern, matchType: 'exact' };
      }
    }
  }

  return { shouldIgnore: false, reason: 'No ignore pattern matches', matchType: null };
}
|
|
166
228
|
|
|
167
229
|
/**
|
|
@@ -173,38 +235,43 @@ function shouldIgnoreAsIgnoreDomain(domain, ignorePatterns, forceDebug) {
|
|
|
173
235
|
*/
|
|
174
236
|
function shouldRemoveAsFirstParty(extractedDomain, scannedRootDomain, forceDebug) {
  // Decide whether a rule's domain belongs to the scanned site itself.
  // Returns { shouldRemove, reason, matchType } where matchType is
  // 'wildcard', 'exact', or null when the rule should be kept.
  if (!(extractedDomain && scannedRootDomain)) {
    return { shouldRemove: false, reason: 'Missing domain data', matchType: null };
  }

  // Classify the wildcard shape up front; the three shapes are mutually exclusive.
  const hasWildcard = extractedDomain.includes('*');
  const isSubdomainWildcard = hasWildcard && extractedDomain.startsWith('*.');
  const isTldWildcard = hasWildcard && !isSubdomainWildcard && extractedDomain.endsWith('.*');

  if (isSubdomainWildcard) {
    // '*.example.com' — compare registrable roots.
    const patternRoot = getDomainFromHostname(extractedDomain.substring(2));
    if (patternRoot === scannedRootDomain) {
      if (forceDebug) {
        console.log(formatLogMessage('debug', '[firstParty] ' + extractedDomain + ' matches root domain via wildcard subdomain (*.' + patternRoot + ')'));
      }
      return {
        shouldRemove: true,
        reason: 'Wildcard subdomain pattern matches root domain (*.' + patternRoot + ')',
        matchType: 'wildcard'
      };
    }
  } else if (isTldWildcard) {
    // 'example.*' — the scanned root must start with 'example.'.
    const stem = extractedDomain.slice(0, -2);
    if (scannedRootDomain.startsWith(stem + '.')) {
      if (forceDebug) {
        console.log(formatLogMessage('debug', '[firstParty] ' + extractedDomain + ' matches root domain via TLD-wildcard (' + stem + '.*)'));
      }
      return {
        shouldRemove: true,
        reason: 'Wildcard TLD pattern matches base domain (' + stem + '.*)',
        matchType: 'wildcard'
      };
    }
  } else if (hasWildcard) {
    // Any other '*' placement goes through the cached compiled regex.
    if (getWildcardRegex(extractedDomain).test(scannedRootDomain)) {
      if (forceDebug) {
        console.log(formatLogMessage('debug', '[firstParty] ' + extractedDomain + ' matches root domain via complex wildcard'));
      }
      return {
        shouldRemove: true,
        reason: 'Complex wildcard pattern matches root domain (' + extractedDomain + ')',
        matchType: 'wildcard'
      };
    }
  }

  // Fall through to a plain registrable-root comparison (also reached when a
  // wildcard pattern did not match above).
  const ruleRoot = getDomainFromHostname(extractedDomain);
  if (ruleRoot !== scannedRootDomain) {
    return { shouldRemove: false, reason: 'No first-party match detected', matchType: null };
  }

  if (forceDebug) {
    console.log(formatLogMessage('debug', '[firstParty] ' + extractedDomain + ' matches root domain ' + scannedRootDomain + ' exactly (root=' + ruleRoot + ')'));
  }
  return { shouldRemove: true, reason: 'Exact root domain match (' + ruleRoot + ')', matchType: 'exact' };
}
|
|
209
276
|
|
|
210
277
|
/**
|
|
@@ -246,19 +313,27 @@ function cleanupIgnoreDomains(results, ignoreDomains, options = {}) {
|
|
|
246
313
|
console.log(formatLogMessage('debug', '[ignoreDomains cleanup] Processing ' + results.length + ' results against ' + ignoreDomains.length + ' ignore patterns'));
|
|
247
314
|
}
|
|
248
315
|
|
|
249
|
-
|
|
316
|
+
// We mutate result.rules in place and return `results` directly — the
|
|
317
|
+
// previous version allocated a separate cleanedResults array but pushed
|
|
318
|
+
// every original result reference into it unchanged, which was pure waste
|
|
319
|
+
// (and gave callers a false sense of immutability when the input was
|
|
320
|
+
// being mutated anyway).
|
|
250
321
|
let totalRulesRemoved = 0;
|
|
251
322
|
let sitesAffected = 0;
|
|
323
|
+
// The per-rule detail objects in removedRules are only consumed by the
|
|
324
|
+
// forceDebug per-rule list — skip allocating them on the silent/non-debug
|
|
325
|
+
// path. Counts (wildcard/exact) are tracked separately because the
|
|
326
|
+
// !silentMode summary still needs them.
|
|
327
|
+
const needsDetails = forceDebug;
|
|
252
328
|
|
|
253
329
|
for (let ri = 0; ri < results.length; ri++) {
|
|
254
330
|
const result = results[ri];
|
|
255
|
-
if (!result.rules || result.rules.length === 0)
|
|
256
|
-
cleanedResults.push(result);
|
|
257
|
-
continue;
|
|
258
|
-
}
|
|
331
|
+
if (!result.rules || result.rules.length === 0) continue;
|
|
259
332
|
|
|
260
333
|
const cleanedRules = [];
|
|
261
|
-
const removedRules = [];
|
|
334
|
+
const removedRules = needsDetails ? [] : null;
|
|
335
|
+
let removedCount = 0;
|
|
336
|
+
let wildcardCount = 0;
|
|
262
337
|
|
|
263
338
|
for (let j = 0; j < result.rules.length; j++) {
|
|
264
339
|
const rule = result.rules[j];
|
|
@@ -267,17 +342,21 @@ function cleanupIgnoreDomains(results, ignoreDomains, options = {}) {
|
|
|
267
342
|
try {
|
|
268
343
|
// Use shared extractDomainFromRule (was duplicated inline)
|
|
269
344
|
const extractedDomain = extractDomainFromRule(rule);
|
|
270
|
-
|
|
345
|
+
|
|
271
346
|
if (extractedDomain) {
|
|
272
347
|
const ignoreResult = shouldIgnoreAsIgnoreDomain(extractedDomain, ignoreDomains, forceDebug);
|
|
273
|
-
|
|
348
|
+
|
|
274
349
|
if (ignoreResult.shouldIgnore) {
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
350
|
+
removedCount++;
|
|
351
|
+
if (ignoreResult.matchType === 'wildcard') wildcardCount++;
|
|
352
|
+
if (needsDetails) {
|
|
353
|
+
removedRules.push({
|
|
354
|
+
rule: rule,
|
|
355
|
+
domain: extractedDomain,
|
|
356
|
+
reason: 'ignoreDomains: ' + ignoreResult.reason,
|
|
357
|
+
matchType: ignoreResult.matchType
|
|
358
|
+
});
|
|
359
|
+
}
|
|
281
360
|
kept = false;
|
|
282
361
|
}
|
|
283
362
|
}
|
|
@@ -294,25 +373,18 @@ function cleanupIgnoreDomains(results, ignoreDomains, options = {}) {
|
|
|
294
373
|
|
|
295
374
|
// Mutate rules directly instead of spreading entire result object
|
|
296
375
|
result.rules = cleanedRules;
|
|
297
|
-
cleanedResults.push(result);
|
|
298
376
|
|
|
299
|
-
if (
|
|
377
|
+
if (removedCount > 0) {
|
|
300
378
|
sitesAffected++;
|
|
301
|
-
totalRulesRemoved +=
|
|
302
|
-
|
|
379
|
+
totalRulesRemoved += removedCount;
|
|
380
|
+
|
|
303
381
|
if (!silentMode) {
|
|
304
|
-
|
|
305
|
-
let
|
|
306
|
-
for (let k = 0; k < removedRules.length; k++) {
|
|
307
|
-
if (removedRules[k].matchType === 'wildcard') wildcardCount++;
|
|
308
|
-
}
|
|
309
|
-
const exactCount = removedRules.length - wildcardCount;
|
|
310
|
-
|
|
311
|
-
let cleanupMessage = '?? Removed ' + removedRules.length + ' ignoreDomains rule(s) from ' + safeGetDomain(result.url) + ' (final cleanup)';
|
|
382
|
+
const exactCount = removedCount - wildcardCount;
|
|
383
|
+
let cleanupMessage = 'Removed ' + removedCount + ' ignoreDomains rule(s) from ' + safeGetDomain(result.url) + ' (final cleanup)';
|
|
312
384
|
if (wildcardCount > 0) {
|
|
313
385
|
cleanupMessage += ' [' + wildcardCount + ' wildcard, ' + exactCount + ' exact]';
|
|
314
386
|
}
|
|
315
|
-
|
|
387
|
+
|
|
316
388
|
if (messageColors && messageColors.cleanup) {
|
|
317
389
|
console.log(messageColors.cleanup(cleanupMessage));
|
|
318
390
|
} else {
|
|
@@ -328,9 +400,11 @@ function cleanupIgnoreDomains(results, ignoreDomains, options = {}) {
|
|
|
328
400
|
}
|
|
329
401
|
}
|
|
330
402
|
|
|
331
|
-
// Summary
|
|
403
|
+
// Summary. When silentMode hides the visible message but rules WERE
|
|
404
|
+
// removed, the debug log used to claim "no rules found" — fixed by
|
|
405
|
+
// gating the "no rules" message on the actual count.
|
|
332
406
|
if (totalRulesRemoved > 0 && !silentMode) {
|
|
333
|
-
const summaryMessage = '\
|
|
407
|
+
const summaryMessage = '\nignoreDomains cleanup completed: Removed ' + totalRulesRemoved + ' rules from ' + sitesAffected + ' site(s)';
|
|
334
408
|
|
|
335
409
|
if (messageColors && messageColors.cleanup) {
|
|
336
410
|
console.log(messageColors.cleanup(summaryMessage));
|
|
@@ -338,10 +412,12 @@ function cleanupIgnoreDomains(results, ignoreDomains, options = {}) {
|
|
|
338
412
|
console.log(summaryMessage);
|
|
339
413
|
}
|
|
340
414
|
} else if (forceDebug) {
|
|
341
|
-
console.log(formatLogMessage('debug',
|
|
415
|
+
console.log(formatLogMessage('debug', totalRulesRemoved > 0
|
|
416
|
+
? '[ignoreDomains cleanup] (silentMode) Removed ' + totalRulesRemoved + ' rules from ' + sitesAffected + ' site(s)'
|
|
417
|
+
: '[ignoreDomains cleanup] No ignoreDomains rules found to remove'));
|
|
342
418
|
}
|
|
343
419
|
|
|
344
|
-
return
|
|
420
|
+
return results;
|
|
345
421
|
}
|
|
346
422
|
|
|
347
423
|
/**
|
|
@@ -366,32 +442,29 @@ function cleanupFirstPartyDomains(results, sites, options = {}) {
|
|
|
366
442
|
// Use pre-built map if passed, otherwise build it
|
|
367
443
|
const urlToSiteConfig = options._urlToSiteConfig || buildUrlToSiteConfig(sites);
|
|
368
444
|
|
|
369
|
-
|
|
445
|
+
// Mutate result.rules in place; return `results` directly.
|
|
370
446
|
let totalRulesRemoved = 0;
|
|
371
447
|
let sitesAffected = 0;
|
|
448
|
+
const needsDetails = forceDebug;
|
|
372
449
|
|
|
373
450
|
for (let ri = 0; ri < results.length; ri++) {
|
|
374
451
|
const result = results[ri];
|
|
375
452
|
const siteConfig = urlToSiteConfig.get(result.url);
|
|
376
453
|
const shouldCleanFirstParty = siteConfig && siteConfig.firstParty === false;
|
|
377
|
-
|
|
378
|
-
if (!shouldCleanFirstParty || !result.rules || result.rules.length === 0)
|
|
379
|
-
cleanedResults.push(result);
|
|
380
|
-
continue;
|
|
381
|
-
}
|
|
454
|
+
|
|
455
|
+
if (!shouldCleanFirstParty || !result.rules || result.rules.length === 0) continue;
|
|
382
456
|
|
|
383
457
|
if (forceDebug) {
|
|
384
458
|
console.log(formatLogMessage('debug', '[cleanup] Processing ' + result.url + ' (firstParty: false detected)'));
|
|
385
459
|
}
|
|
386
460
|
|
|
387
461
|
const scannedDomain = safeGetDomain(result.url, false);
|
|
388
|
-
if (!scannedDomain)
|
|
389
|
-
cleanedResults.push(result);
|
|
390
|
-
continue;
|
|
391
|
-
}
|
|
462
|
+
if (!scannedDomain) continue;
|
|
392
463
|
|
|
393
464
|
const cleanedRules = [];
|
|
394
|
-
const removedRules = [];
|
|
465
|
+
const removedRules = needsDetails ? [] : null;
|
|
466
|
+
let removedCount = 0;
|
|
467
|
+
let wildcardCount = 0;
|
|
395
468
|
|
|
396
469
|
for (let j = 0; j < result.rules.length; j++) {
|
|
397
470
|
const rule = result.rules[j];
|
|
@@ -403,15 +476,19 @@ function cleanupFirstPartyDomains(results, sites, options = {}) {
|
|
|
403
476
|
|
|
404
477
|
if (extractedDomain) {
|
|
405
478
|
const matchResult = shouldRemoveAsFirstParty(extractedDomain, scannedDomain, forceDebug);
|
|
406
|
-
|
|
479
|
+
|
|
407
480
|
if (matchResult.shouldRemove) {
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
481
|
+
removedCount++;
|
|
482
|
+
if (matchResult.matchType === 'wildcard') wildcardCount++;
|
|
483
|
+
if (needsDetails) {
|
|
484
|
+
removedRules.push({
|
|
485
|
+
rule: rule,
|
|
486
|
+
domain: extractedDomain,
|
|
487
|
+
rootDomain: scannedDomain,
|
|
488
|
+
reason: 'First-party: ' + matchResult.reason + ' (firstParty: false)',
|
|
489
|
+
matchType: matchResult.matchType
|
|
490
|
+
});
|
|
491
|
+
}
|
|
415
492
|
kept = false;
|
|
416
493
|
}
|
|
417
494
|
}
|
|
@@ -428,21 +505,14 @@ function cleanupFirstPartyDomains(results, sites, options = {}) {
|
|
|
428
505
|
|
|
429
506
|
// Mutate rules directly instead of { ...result, rules: cleanedRules }
|
|
430
507
|
result.rules = cleanedRules;
|
|
431
|
-
cleanedResults.push(result);
|
|
432
508
|
|
|
433
|
-
if (
|
|
509
|
+
if (removedCount > 0) {
|
|
434
510
|
sitesAffected++;
|
|
435
|
-
totalRulesRemoved +=
|
|
436
|
-
|
|
511
|
+
totalRulesRemoved += removedCount;
|
|
512
|
+
|
|
437
513
|
if (!silentMode) {
|
|
438
|
-
|
|
439
|
-
let
|
|
440
|
-
for (let k = 0; k < removedRules.length; k++) {
|
|
441
|
-
if (removedRules[k].matchType === 'wildcard') wildcardCount++;
|
|
442
|
-
}
|
|
443
|
-
const exactCount = removedRules.length - wildcardCount;
|
|
444
|
-
|
|
445
|
-
let cleanupMessage = '?? Cleaned ' + removedRules.length + ' first-party rule(s) from ' + scannedDomain + ' (firstParty: false)';
|
|
514
|
+
const exactCount = removedCount - wildcardCount;
|
|
515
|
+
let cleanupMessage = 'Cleaned ' + removedCount + ' first-party rule(s) from ' + scannedDomain + ' (firstParty: false)';
|
|
446
516
|
if (wildcardCount > 0) {
|
|
447
517
|
cleanupMessage += ' [' + wildcardCount + ' wildcard, ' + exactCount + ' exact]';
|
|
448
518
|
}
|
|
@@ -462,49 +532,52 @@ function cleanupFirstPartyDomains(results, sites, options = {}) {
|
|
|
462
532
|
}
|
|
463
533
|
}
|
|
464
534
|
|
|
465
|
-
// Summary
|
|
535
|
+
// Summary (see ignoreDomains cleanup for the silentMode/forceDebug gating logic).
|
|
466
536
|
if (totalRulesRemoved > 0 && !silentMode) {
|
|
467
|
-
const summaryMessage = '\
|
|
537
|
+
const summaryMessage = '\nFirst-party cleanup completed: Removed ' + totalRulesRemoved + ' rules from ' + sitesAffected + ' site(s) with firstParty: false';
|
|
468
538
|
if (messageColors && messageColors.cleanup) {
|
|
469
539
|
console.log(messageColors.cleanup(summaryMessage));
|
|
470
540
|
} else {
|
|
471
541
|
console.log(summaryMessage);
|
|
472
542
|
}
|
|
473
543
|
} else if (forceDebug) {
|
|
474
|
-
console.log(formatLogMessage('debug',
|
|
544
|
+
console.log(formatLogMessage('debug', totalRulesRemoved > 0
|
|
545
|
+
? '[cleanup] (silentMode) Removed ' + totalRulesRemoved + ' first-party rules from ' + sitesAffected + ' site(s)'
|
|
546
|
+
: '[cleanup] No first-party rules found to remove'));
|
|
475
547
|
}
|
|
476
548
|
|
|
477
|
-
return
|
|
549
|
+
return results;
|
|
478
550
|
}
|
|
479
551
|
|
|
480
552
|
/**
|
|
481
|
-
* Validates scan results and
|
|
482
|
-
*
|
|
553
|
+
* Validates scan results and prunes structurally invalid rules
|
|
554
|
+
* (empty strings, non-strings, whitespace-only). Does NOT filter by
|
|
555
|
+
* ignoreDomains — that's cleanupIgnoreDomains's job and it runs earlier.
|
|
556
|
+
*
|
|
483
557
|
* @param {Array} results - Array of scan results
|
|
484
558
|
* @param {Object} options - Options object
|
|
485
559
|
* @param {boolean} options.forceDebug - Debug logging flag
|
|
486
|
-
* @param {Array} options.ignoreDomains - Domains to ignore
|
|
487
560
|
* @returns {Array} Validated results
|
|
488
561
|
*/
|
|
489
562
|
function validateScanResults(results, options = {}) {
|
|
490
|
-
const { forceDebug = false
|
|
491
|
-
|
|
563
|
+
const { forceDebug = false } = options;
|
|
564
|
+
|
|
492
565
|
if (!results || results.length === 0) {
|
|
493
566
|
return results;
|
|
494
567
|
}
|
|
495
568
|
|
|
569
|
+
// NOTE: this function used to also filter rules whose text contained any
|
|
570
|
+
// wildcard-stripped ignoreDomains pattern as a literal substring. Two bugs
|
|
571
|
+
// stacked: (a) .replace('*', '') only stripped the FIRST '*' (so '*.x.*'
|
|
572
|
+
// stayed wildcarded), (b) substring matching was semantically wrong — a
|
|
573
|
+
// pattern of 'ads' would silently kill any rule containing 'headstart'.
|
|
574
|
+
// cleanupIgnoreDomains already runs before this step with the correct
|
|
575
|
+
// extract-and-match logic, so the ignore-pattern branch here is both
|
|
576
|
+
// redundant AND unsafe. Now this function does only what it should: prune
|
|
577
|
+
// structurally invalid rules.
|
|
496
578
|
let totalValidated = 0;
|
|
497
579
|
let totalRemoved = 0;
|
|
498
580
|
|
|
499
|
-
// Pre-strip wildcards from ignore patterns once (was done per rule per pattern)
|
|
500
|
-
let strippedIgnorePatterns = null;
|
|
501
|
-
if (ignoreDomains.length > 0) {
|
|
502
|
-
strippedIgnorePatterns = new Array(ignoreDomains.length);
|
|
503
|
-
for (let i = 0; i < ignoreDomains.length; i++) {
|
|
504
|
-
strippedIgnorePatterns[i] = ignoreDomains[i].replace('*', '');
|
|
505
|
-
}
|
|
506
|
-
}
|
|
507
|
-
|
|
508
581
|
for (let ri = 0; ri < results.length; ri++) {
|
|
509
582
|
const result = results[ri];
|
|
510
583
|
if (!result.rules || result.rules.length === 0) {
|
|
@@ -513,11 +586,9 @@ function validateScanResults(results, options = {}) {
|
|
|
513
586
|
|
|
514
587
|
const originalCount = result.rules.length;
|
|
515
588
|
const validRules = [];
|
|
516
|
-
|
|
589
|
+
|
|
517
590
|
for (let j = 0; j < result.rules.length; j++) {
|
|
518
591
|
const rule = result.rules[j];
|
|
519
|
-
|
|
520
|
-
// Basic validation
|
|
521
592
|
if (!rule || typeof rule !== 'string' || rule.trim().length === 0) {
|
|
522
593
|
if (forceDebug) {
|
|
523
594
|
console.log(formatLogMessage('debug', '[validation] Removed empty/invalid rule'));
|
|
@@ -525,29 +596,10 @@ function validateScanResults(results, options = {}) {
|
|
|
525
596
|
totalRemoved++;
|
|
526
597
|
continue;
|
|
527
598
|
}
|
|
528
|
-
|
|
529
|
-
// Check against stripped ignore patterns
|
|
530
|
-
let ignored = false;
|
|
531
|
-
if (strippedIgnorePatterns) {
|
|
532
|
-
for (let k = 0; k < strippedIgnorePatterns.length; k++) {
|
|
533
|
-
if (rule.includes(strippedIgnorePatterns[k])) {
|
|
534
|
-
if (forceDebug) {
|
|
535
|
-
console.log(formatLogMessage('debug', '[validation] Removed rule matching ignore pattern: ' + ignoreDomains[k]));
|
|
536
|
-
}
|
|
537
|
-
totalRemoved++;
|
|
538
|
-
ignored = true;
|
|
539
|
-
break;
|
|
540
|
-
}
|
|
541
|
-
}
|
|
542
|
-
}
|
|
543
|
-
|
|
544
|
-
if (!ignored) {
|
|
545
|
-
validRules.push(rule);
|
|
546
|
-
}
|
|
599
|
+
validRules.push(rule);
|
|
547
600
|
}
|
|
548
601
|
|
|
549
602
|
totalValidated += originalCount;
|
|
550
|
-
// Mutate in place instead of spread
|
|
551
603
|
result.rules = validRules;
|
|
552
604
|
}
|
|
553
605
|
|
|
@@ -579,54 +631,53 @@ function finalFirstPartyValidation(results, sites, options = {}) {
|
|
|
579
631
|
// Use pre-built map if passed, otherwise build it
|
|
580
632
|
const urlToSiteConfig = options._urlToSiteConfig || buildUrlToSiteConfig(sites);
|
|
581
633
|
|
|
582
|
-
|
|
634
|
+
// Mutate result.rules in place; return `results` directly.
|
|
583
635
|
let totalViolationsFound = 0;
|
|
584
636
|
let sitesWithViolations = 0;
|
|
637
|
+
const needsDetails = forceDebug;
|
|
585
638
|
|
|
586
639
|
for (let ri = 0; ri < results.length; ri++) {
|
|
587
640
|
const result = results[ri];
|
|
588
641
|
const siteConfig = urlToSiteConfig.get(result.url);
|
|
589
642
|
const shouldValidate = siteConfig && siteConfig.firstParty === false;
|
|
590
|
-
|
|
591
|
-
if (!shouldValidate || !result.rules || result.rules.length === 0)
|
|
592
|
-
finalResults.push(result);
|
|
593
|
-
continue;
|
|
594
|
-
}
|
|
643
|
+
|
|
644
|
+
if (!shouldValidate || !result.rules || result.rules.length === 0) continue;
|
|
595
645
|
|
|
596
646
|
const scannedDomain = safeGetDomain(result.url, false);
|
|
597
|
-
if (!scannedDomain)
|
|
598
|
-
finalResults.push(result);
|
|
599
|
-
continue;
|
|
600
|
-
}
|
|
647
|
+
if (!scannedDomain) continue;
|
|
601
648
|
|
|
602
649
|
const cleanedRules = [];
|
|
603
|
-
const violatingRules = [];
|
|
650
|
+
const violatingRules = needsDetails ? [] : null;
|
|
651
|
+
let violationCount = 0;
|
|
604
652
|
|
|
605
653
|
for (let j = 0; j < result.rules.length; j++) {
|
|
606
654
|
const rule = result.rules[j];
|
|
607
655
|
const extractedDomain = extractDomainFromRule(rule);
|
|
608
|
-
|
|
656
|
+
|
|
609
657
|
if (extractedDomain) {
|
|
610
658
|
const matchResult = shouldRemoveAsFirstParty(extractedDomain, scannedDomain, forceDebug);
|
|
611
|
-
|
|
659
|
+
|
|
612
660
|
if (matchResult.shouldRemove) {
|
|
613
|
-
|
|
614
|
-
rule: rule,
|
|
615
|
-
domain: extractedDomain,
|
|
616
|
-
reason: 'VALIDATION FAILURE: ' + matchResult.reason
|
|
617
|
-
});
|
|
661
|
+
violationCount++;
|
|
618
662
|
totalViolationsFound++;
|
|
663
|
+
if (needsDetails) {
|
|
664
|
+
violatingRules.push({
|
|
665
|
+
rule: rule,
|
|
666
|
+
domain: extractedDomain,
|
|
667
|
+
reason: 'VALIDATION FAILURE: ' + matchResult.reason
|
|
668
|
+
});
|
|
669
|
+
}
|
|
619
670
|
continue;
|
|
620
671
|
}
|
|
621
672
|
}
|
|
622
673
|
cleanedRules.push(rule);
|
|
623
674
|
}
|
|
624
675
|
|
|
625
|
-
if (
|
|
676
|
+
if (violationCount > 0) {
|
|
626
677
|
sitesWithViolations++;
|
|
627
|
-
|
|
678
|
+
|
|
628
679
|
if (!silentMode) {
|
|
629
|
-
const errorMessage = '
|
|
680
|
+
const errorMessage = 'CONFIG VIOLATION: Found ' + violationCount + ' first-party rule(s) in ' + scannedDomain + ' (firstParty: false)';
|
|
630
681
|
if (messageColors && messageColors.error) {
|
|
631
682
|
console.log(messageColors.error(errorMessage));
|
|
632
683
|
} else {
|
|
@@ -644,19 +695,20 @@ function finalFirstPartyValidation(results, sites, options = {}) {
|
|
|
644
695
|
|
|
645
696
|
// Mutate in place
|
|
646
697
|
result.rules = cleanedRules;
|
|
647
|
-
finalResults.push(result);
|
|
648
698
|
}
|
|
649
699
|
|
|
650
|
-
// Summary
|
|
700
|
+
// Summary (see ignoreDomains cleanup for the silentMode/forceDebug gating logic).
|
|
651
701
|
if (totalViolationsFound > 0 && !silentMode) {
|
|
652
|
-
const summaryMessage = '\
|
|
702
|
+
const summaryMessage = '\nSCAN FILTERING FAILURE: Removed ' + totalViolationsFound + ' first-party rules from ' + sitesWithViolations + ' site(s) in post-processing';
|
|
653
703
|
console.log(summaryMessage);
|
|
654
|
-
console.log('
|
|
704
|
+
console.log('This indicates firstParty: false filtering failed during scan - consider investigating root cause.');
|
|
655
705
|
} else if (forceDebug) {
|
|
656
|
-
console.log(formatLogMessage('debug',
|
|
706
|
+
console.log(formatLogMessage('debug', totalViolationsFound > 0
|
|
707
|
+
? '[final-validation] (silentMode) Removed ' + totalViolationsFound + ' first-party violations from ' + sitesWithViolations + ' site(s)'
|
|
708
|
+
: '[final-validation] No first-party violations found - filtering working correctly'));
|
|
657
709
|
}
|
|
658
710
|
|
|
659
|
-
return
|
|
711
|
+
return results;
|
|
660
712
|
}
|
|
661
713
|
|
|
662
714
|
/**
|
|
@@ -683,15 +735,17 @@ function processResults(results, sites, options = {}) {
|
|
|
683
735
|
|
|
684
736
|
// Step 1: Clean up first-party domains
|
|
685
737
|
let processedResults = cleanupFirstPartyDomains(results, sites, sharedOptions);
|
|
686
|
-
|
|
687
|
-
// Step 2: Clean up ignoreDomains (final safety net)
|
|
688
|
-
|
|
689
|
-
|
|
738
|
+
|
|
739
|
+
// Step 2: Clean up ignoreDomains (final safety net). sharedOptions carries
|
|
740
|
+
// _urlToSiteConfig which this step ignores, but using sharedOptions keeps
|
|
741
|
+
// the four calls visually consistent.
|
|
742
|
+
processedResults = cleanupIgnoreDomains(processedResults, options.ignoreDomains || [], sharedOptions);
|
|
743
|
+
|
|
690
744
|
// Step 3: Final validation for firstParty: false configurations
|
|
691
745
|
processedResults = finalFirstPartyValidation(processedResults, sites, sharedOptions);
|
|
692
746
|
|
|
693
747
|
// Step 4: Validate results
|
|
694
|
-
processedResults = validateScanResults(processedResults,
|
|
748
|
+
processedResults = validateScanResults(processedResults, sharedOptions);
|
|
695
749
|
|
|
696
750
|
if (forceDebug) {
|
|
697
751
|
let totalRules = 0;
|