@fanboynz/network-scanner 2.0.55 → 2.0.57
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm-publish.yml +3 -4
- package/lib/browserhealth.js +207 -179
- package/lib/cloudflare.js +117 -65
- package/lib/ignore_similar.js +78 -209
- package/lib/post-processing.js +282 -356
- package/lib/smart-cache.js +347 -267
- package/nwss.js +53 -13
- package/package.json +3 -2
package/lib/post-processing.js
CHANGED
|
@@ -2,6 +2,40 @@
|
|
|
2
2
|
// Handles cleanup and validation of scan results after scanning is complete
|
|
3
3
|
|
|
4
4
|
const { formatLogMessage, messageColors } = require('./colorize');
|
|
5
|
+
const psl = require('psl');
|
|
6
|
+
|
|
7
|
+
// Precompiled regex patterns (compiled once at module load rather than once per rule).
// Unless noted otherwise, capture group 1 of each pattern is the domain.

// Adblock: ||domain.com^  -> everything after "||" up to the first "/" or "^"
const REGEX_ADBLOCK = /^\|\|([^/\^]+)/;

// dnsmasq: local=/domain.com/
const REGEX_DNSMASQ_LOCAL = /local=\/([^/]+)\//;

// dnsmasq (old syntax): server=/domain.com/
const REGEX_DNSMASQ_SERVER = /server=\/([^/]+)\//;

// Unbound: local-zone: "domain.com." always_null
// The capture is lazy so the optional trailing root dot is left to `\.?`
// instead of being swallowed into the captured domain.
const REGEX_UNBOUND = /local-zone:\s*"([^"]+?)\.?"/;

// Privoxy: { +block } .domain.com  -> optional leading dot is not captured
const REGEX_PRIVOXY = /\{\s*\+block\s*\}\s*\.?([^\s]+)/;

// Pi-hole regex: (^|\.)domain\.com$
// The capture covers the whole escaped domain INCLUDING the final "\.tld" label;
// callers unescape it with REGEX_UNESCAPE_DOT.
const REGEX_PIHOLE = /^\(\^\|\\\.\)(.+\\\.\w+)\$$/;

// Fallback: first domain-like token anywhere in the rule
const REGEX_DOMAIN_FALLBACK = /([a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,})/;

// Separator used to split hosts-file lines ("0.0.0.0 domain.com")
const REGEX_WHITESPACE = /\s+/;

// Matches every escaped dot ("\."); global flag, so use it with String#replace only
const REGEX_UNESCAPE_DOT = /\\\./g;
|
|
17
|
+
|
|
18
|
+
// Cache for compiled wildcard regex patterns (pattern string -> RegExp)
const wildcardRegexCache = new Map();

// Upper bound on cached patterns; the oldest entry is evicted once exceeded
const WILDCARD_CACHE_MAX_ENTRIES = 200;

/**
 * Get or compile a wildcard pattern regex (cached).
 *
 * The pattern is matched against the whole input (anchored with ^ and $).
 * `*` matches any run of characters; every other character is matched
 * literally, including regex metacharacters such as `+`, `?`, `(` or `[`.
 *
 * @param {string} pattern - Wildcard pattern string
 * @returns {RegExp} Compiled regex
 */
function getWildcardRegex(pattern) {
  const cached = wildcardRegexCache.get(pattern);
  if (cached) {
    return cached;
  }

  // Escape every regex metacharacter except '*', then expand '*' to '.*'.
  // Escaping only '.' would let characters like '+' or '[' change the meaning
  // of the compiled pattern, or make `new RegExp` throw on unbalanced brackets.
  const source = pattern
    .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
    .replace(/\*/g, '.*');
  const regex = new RegExp('^' + source + '$');

  wildcardRegexCache.set(pattern, regex);

  // Map iterates in insertion order, so the first key is the oldest entry.
  if (wildcardRegexCache.size > WILDCARD_CACHE_MAX_ENTRIES) {
    const oldestKey = wildcardRegexCache.keys().next().value;
    wildcardRegexCache.delete(oldestKey);
  }

  return regex;
}
|
|
5
39
|
|
|
6
40
|
/**
|
|
7
41
|
* Safely extracts hostname from a URL, handling malformed URLs gracefully
|
|
@@ -11,20 +45,77 @@ const { formatLogMessage, messageColors } = require('./colorize');
|
|
|
11
45
|
*/
|
|
12
46
|
function safeGetDomain(url, getFullHostname = false) {
|
|
13
47
|
try {
|
|
14
|
-
const psl = require('psl');
|
|
15
48
|
const parsedUrl = new URL(url);
|
|
16
49
|
if (getFullHostname) {
|
|
17
50
|
return parsedUrl.hostname;
|
|
18
|
-
} else {
|
|
19
|
-
// Extract root domain using psl library
|
|
20
|
-
const parsed = psl.parse(parsedUrl.hostname);
|
|
21
|
-
return parsed.domain || parsedUrl.hostname;
|
|
22
51
|
}
|
|
52
|
+
const parsed = psl.parse(parsedUrl.hostname);
|
|
53
|
+
return parsed.domain || parsedUrl.hostname;
|
|
23
54
|
} catch (urlError) {
|
|
24
55
|
return '';
|
|
25
56
|
}
|
|
26
57
|
}
|
|
27
58
|
|
|
59
|
+
/**
 * Enhanced domain extraction helper - single source of truth for all rule formats
 * (was duplicated inline in cleanupIgnoreDomains and cleanupFirstPartyDomains).
 *
 * Formats are tried in this order: adblock, hosts file, dnsmasq (local= / server=),
 * Unbound, Privoxy, Pi-hole regex, then a generic "first domain-like token" fallback.
 *
 * @param {string} rule - Rule string in various formats
 * @returns {string|null} Extracted domain, or null if none could be found
 */
function extractDomainFromRule(rule) {
  if (!rule || typeof rule !== 'string') {
    return null;
  }

  // Adblock: ||domain.com^
  if (rule.startsWith('||') && rule.includes('^')) {
    const match = REGEX_ADBLOCK.exec(rule);
    return match ? match[1] : null;
  }

  // Hosts file: 127.0.0.1 domain / 0.0.0.0 domain
  if (rule.startsWith('127.0.0.1 ') || rule.startsWith('0.0.0.0 ')) {
    const host = rule.split(REGEX_WHITESPACE)[1];
    // An address followed only by whitespace splits to an empty second field;
    // report that as "no domain" (null) rather than returning ''.
    return host || null;
  }

  // dnsmasq: local=/domain.com/
  if (rule.includes('local=/')) {
    const match = REGEX_DNSMASQ_LOCAL.exec(rule);
    return match ? match[1] : null;
  }

  // dnsmasq old: server=/domain.com/
  if (rule.includes('server=/')) {
    const match = REGEX_DNSMASQ_SERVER.exec(rule);
    return match ? match[1] : null;
  }

  // Unbound: local-zone: "domain.com." always_null
  if (rule.includes('local-zone:') && rule.includes('always_null')) {
    const match = REGEX_UNBOUND.exec(rule);
    return match ? match[1] : null;
  }

  // Privoxy: { +block } .domain.com
  if (rule.includes('+block') && rule.includes('.')) {
    const match = REGEX_PRIVOXY.exec(rule);
    return match ? match[1] : null;
  }

  // Pi-hole regex: (^|\.)domain\.com$
  if (rule.startsWith('(')) {
    const match = REGEX_PIHOLE.exec(rule);
    if (match) {
      // Turn the escaped dots ("\.") back into plain dots
      return match[1].replace(REGEX_UNESCAPE_DOT, '.');
    }
    // Starts with '(' but is not a Pi-hole rule: fall through to the generic
    // fallback instead of giving up.
  }

  // Fallback: any domain-like pattern
  const domainMatch = REGEX_DOMAIN_FALLBACK.exec(rule);
  return domainMatch ? domainMatch[1] : null;
}
|
|
118
|
+
|
|
28
119
|
/**
|
|
29
120
|
* Enhanced domain matching for ignoreDomains patterns (including wildcards)
|
|
30
121
|
* @param {string} domain - Domain to check
|
|
@@ -37,47 +128,35 @@ function shouldIgnoreAsIgnoreDomain(domain, ignorePatterns, forceDebug) {
|
|
|
37
128
|
return { shouldIgnore: false, reason: 'No ignore patterns' };
|
|
38
129
|
}
|
|
39
130
|
|
|
40
|
-
for (
|
|
131
|
+
for (let i = 0; i < ignorePatterns.length; i++) {
|
|
132
|
+
const pattern = ignorePatterns[i];
|
|
41
133
|
if (pattern.includes('*')) {
|
|
42
|
-
// Handle wildcard patterns
|
|
43
134
|
if (pattern.startsWith('*.')) {
|
|
44
135
|
// Pattern: *.example.com
|
|
45
|
-
const wildcardDomain = pattern.substring(2);
|
|
46
|
-
const wildcardRoot = safeGetDomain(
|
|
47
|
-
const domainRoot = safeGetDomain(
|
|
136
|
+
const wildcardDomain = pattern.substring(2);
|
|
137
|
+
const wildcardRoot = safeGetDomain('http://' + wildcardDomain, false);
|
|
138
|
+
const domainRoot = safeGetDomain('http://' + domain, false);
|
|
48
139
|
|
|
49
140
|
if (wildcardRoot === domainRoot) {
|
|
50
|
-
return {
|
|
51
|
-
shouldIgnore: true,
|
|
52
|
-
reason: `Matches wildcard ignore pattern: ${pattern}`
|
|
53
|
-
};
|
|
141
|
+
return { shouldIgnore: true, reason: 'Matches wildcard ignore pattern: ' + pattern };
|
|
54
142
|
}
|
|
55
143
|
} else if (pattern.endsWith('.*')) {
|
|
56
144
|
// Pattern: example.*
|
|
57
|
-
const baseDomain = pattern.slice(0, -2);
|
|
145
|
+
const baseDomain = pattern.slice(0, -2);
|
|
58
146
|
if (domain.startsWith(baseDomain + '.')) {
|
|
59
|
-
return {
|
|
60
|
-
shouldIgnore: true,
|
|
61
|
-
reason: `Matches wildcard TLD ignore pattern: ${pattern}`
|
|
62
|
-
};
|
|
147
|
+
return { shouldIgnore: true, reason: 'Matches wildcard TLD ignore pattern: ' + pattern };
|
|
63
148
|
}
|
|
64
149
|
} else {
|
|
65
|
-
// Complex wildcard
|
|
66
|
-
const wildcardRegex =
|
|
150
|
+
// Complex wildcard -- use cached regex
|
|
151
|
+
const wildcardRegex = getWildcardRegex(pattern);
|
|
67
152
|
if (wildcardRegex.test(domain)) {
|
|
68
|
-
return {
|
|
69
|
-
shouldIgnore: true,
|
|
70
|
-
reason: `Matches complex wildcard ignore pattern: ${pattern}`
|
|
71
|
-
};
|
|
153
|
+
return { shouldIgnore: true, reason: 'Matches complex wildcard ignore pattern: ' + pattern };
|
|
72
154
|
}
|
|
73
155
|
}
|
|
74
156
|
} else {
|
|
75
157
|
// Exact pattern matching
|
|
76
158
|
if (domain === pattern || domain.endsWith('.' + pattern)) {
|
|
77
|
-
return {
|
|
78
|
-
shouldIgnore: true,
|
|
79
|
-
reason: `Matches exact ignore pattern: ${pattern}`
|
|
80
|
-
};
|
|
159
|
+
return { shouldIgnore: true, reason: 'Matches exact ignore pattern: ' + pattern };
|
|
81
160
|
}
|
|
82
161
|
}
|
|
83
162
|
}
|
|
@@ -97,53 +176,54 @@ function shouldRemoveAsFirstParty(extractedDomain, scannedRootDomain, forceDebug
|
|
|
97
176
|
return { shouldRemove: false, reason: 'Missing domain data' };
|
|
98
177
|
}
|
|
99
178
|
|
|
100
|
-
// Handle wildcard patterns
|
|
101
179
|
if (extractedDomain.includes('*')) {
|
|
102
|
-
// Common wildcard patterns
|
|
103
180
|
if (extractedDomain.startsWith('*.')) {
|
|
104
|
-
|
|
105
|
-
const
|
|
106
|
-
const wildcardRoot = safeGetDomain(`http://${wildcardDomain}`, false);
|
|
181
|
+
const wildcardDomain = extractedDomain.substring(2);
|
|
182
|
+
const wildcardRoot = safeGetDomain('http://' + wildcardDomain, false);
|
|
107
183
|
|
|
108
184
|
if (wildcardRoot === scannedRootDomain) {
|
|
109
|
-
return {
|
|
110
|
-
shouldRemove: true,
|
|
111
|
-
reason: `Wildcard subdomain pattern matches root domain (*.${wildcardRoot})`
|
|
112
|
-
};
|
|
185
|
+
return { shouldRemove: true, reason: 'Wildcard subdomain pattern matches root domain (*.' + wildcardRoot + ')' };
|
|
113
186
|
}
|
|
114
187
|
} else if (extractedDomain.endsWith('.*')) {
|
|
115
|
-
|
|
116
|
-
const baseDomain = extractedDomain.slice(0, -2); // Remove ".*"
|
|
188
|
+
const baseDomain = extractedDomain.slice(0, -2);
|
|
117
189
|
if (scannedRootDomain.startsWith(baseDomain + '.')) {
|
|
118
|
-
return {
|
|
119
|
-
shouldRemove: true,
|
|
120
|
-
reason: `Wildcard TLD pattern matches base domain (${baseDomain}.*)`
|
|
121
|
-
};
|
|
190
|
+
return { shouldRemove: true, reason: 'Wildcard TLD pattern matches base domain (' + baseDomain + '.*)' };
|
|
122
191
|
}
|
|
123
|
-
} else
|
|
124
|
-
//
|
|
125
|
-
const wildcardRegex =
|
|
192
|
+
} else {
|
|
193
|
+
// Complex wildcard -- use cached regex
|
|
194
|
+
const wildcardRegex = getWildcardRegex(extractedDomain);
|
|
126
195
|
if (wildcardRegex.test(scannedRootDomain)) {
|
|
127
|
-
return {
|
|
128
|
-
shouldRemove: true,
|
|
129
|
-
reason: `Complex wildcard pattern matches root domain (${extractedDomain})`
|
|
130
|
-
};
|
|
196
|
+
return { shouldRemove: true, reason: 'Complex wildcard pattern matches root domain (' + extractedDomain + ')' };
|
|
131
197
|
}
|
|
132
198
|
}
|
|
133
199
|
}
|
|
134
200
|
|
|
135
201
|
// Standard exact root domain matching
|
|
136
|
-
const extractedRoot = safeGetDomain(
|
|
202
|
+
const extractedRoot = safeGetDomain('http://' + extractedDomain, false);
|
|
137
203
|
if (extractedRoot === scannedRootDomain) {
|
|
138
|
-
return {
|
|
139
|
-
shouldRemove: true,
|
|
140
|
-
reason: `Exact root domain match (${extractedRoot})`
|
|
141
|
-
};
|
|
204
|
+
return { shouldRemove: true, reason: 'Exact root domain match (' + extractedRoot + ')' };
|
|
142
205
|
}
|
|
143
206
|
|
|
144
207
|
return { shouldRemove: false, reason: 'No first-party match detected' };
|
|
145
208
|
}
|
|
146
209
|
|
|
210
|
+
/**
 * Build URL-to-site-config mapping (shared between cleanup functions).
 *
 * A site's `url` may be a single value or an array; every URL is mapped to its
 * site object. If the same URL appears in several sites, the last one wins.
 * Missing input is tolerated: a non-array `sites`, null/undefined entries and
 * null/undefined URLs are skipped instead of throwing or producing an
 * `undefined` key.
 *
 * @param {Array} sites - Array of site configurations
 * @returns {Map} URL to site config mapping
 */
function buildUrlToSiteConfig(sites) {
  const map = new Map();
  if (!Array.isArray(sites)) {
    return map;
  }

  for (const site of sites) {
    if (site == null) {
      continue;
    }

    const urls = Array.isArray(site.url) ? site.url : [site.url];
    for (const url of urls) {
      // `== null` covers both null and undefined (site without a url)
      if (url == null) {
        continue;
      }
      map.set(url, site);
    }
  }

  return map;
}
|
|
226
|
+
|
|
147
227
|
/**
|
|
148
228
|
* Post-scan cleanup function to remove ignoreDomains from results
|
|
149
229
|
* This is a final safety net to catch any domains that should have been ignored
|
|
@@ -163,80 +243,31 @@ function cleanupIgnoreDomains(results, ignoreDomains, options = {}) {
|
|
|
163
243
|
}
|
|
164
244
|
|
|
165
245
|
if (forceDebug) {
|
|
166
|
-
console.log(formatLogMessage('debug',
|
|
246
|
+
console.log(formatLogMessage('debug', '[ignoreDomains cleanup] Processing ' + results.length + ' results against ' + ignoreDomains.length + ' ignore patterns'));
|
|
167
247
|
}
|
|
168
248
|
|
|
169
249
|
const cleanedResults = [];
|
|
170
250
|
let totalRulesRemoved = 0;
|
|
171
251
|
let sitesAffected = 0;
|
|
172
252
|
|
|
173
|
-
results.
|
|
253
|
+
for (let ri = 0; ri < results.length; ri++) {
|
|
254
|
+
const result = results[ri];
|
|
174
255
|
if (!result.rules || result.rules.length === 0) {
|
|
175
256
|
cleanedResults.push(result);
|
|
176
|
-
|
|
257
|
+
continue;
|
|
177
258
|
}
|
|
178
259
|
|
|
179
|
-
const originalRulesCount = result.rules.length;
|
|
180
260
|
const cleanedRules = [];
|
|
181
261
|
const removedRules = [];
|
|
182
262
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
let
|
|
263
|
+
for (let j = 0; j < result.rules.length; j++) {
|
|
264
|
+
const rule = result.rules[j];
|
|
265
|
+
let kept = true;
|
|
186
266
|
|
|
187
267
|
try {
|
|
188
|
-
//
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
const match = rule.match(/^\|\|([^/\^]+)/);
|
|
192
|
-
if (match) {
|
|
193
|
-
extractedDomain = match[1];
|
|
194
|
-
}
|
|
195
|
-
} else if (rule.startsWith('127.0.0.1 ') || rule.startsWith('0.0.0.0 ')) {
|
|
196
|
-
// hosts file format
|
|
197
|
-
const parts = rule.split(/\s+/);
|
|
198
|
-
if (parts.length >= 2) {
|
|
199
|
-
extractedDomain = parts[1];
|
|
200
|
-
}
|
|
201
|
-
} else if (rule.includes('local=/') && rule.includes('/')) {
|
|
202
|
-
// dnsmasq format: local=/domain.com/
|
|
203
|
-
const match = rule.match(/local=\/([^/]+)\//);
|
|
204
|
-
if (match) {
|
|
205
|
-
extractedDomain = match[1];
|
|
206
|
-
}
|
|
207
|
-
} else if (rule.includes('server=/') && rule.includes('/')) {
|
|
208
|
-
// dnsmasq old format: server=/domain.com/
|
|
209
|
-
const match = rule.match(/server=\/([^/]+)\//);
|
|
210
|
-
if (match) {
|
|
211
|
-
extractedDomain = match[1];
|
|
212
|
-
}
|
|
213
|
-
} else if (rule.includes('local-zone:') && rule.includes('always_null')) {
|
|
214
|
-
// unbound format: local-zone: "domain.com." always_null
|
|
215
|
-
const match = rule.match(/local-zone:\s*"([^"]+)\.?"/);
|
|
216
|
-
if (match) {
|
|
217
|
-
extractedDomain = match[1];
|
|
218
|
-
}
|
|
219
|
-
} else if (rule.includes('+block') && rule.includes('.')) {
|
|
220
|
-
// privoxy format: { +block } .domain.com
|
|
221
|
-
const match = rule.match(/\{\s*\+block\s*\}\s*\.?([^\s]+)/);
|
|
222
|
-
if (match) {
|
|
223
|
-
extractedDomain = match[1];
|
|
224
|
-
}
|
|
225
|
-
} else if (rule.match(/^\(\^\|\\\.\).*\\\.\w+\$$/)) {
|
|
226
|
-
// pi-hole regex format: (^|\.)domain\.com$
|
|
227
|
-
const match = rule.match(/^\(\^\|\\\.\)(.+)\\\.\w+\$$/);
|
|
228
|
-
if (match) {
|
|
229
|
-
// Unescape the domain
|
|
230
|
-
extractedDomain = match[1].replace(/\\\./g, '.');
|
|
231
|
-
}
|
|
232
|
-
} else {
|
|
233
|
-
// Try to extract any domain-like pattern as fallback
|
|
234
|
-
const domainMatch = rule.match(/([a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,})/);
|
|
235
|
-
if (domainMatch) {
|
|
236
|
-
extractedDomain = domainMatch[1];
|
|
237
|
-
}
|
|
238
|
-
}
|
|
239
|
-
// Check if extracted domain should be ignored
|
|
268
|
+
// Use shared extractDomainFromRule (was duplicated inline)
|
|
269
|
+
const extractedDomain = extractDomainFromRule(rule);
|
|
270
|
+
|
|
240
271
|
if (extractedDomain) {
|
|
241
272
|
const ignoreResult = shouldIgnoreAsIgnoreDomain(extractedDomain, ignoreDomains, forceDebug);
|
|
242
273
|
|
|
@@ -244,35 +275,42 @@ function cleanupIgnoreDomains(results, ignoreDomains, options = {}) {
|
|
|
244
275
|
removedRules.push({
|
|
245
276
|
rule: rule,
|
|
246
277
|
domain: extractedDomain,
|
|
247
|
-
reason:
|
|
278
|
+
reason: 'ignoreDomains: ' + ignoreResult.reason,
|
|
248
279
|
matchType: ignoreResult.reason.includes('wildcard') ? 'wildcard' : 'exact'
|
|
249
280
|
});
|
|
250
|
-
|
|
281
|
+
kept = false;
|
|
251
282
|
}
|
|
252
283
|
}
|
|
253
284
|
} catch (parseErr) {
|
|
254
285
|
if (forceDebug) {
|
|
255
|
-
console.log(formatLogMessage('debug',
|
|
286
|
+
console.log(formatLogMessage('debug', '[ignoreDomains cleanup] Failed to parse rule: ' + rule + ' - ' + parseErr.message));
|
|
256
287
|
}
|
|
257
288
|
}
|
|
258
289
|
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
290
|
+
if (kept) {
|
|
291
|
+
cleanedRules.push(rule);
|
|
292
|
+
}
|
|
293
|
+
}
|
|
262
294
|
|
|
263
|
-
|
|
295
|
+
// Mutate rules directly instead of spreading entire result object
|
|
296
|
+
result.rules = cleanedRules;
|
|
297
|
+
cleanedResults.push(result);
|
|
264
298
|
|
|
265
299
|
if (removedRules.length > 0) {
|
|
266
300
|
sitesAffected++;
|
|
267
301
|
totalRulesRemoved += removedRules.length;
|
|
268
302
|
|
|
269
303
|
if (!silentMode) {
|
|
270
|
-
|
|
271
|
-
|
|
304
|
+
// Single-pass count instead of two .filter() calls
|
|
305
|
+
let wildcardCount = 0;
|
|
306
|
+
for (let k = 0; k < removedRules.length; k++) {
|
|
307
|
+
if (removedRules[k].matchType === 'wildcard') wildcardCount++;
|
|
308
|
+
}
|
|
309
|
+
const exactCount = removedRules.length - wildcardCount;
|
|
272
310
|
|
|
273
|
-
let cleanupMessage =
|
|
311
|
+
let cleanupMessage = '?? Removed ' + removedRules.length + ' ignoreDomains rule(s) from ' + safeGetDomain(result.url) + ' (final cleanup)';
|
|
274
312
|
if (wildcardCount > 0) {
|
|
275
|
-
cleanupMessage +=
|
|
313
|
+
cleanupMessage += ' [' + wildcardCount + ' wildcard, ' + exactCount + ' exact]';
|
|
276
314
|
}
|
|
277
315
|
|
|
278
316
|
if (messageColors && messageColors.cleanup) {
|
|
@@ -282,28 +320,18 @@ function cleanupIgnoreDomains(results, ignoreDomains, options = {}) {
|
|
|
282
320
|
}
|
|
283
321
|
}
|
|
284
322
|
if (forceDebug) {
|
|
285
|
-
console.log(formatLogMessage('debug',
|
|
286
|
-
removedRules.
|
|
287
|
-
console.log(formatLogMessage('debug',
|
|
288
|
-
}
|
|
323
|
+
console.log(formatLogMessage('debug', '[ignoreDomains cleanup] Removed rules from ' + result.url + ':'));
|
|
324
|
+
for (let k = 0; k < removedRules.length; k++) {
|
|
325
|
+
console.log(formatLogMessage('debug', ' [' + (k + 1) + '] ' + removedRules[k].rule + ' (' + removedRules[k].reason + ') [' + removedRules[k].matchType + ']'));
|
|
326
|
+
}
|
|
289
327
|
}
|
|
290
328
|
}
|
|
291
|
-
}
|
|
329
|
+
}
|
|
292
330
|
|
|
293
331
|
// Summary
|
|
294
332
|
if (totalRulesRemoved > 0 && !silentMode) {
|
|
295
|
-
const
|
|
296
|
-
|
|
297
|
-
acc.push(...result.removedIgnoreDomains);
|
|
298
|
-
}
|
|
299
|
-
return acc;
|
|
300
|
-
}, []);
|
|
301
|
-
|
|
302
|
-
const totalWildcardCount = allRemovedRules.filter(r => r.matchType === 'wildcard').length;
|
|
303
|
-
const totalExactCount = allRemovedRules.filter(r => r.matchType === 'exact').length;
|
|
304
|
-
|
|
305
|
-
const summaryMessage = `\n?? ignoreDomains cleanup completed: Removed ${totalRulesRemoved} rules from ${sitesAffected} site(s)` +
|
|
306
|
-
(totalWildcardCount > 0 ? ` [${totalWildcardCount} wildcard patterns, ${totalExactCount} exact matches]` : '');
|
|
333
|
+
const summaryMessage = '\n?? ignoreDomains cleanup completed: Removed ' + totalRulesRemoved + ' rules from ' + sitesAffected + ' site(s)';
|
|
334
|
+
|
|
307
335
|
if (messageColors && messageColors.cleanup) {
|
|
308
336
|
console.log(messageColors.cleanup(summaryMessage));
|
|
309
337
|
} else {
|
|
@@ -316,77 +344,6 @@ function cleanupIgnoreDomains(results, ignoreDomains, options = {}) {
|
|
|
316
344
|
return cleanedResults;
|
|
317
345
|
}
|
|
318
346
|
|
|
319
|
-
/**
|
|
320
|
-
* Enhanced domain extraction helper that reuses existing parsing logic
|
|
321
|
-
* @param {string} rule - Rule string in various formats
|
|
322
|
-
* @returns {string|null} Extracted domain or null if not found
|
|
323
|
-
*/
|
|
324
|
-
function extractDomainFromRule(rule) {
|
|
325
|
-
if (!rule || typeof rule !== 'string') {
|
|
326
|
-
return null;
|
|
327
|
-
}
|
|
328
|
-
|
|
329
|
-
try {
|
|
330
|
-
// Reuse the existing parsing logic from cleanupFirstPartyDomains
|
|
331
|
-
let extractedDomain = null;
|
|
332
|
-
|
|
333
|
-
if (rule.startsWith('||') && rule.includes('^')) {
|
|
334
|
-
// ||domain.com^ format (adblock)
|
|
335
|
-
const match = rule.match(/^\|\|([^/\^]+)/);
|
|
336
|
-
if (match) {
|
|
337
|
-
extractedDomain = match[1];
|
|
338
|
-
}
|
|
339
|
-
} else if (rule.startsWith('127.0.0.1 ') || rule.startsWith('0.0.0.0 ')) {
|
|
340
|
-
// hosts file format
|
|
341
|
-
const parts = rule.split(/\s+/);
|
|
342
|
-
if (parts.length >= 2) {
|
|
343
|
-
extractedDomain = parts[1];
|
|
344
|
-
}
|
|
345
|
-
} else if (rule.includes('local=/') && rule.includes('/')) {
|
|
346
|
-
// dnsmasq format: local=/domain.com/
|
|
347
|
-
const match = rule.match(/local=\/([^/]+)\//);
|
|
348
|
-
if (match) {
|
|
349
|
-
extractedDomain = match[1];
|
|
350
|
-
}
|
|
351
|
-
} else if (rule.includes('server=/') && rule.includes('/')) {
|
|
352
|
-
// dnsmasq old format: server=/domain.com/
|
|
353
|
-
const match = rule.match(/server=\/([^/]+)\//);
|
|
354
|
-
if (match) {
|
|
355
|
-
extractedDomain = match[1];
|
|
356
|
-
}
|
|
357
|
-
} else if (rule.includes('local-zone:') && rule.includes('always_null')) {
|
|
358
|
-
// unbound format: local-zone: "domain.com." always_null
|
|
359
|
-
const match = rule.match(/local-zone:\s*"([^"]+)\.?"/);
|
|
360
|
-
if (match) {
|
|
361
|
-
extractedDomain = match[1];
|
|
362
|
-
}
|
|
363
|
-
} else if (rule.includes('+block') && rule.includes('.')) {
|
|
364
|
-
// privoxy format: { +block } .domain.com
|
|
365
|
-
const match = rule.match(/\{\s*\+block\s*\}\s*\.?([^\s]+)/);
|
|
366
|
-
if (match) {
|
|
367
|
-
extractedDomain = match[1];
|
|
368
|
-
}
|
|
369
|
-
} else if (rule.match(/^\(\^\|\\\.\).*\\\.\w+\$$/)) {
|
|
370
|
-
// pi-hole regex format: (^|\.)domain\.com$
|
|
371
|
-
const match = rule.match(/^\(\^\|\\\.\)(.+)\\\.\w+\$$/);
|
|
372
|
-
if (match) {
|
|
373
|
-
// Unescape the domain
|
|
374
|
-
extractedDomain = match[1].replace(/\\\./g, '.');
|
|
375
|
-
}
|
|
376
|
-
} else {
|
|
377
|
-
// Try to extract any domain-like pattern as fallback
|
|
378
|
-
const domainMatch = rule.match(/([a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,})/);
|
|
379
|
-
if (domainMatch) {
|
|
380
|
-
extractedDomain = domainMatch[1];
|
|
381
|
-
}
|
|
382
|
-
}
|
|
383
|
-
|
|
384
|
-
return extractedDomain;
|
|
385
|
-
} catch (parseErr) {
|
|
386
|
-
return null;
|
|
387
|
-
}
|
|
388
|
-
}
|
|
389
|
-
|
|
390
347
|
/**
|
|
391
348
|
* Post-scan cleanup function to remove first-party domains from results
|
|
392
349
|
* Only processes sites that have firstParty: false in their configuration
|
|
@@ -396,6 +353,7 @@ function extractDomainFromRule(rule) {
|
|
|
396
353
|
* @param {Object} options - Options object
|
|
397
354
|
* @param {boolean} options.forceDebug - Debug logging flag
|
|
398
355
|
* @param {boolean} options.silentMode - Silent mode flag
|
|
356
|
+
* @param {Map} [options._urlToSiteConfig] - Pre-built URL mapping (internal optimization)
|
|
399
357
|
* @returns {Array} Cleaned results with conditional first-party removal
|
|
400
358
|
*/
|
|
401
359
|
function cleanupFirstPartyDomains(results, sites, options = {}) {
|
|
@@ -405,105 +363,44 @@ function cleanupFirstPartyDomains(results, sites, options = {}) {
|
|
|
405
363
|
return results;
|
|
406
364
|
}
|
|
407
365
|
|
|
408
|
-
//
|
|
409
|
-
const urlToSiteConfig =
|
|
410
|
-
sites.forEach(site => {
|
|
411
|
-
const urls = Array.isArray(site.url) ? site.url : [site.url];
|
|
412
|
-
urls.forEach(url => {
|
|
413
|
-
urlToSiteConfig.set(url, site);
|
|
414
|
-
});
|
|
415
|
-
});
|
|
366
|
+
// Use pre-built map if passed, otherwise build it
|
|
367
|
+
const urlToSiteConfig = options._urlToSiteConfig || buildUrlToSiteConfig(sites);
|
|
416
368
|
|
|
417
369
|
const cleanedResults = [];
|
|
418
370
|
let totalRulesRemoved = 0;
|
|
419
371
|
let sitesAffected = 0;
|
|
420
372
|
|
|
421
|
-
results.
|
|
422
|
-
|
|
373
|
+
for (let ri = 0; ri < results.length; ri++) {
|
|
374
|
+
const result = results[ri];
|
|
423
375
|
const siteConfig = urlToSiteConfig.get(result.url);
|
|
424
|
-
|
|
425
|
-
// Only clean if firstParty is explicitly set to false
|
|
426
376
|
const shouldCleanFirstParty = siteConfig && siteConfig.firstParty === false;
|
|
427
377
|
|
|
428
378
|
if (!shouldCleanFirstParty || !result.rules || result.rules.length === 0) {
|
|
429
379
|
cleanedResults.push(result);
|
|
430
|
-
|
|
380
|
+
continue;
|
|
431
381
|
}
|
|
432
382
|
|
|
433
383
|
if (forceDebug) {
|
|
434
|
-
console.log(formatLogMessage('debug',
|
|
384
|
+
console.log(formatLogMessage('debug', '[cleanup] Processing ' + result.url + ' (firstParty: false detected)'));
|
|
435
385
|
}
|
|
436
386
|
|
|
437
|
-
// Get the scanned domain for this specific result
|
|
438
387
|
const scannedDomain = safeGetDomain(result.url, false);
|
|
439
388
|
if (!scannedDomain) {
|
|
440
389
|
cleanedResults.push(result);
|
|
441
|
-
|
|
390
|
+
continue;
|
|
442
391
|
}
|
|
443
392
|
|
|
444
|
-
const originalRulesCount = result.rules.length;
|
|
445
393
|
const cleanedRules = [];
|
|
446
394
|
const removedRules = [];
|
|
447
395
|
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
let
|
|
451
|
-
let extractedDomain = null;
|
|
396
|
+
for (let j = 0; j < result.rules.length; j++) {
|
|
397
|
+
const rule = result.rules[j];
|
|
398
|
+
let kept = true;
|
|
452
399
|
|
|
453
400
|
try {
|
|
454
|
-
//
|
|
455
|
-
|
|
456
|
-
// ||domain.com^ format (adblock)
|
|
457
|
-
const match = rule.match(/^\|\|([^/\^]+)/);
|
|
458
|
-
if (match) {
|
|
459
|
-
extractedDomain = match[1];
|
|
460
|
-
}
|
|
461
|
-
} else if (rule.startsWith('127.0.0.1 ') || rule.startsWith('0.0.0.0 ')) {
|
|
462
|
-
// hosts file format
|
|
463
|
-
const parts = rule.split(/\s+/);
|
|
464
|
-
if (parts.length >= 2) {
|
|
465
|
-
extractedDomain = parts[1];
|
|
466
|
-
}
|
|
467
|
-
} else if (rule.includes('local=/') && rule.includes('/')) {
|
|
468
|
-
// dnsmasq format: local=/domain.com/
|
|
469
|
-
const match = rule.match(/local=\/([^/]+)\//);
|
|
470
|
-
if (match) {
|
|
471
|
-
extractedDomain = match[1];
|
|
472
|
-
}
|
|
473
|
-
} else if (rule.includes('server=/') && rule.includes('/')) {
|
|
474
|
-
// dnsmasq old format: server=/domain.com/
|
|
475
|
-
const match = rule.match(/server=\/([^/]+)\//);
|
|
476
|
-
if (match) {
|
|
477
|
-
extractedDomain = match[1];
|
|
478
|
-
}
|
|
479
|
-
} else if (rule.includes('local-zone:') && rule.includes('always_null')) {
|
|
480
|
-
// unbound format: local-zone: "domain.com." always_null
|
|
481
|
-
const match = rule.match(/local-zone:\s*"([^"]+)\.?"/);
|
|
482
|
-
if (match) {
|
|
483
|
-
extractedDomain = match[1];
|
|
484
|
-
}
|
|
485
|
-
} else if (rule.includes('+block') && rule.includes('.')) {
|
|
486
|
-
// privoxy format: { +block } .domain.com
|
|
487
|
-
const match = rule.match(/\{\s*\+block\s*\}\s*\.?([^\s]+)/);
|
|
488
|
-
if (match) {
|
|
489
|
-
extractedDomain = match[1];
|
|
490
|
-
}
|
|
491
|
-
} else if (rule.match(/^\(\^\|\\\.\).*\\\.\w+\$$/)) {
|
|
492
|
-
// pi-hole regex format: (^|\.)domain\.com$
|
|
493
|
-
const match = rule.match(/^\(\^\|\\\.\)(.+)\\\.\w+\$$/);
|
|
494
|
-
if (match) {
|
|
495
|
-
// Unescape the domain
|
|
496
|
-
extractedDomain = match[1].replace(/\\\./g, '.');
|
|
497
|
-
}
|
|
498
|
-
} else {
|
|
499
|
-
// Try to extract any domain-like pattern as fallback
|
|
500
|
-
const domainMatch = rule.match(/([a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,})/);
|
|
501
|
-
if (domainMatch) {
|
|
502
|
-
extractedDomain = domainMatch[1];
|
|
503
|
-
}
|
|
504
|
-
}
|
|
401
|
+
// Use shared extractDomainFromRule (was duplicated inline)
|
|
402
|
+
const extractedDomain = extractDomainFromRule(rule);
|
|
505
403
|
|
|
506
|
-
// Check if extracted domain is a first-party domain
|
|
507
404
|
if (extractedDomain) {
|
|
508
405
|
const matchResult = shouldRemoveAsFirstParty(extractedDomain, scannedDomain, forceDebug);
|
|
509
406
|
|
|
@@ -512,35 +409,42 @@ function cleanupFirstPartyDomains(results, sites, options = {}) {
|
|
|
512
409
|
rule: rule,
|
|
513
410
|
domain: extractedDomain,
|
|
514
411
|
rootDomain: scannedDomain,
|
|
515
|
-
reason:
|
|
412
|
+
reason: 'First-party: ' + matchResult.reason + ' (firstParty: false)',
|
|
516
413
|
matchType: matchResult.reason.includes('Wildcard') ? 'wildcard' : 'exact'
|
|
517
414
|
});
|
|
518
|
-
|
|
415
|
+
kept = false;
|
|
519
416
|
}
|
|
520
417
|
}
|
|
521
418
|
} catch (parseErr) {
|
|
522
419
|
if (forceDebug) {
|
|
523
|
-
console.log(formatLogMessage('debug',
|
|
420
|
+
console.log(formatLogMessage('debug', '[cleanup] Failed to parse rule: ' + rule + ' - ' + parseErr.message));
|
|
524
421
|
}
|
|
525
422
|
}
|
|
526
423
|
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
424
|
+
if (kept) {
|
|
425
|
+
cleanedRules.push(rule);
|
|
426
|
+
}
|
|
427
|
+
}
|
|
530
428
|
|
|
531
|
-
|
|
429
|
+
// Mutate rules directly instead of { ...result, rules: cleanedRules }
|
|
430
|
+
result.rules = cleanedRules;
|
|
431
|
+
cleanedResults.push(result);
|
|
532
432
|
|
|
533
433
|
if (removedRules.length > 0) {
|
|
534
434
|
sitesAffected++;
|
|
535
435
|
totalRulesRemoved += removedRules.length;
|
|
536
436
|
|
|
537
437
|
if (!silentMode) {
|
|
538
|
-
|
|
539
|
-
|
|
438
|
+
// Single-pass count
|
|
439
|
+
let wildcardCount = 0;
|
|
440
|
+
for (let k = 0; k < removedRules.length; k++) {
|
|
441
|
+
if (removedRules[k].matchType === 'wildcard') wildcardCount++;
|
|
442
|
+
}
|
|
443
|
+
const exactCount = removedRules.length - wildcardCount;
|
|
540
444
|
|
|
541
|
-
let cleanupMessage =
|
|
445
|
+
let cleanupMessage = '?? Cleaned ' + removedRules.length + ' first-party rule(s) from ' + scannedDomain + ' (firstParty: false)';
|
|
542
446
|
if (wildcardCount > 0) {
|
|
543
|
-
cleanupMessage +=
|
|
447
|
+
cleanupMessage += ' [' + wildcardCount + ' wildcard, ' + exactCount + ' exact]';
|
|
544
448
|
}
|
|
545
449
|
if (messageColors && messageColors.cleanup) {
|
|
546
450
|
console.log(messageColors.cleanup(cleanupMessage));
|
|
@@ -550,17 +454,17 @@ function cleanupFirstPartyDomains(results, sites, options = {}) {
|
|
|
550
454
|
}
|
|
551
455
|
|
|
552
456
|
if (forceDebug) {
|
|
553
|
-
console.log(formatLogMessage('debug',
|
|
554
|
-
removedRules.
|
|
555
|
-
console.log(formatLogMessage('debug',
|
|
556
|
-
}
|
|
457
|
+
console.log(formatLogMessage('debug', '[cleanup] Removed rules from ' + result.url + ':'));
|
|
458
|
+
for (let k = 0; k < removedRules.length; k++) {
|
|
459
|
+
console.log(formatLogMessage('debug', ' [' + (k + 1) + '] ' + removedRules[k].rule + ' (' + removedRules[k].reason + ') [' + removedRules[k].matchType + ']'));
|
|
460
|
+
}
|
|
557
461
|
}
|
|
558
462
|
}
|
|
559
|
-
}
|
|
463
|
+
}
|
|
560
464
|
|
|
561
465
|
// Summary
|
|
562
466
|
if (totalRulesRemoved > 0 && !silentMode) {
|
|
563
|
-
const summaryMessage =
|
|
467
|
+
const summaryMessage = '\n?? First-party cleanup completed: Removed ' + totalRulesRemoved + ' rules from ' + sitesAffected + ' site(s) with firstParty: false';
|
|
564
468
|
if (messageColors && messageColors.cleanup) {
|
|
565
469
|
console.log(messageColors.cleanup(summaryMessage));
|
|
566
470
|
} else {
|
|
@@ -592,47 +496,66 @@ function validateScanResults(results, options = {}) {
|
|
|
592
496
|
let totalValidated = 0;
|
|
593
497
|
let totalRemoved = 0;
|
|
594
498
|
|
|
595
|
-
|
|
499
|
+
// Pre-strip wildcards from ignore patterns once (was done per rule per pattern)
|
|
500
|
+
let strippedIgnorePatterns = null;
|
|
501
|
+
if (ignoreDomains.length > 0) {
|
|
502
|
+
strippedIgnorePatterns = new Array(ignoreDomains.length);
|
|
503
|
+
for (let i = 0; i < ignoreDomains.length; i++) {
|
|
504
|
+
strippedIgnorePatterns[i] = ignoreDomains[i].replace('*', '');
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
for (let ri = 0; ri < results.length; ri++) {
|
|
509
|
+
const result = results[ri];
|
|
596
510
|
if (!result.rules || result.rules.length === 0) {
|
|
597
|
-
|
|
511
|
+
continue;
|
|
598
512
|
}
|
|
599
513
|
|
|
600
514
|
const originalCount = result.rules.length;
|
|
601
|
-
const validRules =
|
|
602
|
-
|
|
515
|
+
const validRules = [];
|
|
516
|
+
|
|
517
|
+
for (let j = 0; j < result.rules.length; j++) {
|
|
518
|
+
const rule = result.rules[j];
|
|
519
|
+
|
|
520
|
+
// Basic validation
|
|
603
521
|
if (!rule || typeof rule !== 'string' || rule.trim().length === 0) {
|
|
604
522
|
if (forceDebug) {
|
|
605
|
-
console.log(formatLogMessage('debug',
|
|
523
|
+
console.log(formatLogMessage('debug', '[validation] Removed empty/invalid rule'));
|
|
606
524
|
}
|
|
607
525
|
totalRemoved++;
|
|
608
|
-
|
|
526
|
+
continue;
|
|
609
527
|
}
|
|
610
528
|
|
|
611
|
-
// Check against ignore
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
529
|
+
// Check against stripped ignore patterns
|
|
530
|
+
let ignored = false;
|
|
531
|
+
if (strippedIgnorePatterns) {
|
|
532
|
+
for (let k = 0; k < strippedIgnorePatterns.length; k++) {
|
|
533
|
+
if (rule.includes(strippedIgnorePatterns[k])) {
|
|
615
534
|
if (forceDebug) {
|
|
616
|
-
console.log(formatLogMessage('debug',
|
|
535
|
+
console.log(formatLogMessage('debug', '[validation] Removed rule matching ignore pattern: ' + ignoreDomains[k]));
|
|
617
536
|
}
|
|
618
537
|
totalRemoved++;
|
|
619
|
-
|
|
538
|
+
ignored = true;
|
|
539
|
+
break;
|
|
620
540
|
}
|
|
621
541
|
}
|
|
622
542
|
}
|
|
623
543
|
|
|
624
|
-
|
|
625
|
-
|
|
544
|
+
if (!ignored) {
|
|
545
|
+
validRules.push(rule);
|
|
546
|
+
}
|
|
547
|
+
}
|
|
626
548
|
|
|
627
549
|
totalValidated += originalCount;
|
|
628
|
-
|
|
629
|
-
|
|
550
|
+
// Mutate in place instead of spread
|
|
551
|
+
result.rules = validRules;
|
|
552
|
+
}
|
|
630
553
|
|
|
631
554
|
if (forceDebug && totalRemoved > 0) {
|
|
632
|
-
console.log(formatLogMessage('debug',
|
|
555
|
+
console.log(formatLogMessage('debug', '[validation] Validated ' + totalValidated + ' rules, removed ' + totalRemoved + ' invalid rules'));
|
|
633
556
|
}
|
|
634
557
|
|
|
635
|
-
return
|
|
558
|
+
return results;
|
|
636
559
|
}
|
|
637
560
|
|
|
638
561
|
|
|
@@ -643,6 +566,7 @@ function validateScanResults(results, options = {}) {
|
|
|
643
566
|
* @param {Array} results - Array of scan results
|
|
644
567
|
* @param {Array} sites - Array of site configurations
|
|
645
568
|
* @param {Object} options - Options object
|
|
569
|
+
* @param {Map} [options._urlToSiteConfig] - Pre-built URL mapping (internal optimization)
|
|
646
570
|
* @returns {Array} Results with any remaining first-party domains removed
|
|
647
571
|
*/
|
|
648
572
|
function finalFirstPartyValidation(results, sites, options = {}) {
|
|
@@ -652,64 +576,57 @@ function finalFirstPartyValidation(results, sites, options = {}) {
|
|
|
652
576
|
return results;
|
|
653
577
|
}
|
|
654
578
|
|
|
655
|
-
//
|
|
656
|
-
const urlToSiteConfig =
|
|
657
|
-
sites.forEach(site => {
|
|
658
|
-
const urls = Array.isArray(site.url) ? site.url : [site.url];
|
|
659
|
-
urls.forEach(url => {
|
|
660
|
-
urlToSiteConfig.set(url, site);
|
|
661
|
-
});
|
|
662
|
-
});
|
|
579
|
+
// Use pre-built map if passed, otherwise build it
|
|
580
|
+
const urlToSiteConfig = options._urlToSiteConfig || buildUrlToSiteConfig(sites);
|
|
663
581
|
|
|
664
582
|
const finalResults = [];
|
|
665
583
|
let totalViolationsFound = 0;
|
|
666
584
|
let sitesWithViolations = 0;
|
|
667
585
|
|
|
668
|
-
results.
|
|
586
|
+
for (let ri = 0; ri < results.length; ri++) {
|
|
587
|
+
const result = results[ri];
|
|
669
588
|
const siteConfig = urlToSiteConfig.get(result.url);
|
|
670
|
-
|
|
671
|
-
// Only validate sites with firstParty: false
|
|
672
589
|
const shouldValidate = siteConfig && siteConfig.firstParty === false;
|
|
673
590
|
|
|
674
591
|
if (!shouldValidate || !result.rules || result.rules.length === 0) {
|
|
675
592
|
finalResults.push(result);
|
|
676
|
-
|
|
593
|
+
continue;
|
|
677
594
|
}
|
|
678
595
|
|
|
679
596
|
const scannedDomain = safeGetDomain(result.url, false);
|
|
680
597
|
if (!scannedDomain) {
|
|
681
598
|
finalResults.push(result);
|
|
682
|
-
|
|
599
|
+
continue;
|
|
683
600
|
}
|
|
684
601
|
|
|
685
|
-
// Reuse the same filtering logic pattern from cleanupFirstPartyDomains
|
|
686
602
|
const cleanedRules = [];
|
|
687
603
|
const violatingRules = [];
|
|
688
604
|
|
|
689
|
-
result.rules.
|
|
605
|
+
for (let j = 0; j < result.rules.length; j++) {
|
|
606
|
+
const rule = result.rules[j];
|
|
690
607
|
const extractedDomain = extractDomainFromRule(rule);
|
|
608
|
+
|
|
691
609
|
if (extractedDomain) {
|
|
692
|
-
// Reuse the shouldRemoveAsFirstParty logic
|
|
693
610
|
const matchResult = shouldRemoveAsFirstParty(extractedDomain, scannedDomain, forceDebug);
|
|
694
611
|
|
|
695
612
|
if (matchResult.shouldRemove) {
|
|
696
613
|
violatingRules.push({
|
|
697
614
|
rule: rule,
|
|
698
615
|
domain: extractedDomain,
|
|
699
|
-
reason:
|
|
616
|
+
reason: 'VALIDATION FAILURE: ' + matchResult.reason
|
|
700
617
|
});
|
|
701
618
|
totalViolationsFound++;
|
|
702
|
-
|
|
619
|
+
continue;
|
|
703
620
|
}
|
|
704
621
|
}
|
|
705
622
|
cleanedRules.push(rule);
|
|
706
|
-
}
|
|
623
|
+
}
|
|
707
624
|
|
|
708
625
|
if (violatingRules.length > 0) {
|
|
709
626
|
sitesWithViolations++;
|
|
710
627
|
|
|
711
628
|
if (!silentMode) {
|
|
712
|
-
const errorMessage =
|
|
629
|
+
const errorMessage = '? CONFIG VIOLATION: Found ' + violatingRules.length + ' first-party rule(s) in ' + scannedDomain + ' (firstParty: false)';
|
|
713
630
|
if (messageColors && messageColors.error) {
|
|
714
631
|
console.log(messageColors.error(errorMessage));
|
|
715
632
|
} else {
|
|
@@ -718,19 +635,21 @@ function finalFirstPartyValidation(results, sites, options = {}) {
|
|
|
718
635
|
}
|
|
719
636
|
|
|
720
637
|
if (forceDebug) {
|
|
721
|
-
console.log(formatLogMessage('debug',
|
|
722
|
-
violatingRules.
|
|
723
|
-
console.log(formatLogMessage('debug',
|
|
724
|
-
}
|
|
638
|
+
console.log(formatLogMessage('debug', '[final-validation] Violations found for ' + result.url + ':'));
|
|
639
|
+
for (let k = 0; k < violatingRules.length; k++) {
|
|
640
|
+
console.log(formatLogMessage('debug', ' [' + (k + 1) + '] ' + violatingRules[k].rule + ' -> ' + violatingRules[k].domain));
|
|
641
|
+
}
|
|
725
642
|
}
|
|
726
643
|
}
|
|
727
644
|
|
|
728
|
-
|
|
729
|
-
|
|
645
|
+
// Mutate in place
|
|
646
|
+
result.rules = cleanedRules;
|
|
647
|
+
finalResults.push(result);
|
|
648
|
+
}
|
|
730
649
|
|
|
731
|
-
// Summary
|
|
650
|
+
// Summary
|
|
732
651
|
if (totalViolationsFound > 0 && !silentMode) {
|
|
733
|
-
const summaryMessage =
|
|
652
|
+
const summaryMessage = '\n? SCAN FILTERING FAILURE: Removed ' + totalViolationsFound + ' first-party rules from ' + sitesWithViolations + ' site(s) in post-processing';
|
|
734
653
|
console.log(summaryMessage);
|
|
735
654
|
console.log('?? This indicates firstParty: false filtering failed during scan - consider investigating root cause.');
|
|
736
655
|
} else if (forceDebug) {
|
|
@@ -755,24 +674,31 @@ function processResults(results, sites, options = {}) {
|
|
|
755
674
|
const { forceDebug = false, silentMode = false } = options;
|
|
756
675
|
|
|
757
676
|
if (forceDebug) {
|
|
758
|
-
console.log(formatLogMessage('debug',
|
|
677
|
+
console.log(formatLogMessage('debug', '[post-processing] Starting post-processing of ' + results.length + ' results'));
|
|
759
678
|
}
|
|
760
679
|
|
|
680
|
+
// Build URL-to-config map once, share across all steps
|
|
681
|
+
const urlToSiteConfig = buildUrlToSiteConfig(sites);
|
|
682
|
+
const sharedOptions = Object.assign({}, options, { _urlToSiteConfig: urlToSiteConfig });
|
|
683
|
+
|
|
761
684
|
// Step 1: Clean up first-party domains
|
|
762
|
-
let processedResults = cleanupFirstPartyDomains(results, sites,
|
|
685
|
+
let processedResults = cleanupFirstPartyDomains(results, sites, sharedOptions);
|
|
763
686
|
|
|
764
687
|
// Step 2: Clean up ignoreDomains (final safety net)
|
|
765
688
|
processedResults = cleanupIgnoreDomains(processedResults, options.ignoreDomains || [], options);
|
|
766
689
|
|
|
767
690
|
// Step 3: Final validation for firstParty: false configurations
|
|
768
|
-
processedResults = finalFirstPartyValidation(processedResults, sites,
|
|
691
|
+
processedResults = finalFirstPartyValidation(processedResults, sites, sharedOptions);
|
|
769
692
|
|
|
770
693
|
// Step 4: Validate results
|
|
771
694
|
processedResults = validateScanResults(processedResults, options);
|
|
772
695
|
|
|
773
696
|
if (forceDebug) {
|
|
774
|
-
|
|
775
|
-
|
|
697
|
+
let totalRules = 0;
|
|
698
|
+
for (let i = 0; i < processedResults.length; i++) {
|
|
699
|
+
totalRules += processedResults[i].rules ? processedResults[i].rules.length : 0;
|
|
700
|
+
}
|
|
701
|
+
console.log(formatLogMessage('debug', '[post-processing] Completed: ' + totalRules + ' total rules remaining'));
|
|
776
702
|
}
|
|
777
703
|
|
|
778
704
|
return processedResults;
|
|
@@ -785,4 +711,4 @@ module.exports = {
|
|
|
785
711
|
extractDomainFromRule,
|
|
786
712
|
validateScanResults,
|
|
787
713
|
processResults
|
|
788
|
-
};
|
|
714
|
+
};
|