@fanboynz/network-scanner 1.0.67 → 1.0.69
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/post-processing.js +607 -0
- package/nwss.js +144 -51
- package/package.json +1 -1
- package/regex-tool/index.html +713 -0
|
@@ -0,0 +1,607 @@
|
|
|
1
|
+
// === Post-Processing Module for Network Scanner ===
|
|
2
|
+
// Handles cleanup and validation of scan results after scanning is complete
|
|
3
|
+
|
|
4
|
+
const { formatLogMessage, messageColors } = require('./colorize');
|
|
5
|
+
|
|
6
|
+
/**
 * Safely extracts a hostname or root domain from a URL, handling malformed URLs gracefully.
 *
 * The URL is parsed first, so the full-hostname path does not depend on the
 * `psl` package at all; `psl` is only loaded when a root domain is requested.
 *
 * @param {string} url - The URL string to parse
 * @param {boolean} getFullHostname - If true, returns full hostname; if false, returns root domain
 * @returns {string} The hostname/domain, or empty string if the URL is invalid
 *   (or, for root-domain lookups, if `psl` cannot be loaded or fails)
 */
function safeGetDomain(url, getFullHostname = false) {
  let hostname;
  try {
    hostname = new URL(url).hostname;
  } catch (urlError) {
    // Malformed URL: callers treat '' as "no domain available"
    return '';
  }

  if (getFullHostname) {
    return hostname;
  }

  try {
    // Loaded lazily so hostname-only callers never pay for (or fail on) psl
    const psl = require('psl');
    const parsed = psl.parse(hostname);
    // psl yields no `domain` for hosts it cannot split; fall back to the raw hostname
    return parsed.domain || hostname;
  } catch (pslError) {
    return '';
  }
}
|
|
27
|
+
|
|
28
|
+
/**
 * Enhanced domain matching for ignoreDomains patterns (including wildcards).
 *
 * Supported pattern shapes, checked in this order for each pattern:
 *  - `*.example.com` : matches when the pattern and the domain share the same
 *    root domain (as computed by safeGetDomain)
 *  - `example.*`     : matches when the domain starts with `example.`
 *  - other `*` usage : `*` matches any run of characters, everything else is literal
 *  - no wildcard     : matches the domain itself or any subdomain of it
 *
 * @param {string} domain - Domain to check
 * @param {Array} ignorePatterns - Array of ignore patterns (supports wildcards)
 * @param {boolean} forceDebug - Debug logging flag (currently unused, kept for interface compatibility)
 * @returns {Object} Match result with shouldIgnore flag and reason
 */
function shouldIgnoreAsIgnoreDomain(domain, ignorePatterns, forceDebug) {
  if (!domain || !ignorePatterns || ignorePatterns.length === 0) {
    return { shouldIgnore: false, reason: 'No ignore patterns' };
  }

  for (const pattern of ignorePatterns) {
    // Skip entries that cannot be matched instead of throwing on them
    if (typeof pattern !== 'string' || pattern.length === 0) {
      continue;
    }

    if (!pattern.includes('*')) {
      // Exact pattern matching (domain itself or any subdomain)
      if (domain === pattern || domain.endsWith('.' + pattern)) {
        return {
          shouldIgnore: true,
          reason: `Matches exact ignore pattern: ${pattern}`
        };
      }
      continue;
    }

    if (pattern.startsWith('*.')) {
      // Pattern: *.example.com
      const wildcardDomain = pattern.substring(2); // Remove "*."
      const wildcardRoot = safeGetDomain(`http://${wildcardDomain}`, false);
      const domainRoot = safeGetDomain(`http://${domain}`, false);

      // safeGetDomain returns '' on failure; two failures must not count as a match
      if (wildcardRoot && wildcardRoot === domainRoot) {
        return {
          shouldIgnore: true,
          reason: `Matches wildcard ignore pattern: ${pattern}`
        };
      }
    } else if (pattern.endsWith('.*')) {
      // Pattern: example.*
      const baseDomain = pattern.slice(0, -2); // Remove ".*"
      if (domain.startsWith(baseDomain + '.')) {
        return {
          shouldIgnore: true,
          reason: `Matches wildcard TLD ignore pattern: ${pattern}`
        };
      }
    } else {
      // Complex wildcard pattern.
      // Escape regex metacharacters FIRST, then expand `*`; doing it the other
      // way round turns the inserted `.*` into `\.*` and breaks the wildcard.
      const escapedPattern = pattern
        .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
        .replace(/\*/g, '.*');
      const wildcardRegex = new RegExp('^' + escapedPattern + '$');
      if (wildcardRegex.test(domain)) {
        return {
          shouldIgnore: true,
          reason: `Matches complex wildcard ignore pattern: ${pattern}`
        };
      }
    }
  }

  return { shouldIgnore: false, reason: 'No ignore pattern matches' };
}
|
|
87
|
+
|
|
88
|
+
/**
 * Enhanced domain matching that handles wildcards and first-party detection.
 *
 * Wildcard forms are tried first (`*.example.com`, `example.*`, then any other
 * `*` placement); if none matches, the root domain of the extracted domain is
 * compared with the scanned root domain.
 *
 * @param {string} extractedDomain - Domain extracted from rule
 * @param {string} scannedRootDomain - Root domain of the scanned site
 * @param {boolean} forceDebug - Debug logging flag (currently unused, kept for interface compatibility)
 * @returns {Object} Match result with shouldRemove flag and reason
 */
function shouldRemoveAsFirstParty(extractedDomain, scannedRootDomain, forceDebug) {
  if (!extractedDomain || !scannedRootDomain) {
    return { shouldRemove: false, reason: 'Missing domain data' };
  }

  // Handle wildcard patterns
  if (extractedDomain.includes('*')) {
    if (extractedDomain.startsWith('*.')) {
      // Pattern: *.example.com
      const wildcardDomain = extractedDomain.substring(2); // Remove "*."
      const wildcardRoot = safeGetDomain(`http://${wildcardDomain}`, false);

      if (wildcardRoot === scannedRootDomain) {
        return {
          shouldRemove: true,
          reason: `Wildcard subdomain pattern matches root domain (*.${wildcardRoot})`
        };
      }
    } else if (extractedDomain.endsWith('.*')) {
      // Pattern: example.*
      const baseDomain = extractedDomain.slice(0, -2); // Remove ".*"
      if (scannedRootDomain.startsWith(baseDomain + '.')) {
        return {
          shouldRemove: true,
          reason: `Wildcard TLD pattern matches base domain (${baseDomain}.*)`
        };
      }
    } else {
      // Pattern: sub*.example.com or other wildcard positions.
      // Escape regex metacharacters FIRST, then expand `*`; the reverse order
      // escapes the dot of the inserted `.*` and the wildcard stops working.
      const escapedPattern = extractedDomain
        .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
        .replace(/\*/g, '.*');
      const wildcardRegex = new RegExp('^' + escapedPattern + '$');
      if (wildcardRegex.test(scannedRootDomain)) {
        return {
          shouldRemove: true,
          reason: `Complex wildcard pattern matches root domain (${extractedDomain})`
        };
      }
    }
  }

  // Standard exact root domain matching
  const extractedRoot = safeGetDomain(`http://${extractedDomain}`, false);
  if (extractedRoot === scannedRootDomain) {
    return {
      shouldRemove: true,
      reason: `Exact root domain match (${extractedRoot})`
    };
  }

  return { shouldRemove: false, reason: 'No first-party match detected' };
}
|
|
146
|
+
|
|
147
|
+
/**
 * Extracts the domain targeted by a single output rule.
 * Private helper for cleanupIgnoreDomains.
 *
 * Recognised formats: adblock (`||domain^`), hosts (`127.0.0.1 domain` /
 * `0.0.0.0 domain`), dnsmasq (`local=/domain/`, `server=/domain/`), unbound
 * (`local-zone: "domain." always_null`), privoxy (`{ +block } .domain`),
 * pi-hole regex (`(^|\.)domain\.com$`), and a generic domain-like fallback.
 *
 * @param {string} rule - Rule text; a non-string value makes this throw (callers catch)
 * @returns {string|null} The extracted domain, or null when none was found
 */
function extractDomainFromIgnoreCleanupRule(rule) {
  let match = null;

  if (rule.startsWith('||') && rule.includes('^')) {
    // ||domain.com^ format (adblock)
    match = rule.match(/^\|\|([^/\^]+)/);
  } else if (rule.startsWith('127.0.0.1 ') || rule.startsWith('0.0.0.0 ')) {
    // hosts file format
    const parts = rule.split(/\s+/);
    return parts.length >= 2 && parts[1] ? parts[1] : null;
  } else if (rule.includes('local=/')) {
    // dnsmasq format: local=/domain.com/
    match = rule.match(/local=\/([^/]+)\//);
  } else if (rule.includes('server=/')) {
    // dnsmasq old format: server=/domain.com/
    match = rule.match(/server=\/([^/]+)\//);
  } else if (rule.includes('local-zone:') && rule.includes('always_null')) {
    // unbound format: local-zone: "domain.com." always_null
    // Lazy capture so the trailing root dot is NOT part of the domain
    match = rule.match(/local-zone:\s*"([^"]+?)\.?"/);
  } else if (rule.includes('+block') && rule.includes('.')) {
    // privoxy format: { +block } .domain.com
    match = rule.match(/\{\s*\+block\s*\}\s*\.?([^\s]+)/);
  } else if (/^\(\^\|\\\.\).*\\\.\w+\$$/.test(rule)) {
    // pi-hole regex format: (^|\.)domain\.com$
    // The capture includes the TLD; then the escaped dots are unescaped
    match = rule.match(/^\(\^\|\\\.\)(.+\\\.\w+)\$$/);
    return match ? match[1].replace(/\\\./g, '.') : null;
  } else {
    // Try to extract any domain-like pattern as fallback
    match = rule.match(/([a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,})/);
  }

  return match ? match[1] : null;
}

/**
 * Post-scan cleanup function to remove ignoreDomains from results.
 * This is a final safety net to catch any domains that should have been ignored.
 *
 * Input result objects are not mutated; affected results are shallow-copied
 * with a filtered `rules` array. Rules that cannot be parsed are kept.
 *
 * @param {Array} results - Array of scan results from all sites
 * @param {Array} ignoreDomains - Array of domains/patterns to ignore
 * @param {Object} options - Options object
 * @param {boolean} options.forceDebug - Debug logging flag
 * @param {boolean} options.silentMode - Silent mode flag
 * @returns {Array} Cleaned results with ignoreDomains removed
 */
function cleanupIgnoreDomains(results, ignoreDomains, options = {}) {
  const { forceDebug = false, silentMode = false } = options;

  if (!results || results.length === 0 || !ignoreDomains || ignoreDomains.length === 0) {
    return results;
  }

  if (forceDebug) {
    console.log(formatLogMessage('debug', `[ignoreDomains cleanup] Processing ${results.length} results against ${ignoreDomains.length} ignore patterns`));
  }

  const cleanedResults = [];
  let totalRulesRemoved = 0;
  let sitesAffected = 0;
  // Running totals for the summary line (tracked here because removed rules
  // are not stored on the result objects)
  let totalWildcardCount = 0;
  let totalExactCount = 0;

  results.forEach(result => {
    if (!result.rules || result.rules.length === 0) {
      cleanedResults.push(result);
      return;
    }

    const cleanedRules = [];
    const removedRules = [];

    // Filter out rules that match ignoreDomains patterns
    result.rules.forEach(rule => {
      try {
        const extractedDomain = extractDomainFromIgnoreCleanupRule(rule);

        if (extractedDomain) {
          const ignoreResult = shouldIgnoreAsIgnoreDomain(extractedDomain, ignoreDomains, forceDebug);

          if (ignoreResult.shouldIgnore) {
            // Classify from the reason label only (text before ':'), so a
            // pattern that itself contains "wildcard" is not misclassified
            const reasonLabel = ignoreResult.reason.split(':')[0];
            removedRules.push({
              rule: rule,
              domain: extractedDomain,
              reason: `ignoreDomains: ${ignoreResult.reason}`,
              matchType: reasonLabel.includes('wildcard') ? 'wildcard' : 'exact'
            });
            return; // Exit early - rule should be removed
          }
        }
      } catch (parseErr) {
        // Best effort: an unparseable rule is kept rather than dropped
        if (forceDebug) {
          console.log(formatLogMessage('debug', `[ignoreDomains cleanup] Failed to parse rule: ${rule} - ${parseErr.message}`));
        }
      }

      // If we reach here, the rule should be kept
      cleanedRules.push(rule);
    });

    cleanedResults.push({ ...result, rules: cleanedRules });

    if (removedRules.length > 0) {
      const wildcardCount = removedRules.filter(r => r.matchType === 'wildcard').length;
      const exactCount = removedRules.length - wildcardCount;

      sitesAffected++;
      totalRulesRemoved += removedRules.length;
      totalWildcardCount += wildcardCount;
      totalExactCount += exactCount;

      if (!silentMode) {
        let cleanupMessage = `?? Removed ${removedRules.length} ignoreDomains rule(s) from ${safeGetDomain(result.url)} (final cleanup)`;
        if (wildcardCount > 0) {
          cleanupMessage += ` [${wildcardCount} wildcard, ${exactCount} exact]`;
        }

        if (messageColors && messageColors.cleanup) {
          console.log(messageColors.cleanup(cleanupMessage));
        } else {
          console.log(cleanupMessage);
        }
      }
      if (forceDebug) {
        console.log(formatLogMessage('debug', `[ignoreDomains cleanup] Removed rules from ${result.url}:`));
        removedRules.forEach((removed, idx) => {
          console.log(formatLogMessage('debug', ` [${idx + 1}] ${removed.rule} (${removed.reason}) [${removed.matchType}]`));
        });
      }
    }
  });

  // Summary
  if (totalRulesRemoved > 0 && !silentMode) {
    const summaryMessage = `\n?? ignoreDomains cleanup completed: Removed ${totalRulesRemoved} rules from ${sitesAffected} site(s)` +
      (totalWildcardCount > 0 ? ` [${totalWildcardCount} wildcard patterns, ${totalExactCount} exact matches]` : '');
    if (messageColors && messageColors.cleanup) {
      console.log(messageColors.cleanup(summaryMessage));
    } else {
      console.log(summaryMessage);
    }
  } else if (forceDebug) {
    console.log(formatLogMessage('debug', '[ignoreDomains cleanup] No ignoreDomains rules found to remove'));
  }

  return cleanedResults;
}
|
|
318
|
+
|
|
319
|
+
/**
 * Extracts the domain targeted by a single output rule.
 * Private helper for cleanupFirstPartyDomains.
 *
 * Recognised formats: adblock (`||domain^`), hosts (`127.0.0.1 domain` /
 * `0.0.0.0 domain`), dnsmasq (`local=/domain/`, `server=/domain/`), unbound
 * (`local-zone: "domain." always_null`), privoxy (`{ +block } .domain`),
 * pi-hole regex (`(^|\.)domain\.com$`), and a generic domain-like fallback.
 *
 * @param {string} rule - Rule text; a non-string value makes this throw (callers catch)
 * @returns {string|null} The extracted domain, or null when none was found
 */
function extractDomainFromFirstPartyCleanupRule(rule) {
  let match = null;

  if (rule.startsWith('||') && rule.includes('^')) {
    // ||domain.com^ format (adblock)
    match = rule.match(/^\|\|([^/\^]+)/);
  } else if (rule.startsWith('127.0.0.1 ') || rule.startsWith('0.0.0.0 ')) {
    // hosts file format
    const parts = rule.split(/\s+/);
    return parts.length >= 2 && parts[1] ? parts[1] : null;
  } else if (rule.includes('local=/')) {
    // dnsmasq format: local=/domain.com/
    match = rule.match(/local=\/([^/]+)\//);
  } else if (rule.includes('server=/')) {
    // dnsmasq old format: server=/domain.com/
    match = rule.match(/server=\/([^/]+)\//);
  } else if (rule.includes('local-zone:') && rule.includes('always_null')) {
    // unbound format: local-zone: "domain.com." always_null
    // Lazy capture so the trailing root dot is NOT part of the domain
    match = rule.match(/local-zone:\s*"([^"]+?)\.?"/);
  } else if (rule.includes('+block') && rule.includes('.')) {
    // privoxy format: { +block } .domain.com
    match = rule.match(/\{\s*\+block\s*\}\s*\.?([^\s]+)/);
  } else if (/^\(\^\|\\\.\).*\\\.\w+\$$/.test(rule)) {
    // pi-hole regex format: (^|\.)domain\.com$
    // The capture includes the TLD; then the escaped dots are unescaped
    match = rule.match(/^\(\^\|\\\.\)(.+\\\.\w+)\$$/);
    return match ? match[1].replace(/\\\./g, '.') : null;
  } else {
    // Try to extract any domain-like pattern as fallback
    match = rule.match(/([a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,})/);
  }

  return match ? match[1] : null;
}

/**
 * Post-scan cleanup function to remove first-party domains from results.
 * Only processes sites that have firstParty: false in their configuration.
 *
 * Results are matched to site configs by exact `url` string. Input result
 * objects are not mutated; affected results are shallow-copied with a
 * filtered `rules` array. Rules that cannot be parsed are kept.
 *
 * @param {Array} results - Array of scan results from all sites
 * @param {Array} sites - Array of site configurations
 * @param {Object} options - Options object
 * @param {boolean} options.forceDebug - Debug logging flag
 * @param {boolean} options.silentMode - Silent mode flag
 * @returns {Array} Cleaned results with conditional first-party removal
 */
function cleanupFirstPartyDomains(results, sites, options = {}) {
  const { forceDebug = false, silentMode = false } = options;

  if (!results || results.length === 0) {
    return results;
  }

  // Build mapping of URLs to their site configs.
  // A missing/non-array `sites` simply yields an empty map (nothing is cleaned).
  const urlToSiteConfig = new Map();
  (Array.isArray(sites) ? sites : []).forEach(site => {
    const urls = Array.isArray(site.url) ? site.url : [site.url];
    urls.forEach(url => {
      urlToSiteConfig.set(url, site);
    });
  });

  const cleanedResults = [];
  let totalRulesRemoved = 0;
  let sitesAffected = 0;

  results.forEach(result => {
    // Find the site config for this result
    const siteConfig = urlToSiteConfig.get(result.url);

    // Only clean if firstParty is explicitly set to false
    const shouldCleanFirstParty = siteConfig && siteConfig.firstParty === false;

    if (!shouldCleanFirstParty || !result.rules || result.rules.length === 0) {
      cleanedResults.push(result);
      return;
    }

    if (forceDebug) {
      console.log(formatLogMessage('debug', `[cleanup] Processing ${result.url} (firstParty: false detected)`));
    }

    // Get the scanned domain for this specific result
    const scannedDomain = safeGetDomain(result.url, false);
    if (!scannedDomain) {
      cleanedResults.push(result);
      return;
    }

    const cleanedRules = [];
    const removedRules = [];

    // Filter out rules that match the scanned domain
    result.rules.forEach(rule => {
      try {
        const extractedDomain = extractDomainFromFirstPartyCleanupRule(rule);

        // Check if extracted domain is a first-party domain
        if (extractedDomain) {
          const matchResult = shouldRemoveAsFirstParty(extractedDomain, scannedDomain, forceDebug);

          if (matchResult.shouldRemove) {
            // Classify from the reason label only (text before the parenthesised
            // domain), case-insensitively so "Complex wildcard ..." counts too
            const reasonLabel = matchResult.reason.split('(')[0];
            removedRules.push({
              rule: rule,
              domain: extractedDomain,
              rootDomain: scannedDomain,
              reason: `First-party: ${matchResult.reason} (firstParty: false)`,
              matchType: /wildcard/i.test(reasonLabel) ? 'wildcard' : 'exact'
            });
            return; // Exit early - rule should be removed
          }
        }
      } catch (parseErr) {
        // Best effort: an unparseable rule is kept rather than dropped
        if (forceDebug) {
          console.log(formatLogMessage('debug', `[cleanup] Failed to parse rule: ${rule} - ${parseErr.message}`));
        }
      }

      // If we reach here, the rule should be kept
      cleanedRules.push(rule);
    });

    cleanedResults.push({ ...result, rules: cleanedRules });

    if (removedRules.length > 0) {
      sitesAffected++;
      totalRulesRemoved += removedRules.length;

      if (!silentMode) {
        const wildcardCount = removedRules.filter(r => r.matchType === 'wildcard').length;
        const exactCount = removedRules.filter(r => r.matchType === 'exact').length;

        let cleanupMessage = `?? Cleaned ${removedRules.length} first-party rule(s) from ${scannedDomain} (firstParty: false)`;
        if (wildcardCount > 0) {
          cleanupMessage += ` [${wildcardCount} wildcard, ${exactCount} exact]`;
        }
        if (messageColors && messageColors.cleanup) {
          console.log(messageColors.cleanup(cleanupMessage));
        } else {
          console.log(cleanupMessage);
        }
      }

      if (forceDebug) {
        console.log(formatLogMessage('debug', `[cleanup] Removed rules from ${result.url}:`));
        removedRules.forEach((removed, idx) => {
          console.log(formatLogMessage('debug', ` [${idx + 1}] ${removed.rule} (${removed.reason}) [${removed.matchType}]`));
        });
      }
    }
  });

  // Summary
  if (totalRulesRemoved > 0 && !silentMode) {
    const summaryMessage = `\n?? First-party cleanup completed: Removed ${totalRulesRemoved} rules from ${sitesAffected} site(s) with firstParty: false`;
    if (messageColors && messageColors.cleanup) {
      console.log(messageColors.cleanup(summaryMessage));
    } else {
      console.log(summaryMessage);
    }
  } else if (forceDebug) {
    console.log(formatLogMessage('debug', '[cleanup] No first-party rules found to remove'));
  }

  return cleanedResults;
}
|
|
504
|
+
|
|
505
|
+
/**
 * Checks whether every literal fragment occurs in the rule, in order.
 * Private helper for validateScanResults.
 *
 * @param {string} rule - Rule text to search
 * @param {Array<string>} fragments - Non-empty literal pieces of an ignore pattern
 * @returns {boolean} True when all fragments are found left-to-right without overlap
 */
function ruleContainsFragmentsInOrder(rule, fragments) {
  let searchFrom = 0;
  for (const fragment of fragments) {
    const foundAt = rule.indexOf(fragment, searchFrom);
    if (foundAt === -1) {
      return false;
    }
    searchFrom = foundAt + fragment.length;
  }
  return true;
}

/**
 * Validates scan results and removes any obvious false positives.
 *
 * A rule is dropped when it is empty / not a string, or when it contains the
 * literal text of an ignore pattern. `*` in a pattern acts as a wildcard: the
 * pattern is split on `*` and the remaining pieces must appear in the rule in
 * order. Patterns with no literal text at all (e.g. `*` or an empty string)
 * are skipped, because they would otherwise match - and remove - every rule.
 *
 * @param {Array} results - Array of scan results
 * @param {Object} options - Options object
 * @param {boolean} options.forceDebug - Debug logging flag
 * @param {Array} options.ignoreDomains - Domains to ignore
 * @returns {Array} Validated results
 */
function validateScanResults(results, options = {}) {
  const { forceDebug = false, ignoreDomains = [] } = options;

  if (!results || results.length === 0) {
    return results;
  }

  // Pre-split each usable pattern once instead of once per rule
  const ignoreMatchers = (Array.isArray(ignoreDomains) ? ignoreDomains : [])
    .filter(pattern => typeof pattern === 'string')
    .map(pattern => ({
      pattern,
      fragments: pattern.split('*').filter(fragment => fragment.length > 0)
    }))
    .filter(matcher => matcher.fragments.length > 0);

  let totalValidated = 0;
  let totalRemoved = 0;

  const validatedResults = results.map(result => {
    if (!result.rules || result.rules.length === 0) {
      return result;
    }

    const originalCount = result.rules.length;
    const validRules = result.rules.filter(rule => {
      // Basic validation - ensure rule isn't empty or malformed
      if (!rule || typeof rule !== 'string' || rule.trim().length === 0) {
        if (forceDebug) {
          console.log(formatLogMessage('debug', `[validation] Removed empty/invalid rule`));
        }
        totalRemoved++;
        return false;
      }

      // Check against ignore domains if provided
      const hit = ignoreMatchers.find(matcher => ruleContainsFragmentsInOrder(rule, matcher.fragments));
      if (hit) {
        if (forceDebug) {
          console.log(formatLogMessage('debug', `[validation] Removed rule matching ignore pattern: ${hit.pattern}`));
        }
        totalRemoved++;
        return false;
      }

      return true;
    });

    totalValidated += originalCount;
    return { ...result, rules: validRules };
  });

  if (forceDebug && totalRemoved > 0) {
    console.log(formatLogMessage('debug', `[validation] Validated ${totalValidated} rules, removed ${totalRemoved} invalid rules`));
  }

  return validatedResults;
}
|
|
566
|
+
|
|
567
|
+
/**
 * Main post-processing function that runs all cleanup and validation steps.
 *
 * Steps, in order: first-party cleanup, ignoreDomains cleanup, validation.
 * The same `options` object is forwarded to every step.
 *
 * @param {Array} results - Array of scan results from all sites
 * @param {Array} sites - Array of site configurations
 * @param {Object} options - Options object
 * @param {boolean} options.forceDebug - Debug logging flag
 * @param {boolean} options.silentMode - Silent mode flag
 * @param {Array} options.ignoreDomains - Domains to ignore during validation
 * @returns {Array} Fully processed and cleaned results
 */
function processResults(results, sites, options = {}) {
  const { forceDebug = false, ignoreDomains } = options;

  if (forceDebug) {
    // Debug logging must not throw when results is null/undefined
    const inputCount = Array.isArray(results) ? results.length : 0;
    console.log(formatLogMessage('debug', `[post-processing] Starting post-processing of ${inputCount} results`));
  }

  // Step 1: Clean up first-party domains
  let processedResults = cleanupFirstPartyDomains(results, sites, options);

  // Step 2: Clean up ignoreDomains (final safety net)
  processedResults = cleanupIgnoreDomains(processedResults, ignoreDomains || [], options);

  // Step 3: Validate results
  processedResults = validateScanResults(processedResults, options);

  if (forceDebug) {
    const totalRules = Array.isArray(processedResults)
      ? processedResults.reduce((sum, r) => sum + (r.rules ? r.rules.length : 0), 0)
      : 0;
    console.log(formatLogMessage('debug', `[post-processing] Completed: ${totalRules} total rules remaining`));
  }

  return processedResults;
}
|
|
601
|
+
|
|
602
|
+
module.exports = {
|
|
603
|
+
cleanupFirstPartyDomains,
|
|
604
|
+
cleanupIgnoreDomains,
|
|
605
|
+
validateScanResults,
|
|
606
|
+
processResults
|
|
607
|
+
};
|