@fanboynz/network-scanner 2.0.55 → 2.0.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,40 @@
2
2
  // Handles cleanup and validation of scan results after scanning is complete
3
3
 
4
4
  const { formatLogMessage, messageColors } = require('./colorize');
5
+ const psl = require('psl');
6
+
7
// Precompiled regex patterns (avoids recompilation per rule).
// Each pattern exposes the domain portion of one blocklist rule format in capture group 1.

// Adblock: ||domain.com^  -> everything after '||' up to the first '/' or '^'
const REGEX_ADBLOCK = /^\|\|([^/\^]+)/;
// dnsmasq: local=/domain.com/
const REGEX_DNSMASQ_LOCAL = /local=\/([^/]+)\//;
// dnsmasq (old syntax): server=/domain.com/
const REGEX_DNSMASQ_SERVER = /server=\/([^/]+)\//;
// Unbound: local-zone: "domain.com." always_null
// The capture is lazy so the optional trailing root dot is consumed by `\.?`
// instead of ending up inside the captured domain ("domain.com", not "domain.com.").
const REGEX_UNBOUND = /local-zone:\s*"([^"]+?)\.?"/;
// Privoxy: { +block } .domain.com  -> one optional leading dot is skipped
const REGEX_PRIVOXY = /\{\s*\+block\s*\}\s*\.?([^\s]+)/;
// Pi-hole regex: (^|\.)domain\.com$
// The capture spans the whole escaped domain INCLUDING the final "\.tld" part, so
// unescaping it yields "domain.com" rather than just "domain".
const REGEX_PIHOLE = /^\(\^\|\\\.\)(.+\\\.\w+)\$$/;
// Fallback: first domain-like token anywhere in the rule
const REGEX_DOMAIN_FALLBACK = /([a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,})/;
// Field separator for hosts-file lines
const REGEX_WHITESPACE = /\s+/;
// Turns the escaped dots of a Pi-hole regex ("\.") back into plain dots
const REGEX_UNESCAPE_DOT = /\\\./g;
17
+
18
// Cache for compiled wildcard regex patterns, keyed by the raw pattern string
const wildcardRegexCache = new Map();

// Upper bound on cached patterns; the oldest entry is dropped once it is exceeded
const WILDCARD_REGEX_CACHE_LIMIT = 200;

/**
 * Get or compile a wildcard pattern regex (cached).
 *
 * The pattern is matched against the whole input (anchored with ^ and $).
 * Every `*` matches any run of characters, including none. All other regex
 * metacharacters in the pattern are escaped and therefore match literally,
 * so a stray `+`, `?`, `(` or `[` can neither change the match semantics
 * nor make compilation throw.
 *
 * @param {string} pattern - Wildcard pattern string
 * @returns {RegExp} Compiled regex
 */
function getWildcardRegex(pattern) {
  let regex = wildcardRegexCache.get(pattern);
  if (regex === undefined) {
    // Escape first, expand '*' second: '*' is deliberately left out of the
    // escape set so the '.*' it expands to is not escaped afterwards.
    const source = pattern
      .replace(/[.+?^${}()|[\]\\]/g, '\\$&')
      .replace(/\*/g, '.*');
    regex = new RegExp('^' + source + '$');
    wildcardRegexCache.set(pattern, regex);

    // Cap cache size. Map iterates in insertion order, so the first key is the oldest.
    if (wildcardRegexCache.size > WILDCARD_REGEX_CACHE_LIMIT) {
      const oldestKey = wildcardRegexCache.keys().next().value;
      wildcardRegexCache.delete(oldestKey);
    }
  }
  return regex;
}
5
39
 
6
40
  /**
7
41
  * Safely extracts hostname from a URL, handling malformed URLs gracefully
@@ -11,20 +45,77 @@ const { formatLogMessage, messageColors } = require('./colorize');
11
45
  */
12
46
  function safeGetDomain(url, getFullHostname = false) {
13
47
  try {
14
- const psl = require('psl');
15
48
  const parsedUrl = new URL(url);
16
49
  if (getFullHostname) {
17
50
  return parsedUrl.hostname;
18
- } else {
19
- // Extract root domain using psl library
20
- const parsed = psl.parse(parsedUrl.hostname);
21
- return parsed.domain || parsedUrl.hostname;
22
51
  }
52
+ const parsed = psl.parse(parsedUrl.hostname);
53
+ return parsed.domain || parsedUrl.hostname;
23
54
  } catch (urlError) {
24
55
  return '';
25
56
  }
26
57
  }
27
58
 
59
/**
 * Enhanced domain extraction helper - single source of truth for all rule formats
 * (Was duplicated inline in cleanupIgnoreDomains and cleanupFirstPartyDomains)
 *
 * Formats are probed in a fixed order: adblock, hosts file, dnsmasq (local= then
 * server=), unbound, privoxy, Pi-hole regex, and finally a generic "first
 * domain-like token" fallback. Once a format's marker is recognised, the result
 * of that format's regex is final (null when it does not match). The one
 * exception is the Pi-hole check: its only marker is a leading '(', so a rule
 * that starts with '(' but is not a Pi-hole regex still reaches the fallback.
 *
 * @param {string} rule - Rule string in various formats
 * @returns {string|null} Extracted domain or null if not found
 */
function extractDomainFromRule(rule) {
  if (!rule || typeof rule !== 'string') {
    return null;
  }

  // Cheap first-character dispatch before any regex work
  const firstChar = rule.charCodeAt(0);

  // Adblock: ||domain.com^
  if (firstChar === 124 && rule.charCodeAt(1) === 124 && rule.includes('^')) { // '||' + '^'
    const match = REGEX_ADBLOCK.exec(rule);
    return match ? match[1] : null;
  }

  // Hosts file: 127.0.0.1 domain / 0.0.0.0 domain
  if (firstChar === 49 || firstChar === 48) { // '1' or '0'
    if (rule.startsWith('127.0.0.1 ') || rule.startsWith('0.0.0.0 ')) {
      const parts = rule.split(REGEX_WHITESPACE);
      // An address followed only by whitespace splits to an empty second field;
      // report that as "no domain" (null) rather than an empty string.
      return parts[1] || null;
    }
  }

  // dnsmasq: local=/domain.com/
  if (rule.includes('local=/')) {
    const match = REGEX_DNSMASQ_LOCAL.exec(rule);
    return match ? match[1] : null;
  }

  // dnsmasq old: server=/domain.com/
  if (rule.includes('server=/')) {
    const match = REGEX_DNSMASQ_SERVER.exec(rule);
    return match ? match[1] : null;
  }

  // Unbound: local-zone: "domain.com." always_null
  if (rule.includes('local-zone:') && rule.includes('always_null')) {
    const match = REGEX_UNBOUND.exec(rule);
    return match ? match[1] : null;
  }

  // Privoxy: { +block } .domain.com
  if (rule.includes('+block') && rule.includes('.')) {
    const match = REGEX_PRIVOXY.exec(rule);
    return match ? match[1] : null;
  }

  // Pi-hole regex: (^|\.)domain\.com$ -- single match (was tested then matched separately)
  if (firstChar === 40) { // '('
    const match = REGEX_PIHOLE.exec(rule);
    if (match) {
      // The capture still carries regex-escaped dots; turn them back into plain dots
      return match[1].replace(REGEX_UNESCAPE_DOT, '.');
    }
    // Not a Pi-hole regex after all: fall through to the generic fallback,
    // as the inline code this helper replaced did.
  }

  // Fallback: any domain-like pattern
  const domainMatch = REGEX_DOMAIN_FALLBACK.exec(rule);
  return domainMatch ? domainMatch[1] : null;
}
118
+
28
119
  /**
29
120
  * Enhanced domain matching for ignoreDomains patterns (including wildcards)
30
121
  * @param {string} domain - Domain to check
@@ -37,47 +128,35 @@ function shouldIgnoreAsIgnoreDomain(domain, ignorePatterns, forceDebug) {
37
128
  return { shouldIgnore: false, reason: 'No ignore patterns' };
38
129
  }
39
130
 
40
- for (const pattern of ignorePatterns) {
131
+ for (let i = 0; i < ignorePatterns.length; i++) {
132
+ const pattern = ignorePatterns[i];
41
133
  if (pattern.includes('*')) {
42
- // Handle wildcard patterns
43
134
  if (pattern.startsWith('*.')) {
44
135
  // Pattern: *.example.com
45
- const wildcardDomain = pattern.substring(2); // Remove "*."
46
- const wildcardRoot = safeGetDomain(`http://${wildcardDomain}`, false);
47
- const domainRoot = safeGetDomain(`http://${domain}`, false);
136
+ const wildcardDomain = pattern.substring(2);
137
+ const wildcardRoot = safeGetDomain('http://' + wildcardDomain, false);
138
+ const domainRoot = safeGetDomain('http://' + domain, false);
48
139
 
49
140
  if (wildcardRoot === domainRoot) {
50
- return {
51
- shouldIgnore: true,
52
- reason: `Matches wildcard ignore pattern: ${pattern}`
53
- };
141
+ return { shouldIgnore: true, reason: 'Matches wildcard ignore pattern: ' + pattern };
54
142
  }
55
143
  } else if (pattern.endsWith('.*')) {
56
144
  // Pattern: example.*
57
- const baseDomain = pattern.slice(0, -2); // Remove ".*"
145
+ const baseDomain = pattern.slice(0, -2);
58
146
  if (domain.startsWith(baseDomain + '.')) {
59
- return {
60
- shouldIgnore: true,
61
- reason: `Matches wildcard TLD ignore pattern: ${pattern}`
62
- };
147
+ return { shouldIgnore: true, reason: 'Matches wildcard TLD ignore pattern: ' + pattern };
63
148
  }
64
149
  } else {
65
- // Complex wildcard pattern
66
- const wildcardRegex = new RegExp('^' + pattern.replace(/\*/g, '.*').replace(/\./g, '\\.') + '$');
150
+ // Complex wildcard -- use cached regex
151
+ const wildcardRegex = getWildcardRegex(pattern);
67
152
  if (wildcardRegex.test(domain)) {
68
- return {
69
- shouldIgnore: true,
70
- reason: `Matches complex wildcard ignore pattern: ${pattern}`
71
- };
153
+ return { shouldIgnore: true, reason: 'Matches complex wildcard ignore pattern: ' + pattern };
72
154
  }
73
155
  }
74
156
  } else {
75
157
  // Exact pattern matching
76
158
  if (domain === pattern || domain.endsWith('.' + pattern)) {
77
- return {
78
- shouldIgnore: true,
79
- reason: `Matches exact ignore pattern: ${pattern}`
80
- };
159
+ return { shouldIgnore: true, reason: 'Matches exact ignore pattern: ' + pattern };
81
160
  }
82
161
  }
83
162
  }
@@ -97,53 +176,54 @@ function shouldRemoveAsFirstParty(extractedDomain, scannedRootDomain, forceDebug
97
176
  return { shouldRemove: false, reason: 'Missing domain data' };
98
177
  }
99
178
 
100
- // Handle wildcard patterns
101
179
  if (extractedDomain.includes('*')) {
102
- // Common wildcard patterns
103
180
  if (extractedDomain.startsWith('*.')) {
104
- // Pattern: *.example.com
105
- const wildcardDomain = extractedDomain.substring(2); // Remove "*."
106
- const wildcardRoot = safeGetDomain(`http://${wildcardDomain}`, false);
181
+ const wildcardDomain = extractedDomain.substring(2);
182
+ const wildcardRoot = safeGetDomain('http://' + wildcardDomain, false);
107
183
 
108
184
  if (wildcardRoot === scannedRootDomain) {
109
- return {
110
- shouldRemove: true,
111
- reason: `Wildcard subdomain pattern matches root domain (*.${wildcardRoot})`
112
- };
185
+ return { shouldRemove: true, reason: 'Wildcard subdomain pattern matches root domain (*.' + wildcardRoot + ')' };
113
186
  }
114
187
  } else if (extractedDomain.endsWith('.*')) {
115
- // Pattern: example.*
116
- const baseDomain = extractedDomain.slice(0, -2); // Remove ".*"
188
+ const baseDomain = extractedDomain.slice(0, -2);
117
189
  if (scannedRootDomain.startsWith(baseDomain + '.')) {
118
- return {
119
- shouldRemove: true,
120
- reason: `Wildcard TLD pattern matches base domain (${baseDomain}.*)`
121
- };
190
+ return { shouldRemove: true, reason: 'Wildcard TLD pattern matches base domain (' + baseDomain + '.*)' };
122
191
  }
123
- } else if (extractedDomain.includes('*')) {
124
- // Pattern: sub*.example.com or other wildcard positions
125
- const wildcardRegex = new RegExp('^' + extractedDomain.replace(/\*/g, '.*').replace(/\./g, '\\.') + '$');
192
+ } else {
193
+ // Complex wildcard -- use cached regex
194
+ const wildcardRegex = getWildcardRegex(extractedDomain);
126
195
  if (wildcardRegex.test(scannedRootDomain)) {
127
- return {
128
- shouldRemove: true,
129
- reason: `Complex wildcard pattern matches root domain (${extractedDomain})`
130
- };
196
+ return { shouldRemove: true, reason: 'Complex wildcard pattern matches root domain (' + extractedDomain + ')' };
131
197
  }
132
198
  }
133
199
  }
134
200
 
135
201
  // Standard exact root domain matching
136
- const extractedRoot = safeGetDomain(`http://${extractedDomain}`, false);
202
+ const extractedRoot = safeGetDomain('http://' + extractedDomain, false);
137
203
  if (extractedRoot === scannedRootDomain) {
138
- return {
139
- shouldRemove: true,
140
- reason: `Exact root domain match (${extractedRoot})`
141
- };
204
+ return { shouldRemove: true, reason: 'Exact root domain match (' + extractedRoot + ')' };
142
205
  }
143
206
 
144
207
  return { shouldRemove: false, reason: 'No first-party match detected' };
145
208
  }
146
209
 
210
/**
 * Build URL-to-site-config mapping (shared between cleanup functions).
 *
 * Each site contributes one entry per URL; `site.url` may be a single value or
 * an array of values. When the same URL appears more than once, the last site
 * listed wins. A `sites` argument that is not an array yields an empty map, and
 * null/undefined entries inside the array are skipped instead of throwing.
 *
 * @param {Array} sites - Array of site configurations
 * @returns {Map} URL to site config mapping
 */
function buildUrlToSiteConfig(sites) {
  const urlToSite = new Map();
  if (!Array.isArray(sites)) {
    return urlToSite;
  }

  for (let i = 0; i < sites.length; i++) {
    const site = sites[i];
    if (!site) {
      continue;
    }
    const urls = Array.isArray(site.url) ? site.url : [site.url];
    for (let j = 0; j < urls.length; j++) {
      urlToSite.set(urls[j], site);
    }
  }
  return urlToSite;
}
226
+
147
227
  /**
148
228
  * Post-scan cleanup function to remove ignoreDomains from results
149
229
  * This is a final safety net to catch any domains that should have been ignored
@@ -163,80 +243,31 @@ function cleanupIgnoreDomains(results, ignoreDomains, options = {}) {
163
243
  }
164
244
 
165
245
  if (forceDebug) {
166
- console.log(formatLogMessage('debug', `[ignoreDomains cleanup] Processing ${results.length} results against ${ignoreDomains.length} ignore patterns`));
246
+ console.log(formatLogMessage('debug', '[ignoreDomains cleanup] Processing ' + results.length + ' results against ' + ignoreDomains.length + ' ignore patterns'));
167
247
  }
168
248
 
169
249
  const cleanedResults = [];
170
250
  let totalRulesRemoved = 0;
171
251
  let sitesAffected = 0;
172
252
 
173
- results.forEach(result => {
253
+ for (let ri = 0; ri < results.length; ri++) {
254
+ const result = results[ri];
174
255
  if (!result.rules || result.rules.length === 0) {
175
256
  cleanedResults.push(result);
176
- return;
257
+ continue;
177
258
  }
178
259
 
179
- const originalRulesCount = result.rules.length;
180
260
  const cleanedRules = [];
181
261
  const removedRules = [];
182
262
 
183
- // Filter out rules that match ignoreDomains patterns
184
- result.rules.forEach(rule => {
185
- let extractedDomain = null;
263
+ for (let j = 0; j < result.rules.length; j++) {
264
+ const rule = result.rules[j];
265
+ let kept = true;
186
266
 
187
267
  try {
188
- // Extract domain from different rule formats (same logic as first-party cleanup)
189
- if (rule.startsWith('||') && rule.includes('^')) {
190
- // ||domain.com^ format (adblock)
191
- const match = rule.match(/^\|\|([^/\^]+)/);
192
- if (match) {
193
- extractedDomain = match[1];
194
- }
195
- } else if (rule.startsWith('127.0.0.1 ') || rule.startsWith('0.0.0.0 ')) {
196
- // hosts file format
197
- const parts = rule.split(/\s+/);
198
- if (parts.length >= 2) {
199
- extractedDomain = parts[1];
200
- }
201
- } else if (rule.includes('local=/') && rule.includes('/')) {
202
- // dnsmasq format: local=/domain.com/
203
- const match = rule.match(/local=\/([^/]+)\//);
204
- if (match) {
205
- extractedDomain = match[1];
206
- }
207
- } else if (rule.includes('server=/') && rule.includes('/')) {
208
- // dnsmasq old format: server=/domain.com/
209
- const match = rule.match(/server=\/([^/]+)\//);
210
- if (match) {
211
- extractedDomain = match[1];
212
- }
213
- } else if (rule.includes('local-zone:') && rule.includes('always_null')) {
214
- // unbound format: local-zone: "domain.com." always_null
215
- const match = rule.match(/local-zone:\s*"([^"]+)\.?"/);
216
- if (match) {
217
- extractedDomain = match[1];
218
- }
219
- } else if (rule.includes('+block') && rule.includes('.')) {
220
- // privoxy format: { +block } .domain.com
221
- const match = rule.match(/\{\s*\+block\s*\}\s*\.?([^\s]+)/);
222
- if (match) {
223
- extractedDomain = match[1];
224
- }
225
- } else if (rule.match(/^\(\^\|\\\.\).*\\\.\w+\$$/)) {
226
- // pi-hole regex format: (^|\.)domain\.com$
227
- const match = rule.match(/^\(\^\|\\\.\)(.+)\\\.\w+\$$/);
228
- if (match) {
229
- // Unescape the domain
230
- extractedDomain = match[1].replace(/\\\./g, '.');
231
- }
232
- } else {
233
- // Try to extract any domain-like pattern as fallback
234
- const domainMatch = rule.match(/([a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,})/);
235
- if (domainMatch) {
236
- extractedDomain = domainMatch[1];
237
- }
238
- }
239
- // Check if extracted domain should be ignored
268
+ // Use shared extractDomainFromRule (was duplicated inline)
269
+ const extractedDomain = extractDomainFromRule(rule);
270
+
240
271
  if (extractedDomain) {
241
272
  const ignoreResult = shouldIgnoreAsIgnoreDomain(extractedDomain, ignoreDomains, forceDebug);
242
273
 
@@ -244,35 +275,42 @@ function cleanupIgnoreDomains(results, ignoreDomains, options = {}) {
244
275
  removedRules.push({
245
276
  rule: rule,
246
277
  domain: extractedDomain,
247
- reason: `ignoreDomains: ${ignoreResult.reason}`,
278
+ reason: 'ignoreDomains: ' + ignoreResult.reason,
248
279
  matchType: ignoreResult.reason.includes('wildcard') ? 'wildcard' : 'exact'
249
280
  });
250
- return; // Exit early - rule should be removed
281
+ kept = false;
251
282
  }
252
283
  }
253
284
  } catch (parseErr) {
254
285
  if (forceDebug) {
255
- console.log(formatLogMessage('debug', `[ignoreDomains cleanup] Failed to parse rule: ${rule} - ${parseErr.message}`));
286
+ console.log(formatLogMessage('debug', '[ignoreDomains cleanup] Failed to parse rule: ' + rule + ' - ' + parseErr.message));
256
287
  }
257
288
  }
258
289
 
259
- // If we reach here, the rule should be kept
260
- cleanedRules.push(rule);
261
- });
290
+ if (kept) {
291
+ cleanedRules.push(rule);
292
+ }
293
+ }
262
294
 
263
- cleanedResults.push({ ...result, rules: cleanedRules });
295
+ // Mutate rules directly instead of spreading entire result object
296
+ result.rules = cleanedRules;
297
+ cleanedResults.push(result);
264
298
 
265
299
  if (removedRules.length > 0) {
266
300
  sitesAffected++;
267
301
  totalRulesRemoved += removedRules.length;
268
302
 
269
303
  if (!silentMode) {
270
- const wildcardCount = removedRules.filter(r => r.matchType === 'wildcard').length;
271
- const exactCount = removedRules.filter(r => r.matchType === 'exact').length;
304
+ // Single-pass count instead of two .filter() calls
305
+ let wildcardCount = 0;
306
+ for (let k = 0; k < removedRules.length; k++) {
307
+ if (removedRules[k].matchType === 'wildcard') wildcardCount++;
308
+ }
309
+ const exactCount = removedRules.length - wildcardCount;
272
310
 
273
- let cleanupMessage = `?? Removed ${removedRules.length} ignoreDomains rule(s) from ${safeGetDomain(result.url)} (final cleanup)`;
311
+ let cleanupMessage = '?? Removed ' + removedRules.length + ' ignoreDomains rule(s) from ' + safeGetDomain(result.url) + ' (final cleanup)';
274
312
  if (wildcardCount > 0) {
275
- cleanupMessage += ` [${wildcardCount} wildcard, ${exactCount} exact]`;
313
+ cleanupMessage += ' [' + wildcardCount + ' wildcard, ' + exactCount + ' exact]';
276
314
  }
277
315
 
278
316
  if (messageColors && messageColors.cleanup) {
@@ -282,28 +320,18 @@ function cleanupIgnoreDomains(results, ignoreDomains, options = {}) {
282
320
  }
283
321
  }
284
322
  if (forceDebug) {
285
- console.log(formatLogMessage('debug', `[ignoreDomains cleanup] Removed rules from ${result.url}:`));
286
- removedRules.forEach((removed, idx) => {
287
- console.log(formatLogMessage('debug', ` [${idx + 1}] ${removed.rule} (${removed.reason}) [${removed.matchType}]`));
288
- });
323
+ console.log(formatLogMessage('debug', '[ignoreDomains cleanup] Removed rules from ' + result.url + ':'));
324
+ for (let k = 0; k < removedRules.length; k++) {
325
+ console.log(formatLogMessage('debug', ' [' + (k + 1) + '] ' + removedRules[k].rule + ' (' + removedRules[k].reason + ') [' + removedRules[k].matchType + ']'));
326
+ }
289
327
  }
290
328
  }
291
- });
329
+ }
292
330
 
293
331
  // Summary
294
332
  if (totalRulesRemoved > 0 && !silentMode) {
295
- const allRemovedRules = cleanedResults.reduce((acc, result) => {
296
- if (result.removedIgnoreDomains) {
297
- acc.push(...result.removedIgnoreDomains);
298
- }
299
- return acc;
300
- }, []);
301
-
302
- const totalWildcardCount = allRemovedRules.filter(r => r.matchType === 'wildcard').length;
303
- const totalExactCount = allRemovedRules.filter(r => r.matchType === 'exact').length;
304
-
305
- const summaryMessage = `\n?? ignoreDomains cleanup completed: Removed ${totalRulesRemoved} rules from ${sitesAffected} site(s)` +
306
- (totalWildcardCount > 0 ? ` [${totalWildcardCount} wildcard patterns, ${totalExactCount} exact matches]` : '');
333
+ const summaryMessage = '\n?? ignoreDomains cleanup completed: Removed ' + totalRulesRemoved + ' rules from ' + sitesAffected + ' site(s)';
334
+
307
335
  if (messageColors && messageColors.cleanup) {
308
336
  console.log(messageColors.cleanup(summaryMessage));
309
337
  } else {
@@ -316,77 +344,6 @@ function cleanupIgnoreDomains(results, ignoreDomains, options = {}) {
316
344
  return cleanedResults;
317
345
  }
318
346
 
319
- /**
320
- * Enhanced domain extraction helper that reuses existing parsing logic
321
- * @param {string} rule - Rule string in various formats
322
- * @returns {string|null} Extracted domain or null if not found
323
- */
324
- function extractDomainFromRule(rule) {
325
- if (!rule || typeof rule !== 'string') {
326
- return null;
327
- }
328
-
329
- try {
330
- // Reuse the existing parsing logic from cleanupFirstPartyDomains
331
- let extractedDomain = null;
332
-
333
- if (rule.startsWith('||') && rule.includes('^')) {
334
- // ||domain.com^ format (adblock)
335
- const match = rule.match(/^\|\|([^/\^]+)/);
336
- if (match) {
337
- extractedDomain = match[1];
338
- }
339
- } else if (rule.startsWith('127.0.0.1 ') || rule.startsWith('0.0.0.0 ')) {
340
- // hosts file format
341
- const parts = rule.split(/\s+/);
342
- if (parts.length >= 2) {
343
- extractedDomain = parts[1];
344
- }
345
- } else if (rule.includes('local=/') && rule.includes('/')) {
346
- // dnsmasq format: local=/domain.com/
347
- const match = rule.match(/local=\/([^/]+)\//);
348
- if (match) {
349
- extractedDomain = match[1];
350
- }
351
- } else if (rule.includes('server=/') && rule.includes('/')) {
352
- // dnsmasq old format: server=/domain.com/
353
- const match = rule.match(/server=\/([^/]+)\//);
354
- if (match) {
355
- extractedDomain = match[1];
356
- }
357
- } else if (rule.includes('local-zone:') && rule.includes('always_null')) {
358
- // unbound format: local-zone: "domain.com." always_null
359
- const match = rule.match(/local-zone:\s*"([^"]+)\.?"/);
360
- if (match) {
361
- extractedDomain = match[1];
362
- }
363
- } else if (rule.includes('+block') && rule.includes('.')) {
364
- // privoxy format: { +block } .domain.com
365
- const match = rule.match(/\{\s*\+block\s*\}\s*\.?([^\s]+)/);
366
- if (match) {
367
- extractedDomain = match[1];
368
- }
369
- } else if (rule.match(/^\(\^\|\\\.\).*\\\.\w+\$$/)) {
370
- // pi-hole regex format: (^|\.)domain\.com$
371
- const match = rule.match(/^\(\^\|\\\.\)(.+)\\\.\w+\$$/);
372
- if (match) {
373
- // Unescape the domain
374
- extractedDomain = match[1].replace(/\\\./g, '.');
375
- }
376
- } else {
377
- // Try to extract any domain-like pattern as fallback
378
- const domainMatch = rule.match(/([a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,})/);
379
- if (domainMatch) {
380
- extractedDomain = domainMatch[1];
381
- }
382
- }
383
-
384
- return extractedDomain;
385
- } catch (parseErr) {
386
- return null;
387
- }
388
- }
389
-
390
347
  /**
391
348
  * Post-scan cleanup function to remove first-party domains from results
392
349
  * Only processes sites that have firstParty: false in their configuration
@@ -396,6 +353,7 @@ function extractDomainFromRule(rule) {
396
353
  * @param {Object} options - Options object
397
354
  * @param {boolean} options.forceDebug - Debug logging flag
398
355
  * @param {boolean} options.silentMode - Silent mode flag
356
+ * @param {Map} [options._urlToSiteConfig] - Pre-built URL mapping (internal optimization)
399
357
  * @returns {Array} Cleaned results with conditional first-party removal
400
358
  */
401
359
  function cleanupFirstPartyDomains(results, sites, options = {}) {
@@ -405,105 +363,44 @@ function cleanupFirstPartyDomains(results, sites, options = {}) {
405
363
  return results;
406
364
  }
407
365
 
408
- // Build mapping of URLs to their site configs
409
- const urlToSiteConfig = new Map();
410
- sites.forEach(site => {
411
- const urls = Array.isArray(site.url) ? site.url : [site.url];
412
- urls.forEach(url => {
413
- urlToSiteConfig.set(url, site);
414
- });
415
- });
366
+ // Use pre-built map if passed, otherwise build it
367
+ const urlToSiteConfig = options._urlToSiteConfig || buildUrlToSiteConfig(sites);
416
368
 
417
369
  const cleanedResults = [];
418
370
  let totalRulesRemoved = 0;
419
371
  let sitesAffected = 0;
420
372
 
421
- results.forEach(result => {
422
- // Find the site config for this result
373
+ for (let ri = 0; ri < results.length; ri++) {
374
+ const result = results[ri];
423
375
  const siteConfig = urlToSiteConfig.get(result.url);
424
-
425
- // Only clean if firstParty is explicitly set to false
426
376
  const shouldCleanFirstParty = siteConfig && siteConfig.firstParty === false;
427
377
 
428
378
  if (!shouldCleanFirstParty || !result.rules || result.rules.length === 0) {
429
379
  cleanedResults.push(result);
430
- return;
380
+ continue;
431
381
  }
432
382
 
433
383
  if (forceDebug) {
434
- console.log(formatLogMessage('debug', `[cleanup] Processing ${result.url} (firstParty: false detected)`));
384
+ console.log(formatLogMessage('debug', '[cleanup] Processing ' + result.url + ' (firstParty: false detected)'));
435
385
  }
436
386
 
437
- // Get the scanned domain for this specific result
438
387
  const scannedDomain = safeGetDomain(result.url, false);
439
388
  if (!scannedDomain) {
440
389
  cleanedResults.push(result);
441
- return;
390
+ continue;
442
391
  }
443
392
 
444
- const originalRulesCount = result.rules.length;
445
393
  const cleanedRules = [];
446
394
  const removedRules = [];
447
395
 
448
- // Filter out rules that match the scanned domain
449
- result.rules.forEach(rule => {
450
- let shouldRemove = false;
451
- let extractedDomain = null;
396
+ for (let j = 0; j < result.rules.length; j++) {
397
+ const rule = result.rules[j];
398
+ let kept = true;
452
399
 
453
400
  try {
454
- // Extract domain from different rule formats
455
- if (rule.startsWith('||') && rule.includes('^')) {
456
- // ||domain.com^ format (adblock)
457
- const match = rule.match(/^\|\|([^/\^]+)/);
458
- if (match) {
459
- extractedDomain = match[1];
460
- }
461
- } else if (rule.startsWith('127.0.0.1 ') || rule.startsWith('0.0.0.0 ')) {
462
- // hosts file format
463
- const parts = rule.split(/\s+/);
464
- if (parts.length >= 2) {
465
- extractedDomain = parts[1];
466
- }
467
- } else if (rule.includes('local=/') && rule.includes('/')) {
468
- // dnsmasq format: local=/domain.com/
469
- const match = rule.match(/local=\/([^/]+)\//);
470
- if (match) {
471
- extractedDomain = match[1];
472
- }
473
- } else if (rule.includes('server=/') && rule.includes('/')) {
474
- // dnsmasq old format: server=/domain.com/
475
- const match = rule.match(/server=\/([^/]+)\//);
476
- if (match) {
477
- extractedDomain = match[1];
478
- }
479
- } else if (rule.includes('local-zone:') && rule.includes('always_null')) {
480
- // unbound format: local-zone: "domain.com." always_null
481
- const match = rule.match(/local-zone:\s*"([^"]+)\.?"/);
482
- if (match) {
483
- extractedDomain = match[1];
484
- }
485
- } else if (rule.includes('+block') && rule.includes('.')) {
486
- // privoxy format: { +block } .domain.com
487
- const match = rule.match(/\{\s*\+block\s*\}\s*\.?([^\s]+)/);
488
- if (match) {
489
- extractedDomain = match[1];
490
- }
491
- } else if (rule.match(/^\(\^\|\\\.\).*\\\.\w+\$$/)) {
492
- // pi-hole regex format: (^|\.)domain\.com$
493
- const match = rule.match(/^\(\^\|\\\.\)(.+)\\\.\w+\$$/);
494
- if (match) {
495
- // Unescape the domain
496
- extractedDomain = match[1].replace(/\\\./g, '.');
497
- }
498
- } else {
499
- // Try to extract any domain-like pattern as fallback
500
- const domainMatch = rule.match(/([a-zA-Z0-9][a-zA-Z0-9.-]*\.[a-zA-Z]{2,})/);
501
- if (domainMatch) {
502
- extractedDomain = domainMatch[1];
503
- }
504
- }
401
+ // Use shared extractDomainFromRule (was duplicated inline)
402
+ const extractedDomain = extractDomainFromRule(rule);
505
403
 
506
- // Check if extracted domain is a first-party domain
507
404
  if (extractedDomain) {
508
405
  const matchResult = shouldRemoveAsFirstParty(extractedDomain, scannedDomain, forceDebug);
509
406
 
@@ -512,35 +409,42 @@ function cleanupFirstPartyDomains(results, sites, options = {}) {
512
409
  rule: rule,
513
410
  domain: extractedDomain,
514
411
  rootDomain: scannedDomain,
515
- reason: `First-party: ${matchResult.reason} (firstParty: false)`,
412
+ reason: 'First-party: ' + matchResult.reason + ' (firstParty: false)',
516
413
  matchType: matchResult.reason.includes('Wildcard') ? 'wildcard' : 'exact'
517
414
  });
518
- return; // Exit early - rule should be removed
415
+ kept = false;
519
416
  }
520
417
  }
521
418
  } catch (parseErr) {
522
419
  if (forceDebug) {
523
- console.log(formatLogMessage('debug', `[cleanup] Failed to parse rule: ${rule} - ${parseErr.message}`));
420
+ console.log(formatLogMessage('debug', '[cleanup] Failed to parse rule: ' + rule + ' - ' + parseErr.message));
524
421
  }
525
422
  }
526
423
 
527
- // If we reach here, the rule should be kept
528
- cleanedRules.push(rule);
529
- });
424
+ if (kept) {
425
+ cleanedRules.push(rule);
426
+ }
427
+ }
530
428
 
531
- cleanedResults.push({ ...result, rules: cleanedRules });
429
+ // Mutate rules directly instead of { ...result, rules: cleanedRules }
430
+ result.rules = cleanedRules;
431
+ cleanedResults.push(result);
532
432
 
533
433
  if (removedRules.length > 0) {
534
434
  sitesAffected++;
535
435
  totalRulesRemoved += removedRules.length;
536
436
 
537
437
  if (!silentMode) {
538
- const wildcardCount = removedRules.filter(r => r.matchType === 'wildcard').length;
539
- const exactCount = removedRules.filter(r => r.matchType === 'exact').length;
438
+ // Single-pass count
439
+ let wildcardCount = 0;
440
+ for (let k = 0; k < removedRules.length; k++) {
441
+ if (removedRules[k].matchType === 'wildcard') wildcardCount++;
442
+ }
443
+ const exactCount = removedRules.length - wildcardCount;
540
444
 
541
- let cleanupMessage = `?? Cleaned ${removedRules.length} first-party rule(s) from ${scannedDomain} (firstParty: false)`;
445
+ let cleanupMessage = '?? Cleaned ' + removedRules.length + ' first-party rule(s) from ' + scannedDomain + ' (firstParty: false)';
542
446
  if (wildcardCount > 0) {
543
- cleanupMessage += ` [${wildcardCount} wildcard, ${exactCount} exact]`;
447
+ cleanupMessage += ' [' + wildcardCount + ' wildcard, ' + exactCount + ' exact]';
544
448
  }
545
449
  if (messageColors && messageColors.cleanup) {
546
450
  console.log(messageColors.cleanup(cleanupMessage));
@@ -550,17 +454,17 @@ function cleanupFirstPartyDomains(results, sites, options = {}) {
550
454
  }
551
455
 
552
456
  if (forceDebug) {
553
- console.log(formatLogMessage('debug', `[cleanup] Removed rules from ${result.url}:`));
554
- removedRules.forEach((removed, idx) => {
555
- console.log(formatLogMessage('debug', ` [${idx + 1}] ${removed.rule} (${removed.reason}) [${removed.matchType}]`));
556
- });
457
+ console.log(formatLogMessage('debug', '[cleanup] Removed rules from ' + result.url + ':'));
458
+ for (let k = 0; k < removedRules.length; k++) {
459
+ console.log(formatLogMessage('debug', ' [' + (k + 1) + '] ' + removedRules[k].rule + ' (' + removedRules[k].reason + ') [' + removedRules[k].matchType + ']'));
460
+ }
557
461
  }
558
462
  }
559
- });
463
+ }
560
464
 
561
465
  // Summary
562
466
  if (totalRulesRemoved > 0 && !silentMode) {
563
- const summaryMessage = `\n?? First-party cleanup completed: Removed ${totalRulesRemoved} rules from ${sitesAffected} site(s) with firstParty: false`;
467
+ const summaryMessage = '\n?? First-party cleanup completed: Removed ' + totalRulesRemoved + ' rules from ' + sitesAffected + ' site(s) with firstParty: false';
564
468
  if (messageColors && messageColors.cleanup) {
565
469
  console.log(messageColors.cleanup(summaryMessage));
566
470
  } else {
@@ -592,47 +496,66 @@ function validateScanResults(results, options = {}) {
592
496
  let totalValidated = 0;
593
497
  let totalRemoved = 0;
594
498
 
595
- const validatedResults = results.map(result => {
499
+ // Pre-strip wildcards from ignore patterns once (was done per rule per pattern)
500
+ let strippedIgnorePatterns = null;
501
+ if (ignoreDomains.length > 0) {
502
+ strippedIgnorePatterns = new Array(ignoreDomains.length);
503
+ for (let i = 0; i < ignoreDomains.length; i++) {
504
+ strippedIgnorePatterns[i] = ignoreDomains[i].replace('*', '');
505
+ }
506
+ }
507
+
508
+ for (let ri = 0; ri < results.length; ri++) {
509
+ const result = results[ri];
596
510
  if (!result.rules || result.rules.length === 0) {
597
- return result;
511
+ continue;
598
512
  }
599
513
 
600
514
  const originalCount = result.rules.length;
601
- const validRules = result.rules.filter(rule => {
602
- // Basic validation - ensure rule isn't empty or malformed
515
+ const validRules = [];
516
+
517
+ for (let j = 0; j < result.rules.length; j++) {
518
+ const rule = result.rules[j];
519
+
520
+ // Basic validation
603
521
  if (!rule || typeof rule !== 'string' || rule.trim().length === 0) {
604
522
  if (forceDebug) {
605
- console.log(formatLogMessage('debug', `[validation] Removed empty/invalid rule`));
523
+ console.log(formatLogMessage('debug', '[validation] Removed empty/invalid rule'));
606
524
  }
607
525
  totalRemoved++;
608
- return false;
526
+ continue;
609
527
  }
610
528
 
611
- // Check against ignore domains if provided
612
- if (ignoreDomains.length > 0) {
613
- for (const ignorePattern of ignoreDomains) {
614
- if (rule.includes(ignorePattern.replace('*', ''))) {
529
+ // Check against stripped ignore patterns
530
+ let ignored = false;
531
+ if (strippedIgnorePatterns) {
532
+ for (let k = 0; k < strippedIgnorePatterns.length; k++) {
533
+ if (rule.includes(strippedIgnorePatterns[k])) {
615
534
  if (forceDebug) {
616
- console.log(formatLogMessage('debug', `[validation] Removed rule matching ignore pattern: ${ignorePattern}`));
535
+ console.log(formatLogMessage('debug', '[validation] Removed rule matching ignore pattern: ' + ignoreDomains[k]));
617
536
  }
618
537
  totalRemoved++;
619
- return false;
538
+ ignored = true;
539
+ break;
620
540
  }
621
541
  }
622
542
  }
623
543
 
624
- return true;
625
- });
544
+ if (!ignored) {
545
+ validRules.push(rule);
546
+ }
547
+ }
626
548
 
627
549
  totalValidated += originalCount;
628
- return { ...result, rules: validRules };
629
- });
550
+ // Mutate in place instead of spread
551
+ result.rules = validRules;
552
+ }
630
553
 
631
554
  if (forceDebug && totalRemoved > 0) {
632
- console.log(formatLogMessage('debug', `[validation] Validated ${totalValidated} rules, removed ${totalRemoved} invalid rules`));
555
+ console.log(formatLogMessage('debug', '[validation] Validated ' + totalValidated + ' rules, removed ' + totalRemoved + ' invalid rules'));
633
556
  }
634
557
 
635
- return validatedResults;
558
+ return results;
636
559
  }
637
560
 
638
561
 
@@ -643,6 +566,7 @@ function validateScanResults(results, options = {}) {
643
566
  * @param {Array} results - Array of scan results
644
567
  * @param {Array} sites - Array of site configurations
645
568
  * @param {Object} options - Options object
569
+ * @param {Map} [options._urlToSiteConfig] - Pre-built URL mapping (internal optimization)
646
570
  * @returns {Array} Results with any remaining first-party domains removed
647
571
  */
648
572
  function finalFirstPartyValidation(results, sites, options = {}) {
@@ -652,64 +576,57 @@ function finalFirstPartyValidation(results, sites, options = {}) {
652
576
  return results;
653
577
  }
654
578
 
655
- // Reuse the URL-to-config mapping pattern from cleanupFirstPartyDomains
656
- const urlToSiteConfig = new Map();
657
- sites.forEach(site => {
658
- const urls = Array.isArray(site.url) ? site.url : [site.url];
659
- urls.forEach(url => {
660
- urlToSiteConfig.set(url, site);
661
- });
662
- });
579
+ // Use pre-built map if passed, otherwise build it
580
+ const urlToSiteConfig = options._urlToSiteConfig || buildUrlToSiteConfig(sites);
663
581
 
664
582
  const finalResults = [];
665
583
  let totalViolationsFound = 0;
666
584
  let sitesWithViolations = 0;
667
585
 
668
- results.forEach(result => {
586
+ for (let ri = 0; ri < results.length; ri++) {
587
+ const result = results[ri];
669
588
  const siteConfig = urlToSiteConfig.get(result.url);
670
-
671
- // Only validate sites with firstParty: false
672
589
  const shouldValidate = siteConfig && siteConfig.firstParty === false;
673
590
 
674
591
  if (!shouldValidate || !result.rules || result.rules.length === 0) {
675
592
  finalResults.push(result);
676
- return;
593
+ continue;
677
594
  }
678
595
 
679
596
  const scannedDomain = safeGetDomain(result.url, false);
680
597
  if (!scannedDomain) {
681
598
  finalResults.push(result);
682
- return;
599
+ continue;
683
600
  }
684
601
 
685
- // Reuse the same filtering logic pattern from cleanupFirstPartyDomains
686
602
  const cleanedRules = [];
687
603
  const violatingRules = [];
688
604
 
689
- result.rules.forEach(rule => {
605
+ for (let j = 0; j < result.rules.length; j++) {
606
+ const rule = result.rules[j];
690
607
  const extractedDomain = extractDomainFromRule(rule);
608
+
691
609
  if (extractedDomain) {
692
- // Reuse the shouldRemoveAsFirstParty logic
693
610
  const matchResult = shouldRemoveAsFirstParty(extractedDomain, scannedDomain, forceDebug);
694
611
 
695
612
  if (matchResult.shouldRemove) {
696
613
  violatingRules.push({
697
614
  rule: rule,
698
615
  domain: extractedDomain,
699
- reason: `VALIDATION FAILURE: ${matchResult.reason}`
616
+ reason: 'VALIDATION FAILURE: ' + matchResult.reason
700
617
  });
701
618
  totalViolationsFound++;
702
- return;
619
+ continue;
703
620
  }
704
621
  }
705
622
  cleanedRules.push(rule);
706
- });
623
+ }
707
624
 
708
625
  if (violatingRules.length > 0) {
709
626
  sitesWithViolations++;
710
627
 
711
628
  if (!silentMode) {
712
- const errorMessage = `? CONFIG VIOLATION: Found ${violatingRules.length} first-party rule(s) in ${scannedDomain} (firstParty: false)`;
629
+ const errorMessage = '? CONFIG VIOLATION: Found ' + violatingRules.length + ' first-party rule(s) in ' + scannedDomain + ' (firstParty: false)';
713
630
  if (messageColors && messageColors.error) {
714
631
  console.log(messageColors.error(errorMessage));
715
632
  } else {
@@ -718,19 +635,21 @@ function finalFirstPartyValidation(results, sites, options = {}) {
718
635
  }
719
636
 
720
637
  if (forceDebug) {
721
- console.log(formatLogMessage('debug', `[final-validation] Violations found for ${result.url}:`));
722
- violatingRules.forEach((violation, idx) => {
723
- console.log(formatLogMessage('debug', ` [${idx + 1}] ${violation.rule} -> ${violation.domain}`));
724
- });
638
+ console.log(formatLogMessage('debug', '[final-validation] Violations found for ' + result.url + ':'));
639
+ for (let k = 0; k < violatingRules.length; k++) {
640
+ console.log(formatLogMessage('debug', ' [' + (k + 1) + '] ' + violatingRules[k].rule + ' -> ' + violatingRules[k].domain));
641
+ }
725
642
  }
726
643
  }
727
644
 
728
- finalResults.push({ ...result, rules: cleanedRules });
729
- });
645
+ // Mutate in place
646
+ result.rules = cleanedRules;
647
+ finalResults.push(result);
648
+ }
730
649
 
731
- // Summary using existing message patterns
650
+ // Summary
732
651
  if (totalViolationsFound > 0 && !silentMode) {
733
- const summaryMessage = `\n? SCAN FILTERING FAILURE: Removed ${totalViolationsFound} first-party rules from ${sitesWithViolations} site(s) in post-processing`;
652
+ const summaryMessage = '\n? SCAN FILTERING FAILURE: Removed ' + totalViolationsFound + ' first-party rules from ' + sitesWithViolations + ' site(s) in post-processing';
734
653
  console.log(summaryMessage);
735
654
  console.log('?? This indicates firstParty: false filtering failed during scan - consider investigating root cause.');
736
655
  } else if (forceDebug) {
@@ -755,24 +674,31 @@ function processResults(results, sites, options = {}) {
755
674
  const { forceDebug = false, silentMode = false } = options;
756
675
 
757
676
  if (forceDebug) {
758
- console.log(formatLogMessage('debug', `[post-processing] Starting post-processing of ${results.length} results`));
677
+ console.log(formatLogMessage('debug', '[post-processing] Starting post-processing of ' + results.length + ' results'));
759
678
  }
760
679
 
680
+ // Build URL-to-config map once, share across all steps
681
+ const urlToSiteConfig = buildUrlToSiteConfig(sites);
682
+ const sharedOptions = Object.assign({}, options, { _urlToSiteConfig: urlToSiteConfig });
683
+
761
684
  // Step 1: Clean up first-party domains
762
- let processedResults = cleanupFirstPartyDomains(results, sites, options);
685
+ let processedResults = cleanupFirstPartyDomains(results, sites, sharedOptions);
763
686
 
764
687
  // Step 2: Clean up ignoreDomains (final safety net)
765
688
  processedResults = cleanupIgnoreDomains(processedResults, options.ignoreDomains || [], options);
766
689
 
767
690
  // Step 3: Final validation for firstParty: false configurations
768
- processedResults = finalFirstPartyValidation(processedResults, sites, options);
691
+ processedResults = finalFirstPartyValidation(processedResults, sites, sharedOptions);
769
692
 
770
693
  // Step 4: Validate results
771
694
  processedResults = validateScanResults(processedResults, options);
772
695
 
773
696
  if (forceDebug) {
774
- const totalRules = processedResults.reduce((sum, r) => sum + (r.rules ? r.rules.length : 0), 0);
775
- console.log(formatLogMessage('debug', `[post-processing] Completed: ${totalRules} total rules remaining`));
697
+ let totalRules = 0;
698
+ for (let i = 0; i < processedResults.length; i++) {
699
+ totalRules += processedResults[i].rules ? processedResults[i].rules.length : 0;
700
+ }
701
+ console.log(formatLogMessage('debug', '[post-processing] Completed: ' + totalRules + ' total rules remaining'));
776
702
  }
777
703
 
778
704
  return processedResults;
@@ -785,4 +711,4 @@ module.exports = {
785
711
  extractDomainFromRule,
786
712
  validateScanResults,
787
713
  processResults
788
- };
714
+ };