@fanboynz/network-scanner 2.0.55 → 2.0.57
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm-publish.yml +3 -4
- package/lib/browserhealth.js +207 -179
- package/lib/cloudflare.js +117 -65
- package/lib/ignore_similar.js +78 -209
- package/lib/post-processing.js +282 -356
- package/lib/smart-cache.js +347 -267
- package/nwss.js +53 -13
- package/package.json +3 -2
package/lib/ignore_similar.js
CHANGED
|
@@ -1,30 +1,37 @@
|
|
|
1
1
|
const { formatLogMessage } = require('./colorize');
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
3
|
+
// Regex literals hoisted to module scope (avoids creating a new RegExp object on every getBaseDomainName call)
|
|
4
|
+
const REGEX_PROTOCOL = /^https?:\/\//;
|
|
5
|
+
const REGEX_WWW = /^www\./;
|
|
6
|
+
|
|
7
|
+
// Multi-part TLD lookup (module-level Set, O(1) instead of per-call array + O(n) .includes)
|
|
8
|
+
const MULTI_PART_TLDS = new Set([
|
|
9
|
+
'co.uk', 'co.nz', 'com.au', 'co.za', 'co.in', 'co.jp', 'co.kr',
|
|
10
|
+
'com.br', 'com.mx', 'com.ar', 'com.co', 'com.pe', 'com.ve',
|
|
11
|
+
'co.th', 'co.id', 'co.il', 'co.ke', 'co.tz', 'co.zw', 'co.bw',
|
|
12
|
+
'com.sg', 'com.my', 'com.hk', 'com.tw', 'com.ph', 'com.vn',
|
|
13
|
+
'co.cr', 'co.ug', 'co.zm', 'co.ao', 'co.mz', 'co.ls',
|
|
14
|
+
'org.uk', 'me.uk', 'ltd.uk', 'plc.uk', 'gov.uk', 'ac.uk', 'sch.uk',
|
|
15
|
+
'com.de', 'org.de', 'com.fr', 'org.fr', 'com.es', 'org.es',
|
|
16
|
+
'com.it', 'org.it', 'com.pl', 'org.pl', 'com.nl', 'org.nl',
|
|
17
|
+
'com.ru', 'org.ru', 'com.ua', 'org.ua', 'com.tr', 'org.tr',
|
|
18
|
+
'or.jp', 'ne.jp', 'ac.jp', 'ed.jp', 'go.jp',
|
|
19
|
+
'or.kr', 'ne.kr', 'com.cn', 'org.cn', 'net.cn', 'edu.cn', 'gov.cn',
|
|
20
|
+
'org.in', 'net.in', 'org.au', 'net.au', 'edu.au', 'gov.au',
|
|
21
|
+
'org.nz', 'net.nz', 'org.il', 'net.il', 'org.za', 'net.za',
|
|
22
|
+
'org.br', 'net.br', 'edu.br', 'gov.br', 'org.ar', 'org.mx',
|
|
23
|
+
'org.co', 'org.pe', 'com.cl', 'org.cl', 'com.uy', 'org.uy',
|
|
24
|
+
'org.ve', 'com.do', 'org.do', 'com.pr', 'org.pr',
|
|
25
|
+
'com.gt', 'org.gt', 'com.pa', 'org.pa', 'com.sv', 'org.sv',
|
|
26
|
+
'com.ni', 'org.ni', 'com.hn', 'org.hn', 'org.cr',
|
|
27
|
+
'com.eg', 'org.eg', 'or.ke'
|
|
28
|
+
]);
|
|
29
|
+
|
|
30
|
+
// 3-part TLD lookup
|
|
31
|
+
const THREE_PART_TLDS = new Set(['com.au.com', 'co.uk.com']);
|
|
16
32
|
|
|
17
33
|
/**
|
|
18
34
|
* Extracts the base domain name without TLD for similarity comparison
|
|
19
|
-
*
|
|
20
|
-
* Examples:
|
|
21
|
-
* - "ads.google.com" -> "google"
|
|
22
|
-
* - "tracker.facebook.co.uk" -> "facebook"
|
|
23
|
-
* - "cdn.example.org" -> "example"
|
|
24
|
-
*
|
|
25
|
-
* Why we do this: We want to compare the actual brand/company name part
|
|
26
|
-
* of domains, not be fooled by different TLDs or subdomains.
|
|
27
|
-
*
|
|
28
35
|
* @param {string} domain - The domain to process
|
|
29
36
|
* @returns {string} The base domain name
|
|
30
37
|
*/
|
|
@@ -33,259 +40,146 @@ function getBaseDomainName(domain) {
|
|
|
33
40
|
return '';
|
|
34
41
|
}
|
|
35
42
|
|
|
36
|
-
|
|
37
|
-
domain = domain.replace(
|
|
43
|
+
domain = domain.replace(REGEX_PROTOCOL, '');
|
|
44
|
+
domain = domain.replace(REGEX_WWW, '');
|
|
38
45
|
|
|
39
|
-
// Remove www prefix (standardize domain format)
|
|
40
|
-
domain = domain.replace(/^www\./, '');
|
|
41
|
-
|
|
42
|
-
// Split by dots and get the part before the last dot (TLD)
|
|
43
46
|
const parts = domain.split('.');
|
|
44
47
|
if (parts.length < 2) {
|
|
45
|
-
return domain;
|
|
48
|
+
return domain;
|
|
46
49
|
}
|
|
47
50
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
*
|
|
51
|
-
* Many countries use multi-part TLDs like "co.uk", "com.au", etc.
|
|
52
|
-
* We need to account for these when extracting the base domain name.
|
|
53
|
-
*
|
|
54
|
-
* Without this logic:
|
|
55
|
-
* - "example.co.uk" would incorrectly return "co" instead of "example"
|
|
56
|
-
* - "google.com.au" would return "com" instead of "google"
|
|
57
|
-
*
|
|
58
|
-
* This extensive list covers most common multi-part TLDs worldwide.
|
|
59
|
-
*/
|
|
60
|
-
const multiPartTLDs = [
|
|
61
|
-
// Common Anglo countries
|
|
62
|
-
'co.uk', 'co.nz', 'com.au', 'co.za', 'co.in', 'co.jp', 'co.kr',
|
|
63
|
-
|
|
64
|
-
// Latin America
|
|
65
|
-
'com.br', 'com.mx', 'com.ar', 'com.co', 'com.pe', 'com.ve',
|
|
66
|
-
|
|
67
|
-
// Asia-Pacific
|
|
68
|
-
'co.th', 'co.id', 'co.il', 'co.ke', 'co.tz', 'co.zw', 'co.bw',
|
|
69
|
-
'com.sg', 'com.my', 'com.hk', 'com.tw', 'com.ph', 'com.vn',
|
|
70
|
-
|
|
71
|
-
// Central America & Africa
|
|
72
|
-
'co.cr', 'co.ug', 'co.zm', 'co.ao', 'co.mz', 'co.ls',
|
|
73
|
-
|
|
74
|
-
// Europe extensions
|
|
75
|
-
'org.uk', 'me.uk', 'ltd.uk', 'plc.uk', 'gov.uk', 'ac.uk', 'sch.uk',
|
|
76
|
-
'com.de', 'org.de', 'com.fr', 'org.fr', 'com.es', 'org.es',
|
|
77
|
-
'com.it', 'org.it', 'com.pl', 'org.pl', 'com.nl', 'org.nl',
|
|
78
|
-
'com.ru', 'org.ru', 'com.ua', 'org.ua', 'com.tr', 'org.tr',
|
|
79
|
-
|
|
80
|
-
// Asia-Pacific extensions detailed
|
|
81
|
-
'or.jp', 'ne.jp', 'ac.jp', 'ed.jp', 'go.jp',
|
|
82
|
-
'or.kr', 'ne.kr', 'com.cn', 'org.cn', 'net.cn', 'edu.cn', 'gov.cn',
|
|
83
|
-
'org.in', 'net.in', 'org.au', 'net.au', 'edu.au', 'gov.au',
|
|
84
|
-
'org.nz', 'net.nz', 'org.il', 'net.il', 'org.za', 'net.za',
|
|
85
|
-
|
|
86
|
-
// Americas extensions detailed
|
|
87
|
-
'org.br', 'net.br', 'edu.br', 'gov.br', 'org.ar', 'org.mx',
|
|
88
|
-
'org.co', 'org.pe', 'com.cl', 'org.cl', 'com.uy', 'org.uy',
|
|
89
|
-
'org.ve', 'com.do', 'org.do', 'com.pr', 'org.pr',
|
|
90
|
-
|
|
91
|
-
// Central America & Caribbean
|
|
92
|
-
'com.gt', 'org.gt', 'com.pa', 'org.pa', 'com.sv', 'org.sv',
|
|
93
|
-
'com.ni', 'org.ni', 'com.hn', 'org.hn', 'org.cr',
|
|
94
|
-
|
|
95
|
-
// Middle East & Africa extensions
|
|
96
|
-
'com.eg', 'org.eg', 'or.ke'
|
|
97
|
-
];
|
|
51
|
+
// Check multi-part TLD (O(1) Set lookup instead of O(n) array scan)
|
|
52
|
+
const lastTwoParts = parts[parts.length - 2] + '.' + parts[parts.length - 1];
|
|
98
53
|
|
|
99
|
-
|
|
100
|
-
const lastTwoParts = parts.slice(-2).join('.'); // e.g., "co.uk"
|
|
101
|
-
const lastThreeParts = parts.length >= 3 ? parts.slice(-3).join('.') : ''; // e.g., "com.au.com"
|
|
102
|
-
|
|
103
|
-
// Handle 2-part TLDs (most common case)
|
|
104
|
-
// Example: "google.co.uk" -> parts = ["google", "co", "uk"] -> return "google"
|
|
105
|
-
if (multiPartTLDs.includes(lastTwoParts)) {
|
|
54
|
+
if (MULTI_PART_TLDS.has(lastTwoParts)) {
|
|
106
55
|
return parts.length >= 3 ? parts[parts.length - 3] : parts[0];
|
|
107
56
|
}
|
|
108
57
|
|
|
109
|
-
// Handle rare 3-part TLDs
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
58
|
+
// Handle rare 3-part TLDs
|
|
59
|
+
if (parts.length >= 4) {
|
|
60
|
+
const lastThreeParts = parts[parts.length - 3] + '.' + lastTwoParts;
|
|
61
|
+
if (THREE_PART_TLDS.has(lastThreeParts)) {
|
|
62
|
+
return parts[parts.length - 4];
|
|
63
|
+
}
|
|
114
64
|
}
|
|
115
65
|
|
|
116
|
-
// For standard TLDs, take the second-to-last part
|
|
117
|
-
// Example: "google.com" -> parts = ["google", "com"] -> return "google"
|
|
118
66
|
return parts[parts.length - 2];
|
|
119
67
|
}
|
|
120
68
|
|
|
121
69
|
/**
|
|
122
70
|
* Calculates similarity between two domain base names using Levenshtein distance
|
|
123
|
-
*
|
|
124
|
-
* The Levenshtein distance is the minimum number of single-character edits
|
|
125
|
-
* (insertions, deletions, substitutions) needed to transform one string into another.
|
|
126
|
-
*
|
|
127
|
-
* We convert this to a percentage similarity for easier threshold comparison.
|
|
128
|
-
*
|
|
129
|
-
* Examples:
|
|
130
|
-
* - "google" vs "googl" = 83% similar (1 deletion needed)
|
|
131
|
-
* - "facebook" vs "facebo0k" = 87% similar (1 substitution needed)
|
|
132
|
-
* - "amazon" vs "amaz0n" = 83% similar (1 substitution needed)
|
|
133
|
-
*
|
|
134
|
-
* Why this matters: Malicious domains often use typosquatting techniques
|
|
135
|
-
* like character substitution, insertion, or deletion to appear legitimate.
|
|
136
|
-
*
|
|
137
71
|
* @param {string} domain1 - First domain base name
|
|
138
72
|
* @param {string} domain2 - Second domain base name
|
|
139
73
|
* @returns {number} Similarity percentage (0-100)
|
|
140
74
|
*/
|
|
141
75
|
function calculateSimilarity(domain1, domain2) {
|
|
142
|
-
// Exact match = 100% similar (optimization for common case)
|
|
143
76
|
if (domain1 === domain2) return 100;
|
|
144
|
-
|
|
145
|
-
// Empty strings have no similarity
|
|
146
77
|
if (!domain1 || !domain2) return 0;
|
|
147
78
|
|
|
148
|
-
// Identify longer and shorter strings for algorithm efficiency
|
|
149
79
|
const longer = domain1.length > domain2.length ? domain1 : domain2;
|
|
150
80
|
const shorter = domain1.length > domain2.length ? domain2 : domain1;
|
|
151
81
|
|
|
152
|
-
// Edge case: empty longer string means both are empty (100% similar)
|
|
153
82
|
if (longer.length === 0) return 100;
|
|
154
83
|
|
|
155
|
-
// Calculate edit distance using dynamic programming algorithm
|
|
156
84
|
const distance = levenshteinDistance(longer, shorter);
|
|
157
|
-
|
|
158
|
-
// Convert to percentage: (max_length - edits_needed) / max_length * 100
|
|
159
|
-
// Higher percentage = more similar
|
|
160
85
|
return Math.round(((longer.length - distance) / longer.length) * 100);
|
|
161
86
|
}
|
|
162
87
|
|
|
163
88
|
/**
|
|
164
|
-
* Calculates Levenshtein distance
|
|
165
|
-
*
|
|
166
|
-
* This is the core algorithm that powers our similarity detection.
|
|
167
|
-
* Time complexity: O(m*n) where m and n are string lengths
|
|
168
|
-
* Space complexity: O(m*n) for the matrix
|
|
169
|
-
*
|
|
170
|
-
* The algorithm builds a matrix where each cell [i,j] represents the minimum
|
|
171
|
-
* edit distance between the first i characters of str1 and first j characters of str2.
|
|
172
|
-
*
|
|
173
|
-
* Dynamic programming recurrence relation:
|
|
174
|
-
* - If characters match: matrix[i][j] = matrix[i-1][j-1] (no edit needed)
|
|
175
|
-
* - If different: matrix[i][j] = 1 + min(substitution, insertion, deletion)
|
|
176
|
-
*
|
|
89
|
+
* Calculates Levenshtein distance using two-row approach
|
|
90
|
+
* Returns the same distance as a full-matrix implementation, but needs only O(min(m,n)) space instead of O(m*n)
|
|
177
91
|
* @param {string} str1 - First string
|
|
178
92
|
* @param {string} str2 - Second string
|
|
179
|
-
* @returns {number} Edit distance
|
|
93
|
+
* @returns {number} Edit distance
|
|
180
94
|
*/
|
|
181
95
|
function levenshteinDistance(str1, str2) {
|
|
182
|
-
|
|
183
|
-
const
|
|
96
|
+
const m = str1.length;
|
|
97
|
+
const n = str2.length;
|
|
184
98
|
|
|
185
|
-
//
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
99
|
+
// Swap the arguments when str1 is shorter, so the row arrays are sized by the shorter string (str2)
|
|
100
|
+
if (m < n) return levenshteinDistance(str2, str1);
|
|
101
|
+
|
|
102
|
+
// Two rows instead of full matrix
|
|
103
|
+
let prevRow = new Array(n + 1);
|
|
104
|
+
let currRow = new Array(n + 1);
|
|
189
105
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
matrix[0][j] = j;
|
|
106
|
+
for (let j = 0; j <= n; j++) {
|
|
107
|
+
prevRow[j] = j;
|
|
193
108
|
}
|
|
194
109
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
110
|
+
for (let i = 1; i <= m; i++) {
|
|
111
|
+
currRow[0] = i;
|
|
112
|
+
const ch1 = str1[i - 1];
|
|
113
|
+
|
|
114
|
+
for (let j = 1; j <= n; j++) {
|
|
115
|
+
if (ch1 === str2[j - 1]) {
|
|
116
|
+
currRow[j] = prevRow[j - 1];
|
|
201
117
|
} else {
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
matrix[i - 1][j] + 1 // deletion: remove char from str1
|
|
207
|
-
);
|
|
118
|
+
const sub = prevRow[j - 1];
|
|
119
|
+
const ins = currRow[j - 1];
|
|
120
|
+
const del = prevRow[j];
|
|
121
|
+
currRow[j] = (sub < ins ? (sub < del ? sub : del) : (ins < del ? ins : del)) + 1;
|
|
208
122
|
}
|
|
209
123
|
}
|
|
124
|
+
|
|
125
|
+
// Swap rows
|
|
126
|
+
const temp = prevRow;
|
|
127
|
+
prevRow = currRow;
|
|
128
|
+
currRow = temp;
|
|
210
129
|
}
|
|
211
130
|
|
|
212
|
-
|
|
213
|
-
return matrix[str2.length][str1.length];
|
|
131
|
+
return prevRow[n];
|
|
214
132
|
}
|
|
215
133
|
|
|
216
134
|
/**
|
|
217
135
|
* Main function: Checks if a domain should be ignored based on similarity to existing domains
|
|
218
|
-
*
|
|
219
|
-
* This is called for every potential domain match during scanning, so it needs to be
|
|
220
|
-
* efficient. The function uses early returns and optimizations to minimize processing.
|
|
221
|
-
*
|
|
222
|
-
* Usage workflow:
|
|
223
|
-
* 1. New domain found: "g00gleads.com"
|
|
224
|
-
* 2. Extract base: "g00gleads"
|
|
225
|
-
* 3. Compare to existing: ["googleads", "facebook", "amazon"]
|
|
226
|
-
* 4. Find "googleads" is 89% similar (above 80% threshold)
|
|
227
|
-
* 5. Return shouldIgnore: true
|
|
228
|
-
*
|
|
229
136
|
* @param {string} newDomain - The domain to check for similarity
|
|
230
137
|
* @param {Set|Array} existingDomains - Collection of already found domains
|
|
231
138
|
* @param {object} options - Configuration options
|
|
232
|
-
* @param {boolean} options.enabled - Whether similarity checking is enabled
|
|
233
|
-
* @param {number} options.threshold - Similarity percentage threshold (0-100)
|
|
234
|
-
* @param {boolean} options.forceDebug - Whether to log debug information
|
|
235
139
|
* @returns {object} Result object with shouldIgnore boolean and metadata
|
|
236
140
|
*/
|
|
237
141
|
function shouldIgnoreSimilarDomain(newDomain, existingDomains, options = {}) {
|
|
238
142
|
const {
|
|
239
143
|
enabled = true,
|
|
240
|
-
threshold = 80,
|
|
144
|
+
threshold = 80,
|
|
241
145
|
forceDebug = false
|
|
242
146
|
} = options;
|
|
243
147
|
|
|
244
|
-
// Quick exit if feature is disabled (performance optimization)
|
|
245
148
|
if (!enabled) {
|
|
246
149
|
return { shouldIgnore: false, reason: 'ignore_similar disabled' };
|
|
247
150
|
}
|
|
248
151
|
|
|
249
|
-
// Validate input domain
|
|
250
152
|
if (!newDomain) {
|
|
251
153
|
return { shouldIgnore: false, reason: 'invalid domain' };
|
|
252
154
|
}
|
|
253
155
|
|
|
254
|
-
// Extract base domain name for comparison
|
|
255
156
|
const newBaseDomain = getBaseDomainName(newDomain);
|
|
256
157
|
if (!newBaseDomain) {
|
|
257
158
|
return { shouldIgnore: false, reason: 'could not extract base domain' };
|
|
258
159
|
}
|
|
259
160
|
|
|
260
|
-
//
|
|
161
|
+
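// Keep the original guard as-is: arrays are used directly, other iterables/array-likes (e.g. a Set) are copied with Array.from. Note: Array.from throws a TypeError for undefined/null, so existingDomains must always be a collection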
|
|
261
162
|
const domainsArray = Array.isArray(existingDomains) ? existingDomains : Array.from(existingDomains);
|
|
262
163
|
|
|
263
|
-
// Compare against each existing domain
|
|
264
164
|
for (const existingDomain of domainsArray) {
|
|
265
|
-
// Skip invalid, empty, or identical domains (optimization)
|
|
266
165
|
if (!existingDomain || existingDomain === newDomain) {
|
|
267
166
|
continue;
|
|
268
167
|
}
|
|
269
168
|
|
|
270
|
-
// Extract base domain for comparison
|
|
271
169
|
const existingBaseDomain = getBaseDomainName(existingDomain);
|
|
272
170
|
if (!existingBaseDomain || existingBaseDomain === newBaseDomain) {
|
|
273
|
-
continue;
|
|
171
|
+
continue;
|
|
274
172
|
}
|
|
275
173
|
|
|
276
|
-
// Calculate similarity percentage
|
|
277
174
|
const similarity = calculateSimilarity(newBaseDomain, existingBaseDomain);
|
|
278
175
|
|
|
279
|
-
// Check if similarity exceeds threshold
|
|
280
176
|
if (similarity >= threshold) {
|
|
281
|
-
// Debug logging for similarity matches (helps tune thresholds)
|
|
282
177
|
if (forceDebug) {
|
|
283
178
|
console.log(formatLogMessage('debug',
|
|
284
179
|
`[ignore_similar] ${newDomain} (${newBaseDomain}) is ${similarity}% similar to ${existingDomain} (${existingBaseDomain}) - ignoring`
|
|
285
180
|
));
|
|
286
181
|
}
|
|
287
182
|
|
|
288
|
-
// Return detailed similarity information for debugging/analysis
|
|
289
183
|
return {
|
|
290
184
|
shouldIgnore: true,
|
|
291
185
|
reason: `${similarity}% similar to ${existingDomain}`,
|
|
@@ -297,24 +191,14 @@ function shouldIgnoreSimilarDomain(newDomain, existingDomains, options = {}) {
|
|
|
297
191
|
}
|
|
298
192
|
}
|
|
299
193
|
|
|
300
|
-
// No similar domains found - safe to add this domain
|
|
301
194
|
return { shouldIgnore: false, reason: 'no similar domains found' };
|
|
302
195
|
}
|
|
303
196
|
|
|
304
197
|
/**
|
|
305
198
|
* Utility function: Filters out similar domains from a collection
|
|
306
|
-
*
|
|
307
|
-
* This is useful for post-processing existing domain lists to remove
|
|
308
|
-
* similar entries. It processes the array sequentially, comparing each
|
|
309
|
-
* domain against the already-accepted domains.
|
|
310
|
-
*
|
|
311
|
-
* Use case: Clean up an existing blocklist by removing similar domains
|
|
312
|
-
* Example: ["googleads.com", "g00gleads.com", "facebook.com"]
|
|
313
|
-
* -> ["googleads.com", "facebook.com"] (removed g00gleads as similar)
|
|
314
|
-
*
|
|
315
199
|
* @param {Array} domains - Array of domains to filter
|
|
316
|
-
* @param {object} options - Filtering options
|
|
317
|
-
* @returns {object} Result with filtered domains and
|
|
200
|
+
* @param {object} options - Filtering options
|
|
201
|
+
* @returns {object} Result with filtered domains and removed domains
|
|
318
202
|
*/
|
|
319
203
|
function filterSimilarDomains(domains, options = {}) {
|
|
320
204
|
const {
|
|
@@ -323,33 +207,27 @@ function filterSimilarDomains(domains, options = {}) {
|
|
|
323
207
|
forceDebug = false
|
|
324
208
|
} = options;
|
|
325
209
|
|
|
326
|
-
// Quick exit if disabled or invalid input
|
|
327
210
|
if (!enabled || !Array.isArray(domains)) {
|
|
328
211
|
return { filtered: domains, removed: [] };
|
|
329
212
|
}
|
|
330
213
|
|
|
331
|
-
const filtered = [];
|
|
332
|
-
const removed = [];
|
|
214
|
+
const filtered = [];
|
|
215
|
+
const removed = [];
|
|
333
216
|
|
|
334
|
-
// Process each domain sequentially
|
|
335
217
|
for (const domain of domains) {
|
|
336
|
-
// Check if this domain is similar to any already-accepted domain
|
|
337
218
|
const result = shouldIgnoreSimilarDomain(domain, filtered, { enabled, threshold, forceDebug });
|
|
338
219
|
|
|
339
220
|
if (result.shouldIgnore) {
|
|
340
|
-
// Domain is too similar - add to removed list with metadata
|
|
341
221
|
removed.push({
|
|
342
222
|
domain,
|
|
343
223
|
reason: result.reason,
|
|
344
224
|
similarTo: result.similarDomain
|
|
345
225
|
});
|
|
346
226
|
} else {
|
|
347
|
-
// Domain is unique enough - add to filtered list
|
|
348
227
|
filtered.push(domain);
|
|
349
228
|
}
|
|
350
229
|
}
|
|
351
230
|
|
|
352
|
-
// Debug reporting for filtering results
|
|
353
231
|
if (forceDebug && removed.length > 0) {
|
|
354
232
|
console.log(formatLogMessage('debug',
|
|
355
233
|
`[ignore_similar] Filtered out ${removed.length} similar domains`
|
|
@@ -359,15 +237,6 @@ function filterSimilarDomains(domains, options = {}) {
|
|
|
359
237
|
return { filtered, removed };
|
|
360
238
|
}
|
|
361
239
|
|
|
362
|
-
/**
|
|
363
|
-
* MODULE EXPORTS
|
|
364
|
-
*
|
|
365
|
-
* Public API for the ignore_similar module:
|
|
366
|
-
* - getBaseDomainName: Extract base domain from full domain
|
|
367
|
-
* - calculateSimilarity: Get similarity percentage between two domains
|
|
368
|
-
* - shouldIgnoreSimilarDomain: Main function for real-time similarity checking
|
|
369
|
-
* - filterSimilarDomains: Batch processing function for existing lists
|
|
370
|
-
*/
|
|
371
240
|
module.exports = {
|
|
372
241
|
getBaseDomainName,
|
|
373
242
|
calculateSimilarity,
|