@fanboynz/network-scanner 1.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,237 @@
1
+ const { formatLogMessage } = require('./colorize');
2
+
3
/**
 * Two-label public suffixes (mostly country-code second-level domains).
 * When a hostname ends with one of these, the registrable name is the label
 * immediately before the suffix. Kept in a Set so each lookup is O(1) and the
 * list is built once instead of on every call.
 */
const MULTI_PART_TLDS = new Set([
  'co.uk', 'co.nz', 'com.au', 'co.za', 'co.in', 'co.jp', 'co.kr',
  'com.br', 'com.mx', 'com.ar', 'com.co', 'com.pe', 'com.ve',
  'co.th', 'co.id', 'co.il', 'co.ke', 'co.tz', 'co.zw', 'co.bw',
  'com.sg', 'com.my', 'com.hk', 'com.tw', 'com.ph', 'com.vn',
  'co.cr', 'co.ug', 'co.zm', 'co.ao', 'co.mz', 'co.ls',

  // Europe extensions
  'org.uk', 'me.uk', 'ltd.uk', 'plc.uk', 'gov.uk', 'ac.uk', 'sch.uk',
  'com.de', 'org.de', 'com.fr', 'org.fr', 'com.es', 'org.es',
  'com.it', 'org.it', 'com.pl', 'org.pl', 'com.nl', 'org.nl',
  'com.ru', 'org.ru', 'com.ua', 'org.ua', 'com.tr', 'org.tr',

  // Asia-Pacific extensions
  'or.jp', 'ne.jp', 'ac.jp', 'ed.jp', 'go.jp',
  'or.kr', 'ne.kr', 'com.cn', 'org.cn', 'net.cn', 'edu.cn', 'gov.cn',
  'org.in', 'net.in', 'org.au', 'net.au', 'edu.au', 'gov.au',
  'org.nz', 'net.nz', 'org.il', 'net.il', 'org.za', 'net.za',

  // Americas extensions
  'org.br', 'net.br', 'edu.br', 'gov.br', 'org.ar', 'org.mx',
  'org.co', 'org.pe', 'com.cl', 'org.cl', 'com.uy', 'org.uy',
  'org.ve', 'com.do', 'org.do', 'com.pr', 'org.pr',

  // Central America & Caribbean
  'com.gt', 'org.gt', 'com.pa', 'org.pa', 'com.sv', 'org.sv',
  'com.ni', 'org.ni', 'com.hn', 'org.hn', 'org.cr',

  // Middle East & Africa extensions
  'com.eg', 'org.eg', 'or.ke'
]);

/**
 * Three-label suffixes that are only consulted when the two-label lookup
 * does not match.
 */
const THREE_PART_TLDS = new Set(['com.au.com', 'co.uk.com']);

/**
 * Extracts the base domain name without TLD for similarity comparison.
 *
 * The input is normalised before the labels are inspected: surrounding
 * whitespace is trimmed, the value is lower-cased (DNS names are
 * case-insensitive), and an http(s) scheme, any path/query/fragment, a
 * numeric port, trailing root dots and a leading "www." are removed.
 *
 * @param {string} domain - The domain (or http/https URL) to process
 * @returns {string} The base domain name, or '' for empty / non-string input
 */
function getBaseDomainName(domain) {
  if (!domain || typeof domain !== 'string') {
    return '';
  }

  const host = domain
    .trim()
    .toLowerCase()
    .replace(/^https?:\/\//, '') // scheme
    .split(/[\/?#]/)[0]          // keep only the authority; drop path, query, fragment
    .replace(/:\d+$/, '')        // port
    .replace(/\.+$/, '')         // trailing root dot(s), e.g. "example.com."
    .replace(/^www\./, '');      // www prefix

  const parts = host.split('.');
  if (parts.length < 2) {
    return host; // Single label (e.g. "localhost") or empty: return as-is
  }

  // Two-label suffix, e.g. google.co.nz -> "google".
  // A bare suffix such as "co.uk" has no label in front of it, so the first
  // label is returned instead.
  const lastTwoParts = parts.slice(-2).join('.');
  if (MULTI_PART_TLDS.has(lastTwoParts)) {
    return parts.length >= 3 ? parts[parts.length - 3] : parts[0];
  }

  // Three-label suffix, e.g. shop.co.uk.com -> "shop".
  if (parts.length >= 4 && THREE_PART_TLDS.has(parts.slice(-3).join('.'))) {
    return parts[parts.length - 4];
  }

  // Standard TLD: the second-to-last label, e.g. google.com -> "google".
  return parts[parts.length - 2];
}
76
+
77
/**
 * Scores how alike two domain base names are, as a percentage derived from
 * their Levenshtein edit distance relative to the longer of the two names.
 * @param {string} domain1 - First domain base name
 * @param {string} domain2 - Second domain base name
 * @returns {number} Similarity percentage (0-100); 100 for identical inputs,
 *   0 when either input is empty/falsy
 */
function calculateSimilarity(domain1, domain2) {
  if (domain1 === domain2) {
    return 100;
  }
  if (!(domain1 && domain2)) {
    return 0;
  }

  // On equal lengths the second argument is treated as the "longer" one.
  let longer = domain2;
  let shorter = domain1;
  if (domain1.length > domain2.length) {
    longer = domain1;
    shorter = domain2;
  }

  const span = longer.length;
  if (span === 0) {
    return 100;
  }

  const edits = levenshteinDistance(longer, shorter);
  const ratio = (span - edits) / span;
  return Math.round(ratio * 100);
}
95
+
96
/**
 * Computes the Levenshtein (edit) distance between two strings by filling a
 * full dynamic-programming grid.
 * @param {string} str1 - First string (indexes the grid columns)
 * @param {string} str2 - Second string (indexes the grid rows)
 * @returns {number} Minimum number of single-character substitutions,
 *   insertions and deletions that turn one string into the other
 */
function levenshteinDistance(str1, str2) {
  // grid[r][c] holds the distance between the first r characters of str2
  // and the first c characters of str1.
  const grid = [];

  // First column: r deletions turn an r-character prefix into the empty string.
  for (let r = 0; r <= str2.length; r += 1) {
    grid.push([r]);
  }

  // First row: c insertions build a c-character prefix from the empty string.
  for (let c = 0; c <= str1.length; c += 1) {
    grid[0][c] = c;
  }

  for (let r = 1; r <= str2.length; r += 1) {
    const rowChar = str2.charAt(r - 1);
    const above = grid[r - 1];
    const current = grid[r];

    for (let c = 1; c <= str1.length; c += 1) {
      if (rowChar === str1.charAt(c - 1)) {
        // Matching characters cost nothing: carry the diagonal value over.
        current[c] = above[c - 1];
        continue;
      }

      const substitution = above[c - 1];
      const insertion = current[c - 1];
      const deletion = above[c];
      current[c] = Math.min(substitution + 1, insertion + 1, deletion + 1);
    }
  }

  return grid[str2.length][str1.length];
}
129
+
130
/**
 * Checks if a domain should be ignored based on similarity to existing domains.
 *
 * The base name of `newDomain` is compared against the base name of every
 * entry in `existingDomains`; the first entry whose similarity score reaches
 * `threshold` causes the domain to be ignored. Entries that are empty,
 * identical to `newDomain`, or that share the exact same base name are
 * skipped and never trigger an ignore.
 *
 * @param {string} newDomain - The domain to check
 * @param {Set|Array} [existingDomains] - Collection of existing domains;
 *   `undefined`/`null` is treated as an empty collection
 * @param {object} [options] - Options for similarity checking
 * @param {boolean} [options.enabled=true] - When false, nothing is ignored
 * @param {number} [options.threshold=80] - Minimum similarity percentage to ignore
 * @param {boolean} [options.forceDebug=false] - Log a debug line on a match
 * @returns {object} Result with a `shouldIgnore` boolean and a `reason`; on a
 *   match also `similarity`, `similarDomain`, `newBaseDomain`, `existingBaseDomain`
 */
function shouldIgnoreSimilarDomain(newDomain, existingDomains, options = {}) {
  // `options = {}` only covers undefined; guard an explicit null as well.
  const {
    enabled = true,
    threshold = 80, // Similarity threshold percentage (80% by default)
    forceDebug = false
  } = options || {};

  if (!enabled) {
    return { shouldIgnore: false, reason: 'ignore_similar disabled' };
  }

  if (!newDomain) {
    return { shouldIgnore: false, reason: 'invalid domain' };
  }

  const newBaseDomain = getBaseDomainName(newDomain);
  if (!newBaseDomain) {
    return { shouldIgnore: false, reason: 'could not extract base domain' };
  }

  // Normalise the collection to an array. A missing collection means there is
  // nothing to compare against yet, so treat it as empty rather than letting
  // Array.from(undefined) throw.
  let domainsArray;
  if (Array.isArray(existingDomains)) {
    domainsArray = existingDomains;
  } else if (existingDomains == null) {
    domainsArray = [];
  } else {
    domainsArray = Array.from(existingDomains);
  }

  for (const existingDomain of domainsArray) {
    if (!existingDomain || existingDomain === newDomain) {
      continue; // Skip empty or identical domains
    }

    const existingBaseDomain = getBaseDomainName(existingDomain);
    if (!existingBaseDomain || existingBaseDomain === newBaseDomain) {
      continue; // Skip if same base domain or invalid
    }

    const similarity = calculateSimilarity(newBaseDomain, existingBaseDomain);

    if (similarity >= threshold) {
      if (forceDebug) {
        console.log(formatLogMessage('debug', `[ignore_similar] ${newDomain} (${newBaseDomain}) is ${similarity}% similar to ${existingDomain} (${existingBaseDomain}) - ignoring`));
      }

      return {
        shouldIgnore: true,
        reason: `${similarity}% similar to ${existingDomain}`,
        similarity,
        similarDomain: existingDomain,
        newBaseDomain,
        existingBaseDomain
      };
    }
  }

  return { shouldIgnore: false, reason: 'no similar domains found' };
}
190
+
191
/**
 * Walks a list of domains in order and drops every entry that
 * shouldIgnoreSimilarDomain reports as too similar to a domain that has
 * already been kept.
 * @param {Array} domains - Array of domains to filter
 * @param {object} options - Filtering options (`enabled`, `threshold`, `forceDebug`)
 * @returns {object} `{ filtered, removed }`; `removed` entries carry the
 *   domain, the reason and the kept domain it resembled. When filtering is
 *   disabled or `domains` is not an array, `domains` is returned untouched as
 *   `filtered` with an empty `removed` list.
 */
function filterSimilarDomains(domains, options = {}) {
  const { enabled = true, threshold = 80, forceDebug = false } = options;

  const canFilter = enabled && Array.isArray(domains);
  if (!canFilter) {
    return { filtered: domains, removed: [] };
  }

  const kept = [];
  const dropped = [];

  for (let idx = 0; idx < domains.length; idx += 1) {
    const candidate = domains[idx];
    // Each candidate is compared only against the domains kept so far.
    const verdict = shouldIgnoreSimilarDomain(candidate, kept, { enabled, threshold, forceDebug });

    if (!verdict.shouldIgnore) {
      kept.push(candidate);
      continue;
    }

    dropped.push({
      domain: candidate,
      reason: verdict.reason,
      similarTo: verdict.similarDomain
    });
  }

  const hasDropped = dropped.length > 0;
  if (forceDebug && hasDropped) {
    console.log(formatLogMessage('debug', `[ignore_similar] Filtered out ${dropped.length} similar domains`));
  }

  return { filtered: kept, removed: dropped };
}
231
+
232
+ module.exports = { // Public API of this module; levenshteinDistance is not exported
233
+ getBaseDomainName, // extracts the base domain name without TLD
234
+ calculateSimilarity, // similarity percentage (0-100) between two base names
235
+ shouldIgnoreSimilarDomain, // checks one domain against a collection of existing domains
236
+ filterSimilarDomains // filters similar domains out of an array
237
+ };