@fanboynz/network-scanner 2.0.66 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm-publish.yml +134 -10
- package/CHANGELOG.md +135 -0
- package/CLAUDE.md +18 -7
- package/README.md +12 -4
- package/lib/adblock-rust.js +23 -18
- package/lib/adblock.js +127 -82
- package/lib/browserexit.js +210 -200
- package/lib/browserhealth.js +84 -60
- package/lib/cdp.js +103 -81
- package/lib/clear_sitedata.js +61 -159
- package/lib/cloudflare.js +579 -409
- package/lib/colorize.js +29 -12
- package/lib/compare.js +16 -8
- package/lib/compress.js +2 -1
- package/lib/curl.js +287 -220
- package/lib/domain-cache.js +87 -40
- package/lib/dry-run.js +137 -194
- package/lib/fingerprint.js +20 -18
- package/lib/flowproxy.js +391 -188
- package/lib/ghost-cursor.js +8 -7
- package/lib/grep.js +248 -171
- package/lib/ignore_similar.js +70 -124
- package/lib/interaction.js +132 -235
- package/lib/nettools.js +309 -87
- package/lib/openvpn_vpn.js +12 -11
- package/lib/output.js +92 -59
- package/lib/post-processing.js +216 -162
- package/lib/redirect.js +46 -30
- package/lib/referrer.js +158 -165
- package/lib/searchstring.js +290 -381
- package/lib/smart-cache.js +141 -91
- package/lib/socks-relay.js +8 -7
- package/lib/spawn-async.js +137 -0
- package/lib/validate_rules.js +188 -176
- package/lib/wireguard_vpn.js +111 -117
- package/nwss.js +740 -156
- package/package.json +4 -4
package/lib/ignore_similar.js
CHANGED
|
@@ -1,69 +1,42 @@
|
|
|
1
|
-
const
|
|
1
|
+
const psl = require('psl');
|
|
2
|
+
const { formatLogMessage, messageColors } = require('./colorize');
|
|
3
|
+
const IGNORE_SIMILAR_TAG = messageColors.processing('[ignore_similar]');
|
|
2
4
|
|
|
3
|
-
//
|
|
5
|
+
// Strip protocol before handing to psl.parse, which expects a bare
|
|
6
|
+
// hostname per Public Suffix List semantics. psl handles 'www.' as a
|
|
7
|
+
// subdomain naturally (no need for a separate strip).
|
|
4
8
|
const REGEX_PROTOCOL = /^https?:\/\//;
|
|
5
|
-
const REGEX_WWW = /^www\./;
|
|
6
|
-
|
|
7
|
-
// Multi-part TLD lookup (module-level Set, O(1) instead of per-call array + O(n) .includes)
|
|
8
|
-
const MULTI_PART_TLDS = new Set([
|
|
9
|
-
'co.uk', 'co.nz', 'com.au', 'co.za', 'co.in', 'co.jp', 'co.kr',
|
|
10
|
-
'com.br', 'com.mx', 'com.ar', 'com.co', 'com.pe', 'com.ve',
|
|
11
|
-
'co.th', 'co.id', 'co.il', 'co.ke', 'co.tz', 'co.zw', 'co.bw',
|
|
12
|
-
'com.sg', 'com.my', 'com.hk', 'com.tw', 'com.ph', 'com.vn',
|
|
13
|
-
'co.cr', 'co.ug', 'co.zm', 'co.ao', 'co.mz', 'co.ls',
|
|
14
|
-
'org.uk', 'me.uk', 'ltd.uk', 'plc.uk', 'gov.uk', 'ac.uk', 'sch.uk',
|
|
15
|
-
'com.de', 'org.de', 'com.fr', 'org.fr', 'com.es', 'org.es',
|
|
16
|
-
'com.it', 'org.it', 'com.pl', 'org.pl', 'com.nl', 'org.nl',
|
|
17
|
-
'com.ru', 'org.ru', 'com.ua', 'org.ua', 'com.tr', 'org.tr',
|
|
18
|
-
'or.jp', 'ne.jp', 'ac.jp', 'ed.jp', 'go.jp',
|
|
19
|
-
'or.kr', 'ne.kr', 'com.cn', 'org.cn', 'net.cn', 'edu.cn', 'gov.cn',
|
|
20
|
-
'org.in', 'net.in', 'org.au', 'net.au', 'edu.au', 'gov.au',
|
|
21
|
-
'org.nz', 'net.nz', 'org.il', 'net.il', 'org.za', 'net.za',
|
|
22
|
-
'org.br', 'net.br', 'edu.br', 'gov.br', 'org.ar', 'org.mx',
|
|
23
|
-
'org.co', 'org.pe', 'com.cl', 'org.cl', 'com.uy', 'org.uy',
|
|
24
|
-
'org.ve', 'com.do', 'org.do', 'com.pr', 'org.pr',
|
|
25
|
-
'com.gt', 'org.gt', 'com.pa', 'org.pa', 'com.sv', 'org.sv',
|
|
26
|
-
'com.ni', 'org.ni', 'com.hn', 'org.hn', 'org.cr',
|
|
27
|
-
'com.eg', 'org.eg', 'or.ke'
|
|
28
|
-
]);
|
|
29
|
-
|
|
30
|
-
// 3-part TLD lookup
|
|
31
|
-
const THREE_PART_TLDS = new Set(['com.au.com', 'co.uk.com']);
|
|
32
9
|
|
|
33
10
|
/**
|
|
34
|
-
* Extracts the base domain name without TLD for similarity comparison
|
|
11
|
+
* Extracts the base domain name (sld) without TLD for similarity comparison.
|
|
12
|
+
*
|
|
13
|
+
* Uses the project's `psl` dependency — the canonical Public Suffix List
|
|
14
|
+
* parser, maintained against the live Mozilla list. Replaces a hand-curated
|
|
15
|
+
* ~80-entry MULTI_PART_TLDS Set that went stale as PSL changed, plus a
|
|
16
|
+
* THREE_PART_TLDS set that only listed two entries (both vanity domains
|
|
17
|
+
* 'com.au.com'/'co.uk.com', not real public suffixes). The rest of the
|
|
18
|
+
* codebase already uses psl (nwss.js, lib/post-processing.js, etc.) — this
|
|
19
|
+
* brings ignore_similar in line.
|
|
20
|
+
*
|
|
35
21
|
* @param {string} domain - The domain to process
|
|
36
|
-
* @returns {string} The base domain name
|
|
22
|
+
* @returns {string} The base domain name (sld), e.g. 'example' for
|
|
23
|
+
* 'www.example.co.uk'. Returns '' for invalid input; falls back to
|
|
24
|
+
* the second-to-last dot-separated token for hostnames psl can't parse
|
|
25
|
+
* (IPs, unlisted TLDs), or to the whole hostname for single-token hosts.
|
|
37
26
|
*/
|
|
38
27
|
function getBaseDomainName(domain) {
|
|
39
28
|
if (!domain || typeof domain !== 'string') {
|
|
40
29
|
return '';
|
|
41
30
|
}
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
const parts = domain.split('.');
|
|
47
|
-
if (parts.length < 2) {
|
|
48
|
-
return domain;
|
|
31
|
+
const hostname = domain.replace(REGEX_PROTOCOL, '');
|
|
32
|
+
const parsed = psl.parse(hostname);
|
|
33
|
+
if (parsed && parsed.sld) {
|
|
34
|
+
return parsed.sld;
|
|
49
35
|
}
|
|
50
|
-
|
|
51
|
-
//
|
|
52
|
-
const
|
|
53
|
-
|
|
54
|
-
if (MULTI_PART_TLDS.has(lastTwoParts)) {
|
|
55
|
-
return parts.length >= 3 ? parts[parts.length - 3] : parts[0];
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
// Handle rare 3-part TLDs
|
|
59
|
-
if (parts.length >= 4) {
|
|
60
|
-
const lastThreeParts = parts[parts.length - 3] + '.' + lastTwoParts;
|
|
61
|
-
if (THREE_PART_TLDS.has(lastThreeParts)) {
|
|
62
|
-
return parts[parts.length - 4];
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
return parts[parts.length - 2];
|
|
36
|
+
// Fallback for IPs / single-token / unparseable: best-effort
|
|
37
|
+
// second-to-last token (the old behavior's default branch); dot-less hosts are returned whole.
|
|
38
|
+
const parts = hostname.split('.');
|
|
39
|
+
return parts.length >= 2 ? parts[parts.length - 2] : hostname;
|
|
67
40
|
}
|
|
68
41
|
|
|
69
42
|
/**
|
|
@@ -75,12 +48,13 @@ function getBaseDomainName(domain) {
|
|
|
75
48
|
function calculateSimilarity(domain1, domain2) {
|
|
76
49
|
if (domain1 === domain2) return 100;
|
|
77
50
|
if (!domain1 || !domain2) return 0;
|
|
78
|
-
|
|
79
|
-
|
|
51
|
+
|
|
52
|
+
// Both inputs are non-empty different strings at this point — the
|
|
53
|
+
// `''` cases are handled by the two guards above. (Used to have an
|
|
54
|
+
// `if (longer.length === 0) return 100` here but it was unreachable.)
|
|
55
|
+
const longer = domain1.length > domain2.length ? domain1 : domain2;
|
|
80
56
|
const shorter = domain1.length > domain2.length ? domain2 : domain1;
|
|
81
|
-
|
|
82
|
-
if (longer.length === 0) return 100;
|
|
83
|
-
|
|
57
|
+
|
|
84
58
|
const distance = levenshteinDistance(longer, shorter);
|
|
85
59
|
return Math.round(((longer.length - distance) / longer.length) * 100);
|
|
86
60
|
}
|
|
@@ -93,26 +67,29 @@ function calculateSimilarity(domain1, domain2) {
|
|
|
93
67
|
* @returns {number} Edit distance
|
|
94
68
|
*/
|
|
95
69
|
function levenshteinDistance(str1, str2) {
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
//
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
70
|
+
// Ensure `a` holds the longer string so the inner-loop dimension (n)
|
|
71
|
+
// stays small. Inline swap instead of recursive re-entry — the old
|
|
72
|
+
// `if (m < n) return levenshteinDistance(str2, str1)` paid a stack
|
|
73
|
+
// frame + re-validation for what's really just a variable rename.
|
|
74
|
+
let a = str1, b = str2;
|
|
75
|
+
if (a.length < b.length) { const t = a; a = b; b = t; }
|
|
76
|
+
const m = a.length;
|
|
77
|
+
const n = b.length;
|
|
78
|
+
|
|
79
|
+
// Two rows instead of full matrix — O(n) space instead of O(m*n).
|
|
103
80
|
let prevRow = new Array(n + 1);
|
|
104
81
|
let currRow = new Array(n + 1);
|
|
105
|
-
|
|
82
|
+
|
|
106
83
|
for (let j = 0; j <= n; j++) {
|
|
107
84
|
prevRow[j] = j;
|
|
108
85
|
}
|
|
109
|
-
|
|
86
|
+
|
|
110
87
|
for (let i = 1; i <= m; i++) {
|
|
111
88
|
currRow[0] = i;
|
|
112
|
-
const ch1 =
|
|
113
|
-
|
|
89
|
+
const ch1 = a[i - 1];
|
|
90
|
+
|
|
114
91
|
for (let j = 1; j <= n; j++) {
|
|
115
|
-
if (ch1 ===
|
|
92
|
+
if (ch1 === b[j - 1]) {
|
|
116
93
|
currRow[j] = prevRow[j - 1];
|
|
117
94
|
} else {
|
|
118
95
|
const sub = prevRow[j - 1];
|
|
@@ -121,13 +98,13 @@ function levenshteinDistance(str1, str2) {
|
|
|
121
98
|
currRow[j] = (sub < ins ? (sub < del ? sub : del) : (ins < del ? ins : del)) + 1;
|
|
122
99
|
}
|
|
123
100
|
}
|
|
124
|
-
|
|
101
|
+
|
|
125
102
|
// Swap rows
|
|
126
103
|
const temp = prevRow;
|
|
127
104
|
prevRow = currRow;
|
|
128
105
|
currRow = temp;
|
|
129
106
|
}
|
|
130
|
-
|
|
107
|
+
|
|
131
108
|
return prevRow[n];
|
|
132
109
|
}
|
|
133
110
|
|
|
@@ -165,18 +142,28 @@ function shouldIgnoreSimilarDomain(newDomain, existingDomains, options = {}) {
|
|
|
165
142
|
if (!existingDomain || existingDomain === newDomain) {
|
|
166
143
|
continue;
|
|
167
144
|
}
|
|
168
|
-
|
|
145
|
+
|
|
169
146
|
const existingBaseDomain = getBaseDomainName(existingDomain);
|
|
170
|
-
if (!existingBaseDomain
|
|
147
|
+
if (!existingBaseDomain) {
|
|
171
148
|
continue;
|
|
172
149
|
}
|
|
173
|
-
|
|
150
|
+
|
|
151
|
+
// BEHAVIOR NOTE: identical base names (e.g. google.com vs google.net)
|
|
152
|
+
// now count as 100% similar — calculateSimilarity returns 100 for
|
|
153
|
+
// matching strings, which is above any reasonable threshold. The old
|
|
154
|
+
// `existingBaseDomain === newBaseDomain` skip silently exempted
|
|
155
|
+
// same-base-different-TLD pairs, defeating the dedup purpose for the
|
|
156
|
+
// most common variant case (brand registrations across multiple TLDs).
|
|
157
|
+
// Both call sites in nwss.js (matched-dedup at ~2833, ignoreDomains
|
|
158
|
+
// expansion at ~2849) want this stricter behavior. Set a threshold
|
|
159
|
+
// above 100 or disable ignore_similar entirely if you actually want
|
|
160
|
+
// to keep brand variants.
|
|
174
161
|
const similarity = calculateSimilarity(newBaseDomain, existingBaseDomain);
|
|
175
162
|
|
|
176
163
|
if (similarity >= threshold) {
|
|
177
164
|
if (forceDebug) {
|
|
178
165
|
console.log(formatLogMessage('debug',
|
|
179
|
-
|
|
166
|
+
`${IGNORE_SIMILAR_TAG} ${newDomain} (${newBaseDomain}) is ${similarity}% similar to ${existingDomain} (${existingBaseDomain}) - ignoring`
|
|
180
167
|
));
|
|
181
168
|
}
|
|
182
169
|
|
|
@@ -194,52 +181,11 @@ function shouldIgnoreSimilarDomain(newDomain, existingDomains, options = {}) {
|
|
|
194
181
|
return { shouldIgnore: false, reason: 'no similar domains found' };
|
|
195
182
|
}
|
|
196
183
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
* @returns {object} Result with filtered domains and removed domains
|
|
202
|
-
*/
|
|
203
|
-
function filterSimilarDomains(domains, options = {}) {
|
|
204
|
-
const {
|
|
205
|
-
enabled = true,
|
|
206
|
-
threshold = 80,
|
|
207
|
-
forceDebug = false
|
|
208
|
-
} = options;
|
|
209
|
-
|
|
210
|
-
if (!enabled || !Array.isArray(domains)) {
|
|
211
|
-
return { filtered: domains, removed: [] };
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
const filtered = [];
|
|
215
|
-
const removed = [];
|
|
216
|
-
|
|
217
|
-
for (const domain of domains) {
|
|
218
|
-
const result = shouldIgnoreSimilarDomain(domain, filtered, { enabled, threshold, forceDebug });
|
|
219
|
-
|
|
220
|
-
if (result.shouldIgnore) {
|
|
221
|
-
removed.push({
|
|
222
|
-
domain,
|
|
223
|
-
reason: result.reason,
|
|
224
|
-
similarTo: result.similarDomain
|
|
225
|
-
});
|
|
226
|
-
} else {
|
|
227
|
-
filtered.push(domain);
|
|
228
|
-
}
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
if (forceDebug && removed.length > 0) {
|
|
232
|
-
console.log(formatLogMessage('debug',
|
|
233
|
-
`[ignore_similar] Filtered out ${removed.length} similar domains`
|
|
234
|
-
));
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
return { filtered, removed };
|
|
238
|
-
}
|
|
239
|
-
|
|
184
|
+
// Public surface used by nwss.js. getBaseDomainName + (deleted)
|
|
185
|
+
// filterSimilarDomains had zero external callers — getBaseDomainName
|
|
186
|
+
// stays as an internal helper, filterSimilarDomains is gone entirely
|
|
187
|
+
// (no internal callers either).
|
|
240
188
|
module.exports = {
|
|
241
|
-
getBaseDomainName,
|
|
242
189
|
calculateSimilarity,
|
|
243
|
-
shouldIgnoreSimilarDomain
|
|
244
|
-
filterSimilarDomains
|
|
190
|
+
shouldIgnoreSimilarDomain
|
|
245
191
|
};
|