@fanboynz/network-scanner 2.0.66 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm-publish.yml +134 -10
- package/CHANGELOG.md +135 -0
- package/CLAUDE.md +18 -7
- package/README.md +12 -4
- package/lib/adblock-rust.js +23 -18
- package/lib/adblock.js +127 -82
- package/lib/browserexit.js +210 -200
- package/lib/browserhealth.js +84 -60
- package/lib/cdp.js +103 -81
- package/lib/clear_sitedata.js +61 -159
- package/lib/cloudflare.js +579 -409
- package/lib/colorize.js +29 -12
- package/lib/compare.js +16 -8
- package/lib/compress.js +2 -1
- package/lib/curl.js +287 -220
- package/lib/domain-cache.js +87 -40
- package/lib/dry-run.js +137 -194
- package/lib/fingerprint.js +20 -18
- package/lib/flowproxy.js +391 -188
- package/lib/ghost-cursor.js +8 -7
- package/lib/grep.js +248 -171
- package/lib/ignore_similar.js +70 -124
- package/lib/interaction.js +132 -235
- package/lib/nettools.js +309 -87
- package/lib/openvpn_vpn.js +12 -11
- package/lib/output.js +92 -59
- package/lib/post-processing.js +216 -162
- package/lib/redirect.js +46 -30
- package/lib/referrer.js +158 -165
- package/lib/searchstring.js +290 -381
- package/lib/smart-cache.js +141 -91
- package/lib/socks-relay.js +8 -7
- package/lib/spawn-async.js +137 -0
- package/lib/validate_rules.js +188 -176
- package/lib/wireguard_vpn.js +111 -117
- package/nwss.js +740 -156
- package/package.json +4 -4
package/lib/nettools.js
CHANGED
|
@@ -3,7 +3,14 @@
|
|
|
3
3
|
* Provides domain analysis capabilities with proper timeout handling, custom whois servers, and retry logic
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
// execFile (no shell) for whois/dig invocations -- arguments are passed
|
|
7
|
+
// directly to the executable as an argv array, so shell metacharacters in
|
|
8
|
+
// config-supplied hostnames or server names CANNOT execute commands. The
|
|
9
|
+
// prior `exec(string)` approach interpolated tainted values into a shell
|
|
10
|
+
// string protected only by double-quoting, which doesn't stop $()/backticks.
|
|
11
|
+
// execSync is retained ONLY for the version-probe helpers below, where
|
|
12
|
+
// commands are constant string literals with no user-controlled inputs.
|
|
13
|
+
const { execFile, execSync } = require('child_process');
|
|
7
14
|
const fs = require('fs');
|
|
8
15
|
const path = require('path');
|
|
9
16
|
const { formatLogMessage, messageColors } = require('./colorize');
|
|
@@ -17,19 +24,68 @@ let whoisServerCycleIndex = 0;
|
|
|
17
24
|
// DNS records don't change based on what terms you're searching for,
|
|
18
25
|
// so we cache the raw dig output and let each handler check its own terms against it
|
|
19
26
|
const globalDigResultCache = new Map();
|
|
20
|
-
const GLOBAL_DIG_CACHE_TTL =
|
|
21
|
-
const GLOBAL_DIG_CACHE_MAX =
|
|
27
|
+
const GLOBAL_DIG_CACHE_TTL = 72000000; // 20 hours (persisted to disk between runs)
|
|
28
|
+
const GLOBAL_DIG_CACHE_MAX = 2000;
|
|
22
29
|
|
|
23
30
|
// Global whois result cache — shared across ALL handler instances and processUrl calls
|
|
24
31
|
// Whois data is per root domain and doesn't change based on search terms
|
|
25
32
|
const globalWhoisResultCache = new Map();
|
|
26
|
-
const GLOBAL_WHOIS_CACHE_TTL =
|
|
27
|
-
const GLOBAL_WHOIS_CACHE_MAX =
|
|
33
|
+
const GLOBAL_WHOIS_CACHE_TTL = 72000000; // 20 hours (persisted to disk between runs)
|
|
34
|
+
const GLOBAL_WHOIS_CACHE_MAX = 2000;
|
|
28
35
|
|
|
29
36
|
// Persistent disk cache file paths
|
|
30
37
|
const DIG_CACHE_FILE = path.join(__dirname, '..', '.digcache');
|
|
31
38
|
const WHOIS_CACHE_FILE = path.join(__dirname, '..', '.whoiscache');
|
|
32
39
|
|
|
40
|
+
// Index of hostnames known to resolve, populated as a side effect of
|
|
41
|
+
// positive dig/whois cache writes AND cache hits. nwss.js's DNS pre-check
|
|
42
|
+
// reads this via domainKnownToResolve() so it can skip its own resolve4
|
|
43
|
+
// call on hosts that dig or whois have already proven live within the
|
|
44
|
+
// 20-hour TTL window. Populating on cache HITS (not just writes) handles
|
|
45
|
+
// the --dns-cache disk-load case where entries arrive without going
|
|
46
|
+
// through the in-process write path. Stale entries -- hostname in Set but
|
|
47
|
+
// the dig/whois entry has since been evicted -- are harmless: worst case
|
|
48
|
+
// is one wasted pre-check next time the hostname comes through.
|
|
49
|
+
const knownResolvedHostnames = new Set();
|
|
50
|
+
const MAX_RESOLVED_HOSTNAMES = 5000;
|
|
51
|
+
|
|
52
|
+
/**
 * Record `hostname` in the knownResolvedHostnames index.
 *
 * The index is capped at MAX_RESOLVED_HOSTNAMES. When a hostname that is not
 * yet indexed arrives while the index is at the cap, the oldest-inserted
 * entry is evicted first (Set iteration order is insertion order). A hostname
 * that is already indexed is left untouched and never triggers an eviction.
 *
 * @param {string} hostname - Hostname to record; falsy values are ignored
 */
function markResolved(hostname) {
  if (!hostname) return;
  // Already indexed: Set.add would be a no-op, so evicting at this point
  // would only shrink the index by one live entry for no gain.
  if (knownResolvedHostnames.has(hostname)) return;
  if (knownResolvedHostnames.size >= MAX_RESOLVED_HOSTNAMES) {
    // FIFO eviction -- Set iteration order is insertion order.
    knownResolvedHostnames.delete(knownResolvedHostnames.values().next().value);
  }
  knownResolvedHostnames.add(hostname);
}
|
|
60
|
+
|
|
61
|
+
/**
 * Report whether `hostname` is currently present in the resolved-hostnames
 * index (knownResolvedHostnames).
 *
 * Entries are added by markResolved, which this module calls from the
 * dig/whois cache write and cache-hit paths and from the disk-cache warm-up
 * in enableDiskCache. A `false` result therefore does NOT mean
 * "unresolvable" -- only that nothing has been indexed for this name, so the
 * caller should run its own DNS pre-check.
 *
 * @param {string} hostname - Hostname to look up (exact Set membership)
 * @returns {boolean} true when the hostname is in the index
 */
function domainKnownToResolve(hostname) {
  const isIndexed = knownResolvedHostnames.has(hostname);
  return isIndexed;
}
|
|
71
|
+
|
|
72
|
+
/**
 * Decide whether raw dig output proves that the queried hostname resolved.
 *
 * Dig results with success:true can still represent NXDOMAIN -- the dig
 * COMMAND succeeded but the DNS RESPONSE is "no such name" -- so the output
 * text is the only reliable signal. The rule is: `status: NOERROR` AND a
 * non-zero answer count in the response header.
 *
 * NOERROR with ANSWER: 0 means the name exists at this label but has no
 * record of the requested type. That is weaker evidence than an actual
 * answer, so the conservative choice is to treat it as "not proven".
 *
 * @param {*} output - Raw dig output; anything other than a non-empty
 *   string yields false
 * @returns {boolean} true only for NOERROR with at least one answer record
 */
function digOutputIndicatesResolution(output) {
  // Cache entries loaded from disk are untyped JSON. A non-string value can
  // prove nothing, and calling .includes on it could throw.
  if (typeof output !== 'string' || output.length === 0) return false;
  if (!output.includes('status: NOERROR')) return false;
  // Read the answer count from the FIRST "ANSWER: <n>" occurrence, which is
  // the header flags line. Testing the whole output for "ANSWER: 0" would
  // let record data (e.g. a TXT payload) containing that text veto a genuine
  // answer. "ANSWER SECTION:" does not match because a digit is required.
  const answerCount = /\bANSWER:\s*(\d+)/.exec(output);
  if (!answerCount) return false;
  return Number.parseInt(answerCount[1], 10) > 0;
}
|
|
88
|
+
|
|
33
89
|
/**
|
|
34
90
|
* Load persistent cache from disk into in-memory Map
|
|
35
91
|
* Skips expired entries and enforces max size
|
|
@@ -39,6 +95,18 @@ const WHOIS_CACHE_FILE = path.join(__dirname, '..', '.whoiscache');
|
|
|
39
95
|
* @param {number} maxSize - Maximum cache entries
|
|
40
96
|
*/
|
|
41
97
|
function loadDiskCache(filePath, cache, ttl, maxSize) {
|
|
98
|
+
// Also clean up any stray .tmp files from a prior interrupted save.
|
|
99
|
+
// The atomic-write path (saveDiskCache below) writes to `${filePath}.tmp`
|
|
100
|
+
// then renames; a process killed mid-write leaves the .tmp behind. The
|
|
101
|
+
// real file remains intact (rename is atomic), so we just sweep the
|
|
102
|
+
// stray on load.
|
|
103
|
+
try {
|
|
104
|
+
const tmpPath = filePath + '.tmp';
|
|
105
|
+
if (fs.existsSync(tmpPath)) {
|
|
106
|
+
try { fs.unlinkSync(tmpPath); } catch {}
|
|
107
|
+
}
|
|
108
|
+
} catch {}
|
|
109
|
+
|
|
42
110
|
try {
|
|
43
111
|
if (!fs.existsSync(filePath)) return;
|
|
44
112
|
const data = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
|
|
@@ -51,8 +119,13 @@ function loadDiskCache(filePath, cache, ttl, maxSize) {
|
|
|
51
119
|
loaded++;
|
|
52
120
|
}
|
|
53
121
|
}
|
|
54
|
-
} catch {
|
|
55
|
-
// Corrupt or unreadable cache file — delete and start fresh
|
|
122
|
+
} catch (err) {
|
|
123
|
+
// Corrupt or unreadable cache file — delete and start fresh.
|
|
124
|
+
// Surface the event so the user knows they lost their warm cache;
|
|
125
|
+
// previously this was a silent reset, which made "why did my dns
|
|
126
|
+
// cache stop helping?" hard to diagnose.
|
|
127
|
+
// eslint-disable-next-line no-console
|
|
128
|
+
console.warn(`${messageColors.highlight('[dns-cache]')} ${path.basename(filePath)} was unreadable (${err.message}); starting fresh`);
|
|
56
129
|
try { fs.unlinkSync(filePath); } catch {}
|
|
57
130
|
}
|
|
58
131
|
}
|
|
@@ -78,11 +151,12 @@ function saveDiskCache(filePath, cache, ttl, maxSize) {
|
|
|
78
151
|
}
|
|
79
152
|
}
|
|
80
153
|
|
|
81
|
-
//
|
|
82
|
-
// saveDiskCache runs on the synchronous 'exit' handler when
|
|
83
|
-
// is set, so any work here directly delays scan exit.
|
|
84
|
-
//
|
|
85
|
-
// intended for human reading.
|
|
154
|
+
// Build the final payload (with trimming if over cap). Compact JSON
|
|
155
|
+
// -- saveDiskCache runs on the synchronous 'exit' handler when
|
|
156
|
+
// --dns-cache is set, so any work here directly delays scan exit.
|
|
157
|
+
// Several times faster than pretty-print on multi-megabyte caches
|
|
158
|
+
// and the file is not intended for human reading.
|
|
159
|
+
let payload;
|
|
86
160
|
if (count > maxSize) {
|
|
87
161
|
const sorted = Object.entries(entries)
|
|
88
162
|
.sort((a, b) => b[1].timestamp - a[1].timestamp)
|
|
@@ -91,12 +165,28 @@ function saveDiskCache(filePath, cache, ttl, maxSize) {
|
|
|
91
165
|
for (const [key, entry] of sorted) {
|
|
92
166
|
trimmed[key] = entry;
|
|
93
167
|
}
|
|
94
|
-
|
|
168
|
+
payload = JSON.stringify(trimmed);
|
|
95
169
|
} else {
|
|
96
|
-
|
|
170
|
+
payload = JSON.stringify(entries);
|
|
97
171
|
}
|
|
172
|
+
|
|
173
|
+
// Atomic write: writeFileSync to a sibling .tmp path, then rename.
|
|
174
|
+
// If the process is killed mid-write (SIGKILL, OOM, power loss) the
|
|
175
|
+
// .tmp is left as garbage but the real filePath is either complete
|
|
176
|
+
// or absent -- never half-written. loadDiskCache sweeps stray .tmp
|
|
177
|
+
// files on next startup.
|
|
178
|
+
// Matches the pattern already used in lib/adblock-rust.js per the
|
|
179
|
+
// CLAUDE.md convention. We deliberately omit the pid suffix used
|
|
180
|
+
// there because saveDiskCache only ever runs from the single 'exit'
|
|
181
|
+
// handler -- no concurrent-process race to disambiguate.
|
|
182
|
+
const tmpPath = filePath + '.tmp';
|
|
183
|
+
fs.writeFileSync(tmpPath, payload);
|
|
184
|
+
fs.renameSync(tmpPath, filePath);
|
|
98
185
|
} catch {
|
|
99
|
-
// Disk write failed
|
|
186
|
+
// Disk write failed -- non-fatal, in-memory cache still works.
|
|
187
|
+
// Best-effort cleanup of any stray tmp file from this attempt so
|
|
188
|
+
// it doesn't accumulate over repeated failures.
|
|
189
|
+
try { fs.unlinkSync(filePath + '.tmp'); } catch {}
|
|
100
190
|
}
|
|
101
191
|
}
|
|
102
192
|
|
|
@@ -104,9 +194,57 @@ function saveDiskCache(filePath, cache, ttl, maxSize) {
|
|
|
104
194
|
const pendingDigLookups = new Map();
|
|
105
195
|
const pendingWhoisLookups = new Map();
|
|
106
196
|
|
|
107
|
-
|
|
197
|
+
/**
 * Keep a dig/whois result cache at or below a hard size limit.
 *
 * Nothing happens while the cache is within `maxSize`. Once it is over:
 *   1. every entry older than `ttl` is removed;
 *   2. if the cache is still too large (all survivors are within TTL),
 *      the entries with the smallest timestamps are removed until the
 *      size equals `maxSize`.
 *
 * The Map is mutated in place.
 *
 * @param {Map} cache - Map of key -> entry carrying a numeric `timestamp`
 * @param {number} maxSize - Hard cap on the number of entries
 * @param {number} ttl - Maximum entry age in milliseconds
 * @returns {{expired: number, overflow: number}} how many entries each pass removed
 */
function enforceCacheCap(cache, maxSize, ttl) {
  const evicted = { expired: 0, overflow: 0 };
  if (cache.size <= maxSize) return evicted;

  // Pass 1: age-based eviction. Deleting from a Map while iterating it is
  // well-defined; removed entries are simply not visited again.
  const checkedAt = Date.now();
  cache.forEach((record, cacheKey) => {
    if (checkedAt - record.timestamp > ttl) {
      cache.delete(cacheKey);
      evicted.expired += 1;
    }
  });

  // Pass 2: still over the cap, so drop the oldest survivors.
  if (cache.size > maxSize) {
    const oldestFirst = [...cache].sort(
      ([, left], [, right]) => left.timestamp - right.timestamp
    );
    const surplus = cache.size - maxSize;
    let idx = 0;
    while (idx < surplus) {
      cache.delete(oldestFirst[idx][0]);
      evicted.overflow += 1;
      idx += 1;
    }
  }

  return evicted;
}
|
|
233
|
+
|
|
234
|
+
// DNS cache statistics. freshDig / freshWhois are sample lists for
|
|
235
|
+
// end-of-scan visibility; capped at MAX_FRESH_LIST entries (FIFO) so
|
|
236
|
+
// they can't grow unbounded on scans with thousands of unique fresh
|
|
237
|
+
// lookups. digMisses/whoisMisses retain the full count, so callers
|
|
238
|
+
// who want totals can read those; freshDig/freshWhois are intended as
|
|
239
|
+
// "show me which domains" diagnostic samples.
|
|
240
|
+
const MAX_FRESH_LIST = 1000;
|
|
108
241
|
const dnsCacheStats = { digHits: 0, digMisses: 0, whoisHits: 0, whoisMisses: 0, freshDig: [], freshWhois: [] };
|
|
109
242
|
|
|
243
|
+
/**
 * Append `item` to a bounded diagnostic sample list.
 *
 * When the list already holds MAX_FRESH_LIST entries, the oldest one (index
 * 0) is dropped before the new item is appended, so the list never grows
 * past the cap.
 *
 * @param {Array} arr - Sample list to mutate in place
 * @param {*} item - Value to append
 */
function pushFreshSample(arr, item) {
  const atCapacity = arr.length >= MAX_FRESH_LIST;
  if (atCapacity) {
    arr.shift();
  }
  arr.push(item);
}
|
|
247
|
+
|
|
110
248
|
/**
|
|
111
249
|
* Get DNS cache statistics for end-of-scan reporting
|
|
112
250
|
* @returns {Object} Cache hit/miss counts and fresh domain lists
|
|
@@ -119,14 +257,46 @@ function getDnsCacheStats() {
|
|
|
119
257
|
let diskCacheEnabled = false;
|
|
120
258
|
|
|
121
259
|
/**
|
|
122
|
-
* Enable persistent disk caching for dig/whois results
|
|
123
|
-
* Call this when --dns-cache flag is set
|
|
260
|
+
* Enable persistent disk caching for dig/whois results.
|
|
261
|
+
* Call this when --dns-cache flag is set. Idempotent — repeated calls
|
|
262
|
+
* are no-ops, which prevents double-loading the cache files and double-
|
|
263
|
+
* registering the 'exit' handler that flushes them on shutdown.
|
|
124
264
|
*/
|
|
125
265
|
function enableDiskCache() {
|
|
266
|
+
if (diskCacheEnabled) return;
|
|
126
267
|
diskCacheEnabled = true;
|
|
127
268
|
loadDiskCache(DIG_CACHE_FILE, globalDigResultCache, GLOBAL_DIG_CACHE_TTL, GLOBAL_DIG_CACHE_MAX);
|
|
128
269
|
loadDiskCache(WHOIS_CACHE_FILE, globalWhoisResultCache, GLOBAL_WHOIS_CACHE_TTL, GLOBAL_WHOIS_CACHE_MAX);
|
|
129
270
|
|
|
271
|
+
// Warm knownResolvedHostnames from disk-loaded entries so the very
|
|
272
|
+
// first URL per cached domain also skips the c-ares pre-check (instead
|
|
273
|
+
// of waiting for the cache-hit handler to fire later in the URL's
|
|
274
|
+
// pipeline). Entries written by older versions of this module lack the
|
|
275
|
+
// `hostname` field -- they're skipped here and fall back to lazy
|
|
276
|
+
// on-hit population. Same positive-resolution gates apply as the live
|
|
277
|
+
// write/hit paths (dig: NOERROR + non-zero answers; whois: success).
|
|
278
|
+
let digWarm = 0;
|
|
279
|
+
let whoisWarm = 0;
|
|
280
|
+
for (const entry of globalDigResultCache.values()) {
|
|
281
|
+
if (entry.hostname && entry.result && entry.result.success &&
|
|
282
|
+
digOutputIndicatesResolution(entry.result.output)) {
|
|
283
|
+
markResolved(entry.hostname);
|
|
284
|
+
digWarm++;
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
for (const entry of globalWhoisResultCache.values()) {
|
|
288
|
+
if (entry.hostname && entry.result && entry.result.success) {
|
|
289
|
+
markResolved(entry.hostname);
|
|
290
|
+
whoisWarm++;
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
// Debug log only if anything was actually warmed; silent on fresh
|
|
294
|
+
// installs / empty disk caches.
|
|
295
|
+
if (digWarm > 0 || whoisWarm > 0) {
|
|
296
|
+
// eslint-disable-next-line no-console
|
|
297
|
+
console.log(`${messageColors.highlight('[dns-cache]')} Warmed resolved-hostnames index from disk: ${digWarm} dig + ${whoisWarm} whois entries`);
|
|
298
|
+
}
|
|
299
|
+
|
|
130
300
|
// Save caches to disk once on process exit instead of per-lookup. The
|
|
131
301
|
// 'exit' handler fires synchronously regardless of how the process exits
|
|
132
302
|
// (normal completion, signal, uncaught exception), so a separate signal
|
|
@@ -203,25 +373,36 @@ function validateDigAvailability() {
|
|
|
203
373
|
}
|
|
204
374
|
|
|
205
375
|
/**
|
|
206
|
-
*
|
|
207
|
-
*
|
|
376
|
+
* Spawn a process with execFile (no shell) and a hard timeout. Arguments
|
|
377
|
+
* are passed directly as argv -- shell metacharacters in any element
|
|
378
|
+
* cannot execute commands. Replaces the prior exec(string)-based helper
|
|
379
|
+
* whose double-quote-only protection failed against $()/backticks.
|
|
380
|
+
*
|
|
381
|
+
* @param {string} cmd - Executable name or path
|
|
382
|
+
* @param {string[]} args - Argument vector (each element a separate arg)
|
|
208
383
|
* @param {number} timeout - Timeout in milliseconds
|
|
209
|
-
* @returns {Promise<
|
|
384
|
+
* @returns {Promise<{stdout:string, stderr:string}>} -- rejects on timeout/error
|
|
210
385
|
*/
|
|
211
|
-
function
|
|
386
|
+
function execFileWithTimeout(cmd, args, timeout = 10000) {
|
|
212
387
|
return new Promise((resolve, reject) => {
|
|
213
|
-
|
|
388
|
+
// Hoisted before the callbacks that reference it. Previously `const
|
|
389
|
+
// timer = setTimeout(...)` was declared after the exec callback /
|
|
390
|
+
// 'error' listener that both did `if (timer) clearTimeout(timer)` —
|
|
391
|
+
// worked in practice because exec defers callbacks via nextTick, but
|
|
392
|
+
// structurally fragile (a synchronous exec failure would TDZ-throw).
|
|
393
|
+
let timer = null;
|
|
394
|
+
|
|
395
|
+
const child = execFile(cmd, args, { encoding: 'utf8' }, (error, stdout, stderr) => {
|
|
214
396
|
if (timer) clearTimeout(timer);
|
|
215
|
-
|
|
397
|
+
|
|
216
398
|
if (error) {
|
|
217
399
|
reject(error);
|
|
218
400
|
} else {
|
|
219
401
|
resolve({ stdout, stderr });
|
|
220
402
|
}
|
|
221
403
|
});
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
const timer = setTimeout(() => {
|
|
404
|
+
|
|
405
|
+
timer = setTimeout(() => {
|
|
225
406
|
child.kill('SIGTERM');
|
|
226
407
|
|
|
227
408
|
// Force kill after 2 seconds if SIGTERM doesn't work. unref() so this
|
|
@@ -235,9 +416,9 @@ function execWithTimeout(command, timeout = 10000) {
|
|
|
235
416
|
}, 2000);
|
|
236
417
|
killTimer.unref();
|
|
237
418
|
|
|
238
|
-
reject(new Error(`Command timeout after ${timeout}ms: ${
|
|
419
|
+
reject(new Error(`Command timeout after ${timeout}ms: ${cmd} ${args.join(' ')}`));
|
|
239
420
|
}, timeout);
|
|
240
|
-
|
|
421
|
+
|
|
241
422
|
// Handle child process errors
|
|
242
423
|
child.on('error', (err) => {
|
|
243
424
|
if (timer) clearTimeout(timer);
|
|
@@ -344,19 +525,24 @@ async function whoisLookup(domain = '', timeout = 10000, whoisServer = '', debug
|
|
|
344
525
|
try {
|
|
345
526
|
// Clean domain (remove protocol, path, etc)
|
|
346
527
|
cleanDomain = domain.replace(/^https?:\/\//, '').replace(/\/.*$/, '').replace(/:\d+$/, '');
|
|
347
|
-
|
|
528
|
+
|
|
348
529
|
// Select whois server if provided
|
|
349
530
|
selectedServer = selectWhoisServer(whoisServer);
|
|
350
|
-
|
|
351
|
-
// Build whois
|
|
531
|
+
|
|
532
|
+
// Build whois argv. Pass each token as a separate argv element --
|
|
533
|
+
// execFile does NOT spawn a shell, so neither cleanDomain nor
|
|
534
|
+
// selectedServer can inject commands no matter what they contain.
|
|
535
|
+
// The leading `--` is preserved so dashes in `cleanDomain` don't get
|
|
536
|
+
// re-interpreted as flags by the whois binary itself.
|
|
537
|
+
let whoisArgs;
|
|
352
538
|
if (selectedServer) {
|
|
353
|
-
|
|
354
|
-
whoisCommand = `whois -h "${selectedServer}" -- "${cleanDomain}"`;
|
|
539
|
+
whoisArgs = ['-h', selectedServer, '--', cleanDomain];
|
|
355
540
|
} else {
|
|
356
|
-
|
|
357
|
-
whoisCommand = `whois -- "${cleanDomain}"`;
|
|
541
|
+
whoisArgs = ['--', cleanDomain];
|
|
358
542
|
}
|
|
359
|
-
|
|
543
|
+
// Kept as a display string for debug logging only -- never executed.
|
|
544
|
+
whoisCommand = `whois ${whoisArgs.join(' ')}`;
|
|
545
|
+
|
|
360
546
|
if (debugMode) {
|
|
361
547
|
if (logFunc) {
|
|
362
548
|
logFunc(`${messageColors.highlight('[whois]')} Starting lookup for ${cleanDomain} (timeout: ${timeout}ms)`);
|
|
@@ -366,8 +552,8 @@ async function whoisLookup(domain = '', timeout = 10000, whoisServer = '', debug
|
|
|
366
552
|
console.log(formatLogMessage('debug', `${messageColors.highlight('[whois]')} Command: ${whoisCommand}`));
|
|
367
553
|
}
|
|
368
554
|
}
|
|
369
|
-
|
|
370
|
-
const { stdout, stderr } = await
|
|
555
|
+
|
|
556
|
+
const { stdout, stderr } = await execFileWithTimeout('whois', whoisArgs, timeout);
|
|
371
557
|
const duration = Date.now() - startTime;
|
|
372
558
|
|
|
373
559
|
if (stderr && stderr.trim()) {
|
|
@@ -772,9 +958,11 @@ async function digLookup(domain = '', recordType = 'A', timeout = 5000) {
|
|
|
772
958
|
try {
|
|
773
959
|
// Clean domain
|
|
774
960
|
const cleanDomain = domain.replace(/^https?:\/\//, '').replace(/\/.*$/, '').replace(/:\d+$/, '');
|
|
775
|
-
|
|
776
|
-
// Single dig command — full output contains everything including
|
|
777
|
-
|
|
961
|
+
|
|
962
|
+
// Single dig command — full output contains everything including short
|
|
963
|
+
// answers. execFile (no shell) so cleanDomain / recordType can contain
|
|
964
|
+
// any chars without injection risk.
|
|
965
|
+
const { stdout: fullOutput, stderr } = await execFileWithTimeout('dig', [cleanDomain, recordType], timeout);
|
|
778
966
|
|
|
779
967
|
if (stderr && stderr.trim()) {
|
|
780
968
|
return {
|
|
@@ -928,7 +1116,14 @@ function createNetToolsHandler(config) {
|
|
|
928
1116
|
dumpUrls,
|
|
929
1117
|
matchedUrlsLogFile,
|
|
930
1118
|
forceDebug,
|
|
931
|
-
fs
|
|
1119
|
+
fs,
|
|
1120
|
+
// ignoreDomains guard: callers pass the live ignoreDomains list + matcher
|
|
1121
|
+
// so a domain that became ignored AFTER the request fired (e.g. via
|
|
1122
|
+
// ignoreDomainsByUrl on a sibling request, or _dynamicallyIgnoredDomains)
|
|
1123
|
+
// doesn't slip into matchedDomains during the async whois/dig window.
|
|
1124
|
+
// Both default to no-op so older callers without the kwargs still work.
|
|
1125
|
+
ignoreDomains = null,
|
|
1126
|
+
matchesIgnoreDomain = null
|
|
932
1127
|
} = config;
|
|
933
1128
|
|
|
934
1129
|
const hasWhois = whoisTerms && Array.isArray(whoisTerms) && whoisTerms.length > 0;
|
|
@@ -1123,6 +1318,11 @@ function createNetToolsHandler(config) {
|
|
|
1123
1318
|
originalTimestamp: cachedEntry.timestamp
|
|
1124
1319
|
});
|
|
1125
1320
|
dnsCacheStats.whoisHits++;
|
|
1321
|
+
// Warm the resolved-hostnames index from disk-loaded entries.
|
|
1322
|
+
// Cached whois entries are pre-filtered for network errors at
|
|
1323
|
+
// write time, so every cached entry implies the domain has a
|
|
1324
|
+
// registrar record -- strong resolution signal.
|
|
1325
|
+
markResolved(whoisRootDomain);
|
|
1126
1326
|
} else {
|
|
1127
1327
|
// Cache expired, remove it
|
|
1128
1328
|
globalWhoisResultCache.delete(whoisCacheKey);
|
|
@@ -1169,12 +1369,18 @@ function createNetToolsHandler(config) {
|
|
|
1169
1369
|
!whoisResult.error.toLowerCase().includes('connection') &&
|
|
1170
1370
|
!whoisResult.error.toLowerCase().includes('network'))) {
|
|
1171
1371
|
|
|
1372
|
+
// `hostname` field is backwards-compat additive (see dig
|
|
1373
|
+
// write site for details).
|
|
1172
1374
|
globalWhoisResultCache.set(whoisCacheKey, {
|
|
1173
1375
|
result: whoisResult,
|
|
1174
|
-
timestamp: now
|
|
1376
|
+
timestamp: now,
|
|
1377
|
+
hostname: whoisRootDomain
|
|
1175
1378
|
});
|
|
1176
1379
|
dnsCacheStats.whoisMisses++;
|
|
1177
|
-
dnsCacheStats.freshWhois
|
|
1380
|
+
pushFreshSample(dnsCacheStats.freshWhois, whoisRootDomain);
|
|
1381
|
+
// Only mark resolved on actual whois success -- a cached
|
|
1382
|
+
// "not found" / "no match" failure shouldn't claim resolution.
|
|
1383
|
+
if (whoisResult.success) markResolved(whoisRootDomain);
|
|
1178
1384
|
|
|
1179
1385
|
if (forceDebug) {
|
|
1180
1386
|
const cacheType = whoisResult.success ? 'successful' : 'failed';
|
|
@@ -1332,18 +1538,14 @@ function createNetToolsHandler(config) {
|
|
|
1332
1538
|
}
|
|
1333
1539
|
}
|
|
1334
1540
|
|
|
1335
|
-
// Periodic whois cache cleanup
|
|
1336
|
-
if (
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
}
|
|
1344
|
-
}
|
|
1345
|
-
if (forceDebug && cleanedCount > 0) {
|
|
1346
|
-
logToConsoleAndFile(`${messageColors.highlight('[whois-cache]')} Cleaned ${cleanedCount} expired entries, cache size: ${globalWhoisResultCache.size}`);
|
|
1541
|
+
// Periodic whois cache cleanup. enforceCacheCap evicts expired
|
|
1542
|
+
// entries first; if still over MAX (all entries still within TTL
|
|
1543
|
+
// but too many), evicts the oldest by timestamp so the cap is
|
|
1544
|
+
// strictly enforced.
|
|
1545
|
+
{
|
|
1546
|
+
const ev = enforceCacheCap(globalWhoisResultCache, GLOBAL_WHOIS_CACHE_MAX, GLOBAL_WHOIS_CACHE_TTL);
|
|
1547
|
+
if (forceDebug && (ev.expired + ev.overflow) > 0) {
|
|
1548
|
+
logToConsoleAndFile(`${messageColors.highlight('[whois-cache]')} Pruned ${ev.expired} expired + ${ev.overflow} overflow entries, cache size: ${globalWhoisResultCache.size}`);
|
|
1347
1549
|
}
|
|
1348
1550
|
}
|
|
1349
1551
|
}
|
|
@@ -1374,6 +1576,11 @@ function createNetToolsHandler(config) {
|
|
|
1374
1576
|
}
|
|
1375
1577
|
digResult = cachedEntry.result;
|
|
1376
1578
|
dnsCacheStats.digHits++;
|
|
1579
|
+
// Warm the resolved-hostnames index from disk-loaded entries.
|
|
1580
|
+
// No-op if already present.
|
|
1581
|
+
if (digResult.success && digOutputIndicatesResolution(digResult.output)) {
|
|
1582
|
+
markResolved(digDomain);
|
|
1583
|
+
}
|
|
1377
1584
|
} else {
|
|
1378
1585
|
// Cache expired, remove it
|
|
1379
1586
|
globalDigResultCache.delete(digCacheKey);
|
|
@@ -1397,13 +1604,23 @@ function createNetToolsHandler(config) {
|
|
|
1397
1604
|
pendingDigLookups.delete(digCacheKey);
|
|
1398
1605
|
}
|
|
1399
1606
|
|
|
1400
|
-
// Cache the result for future use
|
|
1607
|
+
// Cache the result for future use. `hostname` field is
|
|
1608
|
+
// backwards-compat additive: old code reading new cache
|
|
1609
|
+
// ignores it; new code reading old cache (no field) falls
|
|
1610
|
+
// back to lazy on-hit population in the cache-hit branch.
|
|
1401
1611
|
globalDigResultCache.set(digCacheKey, {
|
|
1402
1612
|
result: digResult,
|
|
1403
|
-
timestamp: now
|
|
1613
|
+
timestamp: now,
|
|
1614
|
+
hostname: digDomain
|
|
1404
1615
|
});
|
|
1405
1616
|
dnsCacheStats.digMisses++;
|
|
1406
|
-
dnsCacheStats.freshDig
|
|
1617
|
+
pushFreshSample(dnsCacheStats.freshDig, `${digDomain} (${digRecordType})`);
|
|
1618
|
+
// Index hostname IF dig actually proved resolution -- NXDOMAIN
|
|
1619
|
+
// responses arrive as success:true with NXDOMAIN in the body,
|
|
1620
|
+
// so digOutputIndicatesResolution is the real gate.
|
|
1621
|
+
if (digResult.success && digOutputIndicatesResolution(digResult.output)) {
|
|
1622
|
+
markResolved(digDomain);
|
|
1623
|
+
}
|
|
1407
1624
|
|
|
1408
1625
|
if (forceDebug && digResult.success) {
|
|
1409
1626
|
logToConsoleAndFile(`${messageColors.highlight('[dig-cache]')} Cached new result for ${digDomain} (${digRecordType})`);
|
|
@@ -1475,18 +1692,11 @@ function createNetToolsHandler(config) {
|
|
|
1475
1692
|
}
|
|
1476
1693
|
}
|
|
1477
1694
|
|
|
1478
|
-
// Periodic dig cache cleanup
|
|
1479
|
-
|
|
1480
|
-
const
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
if (now - entry.timestamp > GLOBAL_DIG_CACHE_TTL) {
|
|
1484
|
-
globalDigResultCache.delete(key);
|
|
1485
|
-
cleanedCount++;
|
|
1486
|
-
}
|
|
1487
|
-
}
|
|
1488
|
-
if (forceDebug && cleanedCount > 0) {
|
|
1489
|
-
logToConsoleAndFile(`${messageColors.highlight('[dig-cache]')} Cleaned ${cleanedCount} expired entries, cache size: ${globalDigResultCache.size}`);
|
|
1695
|
+
// Periodic dig cache cleanup. Same enforce-cap pattern as whois.
|
|
1696
|
+
{
|
|
1697
|
+
const ev = enforceCacheCap(globalDigResultCache, GLOBAL_DIG_CACHE_MAX, GLOBAL_DIG_CACHE_TTL);
|
|
1698
|
+
if (forceDebug && (ev.expired + ev.overflow) > 0) {
|
|
1699
|
+
logToConsoleAndFile(`${messageColors.highlight('[dig-cache]')} Pruned ${ev.expired} expired + ${ev.overflow} overflow entries, cache size: ${globalDigResultCache.size}`);
|
|
1490
1700
|
}
|
|
1491
1701
|
}
|
|
1492
1702
|
}
|
|
@@ -1532,7 +1742,14 @@ function createNetToolsHandler(config) {
|
|
|
1532
1742
|
}
|
|
1533
1743
|
// No need to add to matched domains
|
|
1534
1744
|
} else {
|
|
1535
|
-
|
|
1745
|
+
// Re-check ignoreDomains right before adding — the async whois/dig
|
|
1746
|
+
// window may have classified this domain as ignored since the
|
|
1747
|
+
// request-time gate ran. Mirrors curl.js/grep.js/searchstring.js.
|
|
1748
|
+
if (typeof matchesIgnoreDomain === 'function' && matchesIgnoreDomain(domain, ignoreDomains)) {
|
|
1749
|
+
if (forceDebug) {
|
|
1750
|
+
logToConsoleAndFile(`${messageColors.highlight('[nettools]')} Skipping ${domain}: now in ignoreDomains (post-whois/dig)`);
|
|
1751
|
+
}
|
|
1752
|
+
} else if (typeof addMatchedDomain === 'function') {
|
|
1536
1753
|
addMatchedDomain(domain, null, fullSubdomain);
|
|
1537
1754
|
} else {
|
|
1538
1755
|
matchedDomains.add(domain);
|
|
@@ -1581,22 +1798,27 @@ function createNetToolsHandler(config) {
|
|
|
1581
1798
|
};
|
|
1582
1799
|
}
|
|
1583
1800
|
|
|
1801
|
+
// Public surface kept narrow on purpose -- only what nwss.js actually
|
|
1802
|
+
// imports (verified via repo-wide grep). Internal helpers
|
|
1803
|
+
// (whoisLookup, whoisLookupWithRetry, digLookup, checkWhoisTerms,
|
|
1804
|
+
// checkWhoisTermsOr, checkDigTerms, checkDigTermsOr, selectWhoisServer,
|
|
1805
|
+
// getCommonWhoisServers, suggestWhoisServers, execFileWithTimeout,
|
|
1806
|
+
// markResolved, digOutputIndicatesResolution, loadDiskCache,
|
|
1807
|
+
// saveDiskCache, enforceCacheCap, stripAnsiColors) stay as module-local
|
|
1808
|
+
// functions -- move back to module.exports only if a new external
|
|
1809
|
+
// consumer appears. The dropped `execWithTimeout` was also the
|
|
1810
|
+
// "// Export for testing" entry; there's no test suite, so the export
|
|
1811
|
+
// was load-bearing for nothing.
|
|
1584
1812
|
module.exports = {
|
|
1585
|
-
validateWhoisAvailability,
|
|
1586
|
-
validateDigAvailability,
|
|
1587
|
-
whoisLookup,
|
|
1588
|
-
whoisLookupWithRetry,
|
|
1589
|
-
digLookup,
|
|
1590
|
-
checkWhoisTerms,
|
|
1591
|
-
checkWhoisTermsOr,
|
|
1592
|
-
checkDigTerms,
|
|
1593
|
-
checkDigTermsOr,
|
|
1594
1813
|
createNetToolsHandler,
|
|
1595
1814
|
createEnhancedDryRunCallback,
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
suggestWhoisServers,
|
|
1599
|
-
execWithTimeout, // Export for testing
|
|
1815
|
+
validateWhoisAvailability,
|
|
1816
|
+
validateDigAvailability,
|
|
1600
1817
|
enableDiskCache,
|
|
1601
|
-
getDnsCacheStats
|
|
1818
|
+
getDnsCacheStats,
|
|
1819
|
+
// Resolved-hostnames index for the DNS pre-check optimization.
|
|
1820
|
+
// nwss.js's per-task pre-check consults this BEFORE calling resolve4
|
|
1821
|
+
// so hosts already proven live by dig or whois (within their 20h
|
|
1822
|
+
// cache TTL) skip the c-ares call entirely.
|
|
1823
|
+
domainKnownToResolve
|
|
1602
1824
|
};
|