@fanboynz/network-scanner 2.0.66 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/nettools.js CHANGED
@@ -3,7 +3,14 @@
3
3
  * Provides domain analysis capabilities with proper timeout handling, custom whois servers, and retry logic
4
4
  */
5
5
 
6
- const { exec, execSync } = require('child_process');
6
+ // execFile (no shell) for whois/dig invocations -- arguments are passed
7
+ // directly to the executable as an argv array, so shell metacharacters in
8
+ // config-supplied hostnames or server names CANNOT execute commands. The
9
+ // prior `exec(string)` approach interpolated tainted values into a shell
10
+ // string protected only by double-quoting, which doesn't stop $()/backticks.
11
+ // execSync is retained ONLY for the version-probe helpers below, where
12
+ // commands are constant string literals with no user-controlled inputs.
13
+ const { execFile, execSync } = require('child_process');
7
14
  const fs = require('fs');
8
15
  const path = require('path');
9
16
  const { formatLogMessage, messageColors } = require('./colorize');
@@ -17,19 +24,68 @@ let whoisServerCycleIndex = 0;
17
24
  // DNS records don't change based on what terms you're searching for,
18
25
  // so we cache the raw dig output and let each handler check its own terms against it
19
26
  const globalDigResultCache = new Map();
20
- const GLOBAL_DIG_CACHE_TTL = 50400000; // 14 hours (persisted to disk between runs)
21
- const GLOBAL_DIG_CACHE_MAX = 1000;
27
+ const GLOBAL_DIG_CACHE_TTL = 72000000; // 20 hours (persisted to disk between runs)
28
+ const GLOBAL_DIG_CACHE_MAX = 2000;
22
29
 
23
30
  // Global whois result cache — shared across ALL handler instances and processUrl calls
24
31
  // Whois data is per root domain and doesn't change based on search terms
25
32
  const globalWhoisResultCache = new Map();
26
- const GLOBAL_WHOIS_CACHE_TTL = 50400000; // 14 hours (persisted to disk between runs)
27
- const GLOBAL_WHOIS_CACHE_MAX = 1000;
33
+ const GLOBAL_WHOIS_CACHE_TTL = 72000000; // 20 hours (persisted to disk between runs)
34
+ const GLOBAL_WHOIS_CACHE_MAX = 2000;
28
35
 
29
36
  // Persistent disk cache file paths
30
37
  const DIG_CACHE_FILE = path.join(__dirname, '..', '.digcache');
31
38
  const WHOIS_CACHE_FILE = path.join(__dirname, '..', '.whoiscache');
32
39
 
40
+ // Index of hostnames known to resolve, populated as a side effect of
41
+ // positive dig/whois cache writes AND cache hits. nwss.js's DNS pre-check
42
+ // reads this via domainKnownToResolve() so it can skip its own resolve4
43
+ // call on hosts that dig or whois have already proven live within the
44
+ // 20-hour TTL window. Populating on cache HITS (not just writes) handles
45
+ // the --dns-cache disk-load case where entries arrive without going
46
+ // through the in-process write path. Stale entries -- hostname in Set but
47
+ // the dig/whois entry has since been evicted -- are harmless: worst case
48
+ // is one wasted pre-check next time the hostname comes through.
49
+ const knownResolvedHostnames = new Set();
50
+ const MAX_RESOLVED_HOSTNAMES = 5000;
51
+
52
+ function markResolved(hostname) {
53
+ if (!hostname) return;
54
+ if (knownResolvedHostnames.size >= MAX_RESOLVED_HOSTNAMES) {
55
+ // FIFO eviction -- Set iteration order is insertion order.
56
+ knownResolvedHostnames.delete(knownResolvedHostnames.values().next().value);
57
+ }
58
+ knownResolvedHostnames.add(hostname);
59
+ }
60
+
61
+ /**
62
+ * Returns true if dig or whois has produced a verifiable-positive result
63
+ * for this hostname during the current process lifetime. nwss.js's DNS
64
+ * pre-check uses this to skip resolve4 calls on hosts we already know
65
+ * are live. False does NOT mean "unresolvable" -- it means "we have no
66
+ * recent evidence either way; do the pre-check".
67
+ */
68
+ function domainKnownToResolve(hostname) {
69
+ return knownResolvedHostnames.has(hostname);
70
+ }
71
+
72
+ // Dig responses with success:true can still represent NXDOMAIN -- the dig
73
+ // COMMAND succeeded but the DNS RESPONSE is "no such name". The output
74
+ // string is the only reliable signal. NOERROR + non-zero answer count =
75
+ // the hostname genuinely resolved.
76
+ function digOutputIndicatesResolution(output) {
77
+ if (!output) return false;
78
+ if (!output.includes('status: NOERROR')) return false;
79
+ // ANSWER: 0 means NOERROR but no records of the requested type -- the
80
+ // hostname exists at this label but doesn't have THIS record type.
81
+ // For our purposes (proving the name is live) that's still useful, but
82
+ // strictly "domain has nameservers and returned authoritative empty"
83
+ // is weaker than "domain returned an actual A/AAAA". Conservative
84
+ // choice: require non-zero answer count.
85
+ if (/ANSWER:\s*0\b/.test(output)) return false;
86
+ return true;
87
+ }
88
+
33
89
  /**
34
90
  * Load persistent cache from disk into in-memory Map
35
91
  * Skips expired entries and enforces max size
@@ -39,6 +95,18 @@ const WHOIS_CACHE_FILE = path.join(__dirname, '..', '.whoiscache');
39
95
  * @param {number} maxSize - Maximum cache entries
40
96
  */
41
97
  function loadDiskCache(filePath, cache, ttl, maxSize) {
98
+ // Also clean up any stray .tmp files from a prior interrupted save.
99
+ // The atomic-write path (saveDiskCache below) writes to `${filePath}.tmp`
100
+ // then renames; a process killed mid-write leaves the .tmp behind. The
101
+ // real file remains intact (rename is atomic), so we just sweep the
102
+ // stray on load.
103
+ try {
104
+ const tmpPath = filePath + '.tmp';
105
+ if (fs.existsSync(tmpPath)) {
106
+ try { fs.unlinkSync(tmpPath); } catch {}
107
+ }
108
+ } catch {}
109
+
42
110
  try {
43
111
  if (!fs.existsSync(filePath)) return;
44
112
  const data = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
@@ -51,8 +119,13 @@ function loadDiskCache(filePath, cache, ttl, maxSize) {
51
119
  loaded++;
52
120
  }
53
121
  }
54
- } catch {
55
- // Corrupt or unreadable cache file — delete and start fresh
122
+ } catch (err) {
123
+ // Corrupt or unreadable cache file — delete and start fresh.
124
+ // Surface the event so the user knows they lost their warm cache;
125
+ // previously this was a silent reset, which made "why did my dns
126
+ // cache stop helping?" hard to diagnose.
127
+ // eslint-disable-next-line no-console
128
+ console.warn(`${messageColors.highlight('[dns-cache]')} ${path.basename(filePath)} was unreadable (${err.message}); starting fresh`);
56
129
  try { fs.unlinkSync(filePath); } catch {}
57
130
  }
58
131
  }
@@ -78,11 +151,12 @@ function saveDiskCache(filePath, cache, ttl, maxSize) {
78
151
  }
79
152
  }
80
153
 
81
- // If over max, keep only the newest entries. Drop the pretty-print —
82
- // saveDiskCache runs on the synchronous 'exit' handler when --dns-cache
83
- // is set, so any work here directly delays scan exit. Compact JSON is
84
- // several times faster on multi-megabyte caches and the file is not
85
- // intended for human reading.
154
+ // Build the final payload (with trimming if over cap). Compact JSON
155
+ // -- saveDiskCache runs on the synchronous 'exit' handler when
156
+ // --dns-cache is set, so any work here directly delays scan exit.
157
+ // Several times faster than pretty-print on multi-megabyte caches
158
+ // and the file is not intended for human reading.
159
+ let payload;
86
160
  if (count > maxSize) {
87
161
  const sorted = Object.entries(entries)
88
162
  .sort((a, b) => b[1].timestamp - a[1].timestamp)
@@ -91,12 +165,28 @@ function saveDiskCache(filePath, cache, ttl, maxSize) {
91
165
  for (const [key, entry] of sorted) {
92
166
  trimmed[key] = entry;
93
167
  }
94
- fs.writeFileSync(filePath, JSON.stringify(trimmed));
168
+ payload = JSON.stringify(trimmed);
95
169
  } else {
96
- fs.writeFileSync(filePath, JSON.stringify(entries));
170
+ payload = JSON.stringify(entries);
97
171
  }
172
+
173
+ // Atomic write: writeFileSync to a sibling .tmp path, then rename.
174
+ // If the process is killed mid-write (SIGKILL, OOM, power loss) the
175
+ // .tmp is left as garbage but the real filePath is either complete
176
+ // or absent -- never half-written. loadDiskCache sweeps stray .tmp
177
+ // files on next startup.
178
+ // Matches the pattern already used in lib/adblock-rust.js per the
179
+ // CLAUDE.md convention. We deliberately omit the pid suffix used
180
+ // there because saveDiskCache only ever runs from the single 'exit'
181
+ // handler -- no concurrent-process race to disambiguate.
182
+ const tmpPath = filePath + '.tmp';
183
+ fs.writeFileSync(tmpPath, payload);
184
+ fs.renameSync(tmpPath, filePath);
98
185
  } catch {
99
- // Disk write failed non-fatal, in-memory cache still works
186
+ // Disk write failed -- non-fatal, in-memory cache still works.
187
+ // Best-effort cleanup of any stray tmp file from this attempt so
188
+ // it doesn't accumulate over repeated failures.
189
+ try { fs.unlinkSync(filePath + '.tmp'); } catch {}
100
190
  }
101
191
  }
102
192
 
@@ -104,9 +194,57 @@ function saveDiskCache(filePath, cache, ttl, maxSize) {
104
194
  const pendingDigLookups = new Map();
105
195
  const pendingWhoisLookups = new Map();
106
196
 
107
- // DNS cache statistics
197
+ /**
198
+ * Enforce a hard size cap on the dig/whois global caches. Evicts expired
199
+ * entries first; if the cache is still over cap after that (i.e. every
200
+ * remaining entry is within its TTL but there are simply too many),
201
+ * deletes the oldest entries by timestamp until size <= max. Without the
202
+ * second pass the caches could grow unbounded on scans of many unique
203
+ * hostnames whose entries hadn't expired yet.
204
+ *
205
+ * @param {Map} cache - cache Map to prune
206
+ * @param {number} maxSize - desired hard cap
207
+ * @param {number} ttl - TTL in ms; entries older than this are evicted first
208
+ * @returns {{expired: number, overflow: number}} eviction counts
209
+ */
210
+ function enforceCacheCap(cache, maxSize, ttl) {
211
+ if (cache.size <= maxSize) return { expired: 0, overflow: 0 };
212
+ const now = Date.now();
213
+ let expired = 0;
214
+ for (const [key, entry] of cache.entries()) {
215
+ if (now - entry.timestamp > ttl) {
216
+ cache.delete(key);
217
+ expired++;
218
+ }
219
+ }
220
+ let overflow = 0;
221
+ if (cache.size > maxSize) {
222
+ // Snapshot timestamps and sort ascending, evict the oldest few.
223
+ const byAge = Array.from(cache.entries())
224
+ .sort((a, b) => a[1].timestamp - b[1].timestamp);
225
+ const toDrop = cache.size - maxSize;
226
+ for (let i = 0; i < toDrop; i++) {
227
+ cache.delete(byAge[i][0]);
228
+ overflow++;
229
+ }
230
+ }
231
+ return { expired, overflow };
232
+ }
233
+
234
+ // DNS cache statistics. freshDig / freshWhois are sample lists for
235
+ // end-of-scan visibility; capped at MAX_FRESH_LIST entries (FIFO) so
236
+ // they can't grow unbounded on scans with thousands of unique fresh
237
+ // lookups. digMisses/whoisMisses retain the full count, so callers
238
+ // who want totals can read those; freshDig/freshWhois are intended as
239
+ // "show me which domains" diagnostic samples.
240
+ const MAX_FRESH_LIST = 1000;
108
241
  const dnsCacheStats = { digHits: 0, digMisses: 0, whoisHits: 0, whoisMisses: 0, freshDig: [], freshWhois: [] };
109
242
 
243
+ function pushFreshSample(arr, item) {
244
+ if (arr.length >= MAX_FRESH_LIST) arr.shift();
245
+ arr.push(item);
246
+ }
247
+
110
248
  /**
111
249
  * Get DNS cache statistics for end-of-scan reporting
112
250
  * @returns {Object} Cache hit/miss counts and fresh domain lists
@@ -119,14 +257,46 @@ function getDnsCacheStats() {
119
257
  let diskCacheEnabled = false;
120
258
 
121
259
  /**
122
- * Enable persistent disk caching for dig/whois results
123
- * Call this when --dns-cache flag is set
260
+ * Enable persistent disk caching for dig/whois results.
261
+ * Call this when --dns-cache flag is set. Idempotent — repeated calls
262
+ * are no-ops, which prevents double-loading the cache files and double-
263
+ * registering the 'exit' handler that flushes them on shutdown.
124
264
  */
125
265
  function enableDiskCache() {
266
+ if (diskCacheEnabled) return;
126
267
  diskCacheEnabled = true;
127
268
  loadDiskCache(DIG_CACHE_FILE, globalDigResultCache, GLOBAL_DIG_CACHE_TTL, GLOBAL_DIG_CACHE_MAX);
128
269
  loadDiskCache(WHOIS_CACHE_FILE, globalWhoisResultCache, GLOBAL_WHOIS_CACHE_TTL, GLOBAL_WHOIS_CACHE_MAX);
129
270
 
271
+ // Warm knownResolvedHostnames from disk-loaded entries so the very
272
+ // first URL per cached domain also skips the c-ares pre-check (instead
273
+ // of waiting for the cache-hit handler to fire later in the URL's
274
+ // pipeline). Entries written by older versions of this module lack the
275
+ // `hostname` field -- they're skipped here and fall back to lazy
276
+ // on-hit population. Same positive-resolution gates apply as the live
277
+ // write/hit paths (dig: NOERROR + non-zero answers; whois: success).
278
+ let digWarm = 0;
279
+ let whoisWarm = 0;
280
+ for (const entry of globalDigResultCache.values()) {
281
+ if (entry.hostname && entry.result && entry.result.success &&
282
+ digOutputIndicatesResolution(entry.result.output)) {
283
+ markResolved(entry.hostname);
284
+ digWarm++;
285
+ }
286
+ }
287
+ for (const entry of globalWhoisResultCache.values()) {
288
+ if (entry.hostname && entry.result && entry.result.success) {
289
+ markResolved(entry.hostname);
290
+ whoisWarm++;
291
+ }
292
+ }
293
+ // Debug log only if anything was actually warmed; silent on fresh
294
+ // installs / empty disk caches.
295
+ if (digWarm > 0 || whoisWarm > 0) {
296
+ // eslint-disable-next-line no-console
297
+ console.log(`${messageColors.highlight('[dns-cache]')} Warmed resolved-hostnames index from disk: ${digWarm} dig + ${whoisWarm} whois entries`);
298
+ }
299
+
130
300
  // Save caches to disk once on process exit instead of per-lookup. The
131
301
  // 'exit' handler fires synchronously regardless of how the process exits
132
302
  // (normal completion, signal, uncaught exception), so a separate signal
@@ -203,25 +373,36 @@ function validateDigAvailability() {
203
373
  }
204
374
 
205
375
  /**
206
- * Executes a command with proper timeout handling
207
- * @param {string} command - Command to execute
376
+ * Spawn a process with execFile (no shell) and a hard timeout. Arguments
377
+ * are passed directly as argv -- shell metacharacters in any element
378
+ * cannot execute commands. Replaces the prior exec(string)-based helper
379
+ * whose double-quote-only protection failed against $()/backticks.
380
+ *
381
+ * @param {string} cmd - Executable name or path
382
+ * @param {string[]} args - Argument vector (each element a separate arg)
208
383
  * @param {number} timeout - Timeout in milliseconds
209
- * @returns {Promise<Object>} Promise that resolves with stdout/stderr or rejects on timeout/error
384
+ * @returns {Promise<{stdout:string, stderr:string}>} -- rejects on timeout/error
210
385
  */
211
- function execWithTimeout(command, timeout = 10000) {
386
+ function execFileWithTimeout(cmd, args, timeout = 10000) {
212
387
  return new Promise((resolve, reject) => {
213
- const child = exec(command, { encoding: 'utf8' }, (error, stdout, stderr) => {
388
+ // Hoisted before the callbacks that reference it. Previously `const
389
+ // timer = setTimeout(...)` was declared after the exec callback /
390
+ // 'error' listener that both did `if (timer) clearTimeout(timer)` —
391
+ // worked in practice because exec defers callbacks via nextTick, but
392
+ // structurally fragile (a synchronous exec failure would TDZ-throw).
393
+ let timer = null;
394
+
395
+ const child = execFile(cmd, args, { encoding: 'utf8' }, (error, stdout, stderr) => {
214
396
  if (timer) clearTimeout(timer);
215
-
397
+
216
398
  if (error) {
217
399
  reject(error);
218
400
  } else {
219
401
  resolve({ stdout, stderr });
220
402
  }
221
403
  });
222
-
223
- // Set up timeout
224
- const timer = setTimeout(() => {
404
+
405
+ timer = setTimeout(() => {
225
406
  child.kill('SIGTERM');
226
407
 
227
408
  // Force kill after 2 seconds if SIGTERM doesn't work. unref() so this
@@ -235,9 +416,9 @@ function execWithTimeout(command, timeout = 10000) {
235
416
  }, 2000);
236
417
  killTimer.unref();
237
418
 
238
- reject(new Error(`Command timeout after ${timeout}ms: ${command}`));
419
+ reject(new Error(`Command timeout after ${timeout}ms: ${cmd} ${args.join(' ')}`));
239
420
  }, timeout);
240
-
421
+
241
422
  // Handle child process errors
242
423
  child.on('error', (err) => {
243
424
  if (timer) clearTimeout(timer);
@@ -344,19 +525,24 @@ async function whoisLookup(domain = '', timeout = 10000, whoisServer = '', debug
344
525
  try {
345
526
  // Clean domain (remove protocol, path, etc)
346
527
  cleanDomain = domain.replace(/^https?:\/\//, '').replace(/\/.*$/, '').replace(/:\d+$/, '');
347
-
528
+
348
529
  // Select whois server if provided
349
530
  selectedServer = selectWhoisServer(whoisServer);
350
-
351
- // Build whois command
531
+
532
+ // Build whois argv. Pass each token as a separate argv element --
533
+ // execFile does NOT spawn a shell, so neither cleanDomain nor
534
+ // selectedServer can inject commands no matter what they contain.
535
+ // The leading `--` is preserved so dashes in `cleanDomain` don't get
536
+ // re-interpreted as flags by the whois binary itself.
537
+ let whoisArgs;
352
538
  if (selectedServer) {
353
- // Use custom whois server with -h flag
354
- whoisCommand = `whois -h "${selectedServer}" -- "${cleanDomain}"`;
539
+ whoisArgs = ['-h', selectedServer, '--', cleanDomain];
355
540
  } else {
356
- // Use default whois behavior
357
- whoisCommand = `whois -- "${cleanDomain}"`;
541
+ whoisArgs = ['--', cleanDomain];
358
542
  }
359
-
543
+ // Kept as a display string for debug logging only -- never executed.
544
+ whoisCommand = `whois ${whoisArgs.join(' ')}`;
545
+
360
546
  if (debugMode) {
361
547
  if (logFunc) {
362
548
  logFunc(`${messageColors.highlight('[whois]')} Starting lookup for ${cleanDomain} (timeout: ${timeout}ms)`);
@@ -366,8 +552,8 @@ async function whoisLookup(domain = '', timeout = 10000, whoisServer = '', debug
366
552
  console.log(formatLogMessage('debug', `${messageColors.highlight('[whois]')} Command: ${whoisCommand}`));
367
553
  }
368
554
  }
369
-
370
- const { stdout, stderr } = await execWithTimeout(whoisCommand, timeout);
555
+
556
+ const { stdout, stderr } = await execFileWithTimeout('whois', whoisArgs, timeout);
371
557
  const duration = Date.now() - startTime;
372
558
 
373
559
  if (stderr && stderr.trim()) {
@@ -772,9 +958,11 @@ async function digLookup(domain = '', recordType = 'A', timeout = 5000) {
772
958
  try {
773
959
  // Clean domain
774
960
  const cleanDomain = domain.replace(/^https?:\/\//, '').replace(/\/.*$/, '').replace(/:\d+$/, '');
775
-
776
- // Single dig command — full output contains everything including the short answers
777
- const { stdout: fullOutput, stderr } = await execWithTimeout(`dig "${cleanDomain}" ${recordType}`, timeout);
961
+
962
+ // Single dig command — full output contains everything including short
963
+ // answers. execFile (no shell) so cleanDomain / recordType can contain
964
+ // any chars without injection risk.
965
+ const { stdout: fullOutput, stderr } = await execFileWithTimeout('dig', [cleanDomain, recordType], timeout);
778
966
 
779
967
  if (stderr && stderr.trim()) {
780
968
  return {
@@ -928,7 +1116,14 @@ function createNetToolsHandler(config) {
928
1116
  dumpUrls,
929
1117
  matchedUrlsLogFile,
930
1118
  forceDebug,
931
- fs
1119
+ fs,
1120
+ // ignoreDomains guard: callers pass the live ignoreDomains list + matcher
1121
+ // so a domain that became ignored AFTER the request fired (e.g. via
1122
+ // ignoreDomainsByUrl on a sibling request, or _dynamicallyIgnoredDomains)
1123
+ // doesn't slip into matchedDomains during the async whois/dig window.
1124
+ // Both default to no-op so older callers without the kwargs still work.
1125
+ ignoreDomains = null,
1126
+ matchesIgnoreDomain = null
932
1127
  } = config;
933
1128
 
934
1129
  const hasWhois = whoisTerms && Array.isArray(whoisTerms) && whoisTerms.length > 0;
@@ -1123,6 +1318,11 @@ function createNetToolsHandler(config) {
1123
1318
  originalTimestamp: cachedEntry.timestamp
1124
1319
  });
1125
1320
  dnsCacheStats.whoisHits++;
1321
+ // Warm the resolved-hostnames index from disk-loaded entries.
1322
+ // Cached whois entries are pre-filtered for network errors at
1323
+ // write time, so every cached entry implies the domain has a
1324
+ // registrar record -- strong resolution signal.
1325
+ markResolved(whoisRootDomain);
1126
1326
  } else {
1127
1327
  // Cache expired, remove it
1128
1328
  globalWhoisResultCache.delete(whoisCacheKey);
@@ -1169,12 +1369,18 @@ function createNetToolsHandler(config) {
1169
1369
  !whoisResult.error.toLowerCase().includes('connection') &&
1170
1370
  !whoisResult.error.toLowerCase().includes('network'))) {
1171
1371
 
1372
+ // `hostname` field is backwards-compat additive (see dig
1373
+ // write site for details).
1172
1374
  globalWhoisResultCache.set(whoisCacheKey, {
1173
1375
  result: whoisResult,
1174
- timestamp: now
1376
+ timestamp: now,
1377
+ hostname: whoisRootDomain
1175
1378
  });
1176
1379
  dnsCacheStats.whoisMisses++;
1177
- dnsCacheStats.freshWhois.push(whoisRootDomain);
1380
+ pushFreshSample(dnsCacheStats.freshWhois, whoisRootDomain);
1381
+ // Only mark resolved on actual whois success -- a cached
1382
+ // "not found" / "no match" failure shouldn't claim resolution.
1383
+ if (whoisResult.success) markResolved(whoisRootDomain);
1178
1384
 
1179
1385
  if (forceDebug) {
1180
1386
  const cacheType = whoisResult.success ? 'successful' : 'failed';
@@ -1332,18 +1538,14 @@ function createNetToolsHandler(config) {
1332
1538
  }
1333
1539
  }
1334
1540
 
1335
- // Periodic whois cache cleanup to prevent memory leaks
1336
- if (globalWhoisResultCache.size > GLOBAL_WHOIS_CACHE_MAX) {
1337
- const now = Date.now();
1338
- let cleanedCount = 0;
1339
- for (const [key, entry] of globalWhoisResultCache.entries()) {
1340
- if (now - entry.timestamp > GLOBAL_WHOIS_CACHE_TTL) {
1341
- globalWhoisResultCache.delete(key);
1342
- cleanedCount++;
1343
- }
1344
- }
1345
- if (forceDebug && cleanedCount > 0) {
1346
- logToConsoleAndFile(`${messageColors.highlight('[whois-cache]')} Cleaned ${cleanedCount} expired entries, cache size: ${globalWhoisResultCache.size}`);
1541
+ // Periodic whois cache cleanup. enforceCacheCap evicts expired
1542
+ // entries first; if still over MAX (all entries still within TTL
1543
+ // but too many), evicts the oldest by timestamp so the cap is
1544
+ // strictly enforced.
1545
+ {
1546
+ const ev = enforceCacheCap(globalWhoisResultCache, GLOBAL_WHOIS_CACHE_MAX, GLOBAL_WHOIS_CACHE_TTL);
1547
+ if (forceDebug && (ev.expired + ev.overflow) > 0) {
1548
+ logToConsoleAndFile(`${messageColors.highlight('[whois-cache]')} Pruned ${ev.expired} expired + ${ev.overflow} overflow entries, cache size: ${globalWhoisResultCache.size}`);
1347
1549
  }
1348
1550
  }
1349
1551
  }
@@ -1374,6 +1576,11 @@ function createNetToolsHandler(config) {
1374
1576
  }
1375
1577
  digResult = cachedEntry.result;
1376
1578
  dnsCacheStats.digHits++;
1579
+ // Warm the resolved-hostnames index from disk-loaded entries.
1580
+ // No-op if already present.
1581
+ if (digResult.success && digOutputIndicatesResolution(digResult.output)) {
1582
+ markResolved(digDomain);
1583
+ }
1377
1584
  } else {
1378
1585
  // Cache expired, remove it
1379
1586
  globalDigResultCache.delete(digCacheKey);
@@ -1397,13 +1604,23 @@ function createNetToolsHandler(config) {
1397
1604
  pendingDigLookups.delete(digCacheKey);
1398
1605
  }
1399
1606
 
1400
- // Cache the result for future use
1607
+ // Cache the result for future use. `hostname` field is
1608
+ // backwards-compat additive: old code reading new cache
1609
+ // ignores it; new code reading old cache (no field) falls
1610
+ // back to lazy on-hit population in the cache-hit branch.
1401
1611
  globalDigResultCache.set(digCacheKey, {
1402
1612
  result: digResult,
1403
- timestamp: now
1613
+ timestamp: now,
1614
+ hostname: digDomain
1404
1615
  });
1405
1616
  dnsCacheStats.digMisses++;
1406
- dnsCacheStats.freshDig.push(`${digDomain} (${digRecordType})`);
1617
+ pushFreshSample(dnsCacheStats.freshDig, `${digDomain} (${digRecordType})`);
1618
+ // Index hostname IF dig actually proved resolution -- NXDOMAIN
1619
+ // responses arrive as success:true with NXDOMAIN in the body,
1620
+ // so digOutputIndicatesResolution is the real gate.
1621
+ if (digResult.success && digOutputIndicatesResolution(digResult.output)) {
1622
+ markResolved(digDomain);
1623
+ }
1407
1624
 
1408
1625
  if (forceDebug && digResult.success) {
1409
1626
  logToConsoleAndFile(`${messageColors.highlight('[dig-cache]')} Cached new result for ${digDomain} (${digRecordType})`);
@@ -1475,18 +1692,11 @@ function createNetToolsHandler(config) {
1475
1692
  }
1476
1693
  }
1477
1694
 
1478
- // Periodic dig cache cleanup to prevent memory leaks
1479
- if (globalDigResultCache.size > GLOBAL_DIG_CACHE_MAX) {
1480
- const now = Date.now();
1481
- let cleanedCount = 0;
1482
- for (const [key, entry] of globalDigResultCache.entries()) {
1483
- if (now - entry.timestamp > GLOBAL_DIG_CACHE_TTL) {
1484
- globalDigResultCache.delete(key);
1485
- cleanedCount++;
1486
- }
1487
- }
1488
- if (forceDebug && cleanedCount > 0) {
1489
- logToConsoleAndFile(`${messageColors.highlight('[dig-cache]')} Cleaned ${cleanedCount} expired entries, cache size: ${globalDigResultCache.size}`);
1695
+ // Periodic dig cache cleanup. Same enforce-cap pattern as whois.
1696
+ {
1697
+ const ev = enforceCacheCap(globalDigResultCache, GLOBAL_DIG_CACHE_MAX, GLOBAL_DIG_CACHE_TTL);
1698
+ if (forceDebug && (ev.expired + ev.overflow) > 0) {
1699
+ logToConsoleAndFile(`${messageColors.highlight('[dig-cache]')} Pruned ${ev.expired} expired + ${ev.overflow} overflow entries, cache size: ${globalDigResultCache.size}`);
1490
1700
  }
1491
1701
  }
1492
1702
  }
@@ -1532,7 +1742,14 @@ function createNetToolsHandler(config) {
1532
1742
  }
1533
1743
  // No need to add to matched domains
1534
1744
  } else {
1535
- if (typeof addMatchedDomain === 'function') {
1745
+ // Re-check ignoreDomains right before adding — the async whois/dig
1746
+ // window may have classified this domain as ignored since the
1747
+ // request-time gate ran. Mirrors curl.js/grep.js/searchstring.js.
1748
+ if (typeof matchesIgnoreDomain === 'function' && matchesIgnoreDomain(domain, ignoreDomains)) {
1749
+ if (forceDebug) {
1750
+ logToConsoleAndFile(`${messageColors.highlight('[nettools]')} Skipping ${domain}: now in ignoreDomains (post-whois/dig)`);
1751
+ }
1752
+ } else if (typeof addMatchedDomain === 'function') {
1536
1753
  addMatchedDomain(domain, null, fullSubdomain);
1537
1754
  } else {
1538
1755
  matchedDomains.add(domain);
@@ -1581,22 +1798,27 @@ function createNetToolsHandler(config) {
1581
1798
  };
1582
1799
  }
1583
1800
 
1801
+ // Public surface kept narrow on purpose -- only what nwss.js actually
1802
+ // imports (verified via repo-wide grep). Internal helpers
1803
+ // (whoisLookup, whoisLookupWithRetry, digLookup, checkWhoisTerms,
1804
+ // checkWhoisTermsOr, checkDigTerms, checkDigTermsOr, selectWhoisServer,
1805
+ // getCommonWhoisServers, suggestWhoisServers, execFileWithTimeout,
1806
+ // markResolved, digOutputIndicatesResolution, loadDiskCache,
1807
+ // saveDiskCache, enforceCacheCap, stripAnsiColors) stay as module-local
1808
+ // functions -- move back to module.exports only if a new external
1809
+ // consumer appears. The dropped `execWithTimeout` was also the
1810
+ // "// Export for testing" entry; there's no test suite, so the export
1811
+ // was load-bearing for nothing.
1584
1812
  module.exports = {
1585
- validateWhoisAvailability,
1586
- validateDigAvailability,
1587
- whoisLookup,
1588
- whoisLookupWithRetry,
1589
- digLookup,
1590
- checkWhoisTerms,
1591
- checkWhoisTermsOr,
1592
- checkDigTerms,
1593
- checkDigTermsOr,
1594
1813
  createNetToolsHandler,
1595
1814
  createEnhancedDryRunCallback,
1596
- selectWhoisServer,
1597
- getCommonWhoisServers,
1598
- suggestWhoisServers,
1599
- execWithTimeout, // Export for testing
1815
+ validateWhoisAvailability,
1816
+ validateDigAvailability,
1600
1817
  enableDiskCache,
1601
- getDnsCacheStats
1818
+ getDnsCacheStats,
1819
+ // Resolved-hostnames index for the DNS pre-check optimization.
1820
+ // nwss.js's per-task pre-check consults this BEFORE calling resolve4
1821
+ // so hosts already proven live by dig or whois (within their 20h
1822
+ // cache TTL) skip the c-ares call entirely.
1823
+ domainKnownToResolve
1602
1824
  };