@fanboynz/network-scanner 3.1.2 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -103,54 +103,6 @@ class DomainCache {
103
103
  return wasNew;
104
104
  }
105
105
 
106
- /**
107
- * Combined check-and-mark in one pass. Functionally equivalent to
108
- * isDomainAlreadyDetected() followed by markDomainAsDetected(), but with
109
- * one Set.has() call instead of two. (JS is single-threaded so all three
110
- * variants are individually atomic; this one is just cheaper.)
111
- * @param {string} domain - Domain to check and potentially mark
112
- * @returns {boolean} True if domain was ALREADY detected (should skip), false if NEW (should process)
113
- */
114
- checkAndMark(domain) {
115
- if (!domain || typeof domain !== 'string') {
116
- return false;
117
- }
118
-
119
- const wasAlreadyDetected = this.cache.has(domain);
120
-
121
- if (wasAlreadyDetected) {
122
- // Domain already exists - update skip stats and return true (should skip)
123
- this.stats.totalSkipped++;
124
- this.stats.cacheHits++;
125
-
126
- if (this.enableLogging) {
127
- console.log(formatLogMessage('debug', `${this.logPrefix} Cache HIT: ${domain} (skipped)`));
128
- }
129
- return true; // Already detected, should skip
130
- }
131
-
132
- // Domain is NEW - mark it as detected
133
- this.stats.cacheMisses++;
134
-
135
- this.cache.add(domain);
136
- this.stats.totalDetected++;
137
-
138
- if (this.enableLogging) {
139
- console.log(formatLogMessage('debug', `${this.logPrefix} Cache MISS: ${domain} (processing and marked, cache size: ${this.cache.size})`));
140
- }
141
-
142
- // Check size after the add so an overflow only fires eviction once per
143
- // overflowing call (using targetCacheSize precomputed in the constructor).
144
- if (this.cache.size > this.maxCacheSize) {
145
- const toRemove = this.cache.size - this.targetCacheSize;
146
- if (toRemove > 0) {
147
- this.clearOldestEntries(toRemove);
148
- }
149
- }
150
-
151
- return false; // New domain, should process
152
- }
153
-
154
106
  /**
155
107
  * Clear oldest entries from cache (FIFO eviction). Set iteration order is
156
108
  * guaranteed insertion order per ES2015, so this genuinely evicts oldest-
@@ -208,45 +160,6 @@ class DomainCache {
208
160
  return this.cache.has(domain);
209
161
  }
210
162
 
211
- /**
212
- * Add multiple domains to cache at once. Uses a single .size delta to
213
- * count actually-new entries (skipping per-domain .has() calls), and
214
- * runs the size-overflow eviction check once after the batch instead of
215
- * per-domain. For a batch of N domains this is N .has() calls saved and
216
- * up to N redundant cap checks collapsed to one.
217
- * @param {Array<string>} domains - Array of domains to add
218
- * @returns {number} Number of domains actually added (excludes duplicates)
219
- */
220
- markMultipleDomainsAsDetected(domains) {
221
- if (!Array.isArray(domains) || domains.length === 0) {
222
- return 0;
223
- }
224
-
225
- const startSize = this.cache.size;
226
- for (let i = 0; i < domains.length; i++) {
227
- const d = domains[i];
228
- if (d && typeof d === 'string') {
229
- this.cache.add(d);
230
- }
231
- }
232
- const addedCount = this.cache.size - startSize;
233
- this.stats.totalDetected += addedCount;
234
-
235
- if (this.enableLogging && addedCount > 0) {
236
- console.log(formatLogMessage('debug', `${this.logPrefix} Batch added ${addedCount} new domains (cache size: ${this.cache.size})`));
237
- }
238
-
239
- // One eviction sweep at the end, mirroring the single-add overflow check.
240
- if (this.cache.size > this.maxCacheSize) {
241
- const toRemove = this.cache.size - this.targetCacheSize;
242
- if (toRemove > 0) {
243
- this.clearOldestEntries(toRemove);
244
- }
245
- }
246
-
247
- return addedCount;
248
- }
249
-
250
163
  /**
251
164
  * Create bound helper functions for easy integration with existing code
252
165
  * @returns {object} Object with bound helper functions
@@ -255,7 +168,6 @@ class DomainCache {
255
168
  return {
256
169
  isDomainAlreadyDetected: this.isDomainAlreadyDetected.bind(this),
257
170
  markDomainAsDetected: this.markDomainAsDetected.bind(this),
258
- checkAndMark: this.checkAndMark.bind(this),
259
171
  getSkippedCount: () => this.stats.totalSkipped,
260
172
  getCacheSize: () => this.cache.size,
261
173
  getStats: this.getStats.bind(this)
@@ -273,8 +185,7 @@ let globalDomainCache = null;
273
185
  *
274
186
  * NOTE: `options` is honored ONLY on the first call (the call that actually
275
187
  * constructs the singleton). Subsequent calls return the existing instance
276
- * regardless of what's passed. If you need different settings, call
277
- * resetGlobalCache() first or use `new DomainCache(options)` directly.
188
+ * regardless of what's passed; options are fixed at first construction.
278
189
  *
279
190
  * Under debug logging, a warning fires if a later caller passes options
280
191
  * that don't match the live instance — silent drift is a recurring source
@@ -295,7 +206,7 @@ function getGlobalDomainCache(options = {}) {
295
206
  (options.enableLogging !== undefined && options.enableLogging !== globalDomainCache.enableLogging) ||
296
207
  (options.logPrefix !== undefined && options.logPrefix !== globalDomainCache.logPrefix);
297
208
  if (drifted) {
298
- console.log(formatLogMessage('debug', `${globalDomainCache.logPrefix} getGlobalDomainCache called with options that differ from the live singleton; ignored (call resetGlobalCache() first to apply new options)`));
209
+ console.log(formatLogMessage('debug', `${globalDomainCache.logPrefix} getGlobalDomainCache called with options that differ from the live singleton; ignored (options are fixed at first construction)`));
299
210
  }
300
211
  }
301
212
  return globalDomainCache;
@@ -312,36 +223,17 @@ function createGlobalHelpers(options = {}) {
312
223
  }
313
224
 
314
225
  /**
315
- * Reset the global cache (useful for testing or manual resets)
316
- */
317
- function resetGlobalCache() {
318
- if (globalDomainCache) {
319
- globalDomainCache.clear();
320
- }
321
- globalDomainCache = null;
322
- }
323
-
324
- /**
325
- * Legacy wrapper functions for backward compatibility
326
- * These match the original function signatures from nwss.js
226
+ * Legacy wrapper for backward compatibility.
327
227
  *
328
- * NOTE: getTotalDomainsSkipped and getDetectedDomainsCount are the only
329
- * ones kept — they're used directly by nwss.js for end-of-scan stats.
330
- * Previously-defined isDomainAlreadyDetected / markDomainAsDetected /
331
- * checkAndMark wrappers were removed: nwss.js calls those via
332
- * createGlobalHelpers() now and repo-wide grep confirmed zero remaining
333
- * external callers of the legacy wrappers.
228
+ * getDetectedDomainsCount is the only one kept — nwss.js reads it for the
229
+ * end-of-scan "unique domains cached" stat. getTotalDomainsSkipped was
230
+ * removed: its value was always 0 because the global cache's skip-check
231
+ * (isDomainAlreadyDetected) is never called — cross-URL dedup is handled by
232
+ * nettools' processed-domain sets / smart-cache / the per-URL set — so the
233
+ * stat was misleading. The isDomainAlreadyDetected / markDomainAsDetected /
234
+ * checkAndMark wrappers were likewise removed; nwss.js uses createGlobalHelpers().
334
235
  */
335
236
 
336
- /**
337
- * Get total domains skipped (legacy wrapper)
338
- * @returns {number} Number of domains skipped
339
- */
340
- function getTotalDomainsSkipped() {
341
- const cache = getGlobalDomainCache();
342
- return cache.stats.totalSkipped;
343
- }
344
-
345
237
  /**
346
238
  * Get detected domains cache size (legacy wrapper)
347
239
  * @returns {number} Size of the detected domains cache
@@ -352,15 +244,10 @@ function getDetectedDomainsCount() {
352
244
  }
353
245
 
354
246
  module.exports = {
355
- // Main class
356
- DomainCache,
357
-
358
- // Global cache functions
359
- getGlobalDomainCache,
247
+ // Global cache helpers — createGlobalHelpers feeds nwss.js's per-domain
248
+ // marking; getDetectedDomainsCount feeds the end-of-scan "unique domains
249
+ // cached" stat. (DomainCache / getGlobalDomainCache stay internal — no
250
+ // external consumer; construct via createGlobalHelpers.)
360
251
  createGlobalHelpers,
361
- resetGlobalCache,
362
-
363
- // Legacy wrappers still used by nwss.js for end-of-scan stats
364
- getTotalDomainsSkipped,
365
252
  getDetectedDomainsCount
366
253
  };
@@ -15,6 +15,11 @@
15
15
  // npm install ghost-cursor (optional dependency)
16
16
 
17
17
  const { formatLogMessage, messageColors } = require('./colorize');
18
+ // humanClick gives the coordinate-click path the same press realism as the
19
+ // built-in content clicks (hover dwell + mousedown/hold/mouseup, optional
20
+ // hand-tremor + mouseup drift) instead of a 0ms page.mouse.click. One-way
21
+ // require — interaction.js does not depend on ghost-cursor, so no cycle.
22
+ const { humanClick } = require('./interaction');
18
23
  const GHOST_CURSOR_TAG = messageColors.processing('[ghost-cursor]');
19
24
 
20
25
  let ghostCursorModule = null;
@@ -56,7 +61,7 @@ function createGhostCursor(page, options = {}) {
56
61
  const cursor = ghostCursorModule.createCursor(page, { x: startX, y: startY });
57
62
 
58
63
  if (forceDebug) {
59
- console.log(formatLogMessage('debug', '[ghost-cursor] Cursor instance created'));
64
+ console.log(formatLogMessage('debug', `${GHOST_CURSOR_TAG} Cursor instance created`));
60
65
  }
61
66
 
62
67
  return cursor;
@@ -98,7 +103,7 @@ async function ghostMove(cursor, toX, toY, options = {}) {
98
103
  const moveOpts = {};
99
104
  if (moveSpeed !== undefined) moveOpts.moveSpeed = moveSpeed;
100
105
  if (moveDelay > 0) moveOpts.moveDelay = moveDelay;
101
- if (randomizeMoveDelay !== undefined) moveOpts.randomizeMoveDelay = randomizeMoveDelay;
106
+ moveOpts.randomizeMoveDelay = randomizeMoveDelay; // always defined (defaults to true)
102
107
  if (overshootThreshold !== undefined) moveOpts.overshootThreshold = overshootThreshold;
103
108
 
104
109
  await cursor.moveTo({ x: toX, y: toY }, moveOpts);
@@ -126,6 +131,8 @@ async function ghostMove(cursor, toX, toY, options = {}) {
126
131
  * @param {number} options.waitForClick - Delay (ms) between mousedown/mouseup (default: auto)
127
132
  * @param {number} options.moveDelay - Delay (ms) after moving to target
128
133
  * @param {number} options.paddingPercentage - Click point within element (0=edge, 100=center)
134
+ * @param {import('puppeteer').Page} options.page - Page for coordinate clicks (falls back to cursor.page)
135
+ * @param {boolean} options.realistic - Coordinate clicks: emit hand-tremor + mouseup drift (default: false)
129
136
  * @param {boolean} options.forceDebug - Enable debug logging
130
137
  * @returns {Promise<boolean>} true if click succeeded
131
138
  */
@@ -137,6 +144,8 @@ async function ghostClick(cursor, target, options = {}) {
137
144
  waitForClick,
138
145
  moveDelay,
139
146
  paddingPercentage,
147
+ page,
148
+ realistic = false,
140
149
  forceDebug
141
150
  } = options;
142
151
 
@@ -149,16 +158,25 @@ async function ghostClick(cursor, target, options = {}) {
149
158
  if (typeof target === 'string') {
150
159
  await cursor.click(target, clickOpts);
151
160
  } else {
152
- // For coordinate clicks, move first then use page click
161
+ // Coordinate click: ghost-cursor's bezier moveTo brings the cursor to the
162
+ // point, then humanClick does the realistic press (hover dwell, mousedown
163
+ // → hold → mouseup, plus hand-tremor + down≠up drift when realistic). This
164
+ // replaces a 0ms page.mouse.click, so the ghost path gets the same click
165
+ // realism as built-in content clicks.
153
166
  await cursor.moveTo(target);
154
- // Small hesitation before clicking
155
- if (hesitate > 0) {
156
- await new Promise(resolve => setTimeout(resolve, hesitate));
157
- }
158
- const page = cursor._page || cursor.page;
159
- if (page && typeof page.mouse?.click === 'function') {
160
- await page.mouse.click(target.x, target.y);
167
+ // Prefer the caller-supplied page; fall back to the cursor's own page
168
+ // (ghost-cursor exposes it as cursor.page) so we don't depend on internals.
169
+ // Return false (not silent success) if there's no usable page — otherwise
170
+ // the "Clicked" log + return true below would lie about a click that
171
+ // never fired.
172
+ const clickPage = page || cursor.page;
173
+ if (!clickPage || typeof clickPage.mouse?.down !== 'function') {
174
+ if (forceDebug) {
175
+ console.log(formatLogMessage('debug', `${GHOST_CURSOR_TAG} Coordinate click skipped: no usable page`));
176
+ }
177
+ return false;
161
178
  }
179
+ await humanClick(clickPage, target.x, target.y, { realistic, forceDebug });
162
180
  }
163
181
 
164
182
  if (forceDebug) {
@@ -189,7 +207,7 @@ async function ghostRandomMove(cursor, options = {}) {
189
207
  try {
190
208
  await cursor.randomMove();
191
209
  if (options.forceDebug) {
192
- console.log(formatLogMessage('debug', '[ghost-cursor] Random movement performed'));
210
+ console.log(formatLogMessage('debug', `${GHOST_CURSOR_TAG} Random movement performed`));
193
211
  }
194
212
  return true;
195
213
  } catch (err) {
@@ -1333,5 +1333,9 @@ module.exports = {
1333
1333
  simulateScrolling,
1334
1334
  interactWithElements,
1335
1335
  performContentClicks,
1336
+ // Realistic timed click (hover dwell + mousedown/hold/mouseup, optional
1337
+ // hand-tremor + mouseup drift). Reused by lib/ghost-cursor.js so the ghost
1338
+ // coordinate click gets the same press realism as built-in content clicks.
1339
+ humanClick,
1336
1340
  generateRandomCoordinates
1337
1341
  };
package/lib/nettools.js CHANGED
@@ -30,7 +30,7 @@ const GLOBAL_DIG_CACHE_MAX = 2000;
30
30
  // Global whois result cache — shared across ALL handler instances and processUrl calls
31
31
  // Whois data is per root domain and doesn't change based on search terms
32
32
  const globalWhoisResultCache = new Map();
33
- const GLOBAL_WHOIS_CACHE_TTL = 72000000; // 20 hours (persisted to disk between runs)
33
+ const GLOBAL_WHOIS_CACHE_TTL = 129600000; // 36 hours (persisted to disk between runs). Longer than dig's 20h: registrar data is very stable and whois servers rate-limit aggressively, so caching longer cuts repeat queries.
34
34
  const GLOBAL_WHOIS_CACHE_MAX = 2000;
35
35
 
36
36
  // Persistent disk cache file paths
@@ -40,8 +40,8 @@ const WHOIS_CACHE_FILE = path.join(__dirname, '..', '.whoiscache');
40
40
  // Index of hostnames known to resolve, populated as a side effect of
41
41
  // positive dig/whois cache writes AND cache hits. nwss.js's DNS pre-check
42
42
  // reads this via domainKnownToResolve() so it can skip its own resolve4
43
- // call on hosts that dig or whois have already proven live within the
44
- // 20-hour TTL window. Populating on cache HITS (not just writes) handles
43
+ // call on hosts that dig or whois have already proven live within their
44
+ // cache TTL window (dig 20h / whois 36h). Populating on cache HITS (not just writes) handles
45
45
  // the --dns-cache disk-load case where entries arrive without going
46
46
  // through the in-process write path. Stale entries -- hostname in Set but
47
47
  // the dig/whois entry has since been evicted -- are harmless: worst case
@@ -124,7 +124,6 @@ function loadDiskCache(filePath, cache, ttl, maxSize) {
124
124
  // Surface the event so the user knows they lost their warm cache;
125
125
  // previously this was a silent reset, which made "why did my dns
126
126
  // cache stop helping?" hard to diagnose.
127
- // eslint-disable-next-line no-console
128
127
  console.warn(`${messageColors.highlight('[dns-cache]')} ${path.basename(filePath)} was unreadable (${err.message}); starting fresh`);
129
128
  try { fs.unlinkSync(filePath); } catch {}
130
129
  }
@@ -256,6 +255,38 @@ function getDnsCacheStats() {
256
255
  // Disk cache is opt-in via --dns-cache flag
257
256
  let diskCacheEnabled = false;
258
257
 
258
+ // Optional dig resolver(s), set from --dns. When non-empty, dig queries
259
+ // `@<one of these>` (round-robin) instead of the system resolver — so dig uses
260
+ // the same reliable servers as the pre-check rather than a flaky /etc/resolv.conf
261
+ // (the cause of `dig: Command timeout` drops on Cloudflare-fronted ad domains).
262
+ let digResolvers = [];
263
+ let digResolverCursor = 0;
264
+ // dig's `@server` wants a bare IP; strip any `ipv4:port` / `[ipv6]:port` form.
265
+ function digServerFromSpec(spec) {
266
+ const s = String(spec);
267
+ const br = s.match(/^\[([0-9a-fA-F:]+)\]/);
268
+ if (br) return br[1];
269
+ const v4p = s.match(/^(\d{1,3}(?:\.\d{1,3}){3}):\d+$/);
270
+ if (v4p) return v4p[1];
271
+ return s;
272
+ }
273
+ function setDigResolvers(servers) {
274
+ digResolvers = (Array.isArray(servers) ? servers : []).filter(Boolean).map(digServerFromSpec);
275
+ }
276
+ // Ordered `@server` attempt list for ONE dig lookup: starts at the round-robin
277
+ // cursor (advanced once per lookup, preserving the old fairness) then falls
278
+ // through the remaining resolvers as failover. Returns [null] when no --dns
279
+ // resolvers are configured — a single attempt via the system resolver.
280
+ function digServerAttemptList() {
281
+ if (digResolvers.length === 0) return [null];
282
+ const start = digResolverCursor++ % digResolvers.length;
283
+ const list = [];
284
+ for (let i = 0; i < digResolvers.length; i++) {
285
+ list.push('@' + digResolvers[(start + i) % digResolvers.length]);
286
+ }
287
+ return list;
288
+ }
289
+
259
290
  /**
260
291
  * Enable persistent disk caching for dig/whois results.
261
292
  * Call this when --dns-cache flag is set. Idempotent — repeated calls
@@ -293,7 +324,6 @@ function enableDiskCache() {
293
324
  // Debug log only if anything was actually warmed; silent on fresh
294
325
  // installs / empty disk caches.
295
326
  if (digWarm > 0 || whoisWarm > 0) {
296
- // eslint-disable-next-line no-console
297
327
  console.log(`${messageColors.highlight('[dns-cache]')} Warmed resolved-hostnames index from disk: ${digWarm} dig + ${whoisWarm} whois entries`);
298
328
  }
299
329
 
@@ -994,50 +1024,103 @@ async function whoisLookupWithRetry(domain = '', timeout = 10000, whoisServer =
994
1024
  * @returns {Promise<Object>} Object with success status and output/error
995
1025
  */
996
1026
  async function digLookup(domain = '', recordType = 'A', timeout = 5000) {
997
- try {
998
- // Clean domain
999
- const cleanDomain = domain.replace(/^https?:\/\//, '').replace(/\/.*$/, '').replace(/:\d+$/, '');
1027
+ // Clean domain (defensive — callers usually pass an already-clean digDomain).
1028
+ const cleanDomain = domain.replace(/^https?:\/\//, '').replace(/\/.*$/, '').replace(/:\d+$/, '');
1029
+
1030
+ // dig argv-injection guard. dig parses @/-/+ -leading tokens as options
1031
+ // (`@host` redirects the query to an arbitrary server, `-f path` reads a
1032
+ // file as a query batch) and has no `--` end-of-options marker like whois.
1033
+ // Reject anything not hostname-shaped before shelling out — success:false so
1034
+ // it's treated as no-match and not cached. (Charset blocks @ + / space etc;
1035
+ // the leading-`-` check blocks `-f` and friends, since `-` is valid mid-host.)
1036
+ if (!cleanDomain || /[^a-zA-Z0-9._-]/.test(cleanDomain) || cleanDomain.startsWith('-')) {
1037
+ return { success: false, error: `invalid domain shape: ${cleanDomain}`, domain: cleanDomain, recordType };
1038
+ }
1039
+
1040
+ // Resolver failover: try the round-robin resolver first, then fall through
1041
+ // the remaining --dns resolvers on timeout / no-reply / REFUSED / SERVFAIL —
1042
+ // the same resilience the whois path already has via whoisLookupWithRetry,
1043
+ // and the DNS pre-check has via its rotation. Capped at 3 attempts (matches
1044
+ // whois maxRetries default) so a host that's dead on every resolver can't
1045
+ // burn the whole nettools budget.
1046
+ const attempts = digServerAttemptList();
1047
+ // Only do JS-level failover when --dns gave us pinned resolvers. Without it,
1048
+ // attempts is [null]: a SINGLE system-resolver invocation that keeps dig's
1049
+ // native resolv.conf rotation + retries (forcing +tries=1 there would strip
1050
+ // that built-in resilience — the whole point is to be MORE resilient).
1051
+ const usingResolvers = attempts[0] !== null;
1052
+ const maxAttempts = usingResolvers ? Math.min(3, attempts.length) : 1;
1053
+ // Pinned-resolver attempts use +time=2 +tries=1 (the JS loop owns failover)
1054
+ // under a 4s SIGTERM ceiling. The system-resolver path keeps the full budget
1055
+ // and dig's own retry behaviour, matching the pre-failover semantics exactly.
1056
+ const perAttemptTimeout = usingResolvers ? Math.min(timeout, 4000) : timeout;
1057
+
1058
+ let lastError = 'no resolver attempts made';
1059
+
1060
+ for (let i = 0; i < maxAttempts; i++) {
1061
+ const digServerArg = attempts[i];
1062
+ // With a pinned resolver: one fast try (+time=2 +tries=1), then the JS loop
1063
+ // moves to the next resolver. Without --dns: bare `dig name type` so dig
1064
+ // applies its native resolv.conf rotation. execFile (no shell) => args
1065
+ // can't be injected.
1066
+ const digArgs = digServerArg
1067
+ ? [digServerArg, '+time=2', '+tries=1', cleanDomain, recordType]
1068
+ : [cleanDomain, recordType];
1069
+ const resolverLabel = digServerArg ? digServerArg.slice(1) : 'system resolver';
1070
+
1071
+ try {
1072
+ const { stdout: fullOutput } = await execFileWithTimeout('dig', digArgs, perAttemptTimeout);
1073
+
1074
+ // Judge success by RCODE, not by stderr. dig exits 0 for ANY server
1075
+ // response, so non-zero exit (timeout / no-reply) already rejected above.
1076
+ // REFUSED/SERVFAIL are resolver-SIDE failures another resolver may not
1077
+ // share — fail over instead of accepting an answerless response (the
1078
+ // EREFUSED-storm case). NOERROR/NXDOMAIN are definitive => accept.
1079
+ const statusMatch = fullOutput.match(/status:\s*([A-Z]+)/i);
1080
+ const rcode = statusMatch ? statusMatch[1].toUpperCase() : 'NOERROR';
1081
+ if (rcode === 'REFUSED' || rcode === 'SERVFAIL') {
1082
+ lastError = `dig ${rcode} from ${resolverLabel}`;
1083
+ continue; // try next resolver in the failover list
1084
+ }
1085
+
1086
+ // Non-empty stderr is intentionally NOT treated as failure here: dig
1087
+ // prints `;; communications error ... timed out` warnings to stderr while
1088
+ // still returning a valid ANSWER SECTION and exit 0. The old code failed
1089
+ // the whole lookup on any stderr, discarding good answers — the exact
1090
+ // missed-match pattern under flaky resolvers.
1091
+ const answerMatch = fullOutput.match(/;; ANSWER SECTION:\n([\s\S]*?)(?:\n;;|\n*$)/);
1092
+ let shortOutput = '';
1093
+ if (answerMatch) {
1094
+ shortOutput = answerMatch[1]
1095
+ .split('\n')
1096
+ .map(line => line.split(/\s+/).pop())
1097
+ .filter(Boolean)
1098
+ .join('\n');
1099
+ }
1000
1100
 
1001
- // Single dig command — full output contains everything including short
1002
- // answers. execFile (no shell) so cleanDomain / recordType can contain
1003
- // any chars without injection risk.
1004
- const { stdout: fullOutput, stderr } = await execFileWithTimeout('dig', [cleanDomain, recordType], timeout);
1005
-
1006
- if (stderr && stderr.trim()) {
1007
1101
  return {
1008
- success: false,
1009
- error: stderr.trim(),
1102
+ success: true,
1103
+ output: fullOutput,
1104
+ shortOutput,
1010
1105
  domain: cleanDomain,
1011
- recordType
1106
+ recordType,
1107
+ resolver: resolverLabel
1012
1108
  };
1109
+ } catch (error) {
1110
+ // Timeout or non-zero exit (e.g. dig exit 9 = no reply from this server).
1111
+ // Record and fall through to the next resolver.
1112
+ lastError = error.message;
1013
1113
  }
1014
-
1015
- // Extract short output from ANSWER SECTION of full dig output
1016
- const answerMatch = fullOutput.match(/;; ANSWER SECTION:\n([\s\S]*?)(?:\n;;|\n*$)/);
1017
- let shortOutput = '';
1018
- if (answerMatch) {
1019
- shortOutput = answerMatch[1]
1020
- .split('\n')
1021
- .map(line => line.split(/\s+/).pop())
1022
- .filter(Boolean)
1023
- .join('\n');
1024
- }
1025
-
1026
- return {
1027
- success: true,
1028
- output: fullOutput,
1029
- shortOutput,
1030
- domain: cleanDomain,
1031
- recordType
1032
- };
1033
- } catch (error) {
1034
- return {
1035
- success: false,
1036
- error: error.message,
1037
- domain: domain,
1038
- recordType
1039
- };
1040
1114
  }
1115
+
1116
+ // Every attempt timed out / was refused. success:false so the handler does
1117
+ // NOT cache it (transient — caching would poison the domain for the TTL).
1118
+ return {
1119
+ success: false,
1120
+ error: lastError,
1121
+ domain: cleanDomain,
1122
+ recordType
1123
+ };
1041
1124
  }
1042
1125
 
1043
1126
  /**
@@ -1170,15 +1253,20 @@ function createNetToolsHandler(config) {
1170
1253
  // Determine which domain will be used for dig lookup
1171
1254
  const digDomain = digSubdomain && originalDomain ? originalDomain : domain;
1172
1255
 
1173
- // For whois: use root domain only (whois data is consistent for entire domain)
1174
- const whoisRootDomain = getRootDomain ? getRootDomain(`http://${domain}`) : domain;
1175
-
1256
+ // For whois: use root domain only (whois data is consistent for entire
1257
+ // domain). Only compute it when whois is actually configured — getRootDomain
1258
+ // does a domain parse, so on a dig-only config (no whois/whois-or) this skips
1259
+ // a parse + string build on every single request. whoisRootDomain is only
1260
+ // ever read inside the whois branch, so the `domain` fallback is never used.
1261
+ const wantWhois = hasWhois || hasWhoisOr;
1262
+ const whoisRootDomain = wantWhois ? (getRootDomain ? getRootDomain(`http://${domain}`) : domain) : domain;
1263
+
1176
1264
  // Check if we need to perform any lookups with appropriate deduplication
1177
1265
  // Whois: root domain + config (whois data same for sub.example.com and example.com)
1178
- const whoisDedupeKey = `${whoisRootDomain}:${whoisConfigKey}`;
1266
+ const whoisDedupeKey = wantWhois ? `${whoisRootDomain}:${whoisConfigKey}` : '';
1179
1267
  // Dig: specific subdomain + config (DNS records can differ between subdomains)
1180
1268
  const digDedupeKey = `${digDomain}:${digConfigKey}`;
1181
- const needsWhoisLookup = (hasWhois || hasWhoisOr) && !processedWhoisDomains.has(whoisDedupeKey);
1269
+ const needsWhoisLookup = wantWhois && !processedWhoisDomains.has(whoisDedupeKey);
1182
1270
  const needsDigLookup = (hasDig || hasDigOr) && !processedDigDomains.has(digDedupeKey);
1183
1271
 
1184
1272
  // Claim the dedupe keys NOW, synchronously, before executeNetToolsLookup
@@ -1606,11 +1694,20 @@ function createNetToolsHandler(config) {
1606
1694
  // backwards-compat additive: old code reading new cache
1607
1695
  // ignores it; new code reading old cache (no field) falls
1608
1696
  // back to lazy on-hit population in the cache-hit branch.
1609
- globalDigResultCache.set(digCacheKey, {
1610
- result: digResult,
1611
- timestamp: now,
1612
- hostname: digDomain
1613
- });
1697
+ //
1698
+ // Only cache a SUCCESSFUL dig. A timeout/error (success:false) is
1699
+ // transient — caching it would poison the domain for the full
1700
+ // cache TTL (20h when persisted via --dns-cache), so a host that
1701
+ // resolves fine on the next attempt keeps getting dropped. (An
1702
+ // NXDOMAIN is success:true with NXDOMAIN in the body — a real
1703
+ // answer — so it's correctly still cached.)
1704
+ if (digResult.success) {
1705
+ globalDigResultCache.set(digCacheKey, {
1706
+ result: digResult,
1707
+ timestamp: now,
1708
+ hostname: digDomain
1709
+ });
1710
+ }
1614
1711
  dnsCacheStats.digMisses++;
1615
1712
  pushFreshSample(dnsCacheStats.freshDig, `${digDomain} (${digRecordType})`);
1616
1713
  // Index hostname IF dig actually proved resolution -- NXDOMAIN
@@ -1662,7 +1759,7 @@ function createNetToolsHandler(config) {
1662
1759
  if (hasDig) logToConsoleAndFile(`${messageColors.highlight('[dig-and]')} Terms checked: ${digTerms.join(' AND ')}, matched: ${digMatched}`);
1663
1760
  if (hasDigOr) logToConsoleAndFile(`${messageColors.highlight('[dig-or]')} Terms checked: ${digOrTerms.join(' OR ')}, matched: ${digOrMatched}`);
1664
1761
  }
1665
- logToConsoleAndFile(`${messageColors.highlight('[dig]')} Lookup completed for ${digDomain}, dig-and: ${digMatched}, dig-or: ${digOrMatched}`);
1762
+ logToConsoleAndFile(`${messageColors.highlight('[dig]')} Lookup completed for ${digDomain}${digResult.resolver ? ` via ${digResult.resolver}` : ''}, dig-and: ${digMatched}, dig-or: ${digOrMatched}`);
1666
1763
  if (siteConfig.verbose === 1) {
1667
1764
  if (hasDig) logToConsoleAndFile(`${messageColors.highlight('[dig]')} AND terms: ${digTerms.join(', ')}`);
1668
1765
  if (hasDigOr) logToConsoleAndFile(`${messageColors.highlight('[dig]')} OR terms: ${digOrTerms.join(', ')}`);
@@ -1813,6 +1910,12 @@ module.exports = {
1813
1910
  validateDigAvailability,
1814
1911
  enableDiskCache,
1815
1912
  getDnsCacheStats,
1913
+ // Route dig through the --dns resolver(s) instead of the system resolver.
1914
+ setDigResolvers,
1915
+ // Generic disk-cache primitives (atomic write, TTL/size-bounded) — reused by
1916
+ // nwss.js to persist the DNS pre-check negative cache under --dns-cache.
1917
+ loadDiskCache,
1918
+ saveDiskCache,
1816
1919
  // Resolved-hostnames index for the DNS pre-check optimization.
1817
1920
  // nwss.js's per-task pre-check consults this BEFORE calling resolve4
1818
1921
  // so hosts already proven live by dig or whois (within their 20h dig / 36h whois
@@ -778,6 +778,14 @@ function validateOvpnConfig(ovpnConfig) {
778
778
  * @returns {Promise<Object>} { success, connection, tunDevice, error }
779
779
  */
780
780
  async function connectForSite(siteConfig, forceDebug = false) {
781
+ // Platform guard: OpenVPN routing here reads /proc and uses the iproute2 `ip`
782
+ // command, both Linux-only. Fail clearly instead of a cryptic /proc or `ip`
783
+ // error on macOS/Windows. WSL2 reports 'linux' and passes (TUN is checked
784
+ // separately below via isWSL/checkTunDevice).
785
+ if (process.platform !== 'linux') {
786
+ return { success: false, error: `OpenVPN routing is currently Linux-only (needs /proc + the iproute2 'ip' command; not available on ${process.platform}). Run on Linux/WSL2, or remove the 'openvpn' option from the site config.` };
787
+ }
788
+
781
789
  const ovpnConfig = normalizeOvpnConfig(siteConfig.openvpn);
782
790
  if (!ovpnConfig) {
783
791
  return { success: false, error: 'Invalid OpenVPN configuration' };
package/lib/output.js CHANGED
@@ -133,32 +133,43 @@ function formatDomain(domain, options = {}) {
133
133
  if (!domain || domain.length <= 6 || !domain.includes('.')) {
134
134
  return null;
135
135
  }
136
-
137
- // If plain is true, always return just the domain regardless of other options
136
+
137
+ // Path-prefix rules (from output_regex) are stored as "host/path/" — they
138
+ // contain a '/'. Only adblock can express a path; every domain-only format
139
+ // (dnsmasq/unbound/pihole/hosts/privoxy/plain) falls back to the bare host
140
+ // (everything before the first '/') so output stays valid in all formats.
141
+ const slash = domain.indexOf('/');
142
+ const isPathRule = slash !== -1;
143
+ const host = isPathRule ? domain.slice(0, slash) : domain;
144
+
145
+ // If plain is true, always return just the host regardless of other options
138
146
  if (plain) {
139
- return domain;
147
+ return host;
140
148
  }
141
-
149
+
142
150
  // Apply specific format based on output mode
143
151
  if (pihole) {
144
152
  // Escape dots for regex and use Pi-hole format: (^|\.)domain\.com$
145
- const escapedDomain = domain.replace(/\./g, '\\.');
153
+ const escapedDomain = host.replace(/\./g, '\\.');
146
154
  return `(^|\\.)${escapedDomain}$`;
147
155
  } else if (privoxy) {
148
- return `{ +block } .${domain}`;
156
+ return `{ +block } .${host}`;
149
157
  } else if (dnsmasq) {
150
- return `local=/${domain}/`;
158
+ return `local=/${host}/`;
151
159
  } else if (dnsmasqOld) {
152
- return `server=/${domain}/`;
160
+ return `server=/${host}/`;
153
161
  } else if (unbound) {
154
- return `local-zone: "${domain}." always_null`;
162
+ return `local-zone: "${host}." always_null`;
155
163
  } else if (localhostIP) {
156
- return `${localhostIP} ${domain}`;
164
+ return `${localhostIP} ${host}`;
157
165
  } else if (adblockRules && resourceType) {
158
- // Generate adblock filter rules with resource type modifiers
159
- return `||${domain}^${resourceType}`;
166
+ // Adblock with resource-type modifier. A path rule self-anchors via its
167
+ // trailing '/', so it takes no '^' separator; a domain rule needs '^'.
168
+ return isPathRule ? `||${domain}${resourceType}` : `||${domain}^${resourceType}`;
160
169
  } else {
161
- return `||${domain}^`;
170
+ // Default adblock: ||host^ for a domain, ||host/path/ for a path rule
171
+ // (the path already anchors, so no trailing '^').
172
+ return isPathRule ? `||${domain}` : `||${domain}^`;
162
173
  }
163
174
  }
164
175