@fanboynz/network-scanner 3.1.2 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,6 +15,11 @@
15
15
  // npm install ghost-cursor (optional dependency)
16
16
 
17
17
  const { formatLogMessage, messageColors } = require('./colorize');
18
+ // humanClick gives the coordinate-click path the same press realism as the
19
+ // built-in content clicks (hover dwell + mousedown/hold/mouseup, optional
20
+ // hand-tremor + mouseup drift) instead of a 0ms page.mouse.click. One-way
21
+ // require — interaction.js does not depend on ghost-cursor, so no cycle.
22
+ const { humanClick } = require('./interaction');
18
23
  const GHOST_CURSOR_TAG = messageColors.processing('[ghost-cursor]');
19
24
 
20
25
  let ghostCursorModule = null;
@@ -56,7 +61,7 @@ function createGhostCursor(page, options = {}) {
56
61
  const cursor = ghostCursorModule.createCursor(page, { x: startX, y: startY });
57
62
 
58
63
  if (forceDebug) {
59
- console.log(formatLogMessage('debug', '[ghost-cursor] Cursor instance created'));
64
+ console.log(formatLogMessage('debug', `${GHOST_CURSOR_TAG} Cursor instance created`));
60
65
  }
61
66
 
62
67
  return cursor;
@@ -98,7 +103,7 @@ async function ghostMove(cursor, toX, toY, options = {}) {
98
103
  const moveOpts = {};
99
104
  if (moveSpeed !== undefined) moveOpts.moveSpeed = moveSpeed;
100
105
  if (moveDelay > 0) moveOpts.moveDelay = moveDelay;
101
- if (randomizeMoveDelay !== undefined) moveOpts.randomizeMoveDelay = randomizeMoveDelay;
106
+ moveOpts.randomizeMoveDelay = randomizeMoveDelay; // always defined (defaults to true)
102
107
  if (overshootThreshold !== undefined) moveOpts.overshootThreshold = overshootThreshold;
103
108
 
104
109
  await cursor.moveTo({ x: toX, y: toY }, moveOpts);
@@ -126,6 +131,8 @@ async function ghostMove(cursor, toX, toY, options = {}) {
126
131
  * @param {number} options.waitForClick - Delay (ms) between mousedown/mouseup (default: auto)
127
132
  * @param {number} options.moveDelay - Delay (ms) after moving to target
128
133
  * @param {number} options.paddingPercentage - Click point within element (0=edge, 100=center)
134
+ * @param {import('puppeteer').Page} options.page - Page for coordinate clicks (falls back to cursor.page)
135
+ * @param {boolean} options.realistic - Coordinate clicks: emit hand-tremor + mouseup drift (default: false)
129
136
  * @param {boolean} options.forceDebug - Enable debug logging
130
137
  * @returns {Promise<boolean>} true if click succeeded
131
138
  */
@@ -137,6 +144,8 @@ async function ghostClick(cursor, target, options = {}) {
137
144
  waitForClick,
138
145
  moveDelay,
139
146
  paddingPercentage,
147
+ page,
148
+ realistic = false,
140
149
  forceDebug
141
150
  } = options;
142
151
 
@@ -149,16 +158,25 @@ async function ghostClick(cursor, target, options = {}) {
149
158
  if (typeof target === 'string') {
150
159
  await cursor.click(target, clickOpts);
151
160
  } else {
152
- // For coordinate clicks, move first then use page click
161
+ // Coordinate click: ghost-cursor's bezier moveTo brings the cursor to the
162
+ // point, then humanClick does the realistic press (hover dwell, mousedown
163
+ // → hold → mouseup, plus hand-tremor + down≠up drift when realistic). This
164
+ // replaces a 0ms page.mouse.click, so the ghost path gets the same click
165
+ // realism as built-in content clicks.
153
166
  await cursor.moveTo(target);
154
- // Small hesitation before clicking
155
- if (hesitate > 0) {
156
- await new Promise(resolve => setTimeout(resolve, hesitate));
157
- }
158
- const page = cursor._page || cursor.page;
159
- if (page && typeof page.mouse?.click === 'function') {
160
- await page.mouse.click(target.x, target.y);
167
+ // Prefer the caller-supplied page; fall back to the cursor's own page
168
+ // (ghost-cursor exposes it as cursor.page) so we don't depend on internals.
169
+ // Return false (not silent success) if there's no usable page — otherwise
170
+ // the "Clicked" log + return true below would lie about a click that
171
+ // never fired.
172
+ const clickPage = page || cursor.page;
173
+ if (!clickPage || typeof clickPage.mouse?.down !== 'function') {
174
+ if (forceDebug) {
175
+ console.log(formatLogMessage('debug', `${GHOST_CURSOR_TAG} Coordinate click skipped: no usable page`));
176
+ }
177
+ return false;
161
178
  }
179
+ await humanClick(clickPage, target.x, target.y, { realistic, forceDebug });
162
180
  }
163
181
 
164
182
  if (forceDebug) {
@@ -189,7 +207,7 @@ async function ghostRandomMove(cursor, options = {}) {
189
207
  try {
190
208
  await cursor.randomMove();
191
209
  if (options.forceDebug) {
192
- console.log(formatLogMessage('debug', '[ghost-cursor] Random movement performed'));
210
+ console.log(formatLogMessage('debug', `${GHOST_CURSOR_TAG} Random movement performed`));
193
211
  }
194
212
  return true;
195
213
  } catch (err) {
@@ -1333,5 +1333,9 @@ module.exports = {
1333
1333
  simulateScrolling,
1334
1334
  interactWithElements,
1335
1335
  performContentClicks,
1336
+ // Realistic timed click (hover dwell + mousedown/hold/mouseup, optional
1337
+ // hand-tremor + mouseup drift). Reused by lib/ghost-cursor.js so the ghost
1338
+ // coordinate click gets the same press realism as built-in content clicks.
1339
+ humanClick,
1336
1340
  generateRandomCoordinates
1337
1341
  };
package/lib/nettools.js CHANGED
@@ -124,7 +124,6 @@ function loadDiskCache(filePath, cache, ttl, maxSize) {
124
124
  // Surface the event so the user knows they lost their warm cache;
125
125
  // previously this was a silent reset, which made "why did my dns
126
126
  // cache stop helping?" hard to diagnose.
127
- // eslint-disable-next-line no-console
128
127
  console.warn(`${messageColors.highlight('[dns-cache]')} ${path.basename(filePath)} was unreadable (${err.message}); starting fresh`);
129
128
  try { fs.unlinkSync(filePath); } catch {}
130
129
  }
@@ -256,6 +255,38 @@ function getDnsCacheStats() {
256
255
  // Disk cache is opt-in via --dns-cache flag
257
256
  let diskCacheEnabled = false;
258
257
 
258
+ // Optional dig resolver(s), set from --dns. When non-empty, dig queries
259
+ // `@<one of these>` (round-robin) instead of the system resolver — so dig uses
260
+ // the same reliable servers as the pre-check rather than a flaky /etc/resolv.conf
261
+ // (the cause of `dig: Command timeout` drops on Cloudflare-fronted ad domains).
262
// Resolver pool for dig, populated from --dns via setDigResolvers(). Empty
// means "use the system resolver". The cursor is the index of the resolver
// that leads the NEXT lookup's attempt list.
let digResolvers = [];
let digResolverCursor = 0;

/**
 * Reduce one --dns entry to the bare address dig expects after `@`.
 *
 * Accepted shapes: `ipv4`, `ipv4:port`, `[ipv6]`, `[ipv6]:port`. Surrounding
 * whitespace is dropped. Inside brackets an embedded IPv4 tail
 * (`::ffff:1.2.3.4`) and a zone ID (`fe80::1%eth0`) are kept as part of the
 * address. Anything else is returned trimmed but otherwise untouched.
 *
 * @param {*} spec - Resolver spec; coerced to string (null/undefined => '')
 * @returns {string} Bare server address, possibly empty
 */
function digServerFromSpec(spec) {
  const text = String(spec ?? '').trim();
  // No end anchor: anything after the closing bracket (normally `:port`) is
  // discarded.
  const bracketed = text.match(/^\[([0-9a-fA-F:.]+(?:%[\w.-]+)?)\]/);
  if (bracketed) return bracketed[1];
  const v4WithPort = text.match(/^(\d{1,3}(?:\.\d{1,3}){3}):\d+$/);
  if (v4WithPort) return v4WithPort[1];
  return text;
}

/**
 * Replace the dig resolver pool.
 *
 * Falsy entries, entries that normalise to an empty string, and duplicates are
 * dropped: a repeated server would only spend one of the capped failover
 * attempts on a resolver that has already been tried. The rotation cursor is
 * reset so it always indexes the new pool.
 *
 * @param {Array<string>} servers - Resolver specs; a non-array clears the pool
 * @returns {void}
 */
function setDigResolvers(servers) {
  const unique = new Set();
  for (const entry of Array.isArray(servers) ? servers : []) {
    if (!entry) continue;
    const server = digServerFromSpec(entry);
    if (server) unique.add(server);
  }
  digResolvers = [...unique];
  digResolverCursor = 0;
}

/**
 * Ordered `@server` attempt list for ONE dig lookup.
 *
 * The list starts at the round-robin cursor (advanced once per call) and then
 * walks the remaining resolvers as failover. The cursor is kept inside
 * [0, pool size) so it cannot grow without bound on long scans.
 *
 * @returns {Array<string|null>} `['@a', '@b', ...]`, or `[null]` when no
 *   resolvers are configured (a single attempt via the system resolver)
 */
function digServerAttemptList() {
  const count = digResolvers.length;
  if (count === 0) return [null];
  const start = digResolverCursor % count;
  digResolverCursor = (start + 1) % count;
  return Array.from({ length: count }, (_, offset) => `@${digResolvers[(start + offset) % count]}`);
}
289
+
259
290
  /**
260
291
  * Enable persistent disk caching for dig/whois results.
261
292
  * Call this when --dns-cache flag is set. Idempotent — repeated calls
@@ -293,7 +324,6 @@ function enableDiskCache() {
293
324
  // Debug log only if anything was actually warmed; silent on fresh
294
325
  // installs / empty disk caches.
295
326
  if (digWarm > 0 || whoisWarm > 0) {
296
- // eslint-disable-next-line no-console
297
327
  console.log(`${messageColors.highlight('[dns-cache]')} Warmed resolved-hostnames index from disk: ${digWarm} dig + ${whoisWarm} whois entries`);
298
328
  }
299
329
 
@@ -994,50 +1024,103 @@ async function whoisLookupWithRetry(domain = '', timeout = 10000, whoisServer =
994
1024
  * @returns {Promise<Object>} Object with success status and output/error
995
1025
  */
996
1026
  async function digLookup(domain = '', recordType = 'A', timeout = 5000) {
997
- try {
998
- // Clean domain
999
- const cleanDomain = domain.replace(/^https?:\/\//, '').replace(/\/.*$/, '').replace(/:\d+$/, '');
1027
+ // Clean domain (defensive — callers usually pass an already-clean digDomain).
1028
+ const cleanDomain = domain.replace(/^https?:\/\//, '').replace(/\/.*$/, '').replace(/:\d+$/, '');
1029
+
1030
+ // dig argv-injection guard. dig parses @/-/+ -leading tokens as options
1031
+ // (`@host` redirects the query to an arbitrary server, `-f path` reads a
1032
+ // file as a query batch) and has no `--` end-of-options marker like whois.
1033
+ // Reject anything not hostname-shaped before shelling out — success:false so
1034
+ // it's treated as no-match and not cached. (Charset blocks @ + / space etc;
1035
+ // the leading-`-` check blocks `-f` and friends, since `-` is valid mid-host.)
1036
+ if (!cleanDomain || /[^a-zA-Z0-9._-]/.test(cleanDomain) || cleanDomain.startsWith('-')) {
1037
+ return { success: false, error: `invalid domain shape: ${cleanDomain}`, domain: cleanDomain, recordType };
1038
+ }
1039
+
1040
+ // Resolver failover: try the round-robin resolver first, then fall through
1041
+ // the remaining --dns resolvers on timeout / no-reply / REFUSED / SERVFAIL —
1042
+ // the same resilience the whois path already has via whoisLookupWithRetry,
1043
+ // and the DNS pre-check has via its rotation. Capped at 3 attempts (matches
1044
+ // whois maxRetries default) so a host that's dead on every resolver can't
1045
+ // burn the whole nettools budget.
1046
+ const attempts = digServerAttemptList();
1047
+ // Only do JS-level failover when --dns gave us pinned resolvers. Without it,
1048
+ // attempts is [null]: a SINGLE system-resolver invocation that keeps dig's
1049
+ // native resolv.conf rotation + retries (forcing +tries=1 there would strip
1050
+ // that built-in resilience — the whole point is to be MORE resilient).
1051
+ const usingResolvers = attempts[0] !== null;
1052
+ const maxAttempts = usingResolvers ? Math.min(3, attempts.length) : 1;
1053
+ // Pinned-resolver attempts use +time=2 +tries=1 (the JS loop owns failover)
1054
+ // under a 4s SIGTERM ceiling. The system-resolver path keeps the full budget
1055
+ // and dig's own retry behaviour, matching the pre-failover semantics exactly.
1056
+ const perAttemptTimeout = usingResolvers ? Math.min(timeout, 4000) : timeout;
1057
+
1058
+ let lastError = 'no resolver attempts made';
1059
+
1060
+ for (let i = 0; i < maxAttempts; i++) {
1061
+ const digServerArg = attempts[i];
1062
+ // With a pinned resolver: one fast try (+time=2 +tries=1), then the JS loop
1063
+ // moves to the next resolver. Without --dns: bare `dig name type` so dig
1064
+ // applies its native resolv.conf rotation. execFile (no shell) => args
1065
+ // can't be injected.
1066
+ const digArgs = digServerArg
1067
+ ? [digServerArg, '+time=2', '+tries=1', cleanDomain, recordType]
1068
+ : [cleanDomain, recordType];
1069
+ const resolverLabel = digServerArg ? digServerArg.slice(1) : 'system resolver';
1070
+
1071
+ try {
1072
+ const { stdout: fullOutput } = await execFileWithTimeout('dig', digArgs, perAttemptTimeout);
1073
+
1074
+ // Judge success by RCODE, not by stderr. dig exits 0 for ANY server
1075
+ // response, so non-zero exit (timeout / no-reply) already rejected above.
1076
+ // REFUSED/SERVFAIL are resolver-SIDE failures another resolver may not
1077
+ // share — fail over instead of accepting an answerless response (the
1078
+ // EREFUSED-storm case). NOERROR/NXDOMAIN are definitive => accept.
1079
+ const statusMatch = fullOutput.match(/status:\s*([A-Z]+)/i);
1080
+ const rcode = statusMatch ? statusMatch[1].toUpperCase() : 'NOERROR';
1081
+ if (rcode === 'REFUSED' || rcode === 'SERVFAIL') {
1082
+ lastError = `dig ${rcode} from ${resolverLabel}`;
1083
+ continue; // try next resolver in the failover list
1084
+ }
1085
+
1086
+ // Non-empty stderr is intentionally NOT treated as failure here: dig
1087
+ // prints `;; communications error ... timed out` warnings to stderr while
1088
+ // still returning a valid ANSWER SECTION and exit 0. The old code failed
1089
+ // the whole lookup on any stderr, discarding good answers — the exact
1090
+ // missed-match pattern under flaky resolvers.
1091
+ const answerMatch = fullOutput.match(/;; ANSWER SECTION:\n([\s\S]*?)(?:\n;;|\n*$)/);
1092
+ let shortOutput = '';
1093
+ if (answerMatch) {
1094
+ shortOutput = answerMatch[1]
1095
+ .split('\n')
1096
+ .map(line => line.split(/\s+/).pop())
1097
+ .filter(Boolean)
1098
+ .join('\n');
1099
+ }
1000
1100
 
1001
- // Single dig command — full output contains everything including short
1002
- // answers. execFile (no shell) so cleanDomain / recordType can contain
1003
- // any chars without injection risk.
1004
- const { stdout: fullOutput, stderr } = await execFileWithTimeout('dig', [cleanDomain, recordType], timeout);
1005
-
1006
- if (stderr && stderr.trim()) {
1007
1101
  return {
1008
- success: false,
1009
- error: stderr.trim(),
1102
+ success: true,
1103
+ output: fullOutput,
1104
+ shortOutput,
1010
1105
  domain: cleanDomain,
1011
- recordType
1106
+ recordType,
1107
+ resolver: resolverLabel
1012
1108
  };
1109
+ } catch (error) {
1110
+ // Timeout or non-zero exit (e.g. dig exit 9 = no reply from this server).
1111
+ // Record and fall through to the next resolver.
1112
+ lastError = error.message;
1013
1113
  }
1014
-
1015
- // Extract short output from ANSWER SECTION of full dig output
1016
- const answerMatch = fullOutput.match(/;; ANSWER SECTION:\n([\s\S]*?)(?:\n;;|\n*$)/);
1017
- let shortOutput = '';
1018
- if (answerMatch) {
1019
- shortOutput = answerMatch[1]
1020
- .split('\n')
1021
- .map(line => line.split(/\s+/).pop())
1022
- .filter(Boolean)
1023
- .join('\n');
1024
- }
1025
-
1026
- return {
1027
- success: true,
1028
- output: fullOutput,
1029
- shortOutput,
1030
- domain: cleanDomain,
1031
- recordType
1032
- };
1033
- } catch (error) {
1034
- return {
1035
- success: false,
1036
- error: error.message,
1037
- domain: domain,
1038
- recordType
1039
- };
1040
1114
  }
1115
+
1116
+ // Every attempt timed out / was refused. success:false so the handler does
1117
+ // NOT cache it (transient — caching would poison the domain for the TTL).
1118
+ return {
1119
+ success: false,
1120
+ error: lastError,
1121
+ domain: cleanDomain,
1122
+ recordType
1123
+ };
1041
1124
  }
1042
1125
 
1043
1126
  /**
@@ -1170,15 +1253,20 @@ function createNetToolsHandler(config) {
1170
1253
  // Determine which domain will be used for dig lookup
1171
1254
  const digDomain = digSubdomain && originalDomain ? originalDomain : domain;
1172
1255
 
1173
- // For whois: use root domain only (whois data is consistent for entire domain)
1174
- const whoisRootDomain = getRootDomain ? getRootDomain(`http://${domain}`) : domain;
1175
-
1256
+ // For whois: use root domain only (whois data is consistent for entire
1257
+ // domain). Only compute it when whois is actually configured — getRootDomain
1258
+ // does a domain parse, so on a dig-only config (no whois/whois-or) this skips
1259
+ // a parse + string build on every single request. whoisRootDomain is only
1260
+ // ever read inside the whois branch, so the `domain` fallback is never used.
1261
+ const wantWhois = hasWhois || hasWhoisOr;
1262
+ const whoisRootDomain = wantWhois ? (getRootDomain ? getRootDomain(`http://${domain}`) : domain) : domain;
1263
+
1176
1264
  // Check if we need to perform any lookups with appropriate deduplication
1177
1265
  // Whois: root domain + config (whois data same for sub.example.com and example.com)
1178
- const whoisDedupeKey = `${whoisRootDomain}:${whoisConfigKey}`;
1266
+ const whoisDedupeKey = wantWhois ? `${whoisRootDomain}:${whoisConfigKey}` : '';
1179
1267
  // Dig: specific subdomain + config (DNS records can differ between subdomains)
1180
1268
  const digDedupeKey = `${digDomain}:${digConfigKey}`;
1181
- const needsWhoisLookup = (hasWhois || hasWhoisOr) && !processedWhoisDomains.has(whoisDedupeKey);
1269
+ const needsWhoisLookup = wantWhois && !processedWhoisDomains.has(whoisDedupeKey);
1182
1270
  const needsDigLookup = (hasDig || hasDigOr) && !processedDigDomains.has(digDedupeKey);
1183
1271
 
1184
1272
  // Claim the dedupe keys NOW, synchronously, before executeNetToolsLookup
@@ -1606,11 +1694,20 @@ function createNetToolsHandler(config) {
1606
1694
  // backwards-compat additive: old code reading new cache
1607
1695
  // ignores it; new code reading old cache (no field) falls
1608
1696
  // back to lazy on-hit population in the cache-hit branch.
1609
- globalDigResultCache.set(digCacheKey, {
1610
- result: digResult,
1611
- timestamp: now,
1612
- hostname: digDomain
1613
- });
1697
+ //
1698
+ // Only cache a SUCCESSFUL dig. A timeout/error (success:false) is
1699
+ // transient — caching it would poison the domain for the full
1700
+ // cache TTL (20h when persisted via --dns-cache), so a host that
1701
+ // resolves fine on the next attempt keeps getting dropped. (An
1702
+ // NXDOMAIN is success:true with NXDOMAIN in the body — a real
1703
+ // answer — so it's correctly still cached.)
1704
+ if (digResult.success) {
1705
+ globalDigResultCache.set(digCacheKey, {
1706
+ result: digResult,
1707
+ timestamp: now,
1708
+ hostname: digDomain
1709
+ });
1710
+ }
1614
1711
  dnsCacheStats.digMisses++;
1615
1712
  pushFreshSample(dnsCacheStats.freshDig, `${digDomain} (${digRecordType})`);
1616
1713
  // Index hostname IF dig actually proved resolution -- NXDOMAIN
@@ -1662,7 +1759,7 @@ function createNetToolsHandler(config) {
1662
1759
  if (hasDig) logToConsoleAndFile(`${messageColors.highlight('[dig-and]')} Terms checked: ${digTerms.join(' AND ')}, matched: ${digMatched}`);
1663
1760
  if (hasDigOr) logToConsoleAndFile(`${messageColors.highlight('[dig-or]')} Terms checked: ${digOrTerms.join(' OR ')}, matched: ${digOrMatched}`);
1664
1761
  }
1665
- logToConsoleAndFile(`${messageColors.highlight('[dig]')} Lookup completed for ${digDomain}, dig-and: ${digMatched}, dig-or: ${digOrMatched}`);
1762
+ logToConsoleAndFile(`${messageColors.highlight('[dig]')} Lookup completed for ${digDomain}${digResult.resolver ? ` via ${digResult.resolver}` : ''}, dig-and: ${digMatched}, dig-or: ${digOrMatched}`);
1666
1763
  if (siteConfig.verbose === 1) {
1667
1764
  if (hasDig) logToConsoleAndFile(`${messageColors.highlight('[dig]')} AND terms: ${digTerms.join(', ')}`);
1668
1765
  if (hasDigOr) logToConsoleAndFile(`${messageColors.highlight('[dig]')} OR terms: ${digOrTerms.join(', ')}`);
@@ -1813,6 +1910,12 @@ module.exports = {
1813
1910
  validateDigAvailability,
1814
1911
  enableDiskCache,
1815
1912
  getDnsCacheStats,
1913
+ // Route dig through the --dns resolver(s) instead of the system resolver.
1914
+ setDigResolvers,
1915
+ // Generic disk-cache primitives (atomic write, TTL/size-bounded) — reused by
1916
+ // nwss.js to persist the DNS pre-check negative cache under --dns-cache.
1917
+ loadDiskCache,
1918
+ saveDiskCache,
1816
1919
  // Resolved-hostnames index for the DNS pre-check optimization.
1817
1920
  // nwss.js's per-task pre-check consults this BEFORE calling resolve4
1818
1921
  // so hosts already proven live by dig or whois (within their 20h
package/lib/output.js CHANGED
@@ -133,32 +133,43 @@ function formatDomain(domain, options = {}) {
133
133
  if (!domain || domain.length <= 6 || !domain.includes('.')) {
134
134
  return null;
135
135
  }
136
-
137
- // If plain is true, always return just the domain regardless of other options
136
+
137
+ // Path-prefix rules (from output_regex) are stored as "host/path/" they
138
+ // contain a '/'. Only adblock can express a path; every domain-only format
139
+ // (dnsmasq/unbound/pihole/hosts/privoxy/plain) falls back to the bare host
140
+ // (everything before the first '/') so output stays valid in all formats.
141
+ const slash = domain.indexOf('/');
142
+ const isPathRule = slash !== -1;
143
+ const host = isPathRule ? domain.slice(0, slash) : domain;
144
+
145
+ // If plain is true, always return just the host regardless of other options
138
146
  if (plain) {
139
- return domain;
147
+ return host;
140
148
  }
141
-
149
+
142
150
  // Apply specific format based on output mode
143
151
  if (pihole) {
144
152
  // Escape dots for regex and use Pi-hole format: (^|\.)domain\.com$
145
- const escapedDomain = domain.replace(/\./g, '\\.');
153
+ const escapedDomain = host.replace(/\./g, '\\.');
146
154
  return `(^|\\.)${escapedDomain}$`;
147
155
  } else if (privoxy) {
148
- return `{ +block } .${domain}`;
156
+ return `{ +block } .${host}`;
149
157
  } else if (dnsmasq) {
150
- return `local=/${domain}/`;
158
+ return `local=/${host}/`;
151
159
  } else if (dnsmasqOld) {
152
- return `server=/${domain}/`;
160
+ return `server=/${host}/`;
153
161
  } else if (unbound) {
154
- return `local-zone: "${domain}." always_null`;
162
+ return `local-zone: "${host}." always_null`;
155
163
  } else if (localhostIP) {
156
- return `${localhostIP} ${domain}`;
164
+ return `${localhostIP} ${host}`;
157
165
  } else if (adblockRules && resourceType) {
158
- // Generate adblock filter rules with resource type modifiers
159
- return `||${domain}^${resourceType}`;
166
+ // Adblock with resource-type modifier. A path rule self-anchors via its
167
+ // trailing '/', so it takes no '^' separator; a domain rule needs '^'.
168
+ return isPathRule ? `||${domain}${resourceType}` : `||${domain}^${resourceType}`;
160
169
  } else {
161
- return `||${domain}^`;
170
+ // Default adblock: ||host^ for a domain, ||host/path/ for a path rule
171
+ // (the path already anchors, so no trailing '^').
172
+ return isPathRule ? `||${domain}` : `||${domain}^`;
162
173
  }
163
174
  }
164
175
 
package/lib/redirect.js CHANGED
@@ -19,7 +19,10 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
19
19
  let httpStatus = null;
20
20
  let cfRay = null;
21
21
  const jsRedirectTimeout = siteConfig.js_redirect_timeout || 5000; // Wait 5s for JS redirects
22
- const maxRedirects = siteConfig.max_redirects || 10;
22
+ // Use a number check, not `||`, so max_redirects: 0 (follow none) isn't
23
+ // swallowed as falsy and silently bumped to 10. Only absent/negative/non-number defaults.
24
+ const maxRedirects = (typeof siteConfig.max_redirects === 'number' && siteConfig.max_redirects >= 0)
25
+ ? siteConfig.max_redirects : 10;
23
26
  const detectJSPatterns = siteConfig.detect_js_patterns !== false; // Default to true
24
27
 
25
28
  // Monitor frame navigations to detect redirects
@@ -806,7 +806,6 @@ function cleanRulesetFile(filePath, outputPath = null, options = {}) {
806
806
  } = options;
807
807
 
808
808
  const fs = require('fs');
809
- const path = require('path');
810
809
 
811
810
  let content;
812
811
  try {
@@ -1118,6 +1117,7 @@ const KNOWN_SITE_CONFIG_KEYS = new Set([
1118
1117
  'ignore_similar_threshold', 'interact', 'interact_click_count', 'interact_clicks',
1119
1118
  'interact_duration', 'interact_intensity', 'interact_scrolling', 'isBrave',
1120
1119
  'js_redirect_timeout', 'localhost', 'max_redirects', 'openvpn', 'pihole',
1120
+ 'output_regex',
1121
1121
  'plain', 'privoxy', 'proxy', 'proxy_bypass', 'proxy_debug', 'proxy_remote_dns',
1122
1122
  'realistic_click', 'referrer_disable', 'referrer_headers', 'regex_and',
1123
1123
  'reload', 'resourceTypes', 'screenshot', 'searchstring', 'searchstring_and',
@@ -1307,6 +1307,21 @@ function normalizeSiteConfig(siteConfig, siteIndex = 0) {
1307
1307
  }
1308
1308
  }
1309
1309
 
1310
+ // 2b. output_regex must be a compilable regex. An invalid one is silently
1311
+ // disabled at runtime (the use-site try/catch falls back to ||host^), so
1312
+ // surface it here at load time where the user can fix it.
1313
+ if ('output_regex' in siteConfig && siteConfig.output_regex != null && siteConfig.output_regex !== '') {
1314
+ if (typeof siteConfig.output_regex !== 'string') {
1315
+ warnings.push(`${tag}: 'output_regex' should be a string regex, got ${JSON.stringify(siteConfig.output_regex)} — will be ignored`);
1316
+ } else {
1317
+ try {
1318
+ new RegExp(siteConfig.output_regex);
1319
+ } catch (e) {
1320
+ warnings.push(`${tag}: 'output_regex' is not a valid regex (${e.message}) — will be ignored, output falls back to ||host^`);
1321
+ }
1322
+ }
1323
+ }
1324
+
1310
1325
  // 3. String → single-element array coercion for fields that accept both
1311
1326
  // forms (dig, dig-or, whois, whois-or). Downstream consumers all gate on
1312
1327
  // Array.isArray(), so a bare string value previously silently disabled
package/nwss.1 CHANGED
@@ -72,10 +72,6 @@ Output as \fB(^|\\.)domain\\.com$\fR format for Pi-hole regex filters.
72
72
  Generate adblock filter rules with resource type modifiers (requires \fB\-o\fR).
73
73
 
74
74
  .SS General Options
75
- .TP
76
- .B \--verbose
77
- Enable verbose output globally for all sites.
78
-
79
75
  .TP
80
76
  .B \--debug
81
77
  Enable debug mode with detailed logging of all network requests.
@@ -104,6 +100,10 @@ Output full subdomains instead of collapsing to root domains.
104
100
  .B \--no-interact
105
101
  Disable mouse simulation and page interaction globally.
106
102
 
103
+ .TP
104
+ .B \--ghost-cursor
105
+ Use ghost-cursor Bezier mouse movements globally (requires \fBnpm i ghost-cursor\fR). See \fBGhost Cursor Options\fR. Equivalent to per-site \fBcursor_mode: "ghost"\fR.
106
+
107
107
  .TP
108
108
  .BR \--custom-json " \fIFILE\fR"
109
109
  Use \fIFILE\fR instead of \fBconfig.json\fR for configuration.
@@ -136,10 +136,23 @@ Remove Chrome/Puppeteer temporary files before exit.
136
136
  .BR \--max-concurrent " \fINUMBER\fR"
137
137
  Maximum concurrent site processing (1-50, overrides config/default).
138
138
 
139
+ .TP
140
+ .BR \--dns " \fIIP\fR[,\fIIP\fR...]"
141
+ Nameserver(s) for the DNS pre-check AND nettools' dig \(em does not affect Chrome
142
+ navigation or whois. A single address pins all queries to it; several are
143
+ rotated per query (each leading once, the rest as failover) to spread the
144
+ load. Routing dig through these avoids dig timeouts on a flaky system resolver
145
+ silently dropping dig-gated domains. Overrides /etc/resolv.conf. Invalid
146
+ entries are warned and dropped.
147
+
139
148
  .TP
140
149
  .BR \--cleanup-interval " \fINUMBER\fR"
141
150
  Browser restart interval in URLs processed (1-1000, overrides config/default).
142
151
 
152
+ .TP
153
+ .B \--show-dead-domains
154
+ At end of scan, list hostnames that did not resolve or were unreachable (\fBNXDOMAIN\fR/\fBENODATA\fR plus \fBERR_NAME_NOT_RESOLVED\fR/\fBERR_ADDRESS_UNREACHABLE\fR). Excludes blocks and timeouts, since those mean the domain is alive. Useful for pruning dead URLs.
155
+
143
156
  .TP
144
157
  .BR \-h ", " \--help
145
158
  Show help message and exit.
@@ -249,6 +262,10 @@ Regex pattern(s) to match suspicious requests.
249
262
  .B regex_and
250
263
  Boolean. Use AND logic for multiple filterRegex patterns - ALL patterns must match the same URL (default: false).
251
264
 
265
+ .TP
266
+ .B output_regex
267
+ String. Regex applied to each matched URL to build the rule body: capture group 1 (or the whole match) becomes \fB||<capture>\fR instead of \fB||host^\fR. For example \fB^https?://([^/]+/[^/]+/)\fR turns \fBhttps://host.com/script/abc.js\fR into \fB||host.com/script/\fR, collapsing randomized filenames under a path into one rule. The capture must include the host. If the regex does not match a URL, output falls back to \fB||host^\fR. Adblock-only; domain-based formats (dnsmasq, pihole, hosts, plain) emit the bare host.
268
+
252
269
  .TP
253
270
  .B comments
254
271
  Documentation strings or notes - completely ignored by the scanner. Can be a single string or array of strings. Used for adding context, URLs, timestamps, or any documentation notes to configuration files.
@@ -293,6 +310,14 @@ Boolean. Simulate mouse movements and clicks.
293
310
  .B interact_intensity
294
311
  String. Interaction simulation intensity: \fB"low"\fR, \fB"medium"\fR, \fB"high"\fR (default: "medium").
295
312
 
313
+ .TP
314
+ .B interact_click_count
315
+ Integer. Number of random content-zone clicks per load, capped at 20 (default: 3). The default of 3 is a primary click plus two backups, since some ad SDKs suppress the first or second click as warmup.
316
+
317
+ .TP
318
+ .B realistic_click
319
+ Boolean. Higher click fidelity: denser mouse approach (15 steps), sub-pixel hand-tremor micro-moves during the press, and a small mouseup drift so the mousedown and mouseup coordinates differ. For sites that score click realism. Costs roughly 80-120ms per click (default: false).
320
+
296
321
  .TP
297
322
  .B delay
298
323
  Milliseconds to wait after page load (default: 4000).
@@ -419,7 +444,7 @@ Object. Custom page.goto() options for Puppeteer navigation. Available options:
419
444
  .IP \(bu 4
420
445
  \fB"networkidle0"\fR - Wait until 0 network requests for 500ms
421
446
  .IP \(bu 4
422
- \fB"networkidle2"\fR - Wait until 2 network requests for 500ms
447
+ \fB"networkidle2"\fR - Wait until \(<=2 network requests for 500ms
423
448
  .RE
424
449
  .IP \(bu 4
425
450
  \fBtimeout\fR: Maximum navigation time in milliseconds (overrides site timeout)
@@ -479,15 +504,28 @@ Both modes wait 16 seconds before cleanup to allow final operations to complete,
479
504
 
480
505
  .TP
481
506
 
482
- .SS Redirect Handling Options
507
+ .SS Popup Capture Options
508
+ .TP
509
+ .B capture_popups
510
+ Boolean. Capture popup windows opened during the scan and evaluate their landing URL and in-popup requests against \fBfilterRegex\fR/\fBdig\fR/\fBwhois\fR. Requires \fBinteract\fR plus interaction clicks to fire the user-gesture click that opens popups; \fBcapture_popups\fR alone registers the listener but no popups will fire (default: false).
511
+
512
+ .TP
513
+ .B interact_popups
514
+ Boolean. Mouse-click inside captured popups (content-zone clicks) so the chain cascades to its next redirect or ad. Requires \fBcapture_popups\fR. Clicks popups up to \fBcapture_popups_max_depth\fR minus 1 \(em the deepest captured popup is observed, not clicked (default: false).
483
515
 
484
516
  .TP
485
- .B follow_redirects
486
- Boolean. Follow redirects to new domains (default: true).
517
+ .B capture_popups_max_depth
518
+ Integer. Maximum popup-chain depth to capture, e.g. \fBsite -> p1 -> p2 -> p3 -> destination\fR. Each extra level multiplies popups and time (default: 4).
519
+
520
+ .TP
521
+ .B capture_popups_window_ms
522
+ Integer. Per-popup capture window in milliseconds before the popup is auto-closed (default: 5000).
523
+
524
+ .SS Redirect Handling Options
487
525
 
488
526
  .TP
489
527
  .B max_redirects
490
- Number. Maximum number of redirects to follow (default: 10).
528
+ Number. Maximum number of redirects to follow (default: 10; 0 = follow none).
491
529
 
492
530
  .TP
493
531
  .B js_redirect_timeout
@@ -501,6 +539,29 @@ Boolean. Analyze page source for redirect patterns (default: true).
501
539
  .B redirect_timeout_multiplier
502
540
  Number. Increase timeout for redirected URLs (default: 1.5).
503
541
 
542
+ .SS Ghost Cursor Options
543
+ Optional Bezier-curve mouse engine (the \fBghost-cursor\fR npm package, install
544
+ with \fBnpm i ghost-cursor\fR). Falls back to the built-in mouse if not
545
+ installed. Enable per-site with \fBcursor_mode\fR or globally with the
546
+ \fB\-\-ghost-cursor\fR flag.
547
+ .TP
548
+ .B cursor_mode
549
+ String. Set to \fB"ghost"\fR to use ghost-cursor Bezier mouse movements for this site.
550
+ .TP
551
+ .B ghost_cursor_speed
552
+ Number. Movement speed multiplier (default: auto).
553
+ .TP
554
+ .B ghost_cursor_hesitate
555
+ Number. Delay in milliseconds before a click (default: 50).
556
+ .TP
557
+ .B ghost_cursor_overshoot
558
+ Number. Maximum overshoot distance in pixels before correcting back to the target (default: auto).
559
+ .TP
560
+ .B ghost_cursor_duration
561
+ Number. How long the Bezier movement loop runs, in milliseconds (default: \fBinteract_duration\fR or 2000). Part of this budget (up to half) is reserved for clicks.
562
+ .PP
563
+ Ghost-cursor only \fIclicks\fR when both \fBinteract\fR and \fBinteract_clicks\fR are true. With \fBrealistic_click\fR set, each press adds hand-tremor during the hold plus a mouseup drift so mousedown and mouseup coordinates differ. Ghost mode honors \fBinteract_click_count\fR (default 3, cap 20); since realistic clicks take roughly 600-700ms each, raise \fBghost_cursor_duration\fR (about \fBinteract_click_count\fR x 700 plus movement, e.g. 5000-8000) to fit all of them \(em the default 2000 fits about one click.
564
+
504
565
  .SS Cloudflare Protection Options
505
566
 
506
567
  .TP
@@ -678,15 +739,15 @@ Global and per-site boolean to enable similarity filtering against ignoreDomains
678
739
  With default settings (\fBignore_similar_threshold: 80\fR):
679
740
  .RS
680
741
  .IP \(bu 4
681
- \fBanimerco.com\fR vs \fBanimerco.org\fR 100% similar Ignored
742
+ \fBanimerco.com\fR vs \fBanimerco.org\fR \(-> 100% similar \(-> Ignored
682
743
  .IP \(bu 4
683
- \fBgoogle.com\fR vs \fBgoogle.co.uk\fR 100% similar Ignored
744
+ \fBgoogle.com\fR vs \fBgoogle.co.uk\fR \(-> 100% similar \(-> Ignored
684
745
  .IP \(bu 4
685
- \fBamazon.com\fR vs \fBamazon2.org\fR 89% similar Ignored
746
+ \fBamazon.com\fR vs \fBamazon2.org\fR \(-> 89% similar \(-> Ignored
686
747
  .IP \(bu 4
687
- \fBfacebook.com\fR vs \fBfaceboook.com\fR 91% similar Ignored
748
+ \fBfacebook.com\fR vs \fBfaceboook.com\fR \(-> 91% similar \(-> Ignored
688
749
  .IP \(bu 4
689
- \fBapple.com\fR vs \fBmicrosoft.com\fR 0% similar Kept
750
+ \fBapple.com\fR vs \fBmicrosoft.com\fR \(-> 0% similar \(-> Kept
690
751
  .RE
691
752
 
692
753
  .SH EXAMPLES
@@ -844,7 +905,7 @@ With default settings (\fBignore_similar_threshold: 80\fR):
844
905
 
845
906
  .SS Run with debug mode and similarity filtering:
846
907
  .EX
847
- node nwss.js --debug --dry-run --verbose
908
+ node nwss.js --debug --dry-run
848
909
  .EE
849
910
 
850
911
  .SS Run with adblock output format: