@fanboynz/network-scanner 3.1.2 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -1
- package/CLAUDE.md +2 -1
- package/README.md +33 -5
- package/eslint.config.mjs +13 -1
- package/lib/browserhealth.js +25 -3
- package/lib/dns.js +238 -0
- package/lib/domain-cache.js +14 -127
- package/lib/ghost-cursor.js +29 -11
- package/lib/interaction.js +4 -0
- package/lib/nettools.js +154 -51
- package/lib/output.js +24 -13
- package/lib/redirect.js +4 -1
- package/lib/validate_rules.js +16 -1
- package/nwss.1 +76 -15
- package/nwss.js +356 -105
- package/package.json +1 -1
package/lib/ghost-cursor.js
CHANGED
|
@@ -15,6 +15,11 @@
|
|
|
15
15
|
// npm install ghost-cursor (optional dependency)
|
|
16
16
|
|
|
17
17
|
const { formatLogMessage, messageColors } = require('./colorize');
|
|
18
|
+
// humanClick gives the coordinate-click path the same press realism as the
|
|
19
|
+
// built-in content clicks (hover dwell + mousedown/hold/mouseup, optional
|
|
20
|
+
// hand-tremor + mouseup drift) instead of a 0ms page.mouse.click. One-way
|
|
21
|
+
// require — interaction.js does not depend on ghost-cursor, so no cycle.
|
|
22
|
+
const { humanClick } = require('./interaction');
|
|
18
23
|
const GHOST_CURSOR_TAG = messageColors.processing('[ghost-cursor]');
|
|
19
24
|
|
|
20
25
|
let ghostCursorModule = null;
|
|
@@ -56,7 +61,7 @@ function createGhostCursor(page, options = {}) {
|
|
|
56
61
|
const cursor = ghostCursorModule.createCursor(page, { x: startX, y: startY });
|
|
57
62
|
|
|
58
63
|
if (forceDebug) {
|
|
59
|
-
console.log(formatLogMessage('debug',
|
|
64
|
+
console.log(formatLogMessage('debug', `${GHOST_CURSOR_TAG} Cursor instance created`));
|
|
60
65
|
}
|
|
61
66
|
|
|
62
67
|
return cursor;
|
|
@@ -98,7 +103,7 @@ async function ghostMove(cursor, toX, toY, options = {}) {
|
|
|
98
103
|
const moveOpts = {};
|
|
99
104
|
if (moveSpeed !== undefined) moveOpts.moveSpeed = moveSpeed;
|
|
100
105
|
if (moveDelay > 0) moveOpts.moveDelay = moveDelay;
|
|
101
|
-
|
|
106
|
+
moveOpts.randomizeMoveDelay = randomizeMoveDelay; // always defined (defaults to true)
|
|
102
107
|
if (overshootThreshold !== undefined) moveOpts.overshootThreshold = overshootThreshold;
|
|
103
108
|
|
|
104
109
|
await cursor.moveTo({ x: toX, y: toY }, moveOpts);
|
|
@@ -126,6 +131,8 @@ async function ghostMove(cursor, toX, toY, options = {}) {
|
|
|
126
131
|
* @param {number} options.waitForClick - Delay (ms) between mousedown/mouseup (default: auto)
|
|
127
132
|
* @param {number} options.moveDelay - Delay (ms) after moving to target
|
|
128
133
|
* @param {number} options.paddingPercentage - Click point within element (0=edge, 100=center)
|
|
134
|
+
* @param {import('puppeteer').Page} options.page - Page for coordinate clicks (falls back to cursor.page)
|
|
135
|
+
* @param {boolean} options.realistic - Coordinate clicks: emit hand-tremor + mouseup drift (default: false)
|
|
129
136
|
* @param {boolean} options.forceDebug - Enable debug logging
|
|
130
137
|
* @returns {Promise<boolean>} true if click succeeded
|
|
131
138
|
*/
|
|
@@ -137,6 +144,8 @@ async function ghostClick(cursor, target, options = {}) {
|
|
|
137
144
|
waitForClick,
|
|
138
145
|
moveDelay,
|
|
139
146
|
paddingPercentage,
|
|
147
|
+
page,
|
|
148
|
+
realistic = false,
|
|
140
149
|
forceDebug
|
|
141
150
|
} = options;
|
|
142
151
|
|
|
@@ -149,16 +158,25 @@ async function ghostClick(cursor, target, options = {}) {
|
|
|
149
158
|
if (typeof target === 'string') {
|
|
150
159
|
await cursor.click(target, clickOpts);
|
|
151
160
|
} else {
|
|
152
|
-
//
|
|
161
|
+
// Coordinate click: ghost-cursor's bezier moveTo brings the cursor to the
|
|
162
|
+
// point, then humanClick does the realistic press (hover dwell, mousedown
|
|
163
|
+
// → hold → mouseup, plus hand-tremor + down≠up drift when realistic). This
|
|
164
|
+
// replaces a 0ms page.mouse.click, so the ghost path gets the same click
|
|
165
|
+
// realism as built-in content clicks.
|
|
153
166
|
await cursor.moveTo(target);
|
|
154
|
-
//
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
167
|
+
// Prefer the caller-supplied page; fall back to the cursor's own page
|
|
168
|
+
// (ghost-cursor exposes it as cursor.page) so we don't depend on internals.
|
|
169
|
+
// Return false (not silent success) if there's no usable page — otherwise
|
|
170
|
+
// the "Clicked" log + return true below would lie about a click that
|
|
171
|
+
// never fired.
|
|
172
|
+
const clickPage = page || cursor.page;
|
|
173
|
+
if (!clickPage || typeof clickPage.mouse?.down !== 'function') {
|
|
174
|
+
if (forceDebug) {
|
|
175
|
+
console.log(formatLogMessage('debug', `${GHOST_CURSOR_TAG} Coordinate click skipped: no usable page`));
|
|
176
|
+
}
|
|
177
|
+
return false;
|
|
161
178
|
}
|
|
179
|
+
await humanClick(clickPage, target.x, target.y, { realistic, forceDebug });
|
|
162
180
|
}
|
|
163
181
|
|
|
164
182
|
if (forceDebug) {
|
|
@@ -189,7 +207,7 @@ async function ghostRandomMove(cursor, options = {}) {
|
|
|
189
207
|
try {
|
|
190
208
|
await cursor.randomMove();
|
|
191
209
|
if (options.forceDebug) {
|
|
192
|
-
console.log(formatLogMessage('debug',
|
|
210
|
+
console.log(formatLogMessage('debug', `${GHOST_CURSOR_TAG} Random movement performed`));
|
|
193
211
|
}
|
|
194
212
|
return true;
|
|
195
213
|
} catch (err) {
|
package/lib/interaction.js
CHANGED
|
@@ -1333,5 +1333,9 @@ module.exports = {
|
|
|
1333
1333
|
simulateScrolling,
|
|
1334
1334
|
interactWithElements,
|
|
1335
1335
|
performContentClicks,
|
|
1336
|
+
// Realistic timed click (hover dwell + mousedown/hold/mouseup, optional
|
|
1337
|
+
// hand-tremor + mouseup drift). Reused by lib/ghost-cursor.js so the ghost
|
|
1338
|
+
// coordinate click gets the same press realism as built-in content clicks.
|
|
1339
|
+
humanClick,
|
|
1336
1340
|
generateRandomCoordinates
|
|
1337
1341
|
};
|
package/lib/nettools.js
CHANGED
|
@@ -124,7 +124,6 @@ function loadDiskCache(filePath, cache, ttl, maxSize) {
|
|
|
124
124
|
// Surface the event so the user knows they lost their warm cache;
|
|
125
125
|
// previously this was a silent reset, which made "why did my dns
|
|
126
126
|
// cache stop helping?" hard to diagnose.
|
|
127
|
-
// eslint-disable-next-line no-console
|
|
128
127
|
console.warn(`${messageColors.highlight('[dns-cache]')} ${path.basename(filePath)} was unreadable (${err.message}); starting fresh`);
|
|
129
128
|
try { fs.unlinkSync(filePath); } catch {}
|
|
130
129
|
}
|
|
@@ -256,6 +255,38 @@ function getDnsCacheStats() {
|
|
|
256
255
|
// Disk cache is opt-in via --dns-cache flag
|
|
257
256
|
let diskCacheEnabled = false;
|
|
258
257
|
|
|
258
|
+
// Optional dig resolver(s), set from --dns. When non-empty, dig queries
// `@<one of these>` (round-robin) instead of the system resolver — so dig uses
// the same reliable servers as the pre-check rather than a flaky /etc/resolv.conf
// (the cause of `dig: Command timeout` drops on Cloudflare-fronted ad domains).
let digResolvers = [];
let digResolverCursor = 0;

/**
 * Reduce one --dns entry to the bare address that dig's `@server` expects.
 *
 * Accepted shapes: `ipv4`, `ipv4:port`, `ipv6`, `[ipv6]`, `[ipv6]:port`
 * (including IPv4-mapped forms such as `[::ffff:1.2.3.4]:53`). Surrounding
 * whitespace is trimmed first, so entries from a "a, b" style list do not
 * end up as `@ b` on the dig command line. Anything unrecognised is returned
 * trimmed but otherwise untouched.
 *
 * @param {*} spec - Resolver entry; coerced with String().
 * @returns {string} Address without brackets or port.
 */
function digServerFromSpec(spec) {
  const text = String(spec).trim();

  // `[ipv6]` or `[ipv6]:port` -> the address inside the brackets. The dot is
  // allowed so IPv4-mapped IPv6 addresses are unwrapped too.
  const bracketed = text.match(/^\[([0-9a-fA-F:.]+)\]/);
  if (bracketed) return bracketed[1];

  // `ipv4:port` -> ipv4. A bare IPv6 address never matches (no dots-only
  // prefix followed by a single `:port`), so it falls through unchanged.
  const v4WithPort = text.match(/^(\d{1,3}(?:\.\d{1,3}){3}):\d+$/);
  if (v4WithPort) return v4WithPort[1];

  return text;
}

/**
 * Replace the list of resolvers that dig lookups are pinned to.
 *
 * Falsy entries are dropped, the rest are normalised with digServerFromSpec,
 * entries that normalise to an empty string are dropped, and duplicates are
 * removed (first occurrence wins) so failover never retries the same server.
 * The round-robin cursor is reset so the first configured resolver leads the
 * first lookup after a reconfiguration.
 *
 * @param {Array<string>} servers - Resolver entries; a non-array clears the list.
 */
function setDigResolvers(servers) {
  const normalized = (Array.isArray(servers) ? servers : [])
    .filter(Boolean)
    .map(digServerFromSpec)
    .filter(Boolean);
  digResolvers = [...new Set(normalized)];
  digResolverCursor = 0;
}

/**
 * Ordered `@server` attempt list for ONE dig lookup: starts at the round-robin
 * cursor (advanced once per lookup) then falls through the remaining resolvers
 * as failover.
 *
 * @returns {Array<string|null>} `['@a', '@b', ...]`, or `[null]` when no --dns
 *   resolvers are configured — a single attempt via the system resolver.
 */
function digServerAttemptList() {
  const count = digResolvers.length;
  if (count === 0) return [null];

  const start = digResolverCursor % count;
  // Keep the cursor inside [0, count) instead of incrementing forever; the
  // sequence of start positions is the same as a plain post-increment.
  digResolverCursor = (start + 1) % count;

  return Array.from({ length: count }, (_, offset) => `@${digResolvers[(start + offset) % count]}`);
}
|
|
289
|
+
|
|
259
290
|
/**
|
|
260
291
|
* Enable persistent disk caching for dig/whois results.
|
|
261
292
|
* Call this when --dns-cache flag is set. Idempotent — repeated calls
|
|
@@ -293,7 +324,6 @@ function enableDiskCache() {
|
|
|
293
324
|
// Debug log only if anything was actually warmed; silent on fresh
|
|
294
325
|
// installs / empty disk caches.
|
|
295
326
|
if (digWarm > 0 || whoisWarm > 0) {
|
|
296
|
-
// eslint-disable-next-line no-console
|
|
297
327
|
console.log(`${messageColors.highlight('[dns-cache]')} Warmed resolved-hostnames index from disk: ${digWarm} dig + ${whoisWarm} whois entries`);
|
|
298
328
|
}
|
|
299
329
|
|
|
@@ -994,50 +1024,103 @@ async function whoisLookupWithRetry(domain = '', timeout = 10000, whoisServer =
|
|
|
994
1024
|
* @returns {Promise<Object>} Object with success status and output/error
|
|
995
1025
|
*/
|
|
996
1026
|
async function digLookup(domain = '', recordType = 'A', timeout = 5000) {
  // Clean domain (defensive — callers usually pass an already-clean digDomain).
  // A non-string domain is treated as empty so it is rejected below with a
  // normal { success: false } result instead of throwing on .replace().
  const rawDomain = typeof domain === 'string' ? domain : '';
  const cleanDomain = rawDomain.replace(/^https?:\/\//, '').replace(/\/.*$/, '').replace(/:\d+$/, '');

  // dig argv-injection guard. dig parses @/-/+ -leading tokens as options
  // (`@host` redirects the query to an arbitrary server, `-f path` reads a
  // file as a query batch) and has no `--` end-of-options marker like whois.
  // Reject anything not hostname-shaped before shelling out — success:false so
  // it's treated as no-match and not cached. (Charset blocks @ + / space etc;
  // the leading-`-` check blocks `-f` and friends, since `-` is valid mid-host.)
  if (!cleanDomain || /[^a-zA-Z0-9._-]/.test(cleanDomain) || cleanDomain.startsWith('-')) {
    return { success: false, error: `invalid domain shape: ${cleanDomain}`, domain: cleanDomain, recordType };
  }

  // Same guard for the record type: it is passed to dig as a positional
  // argument too, so a value such as `+short`, `@host` or `-f` would be
  // parsed as an option. Require a type-name shape (letter first, then
  // letters/digits/hyphen — covers A, AAAA, TXT, TYPE65, NSAP-PTR, ...).
  if (typeof recordType !== 'string' || !/^[A-Za-z][A-Za-z0-9-]*$/.test(recordType)) {
    return { success: false, error: `invalid record type: ${String(recordType)}`, domain: cleanDomain, recordType };
  }

  // Resolver failover: try the round-robin resolver first, then fall through
  // the remaining --dns resolvers on timeout / no-reply / REFUSED / SERVFAIL.
  // Capped at 3 attempts so a host that's dead on every resolver can't burn
  // the whole nettools budget.
  const attempts = digServerAttemptList();
  // Only do JS-level failover when --dns gave us pinned resolvers. Without it,
  // attempts is [null]: a SINGLE system-resolver invocation that keeps dig's
  // native resolv.conf rotation + retries (forcing +tries=1 there would strip
  // that built-in resilience).
  const usingResolvers = attempts[0] !== null;
  const maxAttempts = usingResolvers ? Math.min(3, attempts.length) : 1;
  // Pinned-resolver attempts use +time=2 +tries=1 (the JS loop owns failover)
  // under a 4s ceiling. The system-resolver path keeps the caller's full
  // timeout and dig's own retry behaviour.
  const perAttemptTimeout = usingResolvers ? Math.min(timeout, 4000) : timeout;

  let lastError = 'no resolver attempts made';

  for (let i = 0; i < maxAttempts; i++) {
    const digServerArg = attempts[i];
    // With a pinned resolver: one fast try (+time=2 +tries=1), then the JS loop
    // moves to the next resolver. Without --dns: bare `dig name type`.
    // execFile (no shell) plus the shape checks above => args can't be injected.
    const digArgs = digServerArg
      ? [digServerArg, '+time=2', '+tries=1', cleanDomain, recordType]
      : [cleanDomain, recordType];
    const resolverLabel = digServerArg ? digServerArg.slice(1) : 'system resolver';

    try {
      const { stdout: fullOutput } = await execFileWithTimeout('dig', digArgs, perAttemptTimeout);

      // Judge success by RCODE, not by stderr. REFUSED/SERVFAIL are
      // resolver-side failures another resolver may not share — fail over
      // instead of accepting an answerless response. Any other status
      // (NOERROR, NXDOMAIN, ...) is accepted; a missing status line is
      // treated as NOERROR.
      const statusMatch = fullOutput.match(/status:\s*([A-Z]+)/i);
      const rcode = statusMatch ? statusMatch[1].toUpperCase() : 'NOERROR';
      if (rcode === 'REFUSED' || rcode === 'SERVFAIL') {
        lastError = `dig ${rcode} from ${resolverLabel}`;
        continue; // try next resolver in the failover list
      }

      // Non-empty stderr is intentionally NOT treated as failure here: dig
      // can print warnings to stderr while still returning a valid ANSWER
      // SECTION, and failing on them would discard good answers.
      // shortOutput = last whitespace-separated field of each answer line.
      const answerMatch = fullOutput.match(/;; ANSWER SECTION:\n([\s\S]*?)(?:\n;;|\n*$)/);
      let shortOutput = '';
      if (answerMatch) {
        shortOutput = answerMatch[1]
          .split('\n')
          .map(line => line.split(/\s+/).pop())
          .filter(Boolean)
          .join('\n');
      }

      return {
        success: true,
        output: fullOutput,
        shortOutput,
        domain: cleanDomain,
        recordType,
        resolver: resolverLabel
      };
    } catch (error) {
      // Timeout or non-zero exit from this attempt.
      // Record and fall through to the next resolver.
      lastError = error.message;
    }
  }

  // Every attempt timed out / was refused. success:false so the handler does
  // NOT cache it (transient — caching would poison the domain for the TTL).
  return {
    success: false,
    error: lastError,
    domain: cleanDomain,
    recordType
  };
}
|
|
1042
1125
|
|
|
1043
1126
|
/**
|
|
@@ -1170,15 +1253,20 @@ function createNetToolsHandler(config) {
|
|
|
1170
1253
|
// Determine which domain will be used for dig lookup
|
|
1171
1254
|
const digDomain = digSubdomain && originalDomain ? originalDomain : domain;
|
|
1172
1255
|
|
|
1173
|
-
// For whois: use root domain only (whois data is consistent for entire
|
|
1174
|
-
|
|
1175
|
-
|
|
1256
|
+
// For whois: use root domain only (whois data is consistent for entire
|
|
1257
|
+
// domain). Only compute it when whois is actually configured — getRootDomain
|
|
1258
|
+
// does a domain parse, so on a dig-only config (no whois/whois-or) this skips
|
|
1259
|
+
// a parse + string build on every single request. whoisRootDomain is only
|
|
1260
|
+
// ever read inside the whois branch, so the `domain` fallback is never used.
|
|
1261
|
+
const wantWhois = hasWhois || hasWhoisOr;
|
|
1262
|
+
const whoisRootDomain = wantWhois ? (getRootDomain ? getRootDomain(`http://${domain}`) : domain) : domain;
|
|
1263
|
+
|
|
1176
1264
|
// Check if we need to perform any lookups with appropriate deduplication
|
|
1177
1265
|
// Whois: root domain + config (whois data same for sub.example.com and example.com)
|
|
1178
|
-
const whoisDedupeKey = `${whoisRootDomain}:${whoisConfigKey}
|
|
1266
|
+
const whoisDedupeKey = wantWhois ? `${whoisRootDomain}:${whoisConfigKey}` : '';
|
|
1179
1267
|
// Dig: specific subdomain + config (DNS records can differ between subdomains)
|
|
1180
1268
|
const digDedupeKey = `${digDomain}:${digConfigKey}`;
|
|
1181
|
-
const needsWhoisLookup =
|
|
1269
|
+
const needsWhoisLookup = wantWhois && !processedWhoisDomains.has(whoisDedupeKey);
|
|
1182
1270
|
const needsDigLookup = (hasDig || hasDigOr) && !processedDigDomains.has(digDedupeKey);
|
|
1183
1271
|
|
|
1184
1272
|
// Claim the dedupe keys NOW, synchronously, before executeNetToolsLookup
|
|
@@ -1606,11 +1694,20 @@ function createNetToolsHandler(config) {
|
|
|
1606
1694
|
// backwards-compat additive: old code reading new cache
|
|
1607
1695
|
// ignores it; new code reading old cache (no field) falls
|
|
1608
1696
|
// back to lazy on-hit population in the cache-hit branch.
|
|
1609
|
-
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
|
|
1697
|
+
//
|
|
1698
|
+
// Only cache a SUCCESSFUL dig. A timeout/error (success:false) is
|
|
1699
|
+
// transient — caching it would poison the domain for the full
|
|
1700
|
+
// cache TTL (20h when persisted via --dns-cache), so a host that
|
|
1701
|
+
// resolves fine on the next attempt keeps getting dropped. (An
|
|
1702
|
+
// NXDOMAIN is success:true with NXDOMAIN in the body — a real
|
|
1703
|
+
// answer — so it's correctly still cached.)
|
|
1704
|
+
if (digResult.success) {
|
|
1705
|
+
globalDigResultCache.set(digCacheKey, {
|
|
1706
|
+
result: digResult,
|
|
1707
|
+
timestamp: now,
|
|
1708
|
+
hostname: digDomain
|
|
1709
|
+
});
|
|
1710
|
+
}
|
|
1614
1711
|
dnsCacheStats.digMisses++;
|
|
1615
1712
|
pushFreshSample(dnsCacheStats.freshDig, `${digDomain} (${digRecordType})`);
|
|
1616
1713
|
// Index hostname IF dig actually proved resolution -- NXDOMAIN
|
|
@@ -1662,7 +1759,7 @@ function createNetToolsHandler(config) {
|
|
|
1662
1759
|
if (hasDig) logToConsoleAndFile(`${messageColors.highlight('[dig-and]')} Terms checked: ${digTerms.join(' AND ')}, matched: ${digMatched}`);
|
|
1663
1760
|
if (hasDigOr) logToConsoleAndFile(`${messageColors.highlight('[dig-or]')} Terms checked: ${digOrTerms.join(' OR ')}, matched: ${digOrMatched}`);
|
|
1664
1761
|
}
|
|
1665
|
-
logToConsoleAndFile(`${messageColors.highlight('[dig]')} Lookup completed for ${digDomain}, dig-and: ${digMatched}, dig-or: ${digOrMatched}`);
|
|
1762
|
+
logToConsoleAndFile(`${messageColors.highlight('[dig]')} Lookup completed for ${digDomain}${digResult.resolver ? ` via ${digResult.resolver}` : ''}, dig-and: ${digMatched}, dig-or: ${digOrMatched}`);
|
|
1666
1763
|
if (siteConfig.verbose === 1) {
|
|
1667
1764
|
if (hasDig) logToConsoleAndFile(`${messageColors.highlight('[dig]')} AND terms: ${digTerms.join(', ')}`);
|
|
1668
1765
|
if (hasDigOr) logToConsoleAndFile(`${messageColors.highlight('[dig]')} OR terms: ${digOrTerms.join(', ')}`);
|
|
@@ -1813,6 +1910,12 @@ module.exports = {
|
|
|
1813
1910
|
validateDigAvailability,
|
|
1814
1911
|
enableDiskCache,
|
|
1815
1912
|
getDnsCacheStats,
|
|
1913
|
+
// Route dig through the --dns resolver(s) instead of the system resolver.
|
|
1914
|
+
setDigResolvers,
|
|
1915
|
+
// Generic disk-cache primitives (atomic write, TTL/size-bounded) — reused by
|
|
1916
|
+
// nwss.js to persist the DNS pre-check negative cache under --dns-cache.
|
|
1917
|
+
loadDiskCache,
|
|
1918
|
+
saveDiskCache,
|
|
1816
1919
|
// Resolved-hostnames index for the DNS pre-check optimization.
|
|
1817
1920
|
// nwss.js's per-task pre-check consults this BEFORE calling resolve4
|
|
1818
1921
|
// so hosts already proven live by dig or whois (within their 20h
|
package/lib/output.js
CHANGED
|
@@ -133,32 +133,43 @@ function formatDomain(domain, options = {}) {
|
|
|
133
133
|
if (!domain || domain.length <= 6 || !domain.includes('.')) {
|
|
134
134
|
return null;
|
|
135
135
|
}
|
|
136
|
-
|
|
137
|
-
//
|
|
136
|
+
|
|
137
|
+
// Path-prefix rules (from output_regex) are stored as "host/path/" — they
|
|
138
|
+
// contain a '/'. Only adblock can express a path; every domain-only format
|
|
139
|
+
// (dnsmasq/unbound/pihole/hosts/privoxy/plain) falls back to the bare host
|
|
140
|
+
// (everything before the first '/') so output stays valid in all formats.
|
|
141
|
+
const slash = domain.indexOf('/');
|
|
142
|
+
const isPathRule = slash !== -1;
|
|
143
|
+
const host = isPathRule ? domain.slice(0, slash) : domain;
|
|
144
|
+
|
|
145
|
+
// If plain is true, always return just the host regardless of other options
|
|
138
146
|
if (plain) {
|
|
139
|
-
return
|
|
147
|
+
return host;
|
|
140
148
|
}
|
|
141
|
-
|
|
149
|
+
|
|
142
150
|
// Apply specific format based on output mode
|
|
143
151
|
if (pihole) {
|
|
144
152
|
// Escape dots for regex and use Pi-hole format: (^|\.)domain\.com$
|
|
145
|
-
const escapedDomain =
|
|
153
|
+
const escapedDomain = host.replace(/\./g, '\\.');
|
|
146
154
|
return `(^|\\.)${escapedDomain}$`;
|
|
147
155
|
} else if (privoxy) {
|
|
148
|
-
return `{ +block } .${
|
|
156
|
+
return `{ +block } .${host}`;
|
|
149
157
|
} else if (dnsmasq) {
|
|
150
|
-
return `local=/${
|
|
158
|
+
return `local=/${host}/`;
|
|
151
159
|
} else if (dnsmasqOld) {
|
|
152
|
-
return `server=/${
|
|
160
|
+
return `server=/${host}/`;
|
|
153
161
|
} else if (unbound) {
|
|
154
|
-
return `local-zone: "${
|
|
162
|
+
return `local-zone: "${host}." always_null`;
|
|
155
163
|
} else if (localhostIP) {
|
|
156
|
-
return `${localhostIP} ${
|
|
164
|
+
return `${localhostIP} ${host}`;
|
|
157
165
|
} else if (adblockRules && resourceType) {
|
|
158
|
-
//
|
|
159
|
-
|
|
166
|
+
// Adblock with resource-type modifier. A path rule self-anchors via its
|
|
167
|
+
// trailing '/', so it takes no '^' separator; a domain rule needs '^'.
|
|
168
|
+
return isPathRule ? `||${domain}${resourceType}` : `||${domain}^${resourceType}`;
|
|
160
169
|
} else {
|
|
161
|
-
|
|
170
|
+
// Default adblock: ||host^ for a domain, ||host/path/ for a path rule
|
|
171
|
+
// (the path already anchors, so no trailing '^').
|
|
172
|
+
return isPathRule ? `||${domain}` : `||${domain}^`;
|
|
162
173
|
}
|
|
163
174
|
}
|
|
164
175
|
|
package/lib/redirect.js
CHANGED
|
@@ -19,7 +19,10 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
|
|
|
19
19
|
let httpStatus = null;
|
|
20
20
|
let cfRay = null;
|
|
21
21
|
const jsRedirectTimeout = siteConfig.js_redirect_timeout || 5000; // Wait 5s for JS redirects
|
|
22
|
-
|
|
22
|
+
// Use a number check, not || , so max_redirects: 0 (follow none) isn't
|
|
23
|
+
// swallowed as falsy and silently bumped to 10. Only absent/negative/non-number defaults.
|
|
24
|
+
const maxRedirects = (typeof siteConfig.max_redirects === 'number' && siteConfig.max_redirects >= 0)
|
|
25
|
+
? siteConfig.max_redirects : 10;
|
|
23
26
|
const detectJSPatterns = siteConfig.detect_js_patterns !== false; // Default to true
|
|
24
27
|
|
|
25
28
|
// Monitor frame navigations to detect redirects
|
package/lib/validate_rules.js
CHANGED
|
@@ -806,7 +806,6 @@ function cleanRulesetFile(filePath, outputPath = null, options = {}) {
|
|
|
806
806
|
} = options;
|
|
807
807
|
|
|
808
808
|
const fs = require('fs');
|
|
809
|
-
const path = require('path');
|
|
810
809
|
|
|
811
810
|
let content;
|
|
812
811
|
try {
|
|
@@ -1118,6 +1117,7 @@ const KNOWN_SITE_CONFIG_KEYS = new Set([
|
|
|
1118
1117
|
'ignore_similar_threshold', 'interact', 'interact_click_count', 'interact_clicks',
|
|
1119
1118
|
'interact_duration', 'interact_intensity', 'interact_scrolling', 'isBrave',
|
|
1120
1119
|
'js_redirect_timeout', 'localhost', 'max_redirects', 'openvpn', 'pihole',
|
|
1120
|
+
'output_regex',
|
|
1121
1121
|
'plain', 'privoxy', 'proxy', 'proxy_bypass', 'proxy_debug', 'proxy_remote_dns',
|
|
1122
1122
|
'realistic_click', 'referrer_disable', 'referrer_headers', 'regex_and',
|
|
1123
1123
|
'reload', 'resourceTypes', 'screenshot', 'searchstring', 'searchstring_and',
|
|
@@ -1307,6 +1307,21 @@ function normalizeSiteConfig(siteConfig, siteIndex = 0) {
|
|
|
1307
1307
|
}
|
|
1308
1308
|
}
|
|
1309
1309
|
|
|
1310
|
+
// 2b. output_regex must be a compilable regex. An invalid one is silently
|
|
1311
|
+
// disabled at runtime (the use-site try/catch falls back to ||host^), so
|
|
1312
|
+
// surface it here at load time where the user can fix it.
|
|
1313
|
+
if ('output_regex' in siteConfig && siteConfig.output_regex != null && siteConfig.output_regex !== '') {
|
|
1314
|
+
if (typeof siteConfig.output_regex !== 'string') {
|
|
1315
|
+
warnings.push(`${tag}: 'output_regex' should be a string regex, got ${JSON.stringify(siteConfig.output_regex)} — will be ignored`);
|
|
1316
|
+
} else {
|
|
1317
|
+
try {
|
|
1318
|
+
new RegExp(siteConfig.output_regex);
|
|
1319
|
+
} catch (e) {
|
|
1320
|
+
warnings.push(`${tag}: 'output_regex' is not a valid regex (${e.message}) — will be ignored, output falls back to ||host^`);
|
|
1321
|
+
}
|
|
1322
|
+
}
|
|
1323
|
+
}
|
|
1324
|
+
|
|
1310
1325
|
// 3. String → single-element array coercion for fields that accept both
|
|
1311
1326
|
// forms (dig, dig-or, whois, whois-or). Downstream consumers all gate on
|
|
1312
1327
|
// Array.isArray(), so a bare string value previously silently disabled
|
package/nwss.1
CHANGED
|
@@ -72,10 +72,6 @@ Output as \fB(^|\\.)domain\\.com$\fR format for Pi-hole regex filters.
|
|
|
72
72
|
Generate adblock filter rules with resource type modifiers (requires \fB\-o\fR).
|
|
73
73
|
|
|
74
74
|
.SS General Options
|
|
75
|
-
.TP
|
|
76
|
-
.B \--verbose
|
|
77
|
-
Enable verbose output globally for all sites.
|
|
78
|
-
|
|
79
75
|
.TP
|
|
80
76
|
.B \--debug
|
|
81
77
|
Enable debug mode with detailed logging of all network requests.
|
|
@@ -104,6 +100,10 @@ Output full subdomains instead of collapsing to root domains.
|
|
|
104
100
|
.B \--no-interact
|
|
105
101
|
Disable mouse simulation and page interaction globally.
|
|
106
102
|
|
|
103
|
+
.TP
|
|
104
|
+
.B \--ghost-cursor
|
|
105
|
+
Use ghost-cursor Bezier mouse movements globally (requires \fBnpm i ghost-cursor\fR). See \fBGhost Cursor Options\fR. Equivalent to per-site \fBcursor_mode: "ghost"\fR.
|
|
106
|
+
|
|
107
107
|
.TP
|
|
108
108
|
.BR \--custom-json " \fIFILE\fR"
|
|
109
109
|
Use \fIFILE\fR instead of \fBconfig.json\fR for configuration.
|
|
@@ -136,10 +136,23 @@ Remove Chrome/Puppeteer temporary files before exit.
|
|
|
136
136
|
.BR \--max-concurrent " \fINUMBER\fR"
|
|
137
137
|
Maximum concurrent site processing (1-50, overrides config/default).
|
|
138
138
|
|
|
139
|
+
.TP
|
|
140
|
+
.BR \--dns " \fIIP\fR[,\fIIP\fR...]"
|
|
141
|
+
Nameserver(s) for the DNS pre-check AND nettools' dig \(em does not affect Chrome
|
|
142
|
+
navigation or whois. A single address pins all queries to it; several are
|
|
143
|
+
rotated per query (each leading once, the rest as failover) to spread the
|
|
144
|
+
load. Routing dig through these avoids dig timeouts on a flaky system resolver
|
|
145
|
+
silently dropping dig-gated domains. Overrides /etc/resolv.conf. Invalid
|
|
146
|
+
entries are warned and dropped.
|
|
147
|
+
|
|
139
148
|
.TP
|
|
140
149
|
.BR \--cleanup-interval " \fINUMBER\fR"
|
|
141
150
|
Browser restart interval in URLs processed (1-1000, overrides config/default).
|
|
142
151
|
|
|
152
|
+
.TP
|
|
153
|
+
.B \--show-dead-domains
|
|
154
|
+
At end of scan, list hostnames that did not resolve or were unreachable (\fBNXDOMAIN\fR/\fBENODATA\fR plus \fBERR_NAME_NOT_RESOLVED\fR/\fBERR_ADDRESS_UNREACHABLE\fR). Excludes blocks and timeouts, since those mean the domain is alive. Useful for pruning dead URLs.
|
|
155
|
+
|
|
143
156
|
.TP
|
|
144
157
|
.BR \-h ", " \--help
|
|
145
158
|
Show help message and exit.
|
|
@@ -249,6 +262,10 @@ Regex pattern(s) to match suspicious requests.
|
|
|
249
262
|
.B regex_and
|
|
250
263
|
Boolean. Use AND logic for multiple filterRegex patterns - ALL patterns must match the same URL (default: false).
|
|
251
264
|
|
|
265
|
+
.TP
|
|
266
|
+
.B output_regex
|
|
267
|
+
String. Regex applied to each matched URL to build the rule body: capture group 1 (or the whole match) becomes \fB||<capture>\fR instead of \fB||host^\fR. For example \fB^https?://([^/]+/[^/]+/)\fR turns \fBhttps://host.com/script/abc.js\fR into \fB||host.com/script/\fR, collapsing randomized filenames under a path into one rule. The capture must include the host. If the regex does not match a URL, output falls back to \fB||host^\fR. Adblock-only; domain-based formats (dnsmasq, pihole, hosts, plain) emit the bare host.
|
|
268
|
+
|
|
252
269
|
.TP
|
|
253
270
|
.B comments
|
|
254
271
|
Documentation strings or notes - completely ignored by the scanner. Can be a single string or array of strings. Used for adding context, URLs, timestamps, or any documentation notes to configuration files.
|
|
@@ -293,6 +310,14 @@ Boolean. Simulate mouse movements and clicks.
|
|
|
293
310
|
.B interact_intensity
|
|
294
311
|
String. Interaction simulation intensity: \fB"low"\fR, \fB"medium"\fR, \fB"high"\fR (default: "medium").
|
|
295
312
|
|
|
313
|
+
.TP
|
|
314
|
+
.B interact_click_count
|
|
315
|
+
Integer. Number of random content-zone clicks per load, capped at 20 (default: 3). The default of 3 is a primary click plus two backups, since some ad SDKs suppress the first or second click as warmup.
|
|
316
|
+
|
|
317
|
+
.TP
|
|
318
|
+
.B realistic_click
|
|
319
|
+
Boolean. Higher click fidelity: denser mouse approach (15 steps), sub-pixel hand-tremor micro-moves during the press, and a small mouseup drift so the mousedown and mouseup coordinates differ. For sites that score click realism. Costs roughly 80-120ms per click (default: false).
|
|
320
|
+
|
|
296
321
|
.TP
|
|
297
322
|
.B delay
|
|
298
323
|
Milliseconds to wait after page load (default: 4000).
|
|
@@ -419,7 +444,7 @@ Object. Custom page.goto() options for Puppeteer navigation. Available options:
|
|
|
419
444
|
.IP \(bu 4
|
|
420
445
|
\fB"networkidle0"\fR - Wait until 0 network requests for 500ms
|
|
421
446
|
.IP \(bu 4
|
|
422
|
-
\fB"networkidle2"\fR - Wait until
|
|
447
|
+
\fB"networkidle2"\fR - Wait until \(<=2 network requests for 500ms
|
|
423
448
|
.RE
|
|
424
449
|
.IP \(bu 4
|
|
425
450
|
\fBtimeout\fR: Maximum navigation time in milliseconds (overrides site timeout)
|
|
@@ -479,15 +504,28 @@ Both modes wait 16 seconds before cleanup to allow final operations to complete,
|
|
|
479
504
|
|
|
480
505
|
.TP
|
|
481
506
|
|
|
482
|
-
.SS
|
|
507
|
+
.SS Popup Capture Options
|
|
508
|
+
.TP
|
|
509
|
+
.B capture_popups
|
|
510
|
+
Boolean. Capture popup windows opened during the scan and evaluate their landing URL and in-popup requests against \fBfilterRegex\fR/\fBdig\fR/\fBwhois\fR. Requires \fBinteract\fR with \fBinteract_clicks\fR enabled, because popups only open in response to a user-gesture click; \fBcapture_popups\fR alone registers the listener but no popups will fire (default: false).
|
|
511
|
+
|
|
512
|
+
.TP
|
|
513
|
+
.B interact_popups
|
|
514
|
+
Boolean. Mouse-click inside captured popups (content-zone clicks) so the chain cascades to its next redirect or ad. Requires \fBcapture_popups\fR. Popups are clicked up to a chain depth of \fBcapture_popups_max_depth\fR minus 1 \(em the deepest captured popup is observed, not clicked (default: false).
|
|
483
515
|
|
|
484
516
|
.TP
|
|
485
|
-
.B
|
|
486
|
-
|
|
517
|
+
.B capture_popups_max_depth
|
|
518
|
+
Integer. Maximum popup-chain depth to capture, e.g. \fBsite \(-> p1 \(-> p2 \(-> p3 \(-> destination\fR. Each extra level multiplies popups and time (default: 4).
|
|
519
|
+
|
|
520
|
+
.TP
|
|
521
|
+
.B capture_popups_window_ms
|
|
522
|
+
Integer. Per-popup capture window in milliseconds before the popup is auto-closed (default: 5000).
|
|
523
|
+
|
|
524
|
+
.SS Redirect Handling Options
|
|
487
525
|
|
|
488
526
|
.TP
|
|
489
527
|
.B max_redirects
|
|
490
|
-
Number. Maximum number of redirects to follow (default: 10).
|
|
528
|
+
Number. Maximum number of redirects to follow (default: 10; 0 = follow none).
|
|
491
529
|
|
|
492
530
|
.TP
|
|
493
531
|
.B js_redirect_timeout
|
|
@@ -501,6 +539,29 @@ Boolean. Analyze page source for redirect patterns (default: true).
|
|
|
501
539
|
.B redirect_timeout_multiplier
|
|
502
540
|
Number. Increase timeout for redirected URLs (default: 1.5).
|
|
503
541
|
|
|
542
|
+
.SS Ghost Cursor Options
|
|
543
|
+
Optional Bezier-curve mouse engine (the \fBghost-cursor\fR npm package, install
|
|
544
|
+
with \fBnpm i ghost-cursor\fR). Falls back to the built-in mouse if not
|
|
545
|
+
installed. Enable per-site with \fBcursor_mode\fR or globally with the
|
|
546
|
+
\fB\-\-ghost\-cursor\fR flag.
|
|
547
|
+
.TP
|
|
548
|
+
.B cursor_mode
|
|
549
|
+
String. Set to \fB"ghost"\fR to use ghost-cursor Bezier mouse movements for this site.
|
|
550
|
+
.TP
|
|
551
|
+
.B ghost_cursor_speed
|
|
552
|
+
Number. Movement speed multiplier (default: auto).
|
|
553
|
+
.TP
|
|
554
|
+
.B ghost_cursor_hesitate
|
|
555
|
+
Number. Delay in milliseconds before a click (default: 50).
|
|
556
|
+
.TP
|
|
557
|
+
.B ghost_cursor_overshoot
|
|
558
|
+
Number. Maximum overshoot distance in pixels before correcting back to the target (default: auto).
|
|
559
|
+
.TP
|
|
560
|
+
.B ghost_cursor_duration
|
|
561
|
+
Number. How long the Bezier movement loop runs, in milliseconds (default: \fBinteract_duration\fR or 2000). Part of this budget (up to half) is reserved for clicks.
|
|
562
|
+
.PP
|
|
563
|
+
Ghost-cursor only \fIclicks\fR when both \fBinteract\fR and \fBinteract_clicks\fR are true. With \fBrealistic_click\fR set, each press adds hand-tremor during the hold plus a mouseup drift so mousedown and mouseup coordinates differ. Ghost mode honors \fBinteract_click_count\fR (default 3, cap 20); since realistic clicks take roughly 600-700ms each, raise \fBghost_cursor_duration\fR (about \fBinteract_click_count\fR \(mu 700 plus movement, e.g. 5000-8000) to fit all of them \(em the default 2000 fits about one click.
|
|
564
|
+
|
|
504
565
|
.SS Cloudflare Protection Options
|
|
505
566
|
|
|
506
567
|
.TP
|
|
@@ -678,15 +739,15 @@ Global and per-site boolean to enable similarity filtering against ignoreDomains
|
|
|
678
739
|
With default settings (\fBignore_similar_threshold: 80\fR):
|
|
679
740
|
.RS
|
|
680
741
|
.IP \(bu 4
|
|
681
|
-
\fBanimerco.com\fR vs \fBanimerco.org\fR
|
|
742
|
+
\fBanimerco.com\fR vs \fBanimerco.org\fR \(-> 100% similar \(-> Ignored
|
|
682
743
|
.IP \(bu 4
|
|
683
|
-
\fBgoogle.com\fR vs \fBgoogle.co.uk\fR
|
|
744
|
+
\fBgoogle.com\fR vs \fBgoogle.co.uk\fR \(-> 100% similar \(-> Ignored
|
|
684
745
|
.IP \(bu 4
|
|
685
|
-
\fBamazon.com\fR vs \fBamazon2.org\fR
|
|
746
|
+
\fBamazon.com\fR vs \fBamazon2.org\fR \(-> 89% similar \(-> Ignored
|
|
686
747
|
.IP \(bu 4
|
|
687
|
-
\fBfacebook.com\fR vs \fBfaceboook.com\fR
|
|
748
|
+
\fBfacebook.com\fR vs \fBfaceboook.com\fR \(-> 91% similar \(-> Ignored
|
|
688
749
|
.IP \(bu 4
|
|
689
|
-
\fBapple.com\fR vs \fBmicrosoft.com\fR
|
|
750
|
+
\fBapple.com\fR vs \fBmicrosoft.com\fR \(-> 0% similar \(-> Kept
|
|
690
751
|
.RE
|
|
691
752
|
|
|
692
753
|
.SH EXAMPLES
|
|
@@ -844,7 +905,7 @@ With default settings (\fBignore_similar_threshold: 80\fR):
|
|
|
844
905
|
|
|
845
906
|
.SS Run with debug mode and similarity filtering:
|
|
846
907
|
.EX
|
|
847
|
-
node nwss.js --debug --dry-run
|
|
908
|
+
node nwss.js --debug --dry-run
|
|
848
909
|
.EE
|
|
849
910
|
|
|
850
911
|
.SS Run with adblock output format:
|