@fanboynz/network-scanner 2.0.63 → 2.0.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +1 -1
- package/lib/adblock-rust.js +368 -0
- package/lib/cloudflare.js +225 -102
- package/lib/nettools.js +85 -57
- package/lib/redirect.js +38 -8
- package/nwss.js +269 -115
- package/package.json +6 -5
- package/scanner-script-org.js +0 -588
package/lib/nettools.js
CHANGED
|
@@ -4,11 +4,9 @@
|
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
const { exec, execSync } = require('child_process');
|
|
7
|
-
const util = require('util');
|
|
8
7
|
const fs = require('fs');
|
|
9
8
|
const path = require('path');
|
|
10
9
|
const { formatLogMessage, messageColors } = require('./colorize');
|
|
11
|
-
const execPromise = util.promisify(exec);
|
|
12
10
|
const ANSI_REGEX = /\x1b\[[0-9;]*m/g;
|
|
13
11
|
|
|
14
12
|
// Cycling index for whois server rotation
|
|
@@ -80,7 +78,11 @@ function saveDiskCache(filePath, cache, ttl, maxSize) {
|
|
|
80
78
|
}
|
|
81
79
|
}
|
|
82
80
|
|
|
83
|
-
// If over max, keep only the newest entries
|
|
81
|
+
// If over max, keep only the newest entries. Drop the pretty-print —
|
|
82
|
+
// saveDiskCache runs on the synchronous 'exit' handler when --dns-cache
|
|
83
|
+
// is set, so any work here directly delays scan exit. Compact JSON is
|
|
84
|
+
// several times faster on multi-megabyte caches and the file is not
|
|
85
|
+
// intended for human reading.
|
|
84
86
|
if (count > maxSize) {
|
|
85
87
|
const sorted = Object.entries(entries)
|
|
86
88
|
.sort((a, b) => b[1].timestamp - a[1].timestamp)
|
|
@@ -89,9 +91,9 @@ function saveDiskCache(filePath, cache, ttl, maxSize) {
|
|
|
89
91
|
for (const [key, entry] of sorted) {
|
|
90
92
|
trimmed[key] = entry;
|
|
91
93
|
}
|
|
92
|
-
fs.writeFileSync(filePath, JSON.stringify(trimmed
|
|
94
|
+
fs.writeFileSync(filePath, JSON.stringify(trimmed));
|
|
93
95
|
} else {
|
|
94
|
-
fs.writeFileSync(filePath, JSON.stringify(entries
|
|
96
|
+
fs.writeFileSync(filePath, JSON.stringify(entries));
|
|
95
97
|
}
|
|
96
98
|
} catch {
|
|
97
99
|
// Disk write failed — non-fatal, in-memory cache still works
|
|
@@ -125,14 +127,18 @@ function enableDiskCache() {
|
|
|
125
127
|
loadDiskCache(DIG_CACHE_FILE, globalDigResultCache, GLOBAL_DIG_CACHE_TTL, GLOBAL_DIG_CACHE_MAX);
|
|
126
128
|
loadDiskCache(WHOIS_CACHE_FILE, globalWhoisResultCache, GLOBAL_WHOIS_CACHE_TTL, GLOBAL_WHOIS_CACHE_MAX);
|
|
127
129
|
|
|
128
|
-
// Save caches to disk once on process exit instead of per-lookup
|
|
130
|
+
// Save caches to disk once on process exit instead of per-lookup. The
|
|
131
|
+
// 'exit' handler fires synchronously regardless of how the process exits
|
|
132
|
+
// (normal completion, signal, uncaught exception), so a separate signal
|
|
133
|
+
// handler is redundant. We deliberately do NOT install SIGINT/SIGTERM
|
|
134
|
+
// handlers here — nwss.js installs its own async ones that perform
|
|
135
|
+
// browser/VPN cleanup, and a sync handler here would call process.exit(0)
|
|
136
|
+
// first and skip that cleanup entirely.
|
|
129
137
|
const flushCaches = () => {
|
|
130
138
|
saveDiskCache(DIG_CACHE_FILE, globalDigResultCache, GLOBAL_DIG_CACHE_TTL, GLOBAL_DIG_CACHE_MAX);
|
|
131
139
|
saveDiskCache(WHOIS_CACHE_FILE, globalWhoisResultCache, GLOBAL_WHOIS_CACHE_TTL, GLOBAL_WHOIS_CACHE_MAX);
|
|
132
140
|
};
|
|
133
141
|
process.on('exit', flushCaches);
|
|
134
|
-
process.on('SIGINT', () => { flushCaches(); process.exit(0); });
|
|
135
|
-
process.on('SIGTERM', () => { flushCaches(); process.exit(0); });
|
|
136
142
|
}
|
|
137
143
|
|
|
138
144
|
/**
|
|
@@ -217,14 +223,18 @@ function execWithTimeout(command, timeout = 10000) {
|
|
|
217
223
|
// Set up timeout
|
|
218
224
|
const timer = setTimeout(() => {
|
|
219
225
|
child.kill('SIGTERM');
|
|
220
|
-
|
|
221
|
-
// Force kill after 2 seconds if SIGTERM doesn't work
|
|
222
|
-
|
|
226
|
+
|
|
227
|
+
// Force kill after 2 seconds if SIGTERM doesn't work. unref() so this
|
|
228
|
+
// tail timer doesn't keep the event loop alive past scan completion —
|
|
229
|
+
// a dig that times out near the end of a scan would otherwise delay
|
|
230
|
+
// exit by ~2 seconds.
|
|
231
|
+
const killTimer = setTimeout(() => {
|
|
223
232
|
if (!child.killed) {
|
|
224
233
|
child.kill('SIGKILL');
|
|
225
234
|
}
|
|
226
235
|
}, 2000);
|
|
227
|
-
|
|
236
|
+
killTimer.unref();
|
|
237
|
+
|
|
228
238
|
reject(new Error(`Command timeout after ${timeout}ms: ${command}`));
|
|
229
239
|
}, timeout);
|
|
230
240
|
|
|
@@ -925,6 +935,31 @@ function createNetToolsHandler(config) {
|
|
|
925
935
|
const hasWhoisOr = whoisOrTerms && Array.isArray(whoisOrTerms) && whoisOrTerms.length > 0;
|
|
926
936
|
const hasDig = digTerms && Array.isArray(digTerms) && digTerms.length > 0;
|
|
927
937
|
const hasDigOr = digOrTerms && Array.isArray(digOrTerms) && digOrTerms.length > 0;
|
|
938
|
+
|
|
939
|
+
// Pre-lowercase search terms once per handler so the per-domain check loop
|
|
940
|
+
// doesn't re-lowercase the same constants for every output it scans.
|
|
941
|
+
const whoisTermsLower = hasWhois ? whoisTerms.map(t => t.toLowerCase()) : null;
|
|
942
|
+
const whoisOrTermsLower = hasWhoisOr ? whoisOrTerms.map(t => t.toLowerCase()) : null;
|
|
943
|
+
const digTermsLower = hasDig ? digTerms.map(t => t.toLowerCase()) : null;
|
|
944
|
+
const digOrTermsLower = hasDigOr ? digOrTerms.map(t => t.toLowerCase()) : null;
|
|
945
|
+
|
|
946
|
+
// Hoisted out of handleNetToolsCheck so the closure is constructed once per
|
|
947
|
+
// handler rather than once per invocation. References forceDebug, debugLogFile,
|
|
948
|
+
// and fs from the destructured config above.
|
|
949
|
+
function logToConsoleAndFile(message) {
|
|
950
|
+
if (forceDebug) {
|
|
951
|
+
console.log(formatLogMessage('debug', message));
|
|
952
|
+
}
|
|
953
|
+
if (debugLogFile && fs) {
|
|
954
|
+
try {
|
|
955
|
+
const timestamp = new Date().toISOString();
|
|
956
|
+
const cleanMessage = stripAnsiColors(message);
|
|
957
|
+
fs.appendFileSync(debugLogFile, `${timestamp} [debug nettools] ${cleanMessage}\n`);
|
|
958
|
+
} catch (_) {
|
|
959
|
+
// Silently fail file logging to avoid disrupting whois operations
|
|
960
|
+
}
|
|
961
|
+
}
|
|
962
|
+
}
|
|
928
963
|
|
|
929
964
|
// Create config-aware cache keys for deduplication
|
|
930
965
|
// Whois: Only include search terms + server (domain registry data is consistent across subdomains)
|
|
@@ -948,10 +983,7 @@ function createNetToolsHandler(config) {
|
|
|
948
983
|
// DNS results are the same regardless of search terms
|
|
949
984
|
|
|
950
985
|
return async function handleNetToolsCheck(domain, fullSubdomain) {
|
|
951
|
-
// Use fullSubdomain parameter instead of originalDomain to maintain consistency
|
|
952
|
-
// with the domain cache fix approach
|
|
953
986
|
const originalDomain = fullSubdomain;
|
|
954
|
-
// Helper function to log to BOTH console and debug file
|
|
955
987
|
|
|
956
988
|
// Check if domain was already detected (skip expensive operations)
|
|
957
989
|
if (typeof isDomainAlreadyDetected === 'function' && isDomainAlreadyDetected(fullSubdomain)) {
|
|
@@ -960,36 +992,7 @@ function createNetToolsHandler(config) {
|
|
|
960
992
|
}
|
|
961
993
|
return;
|
|
962
994
|
}
|
|
963
|
-
|
|
964
|
-
// NOTE: The logToConsoleAndFile function needs to be declared INSIDE this function
|
|
965
|
-
// so it has access to the closure variables (forceDebug, debugLogFile, fs) from the
|
|
966
|
-
// createNetToolsHandler config. This function was being called but not declared
|
|
967
|
-
// within the scope where whoisLookup and whoisLookupWithRetry try to use it.
|
|
968
|
-
// This is why we were getting "logToConsoleAndFile is not defined" errors.
|
|
969
995
|
|
|
970
|
-
// Move the logToConsoleAndFile function declaration from later in the file to here:
|
|
971
|
-
function logToConsoleAndFile(message) {
|
|
972
|
-
// Note: This function needs access to forceDebug, debugLogFile, and fs from the parent scope
|
|
973
|
-
// These are passed in via the config object to createNetToolsHandler
|
|
974
|
-
// forceDebug, debugLogFile, and fs are available in this closure
|
|
975
|
-
|
|
976
|
-
// Always log to console when in debug mode
|
|
977
|
-
if (forceDebug) {
|
|
978
|
-
console.log(formatLogMessage('debug', message));
|
|
979
|
-
}
|
|
980
|
-
|
|
981
|
-
// Also log to file if debug file logging is enabled
|
|
982
|
-
if (debugLogFile && fs) {
|
|
983
|
-
try {
|
|
984
|
-
const timestamp = new Date().toISOString();
|
|
985
|
-
const cleanMessage = stripAnsiColors(message);
|
|
986
|
-
fs.appendFileSync(debugLogFile, `${timestamp} [debug nettools] ${cleanMessage}\n`);
|
|
987
|
-
} catch (logErr) {
|
|
988
|
-
// Silently fail file logging to avoid disrupting whois operations
|
|
989
|
-
}
|
|
990
|
-
}
|
|
991
|
-
}
|
|
992
|
-
|
|
993
996
|
// Determine which domain will be used for dig lookup
|
|
994
997
|
const digDomain = digSubdomain && originalDomain ? originalDomain : domain;
|
|
995
998
|
|
|
@@ -1152,8 +1155,13 @@ function createNetToolsHandler(config) {
|
|
|
1152
1155
|
try {
|
|
1153
1156
|
const lookupPromise = whoisLookupWithRetry(whoisRootDomain, 8000, whoisServer, forceDebug, retryOptions, whoisDelay, logToConsoleAndFile);
|
|
1154
1157
|
pendingWhoisLookups.set(whoisCacheKey, lookupPromise);
|
|
1155
|
-
|
|
1156
|
-
|
|
1158
|
+
// try/finally so a rejected lookup still clears the pending
|
|
1159
|
+
// entry — see matching comment on pendingDigLookups below.
|
|
1160
|
+
try {
|
|
1161
|
+
whoisResult = await lookupPromise;
|
|
1162
|
+
} finally {
|
|
1163
|
+
pendingWhoisLookups.delete(whoisCacheKey);
|
|
1164
|
+
}
|
|
1157
1165
|
|
|
1158
1166
|
// Cache successful results (and certain types of failures)
|
|
1159
1167
|
if (whoisResult.success ||
|
|
@@ -1196,11 +1204,18 @@ function createNetToolsHandler(config) {
|
|
|
1196
1204
|
|
|
1197
1205
|
// Process whois result (whether from cache or fresh lookup)
|
|
1198
1206
|
if (whoisResult) {
|
|
1199
|
-
|
|
1207
|
+
|
|
1200
1208
|
if (whoisResult.success) {
|
|
1209
|
+
// Lowercase the output ONCE — checkWhoisTerms / checkWhoisTermsOr
|
|
1210
|
+
// each call .toLowerCase() on their input independently, which
|
|
1211
|
+
// re-allocates a multi-KB lowercased string per call. Pre-lowering
|
|
1212
|
+
// here lets the AND check, OR check, and matched-term find share
|
|
1213
|
+
// a single allocation.
|
|
1214
|
+
const whoisOutputLower = whoisResult.output.toLowerCase();
|
|
1215
|
+
|
|
1201
1216
|
// Check AND terms if configured
|
|
1202
1217
|
if (hasWhois) {
|
|
1203
|
-
whoisMatched =
|
|
1218
|
+
whoisMatched = whoisTermsLower.every(t => whoisOutputLower.includes(t));
|
|
1204
1219
|
if (whoisMatched && dryRunCallback) {
|
|
1205
1220
|
dryRunCallback(domain, 'whois', 'AND logic', whoisTerms.join(', '), 'All terms found in whois data', {
|
|
1206
1221
|
server: whoisResult.whoisServer || 'default',
|
|
@@ -1214,12 +1229,13 @@ function createNetToolsHandler(config) {
|
|
|
1214
1229
|
}
|
|
1215
1230
|
|
|
1216
1231
|
}
|
|
1217
|
-
|
|
1232
|
+
|
|
1218
1233
|
// Check OR terms if configured
|
|
1219
1234
|
if (hasWhoisOr) {
|
|
1220
|
-
whoisOrMatched =
|
|
1235
|
+
whoisOrMatched = whoisOrTermsLower.some(t => whoisOutputLower.includes(t));
|
|
1221
1236
|
if (whoisOrMatched && dryRunCallback) {
|
|
1222
|
-
const
|
|
1237
|
+
const matchedIdx = whoisOrTermsLower.findIndex(t => whoisOutputLower.includes(t));
|
|
1238
|
+
const matchedTerm = whoisOrTerms[matchedIdx];
|
|
1223
1239
|
dryRunCallback(domain, 'whois', 'OR logic', matchedTerm, 'Term found in whois data', {
|
|
1224
1240
|
server: whoisResult.whoisServer || 'default',
|
|
1225
1241
|
duration: whoisResult.duration,
|
|
@@ -1371,8 +1387,15 @@ function createNetToolsHandler(config) {
|
|
|
1371
1387
|
} else {
|
|
1372
1388
|
const lookupPromise = digLookup(digDomain, digRecordType, 5000);
|
|
1373
1389
|
pendingDigLookups.set(digCacheKey, lookupPromise);
|
|
1374
|
-
|
|
1375
|
-
|
|
1390
|
+
// try/finally so a rejected lookup still clears the pending
|
|
1391
|
+
// entry — otherwise the Map would retain a rejected-Promise
|
|
1392
|
+
// entry forever and any subsequent caller for the same key
|
|
1393
|
+
// would await that rejection.
|
|
1394
|
+
try {
|
|
1395
|
+
digResult = await lookupPromise;
|
|
1396
|
+
} finally {
|
|
1397
|
+
pendingDigLookups.delete(digCacheKey);
|
|
1398
|
+
}
|
|
1376
1399
|
|
|
1377
1400
|
// Cache the result for future use
|
|
1378
1401
|
globalDigResultCache.set(digCacheKey, {
|
|
@@ -1389,9 +1412,13 @@ function createNetToolsHandler(config) {
|
|
|
1389
1412
|
}
|
|
1390
1413
|
|
|
1391
1414
|
if (digResult.success) {
|
|
1415
|
+
// Lowercase the output ONCE — see matching comment in the whois
|
|
1416
|
+
// branch above for rationale.
|
|
1417
|
+
const digOutputLower = digResult.output.toLowerCase();
|
|
1418
|
+
|
|
1392
1419
|
// Check AND terms if configured
|
|
1393
1420
|
if (hasDig) {
|
|
1394
|
-
digMatched =
|
|
1421
|
+
digMatched = digTermsLower.every(t => digOutputLower.includes(t));
|
|
1395
1422
|
if (digMatched && dryRunCallback) {
|
|
1396
1423
|
dryRunCallback(domain, 'dig', 'AND logic', digTerms.join(', '), `All terms found in ${digRecordType} records`, {
|
|
1397
1424
|
queriedDomain: digDomain,
|
|
@@ -1400,12 +1427,13 @@ function createNetToolsHandler(config) {
|
|
|
1400
1427
|
});
|
|
1401
1428
|
}
|
|
1402
1429
|
}
|
|
1403
|
-
|
|
1430
|
+
|
|
1404
1431
|
// Check OR terms if configured
|
|
1405
1432
|
if (hasDigOr) {
|
|
1406
|
-
digOrMatched =
|
|
1433
|
+
digOrMatched = digOrTermsLower.some(t => digOutputLower.includes(t));
|
|
1407
1434
|
if (digOrMatched && dryRunCallback) {
|
|
1408
|
-
const
|
|
1435
|
+
const matchedIdx = digOrTermsLower.findIndex(t => digOutputLower.includes(t));
|
|
1436
|
+
const matchedTerm = digOrTerms[matchedIdx];
|
|
1409
1437
|
dryRunCallback(domain, 'dig', 'OR logic', matchedTerm, `Term found in ${digRecordType} records`, {
|
|
1410
1438
|
queriedDomain: digDomain,
|
|
1411
1439
|
recordType: digRecordType,
|
package/lib/redirect.js
CHANGED
|
@@ -15,6 +15,9 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
|
|
|
15
15
|
const redirectChain = [currentUrl];
|
|
16
16
|
let finalUrl = currentUrl;
|
|
17
17
|
let redirected = false;
|
|
18
|
+
// Hoisted so they're in scope at the return outside the try block below.
|
|
19
|
+
let httpStatus = null;
|
|
20
|
+
let cfRay = null;
|
|
18
21
|
const jsRedirectTimeout = siteConfig.js_redirect_timeout || 5000; // Wait 5s for JS redirects
|
|
19
22
|
const maxRedirects = siteConfig.max_redirects || 10;
|
|
20
23
|
const detectJSPatterns = siteConfig.detect_js_patterns !== false; // Default to true
|
|
@@ -23,7 +26,12 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
|
|
|
23
26
|
const navigationHandler = (frame) => {
|
|
24
27
|
if (frame === page.mainFrame()) {
|
|
25
28
|
const frameUrl = frame.url();
|
|
26
|
-
|
|
29
|
+
// Skip about:blank and chrome-error:// — the latter is what Puppeteer
|
|
30
|
+
// navigates to on DNS/connection failures, and pushing it into the
|
|
31
|
+
// redirect chain produces bogus entries like
|
|
32
|
+
// "chrome-error://chromewebdata/" that downstream consumers
|
|
33
|
+
// (redirectDomains, logs) treat as a real intermediate hop.
|
|
34
|
+
if (frameUrl && frameUrl !== 'about:blank' && !frameUrl.startsWith('chrome-error://') && !redirectChain.includes(frameUrl)) {
|
|
27
35
|
// Check redirect limit before adding
|
|
28
36
|
if (redirectChain.length >= maxRedirects) {
|
|
29
37
|
if (forceDebug) {
|
|
@@ -161,9 +169,21 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
|
|
|
161
169
|
console.log(formatLogMessage('debug', `Using goto options: ${JSON.stringify(gotoOptions)}`));
|
|
162
170
|
}
|
|
163
171
|
|
|
164
|
-
// Initial navigation
|
|
172
|
+
// Initial navigation. Puppeteer's page.goto returns the response for the
|
|
173
|
+
// last HTTP request in the chain (it follows HTTP redirects internally),
|
|
174
|
+
// so response.status() reflects the page that actually rendered, not the
|
|
175
|
+
// 301/302 hop. JS redirects via window.location detected later in this
|
|
176
|
+
// function will land on a different page, in which case httpStatus/cfRay
|
|
177
|
+
// captured here are pre-JS-redirect — a known limitation.
|
|
165
178
|
const response = await page.goto(currentUrl, gotoOptions);
|
|
166
|
-
|
|
179
|
+
if (response) {
|
|
180
|
+
try {
|
|
181
|
+
httpStatus = response.status();
|
|
182
|
+
const headers = response.headers();
|
|
183
|
+
if (headers && headers['cf-ray']) cfRay = headers['cf-ray'];
|
|
184
|
+
} catch (_) { /* response disposed or detached — fine, stays null */ }
|
|
185
|
+
}
|
|
186
|
+
|
|
167
187
|
if (response && response.url() !== currentUrl) {
|
|
168
188
|
// Check redirect limit before adding
|
|
169
189
|
if (redirectChain.length >= maxRedirects) {
|
|
@@ -295,7 +315,7 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
|
|
|
295
315
|
redirectDomains = intermediateDomains;
|
|
296
316
|
}
|
|
297
317
|
|
|
298
|
-
return { finalUrl, redirected, redirectChain, originalUrl: currentUrl, redirectDomains };
|
|
318
|
+
return { finalUrl, redirected, redirectChain, originalUrl: currentUrl, redirectDomains, httpStatus, cfRay };
|
|
299
319
|
}
|
|
300
320
|
|
|
301
321
|
/**
|
|
@@ -306,13 +326,23 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
|
|
|
306
326
|
* @returns {Promise<Array>} Array of detected patterns
|
|
307
327
|
*/
|
|
308
328
|
async function detectCommonJSRedirects(page, forceDebug = false, formatLogMessage) {
|
|
329
|
+
// This function's only externally-visible behavior is the per-pattern
|
|
330
|
+
// debug log below. The return value isn't read by any caller. Bail
|
|
331
|
+
// before the expensive page.evaluate + outerHTML serialization when
|
|
332
|
+
// there's no debug consumer for the result.
|
|
333
|
+
if (!forceDebug) return [];
|
|
334
|
+
|
|
309
335
|
try {
|
|
310
336
|
const redirectPatterns = await page.evaluate(() => {
|
|
311
337
|
const patterns = [];
|
|
312
|
-
|
|
313
|
-
//
|
|
314
|
-
|
|
315
|
-
|
|
338
|
+
|
|
339
|
+
// Cap the source read to 100KB. document.documentElement.outerHTML
|
|
340
|
+
// materializes the full page (potentially many MB on content-heavy
|
|
341
|
+
// sites) AND serializes it over CDP back to Node. JS redirects all
|
|
342
|
+
// appear early — in head meta tags or top-of-body inline scripts —
|
|
343
|
+
// so a head-anchored cap is enough for real-world coverage.
|
|
344
|
+
const pageSource = document.documentElement.outerHTML.substring(0, 100000);
|
|
345
|
+
|
|
316
346
|
// Pattern 1: window.location = "url"
|
|
317
347
|
const locationAssign = pageSource.match(/window\.location\s*=\s*["']([^"']+)["']/g);
|
|
318
348
|
if (locationAssign) {
|