@fanboynz/network-scanner 2.0.64 → 2.0.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/nettools.js CHANGED
@@ -4,11 +4,9 @@
4
4
  */
5
5
 
6
6
  const { exec, execSync } = require('child_process');
7
- const util = require('util');
8
7
  const fs = require('fs');
9
8
  const path = require('path');
10
9
  const { formatLogMessage, messageColors } = require('./colorize');
11
- const execPromise = util.promisify(exec);
12
10
  const ANSI_REGEX = /\x1b\[[0-9;]*m/g;
13
11
 
14
12
  // Cycling index for whois server rotation
@@ -80,7 +78,11 @@ function saveDiskCache(filePath, cache, ttl, maxSize) {
80
78
  }
81
79
  }
82
80
 
83
- // If over max, keep only the newest entries
81
+ // If over max, keep only the newest entries. The file is written as
82
+ // compact JSON (no pretty-printing): saveDiskCache runs in the synchronous
83
+ // 'exit' handler when --dns-cache is set, so any work here directly delays
84
+ // scan exit. Compact JSON is several times faster on multi-megabyte
85
+ // caches and the file is not intended for human reading.
84
86
  if (count > maxSize) {
85
87
  const sorted = Object.entries(entries)
86
88
  .sort((a, b) => b[1].timestamp - a[1].timestamp)
@@ -89,9 +91,9 @@ function saveDiskCache(filePath, cache, ttl, maxSize) {
89
91
  for (const [key, entry] of sorted) {
90
92
  trimmed[key] = entry;
91
93
  }
92
- fs.writeFileSync(filePath, JSON.stringify(trimmed, null, 2));
94
+ fs.writeFileSync(filePath, JSON.stringify(trimmed));
93
95
  } else {
94
- fs.writeFileSync(filePath, JSON.stringify(entries, null, 2));
96
+ fs.writeFileSync(filePath, JSON.stringify(entries));
95
97
  }
96
98
  } catch {
97
99
  // Disk write failed — non-fatal, in-memory cache still works
@@ -125,14 +127,18 @@ function enableDiskCache() {
125
127
  loadDiskCache(DIG_CACHE_FILE, globalDigResultCache, GLOBAL_DIG_CACHE_TTL, GLOBAL_DIG_CACHE_MAX);
126
128
  loadDiskCache(WHOIS_CACHE_FILE, globalWhoisResultCache, GLOBAL_WHOIS_CACHE_TTL, GLOBAL_WHOIS_CACHE_MAX);
127
129
 
128
- // Save caches to disk once on process exit instead of per-lookup
130
+ // Save caches to disk once on process exit instead of per-lookup. The
131
+ // 'exit' handler fires synchronously on normal completion, process.exit()
132
+ // and uncaught exceptions, but NOT when a signal's default action kills
133
+ // the process. We deliberately do NOT install SIGINT/SIGTERM handlers
134
+ // here — nwss.js installs its own async ones that perform browser/VPN
135
+ // cleanup (and must end in process.exit() for this flush to run); a sync
136
+ // handler here would call process.exit(0) first and skip that cleanup.
129
137
  const flushCaches = () => {
130
138
  saveDiskCache(DIG_CACHE_FILE, globalDigResultCache, GLOBAL_DIG_CACHE_TTL, GLOBAL_DIG_CACHE_MAX);
131
139
  saveDiskCache(WHOIS_CACHE_FILE, globalWhoisResultCache, GLOBAL_WHOIS_CACHE_TTL, GLOBAL_WHOIS_CACHE_MAX);
132
140
  };
133
141
  process.on('exit', flushCaches);
134
- process.on('SIGINT', () => { flushCaches(); process.exit(0); });
135
- process.on('SIGTERM', () => { flushCaches(); process.exit(0); });
136
142
  }
137
143
 
138
144
  /**
@@ -217,14 +223,18 @@ function execWithTimeout(command, timeout = 10000) {
217
223
  // Set up timeout
218
224
  const timer = setTimeout(() => {
219
225
  child.kill('SIGTERM');
220
-
221
- // Force kill after 2 seconds if SIGTERM doesn't work
222
- setTimeout(() => {
226
+
227
+ // Force kill after 2 seconds if SIGTERM doesn't work. unref() so this
228
+ // tail timer doesn't keep the event loop alive past scan completion —
229
+ // a dig that times out near the end of a scan would otherwise delay
230
+ // exit by ~2 seconds.
231
+ const killTimer = setTimeout(() => {
223
232
  if (!child.killed) {
224
233
  child.kill('SIGKILL');
225
234
  }
226
235
  }, 2000);
227
-
236
+ killTimer.unref();
237
+
228
238
  reject(new Error(`Command timeout after ${timeout}ms: ${command}`));
229
239
  }, timeout);
230
240
 
@@ -925,6 +935,31 @@ function createNetToolsHandler(config) {
925
935
  const hasWhoisOr = whoisOrTerms && Array.isArray(whoisOrTerms) && whoisOrTerms.length > 0;
926
936
  const hasDig = digTerms && Array.isArray(digTerms) && digTerms.length > 0;
927
937
  const hasDigOr = digOrTerms && Array.isArray(digOrTerms) && digOrTerms.length > 0;
938
+
939
+ // Pre-lowercase search terms once per handler so the per-domain check loop
940
+ // doesn't re-lowercase the same constants for every output it scans.
941
+ const whoisTermsLower = hasWhois ? whoisTerms.map(t => t.toLowerCase()) : null;
942
+ const whoisOrTermsLower = hasWhoisOr ? whoisOrTerms.map(t => t.toLowerCase()) : null;
943
+ const digTermsLower = hasDig ? digTerms.map(t => t.toLowerCase()) : null;
944
+ const digOrTermsLower = hasDigOr ? digOrTerms.map(t => t.toLowerCase()) : null;
945
+
946
+ // Hoisted out of handleNetToolsCheck so the closure is constructed once per
947
+ // handler rather than once per invocation. References forceDebug, debugLogFile,
948
+ // and fs from the destructured config above.
949
+ function logToConsoleAndFile(message) {
950
+ if (forceDebug) {
951
+ console.log(formatLogMessage('debug', message));
952
+ }
953
+ if (debugLogFile && fs) {
954
+ try {
955
+ const timestamp = new Date().toISOString();
956
+ const cleanMessage = stripAnsiColors(message);
957
+ fs.appendFileSync(debugLogFile, `${timestamp} [debug nettools] ${cleanMessage}\n`);
958
+ } catch (_) {
959
+ // Silently fail file logging to avoid disrupting whois operations
960
+ }
961
+ }
962
+ }
928
963
 
929
964
  // Create config-aware cache keys for deduplication
930
965
  // Whois: Only include search terms + server (domain registry data is consistent across subdomains)
@@ -948,10 +983,7 @@ function createNetToolsHandler(config) {
948
983
  // DNS results are the same regardless of search terms
949
984
 
950
985
  return async function handleNetToolsCheck(domain, fullSubdomain) {
951
- // Use fullSubdomain parameter instead of originalDomain to maintain consistency
952
- // with the domain cache fix approach
953
986
  const originalDomain = fullSubdomain;
954
- // Helper function to log to BOTH console and debug file
955
987
 
956
988
  // Check if domain was already detected (skip expensive operations)
957
989
  if (typeof isDomainAlreadyDetected === 'function' && isDomainAlreadyDetected(fullSubdomain)) {
@@ -960,36 +992,7 @@ function createNetToolsHandler(config) {
960
992
  }
961
993
  return;
962
994
  }
963
-
964
- // NOTE: The logToConsoleAndFile function needs to be declared INSIDE this function
965
- // so it has access to the closure variables (forceDebug, debugLogFile, fs) from the
966
- // createNetToolsHandler config. This function was being called but not declared
967
- // within the scope where whoisLookup and whoisLookupWithRetry try to use it.
968
- // This is why we were getting "logToConsoleAndFile is not defined" errors.
969
995
 
970
- // Move the logToConsoleAndFile function declaration from later in the file to here:
971
- function logToConsoleAndFile(message) {
972
- // Note: This function needs access to forceDebug, debugLogFile, and fs from the parent scope
973
- // These are passed in via the config object to createNetToolsHandler
974
- // forceDebug, debugLogFile, and fs are available in this closure
975
-
976
- // Always log to console when in debug mode
977
- if (forceDebug) {
978
- console.log(formatLogMessage('debug', message));
979
- }
980
-
981
- // Also log to file if debug file logging is enabled
982
- if (debugLogFile && fs) {
983
- try {
984
- const timestamp = new Date().toISOString();
985
- const cleanMessage = stripAnsiColors(message);
986
- fs.appendFileSync(debugLogFile, `${timestamp} [debug nettools] ${cleanMessage}\n`);
987
- } catch (logErr) {
988
- // Silently fail file logging to avoid disrupting whois operations
989
- }
990
- }
991
- }
992
-
993
996
  // Determine which domain will be used for dig lookup
994
997
  const digDomain = digSubdomain && originalDomain ? originalDomain : domain;
995
998
 
@@ -1152,8 +1155,13 @@ function createNetToolsHandler(config) {
1152
1155
  try {
1153
1156
  const lookupPromise = whoisLookupWithRetry(whoisRootDomain, 8000, whoisServer, forceDebug, retryOptions, whoisDelay, logToConsoleAndFile);
1154
1157
  pendingWhoisLookups.set(whoisCacheKey, lookupPromise);
1155
- whoisResult = await lookupPromise;
1156
- pendingWhoisLookups.delete(whoisCacheKey);
1158
+ // try/finally so a rejected lookup still clears the pending
1159
+ // entry — see matching comment on pendingDigLookups below.
1160
+ try {
1161
+ whoisResult = await lookupPromise;
1162
+ } finally {
1163
+ pendingWhoisLookups.delete(whoisCacheKey);
1164
+ }
1157
1165
 
1158
1166
  // Cache successful results (and certain types of failures)
1159
1167
  if (whoisResult.success ||
@@ -1196,11 +1204,18 @@ function createNetToolsHandler(config) {
1196
1204
 
1197
1205
  // Process whois result (whether from cache or fresh lookup)
1198
1206
  if (whoisResult) {
1199
-
1207
+
1200
1208
  if (whoisResult.success) {
1209
+ // Lowercase the output ONCE — checkWhoisTerms / checkWhoisTermsOr
1210
+ // each call .toLowerCase() on their input independently, which
1211
+ // re-allocates a multi-KB lowercased string per call. Pre-lowering
1212
+ // here lets the AND check, OR check, and matched-term find share
1213
+ // a single allocation.
1214
+ const whoisOutputLower = whoisResult.output.toLowerCase();
1215
+
1201
1216
  // Check AND terms if configured
1202
1217
  if (hasWhois) {
1203
- whoisMatched = checkWhoisTerms(whoisResult.output, whoisTerms);
1218
+ whoisMatched = whoisTermsLower.every(t => whoisOutputLower.includes(t));
1204
1219
  if (whoisMatched && dryRunCallback) {
1205
1220
  dryRunCallback(domain, 'whois', 'AND logic', whoisTerms.join(', '), 'All terms found in whois data', {
1206
1221
  server: whoisResult.whoisServer || 'default',
@@ -1214,12 +1229,13 @@ function createNetToolsHandler(config) {
1214
1229
  }
1215
1230
 
1216
1231
  }
1217
-
1232
+
1218
1233
  // Check OR terms if configured
1219
1234
  if (hasWhoisOr) {
1220
- whoisOrMatched = checkWhoisTermsOr(whoisResult.output, whoisOrTerms);
1235
+ whoisOrMatched = whoisOrTermsLower.some(t => whoisOutputLower.includes(t));
1221
1236
  if (whoisOrMatched && dryRunCallback) {
1222
- const matchedTerm = whoisOrTerms.find(term => whoisResult.output.toLowerCase().includes(term.toLowerCase()));
1237
+ const matchedIdx = whoisOrTermsLower.findIndex(t => whoisOutputLower.includes(t));
1238
+ const matchedTerm = whoisOrTerms[matchedIdx];
1223
1239
  dryRunCallback(domain, 'whois', 'OR logic', matchedTerm, 'Term found in whois data', {
1224
1240
  server: whoisResult.whoisServer || 'default',
1225
1241
  duration: whoisResult.duration,
@@ -1371,8 +1387,15 @@ function createNetToolsHandler(config) {
1371
1387
  } else {
1372
1388
  const lookupPromise = digLookup(digDomain, digRecordType, 5000);
1373
1389
  pendingDigLookups.set(digCacheKey, lookupPromise);
1374
- digResult = await lookupPromise;
1375
- pendingDigLookups.delete(digCacheKey);
1390
+ // try/finally so a rejected lookup still clears the pending
1391
+ // entry — otherwise the Map would retain a rejected-Promise
1392
+ // entry forever and any subsequent caller for the same key
1393
+ // would await that rejection.
1394
+ try {
1395
+ digResult = await lookupPromise;
1396
+ } finally {
1397
+ pendingDigLookups.delete(digCacheKey);
1398
+ }
1376
1399
 
1377
1400
  // Cache the result for future use
1378
1401
  globalDigResultCache.set(digCacheKey, {
@@ -1389,9 +1412,13 @@ function createNetToolsHandler(config) {
1389
1412
  }
1390
1413
 
1391
1414
  if (digResult.success) {
1415
+ // Lowercase the output ONCE — see matching comment in the whois
1416
+ // branch above for rationale.
1417
+ const digOutputLower = digResult.output.toLowerCase();
1418
+
1392
1419
  // Check AND terms if configured
1393
1420
  if (hasDig) {
1394
- digMatched = checkDigTerms(digResult.output, digTerms);
1421
+ digMatched = digTermsLower.every(t => digOutputLower.includes(t));
1395
1422
  if (digMatched && dryRunCallback) {
1396
1423
  dryRunCallback(domain, 'dig', 'AND logic', digTerms.join(', '), `All terms found in ${digRecordType} records`, {
1397
1424
  queriedDomain: digDomain,
@@ -1400,12 +1427,13 @@ function createNetToolsHandler(config) {
1400
1427
  });
1401
1428
  }
1402
1429
  }
1403
-
1430
+
1404
1431
  // Check OR terms if configured
1405
1432
  if (hasDigOr) {
1406
- digOrMatched = checkDigTermsOr(digResult.output, digOrTerms);
1433
+ digOrMatched = digOrTermsLower.some(t => digOutputLower.includes(t));
1407
1434
  if (digOrMatched && dryRunCallback) {
1408
- const matchedTerm = digOrTerms.find(term => digResult.output.toLowerCase().includes(term.toLowerCase()));
1435
+ const matchedIdx = digOrTermsLower.findIndex(t => digOutputLower.includes(t));
1436
+ const matchedTerm = digOrTerms[matchedIdx];
1409
1437
  dryRunCallback(domain, 'dig', 'OR logic', matchedTerm, `Term found in ${digRecordType} records`, {
1410
1438
  queriedDomain: digDomain,
1411
1439
  recordType: digRecordType,
package/lib/redirect.js CHANGED
@@ -15,6 +15,9 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
15
15
  const redirectChain = [currentUrl];
16
16
  let finalUrl = currentUrl;
17
17
  let redirected = false;
18
+ // Hoisted so they're in scope at the return outside the try block below.
19
+ let httpStatus = null;
20
+ let cfRay = null;
18
21
  const jsRedirectTimeout = siteConfig.js_redirect_timeout || 5000; // Wait 5s for JS redirects
19
22
  const maxRedirects = siteConfig.max_redirects || 10;
20
23
  const detectJSPatterns = siteConfig.detect_js_patterns !== false; // Default to true
@@ -23,7 +26,12 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
23
26
  const navigationHandler = (frame) => {
24
27
  if (frame === page.mainFrame()) {
25
28
  const frameUrl = frame.url();
26
- if (frameUrl && frameUrl !== 'about:blank' && !redirectChain.includes(frameUrl)) {
29
+ // Skip about:blank and chrome-error:// — the latter is what Puppeteer
30
+ // navigates to on DNS/connection failures, and pushing it into the
31
+ // redirect chain produces bogus entries like
32
+ // "chrome-error://chromewebdata/" that downstream consumers
33
+ // (redirectDomains, logs) treat as a real intermediate hop.
34
+ if (frameUrl && frameUrl !== 'about:blank' && !frameUrl.startsWith('chrome-error://') && !redirectChain.includes(frameUrl)) {
27
35
  // Check redirect limit before adding
28
36
  if (redirectChain.length >= maxRedirects) {
29
37
  if (forceDebug) {
@@ -161,9 +169,21 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
161
169
  console.log(formatLogMessage('debug', `Using goto options: ${JSON.stringify(gotoOptions)}`));
162
170
  }
163
171
 
164
- // Initial navigation
172
+ // Initial navigation. Puppeteer's page.goto returns the response for the
173
+ // last HTTP request in the chain (it follows HTTP redirects internally),
174
+ // so response.status() reflects the page that actually rendered, not the
175
+ // 301/302 hop. JS redirects via window.location detected later in this
176
+ // function will land on a different page, in which case httpStatus/cfRay
177
+ // captured here are pre-JS-redirect — a known limitation.
165
178
  const response = await page.goto(currentUrl, gotoOptions);
166
-
179
+ if (response) {
180
+ try {
181
+ httpStatus = response.status();
182
+ const headers = response.headers();
183
+ if (headers && headers['cf-ray']) cfRay = headers['cf-ray'];
184
+ } catch (_) { /* response disposed or detached — fine, stays null */ }
185
+ }
186
+
167
187
  if (response && response.url() !== currentUrl) {
168
188
  // Check redirect limit before adding
169
189
  if (redirectChain.length >= maxRedirects) {
@@ -295,7 +315,7 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
295
315
  redirectDomains = intermediateDomains;
296
316
  }
297
317
 
298
- return { finalUrl, redirected, redirectChain, originalUrl: currentUrl, redirectDomains };
318
+ return { finalUrl, redirected, redirectChain, originalUrl: currentUrl, redirectDomains, httpStatus, cfRay };
299
319
  }
300
320
 
301
321
  /**
@@ -306,13 +326,23 @@ async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOp
306
326
  * @returns {Promise<Array>} Array of detected patterns
307
327
  */
308
328
  async function detectCommonJSRedirects(page, forceDebug = false, formatLogMessage) {
329
+ // This function's only externally-visible behavior is the per-pattern
330
+ // debug log below. The return value isn't read by any caller. Bail
331
+ // before the expensive page.evaluate + outerHTML serialization when
332
+ // there's no debug consumer for the result.
333
+ if (!forceDebug) return [];
334
+
309
335
  try {
310
336
  const redirectPatterns = await page.evaluate(() => {
311
337
  const patterns = [];
312
-
313
- // Check for common redirect patterns in page source
314
- const pageSource = document.documentElement.outerHTML;
315
-
338
+
339
+ // Cap the scanned source to the first 100,000 characters. The outerHTML
340
+ // getter still builds the full page string (potentially many MB on
341
+ // content-heavy sites) before substring() trims it, so the cap bounds
342
+ // the pattern scans below, not the serialization itself. JS redirects
343
+ // typically appear early — in head meta tags or top-of-body inline scripts —
344
+ const pageSource = document.documentElement.outerHTML.substring(0, 100000);
345
+
316
346
  // Pattern 1: window.location = "url"
317
347
  const locationAssign = pageSource.match(/window\.location\s*=\s*["']([^"']+)["']/g);
318
348
  if (locationAssign) {