@fanboynz/network-scanner 2.0.64 → 2.0.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/nwss.js CHANGED
@@ -9,6 +9,7 @@ const fs = require('fs');
9
9
  const os = require('os');
10
10
  const psl = require('psl');
11
11
  const path = require('path');
12
+ const dnsPromises = require('node:dns/promises');
12
13
  const { createGrepHandler, validateGrepAvailability } = require('./lib/grep');
13
14
  const { compressMultipleFiles, formatFileSize } = require('./lib/compress');
14
15
  const { parseSearchStrings, createResponseHandler, createCurlHandler } = require('./lib/searchstring');
@@ -50,7 +51,7 @@ const { isGhostCursorAvailable, createGhostCursor, ghostMove, ghostClick, ghostR
50
51
  const { createGlobalHelpers, getTotalDomainsSkipped, getDetectedDomainsCount } = require('./lib/domain-cache');
51
52
  const { createSmartCache } = require('./lib/smart-cache'); // Smart cache system
52
53
  const { clearPersistentCache } = require('./lib/smart-cache');
53
- const { needsProxy, getProxyArgs, applyProxyAuth, getProxyInfo, testProxy } = require('./lib/proxy');
54
+ const { needsProxy, getProxyArgs, applyProxyAuth, getProxyInfo, testProxy, prepareSocksRelays, closeAllSocksRelays } = require('./lib/proxy');
54
55
  // Dry run functionality
55
56
  const { initializeDryRunCollections, addDryRunMatch, addDryRunNetTools, processDryRunResults, writeDryRunOutput } = require('./lib/dry-run');
56
57
  // Enhanced site data clearing functionality
@@ -266,6 +267,13 @@ if (fs.existsSync(NWSSCONFIG_PATH)) {
266
267
  }
267
268
 
268
269
  const headfulMode = args.includes('--headful');
270
+ // Sites (esp. video/streaming) call element.requestFullscreen() on load or
271
+ // click. In --headful that hijacks the real Chrome window into true
272
+ // fullscreen, forcing a manual ESC. Neutralize the Fullscreen API by
273
+ // default so it can't. Harmless in headless (no screen — the API is
274
+ // already inert there), so default-on keeps headful consistent with the
275
+ // primary headless path. --allow-fullscreen restores native behavior.
276
+ const allowFullscreen = args.includes('--allow-fullscreen');
269
277
  const SOURCES_FOLDER = 'sources';
270
278
 
271
279
  let outputFile = null;
@@ -326,6 +334,31 @@ const cacheRequests = args.includes('--cache-requests');
326
334
  const dnsCacheMode = args.includes('--dns-cache');
327
335
  if (dnsCacheMode) enableDiskCache();
328
336
 
337
+ // DNS pre-check before page.goto() — default-on, --no-dns-precheck disables.
338
+ // Filters NXDOMAIN / unresolvable hostnames in <100ms before paying the
339
+ // ~5-15s Puppeteer + Cloudflare detection round-trip on each.
340
+ const dnsPrecheckEnabled = !args.includes('--no-dns-precheck');
341
+ const dnsPrecheckTimeoutMs = 2000;
342
+
343
// Per-scan cache of negative DNS lookups. OS resolvers don't always cache
// NXDOMAIN responses, and a scan can hit the same dead hostname many times
// (different URL paths on the same site). Positive results are left to the
// OS cache; this failure cache avoids repeated lookup latency for known-dead
// hosts. Size is capped at DNS_NEGATIVE_CACHE_MAX with FIFO eviction so
// pathological scans (thousands of unique dead hosts) can't grow it
// unboundedly.
const dnsNegativeCache = new Map(); // hostname -> { error, timestamp }
const DNS_NEGATIVE_CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes; readers compare entry.timestamp against this
const DNS_NEGATIVE_CACHE_MAX = 1000;
let dnsPrecheckSkips = 0; // number of URLs skipped by the DNS pre-check

/**
 * Record a failed DNS lookup for `hostname`, stamped with the current time.
 *
 * Re-recording a hostname that is already cached refreshes the entry and
 * moves it to the newest position without evicting anything else. A new
 * hostname evicts the oldest entry once the cache holds
 * DNS_NEGATIVE_CACHE_MAX entries.
 *
 * @param {string} hostname - Hostname that failed to resolve.
 * @param {string} error - Error code or message describing the failure.
 * @returns {void}
 */
function dnsNegativeCacheSet(hostname, error) {
  // Map.set() on an existing key keeps the key's original insertion slot, so
  // drop it first: the refreshed entry then counts as the newest one, and the
  // capacity check below only fires when a new key is really being added.
  dnsNegativeCache.delete(hostname);
  while (dnsNegativeCache.size >= DNS_NEGATIVE_CACHE_MAX) {
    // Map iterates in insertion order, so the first key is the oldest entry.
    dnsNegativeCache.delete(dnsNegativeCache.keys().next().value);
  }
  dnsNegativeCache.set(hostname, { error, timestamp: Date.now() });
}
361
+
329
362
  let validateRulesFile = null;
330
363
  const validateRulesIndex = args.findIndex(arg => arg === '--validate-rules');
331
364
  if (validateRulesIndex !== -1 && args[validateRulesIndex + 1] && !args[validateRulesIndex + 1].startsWith('--')) {
@@ -360,6 +393,12 @@ let adblockEnabled = false;
360
393
  let adblockMatcher = null;
361
394
  let adblockStats = { blocked: 0, allowed: 0 };
362
395
 
396
+ // Cloudflare scan-wide stats. errorPages counts URLs where the returned page
397
+ // was a Cloudflare-served 5xx origin error (522/523/etc.) — no bypass
398
+ // possible, useful signal for diagnosing dead-origin scans. Named distinct
399
+ // from the local cloudflareStats = getCacheStats() in the debug stats block.
400
+ let cloudflareScanStats = { errorPages: 0 };
401
+
363
402
  // Validate --adblock-rules usage - ignore if used incorrectly instead of erroring
364
403
  if (adblockRulesMode) {
365
404
  if (!outputFile) {
@@ -478,78 +517,10 @@ if (testValidation) {
478
517
  }
479
518
  }
480
519
 
481
- if (validateConfig) {
482
- console.log(`\n${messageColors.processing('Validating configuration file...')}`);
483
- try {
484
- const validation = validateFullConfig(config, { forceDebug, silentMode });
485
-
486
- // Validate referrer_headers format
487
- for (const site of sites) {
488
- if (site.referrer_headers && typeof site.referrer_headers === 'object' && !Array.isArray(site.referrer_headers)) {
489
- const validation = validateReferrerConfig(site.referrer_headers);
490
- if (!validation.isValid) {
491
- console.warn(`⚠ Invalid referrer_headers configuration: ${validation.errors.join(', ')}`);
492
- }
493
- if (validation.warnings.length > 0) {
494
- console.warn(`⚠ Referrer warnings: ${validation.warnings.join(', ')}`);
495
- }
496
- }
497
- // Validate referrer_disable format
498
- if (site.referrer_disable) {
499
- const disableValidation = validateReferrerDisable(site.referrer_disable);
500
- if (!disableValidation.isValid) {
501
- console.warn(`⚠ Invalid referrer_disable configuration: ${disableValidation.errors.join(', ')}`);
502
- }
503
- if (disableValidation.warnings.length > 0) {
504
- console.warn(`⚠ Referrer disable warnings: ${disableValidation.warnings.join(', ')}`);
505
- }
506
- }
507
- }
508
-
509
- // Validate VPN configurations
510
- for (const site of sites) {
511
- if (site.vpn) {
512
- const vpnNorm = normalizeVpnConfig(site.vpn);
513
- const vpnValidation = validateVpnConfig(vpnNorm);
514
- if (!vpnValidation.isValid) {
515
- console.warn(`⚠ Invalid vpn configuration for ${site.url}: ${vpnValidation.errors.join(', ')}`);
516
- }
517
- if (vpnValidation.warnings.length > 0) {
518
- vpnValidation.warnings.forEach(w => console.warn(`⚠ VPN warning for ${site.url}: ${w}`));
519
- }
520
- }
521
- if (site.openvpn) {
522
- const ovpnNorm = normalizeOvpnConfig(site.openvpn);
523
- const ovpnValidation = validateOvpnConfig(ovpnNorm);
524
- if (!ovpnValidation.isValid) {
525
- console.warn(`⚠ Invalid openvpn configuration for ${site.url}: ${ovpnValidation.errors.join(', ')}`);
526
- }
527
- if (ovpnValidation.warnings.length > 0) {
528
- ovpnValidation.warnings.forEach(w => console.warn(`⚠ OpenVPN warning for ${site.url}: ${w}`));
529
- }
530
- }
531
- if (site.vpn && site.openvpn) {
532
- console.warn(`⚠ ${site.url} has both vpn and openvpn configured — only one will be used (vpn takes precedence)`);
533
- }
534
- }
535
-
536
- if (validation.isValid) {
537
- console.log(`${messageColors.success('✅ Configuration is valid!')}`);
538
- console.log(`${messageColors.info('Summary:')} ${validation.summary.validSites}/${validation.summary.totalSites} sites valid`);
539
- if (validation.summary.sitesWithWarnings > 0) {
540
- console.log(`${messageColors.warn('⚠ Warnings:')} ${validation.summary.sitesWithWarnings} sites have warnings`);
541
- }
542
- process.exit(0);
543
- } else {
544
- console.log(`${messageColors.error('❌ Configuration validation failed!')}`);
545
- console.log(`${messageColors.error('Errors:')} ${validation.globalErrors.length} global, ${validation.summary.sitesWithErrors} site-specific`);
546
- process.exit(1);
547
- }
548
- } catch (validationErr) {
549
- console.error(`❌ Validation failed: ${validationErr.message}`);
550
- process.exit(1);
551
- }
552
- }
520
+ // Note: --validate-config is handled further down, AFTER the config file is
521
+ // loaded and `config`/`sites` are populated. Running it here would fail with
522
+ // "Cannot access 'config' before initialization" since those are declared
523
+ // later in the module.
553
524
 
554
525
  if (validateRules || validateRulesFile) {
555
526
  const filesToValidate = validateRulesFile ? [validateRulesFile] : [outputFile, compareFile].filter(Boolean);
@@ -705,6 +676,8 @@ General Options:
705
676
  --custom-json <file> Use a custom config JSON file instead of config.json
706
677
  --headful Launch browser with GUI (not headless)
707
678
  --keep-open Keep browser open after scan completes (use with --headful)
679
+ --allow-fullscreen Allow sites to use the Fullscreen API. By default it is
680
+ neutralized so sites can't hijack the window in --headful
708
681
  --use-puppeteer-core Use puppeteer-core with system Chrome instead of bundled Chromium
709
682
  --use-obscura Connect to running Obscura CDP server (ws://127.0.0.1:9222 or OBSCURA_WS env)
710
683
  Skips fingerprint injection — Obscura provides built-in stealth
@@ -721,6 +694,9 @@ General Options:
721
694
  Validation Options:
722
695
  --cache-requests Cache HTTP requests to avoid re-requesting same URLs within scan
723
696
  --dns-cache Persist dig/whois results to disk between runs (3hr/4hr TTL)
697
+ --no-dns-precheck Disable per-URL DNS resolution check before page navigation.
698
+ By default, URLs whose hostname doesn't resolve are skipped
699
+ immediately (saves ~5-15s of Puppeteer time per dead host).
724
700
  --validate-config Validate config.json file and exit
725
701
  --validate-rules [file] Validate rule file format (uses --output/--compare files if no file specified)
726
702
  --clean-rules [file] Clean rule files by removing invalid lines and optionally duplicates (uses --output/--compare files if no file specified)
@@ -909,10 +885,86 @@ const {
909
885
  disable_ad_tagging = true,
910
886
  max_concurrent_sites = 6,
911
887
  resource_cleanup_interval = 80,
912
- comments: globalComments,
913
- ...otherGlobalConfig
888
+ comments: globalComments,
889
+ ...otherGlobalConfig
914
890
  } = config;
915
891
 
892
// --validate-config handler. It must sit below the point where `config` and
// `sites` are populated; an earlier placement above the config load failed
// with a TDZ "Cannot access 'config' before initialization" error.
// Prints per-site referrer / VPN / OpenVPN problems as warnings, then exits
// with code 0 when validateFullConfig reports a valid config and 1 otherwise
// (also 1 when validation itself throws).
if (validateConfig) {
  console.log(`\n${messageColors.processing('Validating configuration file...')}`);
  try {
    const validation = validateFullConfig(config, { forceDebug, silentMode });

    // Pass 1: referrer settings for every site.
    for (const site of sites) {
      // referrer_headers is only checked when it is a plain (non-array) object.
      const headers = site.referrer_headers;
      if (headers && typeof headers === 'object' && !Array.isArray(headers)) {
        const headerCheck = validateReferrerConfig(headers);
        if (!headerCheck.isValid) {
          console.warn(`⚠ Invalid referrer_headers configuration: ${headerCheck.errors.join(', ')}`);
        }
        if (headerCheck.warnings.length > 0) {
          console.warn(`⚠ Referrer warnings: ${headerCheck.warnings.join(', ')}`);
        }
      }

      // referrer_disable is checked whenever it is set to a truthy value.
      const disableSetting = site.referrer_disable;
      if (disableSetting) {
        const disableCheck = validateReferrerDisable(disableSetting);
        if (!disableCheck.isValid) {
          console.warn(`⚠ Invalid referrer_disable configuration: ${disableCheck.errors.join(', ')}`);
        }
        if (disableCheck.warnings.length > 0) {
          console.warn(`⚠ Referrer disable warnings: ${disableCheck.warnings.join(', ')}`);
        }
      }
    }

    // Pass 2: VPN and OpenVPN settings for every site. Kept as a separate
    // loop so all referrer output is printed before any VPN output.
    for (const site of sites) {
      if (site.vpn) {
        const vpnCheck = validateVpnConfig(normalizeVpnConfig(site.vpn));
        if (!vpnCheck.isValid) {
          console.warn(`⚠ Invalid vpn configuration for ${site.url}: ${vpnCheck.errors.join(', ')}`);
        }
        if (vpnCheck.warnings.length > 0) {
          vpnCheck.warnings.forEach((warning) => {
            console.warn(`⚠ VPN warning for ${site.url}: ${warning}`);
          });
        }
      }

      if (site.openvpn) {
        const ovpnCheck = validateOvpnConfig(normalizeOvpnConfig(site.openvpn));
        if (!ovpnCheck.isValid) {
          console.warn(`⚠ Invalid openvpn configuration for ${site.url}: ${ovpnCheck.errors.join(', ')}`);
        }
        if (ovpnCheck.warnings.length > 0) {
          ovpnCheck.warnings.forEach((warning) => {
            console.warn(`⚠ OpenVPN warning for ${site.url}: ${warning}`);
          });
        }
      }

      if (site.vpn && site.openvpn) {
        console.warn(`⚠ ${site.url} has both vpn and openvpn configured — only one will be used (vpn takes precedence)`);
      }
    }

    // The overall verdict comes from validateFullConfig alone; the per-site
    // warnings printed above do not change the exit code.
    if (!validation.isValid) {
      console.log(`${messageColors.error('❌ Configuration validation failed!')}`);
      console.log(`${messageColors.error('Errors:')} ${validation.globalErrors.length} global, ${validation.summary.sitesWithErrors} site-specific`);
      process.exit(1);
    } else {
      console.log(`${messageColors.success('✅ Configuration is valid!')}`);
      console.log(`${messageColors.info('Summary:')} ${validation.summary.validSites}/${validation.summary.totalSites} sites valid`);
      if (validation.summary.sitesWithWarnings > 0) {
        console.log(`${messageColors.warn('⚠ Warnings:')} ${validation.summary.sitesWithWarnings} sites have warnings`);
      }
      process.exit(0);
    }
  } catch (validationErr) {
    // Any exception thrown while validating is reported as a failed run.
    console.error(`❌ Validation failed: ${validationErr.message}`);
    process.exit(1);
  }
}
967
+
916
968
  // Pre-compile global blocked regexes ONCE (used in every processUrl call)
917
969
  const globalBlockedRegexes = Array.isArray(globalBlocked)
918
970
  ? globalBlocked.map(pattern => new RegExp(pattern))
@@ -1817,6 +1869,7 @@ function setupFrameHandling(page, forceDebug) {
1817
1869
  ovpnDisconnectAll(forceDebug);
1818
1870
  cleanupCloudflareCache();
1819
1871
  purgeStaleTrackers();
1872
+ try { await closeAllSocksRelays(forceDebug); } catch (_) {}
1820
1873
  }
1821
1874
 
1822
1875
  let siteCounter = 0;
@@ -2424,6 +2477,29 @@ function setupFrameHandling(page, forceDebug) {
2424
2477
  } else if (forceDebug) {
2425
2478
  console.log(formatLogMessage('debug', `Skipping fingerprint injection — Obscura provides built-in stealth`));
2426
2479
  }
2480
+
2481
+ // Neutralize the Fullscreen API before any page script runs so a
2482
+ // site can't force the real browser window fullscreen in --headful
2483
+ // (or trip an anti-bot check that reads document.fullscreenElement).
2484
+ // requestFullscreen is stubbed to a resolved no-op — which is also
2485
+ // how browsers already behave when it's called without a user
2486
+ // gesture, so this looks normal, not automated. fullscreenElement
2487
+ // stays null naturally since we never enter fullscreen.
2488
+ if (!allowFullscreen) {
2489
+ try {
2490
+ await page.evaluateOnNewDocument(() => {
2491
+ const noop = function () { return Promise.resolve(); };
2492
+ const legacyNoop = function () {};
2493
+ try { Element.prototype.requestFullscreen = noop; } catch (_) {}
2494
+ try { Element.prototype.webkitRequestFullscreen = legacyNoop; } catch (_) {}
2495
+ try { Element.prototype.webkitRequestFullScreen = legacyNoop; } catch (_) {}
2496
+ try { Element.prototype.mozRequestFullScreen = legacyNoop; } catch (_) {}
2497
+ try { Element.prototype.msRequestFullscreen = legacyNoop; } catch (_) {}
2498
+ });
2499
+ } catch (fsErr) {
2500
+ if (forceDebug) console.log(formatLogMessage('debug', `Fullscreen neutralization injection failed: ${fsErr.message}`));
2501
+ }
2502
+ }
2427
2503
 
2428
2504
  // Client Hints protection for Chrome user agents (skipped under Obscura — it sets its own)
2429
2505
  if (!useObscura && siteConfig.userAgent && siteConfig.userAgent.toLowerCase().includes('chrome')) {
@@ -3425,7 +3501,7 @@ function setupFrameHandling(page, forceDebug) {
3425
3501
  }
3426
3502
  }
3427
3503
 
3428
- const { finalUrl, redirected, redirectChain, originalUrl, redirectDomains } = navigationResult;
3504
+ const { finalUrl, redirected, redirectChain, originalUrl, redirectDomains, httpStatus, cfRay } = navigationResult;
3429
3505
 
3430
3506
  // Check for same-page reload loops BEFORE redirect processing
3431
3507
  const loadCount = pageLoadHistory.get(currentUrl) || 0;
@@ -3534,8 +3610,14 @@ function setupFrameHandling(page, forceDebug) {
3534
3610
  }
3535
3611
  }
3536
3612
 
3537
- // Handle all Cloudflare protections using the enhanced module
3538
- const cloudflareResult = await handleCloudflareProtection(page, currentUrl, siteConfig, forceDebug);
3613
+ // Handle all Cloudflare protections using the enhanced module. Pass
3614
+ // httpStatus and cfRay captured at goto time so the outcome log can
3615
+ // surface them — Puppeteer's response object is only available
3616
+ // immediately after page.goto, so handleCloudflareProtection can't
3617
+ // recover them from `page` alone.
3618
+ const cloudflareResult = await handleCloudflareProtection(page, currentUrl, siteConfig, forceDebug, { httpStatus, cfRay });
3619
+
3620
+ if (cloudflareResult.cloudflareErrorPage) cloudflareScanStats.errorPages++;
3539
3621
 
3540
3622
  // Check if Cloudflare handling exceeded max retries and should terminate processing
3541
3623
  if (!cloudflareResult.overallSuccess &&
@@ -3568,7 +3650,10 @@ function setupFrameHandling(page, forceDebug) {
3568
3650
  console.warn(` - ${error}`);
3569
3651
  });
3570
3652
  // Continue with scan despite Cloudflare issues
3571
- } else if (cloudflareResult.verificationChallenge?.success && forceDebug) {
3653
+ } else if (cloudflareResult.verificationChallenge?.attempted && cloudflareResult.verificationChallenge?.success && forceDebug) {
3654
+ // Require attempted === true so we don't log "Challenge solved using:
3655
+ // undefined" for pages that had no challenge to solve (success: true
3656
+ // is the natural state for that case).
3572
3657
  console.log(formatLogMessage('debug', `[cloudflare] Challenge solved using: ${cloudflareResult.verificationChallenge.method}`));
3573
3658
  }
3574
3659
 
@@ -4277,6 +4362,19 @@ function setupFrameHandling(page, forceDebug) {
4277
4362
  // Sort tasks so proxy groups are contiguous — direct connections first, then each proxy
4278
4363
  allTasks.sort((a, b) => proxyKeyFor(a.config).localeCompare(proxyKeyFor(b.config)));
4279
4364
 
4365
+ // Pre-start local no-auth SOCKS5 relays for any authenticated socks5://
4366
+ // upstreams. Done once here (the only async step) so getProxyArgs stays a
4367
+ // sync lookup in the per-batch browser-launch path. Chromium can't auth
4368
+ // SOCKS5; the relay does the upstream auth transparently.
4369
+ try {
4370
+ const relayCount = await prepareSocksRelays(sites, forceDebug);
4371
+ if (relayCount > 0 && !silentMode) {
4372
+ console.log(messageColors.processing(`Started ${relayCount} SOCKS5 auth relay(s)`));
4373
+ }
4374
+ } catch (relayErr) {
4375
+ console.warn(formatLogMessage('proxy', `SOCKS5 relay setup failed: ${relayErr.message}`));
4376
+ }
4377
+
4280
4378
  let results = [];
4281
4379
  let processedUrlCount = 0;
4282
4380
  let urlsSinceLastCleanup = 0;
@@ -4299,27 +4397,39 @@ function setupFrameHandling(page, forceDebug) {
4299
4397
  let forceRestartFlag = false; // Flag to trigger restart on next iteration
4300
4398
 
4301
4399
  const hangDetectionInterval = setInterval(() => {
4400
+ // Progress check, counter, and forceRestartFlag MUST run regardless of
4401
+ // debug mode — previously the entire body was gated on forceDebug, which
4402
+ // made hang recovery a debug-only feature even though the restart
4403
+ // machinery exists for production scans. Only the verbose diagnostic
4404
+ // logs stay debug-gated; the "no progress" warning and the
4405
+ // "triggering restart" error are user-visible recovery events.
4406
+ if (processedUrlCount === lastProcessedCount) {
4407
+ hangCheckCount++;
4408
+ if (forceDebug) {
4409
+ console.log(formatLogMessage('warn', `[HANG CHECK] No progress for ${hangCheckCount * 30}s`));
4410
+ }
4411
+ if (hangCheckCount >= 5) {
4412
+ console.log(formatLogMessage('error', `[HANG CHECK] Hung for 2.5 minutes. Triggering emergency browser restart.`));
4413
+ forceRestartFlag = true; // Set flag instead of exiting
4414
+ hangCheckCount = 0; // Reset counter for next cycle
4415
+ }
4416
+ } else {
4417
+ hangCheckCount = 0;
4418
+ }
4419
+ lastProcessedCount = processedUrlCount;
4420
+
4421
+ // Debug-only diagnostic snapshot
4302
4422
  if (forceDebug) {
4303
4423
  const currentBatch = Math.floor(currentBatchInfo.batchStart / RESOURCE_CLEANUP_INTERVAL) + 1;
4304
4424
  const totalBatches = Math.ceil(totalUrls / RESOURCE_CLEANUP_INTERVAL);
4305
4425
  console.log(formatLogMessage('debug', `[HANG CHECK] Processed: ${processedUrlCount}/${totalUrls} URLs, Batch: ${currentBatch}/${totalBatches}, Current batch size: ${currentBatchInfo.batchSize}`));
4306
4426
  console.log(formatLogMessage('debug', `[HANG CHECK] URLs since cleanup: ${urlsSinceLastCleanup}, Recent failures: ${results.slice(-3).filter(r => !r.success).length}/3`));
4307
-
4308
- // Check progress and trigger browser restart if hung
4309
- if (processedUrlCount === lastProcessedCount) {
4310
- hangCheckCount++;
4311
- console.log(formatLogMessage('warn', `[HANG CHECK] No progress for ${hangCheckCount * 30}s`));
4312
- if (hangCheckCount >= 5) {
4313
- console.log(formatLogMessage('error', `[HANG CHECK] Hung for 2.5 minutes. Triggering emergency browser restart.`));
4314
- forceRestartFlag = true; // Set flag instead of exiting
4315
- hangCheckCount = 0; // Reset counter for next cycle
4316
- }
4317
- } else {
4318
- hangCheckCount = 0;
4319
- }
4320
- lastProcessedCount = processedUrlCount;
4321
4427
  }
4322
4428
  }, 30000);
4429
+ // Don't keep the event loop alive solely for the hang-check interval — the
4430
+ // clearInterval calls at the normal-exit and error paths already cover the
4431
+ // cleanup, this is belt-and-suspenders in case a future refactor moves them.
4432
+ hangDetectionInterval.unref();
4323
4433
 
4324
4434
  // Process URLs in batches with exception handling
4325
4435
  let siteGroupIndex = 0;
@@ -4387,8 +4497,14 @@ function setupFrameHandling(page, forceDebug) {
4387
4497
  !healthCheck.reason?.includes('Scheduled cleanup') &&
4388
4498
  (healthCheck.reason?.includes('Critical') || healthCheck.reason?.includes('disconnected'));
4389
4499
 
4390
- // Restart browser if we've processed enough URLs, health check suggests it, hang detected, and this isn't the last site
4391
- if ((wouldExceedLimit || shouldRestartFromHealth || forceRestartFlag || (hasHighFailureRate && recentResults.length >= 6)) && urlsSinceLastCleanup > 8 && isNotLastBatch) {
4500
+ // Restart conditions split into hang recovery vs proactive triggers.
4501
+ // Hang recovery (forceRestartFlag set by 2.5-min HANG CHECK or a per-URL
4502
+ // timeout) bypasses the urlsSinceLastCleanup > 8 gate — a confirmed hang
4503
+ // needs immediate restart even if we just cleaned up. Proactive triggers
4504
+ // keep the gate to prevent thrashing.
4505
+ const hangRecoveryRestart = forceRestartFlag;
4506
+ const proactiveRestart = (wouldExceedLimit || shouldRestartFromHealth || (hasHighFailureRate && recentResults.length >= 6)) && urlsSinceLastCleanup > 8;
4507
+ if ((hangRecoveryRestart || proactiveRestart) && isNotLastBatch) {
4392
4508
  let restartReason = 'Unknown';
4393
4509
  if (forceRestartFlag) {
4394
4510
  restartReason = 'Emergency restart due to 2.5-minute hang detection';
@@ -4517,16 +4633,118 @@ function setupFrameHandling(page, forceDebug) {
4517
4633
  console.log(formatLogMessage('debug', `[CONCURRENCY] Starting ${batchSize} concurrent tasks with limit ${MAX_CONCURRENT_SITES}`));
4518
4634
  }
4519
4635
 
4520
- // Create tasks with timeout protection — skip domains that repeatedly timed out
4521
- const batchTasks = currentBatch.map(task => originalLimit(() => {
4636
+ // Create tasks with timeout protection — skip domains that repeatedly timed out.
4637
+ // Wrapped in an outer try/finally so processedUrlCount is incremented exactly
4638
+ // once per URL no matter which return/throw path is taken — that turns HANG
4639
+ // CHECK's signal from "did the batch finish?" into "did any URL finish?",
4640
+ // which is what 30-second tick granularity actually needs.
4641
+ const batchTasks = currentBatch.map(task => originalLimit(async () => {
4522
4642
  try {
4523
- const taskDomain = new URL(task.url).hostname;
4524
- if ((domainTimeoutCounts.get(taskDomain) || 0) >= DOMAIN_TIMEOUT_THRESHOLD) {
4525
- if (!silentMode) console.log(formatLogMessage('info', `Skipping ${task.url} ${taskDomain} timed out ${DOMAIN_TIMEOUT_THRESHOLD} times`));
4526
- return { url: task.url, rules: [], success: false, error: 'Domain repeatedly timed out', skipped: true };
4643
+ // Short-circuit queued URLs once any URL in this batch has triggered a
4644
+ // restart. Without this, the 80-URL batch in the user's hang trace
4645
+ // would have to fail one-by-one at 120s each (~28 min total) before
4646
+ // the boundary restart could fire. Now: first hang fires the flag,
4647
+ // remaining queued URLs return immediately, batch completes, restart.
4648
+ if (forceRestartFlag) {
4649
+ return { url: task.url, rules: [], success: false, error: 'Browser restart pending', skipped: true };
4527
4650
  }
4528
- } catch {}
4529
- return processUrl(task.url, task.config, browser);
4651
+
4652
+ try {
4653
+ const taskDomain = new URL(task.url).hostname;
4654
+ if ((domainTimeoutCounts.get(taskDomain) || 0) >= DOMAIN_TIMEOUT_THRESHOLD) {
4655
+ if (!silentMode) console.log(formatLogMessage('info', `Skipping ${task.url} — ${taskDomain} timed out ${DOMAIN_TIMEOUT_THRESHOLD} times`));
4656
+ return { url: task.url, rules: [], success: false, error: 'Domain repeatedly timed out', skipped: true };
4657
+ }
4658
+
4659
+ // DNS pre-check — fails fast on NXDOMAIN/unresolvable hosts before
4660
+ // we pay ~5-15s for Puppeteer navigation + Cloudflare detection.
4661
+ // Skips IP literals. Respects an in-memory negative cache so a dead
4662
+ // host hit by many URL paths only costs one DNS round-trip per TTL.
4663
+ //
4664
+ // Uses dns.resolve* (c-ares, async network I/O) NOT dns.lookup
4665
+ // (getaddrinfo, libuv threadpool). Under scan concurrency Puppeteer
4666
+ // saturates the default 4-slot threadpool with filesystem I/O, so
4667
+ // dns.lookup calls sit queued and blow the timeout while never
4668
+ // actually starting — wrongly skipping live domains. c-ares isn't
4669
+ // threadpool-bound so it's immune to that contention.
4670
+ if (dnsPrecheckEnabled && taskDomain && !/^[\d.:]+$|^\[/.test(taskDomain)) {
4671
+ const cached = dnsNegativeCache.get(taskDomain);
4672
+ if (cached && Date.now() - cached.timestamp < DNS_NEGATIVE_CACHE_TTL_MS) {
4673
+ dnsPrecheckSkips++;
4674
+ if (forceDebug) console.log(formatLogMessage('debug', `DNS pre-check (cached): ${taskDomain} — ${cached.error}`));
4675
+ return { url: task.url, rules: [], success: false, error: `DNS: ${cached.error}`, skipped: true };
4676
+ }
4677
+ const dnsResolve = async () => {
4678
+ // resolve4 first; on no-IPv4 (ENODATA / ENOTFOUND) fall back to
4679
+ // resolve6 so IPv6-only hosts aren't wrongly skipped. Only a
4680
+ // failure of BOTH means the host is genuinely unresolvable.
4681
+ // 2s timeout kept as a real safety net — with c-ares off the
4682
+ // threadpool it should now rarely fire.
4683
+ let timer;
4684
+ try {
4685
+ const timeoutP = new Promise((_, reject) => {
4686
+ timer = setTimeout(() => reject(new Error('DNS timeout')), dnsPrecheckTimeoutMs);
4687
+ });
4688
+ const resolveChain = dnsPromises.resolve4(taskDomain)
4689
+ .catch(() => dnsPromises.resolve6(taskDomain));
4690
+ await Promise.race([resolveChain, timeoutP]);
4691
+ } finally {
4692
+ if (timer) clearTimeout(timer);
4693
+ }
4694
+ };
4695
+ // c-ares transient codes — retry once so a momentary resolver
4696
+ // hiccup doesn't poison the negative cache for 5 minutes.
4697
+ const TRANSIENT = new Set(['ETIMEOUT', 'ESERVFAIL', 'EREFUSED', 'ECONNREFUSED']);
4698
+ try {
4699
+ try {
4700
+ await dnsResolve();
4701
+ } catch (firstErr) {
4702
+ const code = firstErr && firstErr.code;
4703
+ if (TRANSIENT.has(code) || (firstErr && firstErr.message === 'DNS timeout')) {
4704
+ if (forceDebug) console.log(formatLogMessage('debug', `DNS pre-check transient (${code || 'timeout'}) for ${taskDomain}, retrying once`));
4705
+ await dnsResolve();
4706
+ } else {
4707
+ throw firstErr;
4708
+ }
4709
+ }
4710
+ } catch (dnsErr) {
4711
+ const errCode = dnsErr.code || dnsErr.message || 'DNS resolve failed';
4712
+ dnsNegativeCacheSet(taskDomain, errCode);
4713
+ dnsPrecheckSkips++;
4714
+ if (forceDebug) console.log(formatLogMessage('debug', `DNS pre-check failed: ${taskDomain} — ${errCode}`));
4715
+ return { url: task.url, rules: [], success: false, error: `DNS: ${errCode}`, skipped: true };
4716
+ }
4717
+ }
4718
+ } catch {}
4719
+
4720
+ // Per-URL timeout so a single hung processUrl can't block the batch
4721
+ // forever. 120s is well past any legitimate slow page: Cloudflare
4722
+ // adaptive max ~25s, nettools overall ~65s, navigation 15s.
4723
+ const processUrlPromise = processUrl(task.url, task.config, browser);
4724
+ let perUrlTimer;
4725
+ try {
4726
+ return await Promise.race([
4727
+ processUrlPromise,
4728
+ new Promise((_, reject) => {
4729
+ perUrlTimer = setTimeout(() => reject(new Error('Per-URL timeout (120s)')), 120000);
4730
+ })
4731
+ ]);
4732
+ } catch (err) {
4733
+ if (err && err.message === 'Per-URL timeout (120s)') {
4734
+ processUrlPromise.catch(() => {});
4735
+ forceRestartFlag = true;
4736
+ return { url: task.url, rules: [], success: false, error: 'Per-URL timeout (120s)', needsImmediateRestart: true };
4737
+ }
4738
+ throw err;
4739
+ } finally {
4740
+ if (perUrlTimer) clearTimeout(perUrlTimer);
4741
+ }
4742
+ } finally {
4743
+ // Always count completion — even on unexpected throw — so HANG CHECK's
4744
+ // per-tick progress signal stays accurate. Replaces the old
4745
+ // `processedUrlCount += batchSize` that ran after the whole batch.
4746
+ processedUrlCount++;
4747
+ }
4530
4748
  }));
4531
4749
 
4532
4750
  let batchResults;
@@ -4628,7 +4846,8 @@ function setupFrameHandling(page, forceDebug) {
4628
4846
  }
4629
4847
  }
4630
4848
 
4631
- processedUrlCount += batchSize;
4849
+ // processedUrlCount is now incremented per-URL inside the batchTasks
4850
+ // wrapper above; no batch-level += batchSize here.
4632
4851
  urlsSinceLastCleanup += batchSize;
4633
4852
 
4634
4853
  // Force browser restart if any URL had critical errors
@@ -4809,12 +5028,43 @@ function setupFrameHandling(page, forceDebug) {
4809
5028
  console.log(formatLogMessage('debug', `Cache hit rate: ${cloudflareStats.hitRate}, Total hits: ${cloudflareStats.hits}, Misses: ${cloudflareStats.misses}`));
4810
5029
  console.log(formatLogMessage('debug', `Cached detections: ${cloudflareStats.size}`));
4811
5030
  }
5031
+ if (cloudflareScanStats.errorPages > 0) {
5032
+ console.log(formatLogMessage('debug', `Cloudflare 5xx origin-error pages: ${cloudflareScanStats.errorPages} (no bypass possible — origin unreachable)`));
5033
+ }
5034
+ if (dnsPrecheckEnabled && dnsPrecheckSkips > 0) {
5035
+ console.log(formatLogMessage('debug', `DNS pre-check skipped: ${dnsPrecheckSkips} URL(s) via ${dnsNegativeCache.size} unresolvable host(s)`));
5036
+ }
4812
5037
  // Log smart cache statistics (if cache is enabled)
4813
5038
  // Adblock statistics
4814
5039
  if (adblockEnabled) {
4815
5040
  console.log(formatLogMessage('debug', '=== Adblock Statistics ==='));
4816
5041
  const blockRate = ((adblockStats.blocked / (adblockStats.blocked + adblockStats.allowed)) * 100).toFixed(1);
4817
5042
  console.log(formatLogMessage('debug', `Blocked: ${adblockStats.blocked} requests (${blockRate}% block rate), Allowed: ${adblockStats.allowed}`));
5043
+
5044
+ // Engine-specific stats from the matcher itself. Both engines expose
5045
+ // getStats() but with slightly different cache shapes — JS engine
5046
+ // tracks urlCacheSize + resultCacheSize separately, rust wrapper
5047
+ // tracks a single size. Handle both.
5048
+ if (adblockMatcher && typeof adblockMatcher.getStats === 'function') {
5049
+ try {
5050
+ const es = adblockMatcher.getStats();
5051
+ const engine = es.engine || 'js';
5052
+ console.log(formatLogMessage('debug', `Engine: ${engine}${es.fromDiskCache ? ' (loaded from disk cache)' : ''}`));
5053
+ if (es.cache && (es.cache.hits != null || es.cache.misses != null)) {
5054
+ // rust wrapper: single `size`; JS engine: split into urlCacheSize + resultCacheSize
5055
+ const sizeDesc = es.cache.size != null
5056
+ ? `${es.cache.size}/${es.cache.maxSize}`
5057
+ : `url ${es.cache.urlCacheSize}, result ${es.cache.resultCacheSize}, cap ${es.cache.maxSize}`;
5058
+ console.log(formatLogMessage('debug', `Matcher cache: ${es.cache.hits} hits / ${es.cache.misses} misses (${es.cache.hitRate}), ${sizeDesc}`));
5059
+ }
5060
+ if (es.exceptions != null && es.exceptions > 0) {
5061
+ console.log(formatLogMessage('debug', `Whitelist exceptions: ${es.exceptions}`));
5062
+ }
5063
+ if (es.errors != null && es.errors > 0) {
5064
+ console.log(formatLogMessage('debug', `Engine errors: ${es.errors}`));
5065
+ }
5066
+ } catch (_) { /* getStats shape mismatch — don't crash the exit path */ }
5067
+ }
4818
5068
  }
4819
5069
  if (smartCache) {
4820
5070
  const cacheStats = smartCache.getStats();
@@ -4993,8 +5243,18 @@ function setupFrameHandling(page, forceDebug) {
4993
5243
  }
4994
5244
  }
4995
5245
 
5246
+ // Run the same cleanup the SIGINT/SIGTERM emergency handler does, so normal
5247
+ // scan completion isn't left depending on process.exit(0) to override
5248
+ // lingering setInterval handles (the cloudflare detection cache schedules
5249
+ // one that's otherwise only stopped on signal-driven shutdown).
5250
+ try { cleanupCloudflareCache(); } catch (_) {}
5251
+ try { wgDisconnectAll(forceDebug); } catch (_) {}
5252
+ try { ovpnDisconnectAll(forceDebug); } catch (_) {}
5253
+ try { purgeStaleTrackers(); } catch (_) {}
5254
+ try { await closeAllSocksRelays(forceDebug); } catch (_) {}
5255
+
4996
5256
  // Clean process termination
4997
5257
  if (forceDebug) console.log(formatLogMessage('debug', `About to exit process...`));
4998
5258
  process.exit(0);
4999
-
5259
+
5000
5260
  })();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "2.0.64",
3
+ "version": "2.0.66",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {
@@ -14,11 +14,12 @@
14
14
  "lru-cache": "^11.3.5",
15
15
  "p-limit": "^7.3.0",
16
16
  "psl": "^1.15.0",
17
- "puppeteer": ">=20.0.0"
17
+ "puppeteer": ">=20.0.0",
18
+ "socks": "^2.8.9"
18
19
  },
19
20
  "overrides": {
20
21
  "tar-fs": "3.1.1",
21
- "ws": "8.18.3",
22
+ "ws": ">=8.20.1",
22
23
  "yauzl": ">=3.2.1"
23
24
  },
24
25
  "keywords": [