@fanboynz/network-scanner 2.0.63 → 2.0.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/nwss.js CHANGED
@@ -58,7 +58,8 @@ const { clearSiteData } = require('./lib/clear_sitedata');
58
58
  // Referrer header generation
59
59
  const { getReferrerForUrl, validateReferrerConfig, validateReferrerDisable } = require('./lib/referrer');
60
60
  // Adblock rules parser
61
- const { parseAdblockRules } = require('./lib/adblock');
61
+ const adblockJs = require('./lib/adblock');
62
+ const adblockRust = require('./lib/adblock-rust');
62
63
  // WireGuard VPN
63
64
  const { connectForSite: wgConnect, disconnectForSite: wgDisconnect, disconnectAll: wgDisconnectAll, validateVpnConfig, normalizeVpnConfig } = require('./lib/wireguard_vpn');
64
65
  // OpenVPN
@@ -359,6 +360,12 @@ let adblockEnabled = false;
359
360
  let adblockMatcher = null;
360
361
  let adblockStats = { blocked: 0, allowed: 0 };
361
362
 
363
+ // Cloudflare scan-wide stats. errorPages counts URLs where the returned page
364
+ // was a Cloudflare-served 5xx origin error (522/523/etc.) — no bypass
365
+ // possible, useful signal for diagnosing dead-origin scans. Named distinct
366
+ // from the local cloudflareStats = getCacheStats() in the debug stats block.
367
+ let cloudflareScanStats = { errorPages: 0 };
368
+
362
369
  // Validate --adblock-rules usage - ignore if used incorrectly instead of erroring
363
370
  if (adblockRulesMode) {
364
371
  if (!outputFile) {
@@ -477,78 +484,10 @@ if (testValidation) {
477
484
  }
478
485
  }
479
486
 
480
- if (validateConfig) {
481
- console.log(`\n${messageColors.processing('Validating configuration file...')}`);
482
- try {
483
- const validation = validateFullConfig(config, { forceDebug, silentMode });
484
-
485
- // Validate referrer_headers format
486
- for (const site of sites) {
487
- if (site.referrer_headers && typeof site.referrer_headers === 'object' && !Array.isArray(site.referrer_headers)) {
488
- const validation = validateReferrerConfig(site.referrer_headers);
489
- if (!validation.isValid) {
490
- console.warn(`⚠ Invalid referrer_headers configuration: ${validation.errors.join(', ')}`);
491
- }
492
- if (validation.warnings.length > 0) {
493
- console.warn(`⚠ Referrer warnings: ${validation.warnings.join(', ')}`);
494
- }
495
- }
496
- // Validate referrer_disable format
497
- if (site.referrer_disable) {
498
- const disableValidation = validateReferrerDisable(site.referrer_disable);
499
- if (!disableValidation.isValid) {
500
- console.warn(`⚠ Invalid referrer_disable configuration: ${disableValidation.errors.join(', ')}`);
501
- }
502
- if (disableValidation.warnings.length > 0) {
503
- console.warn(`⚠ Referrer disable warnings: ${disableValidation.warnings.join(', ')}`);
504
- }
505
- }
506
- }
507
-
508
- // Validate VPN configurations
509
- for (const site of sites) {
510
- if (site.vpn) {
511
- const vpnNorm = normalizeVpnConfig(site.vpn);
512
- const vpnValidation = validateVpnConfig(vpnNorm);
513
- if (!vpnValidation.isValid) {
514
- console.warn(`⚠ Invalid vpn configuration for ${site.url}: ${vpnValidation.errors.join(', ')}`);
515
- }
516
- if (vpnValidation.warnings.length > 0) {
517
- vpnValidation.warnings.forEach(w => console.warn(`⚠ VPN warning for ${site.url}: ${w}`));
518
- }
519
- }
520
- if (site.openvpn) {
521
- const ovpnNorm = normalizeOvpnConfig(site.openvpn);
522
- const ovpnValidation = validateOvpnConfig(ovpnNorm);
523
- if (!ovpnValidation.isValid) {
524
- console.warn(`⚠ Invalid openvpn configuration for ${site.url}: ${ovpnValidation.errors.join(', ')}`);
525
- }
526
- if (ovpnValidation.warnings.length > 0) {
527
- ovpnValidation.warnings.forEach(w => console.warn(`⚠ OpenVPN warning for ${site.url}: ${w}`));
528
- }
529
- }
530
- if (site.vpn && site.openvpn) {
531
- console.warn(`⚠ ${site.url} has both vpn and openvpn configured — only one will be used (vpn takes precedence)`);
532
- }
533
- }
534
-
535
- if (validation.isValid) {
536
- console.log(`${messageColors.success('✅ Configuration is valid!')}`);
537
- console.log(`${messageColors.info('Summary:')} ${validation.summary.validSites}/${validation.summary.totalSites} sites valid`);
538
- if (validation.summary.sitesWithWarnings > 0) {
539
- console.log(`${messageColors.warn('⚠ Warnings:')} ${validation.summary.sitesWithWarnings} sites have warnings`);
540
- }
541
- process.exit(0);
542
- } else {
543
- console.log(`${messageColors.error('❌ Configuration validation failed!')}`);
544
- console.log(`${messageColors.error('Errors:')} ${validation.globalErrors.length} global, ${validation.summary.sitesWithErrors} site-specific`);
545
- process.exit(1);
546
- }
547
- } catch (validationErr) {
548
- console.error(`❌ Validation failed: ${validationErr.message}`);
549
- process.exit(1);
550
- }
551
- }
487
+ // Note: --validate-config is handled further down, AFTER the config file is
488
+ // loaded and `config`/`sites` are populated. Running it here would fail with
489
+ // "Cannot access 'config' before initialization" since those are declared
490
+ // later in the module.
552
491
 
553
492
  if (validateRules || validateRulesFile) {
554
493
  const filesToValidate = validateRulesFile ? [validateRulesFile] : [outputFile, compareFile].filter(Boolean);
@@ -594,6 +533,22 @@ if (validateRules || validateRulesFile) {
594
533
  }
595
534
  }
596
535
 
536
+ // Parse --adblock-engine=<js|rust> (default: js). Selects the matcher backend
537
+ // used by --block-ads. The rust engine requires the optional adblock-rs package.
538
+ const adblockEngineIndex = args.findIndex(arg => arg.startsWith('--adblock-engine'));
539
+ let adblockEngineName = 'js';
540
+ if (adblockEngineIndex !== -1) {
541
+ const engineArg = args[adblockEngineIndex].includes('=')
542
+ ? args[adblockEngineIndex].split('=')[1]
543
+ : args[adblockEngineIndex + 1];
544
+ if (engineArg === 'rust' || engineArg === 'js') {
545
+ adblockEngineName = engineArg;
546
+ } else {
547
+ console.log(`Error: --adblock-engine must be 'js' or 'rust' (got: ${engineArg})`);
548
+ process.exit(1);
549
+ }
550
+ }
551
+
597
552
  // Parse --block-ads argument for request-level ad blocking (supports comma-separated lists)
598
553
  const blockAdsIndex = args.findIndex(arg => arg.startsWith('--block-ads'));
599
554
  if (blockAdsIndex !== -1) {
@@ -614,18 +569,31 @@ if (blockAdsIndex !== -1) {
614
569
  }
615
570
  }
616
571
 
617
- // Concatenate multiple lists into a single temp file for the parser
618
- let rulesFile = rulesFiles[0];
619
- if (rulesFiles.length > 1) {
620
- rulesFile = path.join(os.tmpdir(), `nwss-adblock-combined-${Date.now()}.txt`);
621
- const combined = rulesFiles.map(f => fs.readFileSync(f, 'utf-8')).join('\n');
622
- fs.writeFileSync(rulesFile, combined);
623
- }
624
-
625
572
  adblockEnabled = true;
626
- adblockMatcher = parseAdblockRules(rulesFile, { enableLogging: forceDebug });
573
+ const engine = adblockEngineName === 'rust' ? adblockRust : adblockJs;
574
+ try {
575
+ if (engine === adblockRust) {
576
+ // Rust wrapper accepts an array directly — no temp file needed.
577
+ adblockMatcher = engine.parseAdblockRules(rulesFiles, { enableLogging: forceDebug });
578
+ } else {
579
+ // JS engine takes a single path; concat to a temp file when multiple lists.
580
+ let rulesFile = rulesFiles[0];
581
+ if (rulesFiles.length > 1) {
582
+ rulesFile = path.join(os.tmpdir(), `nwss-adblock-combined-${Date.now()}.txt`);
583
+ const combined = rulesFiles.map(f => fs.readFileSync(f, 'utf-8')).join('\n');
584
+ fs.writeFileSync(rulesFile, combined);
585
+ }
586
+ adblockMatcher = engine.parseAdblockRules(rulesFile, { enableLogging: forceDebug });
587
+ }
588
+ } catch (err) {
589
+ console.log(`Error: Failed to load adblock engine '${adblockEngineName}': ${err.message}`);
590
+ process.exit(1);
591
+ }
627
592
  const stats = adblockMatcher.getStats();
628
- if (!silentMode) console.log(messageColors.success(`Adblock enabled: Loaded ${stats.total} blocking rules from ${rulesFiles.length} list${rulesFiles.length > 1 ? 's' : ''}`));
593
+ const ruleDesc = stats.total != null
594
+ ? `${stats.total} blocking rules`
595
+ : `compiled engine (cached)`;
596
+ if (!silentMode) console.log(messageColors.success(`Adblock enabled (${adblockEngineName}): Loaded ${ruleDesc} from ${rulesFiles.length} list${rulesFiles.length > 1 ? 's' : ''}`));
629
597
  }
630
598
 
631
599
  if (args.includes('--help') || args.includes('-h')) {
@@ -651,6 +619,9 @@ Output Format Options:
651
619
  Request Blocking:
652
620
  --block-ads=<file> Block ads/trackers using EasyList format rules (||domain.com^, /ads/*, etc)
653
621
  Works at request-level for maximum performance
622
+ Supports comma-separated lists: --block-ads=easylist.txt,easyprivacy.txt
623
+ --adblock-engine=<js|rust> Matcher backend for --block-ads (default: js)
624
+ 'rust' uses Brave's adblock-rs (faster on large lists; needs: npm i adblock-rs)
654
625
 
655
626
  Per-config settings file (.nwssconfig):
656
627
  Place a .nwssconfig file in the project root to define per-config settings.
@@ -876,10 +847,86 @@ const {
876
847
  disable_ad_tagging = true,
877
848
  max_concurrent_sites = 6,
878
849
  resource_cleanup_interval = 80,
879
- comments: globalComments,
880
- ...otherGlobalConfig
850
+ comments: globalComments,
851
+ ...otherGlobalConfig
881
852
  } = config;
882
853
 
854
+ // --validate-config runs here, after `config` and `sites` are populated.
855
+ // Previously this block lived above the config load and triggered a TDZ
856
+ // "Cannot access 'config' before initialization" error.
857
+ if (validateConfig) {
858
+ console.log(`\n${messageColors.processing('Validating configuration file...')}`);
859
+ try {
860
+ const validation = validateFullConfig(config, { forceDebug, silentMode });
861
+
862
+ // Validate referrer_headers format
863
+ for (const site of sites) {
864
+ if (site.referrer_headers && typeof site.referrer_headers === 'object' && !Array.isArray(site.referrer_headers)) {
865
+ const refValidation = validateReferrerConfig(site.referrer_headers);
866
+ if (!refValidation.isValid) {
867
+ console.warn(`⚠ Invalid referrer_headers configuration: ${refValidation.errors.join(', ')}`);
868
+ }
869
+ if (refValidation.warnings.length > 0) {
870
+ console.warn(`⚠ Referrer warnings: ${refValidation.warnings.join(', ')}`);
871
+ }
872
+ }
873
+ // Validate referrer_disable format
874
+ if (site.referrer_disable) {
875
+ const disableValidation = validateReferrerDisable(site.referrer_disable);
876
+ if (!disableValidation.isValid) {
877
+ console.warn(`⚠ Invalid referrer_disable configuration: ${disableValidation.errors.join(', ')}`);
878
+ }
879
+ if (disableValidation.warnings.length > 0) {
880
+ console.warn(`⚠ Referrer disable warnings: ${disableValidation.warnings.join(', ')}`);
881
+ }
882
+ }
883
+ }
884
+
885
+ // Validate VPN configurations
886
+ for (const site of sites) {
887
+ if (site.vpn) {
888
+ const vpnNorm = normalizeVpnConfig(site.vpn);
889
+ const vpnValidation = validateVpnConfig(vpnNorm);
890
+ if (!vpnValidation.isValid) {
891
+ console.warn(`⚠ Invalid vpn configuration for ${site.url}: ${vpnValidation.errors.join(', ')}`);
892
+ }
893
+ if (vpnValidation.warnings.length > 0) {
894
+ vpnValidation.warnings.forEach(w => console.warn(`⚠ VPN warning for ${site.url}: ${w}`));
895
+ }
896
+ }
897
+ if (site.openvpn) {
898
+ const ovpnNorm = normalizeOvpnConfig(site.openvpn);
899
+ const ovpnValidation = validateOvpnConfig(ovpnNorm);
900
+ if (!ovpnValidation.isValid) {
901
+ console.warn(`⚠ Invalid openvpn configuration for ${site.url}: ${ovpnValidation.errors.join(', ')}`);
902
+ }
903
+ if (ovpnValidation.warnings.length > 0) {
904
+ ovpnValidation.warnings.forEach(w => console.warn(`⚠ OpenVPN warning for ${site.url}: ${w}`));
905
+ }
906
+ }
907
+ if (site.vpn && site.openvpn) {
908
+ console.warn(`⚠ ${site.url} has both vpn and openvpn configured — only one will be used (vpn takes precedence)`);
909
+ }
910
+ }
911
+
912
+ if (validation.isValid) {
913
+ console.log(`${messageColors.success('✅ Configuration is valid!')}`);
914
+ console.log(`${messageColors.info('Summary:')} ${validation.summary.validSites}/${validation.summary.totalSites} sites valid`);
915
+ if (validation.summary.sitesWithWarnings > 0) {
916
+ console.log(`${messageColors.warn('⚠ Warnings:')} ${validation.summary.sitesWithWarnings} sites have warnings`);
917
+ }
918
+ process.exit(0);
919
+ } else {
920
+ console.log(`${messageColors.error('❌ Configuration validation failed!')}`);
921
+ console.log(`${messageColors.error('Errors:')} ${validation.globalErrors.length} global, ${validation.summary.sitesWithErrors} site-specific`);
922
+ process.exit(1);
923
+ }
924
+ } catch (validationErr) {
925
+ console.error(`❌ Validation failed: ${validationErr.message}`);
926
+ process.exit(1);
927
+ }
928
+ }
929
+
883
930
  // Pre-compile global blocked regexes ONCE (used in every processUrl call)
884
931
  const globalBlockedRegexes = Array.isArray(globalBlocked)
885
932
  ? globalBlocked.map(pattern => new RegExp(pattern))
@@ -3392,7 +3439,7 @@ function setupFrameHandling(page, forceDebug) {
3392
3439
  }
3393
3440
  }
3394
3441
 
3395
- const { finalUrl, redirected, redirectChain, originalUrl, redirectDomains } = navigationResult;
3442
+ const { finalUrl, redirected, redirectChain, originalUrl, redirectDomains, httpStatus, cfRay } = navigationResult;
3396
3443
 
3397
3444
  // Check for same-page reload loops BEFORE redirect processing
3398
3445
  const loadCount = pageLoadHistory.get(currentUrl) || 0;
@@ -3501,8 +3548,14 @@ function setupFrameHandling(page, forceDebug) {
3501
3548
  }
3502
3549
  }
3503
3550
 
3504
- // Handle all Cloudflare protections using the enhanced module
3505
- const cloudflareResult = await handleCloudflareProtection(page, currentUrl, siteConfig, forceDebug);
3551
+ // Handle all Cloudflare protections using the enhanced module. Pass
3552
+ // httpStatus and cfRay captured at goto time so the outcome log can
3553
+ // surface them — Puppeteer's response object is only available
3554
+ // immediately after page.goto, so handleCloudflareProtection can't
3555
+ // recover them from `page` alone.
3556
+ const cloudflareResult = await handleCloudflareProtection(page, currentUrl, siteConfig, forceDebug, { httpStatus, cfRay });
3557
+
3558
+ if (cloudflareResult.cloudflareErrorPage) cloudflareScanStats.errorPages++;
3506
3559
 
3507
3560
  // Check if Cloudflare handling exceeded max retries and should terminate processing
3508
3561
  if (!cloudflareResult.overallSuccess &&
@@ -3535,7 +3588,10 @@ function setupFrameHandling(page, forceDebug) {
3535
3588
  console.warn(` - ${error}`);
3536
3589
  });
3537
3590
  // Continue with scan despite Cloudflare issues
3538
- } else if (cloudflareResult.verificationChallenge?.success && forceDebug) {
3591
+ } else if (cloudflareResult.verificationChallenge?.attempted && cloudflareResult.verificationChallenge?.success && forceDebug) {
3592
+ // Require attempted === true so we don't log "Challenge solved using:
3593
+ // undefined" for pages that had no challenge to solve (success: true
3594
+ // is the natural state for that case).
3539
3595
  console.log(formatLogMessage('debug', `[cloudflare] Challenge solved using: ${cloudflareResult.verificationChallenge.method}`));
3540
3596
  }
3541
3597
 
@@ -4266,27 +4322,39 @@ function setupFrameHandling(page, forceDebug) {
4266
4322
  let forceRestartFlag = false; // Flag to trigger restart on next iteration
4267
4323
 
4268
4324
  const hangDetectionInterval = setInterval(() => {
4325
+ // Progress check, counter, and forceRestartFlag MUST run regardless of
4326
+ // debug mode — previously the entire body was gated on forceDebug, which
4327
+ // made hang recovery a debug-only feature even though the restart
4328
+ // machinery exists for production scans. The verbose diagnostic logs and
4329
+ // the per-tick "no progress" warning stay debug-gated; only the
4330
+ // "triggering restart" error is a user-visible recovery event.
4331
+ if (processedUrlCount === lastProcessedCount) {
4332
+ hangCheckCount++;
4333
+ if (forceDebug) {
4334
+ console.log(formatLogMessage('warn', `[HANG CHECK] No progress for ${hangCheckCount * 30}s`));
4335
+ }
4336
+ if (hangCheckCount >= 5) {
4337
+ console.log(formatLogMessage('error', `[HANG CHECK] Hung for 2.5 minutes. Triggering emergency browser restart.`));
4338
+ forceRestartFlag = true; // Set flag instead of exiting
4339
+ hangCheckCount = 0; // Reset counter for next cycle
4340
+ }
4341
+ } else {
4342
+ hangCheckCount = 0;
4343
+ }
4344
+ lastProcessedCount = processedUrlCount;
4345
+
4346
+ // Debug-only diagnostic snapshot
4269
4347
  if (forceDebug) {
4270
4348
  const currentBatch = Math.floor(currentBatchInfo.batchStart / RESOURCE_CLEANUP_INTERVAL) + 1;
4271
4349
  const totalBatches = Math.ceil(totalUrls / RESOURCE_CLEANUP_INTERVAL);
4272
4350
  console.log(formatLogMessage('debug', `[HANG CHECK] Processed: ${processedUrlCount}/${totalUrls} URLs, Batch: ${currentBatch}/${totalBatches}, Current batch size: ${currentBatchInfo.batchSize}`));
4273
4351
  console.log(formatLogMessage('debug', `[HANG CHECK] URLs since cleanup: ${urlsSinceLastCleanup}, Recent failures: ${results.slice(-3).filter(r => !r.success).length}/3`));
4274
-
4275
- // Check progress and trigger browser restart if hung
4276
- if (processedUrlCount === lastProcessedCount) {
4277
- hangCheckCount++;
4278
- console.log(formatLogMessage('warn', `[HANG CHECK] No progress for ${hangCheckCount * 30}s`));
4279
- if (hangCheckCount >= 5) {
4280
- console.log(formatLogMessage('error', `[HANG CHECK] Hung for 2.5 minutes. Triggering emergency browser restart.`));
4281
- forceRestartFlag = true; // Set flag instead of exiting
4282
- hangCheckCount = 0; // Reset counter for next cycle
4283
- }
4284
- } else {
4285
- hangCheckCount = 0;
4286
- }
4287
- lastProcessedCount = processedUrlCount;
4288
4352
  }
4289
4353
  }, 30000);
4354
+ // Don't keep the event loop alive solely for the hang-check interval — the
4355
+ // clearInterval calls at the normal-exit and error paths already cover the
4356
+ // cleanup; this is belt-and-suspenders in case a future refactor moves them.
4357
+ hangDetectionInterval.unref();
4290
4358
 
4291
4359
  // Process URLs in batches with exception handling
4292
4360
  let siteGroupIndex = 0;
@@ -4354,8 +4422,14 @@ function setupFrameHandling(page, forceDebug) {
4354
4422
  !healthCheck.reason?.includes('Scheduled cleanup') &&
4355
4423
  (healthCheck.reason?.includes('Critical') || healthCheck.reason?.includes('disconnected'));
4356
4424
 
4357
- // Restart browser if we've processed enough URLs, health check suggests it, hang detected, and this isn't the last site
4358
- if ((wouldExceedLimit || shouldRestartFromHealth || forceRestartFlag || (hasHighFailureRate && recentResults.length >= 6)) && urlsSinceLastCleanup > 8 && isNotLastBatch) {
4425
+ // Restart conditions split into hang recovery vs proactive triggers.
4426
+ // Hang recovery (forceRestartFlag set by 2.5-min HANG CHECK or a per-URL
4427
+ // timeout) bypasses the urlsSinceLastCleanup > 8 gate — a confirmed hang
4428
+ // needs immediate restart even if we just cleaned up. Proactive triggers
4429
+ // keep the gate to prevent thrashing.
4430
+ const hangRecoveryRestart = forceRestartFlag;
4431
+ const proactiveRestart = (wouldExceedLimit || shouldRestartFromHealth || (hasHighFailureRate && recentResults.length >= 6)) && urlsSinceLastCleanup > 8;
4432
+ if ((hangRecoveryRestart || proactiveRestart) && isNotLastBatch) {
4359
4433
  let restartReason = 'Unknown';
4360
4434
  if (forceRestartFlag) {
4361
4435
  restartReason = 'Emergency restart due to 2.5-minute hang detection';
@@ -4484,16 +4558,58 @@ function setupFrameHandling(page, forceDebug) {
4484
4558
  console.log(formatLogMessage('debug', `[CONCURRENCY] Starting ${batchSize} concurrent tasks with limit ${MAX_CONCURRENT_SITES}`));
4485
4559
  }
4486
4560
 
4487
- // Create tasks with timeout protection — skip domains that repeatedly timed out
4488
- const batchTasks = currentBatch.map(task => originalLimit(() => {
4561
+ // Create tasks with timeout protection — skip domains that repeatedly timed out.
4562
+ // Wrapped in an outer try/finally so processedUrlCount is incremented exactly
4563
+ // once per URL no matter which return/throw path is taken — that turns HANG
4564
+ // CHECK's signal from "did the batch finish?" into "did any URL finish?",
4565
+ // which is what 30-second tick granularity actually needs.
4566
+ const batchTasks = currentBatch.map(task => originalLimit(async () => {
4489
4567
  try {
4490
- const taskDomain = new URL(task.url).hostname;
4491
- if ((domainTimeoutCounts.get(taskDomain) || 0) >= DOMAIN_TIMEOUT_THRESHOLD) {
4492
- if (!silentMode) console.log(formatLogMessage('info', `Skipping ${task.url} ${taskDomain} timed out ${DOMAIN_TIMEOUT_THRESHOLD} times`));
4493
- return { url: task.url, rules: [], success: false, error: 'Domain repeatedly timed out', skipped: true };
4568
+ // Short-circuit queued URLs once any URL in this batch has triggered a
4569
+ // restart. Without this, the 80-URL batch in the user's hang trace
4570
+ // would have to fail one-by-one at 120s each (~28 min total) before
4571
+ // the boundary restart could fire. Now: first hang fires the flag,
4572
+ // remaining queued URLs return immediately, batch completes, restart.
4573
+ if (forceRestartFlag) {
4574
+ return { url: task.url, rules: [], success: false, error: 'Browser restart pending', skipped: true };
4575
+ }
4576
+
4577
+ try {
4578
+ const taskDomain = new URL(task.url).hostname;
4579
+ if ((domainTimeoutCounts.get(taskDomain) || 0) >= DOMAIN_TIMEOUT_THRESHOLD) {
4580
+ if (!silentMode) console.log(formatLogMessage('info', `Skipping ${task.url} — ${taskDomain} timed out ${DOMAIN_TIMEOUT_THRESHOLD} times`));
4581
+ return { url: task.url, rules: [], success: false, error: 'Domain repeatedly timed out', skipped: true };
4582
+ }
4583
+ } catch {}
4584
+
4585
+ // Per-URL timeout so a single hung processUrl can't block the batch
4586
+ // forever. 120s is well past any legitimate slow page: Cloudflare
4587
+ // adaptive max ~25s, nettools overall ~65s, navigation 15s.
4588
+ const processUrlPromise = processUrl(task.url, task.config, browser);
4589
+ let perUrlTimer;
4590
+ try {
4591
+ return await Promise.race([
4592
+ processUrlPromise,
4593
+ new Promise((_, reject) => {
4594
+ perUrlTimer = setTimeout(() => reject(new Error('Per-URL timeout (120s)')), 120000);
4595
+ })
4596
+ ]);
4597
+ } catch (err) {
4598
+ if (err && err.message === 'Per-URL timeout (120s)') {
4599
+ processUrlPromise.catch(() => {});
4600
+ forceRestartFlag = true;
4601
+ return { url: task.url, rules: [], success: false, error: 'Per-URL timeout (120s)', needsImmediateRestart: true };
4602
+ }
4603
+ throw err;
4604
+ } finally {
4605
+ if (perUrlTimer) clearTimeout(perUrlTimer);
4494
4606
  }
4495
- } catch {}
4496
- return processUrl(task.url, task.config, browser);
4607
+ } finally {
4608
+ // Always count completion — even on unexpected throw — so HANG CHECK's
4609
+ // per-tick progress signal stays accurate. Replaces the old
4610
+ // `processedUrlCount += batchSize` that ran after the whole batch.
4611
+ processedUrlCount++;
4612
+ }
4497
4613
  }));
4498
4614
 
4499
4615
  let batchResults;
@@ -4595,7 +4711,8 @@ function setupFrameHandling(page, forceDebug) {
4595
4711
  }
4596
4712
  }
4597
4713
 
4598
- processedUrlCount += batchSize;
4714
+ // processedUrlCount is now incremented per-URL inside the batchTasks
4715
+ // wrapper above; no batch-level += batchSize here.
4599
4716
  urlsSinceLastCleanup += batchSize;
4600
4717
 
4601
4718
  // Force browser restart if any URL had critical errors
@@ -4776,12 +4893,40 @@ function setupFrameHandling(page, forceDebug) {
4776
4893
  console.log(formatLogMessage('debug', `Cache hit rate: ${cloudflareStats.hitRate}, Total hits: ${cloudflareStats.hits}, Misses: ${cloudflareStats.misses}`));
4777
4894
  console.log(formatLogMessage('debug', `Cached detections: ${cloudflareStats.size}`));
4778
4895
  }
4896
+ if (cloudflareScanStats.errorPages > 0) {
4897
+ console.log(formatLogMessage('debug', `Cloudflare 5xx origin-error pages: ${cloudflareScanStats.errorPages} (no bypass possible — origin unreachable)`));
4898
+ }
4779
4899
  // Log smart cache statistics (if cache is enabled)
4780
4900
  // Adblock statistics
4781
4901
  if (adblockEnabled) {
4782
4902
  console.log(formatLogMessage('debug', '=== Adblock Statistics ==='));
4783
4903
  const blockRate = ((adblockStats.blocked / (adblockStats.blocked + adblockStats.allowed)) * 100).toFixed(1);
4784
4904
  console.log(formatLogMessage('debug', `Blocked: ${adblockStats.blocked} requests (${blockRate}% block rate), Allowed: ${adblockStats.allowed}`));
4905
+
4906
+ // Engine-specific stats from the matcher itself. Both engines expose
4907
+ // getStats() but with slightly different cache shapes — JS engine
4908
+ // tracks urlCacheSize + resultCacheSize separately, rust wrapper
4909
+ // tracks a single size. Handle both.
4910
+ if (adblockMatcher && typeof adblockMatcher.getStats === 'function') {
4911
+ try {
4912
+ const es = adblockMatcher.getStats();
4913
+ const engine = es.engine || 'js';
4914
+ console.log(formatLogMessage('debug', `Engine: ${engine}${es.fromDiskCache ? ' (loaded from disk cache)' : ''}`));
4915
+ if (es.cache && (es.cache.hits != null || es.cache.misses != null)) {
4916
+ // rust wrapper: single `size`; JS engine: split into urlCacheSize + resultCacheSize
4917
+ const sizeDesc = es.cache.size != null
4918
+ ? `${es.cache.size}/${es.cache.maxSize}`
4919
+ : `url ${es.cache.urlCacheSize}, result ${es.cache.resultCacheSize}, cap ${es.cache.maxSize}`;
4920
+ console.log(formatLogMessage('debug', `Matcher cache: ${es.cache.hits} hits / ${es.cache.misses} misses (${es.cache.hitRate}), ${sizeDesc}`));
4921
+ }
4922
+ if (es.exceptions != null && es.exceptions > 0) {
4923
+ console.log(formatLogMessage('debug', `Whitelist exceptions: ${es.exceptions}`));
4924
+ }
4925
+ if (es.errors != null && es.errors > 0) {
4926
+ console.log(formatLogMessage('debug', `Engine errors: ${es.errors}`));
4927
+ }
4928
+ } catch (_) { /* getStats shape mismatch — don't crash the exit path */ }
4929
+ }
4785
4930
  }
4786
4931
  if (smartCache) {
4787
4932
  const cacheStats = smartCache.getStats();
@@ -4960,8 +5105,17 @@ function setupFrameHandling(page, forceDebug) {
4960
5105
  }
4961
5106
  }
4962
5107
 
5108
+ // Run the same cleanup the SIGINT/SIGTERM emergency handler does, so normal
5109
+ // scan completion isn't left depending on process.exit(0) to override
5110
+ // lingering setInterval handles (the cloudflare detection cache schedules
5111
+ // one that's otherwise only stopped on signal-driven shutdown).
5112
+ try { cleanupCloudflareCache(); } catch (_) {}
5113
+ try { wgDisconnectAll(forceDebug); } catch (_) {}
5114
+ try { ovpnDisconnectAll(forceDebug); } catch (_) {}
5115
+ try { purgeStaleTrackers(); } catch (_) {}
5116
+
4963
5117
  // Clean process termination
4964
5118
  if (forceDebug) console.log(formatLogMessage('debug', `About to exit process...`));
4965
5119
  process.exit(0);
4966
-
5120
+
4967
5121
  })();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "2.0.63",
3
+ "version": "2.0.65",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {
@@ -11,8 +11,8 @@
11
11
  },
12
12
  "dependencies": {
13
13
  "ghost-cursor": "^1.4.2",
14
- "lru-cache": "^10.4.3",
15
- "p-limit": "^4.0.0",
14
+ "lru-cache": "^11.3.5",
15
+ "p-limit": "^7.3.0",
16
16
  "psl": "^1.15.0",
17
17
  "puppeteer": ">=20.0.0"
18
18
  },
@@ -36,7 +36,7 @@
36
36
  "author": "FanboyNZ",
37
37
  "license": "GPL-3.0",
38
38
  "engines": {
39
- "node": ">=20.0.0"
39
+ "node": ">=22.0.0"
40
40
  },
41
41
  "repository": {
42
42
  "type": "git",
@@ -50,10 +50,11 @@
50
50
  },
51
51
  "homepage": "https://github.com/ryanbr/network-scanner",
52
52
  "optionalDependencies": {
53
+ "adblock-rs": "^0.12.3",
53
54
  "puppeteer-core": ">=20.0.0"
54
55
  },
55
56
  "devDependencies": {
56
57
  "eslint": "^10.0.2",
57
- "globals": "^16.3.0"
58
+ "globals": "^17.6.0"
58
59
  }
59
60
  }