@fanboynz/network-scanner 1.0.80 → 1.0.81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/nwss.js +136 -45
  2. package/package.json +1 -1
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v1.0.80 ===
1
+ // === Network scanner script (nwss.js) v1.0.81 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -123,7 +123,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
123
123
  const { monitorBrowserHealth, isBrowserHealthy } = require('./lib/browserhealth');
124
124
 
125
125
  // --- Script Configuration & Constants ---
126
- const VERSION = '1.0.80'; // Script version
126
+ const VERSION = '1.0.81'; // Script version
127
127
 
128
128
  // get startTime
129
129
  const startTime = Date.now();
@@ -1348,10 +1348,7 @@ function setupFrameHandling(page, forceDebug) {
1348
1348
  }
1349
1349
 
1350
1350
  let siteCounter = 0;
1351
- const totalUrls = sites.reduce((sum, site) => {
1352
- const urls = Array.isArray(site.url) ? site.url.length : 1;
1353
- return sum + urls;
1354
- }, 0);
1351
+ // totalUrls now calculated from allTasks.length after URL flattening
1355
1352
 
1356
1353
  // --- Global CDP (Chrome DevTools Protocol) Session --- [COMMENT RE-ADDED PREVIOUSLY, relevant to old logic]
1357
1354
  // NOTE: This CDP session is attached to the initial browser page (e.g., about:blank).
@@ -1383,6 +1380,14 @@ function setupFrameHandling(page, forceDebug) {
1383
1380
  const siteLocalhostAlt = siteConfig.localhost_0_0_0_0 === true;
1384
1381
  const cloudflarePhishBypass = siteConfig.cloudflare_phish === true;
1385
1382
  const cloudflareBypass = siteConfig.cloudflare_bypass === true;
1383
+ // Add redirect and same-page loop protection
1384
+ const MAX_REDIRECT_DEPTH = siteConfig.max_redirects || 10;
1385
+ const redirectHistory = new Set();
1386
+ let redirectCount = 0;
1387
+ const pageLoadHistory = new Map(); // Track same-page reloads
1388
+ const MAX_SAME_PAGE_LOADS = 3;
1389
+ let currentPageUrl = currentUrl;
1390
+
1386
1391
  const sitePrivoxy = siteConfig.privoxy === true;
1387
1392
  const sitePihole = siteConfig.pihole === true;
1388
1393
  const flowproxyDetection = siteConfig.flowproxy_detection === true;
@@ -1564,6 +1569,31 @@ function setupFrameHandling(page, forceDebug) {
1564
1569
  }
1565
1570
  try {
1566
1571
  await page.evaluateOnNewDocument(() => {
1572
+ // Prevent infinite reload loops
1573
+ let reloadCount = 0;
1574
+ const MAX_RELOADS = 2;
1575
+ const originalReload = window.location.reload;
1576
+ const originalReplace = window.location.replace;
1577
+ const originalAssign = window.location.assign;
1578
+
1579
+ window.location.reload = function() {
1580
+ if (++reloadCount > MAX_RELOADS) {
1581
+ console.log('[loop-protection] Blocked excessive reload attempt');
1582
+ return;
1583
+ }
1584
+ return originalReload.apply(this, arguments);
1585
+ };
1586
+
1587
+ // Also protect against location.replace/assign to same URL
1588
+ const currentHref = window.location.href;
1589
+ window.location.replace = function(url) {
1590
+ if (url === currentHref && ++reloadCount > MAX_RELOADS) {
1591
+ console.log('[loop-protection] Blocked same-page replace attempt');
1592
+ return;
1593
+ }
1594
+ return originalReplace.apply(this, arguments);
1595
+ };
1596
+
1567
1597
  // This script intercepts and logs Fetch and XHR requests
1568
1598
  // from within the page context at the earliest possible moment.
1569
1599
  const originalFetch = window.fetch;
@@ -2542,11 +2572,44 @@ function setupFrameHandling(page, forceDebug) {
2542
2572
 
2543
2573
  const { finalUrl, redirected, redirectChain, originalUrl, redirectDomains } = navigationResult;
2544
2574
 
2575
+ // Check for same-page reload loops BEFORE redirect processing
2576
+ const loadCount = pageLoadHistory.get(currentUrl) || 0;
2577
+ pageLoadHistory.set(currentUrl, loadCount + 1);
2578
+
2579
+ if (loadCount >= MAX_SAME_PAGE_LOADS) {
2580
+ const samePageError = `Same page loaded ${loadCount + 1} times: ${currentUrl}`;
2581
+ console.warn(`⚠ ${samePageError} - possible infinite reload loop`);
2582
+ throw new Error(`Same-page loop detected: ${samePageError}`);
2583
+ }
2584
+
2585
+ currentPageUrl = finalUrl || currentUrl;
2586
+
2545
2587
  // Handle redirect to new domain
2546
2588
  if (redirected) {
2547
2589
  const originalDomain = safeGetDomain(originalUrl);
2548
2590
  const finalDomain = safeGetDomain(finalUrl);
2549
2591
 
2592
+ // Increment redirect counter
2593
+ redirectCount++;
2594
+
2595
+ // Check for redirect loops
2596
+ if (redirectHistory.has(finalUrl)) {
2597
+ const loopError = `Redirect loop detected: ${finalUrl} already visited in chain`;
2598
+ console.warn(`⚠ ${loopError} for ${currentUrl}`);
2599
+ throw new Error(loopError);
2600
+ }
2601
+
2602
+ // Check redirect depth
2603
+ if (redirectCount > MAX_REDIRECT_DEPTH) {
2604
+ const depthError = `Maximum redirect depth (${MAX_REDIRECT_DEPTH}) exceeded`;
2605
+ console.warn(`⚠ ${depthError} for ${currentUrl}`);
2606
+ throw new Error(`${depthError}: ${redirectCount} redirects`);
2607
+ }
2608
+
2609
+ // Add URLs to history
2610
+ redirectHistory.add(currentUrl);
2611
+ redirectHistory.add(finalUrl);
2612
+
2550
2613
  // Add redirect destination to first-party domains immediately
2551
2614
  if (finalDomain) {
2552
2615
  firstPartyDomains.add(finalDomain);
@@ -2917,57 +2980,72 @@ function setupFrameHandling(page, forceDebug) {
2917
2980
  // Temporarily store the pLimit function
2918
2981
  const originalLimit = limit;
2919
2982
 
2920
- // Group URLs by site to respect site boundaries during cleanup
2921
- const siteGroups = [];
2922
- let currentUrlCount = 0;
2923
-
2983
+ // Create a flat list of all URL tasks with their site configs for true concurrency
2984
+ const allTasks = [];
2924
2985
  for (const site of sites) {
2925
-
2926
2986
  const urlsToProcess = Array.isArray(site.url) ? site.url : [site.url];
2927
- siteGroups.push({
2928
- config: site,
2929
- urls: urlsToProcess
2987
+ urlsToProcess.forEach(url => {
2988
+ allTasks.push({
2989
+ url,
2990
+ config: site,
2991
+ taskId: allTasks.length // For tracking
2992
+ });
2930
2993
  });
2931
- currentUrlCount += urlsToProcess.length;
2932
- }
2933
- if (!silentMode && currentUrlCount > 0) {
2934
- console.log(`\n${messageColors.processing('Processing')} ${currentUrlCount} URLs across ${siteGroups.length} sites with concurrency ${MAX_CONCURRENT_SITES}...`);
2935
- if (currentUrlCount > RESOURCE_CLEANUP_INTERVAL) {
2936
- console.log(messageColors.processing('Browser will restart every') + ` ~${RESOURCE_CLEANUP_INTERVAL} URLs to free resources`);
2937
- }
2938
2994
  }
2995
+
2996
+ const totalUrls = allTasks.length;
2939
2997
 
2940
2998
  let results = [];
2941
2999
  let processedUrlCount = 0;
2942
3000
  let urlsSinceLastCleanup = 0;
2943
3001
 
2944
- // Process sites one by one, but restart browser when hitting URL limits
2945
- for (let siteIndex = 0; siteIndex < siteGroups.length; siteIndex++) {
2946
- const siteGroup = siteGroups[siteIndex];
3002
+ if (!silentMode && totalUrls > 0) {
3003
+ console.log(`\n${messageColors.processing('Processing')} ${totalUrls} URLs with TRUE concurrency ${MAX_CONCURRENT_SITES}...`);
3004
+ if (totalUrls > RESOURCE_CLEANUP_INTERVAL) {
3005
+ console.log(messageColors.processing('Browser will restart every') + ` ~${RESOURCE_CLEANUP_INTERVAL} URLs to free resources`);
3006
+ }
3007
+ }
3008
+
3009
+ // Hang detection for debugging concurrency issues
3010
+ let currentBatchInfo = { batchStart: 0, batchSize: 0 };
3011
+ const hangDetectionInterval = setInterval(() => {
3012
+ const currentBatch = Math.floor(currentBatchInfo.batchStart / RESOURCE_CLEANUP_INTERVAL) + 1;
3013
+ const totalBatches = Math.ceil(totalUrls / RESOURCE_CLEANUP_INTERVAL);
3014
+ console.log(formatLogMessage('debug', `[HANG CHECK] Processed: ${processedUrlCount}/${totalUrls} URLs, Batch: ${currentBatch}/${totalBatches}, Current batch size: ${currentBatchInfo.batchSize}`));
3015
+ console.log(formatLogMessage('debug', `[HANG CHECK] URLs since cleanup: ${urlsSinceLastCleanup}, Recent failures: ${results.slice(-3).filter(r => !r.success).length}/3`));
3016
+ }, 30000); // Check every 30 seconds
3017
+
3018
+ // Process URLs in batches to maintain concurrency while allowing browser restarts
3019
+ for (let batchStart = 0; batchStart < totalUrls; batchStart += RESOURCE_CLEANUP_INTERVAL) {
3020
+ const batchEnd = Math.min(batchStart + RESOURCE_CLEANUP_INTERVAL, totalUrls);
3021
+ const currentBatch = allTasks.slice(batchStart, batchEnd);
2947
3022
 
2948
3023
  // Check browser health before processing each site
2949
3024
  const healthCheck = await monitorBrowserHealth(browser, {}, {
2950
- siteIndex,
2951
- totalSites: siteGroups.length,
3025
+ siteIndex: Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL),
3026
+ totalSites: Math.ceil(totalUrls / RESOURCE_CLEANUP_INTERVAL),
2952
3027
  urlsSinceCleanup: urlsSinceLastCleanup,
2953
3028
  cleanupInterval: RESOURCE_CLEANUP_INTERVAL,
2954
3029
  forceDebug,
2955
3030
  silentMode
2956
3031
  });
2957
3032
 
2958
- // Also check if browser was unhealthy during recent processing
3033
+ // Check if browser was unhealthy during recent processing
2959
3034
  const recentResults = results.slice(-3);
2960
3035
  const hasRecentFailures = recentResults.filter(r => !r.success).length >= 2;
2961
- const shouldRestartFromFailures = hasRecentFailures && urlsSinceLastCleanup > 3; // More aggressive restart
3036
+ const shouldRestartFromFailures = hasRecentFailures && urlsSinceLastCleanup > 3;
2962
3037
 
2963
- const siteUrlCount = siteGroup.urls.length;
3038
+ const batchSize = currentBatch.length;
2964
3039
 
3040
+ // Update hang detection info
3041
+ currentBatchInfo = { batchStart, batchSize };
3042
+
2965
3043
  // Check if processing this entire site would exceed cleanup interval OR health check suggests restart
2966
- const wouldExceedLimit = urlsSinceLastCleanup + siteUrlCount >= Math.min(RESOURCE_CLEANUP_INTERVAL, 100); // More frequent restarts
2967
- const isNotLastSite = siteIndex < siteGroups.length - 1;
3044
+ const wouldExceedLimit = urlsSinceLastCleanup + batchSize >= Math.min(RESOURCE_CLEANUP_INTERVAL, 100);
3045
+ const isNotLastBatch = batchEnd < totalUrls;
2968
3046
 
2969
3047
  // Restart browser if we've processed enough URLs, health check suggests it, and this isn't the last site
2970
- if ((wouldExceedLimit || healthCheck.shouldRestart || shouldRestartFromFailures) && urlsSinceLastCleanup > 0 && isNotLastSite) {
3048
+ if ((wouldExceedLimit || healthCheck.shouldRestart || shouldRestartFromFailures) && urlsSinceLastCleanup > 0 && isNotLastBatch) {
2971
3049
 
2972
3050
  let restartReason = 'Unknown';
2973
3051
  if (healthCheck.shouldRestart) {
@@ -3023,7 +3101,7 @@ function setupFrameHandling(page, forceDebug) {
3023
3101
 
3024
3102
  // Create new browser for next batch
3025
3103
  browser = await createBrowser();
3026
- if (forceDebug) console.log(formatLogMessage('debug', `New browser instance created for site ${siteIndex + 1}`));
3104
+ if (forceDebug) console.log(formatLogMessage('debug', `New browser instance created for batch ${Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL) + 1}`));
3027
3105
 
3028
3106
  // Reset cleanup counter and add delay
3029
3107
  urlsSinceLastCleanup = 0;
@@ -3031,19 +3109,29 @@ function setupFrameHandling(page, forceDebug) {
3031
3109
  }
3032
3110
 
3033
3111
  if (forceDebug) {
3034
- console.log(formatLogMessage('debug', `Processing site ${siteIndex + 1}/${siteGroups.length}: ${siteUrlCount} URL(s) (total processed: ${processedUrlCount})`));
3112
+ console.log(formatLogMessage('debug', `Processing batch ${Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL) + 1}: ${batchSize} URL(s) (total processed: ${processedUrlCount})`));
3113
+ }
3114
+
3115
+ // Log start of concurrent processing for hang detection
3116
+ if (forceDebug) {
3117
+ console.log(formatLogMessage('debug', `[CONCURRENCY] Starting ${batchSize} concurrent tasks with limit ${MAX_CONCURRENT_SITES}`));
3035
3118
  }
3036
3119
 
3037
- // Create tasks with current browser instance and process them
3038
- const siteTasks = siteGroup.urls.map(url => originalLimit(() => processUrl(url, siteGroup.config, browser)));
3039
- const siteResults = await Promise.all(siteTasks);
3120
+ // Create tasks with current browser instance and process them with TRUE concurrency
3121
+ const batchTasks = currentBatch.map(task => originalLimit(() => processUrl(task.url, task.config, browser)));
3122
+ const batchResults = await Promise.all(batchTasks);
3040
3123
 
3041
3124
  // Check if any results indicate immediate restart is needed
3042
- const needsImmediateRestart = siteResults.some(r => r.needsImmediateRestart);
3125
+ const needsImmediateRestart = batchResults.some(r => r.needsImmediateRestart);
3126
+
3127
+ // Log completion of concurrent processing
3128
+ if (forceDebug) {
3129
+ console.log(formatLogMessage('debug', `[CONCURRENCY] Completed ${batchSize} concurrent tasks, ${batchResults.filter(r => r.success).length} successful`));
3130
+ }
3043
3131
 
3044
3132
  // Enhanced error reporting for Puppeteer 23.x
3045
3133
  if (forceDebug) {
3046
- const errorSummary = siteResults.reduce((acc, result) => {
3134
+ const errorSummary = batchResults.reduce((acc, result) => {
3047
3135
  if (!result.success && result.errorType) {
3048
3136
  acc[result.errorType] = (acc[result.errorType] || 0) + 1;
3049
3137
  }
@@ -3051,20 +3139,20 @@ function setupFrameHandling(page, forceDebug) {
3051
3139
  }, {});
3052
3140
 
3053
3141
  if (Object.keys(errorSummary).length > 0) {
3054
- console.log(formatLogMessage('debug', `Site ${siteIndex + 1} error summary:`));
3142
+ console.log(formatLogMessage('debug', `Batch ${Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL) + 1} error summary:`));
3055
3143
  Object.entries(errorSummary).forEach(([errorType, count]) => {
3056
3144
  console.log(formatLogMessage('debug', ` ${errorType}: ${count} error(s)`));
3057
3145
  });
3058
3146
  }
3059
3147
  }
3060
3148
 
3061
- results.push(...siteResults);
3149
+ results.push(...batchResults);
3062
3150
 
3063
- processedUrlCount += siteUrlCount;
3064
- urlsSinceLastCleanup += siteUrlCount;
3151
+ processedUrlCount += batchSize;
3152
+ urlsSinceLastCleanup += batchSize;
3065
3153
 
3066
3154
  // Force browser restart if any URL had critical errors
3067
- if (needsImmediateRestart && siteIndex < siteGroups.length - 1) {
3155
+ if (needsImmediateRestart && isNotLastBatch) {
3068
3156
  if (!silentMode) {
3069
3157
  console.log(`\n${messageColors.fileOp('🔄 Emergency browser restart:')} Critical browser errors detected`);
3070
3158
  }
@@ -3084,7 +3172,7 @@ function setupFrameHandling(page, forceDebug) {
3084
3172
  try {
3085
3173
  // Enhanced emergency restart for Puppeteer 23.x
3086
3174
  if (forceDebug) {
3087
- console.log(formatLogMessage('debug', `Emergency restart triggered by errors: ${siteResults.filter(r => r.needsImmediateRestart).map(r => r.error).join(', ')}`));
3175
+ console.log(formatLogMessage('debug', `Emergency restart triggered by errors: ${batchResults.filter(r => r.needsImmediateRestart).map(r => r.error).join(', ')}`));
3088
3176
  }
3089
3177
 
3090
3178
  // Try to gracefully close all pages first
@@ -3113,6 +3201,9 @@ function setupFrameHandling(page, forceDebug) {
3113
3201
  }
3114
3202
  }
3115
3203
 
3204
+ // Clear hang detection interval
3205
+ clearInterval(hangDetectionInterval);
3206
+
3116
3207
  // === POST-SCAN PROCESSING ===
3117
3208
  // Clean up first-party domains and validate results
3118
3209
  if (!dryRunMode) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "1.0.80",
3
+ "version": "1.0.81",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {