@fanboynz/network-scanner 1.0.93 → 1.0.95

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +20 -1
  2. package/nwss.js +98 -13
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -73,10 +73,13 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
73
73
 
74
74
  | Argument | Description |
75
75
  |:---------------------------|:------------|
76
+ | `--cache-requests` | Cache HTTP requests to avoid re-requesting same URLs within scan |
76
77
  | `--validate-config` | Validate config.json file and exit |
77
78
  | `--validate-rules [file]` | Validate rule file format (uses --output/--compare files if no file specified) |
78
79
  | `--clean-rules [file]` | Clean rule files by removing invalid lines and optionally duplicates (uses --output/--compare files if no file specified) |
79
80
  | `--test-validation` | Run domain validation tests and exit |
81
+ | `--clear-cache` | Clear persistent cache before scanning (improves fresh start performance) |
82
+ | `--ignore-cache` | Bypass all smart caching functionality during scanning |
80
83
 
81
84
  ---
82
85
 
@@ -133,6 +136,7 @@ Example:
133
136
  | `url` | String or Array | - | Website URL(s) to scan |
134
137
  | `userAgent` | `chrome`, `firefox`, `safari` | - | User agent for page (latest versions: Chrome 131, Firefox 133, Safari 18.2) |
135
138
  | `filterRegex` | String or Array | `.*` | Regex or list of regexes to match requests |
139
+ | `regex_and` | Boolean | `false` | Use AND logic for multiple filterRegex patterns - ALL patterns must match the same URL |
136
140
  | `comments` | String or Array | - | String of comments or references |
137
141
  | `resourceTypes` | Array | `["script", "xhr", "image", "stylesheet"]` | What resource types to monitor |
138
142
  | `reload` | Integer | `1` | Number of times to reload page |
@@ -146,6 +150,8 @@ Example:
146
150
  | `subDomains` | `0` or `1` | `0` | 1 = preserve subdomains in output |
147
151
  | `blocked` | Array | - | Domains or regexes to block during scanning |
148
152
  | `even_blocked` | Boolean | `false` | Add matching rules even if requests are blocked |
153
+ | `bypass_cache` | Boolean | `false` | Skip all caching for this site's URLs |
154
+
149
155
 
150
156
  ### Redirect Handling Options
151
157
 
@@ -190,6 +196,10 @@ When a page redirects to a new domain, first-party/third-party detection is base
190
196
  |:---------------------|:-------|:-------:|:------------|
191
197
  | `cloudflare_phish` | Boolean | `false` | Auto-click through Cloudflare phishing warnings |
192
198
  | `cloudflare_bypass` | Boolean | `false` | Auto-solve Cloudflare "Verify you are human" challenges |
199
+ | `cloudflare_parallel_detection` | Boolean | `true` | Use parallel detection for faster Cloudflare checks |
200
+ | `cloudflare_max_retries` | Integer | `3` | Maximum retry attempts for Cloudflare operations |
201
+ | `cloudflare_cache_ttl` | Milliseconds | `300000` | TTL for Cloudflare detection cache (5 minutes) |
202
+ | `cloudflare_retry_on_error` | Boolean | `true` | Enable retry logic for Cloudflare operations |
193
203
  | `flowproxy_detection` | Boolean | `false` | Enable flowProxy protection detection and handling |
194
204
  | `flowproxy_page_timeout` | Milliseconds | `45000` | Page timeout for flowProxy sites |
195
205
  | `flowproxy_nav_timeout` | Milliseconds | `45000` | Navigation timeout for flowProxy sites |
@@ -240,6 +250,11 @@ When a page redirects to a new domain, first-party/third-party detection is base
240
250
  | `screenshot` | Boolean | `false` | Capture screenshot on load failure |
241
251
  | `headful` | Boolean | `false` | Launch browser with GUI for this site |
242
252
  | `adblock_rules` | Boolean | `false` | Generate adblock filter rules with resource types for this site |
253
+ | `interact_duration` | Milliseconds | `2000` | Duration of interaction simulation |
254
+ | `interact_scrolling` | Boolean | `true` | Enable scrolling simulation |
255
+ | `interact_clicks` | Boolean | `false` | Enable element clicking simulation |
256
+ | `interact_typing` | Boolean | `false` | Enable typing simulation |
257
+ | `interact_intensity` | String | `"medium"` | Interaction simulation intensity: "low", "medium", "high" |
243
258
 
244
259
  ### Global Configuration Options
245
260
 
@@ -254,7 +269,11 @@ These options go at the root level of your config.json:
254
269
  | `ignore_similar_threshold` | Integer | `80` | Similarity threshold percentage for ignore_similar |
255
270
  | `ignore_similar_ignored_domains` | Boolean | `true` | Ignore domains similar to ignoreDomains list |
256
271
  | `max_concurrent_sites` | Integer | `6` | Maximum concurrent site processing (1-50) |
257
- | `resource_cleanup_interval` | Integer | `180` | Browser restart interval in URLs processed (1-1000) |
272
+ | `resource_cleanup_interval` | Integer | `80` | Browser restart interval in URLs processed (1-1000) |
273
+ | `cache_path` | String | `".cache"` | Directory path for persistent cache storage |
274
+ | `cache_max_size` | Integer | `5000` | Maximum number of entries in cache |
275
+ | `cache_autosave_minutes` | Integer | `1` | Interval for automatic cache saves (minutes) |
276
+ | `cache_requests` | Boolean | `false` | Enable HTTP request response caching |
258
277
 
259
278
  ---
260
279
 
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v1.0.91 ===
1
+ // === Network scanner script (nwss.js) v1.0.95 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -125,7 +125,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
125
125
  const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive } = require('./lib/browserhealth');
126
126
 
127
127
  // --- Script Configuration & Constants ---
128
- const VERSION = '1.0.91'; // Script version
128
+ const VERSION = '1.0.95'; // Script version
129
129
 
130
130
  // get startTime
131
131
  const startTime = Date.now();
@@ -1584,8 +1584,16 @@ function setupFrameHandling(page, forceDebug) {
1584
1584
  // Strategy 2: Try injection with reduced complexity if browser is responsive
1585
1585
  if (browserResponsive) {
1586
1586
  try {
1587
+ // Add comprehensive timeout protection for evaluateOnNewDocument
1587
1588
  await Promise.race([
1589
+ // Main injection with all safety checks
1588
1590
  page.evaluateOnNewDocument(() => {
1591
+ // Wrap everything in try-catch to prevent page crashes
1592
+ try {
1593
+ // Add timeout check within the injection
1594
+ const injectionTimeout = setTimeout(() => {
1595
+ console.log('[evalOnDoc] Injection taking too long, aborting');
1596
+ }, 3000);
1589
1597
  // Prevent infinite reload loops
1590
1598
  let reloadCount = 0;
1591
1599
  const MAX_RELOADS = 2;
@@ -1646,19 +1654,39 @@ function setupFrameHandling(page, forceDebug) {
1646
1654
  return originalXHROpen.apply(this, arguments);
1647
1655
  }
1648
1656
  };
1657
+ clearTimeout(injectionTimeout);
1658
+ } catch (injectionError) {
1659
+ console.log('[evalOnDoc][error]', 'Injection failed:', injectionError.message);
1660
+ }
1649
1661
  }),
1650
- new Promise((_, reject) =>
1651
- setTimeout(() => reject(new Error('Injection timeout')), 8000)
1652
- )
1662
+ // Reduced timeout for faster failure
1663
+ new Promise((_, reject) => {
1664
+ setTimeout(() => {
1665
+ reject(new Error('evaluateOnNewDocument timeout - browser may be unresponsive'));
1666
+ }, 5000); // Reduced from 8000ms
1667
+ })
1653
1668
  ]);
1654
1669
  evalOnDocSuccess = true;
1655
1670
  if (forceDebug) {
1656
1671
  console.log(formatLogMessage('debug', `[evalOnDoc] Full injection successful for ${currentUrl}`));
1657
1672
  }
1658
1673
  } catch (fullInjectionErr) {
1674
+ // Enhanced error detection for CDP issues
1675
+ const isCDPError = fullInjectionErr.constructor.name === 'ProtocolError' ||
1676
+ fullInjectionErr.name === 'ProtocolError' ||
1677
+ fullInjectionErr.message.includes('addScriptToEvaluateOnNewDocument timed out') ||
1678
+ fullInjectionErr.message.includes('Protocol error');
1679
+
1659
1680
  if (forceDebug) {
1660
- console.log(formatLogMessage('debug', `[evalOnDoc] Full injection failed: ${fullInjectionErr.message}, trying simplified fallback`));
1681
+ const errorType = isCDPError ? 'CDP/Protocol error' : 'timeout/other';
1682
+ console.log(formatLogMessage('debug', `[evalOnDoc] Full injection failed (${errorType}): ${fullInjectionErr.message}`));
1661
1683
  }
1684
+
1685
+ // Skip fallback for CDP errors - they indicate browser communication issues
1686
+ if (isCDPError) {
1687
+ console.warn(formatLogMessage('warn', `[evalOnDoc] CDP communication failure - skipping injection for ${currentUrl}`));
1688
+ evalOnDocSuccess = false;
1689
+ } else {
1662
1690
 
1663
1691
  // Strategy 3: Fallback - Try minimal injection (just fetch monitoring)
1664
1692
  try {
@@ -1692,6 +1720,7 @@ function setupFrameHandling(page, forceDebug) {
1692
1720
  evalOnDocSuccess = false;
1693
1721
  }
1694
1722
  }
1723
+ }
1695
1724
  } else {
1696
1725
  if (forceDebug) {
1697
1726
  console.log(formatLogMessage('debug', `[evalOnDoc] Browser unresponsive, skipping injection for ${currentUrl}`));
@@ -2950,23 +2979,79 @@ function setupFrameHandling(page, forceDebug) {
2950
2979
  }
2951
2980
  }
2952
2981
 
2953
- if (useForceReload) {
2954
- // Force reload: disable cache, reload, re-enable cache
2982
+ let reloadSuccess = false;
2983
+
2984
+ if (useForceReload && !reloadSuccess) {
2985
+ // Attempt force reload: disable cache, reload, re-enable cache
2955
2986
  try {
2956
- await page.setCacheEnabled(false);
2957
- await page.reload({ waitUntil: 'domcontentloaded', timeout: Math.min(timeout, 12000) });
2958
- await page.setCacheEnabled(true);
2987
+ // Add timeout protection for setCacheEnabled operations
2988
+ await Promise.race([
2989
+ page.setCacheEnabled(false),
2990
+ new Promise((_, reject) =>
2991
+ setTimeout(() => reject(new Error('setCacheEnabled(false) timeout')), 5000)
2992
+ )
2993
+ ]);
2994
+
2995
+ await page.reload({ waitUntil: 'domcontentloaded', timeout: Math.min(timeout, 12000) });
2996
+
2997
+ await Promise.race([
2998
+ page.setCacheEnabled(true),
2999
+ new Promise((_, reject) =>
3000
+ setTimeout(() => reject(new Error('setCacheEnabled(true) timeout')), 5000)
3001
+ )
3002
+ ]);
3003
+
3004
+ reloadSuccess = true;
2959
3005
  if (forceDebug) console.log(formatLogMessage('debug', `Force reload #${i} completed for ${currentUrl}`));
3006
+
2960
3007
  } catch (forceReloadErr) {
2961
- console.warn(messageColors.warn(`[force reload #${i} failed] ${currentUrl}: ${forceReloadErr.message}`));
3008
+ console.warn(messageColors.warn(`[force reload #${i} failed] ${currentUrl}: ${forceReloadErr.message} - falling back to standard reload`));
3009
+ reloadSuccess = false; // Ensure we try standard reload
3010
+ }
3011
+ }
3012
+
3013
+ // Fallback to standard reload if force reload failed or wasn't attempted
3014
+ if (!reloadSuccess) {
3015
+ try {
3016
+ // Reduced timeout for faster failure detection
3017
+ const standardReloadTimeout = Math.min(timeout, 8000); // Reduced from 15000ms
3018
+ await page.reload({ waitUntil: 'domcontentloaded', timeout: standardReloadTimeout });
3019
+ if (forceDebug) console.log(formatLogMessage('debug', `Standard reload #${i} completed for ${currentUrl}`));
3020
+ } catch (standardReloadErr) {
3021
+ console.warn(messageColors.warn(`[standard reload #${i} failed] ${currentUrl}: ${standardReloadErr.message}`));
3022
+
3023
+ // Check if this is a persistent failure that should skip remaining reloads
3024
+ const isPersistentFailure = standardReloadErr.message.includes('Navigation timeout') ||
3025
+ standardReloadErr.message.includes('net::ERR_') ||
3026
+ standardReloadErr.message.includes('Protocol error') ||
3027
+ standardReloadErr.message.includes('Page crashed') ||
3028
+ // CDP and injection failures
3029
+ standardReloadErr.constructor.name === 'ProtocolError' ||
3030
+ standardReloadErr.name === 'ProtocolError' ||
3031
+ standardReloadErr.message.includes('addScriptToEvaluateOnNewDocument timed out') ||
3032
+ standardReloadErr.message.includes('Runtime.callFunctionOn timed out') ||
3033
+ standardReloadErr.message.includes('CDP injection timeout');
3034
+
3035
+ if (isPersistentFailure) {
3036
+ const remainingReloads = totalReloads - i;
3037
+ if (remainingReloads > 0 && forceDebug) {
3038
+ console.log(formatLogMessage('debug', `Persistent failure detected - skipping ${remainingReloads} remaining reload(s) for ${currentUrl}`));
3039
+ }
3040
+ // Break out of reload loop to move to next URL faster
3041
+ break;
3042
+ }
2962
3043
  }
2963
3044
  } else {
2964
3045
  // Regular reload
2965
3046
  await page.reload({ waitUntil: 'domcontentloaded', timeout: Math.min(timeout, 15000) });
2966
3047
  }
2967
-
3048
+
3049
+ // Only add delay if we're continuing with more reloads
3050
+ if (i < totalReloads) {
3051
+
2968
3052
  await fastTimeout(delayMs);
2969
3053
  }
3054
+ }
2970
3055
 
2971
3056
  if (dryRunMode) {
2972
3057
  // Get page title for dry run output
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "1.0.93",
3
+ "version": "1.0.95",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {