@fanboynz/network-scanner 1.0.94 → 1.0.96

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/README.md +20 -1
  2. package/lib/cdp.js +39 -2
  3. package/nwss.js +70 -19
  4. package/package.json +1 -1
package/README.md CHANGED
@@ -73,10 +73,13 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
73
73
 
74
74
  | Argument | Description |
75
75
  |:---------------------------|:------------|
76
+ | `--cache-requests` | Cache HTTP requests to avoid re-requesting same URLs within scan |
76
77
  | `--validate-config` | Validate config.json file and exit |
77
78
  | `--validate-rules [file]` | Validate rule file format (uses --output/--compare files if no file specified) |
78
79
  | `--clean-rules [file]` | Clean rule files by removing invalid lines and optionally duplicates (uses --output/--compare files if no file specified) |
79
80
  | `--test-validation` | Run domain validation tests and exit |
81
+ | `--clear-cache` | Clear persistent cache before scanning (improves fresh start performance) |
82
+ | `--ignore-cache` | Bypass all smart caching functionality during scanning |
80
83
 
81
84
  ---
82
85
 
@@ -133,6 +136,7 @@ Example:
133
136
  | `url` | String or Array | - | Website URL(s) to scan |
134
137
  | `userAgent` | `chrome`, `firefox`, `safari` | - | User agent for page (latest versions: Chrome 131, Firefox 133, Safari 18.2) |
135
138
  | `filterRegex` | String or Array | `.*` | Regex or list of regexes to match requests |
139
+ | `regex_and` | Boolean | `false` | Use AND logic for multiple filterRegex patterns - ALL patterns must match the same URL |
136
140
  | `comments` | String or Array | - | String of comments or references |
137
141
  | `resourceTypes` | Array | `["script", "xhr", "image", "stylesheet"]` | What resource types to monitor |
138
142
  | `reload` | Integer | `1` | Number of times to reload page |
@@ -146,6 +150,8 @@ Example:
146
150
  | `subDomains` | `0` or `1` | `0` | 1 = preserve subdomains in output |
147
151
  | `blocked` | Array | - | Domains or regexes to block during scanning |
148
152
  | `even_blocked` | Boolean | `false` | Add matching rules even if requests are blocked |
153
+ | `bypass_cache` | Boolean | `false` | Skip all caching for this site's URLs |
154
+
149
155
 
150
156
  ### Redirect Handling Options
151
157
 
@@ -190,6 +196,10 @@ When a page redirects to a new domain, first-party/third-party detection is base
190
196
  |:---------------------|:-------|:-------:|:------------|
191
197
  | `cloudflare_phish` | Boolean | `false` | Auto-click through Cloudflare phishing warnings |
192
198
  | `cloudflare_bypass` | Boolean | `false` | Auto-solve Cloudflare "Verify you are human" challenges |
199
+ | `cloudflare_parallel_detection` | Boolean | `true` | Use parallel detection for faster Cloudflare checks |
200
+ | `cloudflare_max_retries` | Integer | `3` | Maximum retry attempts for Cloudflare operations |
201
+ | `cloudflare_cache_ttl` | Milliseconds | `300000` | TTL for Cloudflare detection cache (5 minutes) |
202
+ | `cloudflare_retry_on_error` | Boolean | `true` | Enable retry logic for Cloudflare operations |
193
203
  | `flowproxy_detection` | Boolean | `false` | Enable flowProxy protection detection and handling |
194
204
  | `flowproxy_page_timeout` | Milliseconds | `45000` | Page timeout for flowProxy sites |
195
205
  | `flowproxy_nav_timeout` | Milliseconds | `45000` | Navigation timeout for flowProxy sites |
@@ -240,6 +250,11 @@ When a page redirects to a new domain, first-party/third-party detection is base
240
250
  | `screenshot` | Boolean | `false` | Capture screenshot on load failure |
241
251
  | `headful` | Boolean | `false` | Launch browser with GUI for this site |
242
252
  | `adblock_rules` | Boolean | `false` | Generate adblock filter rules with resource types for this site |
253
+ | `interact_duration` | Milliseconds | `2000` | Duration of interaction simulation |
254
+ | `interact_scrolling` | Boolean | `true` | Enable scrolling simulation |
255
+ | `interact_clicks` | Boolean | `false` | Enable element clicking simulation |
256
+ | `interact_typing` | Boolean | `false` | Enable typing simulation |
257
+ | `interact_intensity` | String | `"medium"` | Interaction simulation intensity: "low", "medium", "high" |
243
258
 
244
259
  ### Global Configuration Options
245
260
 
@@ -254,7 +269,11 @@ These options go at the root level of your config.json:
254
269
  | `ignore_similar_threshold` | Integer | `80` | Similarity threshold percentage for ignore_similar |
255
270
  | `ignore_similar_ignored_domains` | Boolean | `true` | Ignore domains similar to ignoreDomains list |
256
271
  | `max_concurrent_sites` | Integer | `6` | Maximum concurrent site processing (1-50) |
257
- | `resource_cleanup_interval` | Integer | `180` | Browser restart interval in URLs processed (1-1000) |
272
+ | `resource_cleanup_interval` | Integer | `80` | Browser restart interval in URLs processed (1-1000) |
273
+ | `cache_path` | String | `".cache"` | Directory path for persistent cache storage |
274
+ | `cache_max_size` | Integer | `5000` | Maximum number of entries in cache |
275
+ | `cache_autosave_minutes` | Integer | `1` | Interval for automatic cache saves (minutes) |
276
+ | `cache_requests` | Boolean | `false` | Enable HTTP request response caching |
258
277
 
259
278
  ---
260
279
 
package/lib/cdp.js CHANGED
@@ -27,6 +27,36 @@
27
27
 
28
28
  const { formatLogMessage } = require('./colorize');
29
29
 
/**
 * Creates a new page with timeout protection to prevent CDP hangs.
 *
 * Races `browser.newPage()` against a timer. The timer is always cleared once
 * the race settles, so a successful call does not leave a pending timeout
 * behind that keeps the Node.js event loop alive. If the timeout wins and the
 * page is created afterwards, that orphaned page is closed on a best-effort
 * basis because the caller never receives a handle to it.
 *
 * @param {import('puppeteer').Browser} browser - Browser instance
 * @param {number} timeout - Timeout in milliseconds (default: 30000)
 * @returns {Promise<import('puppeteer').Page>} Page instance
 * @throws {Error} 'Page creation timeout - browser may be unresponsive' when
 *   the page is not created within `timeout` ms; any rejection from
 *   `browser.newPage()` is propagated unchanged.
 */
async function createPageWithTimeout(browser, timeout = 30000) {
  let timer;
  let timedOut = false;

  // Start page creation first so a synchronous throw never leaves a timer armed.
  const pagePromise = Promise.resolve(browser.newPage());
  const timeoutPromise = new Promise((_, reject) => {
    timer = setTimeout(() => {
      timedOut = true;
      reject(new Error('Page creation timeout - browser may be unresponsive'));
    }, timeout);
  });

  try {
    return await Promise.race([pagePromise, timeoutPromise]);
  } finally {
    // Always release the timer; otherwise it stays pending for up to `timeout` ms.
    clearTimeout(timer);
    if (timedOut) {
      // The caller has given up on this page. If it still shows up later,
      // close it so it does not leak. Failures here are deliberately ignored:
      // the browser is already considered unresponsive.
      pagePromise.then((page) => page.close()).catch(() => {});
    }
  }
}
44
+
/**
 * Sets request interception with timeout protection.
 *
 * Races `page.setRequestInterception(true)` against a timer. The timer is
 * always cleared once the race settles, so a successful call does not leave a
 * pending timeout behind that keeps the Node.js event loop alive.
 *
 * @param {import('puppeteer').Page} page - Page instance
 * @param {number} timeout - Timeout in milliseconds (default: 15000)
 * @returns {Promise<void>}
 * @throws {Error} 'Request interception setup timeout' when interception is
 *   not enabled within `timeout` ms; any rejection from
 *   `page.setRequestInterception()` is propagated unchanged.
 */
async function setRequestInterceptionWithTimeout(page, timeout = 15000) {
  let timer;

  // Start the CDP call first so a synchronous throw never leaves a timer armed.
  const interceptionPromise = page.setRequestInterception(true);
  const timeoutPromise = new Promise((_, reject) => {
    timer = setTimeout(
      () => reject(new Error('Request interception setup timeout')),
      timeout
    );
  });

  try {
    return await Promise.race([interceptionPromise, timeoutPromise]);
  } finally {
    // Always release the timer; otherwise it stays pending for up to `timeout` ms.
    clearTimeout(timer);
  }
}
59
+
30
60
  /**
31
61
  * Creates and manages a CDP session for network monitoring
32
62
  *
@@ -88,8 +118,13 @@ async function createCDPSession(page, currentUrl, options = {}) {
88
118
 
89
119
  try {
90
120
  // Create CDP session using modern Puppeteer 20+ API
91
- // page.target().createCDPSession() was deprecated in 19+ and removed in 20+
92
- cdpSession = await page.createCDPSession();
121
+ // Add timeout protection for CDP session creation
122
+ cdpSession = await Promise.race([
123
+ page.createCDPSession(),
124
+ new Promise((_, reject) =>
125
+ setTimeout(() => reject(new Error('CDP session creation timeout')), 20000)
126
+ )
127
+ ]);
93
128
 
94
129
  // Enable network domain - required for network event monitoring
95
130
  await cdpSession.send('Network.enable');
@@ -302,6 +337,8 @@ async function createEnhancedCDPSession(page, currentUrl, options = {}) {
302
337
  // - Some features may not work in --no-sandbox mode
303
338
  module.exports = {
304
339
  createCDPSession,
340
+ createPageWithTimeout,
341
+ setRequestInterceptionWithTimeout,
305
342
  validateCDPConfig,
306
343
  createEnhancedCDPSession
307
344
  };
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v1.0.94 ===
1
+ // === Network scanner script (nwss.js) v1.0.96 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -33,7 +33,7 @@ const { createNetToolsHandler, createEnhancedDryRunCallback, validateWhoisAvaila
33
33
  // File compare
34
34
  const { loadComparisonRules, filterUniqueRules } = require('./lib/compare');
35
35
  // CDP functionality
36
- const { createCDPSession } = require('./lib/cdp');
36
+ const { createCDPSession, createPageWithTimeout, setRequestInterceptionWithTimeout } = require('./lib/cdp');
37
37
  // Post-processing cleanup
38
38
  const { processResults } = require('./lib/post-processing');
39
39
  // Colorize various text when used
@@ -125,7 +125,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
125
125
  const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive } = require('./lib/browserhealth');
126
126
 
127
127
  // --- Script Configuration & Constants ---
128
- const VERSION = '1.0.94'; // Script version
128
+ const VERSION = '1.0.96'; // Script version
129
129
 
130
130
  // get startTime
131
131
  const startTime = Date.now();
@@ -1465,7 +1465,7 @@ function setupFrameHandling(page, forceDebug) {
1465
1465
  if (browserInstance.process() && browserInstance.process().killed) {
1466
1466
  throw new Error('Browser process was killed - restart required');
1467
1467
  }
1468
- page = await browserInstance.newPage();
1468
+ page = await createPageWithTimeout(browserInstance, 30000);
1469
1469
 
1470
1470
  // Enhanced page validation for Puppeteer 23.x
1471
1471
  if (!page || page.isClosed()) {
@@ -1584,8 +1584,16 @@ function setupFrameHandling(page, forceDebug) {
1584
1584
  // Strategy 2: Try injection with reduced complexity if browser is responsive
1585
1585
  if (browserResponsive) {
1586
1586
  try {
1587
+ // Add comprehensive timeout protection for evaluateOnNewDocument
1587
1588
  await Promise.race([
1589
+ // Main injection with all safety checks
1588
1590
  page.evaluateOnNewDocument(() => {
1591
+ // Wrap everything in try-catch to prevent page crashes
1592
+ try {
1593
+ // Add timeout check within the injection
1594
+ const injectionTimeout = setTimeout(() => {
1595
+ console.log('[evalOnDoc] Injection taking too long, aborting');
1596
+ }, 3000);
1589
1597
  // Prevent infinite reload loops
1590
1598
  let reloadCount = 0;
1591
1599
  const MAX_RELOADS = 2;
@@ -1646,19 +1654,39 @@ function setupFrameHandling(page, forceDebug) {
1646
1654
  return originalXHROpen.apply(this, arguments);
1647
1655
  }
1648
1656
  };
1657
+ clearTimeout(injectionTimeout);
1658
+ } catch (injectionError) {
1659
+ console.log('[evalOnDoc][error]', 'Injection failed:', injectionError.message);
1660
+ }
1649
1661
  }),
1650
- new Promise((_, reject) =>
1651
- setTimeout(() => reject(new Error('Injection timeout')), 8000)
1652
- )
1662
+ // Reduced timeout for faster failure
1663
+ new Promise((_, reject) => {
1664
+ setTimeout(() => {
1665
+ reject(new Error('evaluateOnNewDocument timeout - browser may be unresponsive'));
1666
+ }, 5000); // Reduced from 8000ms
1667
+ })
1653
1668
  ]);
1654
1669
  evalOnDocSuccess = true;
1655
1670
  if (forceDebug) {
1656
1671
  console.log(formatLogMessage('debug', `[evalOnDoc] Full injection successful for ${currentUrl}`));
1657
1672
  }
1658
1673
  } catch (fullInjectionErr) {
1674
+ // Enhanced error detection for CDP issues
1675
+ const isCDPError = fullInjectionErr.constructor.name === 'ProtocolError' ||
1676
+ fullInjectionErr.name === 'ProtocolError' ||
1677
+ fullInjectionErr.message.includes('addScriptToEvaluateOnNewDocument timed out') ||
1678
+ fullInjectionErr.message.includes('Protocol error');
1679
+
1659
1680
  if (forceDebug) {
1660
- console.log(formatLogMessage('debug', `[evalOnDoc] Full injection failed: ${fullInjectionErr.message}, trying simplified fallback`));
1681
+ const errorType = isCDPError ? 'CDP/Protocol error' : 'timeout/other';
1682
+ console.log(formatLogMessage('debug', `[evalOnDoc] Full injection failed (${errorType}): ${fullInjectionErr.message}`));
1661
1683
  }
1684
+
1685
+ // Skip fallback for CDP errors - they indicate browser communication issues
1686
+ if (isCDPError) {
1687
+ console.warn(formatLogMessage('warn', `[evalOnDoc] CDP communication failure - skipping injection for ${currentUrl}`));
1688
+ evalOnDocSuccess = false;
1689
+ } else {
1662
1690
 
1663
1691
  // Strategy 3: Fallback - Try minimal injection (just fetch monitoring)
1664
1692
  try {
@@ -1692,6 +1720,7 @@ function setupFrameHandling(page, forceDebug) {
1692
1720
  evalOnDocSuccess = false;
1693
1721
  }
1694
1722
  }
1723
+ }
1695
1724
  } else {
1696
1725
  if (forceDebug) {
1697
1726
  console.log(formatLogMessage('debug', `[evalOnDoc] Browser unresponsive, skipping injection for ${currentUrl}`));
@@ -1750,13 +1779,8 @@ function setupFrameHandling(page, forceDebug) {
1750
1779
 
1751
1780
  // Protected request interception setup with timeout
1752
1781
  try {
1753
- // Test if network operations are responsive before enabling request interception
1754
- await Promise.race([
1755
- page.setRequestInterception(true),
1756
- new Promise((_, reject) =>
1757
- setTimeout(() => reject(new Error('Network.enable timeout')), 10000)
1758
- )
1759
- ]);
1782
+ // Use timeout-protected request interception setup
1783
+ await setRequestInterceptionWithTimeout(page, 15000);
1760
1784
 
1761
1785
  if (forceDebug) {
1762
1786
  console.log(formatLogMessage('debug', `Request interception enabled successfully for ${currentUrl}`));
@@ -1765,13 +1789,13 @@ function setupFrameHandling(page, forceDebug) {
1765
1789
  if (networkErr.message.includes('timed out') ||
1766
1790
  networkErr.message.includes('Network.enable') ||
1767
1791
  networkErr.message.includes('timeout')) {
1768
- console.warn(formatLogMessage('warn', `Network setup failed for ${currentUrl}: ${networkErr.message} - triggering browser restart`));
1792
+ console.warn(formatLogMessage('warn', `Request interception setup failed for ${currentUrl}: ${networkErr.message} - triggering browser restart`));
1769
1793
  return {
1770
1794
  url: currentUrl,
1771
1795
  rules: [],
1772
1796
  success: false,
1773
1797
  needsImmediateRestart: true,
1774
- error: 'Network.enable timeout - browser restart required'
1798
+ error: 'Request interception timeout - browser restart required'
1775
1799
  };
1776
1800
  }
1777
1801
  throw networkErr; // Re-throw other errors
@@ -2984,18 +3008,45 @@ function setupFrameHandling(page, forceDebug) {
2984
3008
  // Fallback to standard reload if force reload failed or wasn't attempted
2985
3009
  if (!reloadSuccess) {
2986
3010
  try {
2987
- await page.reload({ waitUntil: 'domcontentloaded', timeout: Math.min(timeout, 15000) });
3011
+ // Reduced timeout for faster failure detection
3012
+ const standardReloadTimeout = Math.min(timeout, 8000); // Reduced from 15000ms
3013
+ await page.reload({ waitUntil: 'domcontentloaded', timeout: standardReloadTimeout });
2988
3014
  if (forceDebug) console.log(formatLogMessage('debug', `Standard reload #${i} completed for ${currentUrl}`));
2989
3015
  } catch (standardReloadErr) {
2990
3016
  console.warn(messageColors.warn(`[standard reload #${i} failed] ${currentUrl}: ${standardReloadErr.message}`));
3017
+
3018
+ // Check if this is a persistent failure that should skip remaining reloads
3019
+ const isPersistentFailure = standardReloadErr.message.includes('Navigation timeout') ||
3020
+ standardReloadErr.message.includes('net::ERR_') ||
3021
+ standardReloadErr.message.includes('Protocol error') ||
3022
+ standardReloadErr.message.includes('Page crashed') ||
3023
+ // CDP and injection failures
3024
+ standardReloadErr.constructor.name === 'ProtocolError' ||
3025
+ standardReloadErr.name === 'ProtocolError' ||
3026
+ standardReloadErr.message.includes('addScriptToEvaluateOnNewDocument timed out') ||
3027
+ standardReloadErr.message.includes('Runtime.callFunctionOn timed out') ||
3028
+ standardReloadErr.message.includes('CDP injection timeout');
3029
+
3030
+ if (isPersistentFailure) {
3031
+ const remainingReloads = totalReloads - i;
3032
+ if (remainingReloads > 0 && forceDebug) {
3033
+ console.log(formatLogMessage('debug', `Persistent failure detected - skipping ${remainingReloads} remaining reload(s) for ${currentUrl}`));
3034
+ }
3035
+ // Break out of reload loop to move to next URL faster
3036
+ break;
3037
+ }
2991
3038
  }
2992
3039
  } else {
2993
3040
  // Regular reload
2994
3041
  await page.reload({ waitUntil: 'domcontentloaded', timeout: Math.min(timeout, 15000) });
2995
3042
  }
2996
-
3043
+
3044
+ // Only add delay if we're continuing with more reloads
3045
+ if (i < totalReloads) {
3046
+
2997
3047
  await fastTimeout(delayMs);
2998
3048
  }
3049
+ }
2999
3050
 
3000
3051
  if (dryRunMode) {
3001
3052
  // Get page title for dry run output
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "1.0.94",
3
+ "version": "1.0.96",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {