@fanboynz/network-scanner 1.0.93 → 1.0.95
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -1
- package/nwss.js +98 -13
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -73,10 +73,13 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
|
|
|
73
73
|
|
|
74
74
|
| Argument | Description |
|
|
75
75
|
|:---------------------------|:------------|
|
|
76
|
+
| `--cache-requests` | Cache HTTP requests to avoid re-requesting the same URLs within a scan |
|
|
76
77
|
| `--validate-config` | Validate config.json file and exit |
|
|
77
78
|
| `--validate-rules [file]` | Validate rule file format (uses --output/--compare files if no file specified) |
|
|
78
79
|
| `--clean-rules [file]` | Clean rule files by removing invalid lines and optionally duplicates (uses --output/--compare files if no file specified) |
|
|
79
80
|
| `--test-validation` | Run domain validation tests and exit |
|
|
81
|
+
| `--clear-cache` | Clear persistent cache before scanning (improves fresh start performance) |
|
|
82
|
+
| `--ignore-cache` | Bypass all smart caching functionality during scanning |
|
|
80
83
|
|
|
81
84
|
---
|
|
82
85
|
|
|
@@ -133,6 +136,7 @@ Example:
|
|
|
133
136
|
| `url` | String or Array | - | Website URL(s) to scan |
|
|
134
137
|
| `userAgent` | `chrome`, `firefox`, `safari` | - | User agent for page (latest versions: Chrome 131, Firefox 133, Safari 18.2) |
|
|
135
138
|
| `filterRegex` | String or Array | `.*` | Regex or list of regexes to match requests |
|
|
139
|
+
| `regex_and` | Boolean | `false` | Use AND logic for multiple filterRegex patterns - ALL patterns must match the same URL |
|
|
136
140
|
| `comments` | String or Array | - | String of comments or references |
|
|
137
141
|
| `resourceTypes` | Array | `["script", "xhr", "image", "stylesheet"]` | What resource types to monitor |
|
|
138
142
|
| `reload` | Integer | `1` | Number of times to reload page |
|
|
@@ -146,6 +150,8 @@ Example:
|
|
|
146
150
|
| `subDomains` | `0` or `1` | `0` | 1 = preserve subdomains in output |
|
|
147
151
|
| `blocked` | Array | - | Domains or regexes to block during scanning |
|
|
148
152
|
| `even_blocked` | Boolean | `false` | Add matching rules even if requests are blocked |
|
|
153
|
+
| `bypass_cache` | Boolean | `false` | Skip all caching for this site's URLs |
|
|
154
|
+
|
|
149
155
|
|
|
150
156
|
### Redirect Handling Options
|
|
151
157
|
|
|
@@ -190,6 +196,10 @@ When a page redirects to a new domain, first-party/third-party detection is base
|
|
|
190
196
|
|:---------------------|:-------|:-------:|:------------|
|
|
191
197
|
| `cloudflare_phish` | Boolean | `false` | Auto-click through Cloudflare phishing warnings |
|
|
192
198
|
| `cloudflare_bypass` | Boolean | `false` | Auto-solve Cloudflare "Verify you are human" challenges |
|
|
199
|
+
| `cloudflare_parallel_detection` | Boolean | `true` | Use parallel detection for faster Cloudflare checks |
|
|
200
|
+
| `cloudflare_max_retries` | Integer | `3` | Maximum retry attempts for Cloudflare operations |
|
|
201
|
+
| `cloudflare_cache_ttl` | Milliseconds | `300000` | TTL for Cloudflare detection cache (5 minutes) |
|
|
202
|
+
| `cloudflare_retry_on_error` | Boolean | `true` | Enable retry logic for Cloudflare operations |
|
|
193
203
|
| `flowproxy_detection` | Boolean | `false` | Enable flowProxy protection detection and handling |
|
|
194
204
|
| `flowproxy_page_timeout` | Milliseconds | `45000` | Page timeout for flowProxy sites |
|
|
195
205
|
| `flowproxy_nav_timeout` | Milliseconds | `45000` | Navigation timeout for flowProxy sites |
|
|
@@ -240,6 +250,11 @@ When a page redirects to a new domain, first-party/third-party detection is base
|
|
|
240
250
|
| `screenshot` | Boolean | `false` | Capture screenshot on load failure |
|
|
241
251
|
| `headful` | Boolean | `false` | Launch browser with GUI for this site |
|
|
242
252
|
| `adblock_rules` | Boolean | `false` | Generate adblock filter rules with resource types for this site |
|
|
253
|
+
| `interact_duration` | Milliseconds | `2000` | Duration of interaction simulation |
|
|
254
|
+
| `interact_scrolling` | Boolean | `true` | Enable scrolling simulation |
|
|
255
|
+
| `interact_clicks` | Boolean | `false` | Enable element clicking simulation |
|
|
256
|
+
| `interact_typing` | Boolean | `false` | Enable typing simulation |
|
|
257
|
+
| `interact_intensity` | String | `"medium"` | Interaction simulation intensity: "low", "medium", "high" |
|
|
243
258
|
|
|
244
259
|
### Global Configuration Options
|
|
245
260
|
|
|
@@ -254,7 +269,11 @@ These options go at the root level of your config.json:
|
|
|
254
269
|
| `ignore_similar_threshold` | Integer | `80` | Similarity threshold percentage for ignore_similar |
|
|
255
270
|
| `ignore_similar_ignored_domains` | Boolean | `true` | Ignore domains similar to ignoreDomains list |
|
|
256
271
|
| `max_concurrent_sites` | Integer | `6` | Maximum concurrent site processing (1-50) |
|
|
257
|
-
| `resource_cleanup_interval` | Integer | `
|
|
272
|
+
| `resource_cleanup_interval` | Integer | `80` | Number of URLs processed between browser restarts (1-1000) |
|
|
273
|
+
| `cache_path` | String | `".cache"` | Directory path for persistent cache storage |
|
|
274
|
+
| `cache_max_size` | Integer | `5000` | Maximum number of entries in cache |
|
|
275
|
+
| `cache_autosave_minutes` | Integer | `1` | Interval for automatic cache saves (minutes) |
|
|
276
|
+
| `cache_requests` | Boolean | `false` | Enable HTTP request response caching |
|
|
258
277
|
|
|
259
278
|
---
|
|
260
279
|
|
package/nwss.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// === Network scanner script (nwss.js) v1.0.93 ===
|
|
1
|
+
// === Network scanner script (nwss.js) v1.0.95 ===
|
|
2
2
|
|
|
3
3
|
// puppeteer for browser automation, fs for file system operations, psl for domain parsing.
|
|
4
4
|
// const pLimit = require('p-limit'); // Will be dynamically imported
|
|
@@ -125,7 +125,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
|
|
|
125
125
|
const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive } = require('./lib/browserhealth');
|
|
126
126
|
|
|
127
127
|
// --- Script Configuration & Constants ---
|
|
128
|
-
const VERSION = '1.0.93'; // Script version
|
|
128
|
+
const VERSION = '1.0.95'; // Script version
|
|
129
129
|
|
|
130
130
|
// get startTime
|
|
131
131
|
const startTime = Date.now();
|
|
@@ -1584,8 +1584,16 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1584
1584
|
// Strategy 2: Try injection with reduced complexity if browser is responsive
|
|
1585
1585
|
if (browserResponsive) {
|
|
1586
1586
|
try {
|
|
1587
|
+
// Add comprehensive timeout protection for evaluateOnNewDocument
|
|
1587
1588
|
await Promise.race([
|
|
1589
|
+
// Main injection with all safety checks
|
|
1588
1590
|
page.evaluateOnNewDocument(() => {
|
|
1591
|
+
// Wrap everything in try-catch to prevent page crashes
|
|
1592
|
+
try {
|
|
1593
|
+
// Add timeout check within the injection
|
|
1594
|
+
const injectionTimeout = setTimeout(() => {
|
|
1595
|
+
console.log('[evalOnDoc] Injection taking too long, aborting');
|
|
1596
|
+
}, 3000);
|
|
1589
1597
|
// Prevent infinite reload loops
|
|
1590
1598
|
let reloadCount = 0;
|
|
1591
1599
|
const MAX_RELOADS = 2;
|
|
@@ -1646,19 +1654,39 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1646
1654
|
return originalXHROpen.apply(this, arguments);
|
|
1647
1655
|
}
|
|
1648
1656
|
};
|
|
1657
|
+
clearTimeout(injectionTimeout);
|
|
1658
|
+
} catch (injectionError) {
|
|
1659
|
+
console.log('[evalOnDoc][error]', 'Injection failed:', injectionError.message);
|
|
1660
|
+
}
|
|
1649
1661
|
}),
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
|
|
1662
|
+
// Reduced timeout for faster failure
|
|
1663
|
+
new Promise((_, reject) => {
|
|
1664
|
+
setTimeout(() => {
|
|
1665
|
+
reject(new Error('evaluateOnNewDocument timeout - browser may be unresponsive'));
|
|
1666
|
+
}, 5000); // Reduced from 8000ms
|
|
1667
|
+
})
|
|
1653
1668
|
]);
|
|
1654
1669
|
evalOnDocSuccess = true;
|
|
1655
1670
|
if (forceDebug) {
|
|
1656
1671
|
console.log(formatLogMessage('debug', `[evalOnDoc] Full injection successful for ${currentUrl}`));
|
|
1657
1672
|
}
|
|
1658
1673
|
} catch (fullInjectionErr) {
|
|
1674
|
+
// Enhanced error detection for CDP issues
|
|
1675
|
+
const isCDPError = fullInjectionErr.constructor.name === 'ProtocolError' ||
|
|
1676
|
+
fullInjectionErr.name === 'ProtocolError' ||
|
|
1677
|
+
fullInjectionErr.message.includes('addScriptToEvaluateOnNewDocument timed out') ||
|
|
1678
|
+
fullInjectionErr.message.includes('Protocol error');
|
|
1679
|
+
|
|
1659
1680
|
if (forceDebug) {
|
|
1660
|
-
|
|
1681
|
+
const errorType = isCDPError ? 'CDP/Protocol error' : 'timeout/other';
|
|
1682
|
+
console.log(formatLogMessage('debug', `[evalOnDoc] Full injection failed (${errorType}): ${fullInjectionErr.message}`));
|
|
1661
1683
|
}
|
|
1684
|
+
|
|
1685
|
+
// Skip fallback for CDP errors - they indicate browser communication issues
|
|
1686
|
+
if (isCDPError) {
|
|
1687
|
+
console.warn(formatLogMessage('warn', `[evalOnDoc] CDP communication failure - skipping injection for ${currentUrl}`));
|
|
1688
|
+
evalOnDocSuccess = false;
|
|
1689
|
+
} else {
|
|
1662
1690
|
|
|
1663
1691
|
// Strategy 3: Fallback - Try minimal injection (just fetch monitoring)
|
|
1664
1692
|
try {
|
|
@@ -1692,6 +1720,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1692
1720
|
evalOnDocSuccess = false;
|
|
1693
1721
|
}
|
|
1694
1722
|
}
|
|
1723
|
+
}
|
|
1695
1724
|
} else {
|
|
1696
1725
|
if (forceDebug) {
|
|
1697
1726
|
console.log(formatLogMessage('debug', `[evalOnDoc] Browser unresponsive, skipping injection for ${currentUrl}`));
|
|
@@ -2950,23 +2979,79 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2950
2979
|
}
|
|
2951
2980
|
}
|
|
2952
2981
|
|
|
2953
|
-
|
|
2954
|
-
|
|
2982
|
+
let reloadSuccess = false;
|
|
2983
|
+
|
|
2984
|
+
if (useForceReload && !reloadSuccess) {
|
|
2985
|
+
// Attempt force reload: disable cache, reload, re-enable cache
|
|
2955
2986
|
try {
|
|
2956
|
-
|
|
2957
|
-
|
|
2958
|
-
|
|
2987
|
+
// Add timeout protection for setCacheEnabled operations
|
|
2988
|
+
await Promise.race([
|
|
2989
|
+
page.setCacheEnabled(false),
|
|
2990
|
+
new Promise((_, reject) =>
|
|
2991
|
+
setTimeout(() => reject(new Error('setCacheEnabled(false) timeout')), 5000)
|
|
2992
|
+
)
|
|
2993
|
+
]);
|
|
2994
|
+
|
|
2995
|
+
await page.reload({ waitUntil: 'domcontentloaded', timeout: Math.min(timeout, 12000) });
|
|
2996
|
+
|
|
2997
|
+
await Promise.race([
|
|
2998
|
+
page.setCacheEnabled(true),
|
|
2999
|
+
new Promise((_, reject) =>
|
|
3000
|
+
setTimeout(() => reject(new Error('setCacheEnabled(true) timeout')), 5000)
|
|
3001
|
+
)
|
|
3002
|
+
]);
|
|
3003
|
+
|
|
3004
|
+
reloadSuccess = true;
|
|
2959
3005
|
if (forceDebug) console.log(formatLogMessage('debug', `Force reload #${i} completed for ${currentUrl}`));
|
|
3006
|
+
|
|
2960
3007
|
} catch (forceReloadErr) {
|
|
2961
|
-
|
|
3008
|
+
console.warn(messageColors.warn(`[force reload #${i} failed] ${currentUrl}: ${forceReloadErr.message} - falling back to standard reload`));
|
|
3009
|
+
reloadSuccess = false; // Ensure we try standard reload
|
|
3010
|
+
}
|
|
3011
|
+
}
|
|
3012
|
+
|
|
3013
|
+
// Fallback to standard reload if force reload failed or wasn't attempted
|
|
3014
|
+
if (!reloadSuccess) {
|
|
3015
|
+
try {
|
|
3016
|
+
// Reduced timeout for faster failure detection
|
|
3017
|
+
const standardReloadTimeout = Math.min(timeout, 8000); // Reduced from 15000ms
|
|
3018
|
+
await page.reload({ waitUntil: 'domcontentloaded', timeout: standardReloadTimeout });
|
|
3019
|
+
if (forceDebug) console.log(formatLogMessage('debug', `Standard reload #${i} completed for ${currentUrl}`));
|
|
3020
|
+
} catch (standardReloadErr) {
|
|
3021
|
+
console.warn(messageColors.warn(`[standard reload #${i} failed] ${currentUrl}: ${standardReloadErr.message}`));
|
|
3022
|
+
|
|
3023
|
+
// Check if this is a persistent failure that should skip remaining reloads
|
|
3024
|
+
const isPersistentFailure = standardReloadErr.message.includes('Navigation timeout') ||
|
|
3025
|
+
standardReloadErr.message.includes('net::ERR_') ||
|
|
3026
|
+
standardReloadErr.message.includes('Protocol error') ||
|
|
3027
|
+
standardReloadErr.message.includes('Page crashed') ||
|
|
3028
|
+
// CDP and injection failures
|
|
3029
|
+
standardReloadErr.constructor.name === 'ProtocolError' ||
|
|
3030
|
+
standardReloadErr.name === 'ProtocolError' ||
|
|
3031
|
+
standardReloadErr.message.includes('addScriptToEvaluateOnNewDocument timed out') ||
|
|
3032
|
+
standardReloadErr.message.includes('Runtime.callFunctionOn timed out') ||
|
|
3033
|
+
standardReloadErr.message.includes('CDP injection timeout');
|
|
3034
|
+
|
|
3035
|
+
if (isPersistentFailure) {
|
|
3036
|
+
const remainingReloads = totalReloads - i;
|
|
3037
|
+
if (remainingReloads > 0 && forceDebug) {
|
|
3038
|
+
console.log(formatLogMessage('debug', `Persistent failure detected - skipping ${remainingReloads} remaining reload(s) for ${currentUrl}`));
|
|
3039
|
+
}
|
|
3040
|
+
// Break out of reload loop to move to next URL faster
|
|
3041
|
+
break;
|
|
3042
|
+
}
|
|
2962
3043
|
}
|
|
2963
3044
|
} else {
|
|
2964
3045
|
// Regular reload
|
|
2965
3046
|
await page.reload({ waitUntil: 'domcontentloaded', timeout: Math.min(timeout, 15000) });
|
|
2966
3047
|
}
|
|
2967
|
-
|
|
3048
|
+
|
|
3049
|
+
// Only add delay if we're continuing with more reloads
|
|
3050
|
+
if (i < totalReloads) {
|
|
3051
|
+
|
|
2968
3052
|
await fastTimeout(delayMs);
|
|
2969
3053
|
}
|
|
3054
|
+
}
|
|
2970
3055
|
|
|
2971
3056
|
if (dryRunMode) {
|
|
2972
3057
|
// Get page title for dry run output
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "1.0.93",
|
|
3
|
+
"version": "1.0.95",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|