@fanboynz/network-scanner 2.0.50 → 2.0.52
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +81 -0
- package/lib/cloudflare.js +217 -176
- package/lib/fingerprint.js +22 -1
- package/lib/proxy.js +279 -0
- package/nwss.js +158 -25
- package/package.json +2 -2
package/lib/proxy.js
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Proxy Module for NWSS Network Scanner
|
|
3
|
+
* ======================================
|
|
4
|
+
* Routes specific site URLs through SOCKS5, SOCKS4, HTTP, or HTTPS proxies.
|
|
5
|
+
*
|
|
6
|
+
* Chromium's --proxy-server flag is browser-wide, so sites requiring a proxy
|
|
7
|
+
* need a separate browser instance. This module handles:
|
|
8
|
+
* - Parsing proxy URLs (all supported protocols)
|
|
9
|
+
* - Generating Chromium launch args
|
|
10
|
+
* - Per-page authentication via Puppeteer
|
|
11
|
+
* - Proxy bypass lists
|
|
12
|
+
* - Proxy health checks
|
|
13
|
+
*
|
|
14
|
+
* CONFIG EXAMPLES:
|
|
15
|
+
*
|
|
16
|
+
* SOCKS5 (no auth):
|
|
17
|
+
* "proxy": "socks5://127.0.0.1:1080"
|
|
18
|
+
*
|
|
19
|
+
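* SOCKS5 with auth (note: Chromium does not implement SOCKS username/password auth, so these credentials may be ignored by the browser):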
|
|
20
|
+
* "proxy": "socks5://user:pass@127.0.0.1:1080"
|
|
21
|
+
*
|
|
22
|
+
* HTTP proxy (corporate):
|
|
23
|
+
* "proxy": "http://proxy.corp.com:3128"
|
|
24
|
+
*
|
|
25
|
+
* HTTP proxy with auth:
|
|
26
|
+
* "proxy": "http://user:pass@proxy.corp.com:8080"
|
|
27
|
+
*
|
|
28
|
+
* HTTPS proxy:
|
|
29
|
+
* "proxy": "https://secure-proxy.example.com:8443"
|
|
30
|
+
*
|
|
31
|
+
* With bypass list and remote DNS:
|
|
32
|
+
* "proxy": "socks5://127.0.0.1:1080",
|
|
33
|
+
* "proxy_bypass": ["localhost", "127.0.0.1", "*.local"],
|
|
34
|
+
* "proxy_remote_dns": true
|
|
35
|
+
*
|
|
36
|
+
* Debug mode:
|
|
37
|
+
* "proxy": "socks5://127.0.0.1:1080",
|
|
38
|
+
* "proxy_debug": true
|
|
39
|
+
*
|
|
40
|
+
* Legacy key (backwards compatible):
|
|
41
|
+
* "socks5_proxy": "socks5://127.0.0.1:1080"
|
|
42
|
+
*
|
|
43
|
+
* INTEGRATION (in nwss.js):
|
|
44
|
+
* const { needsProxy, getProxyArgs, applyProxyAuth, getProxyInfo } = require('./lib/proxy');
|
|
45
|
+
*
|
|
46
|
+
* // Before browser launch
|
|
47
|
+
* if (needsProxy(siteConfig)) {
|
|
48
|
+
* const proxyArgs = getProxyArgs(siteConfig, forceDebug);
|
|
49
|
+
* browserArgs.push(...proxyArgs);
|
|
50
|
+
* }
|
|
51
|
+
*
|
|
52
|
+
* // After page creation, before page.goto()
|
|
53
|
+
* await applyProxyAuth(page, siteConfig, forceDebug);
|
|
54
|
+
*
|
|
55
|
+
* @version 1.1.0
|
|
56
|
+
*/
|
|
57
|
+
|
|
58
|
+
const { formatLogMessage } = require('./colorize');
|
|
59
|
+
|
|
60
|
+
/** Version string of this proxy module (returned by getModuleInfo()). */
const PROXY_MODULE_VERSION = '1.1.0';

/**
 * Proxy URL schemes accepted by parseProxyUrl().
 * Frozen because the array is exported and URL validation depends on it;
 * an accidental mutation by a consumer must not change what counts as valid.
 */
const SUPPORTED_PROTOCOLS = Object.freeze(['socks5', 'socks4', 'http', 'https']);

/**
 * Fallback port per scheme, used when a proxy URL does not carry a port.
 * Frozen for the same reason as SUPPORTED_PROTOCOLS.
 */
const DEFAULT_PORTS = Object.freeze({
  socks5: 1080,
  socks4: 1080,
  http: 8080,
  https: 8443
});
|
|
69
|
+
|
|
70
|
+
/**
 * Returns the configured proxy URL string from siteConfig.
 * Supports both "proxy" (preferred) and "socks5_proxy" (legacy) keys;
 * "proxy" wins when both are set to a truthy value.
 *
 * @param {object} [siteConfig] - Site configuration; may be null/undefined
 * @returns {string|null} The configured proxy value, or null when none is set
 */
function getConfiguredProxy(siteConfig) {
  // Optional chaining: a missing config object means "no proxy", not a TypeError.
  return siteConfig?.proxy || siteConfig?.socks5_proxy || null;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Parses a proxy URL into components.
 * Accepts: protocol://host:port, protocol://user:pass@host:port, bare host:port
 * (a value without "://" is treated as socks5://).
 *
 * Only schemes listed in SUPPORTED_PROTOCOLS are accepted. When no port is
 * written in the URL the scheme's entry in DEFAULT_PORTS is used (1080 as a
 * last resort). Username and password are percent-decoded.
 *
 * @param {string} proxyUrl - Proxy URL string
 * @returns {{protocol: string, host: string, port: number, username: (string|null), password: (string|null)}|null}
 *   Parsed proxy or null if the value is empty, not a string, unparsable,
 *   uses an unsupported scheme, or has no host
 */
function parseProxyUrl(proxyUrl) {
  if (!proxyUrl || typeof proxyUrl !== 'string') return null;

  let cleaned = proxyUrl.trim();

  // Normalise bare host:port to socks5:// URL
  if (!cleaned.includes('://')) {
    cleaned = `socks5://${cleaned}`;
  }

  try {
    const url = new URL(cleaned);
    const protocol = url.protocol.replace(':', '');

    if (!SUPPORTED_PROTOCOLS.includes(protocol)) return null;

    const host = url.hostname;
    if (!host) return null;

    // The URL parser blanks `port` when it equals the scheme default
    // (http → 80, https → 443). Without recovering the port as written,
    // "http://proxy:80" would silently fall through to DEFAULT_PORTS (8080).
    const authority = cleaned.slice(cleaned.indexOf('://') + 3).split(/[\/?#]/, 1)[0];
    const hostAndPort = authority.slice(authority.lastIndexOf('@') + 1);
    const writtenPort = /:(\d+)$/.exec(hostAndPort)?.[1] ?? '';

    // A port of 0 (or no port at all) still falls back to the scheme default.
    const port = Number.parseInt(url.port || writtenPort, 10) || DEFAULT_PORTS[protocol] || 1080;
    const username = url.username ? decodeURIComponent(url.username) : null;
    const password = url.password ? decodeURIComponent(url.password) : null;

    return { protocol, host, port, username, password };
  } catch (_) {
    // Unparsable URL or malformed percent-encoding in the credentials.
    return null;
  }
}
|
|
116
|
+
|
|
117
|
+
/**
 * Tells whether a site config has a proxy configured.
 * The value is not validated here; any truthy configured proxy counts.
 *
 * @param {object} siteConfig
 * @returns {boolean} True when getConfiguredProxy() yields a truthy value
 */
function needsProxy(siteConfig) {
  const configured = getConfiguredProxy(siteConfig);
  return Boolean(configured);
}
|
|
126
|
+
|
|
127
|
+
/**
 * Returns Chromium launch arguments for the configured proxy.
 *
 * Produces `--proxy-server=…` and, depending on the config, a
 * `--host-resolver-rules=…` DNS-leak guard (SOCKS5 only) and a
 * `--proxy-bypass-list=…`. Credentials are never placed in the arguments.
 *
 * Config keys read (legacy `socks5_*` names are fallbacks):
 *   proxy_remote_dns / socks5_remote_dns - set to false to skip the resolver rule
 *   proxy_bypass / socks5_bypass         - array of patterns, or a single
 *                                          ";"/","-separated string
 *   proxy_debug / socks5_debug           - log the generated arguments
 *
 * @param {object} siteConfig
 * @param {boolean} forceDebug
 * @returns {string[]} Array of Chromium args (empty if no proxy configured or the URL is invalid)
 */
function getProxyArgs(siteConfig, forceDebug = false) {
  const proxyUrl = getConfiguredProxy(siteConfig);
  if (!proxyUrl) return [];

  const parsed = parseProxyUrl(proxyUrl);
  if (!parsed) {
    // Mask any "user:pass@" part so credentials in a mistyped URL never reach the logs.
    const safeUrl = String(proxyUrl).replace(/^([a-z][a-z0-9+.-]*:\/\/)?[^@\/]*@/i, '$1***@');
    console.warn(formatLogMessage('proxy', `Invalid proxy URL: ${safeUrl}`));
    return [];
  }

  const args = [
    `--proxy-server=${parsed.protocol}://${parsed.host}:${parsed.port}`
  ];

  // Remote DNS guard: make every local lookup fail so hostnames can only be
  // resolved through the proxy (prevents DNS leaks).
  // - SOCKS5 only: with SOCKS4 Chromium resolves target hosts client-side,
  //   so blocking local DNS there would break every request.
  // - The proxy's own host must be excluded, otherwise a proxy given by
  //   hostname could not be resolved either.
  const remoteDns = siteConfig.proxy_remote_dns ?? siteConfig.socks5_remote_dns;
  if (parsed.protocol === 'socks5' && remoteDns !== false) {
    args.push(`--host-resolver-rules=MAP * ~NOTFOUND , EXCLUDE ${parsed.host}`);
  }

  // Bypass list: domains that skip the proxy. Accept a plain string as well
  // as an array so a one-entry config does not crash on `.join`.
  const rawBypass = siteConfig.proxy_bypass || siteConfig.socks5_bypass || [];
  const bypass = (Array.isArray(rawBypass) ? rawBypass : String(rawBypass).split(/[;,]/))
    .map((entry) => String(entry).trim())
    .filter(Boolean);
  if (bypass.length > 0) {
    args.push(`--proxy-bypass-list=${bypass.join(';')}`);
  }

  const debug = forceDebug || siteConfig.proxy_debug || siteConfig.socks5_debug;
  if (debug) {
    console.log(formatLogMessage('proxy', `[${parsed.protocol}] Args: ${args.join(' ')}`));
  }

  return args;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Applies proxy authentication to a page via Puppeteer's authenticate API.
 * Must be called BEFORE page.goto().
 *
 * Does nothing (returns false) when no proxy is configured, the proxy URL is
 * invalid, or the URL carries no username. A missing password is sent as ''.
 * Errors from page.authenticate() are logged and reported as false, never thrown.
 *
 * @param {object} page - Puppeteer page instance
 * @param {object} siteConfig
 * @param {boolean} forceDebug
 * @returns {Promise<boolean>} True if page.authenticate() was called successfully
 */
async function applyProxyAuth(page, siteConfig, forceDebug = false) {
  const proxyUrl = getConfiguredProxy(siteConfig);
  if (!proxyUrl) return false;

  const parsed = parseProxyUrl(proxyUrl);
  if (!parsed || !parsed.username) return false;

  // page.authenticate() answers HTTP auth challenges. Chromium is not known to
  // implement SOCKS username/password authentication, so say so up front
  // instead of letting the user assume the credentials are in effect.
  if (parsed.protocol === 'socks5' || parsed.protocol === 'socks4') {
    console.warn(formatLogMessage('proxy', `Chromium does not support SOCKS proxy authentication; credentials for ${parsed.host}:${parsed.port} may be ignored`));
  }

  try {
    await page.authenticate({
      username: parsed.username,
      password: parsed.password || ''
    });

    const debug = forceDebug || siteConfig.proxy_debug || siteConfig.socks5_debug;
    if (debug) {
      // Password is deliberately left out of the log line.
      console.log(formatLogMessage('proxy', `Auth set for ${parsed.username}@${parsed.host}:${parsed.port}`));
    }

    return true;
  } catch (err) {
    // Tolerate non-Error rejections so the handler itself cannot throw.
    console.warn(formatLogMessage('proxy', `Failed to set proxy auth: ${err?.message ?? err}`));
    return false;
  }
}
|
|
202
|
+
|
|
203
|
+
/**
 * Tests proxy connectivity by attempting a TCP connection to the proxy's
 * host and port. Only reachability of the socket is checked; no proxy
 * handshake or authentication is performed.
 *
 * The returned promise always resolves (never rejects): failures are
 * reported through `reachable: false` and `error`.
 *
 * @param {object} siteConfig
 * @param {number} timeoutMs - Connection timeout (default 5000ms)
 * @returns {Promise<object>} { reachable, latencyMs, error }
 */
async function testProxy(siteConfig, timeoutMs = 5000) {
  const proxyUrl = getConfiguredProxy(siteConfig);
  if (!proxyUrl) {
    return { reachable: false, latencyMs: 0, error: 'No proxy configured' };
  }

  const parsed = parseProxyUrl(proxyUrl);
  if (!parsed) {
    return { reachable: false, latencyMs: 0, error: 'Invalid proxy URL' };
  }

  const net = require('net');
  // URL hostnames keep the square brackets around IPv6 literals ("[::1]");
  // net.Socket#connect expects the bare address.
  const connectHost = String(parsed.host).replace(/^\[(.*)\]$/, '$1');
  const start = Date.now();

  return new Promise((resolve) => {
    const socket = new net.Socket();
    let settled = false;

    // Single exit point: 'timeout' and 'error' can both fire for one attempt,
    // so only the first outcome is reported and the socket is always released.
    const finish = (result) => {
      if (settled) return;
      settled = true;
      socket.destroy();
      resolve(result);
    };

    const onError = (err) => {
      finish({ reachable: false, latencyMs: Date.now() - start, error: err.message });
    };

    socket.on('error', onError);
    socket.on('timeout', () => onError(new Error('Connection timeout')));

    try {
      // Both calls validate their arguments synchronously (e.g. a negative
      // timeout); report that as an unreachable result instead of rejecting.
      socket.setTimeout(timeoutMs);
      socket.connect(parsed.port, connectHost, () => {
        finish({ reachable: true, latencyMs: Date.now() - start, error: null });
      });
    } catch (err) {
      onError(err);
    }
  });
}
|
|
243
|
+
|
|
244
|
+
/**
 * Builds a short, log-friendly description of the configured proxy.
 * The password is never included; the username is shown when present.
 *
 * @param {object} siteConfig
 * @returns {string} 'none' when no proxy is configured, 'invalid' when the
 *   configured value cannot be parsed, otherwise `protocol://[user@]host:port`
 */
function getProxyInfo(siteConfig) {
  const configured = getConfiguredProxy(siteConfig);
  if (!configured) {
    return 'none';
  }

  const details = parseProxyUrl(configured);
  if (!details) {
    return 'invalid';
  }

  const { protocol, host, port, username } = details;
  const userPrefix = username ? `${username}@` : '';
  return `${protocol}://${userPrefix}${host}:${port}`;
}
|
|
260
|
+
|
|
261
|
+
/**
 * Describes this module for diagnostics.
 *
 * @returns {{version: string, name: string}} Module version (PROXY_MODULE_VERSION) and display name
 */
function getModuleInfo() {
  const info = {
    version: PROXY_MODULE_VERSION,
    name: 'Proxy Handler'
  };
  return info;
}
|
|
267
|
+
|
|
268
|
+
module.exports = {
|
|
269
|
+
parseProxyUrl,
|
|
270
|
+
needsProxy,
|
|
271
|
+
getProxyArgs,
|
|
272
|
+
applyProxyAuth,
|
|
273
|
+
testProxy,
|
|
274
|
+
getProxyInfo,
|
|
275
|
+
getModuleInfo,
|
|
276
|
+
getConfiguredProxy,
|
|
277
|
+
PROXY_MODULE_VERSION,
|
|
278
|
+
SUPPORTED_PROTOCOLS
|
|
279
|
+
};
|
package/nwss.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// === Network scanner script (nwss.js) v2.0.
|
|
1
|
+
// === Network scanner script (nwss.js) v2.0.51 ===
|
|
2
2
|
|
|
3
3
|
// puppeteer for browser automation, fs for file system operations, psl for domain parsing.
|
|
4
4
|
// const pLimit = require('p-limit'); // Will be dynamically imported
|
|
@@ -44,6 +44,7 @@ const { performPageInteraction, createInteractionConfig } = require('./lib/inter
|
|
|
44
44
|
const { createGlobalHelpers, getTotalDomainsSkipped, getDetectedDomainsCount } = require('./lib/domain-cache');
|
|
45
45
|
const { createSmartCache } = require('./lib/smart-cache'); // Smart cache system
|
|
46
46
|
const { clearPersistentCache } = require('./lib/smart-cache');
|
|
47
|
+
const { needsProxy, getProxyArgs, applyProxyAuth, getProxyInfo, testProxy } = require('./lib/proxy');
|
|
47
48
|
// Dry run functionality
|
|
48
49
|
const { initializeDryRunCollections, addDryRunMatch, addDryRunNetTools, processDryRunResults, writeDryRunOutput } = require('./lib/dry-run');
|
|
49
50
|
// Enhanced site data clearing functionality
|
|
@@ -1354,7 +1355,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1354
1355
|
* Uses system Chrome and temporary directories to minimize disk usage
|
|
1355
1356
|
* @returns {Promise<import('puppeteer').Browser>} Browser instance
|
|
1356
1357
|
*/
|
|
1357
|
-
async function createBrowser() {
|
|
1358
|
+
async function createBrowser(extraArgs = []) {
|
|
1358
1359
|
// Create temporary user data directory that we can fully control and clean up
|
|
1359
1360
|
const tempUserDataDir = `/tmp/puppeteer-${Date.now()}-${Math.random().toString(36).substring(7)}`;
|
|
1360
1361
|
userDataDir = tempUserDataDir; // Store for cleanup tracking (use outer scope variable)
|
|
@@ -1366,9 +1367,17 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1366
1367
|
if (launchHeadless) {
|
|
1367
1368
|
const puppeteerInfo = detectPuppeteerVersion();
|
|
1368
1369
|
|
|
1370
|
+
// Check if any site needs fingerprint protection — use stealth-friendly headless mode
|
|
1371
|
+
const needsStealth = sites.some(site => site.fingerprint_protection);
|
|
1372
|
+
|
|
1369
1373
|
if (puppeteerInfo.useShellMode) {
|
|
1370
|
-
|
|
1371
|
-
|
|
1374
|
+
if (needsStealth) {
|
|
1375
|
+
headlessMode = 'new'; // Full Chrome in headless — harder to detect than chrome-headless-shell
|
|
1376
|
+
if (forceDebug) console.log(formatLogMessage('debug', `Using headless=new for stealth (fingerprint_protection detected)`));
|
|
1377
|
+
} else {
|
|
1378
|
+
headlessMode = 'shell'; // Use fast chrome-headless-shell for 22.x+
|
|
1379
|
+
if (forceDebug) console.log(formatLogMessage('debug', `Using chrome-headless-shell (Puppeteer ${puppeteerInfo.version || 'v' + puppeteerInfo.majorVersion + '.x'})`));
|
|
1380
|
+
}
|
|
1372
1381
|
} else {
|
|
1373
1382
|
headlessMode = true; // Use regular headless for older versions
|
|
1374
1383
|
if (forceDebug) console.log(formatLogMessage('debug', 'Could not detect Puppeteer version, using regular headless mode'));
|
|
@@ -1438,7 +1447,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1438
1447
|
'--disable-features=SafeBrowsing',
|
|
1439
1448
|
'--disable-dev-shm-usage',
|
|
1440
1449
|
'--disable-sync',
|
|
1441
|
-
'--
|
|
1450
|
+
'--use-gl=swiftshader', // Software WebGL — prevents ad script crashes in headless
|
|
1442
1451
|
'--mute-audio',
|
|
1443
1452
|
'--disable-translate',
|
|
1444
1453
|
'--window-size=1920,1080',
|
|
@@ -1458,6 +1467,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1458
1467
|
'--disable-background-timer-throttling',
|
|
1459
1468
|
'--disable-features=site-per-process', // Better for single-site scanning
|
|
1460
1469
|
'--no-zygote', // Better process isolation
|
|
1470
|
+
...extraArgs,
|
|
1461
1471
|
],
|
|
1462
1472
|
// Optimized timeouts for Puppeteer 23.x performance
|
|
1463
1473
|
protocolTimeout: TIMEOUTS.PROTOCOL_TIMEOUT,
|
|
@@ -1492,7 +1502,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1492
1502
|
|
|
1493
1503
|
// Log which headless mode is being used
|
|
1494
1504
|
if (forceDebug && launchHeadless) {
|
|
1495
|
-
|
|
1505
|
+
const needsStealth = sites.some(site => site.fingerprint_protection);
|
|
1506
|
+
const modeLabel = needsStealth ? 'headless=new (stealth mode)' : 'chrome-headless-shell (performance mode)';
|
|
1507
|
+
console.log(formatLogMessage('debug', `Using ${modeLabel}`));
|
|
1496
1508
|
}
|
|
1497
1509
|
|
|
1498
1510
|
// Initial cleanup of any existing Chrome temp files - always comprehensive on startup
|
|
@@ -1650,6 +1662,11 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1650
1662
|
let cdpSessionManager = null;
|
|
1651
1663
|
// Use Map to track domains and their resource types for --adblock-rules or --dry-run
|
|
1652
1664
|
const matchedDomains = (adblockRulesMode || siteConfig.adblock_rules || dryRunMode) ? new Map() : new Set();
|
|
1665
|
+
|
|
1666
|
+
// Local domain dedup scoped to THIS processUrl call only
|
|
1667
|
+
// Prevents cross-config contamination from the global domain cache
|
|
1668
|
+
const localDetectedDomains = new Set();
|
|
1669
|
+
const isLocallyDetected = (domain) => localDetectedDomains.has(domain);
|
|
1653
1670
|
|
|
1654
1671
|
// Initialize dry run matches collection
|
|
1655
1672
|
if (dryRunMode) {
|
|
@@ -2130,6 +2147,11 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2130
2147
|
}
|
|
2131
2148
|
}
|
|
2132
2149
|
|
|
2150
|
+
// --- Apply proxy authentication if configured ---
|
|
2151
|
+
if (needsProxy(siteConfig)) {
|
|
2152
|
+
await applyProxyAuth(page, siteConfig, forceDebug);
|
|
2153
|
+
}
|
|
2154
|
+
|
|
2133
2155
|
// --- Apply all fingerprint spoofing (user agent, Brave, fingerprint protection) ---
|
|
2134
2156
|
try {
|
|
2135
2157
|
await applyAllFingerprintSpoofing(page, siteConfig, forceDebug, currentUrl);
|
|
@@ -2423,6 +2445,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2423
2445
|
|
|
2424
2446
|
// Mark full subdomain as detected for future reference
|
|
2425
2447
|
markDomainAsDetected(cacheKey);
|
|
2448
|
+
localDetectedDomains.add(cacheKey);
|
|
2426
2449
|
|
|
2427
2450
|
// Also mark in smart cache with context (if cache is enabled)
|
|
2428
2451
|
if (smartCache) {
|
|
@@ -2492,7 +2515,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2492
2515
|
if (forceDebug) {
|
|
2493
2516
|
console.log(formatLogMessage('debug', `Blocking potential infinite iframe loop: ${checkedUrl}`));
|
|
2494
2517
|
}
|
|
2495
|
-
request.abort();
|
|
2518
|
+
request.abort('blockedbyclient');
|
|
2496
2519
|
return;
|
|
2497
2520
|
}
|
|
2498
2521
|
|
|
@@ -2527,7 +2550,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2527
2550
|
if (forceDebug) {
|
|
2528
2551
|
console.log(formatLogMessage('debug', `${messageColors.blocked('[adblock]')} ${checkedUrl} (${result.reason})`));
|
|
2529
2552
|
}
|
|
2530
|
-
request.abort();
|
|
2553
|
+
request.abort('blockedbyclient');
|
|
2531
2554
|
return;
|
|
2532
2555
|
}
|
|
2533
2556
|
adblockStats.allowed++;
|
|
@@ -2607,7 +2630,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2607
2630
|
}
|
|
2608
2631
|
}
|
|
2609
2632
|
|
|
2610
|
-
request.abort();
|
|
2633
|
+
request.abort('blockedbyclient');
|
|
2611
2634
|
return;
|
|
2612
2635
|
}
|
|
2613
2636
|
|
|
@@ -2717,7 +2740,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2717
2740
|
dryRunCallback: dryRunMode ? createEnhancedDryRunCallback(matchedDomains, forceDebug) : null,
|
|
2718
2741
|
matchedDomains,
|
|
2719
2742
|
addMatchedDomain,
|
|
2720
|
-
isDomainAlreadyDetected,
|
|
2743
|
+
isDomainAlreadyDetected: isLocallyDetected,
|
|
2721
2744
|
onWhoisResult: smartCache ? (domain, result) => smartCache.cacheNetTools(domain, 'whois', result) : undefined,
|
|
2722
2745
|
onDigResult: smartCache ? (domain, result, recordType) => smartCache.cacheNetTools(domain, 'dig', result, recordType) : undefined,
|
|
2723
2746
|
cachedWhois: smartCache ? smartCache.getCachedNetTools(reqDomain, 'whois') : null,
|
|
@@ -2766,8 +2789,8 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2766
2789
|
}
|
|
2767
2790
|
} else if (hasNetTools && !hasSearchString && !hasSearchStringAnd) {
|
|
2768
2791
|
// If nettools are configured (whois/dig), perform checks on the domain
|
|
2769
|
-
// Skip nettools check if full subdomain was already detected
|
|
2770
|
-
if (
|
|
2792
|
+
// Skip nettools check if full subdomain was already detected in THIS scan
|
|
2793
|
+
if (localDetectedDomains.has(fullSubdomain)) {
|
|
2771
2794
|
if (forceDebug) {
|
|
2772
2795
|
console.log(formatLogMessage('debug', `Skipping nettools check for already detected subdomain: ${fullSubdomain}`));
|
|
2773
2796
|
}
|
|
@@ -2826,7 +2849,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2826
2849
|
dryRunCallback: dryRunMode ? createEnhancedDryRunCallback(matchedDomains, forceDebug) : null,
|
|
2827
2850
|
matchedDomains,
|
|
2828
2851
|
addMatchedDomain,
|
|
2829
|
-
isDomainAlreadyDetected,
|
|
2852
|
+
isDomainAlreadyDetected: isLocallyDetected,
|
|
2830
2853
|
// Add cache callbacks if smart cache is available and caching is enabled
|
|
2831
2854
|
onWhoisResult: smartCache ? (domain, result) => {
|
|
2832
2855
|
smartCache.cacheNetTools(domain, 'whois', result);
|
|
@@ -2856,8 +2879,8 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2856
2879
|
}
|
|
2857
2880
|
} else {
|
|
2858
2881
|
// If searchstring or searchstring_and IS defined (with or without nettools), queue for content checking
|
|
2859
|
-
// Skip searchstring check if full subdomain was already detected
|
|
2860
|
-
if (
|
|
2882
|
+
// Skip searchstring check if full subdomain was already detected in THIS scan
|
|
2883
|
+
if (localDetectedDomains.has(fullSubdomain)) {
|
|
2861
2884
|
if (forceDebug) {
|
|
2862
2885
|
console.log(formatLogMessage('debug', `Skipping searchstring check for already detected subdomain: ${fullSubdomain}`));
|
|
2863
2886
|
}
|
|
@@ -2913,7 +2936,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2913
2936
|
searchStringsAnd,
|
|
2914
2937
|
matchedDomains,
|
|
2915
2938
|
addMatchedDomain, // Pass the helper function
|
|
2916
|
-
isDomainAlreadyDetected,
|
|
2939
|
+
isDomainAlreadyDetected: isLocallyDetected,
|
|
2917
2940
|
onContentFetched: smartCache && !ignoreCache ? (url, content) => {
|
|
2918
2941
|
// Only cache if not bypassing cache
|
|
2919
2942
|
if (!shouldBypassCacheForUrl(url, siteConfig)) {
|
|
@@ -2949,7 +2972,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2949
2972
|
regexes,
|
|
2950
2973
|
matchedDomains,
|
|
2951
2974
|
addMatchedDomain,
|
|
2952
|
-
isDomainAlreadyDetected,
|
|
2975
|
+
isDomainAlreadyDetected: isLocallyDetected,
|
|
2953
2976
|
onContentFetched: smartCache && !ignoreCache ? (url, content) => {
|
|
2954
2977
|
// Only cache if not bypassing cache
|
|
2955
2978
|
if (!shouldBypassCacheForUrl(url, siteConfig)) {
|
|
@@ -3020,7 +3043,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3020
3043
|
matchedDomains,
|
|
3021
3044
|
addMatchedDomain, // Pass the helper function
|
|
3022
3045
|
bypassCache: (url) => shouldBypassCacheForUrl(url, siteConfig),
|
|
3023
|
-
isDomainAlreadyDetected,
|
|
3046
|
+
isDomainAlreadyDetected: isLocallyDetected,
|
|
3024
3047
|
onContentFetched: smartCache && !ignoreCache ? (url, content) => {
|
|
3025
3048
|
// Only cache if not bypassing cache
|
|
3026
3049
|
if (!shouldBypassCacheForUrl(url, siteConfig)) {
|
|
@@ -3337,6 +3360,25 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3337
3360
|
siteCounter++;
|
|
3338
3361
|
// Continue processing with the redirected URL instead of throwing error
|
|
3339
3362
|
} else {
|
|
3363
|
+
// Detect proxy-specific failures and provide clear diagnostics
|
|
3364
|
+
if (needsProxy(siteConfig) && err.message) {
|
|
3365
|
+
const proxyErrors = [
|
|
3366
|
+
'ERR_PROXY_CONNECTION_FAILED',
|
|
3367
|
+
'ERR_SOCKS_CONNECTION_FAILED',
|
|
3368
|
+
'ERR_TUNNEL_CONNECTION_FAILED',
|
|
3369
|
+
'ERR_PROXY_AUTH_UNSUPPORTED',
|
|
3370
|
+
'ERR_PROXY_AUTH_REQUESTED',
|
|
3371
|
+
'ERR_SOCKS_CONNECTION_HOST_UNREACHABLE',
|
|
3372
|
+
'ERR_PROXY_CERTIFICATE_INVALID',
|
|
3373
|
+
'ERR_NO_SUPPORTED_PROXIES'
|
|
3374
|
+
];
|
|
3375
|
+
const proxyErr = proxyErrors.find(e => err.message.includes(e));
|
|
3376
|
+
if (proxyErr) {
|
|
3377
|
+
const info = getProxyInfo(siteConfig);
|
|
3378
|
+
console.error(formatLogMessage('error', `[proxy] ${proxyErr} — proxy: ${info} — URL: ${currentUrl}`));
|
|
3379
|
+
console.error(formatLogMessage('error', `[proxy] Check: is the proxy running? Are credentials correct? Is the target reachable from the proxy?`));
|
|
3380
|
+
}
|
|
3381
|
+
}
|
|
3340
3382
|
console.error(formatLogMessage('error', `Failed on ${currentUrl}: ${err.message}`));
|
|
3341
3383
|
throw err;
|
|
3342
3384
|
}
|
|
@@ -3348,8 +3390,16 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3348
3390
|
|
|
3349
3391
|
// Mark page as processing during interactions
|
|
3350
3392
|
updatePageUsage(page, true);
|
|
3351
|
-
// Use enhanced interaction module
|
|
3352
|
-
|
|
3393
|
+
// Use enhanced interaction module with hard abort timeout
|
|
3394
|
+
const INTERACTION_HARD_TIMEOUT = 15000;
|
|
3395
|
+
try {
|
|
3396
|
+
await Promise.race([
|
|
3397
|
+
performPageInteraction(page, currentUrl, interactionConfig, forceDebug),
|
|
3398
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error('interaction hard timeout')), INTERACTION_HARD_TIMEOUT))
|
|
3399
|
+
]);
|
|
3400
|
+
} catch (interactTimeoutErr) {
|
|
3401
|
+
if (forceDebug) console.log(formatLogMessage('debug', `[interaction] Aborted after ${INTERACTION_HARD_TIMEOUT}ms: ${interactTimeoutErr.message}`));
|
|
3402
|
+
}
|
|
3353
3403
|
}
|
|
3354
3404
|
|
|
3355
3405
|
const delayMs = DEFAULT_DELAY;
|
|
@@ -3662,6 +3712,26 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3662
3712
|
}
|
|
3663
3713
|
|
|
3664
3714
|
} catch (err) {
|
|
3715
|
+
// Detect proxy-specific failures at top level
|
|
3716
|
+
if (needsProxy(siteConfig) && err.message) {
|
|
3717
|
+
const proxyErrors = [
|
|
3718
|
+
'ERR_PROXY_CONNECTION_FAILED',
|
|
3719
|
+
'ERR_SOCKS_CONNECTION_FAILED',
|
|
3720
|
+
'ERR_TUNNEL_CONNECTION_FAILED',
|
|
3721
|
+
'ERR_PROXY_AUTH_UNSUPPORTED',
|
|
3722
|
+
'ERR_PROXY_AUTH_REQUESTED',
|
|
3723
|
+
'ERR_SOCKS_CONNECTION_HOST_UNREACHABLE',
|
|
3724
|
+
'ERR_PROXY_CERTIFICATE_INVALID',
|
|
3725
|
+
'ERR_NO_SUPPORTED_PROXIES'
|
|
3726
|
+
];
|
|
3727
|
+
const proxyErr = proxyErrors.find(e => err.message.includes(e));
|
|
3728
|
+
if (proxyErr) {
|
|
3729
|
+
const info = getProxyInfo(siteConfig);
|
|
3730
|
+
console.error(formatLogMessage('error', `[proxy] ${proxyErr} — proxy: ${info} — URL: ${currentUrl}`));
|
|
3731
|
+
console.error(formatLogMessage('error', `[proxy] Check: is the proxy running? Are credentials correct? Is the target reachable from the proxy?`));
|
|
3732
|
+
}
|
|
3733
|
+
}
|
|
3734
|
+
|
|
3665
3735
|
// Only restart for truly fatal browser errors
|
|
3666
3736
|
const isFatalError = CRITICAL_BROWSER_ERRORS.some(errorType =>
|
|
3667
3737
|
err.message.includes(errorType)
|
|
@@ -3789,6 +3859,14 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3789
3859
|
}
|
|
3790
3860
|
}
|
|
3791
3861
|
|
|
3862
|
+
// Helper to get a stable proxy key for grouping browser instances
|
|
3863
|
+
const proxyKeyFor = (siteConfig) => {
|
|
3864
|
+
if (!needsProxy(siteConfig)) return '';
|
|
3865
|
+
return getProxyInfo(siteConfig);
|
|
3866
|
+
};
|
|
3867
|
+
|
|
3868
|
+
// Sort tasks so proxy groups are contiguous — direct connections first, then each proxy
|
|
3869
|
+
allTasks.sort((a, b) => proxyKeyFor(a.config).localeCompare(proxyKeyFor(b.config)));
|
|
3792
3870
|
|
|
3793
3871
|
let results = [];
|
|
3794
3872
|
let processedUrlCount = 0;
|
|
@@ -3832,6 +3910,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3832
3910
|
|
|
3833
3911
|
// Process URLs in batches with exception handling
|
|
3834
3912
|
let siteGroupIndex = 0;
|
|
3913
|
+
let currentProxyKey = ''; // Track active proxy config — '' means direct connection
|
|
3835
3914
|
try {
|
|
3836
3915
|
for (let batchStart = 0; batchStart < totalUrls; batchStart += RESOURCE_CLEANUP_INTERVAL) {
|
|
3837
3916
|
const batchEnd = Math.min(batchStart + RESOURCE_CLEANUP_INTERVAL, totalUrls);
|
|
@@ -3952,14 +4031,67 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3952
4031
|
if (forceDebug) console.log(formatLogMessage('debug', `Browser cleanup warning: ${browserCloseErr.message}`));
|
|
3953
4032
|
}
|
|
3954
4033
|
|
|
3955
|
-
// Create new browser for next batch
|
|
3956
|
-
|
|
4034
|
+
// Create new browser for next batch (preserve current proxy config)
|
|
4035
|
+
const restartProxyArgs = currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : [];
|
|
4036
|
+
browser = await createBrowser(restartProxyArgs);
|
|
3957
4037
|
if (forceDebug) console.log(formatLogMessage('debug', `New browser instance created for batch ${Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL) + 1}`));
|
|
3958
4038
|
|
|
3959
4039
|
// Reset cleanup counter and add delay
|
|
3960
4040
|
urlsSinceLastCleanup = 0;
|
|
3961
4041
|
await fastTimeout(TIMEOUTS.BROWSER_STABILIZE_DELAY);
|
|
3962
4042
|
}
|
|
4043
|
+
|
|
4044
|
+
// --- Proxy-aware browser restart ---
|
|
4045
|
+
// --proxy-server is browser-wide, so if the batch needs a different proxy we must restart
|
|
4046
|
+
const batchProxyKey = proxyKeyFor(currentBatch[0].config);
|
|
4047
|
+
if (batchProxyKey !== currentProxyKey) {
|
|
4048
|
+
const debug = forceDebug || currentBatch[0].config.proxy_debug || currentBatch[0].config.socks5_debug;
|
|
4049
|
+
if (debug) {
|
|
4050
|
+
const from = currentProxyKey || 'direct';
|
|
4051
|
+
const to = batchProxyKey || 'direct';
|
|
4052
|
+
console.log(formatLogMessage('proxy', `Switching proxy: ${from} → ${to}`));
|
|
4053
|
+
}
|
|
4054
|
+
|
|
4055
|
+
try {
|
|
4056
|
+
await handleBrowserExit(browser, {
|
|
4057
|
+
forceDebug, timeout: 10000, exitOnFailure: false,
|
|
4058
|
+
cleanTempFiles: true, comprehensiveCleanup: removeTempFiles
|
|
4059
|
+
});
|
|
4060
|
+
if (userDataDir && fs.existsSync(userDataDir)) {
|
|
4061
|
+
fs.rmSync(userDataDir, { recursive: true, force: true });
|
|
4062
|
+
}
|
|
4063
|
+
} catch (proxyRestartErr) {
|
|
4064
|
+
if (forceDebug) console.log(formatLogMessage('debug', `Proxy switch browser cleanup: ${proxyRestartErr.message}`));
|
|
4065
|
+
}
|
|
4066
|
+
|
|
4067
|
+
const proxyArgs = batchProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : [];
|
|
4068
|
+
|
|
4069
|
+
// Pre-flight: verify proxy is reachable before launching browser
|
|
4070
|
+
if (proxyArgs.length > 0) {
|
|
4071
|
+
const health = await testProxy(currentBatch[0].config, 5000);
|
|
4072
|
+
if (!health.reachable) {
|
|
4073
|
+
const info = getProxyInfo(currentBatch[0].config);
|
|
4074
|
+
console.error(formatLogMessage('error', `[proxy] Unreachable: ${info} — ${health.error}`));
|
|
4075
|
+
console.error(formatLogMessage('error', `[proxy] Skipping ${currentBatch.length} URL(s) in this batch`));
|
|
4076
|
+
const skipResults = currentBatch.map(task => ({
|
|
4077
|
+
success: false, url: task.url, rules: [],
|
|
4078
|
+
error: `Proxy unreachable: ${health.error}`
|
|
4079
|
+
}));
|
|
4080
|
+
results.push(...skipResults);
|
|
4081
|
+
processedUrlCount += currentBatch.length;
|
|
4082
|
+
urlsSinceLastCleanup += currentBatch.length;
|
|
4083
|
+
continue;
|
|
4084
|
+
}
|
|
4085
|
+
if (forceDebug) {
|
|
4086
|
+
console.log(formatLogMessage('proxy', `Proxy reachable (${health.latencyMs}ms)`));
|
|
4087
|
+
}
|
|
4088
|
+
}
|
|
4089
|
+
|
|
4090
|
+
browser = await createBrowser(proxyArgs);
|
|
4091
|
+
currentProxyKey = batchProxyKey;
|
|
4092
|
+
urlsSinceLastCleanup = 0;
|
|
4093
|
+
await fastTimeout(TIMEOUTS.BROWSER_STABILIZE_DELAY);
|
|
4094
|
+
}
|
|
3963
4095
|
|
|
3964
4096
|
if (forceDebug) {
|
|
3965
4097
|
console.log(formatLogMessage('debug', `Processing batch ${Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL) + 1}: ${batchSize} URL(s) (total processed: ${processedUrlCount})`));
|
|
@@ -3986,7 +4118,8 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3986
4118
|
console.log(formatLogMessage('error', `[TIMEOUT] Batch hung. Restarting browser.`));
|
|
3987
4119
|
try {
|
|
3988
4120
|
await handleBrowserExit(browser, { forceDebug, timeout: 5000, exitOnFailure: false });
|
|
3989
|
-
|
|
4121
|
+
const timeoutProxyArgs = currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : [];
|
|
4122
|
+
browser = await createBrowser(timeoutProxyArgs);
|
|
3990
4123
|
urlsSinceLastCleanup = 0;
|
|
3991
4124
|
} catch (restartErr) {
|
|
3992
4125
|
throw restartErr;
|
|
@@ -4104,7 +4237,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4104
4237
|
comprehensive: true
|
|
4105
4238
|
});
|
|
4106
4239
|
}
|
|
4107
|
-
browser = await createBrowser();
|
|
4240
|
+
browser = await createBrowser(currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : []);
|
|
4108
4241
|
urlsSinceLastCleanup = 0; // Reset counter
|
|
4109
4242
|
await fastTimeout(TIMEOUTS.EMERGENCY_RESTART_DELAY); // Give browser time to stabilize
|
|
4110
4243
|
} catch (emergencyRestartErr) {
|
|
@@ -4116,7 +4249,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4116
4249
|
console.log(`\n${messageColors.fileOp('🔄 Emergency hang detection restart:')} Browser appears hung, forcing restart`);
|
|
4117
4250
|
try {
|
|
4118
4251
|
await handleBrowserExit(browser, { forceDebug, timeout: 5000, exitOnFailure: false, cleanTempFiles: true });
|
|
4119
|
-
browser = await createBrowser();
|
|
4252
|
+
browser = await createBrowser(currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : []);
|
|
4120
4253
|
urlsSinceLastCleanup = 0;
|
|
4121
4254
|
forceRestartFlag = false; // Reset flag
|
|
4122
4255
|
await fastTimeout(TIMEOUTS.EMERGENCY_RESTART_DELAY);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.52",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|
|
@@ -48,7 +48,7 @@
|
|
|
48
48
|
},
|
|
49
49
|
"homepage": "https://github.com/ryanbr/network-scanner",
|
|
50
50
|
"devDependencies": {
|
|
51
|
-
"eslint": "^
|
|
51
|
+
"eslint": "^10.0.2",
|
|
52
52
|
"globals": "^16.3.0"
|
|
53
53
|
}
|
|
54
54
|
}
|