@fanboynz/network-scanner 2.0.50 → 2.0.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +81 -0
- package/lib/cloudflare.js +217 -176
- package/lib/proxy.js +279 -0
- package/nwss.js +116 -7
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -17,6 +17,7 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
|
|
|
17
17
|
- Subdomain handling (collapse to root or full subdomain)
|
|
18
18
|
- Optionally match only first-party, third-party, or both
|
|
19
19
|
- Enhanced redirect handling with JavaScript and meta refresh detection
|
|
20
|
+
- Per-site proxy routing (SOCKS5, SOCKS4, HTTP, HTTPS) with pre-flight health checks
|
|
20
21
|
|
|
21
22
|
---
|
|
22
23
|
|
|
@@ -315,6 +316,86 @@ Route traffic through a VPN for specific sites. Requires `sudo` privileges. The
|
|
|
315
316
|
|
|
316
317
|
> **Authentication:** If the `.ovpn` file already contains credentials (via `auth-user-pass /path/to/file` or an inline `<auth-user-pass>` block), no additional config is needed — just provide the config path. The `username`/`password` fields are only needed when the `.ovpn` file has a bare `auth-user-pass` directive that expects interactive input.
|
|
317
318
|
|
|
319
|
+
### Proxy Options
|
|
320
|
+
|
|
321
|
+
Route traffic through a proxy for specific sites. Supports SOCKS5, SOCKS4, HTTP, and HTTPS proxies. Unlike VPN, proxy routing is per-site-group — only URLs in the same config block use the proxy; other sites connect directly.
|
|
322
|
+
|
|
323
|
+
> **Note:** Chromium's `--proxy-server` flag is browser-wide. Sites requiring different proxies (or direct vs proxied) are automatically separated into different browser instances. Tasks are sorted so proxy groups are contiguous to minimise restarts.
|
|
324
|
+
|
|
325
|
+
| Field | Values | Default | Description |
|
|
326
|
+
|:---------------------|:-------|:-------:|:------------|
|
|
327
|
+
| `proxy` | String | - | Proxy URL: `socks5://host:port`, `http://host:port`, `https://host:port`, or `http://user:pass@host:port` |
|
|
328
|
+
| `proxy_bypass` | Array | `[]` | Domains that skip the proxy (e.g. `["localhost", "127.0.0.1", "*.local"]`) |
|
|
329
|
+
| `proxy_remote_dns` | Boolean | `true` | Resolve DNS through the proxy (SOCKS only — prevents DNS leaks) |
|
|
330
|
+
| `proxy_debug` | Boolean | `false` | Print proxy diagnostics: launch args, auth, health checks, error codes |
|
|
331
|
+
|
|
332
|
+
Legacy aliases (`socks5_proxy`, `socks5_bypass`, `socks5_remote_dns`, `socks5_debug`) are supported for backwards compatibility.
|
|
333
|
+
|
|
334
|
+
#### Proxy Examples
|
|
335
|
+
|
|
336
|
+
**SOCKS5 — no auth:**
|
|
337
|
+
```json
|
|
338
|
+
{
|
|
339
|
+
"url": ["https://blocked-site.com/", "https://another-blocked.com/"],
|
|
340
|
+
"proxy": "socks5://127.0.0.1:1080",
|
|
341
|
+
"search_string": ["tracking.js"]
|
|
342
|
+
}
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
**HTTP proxy with credentials:**
|
|
346
|
+
```json
|
|
347
|
+
{
|
|
348
|
+
"url": ["https://geo-restricted.com/"],
|
|
349
|
+
"proxy": "http://user:pass@proxy.corp.com:3128",
|
|
350
|
+
"search_string": ["analytics"]
|
|
351
|
+
}
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
**SOCKS5 with bypass list and debug:**
|
|
355
|
+
```json
|
|
356
|
+
{
|
|
357
|
+
"url": ["https://target-site.com/"],
|
|
358
|
+
"proxy": "socks5://user:pass@proxy.example.com:9050",
|
|
359
|
+
"proxy_bypass": ["localhost", "127.0.0.1", "*.internal.corp"],
|
|
360
|
+
"proxy_remote_dns": true,
|
|
361
|
+
"proxy_debug": true,
|
|
362
|
+
"search_string": ["tracker"]
|
|
363
|
+
}
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
**Mixed direct + proxied in one config:**
|
|
367
|
+
```json
|
|
368
|
+
[
|
|
369
|
+
{
|
|
370
|
+
"url": ["https://direct-site.com/"],
|
|
371
|
+
"search_string": ["ads"]
|
|
372
|
+
},
|
|
373
|
+
{
|
|
374
|
+
"url": ["https://blocked-site.com/"],
|
|
375
|
+
"proxy": "socks5://127.0.0.1:1080",
|
|
376
|
+
"search_string": ["ads"]
|
|
377
|
+
}
|
|
378
|
+
]
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
#### Proxy Error Handling
|
|
382
|
+
|
|
383
|
+
If a proxy is unreachable, the batch is skipped with a clear error before any navigation is attempted:
|
|
384
|
+
|
|
385
|
+
```
|
|
386
|
+
[error] [proxy] Unreachable: socks5://127.0.0.1:1080 — Connection refused
|
|
387
|
+
[error] [proxy] Skipping 5 URL(s) in this batch
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
If a proxy fails mid-scan, Chromium's error code is detected and diagnosed:
|
|
391
|
+
|
|
392
|
+
```
|
|
393
|
+
[error] [proxy] ERR_SOCKS_CONNECTION_FAILED — proxy: socks5://127.0.0.1:1080 — URL: https://example.com/
|
|
394
|
+
[error] [proxy] Check: is the proxy running? Are credentials correct? Is the target reachable from the proxy?
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
Detected error codes: `ERR_PROXY_CONNECTION_FAILED`, `ERR_SOCKS_CONNECTION_FAILED`, `ERR_TUNNEL_CONNECTION_FAILED`, `ERR_PROXY_AUTH_UNSUPPORTED`, `ERR_PROXY_AUTH_REQUESTED`, `ERR_SOCKS_CONNECTION_HOST_UNREACHABLE`, `ERR_PROXY_CERTIFICATE_INVALID`, `ERR_NO_SUPPORTED_PROXIES`.
|
|
398
|
+
|
|
318
399
|
### Global Configuration Options
|
|
319
400
|
|
|
320
401
|
These options go at the root level of your config.json:
|
package/lib/cloudflare.js
CHANGED
|
@@ -58,11 +58,76 @@ const FAST_TIMEOUTS = {
|
|
|
58
58
|
ELEMENT_INTERACTION_DELAY: 250, // Fast element interactions
|
|
59
59
|
SELECTOR_WAIT: 3000, // Fast selector waits
|
|
60
60
|
TURNSTILE_OPERATION: 6000, // Fast Turnstile operations
|
|
61
|
-
JS_CHALLENGE:
|
|
61
|
+
JS_CHALLENGE: 10000, // Fast JS challenge completion
|
|
62
62
|
CHALLENGE_SOLVING: 30000, // Fast overall challenge solving
|
|
63
63
|
CHALLENGE_COMPLETION: 8000 // Fast completion check
|
|
64
64
|
};
|
|
65
65
|
|
|
66
|
+
/**
|
|
67
|
+
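* Finds and clicks an element inside shadow DOM trees: tries Puppeteer's pierce/ selector first, then falls back to a manual open-shadow-root traversal via context.evaluate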
|
|
68
|
+
* Returns {found, clicked, selector, x, y} - coordinates allow fallback mouse.click
|
|
69
|
+
*/
|
|
70
|
+
async function clickInShadowDOM(context, selectors, forceDebug = false, waitMs = 1500) {
|
|
71
|
+
// Try Puppeteer's pierce/ selector first — handles CLOSED shadow roots via CDP
|
|
72
|
+
for (const selector of selectors) {
|
|
73
|
+
try {
|
|
74
|
+
// Wait for element to appear (handles delayed rendering)
|
|
75
|
+
const start = Date.now();
|
|
76
|
+
const element = await context.waitForSelector(`pierce/${selector}`, { timeout: waitMs });
|
|
77
|
+
if (element) {
|
|
78
|
+
const box = await element.boundingBox();
|
|
79
|
+
if (box && box.width > 0 && box.height > 0) {
|
|
80
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} matched in ${Date.now() - start}ms � box: ${box.width}x${box.height} at (${box.x},${box.y})`));
|
|
81
|
+
await element.click();
|
|
82
|
+
await element.dispose();
|
|
83
|
+
return { found: true, clicked: true, selector, x: box.x + box.width / 2, y: box.y + box.height / 2 };
|
|
84
|
+
}
|
|
85
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} found but not visible (0x0)`));
|
|
86
|
+
await element.dispose();
|
|
87
|
+
// Element found but not visible
|
|
88
|
+
return { found: true, clicked: false, selector, x: 0, y: 0 };
|
|
89
|
+
}
|
|
90
|
+
} catch (e) {
|
|
91
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} timeout after ${waitMs}ms`));
|
|
92
|
+
continue;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// Fallback: manual traversal for open shadow roots
|
|
97
|
+
const result = await context.evaluate((sels) => {
|
|
98
|
+
function deepQuery(root, selector) {
|
|
99
|
+
// Try direct query first
|
|
100
|
+
const el = root.querySelector(selector);
|
|
101
|
+
if (el) return el;
|
|
102
|
+
|
|
103
|
+
// Traverse shadow roots
|
|
104
|
+
const allElements = root.querySelectorAll('*');
|
|
105
|
+
for (const node of allElements) {
|
|
106
|
+
if (node.shadowRoot) {
|
|
107
|
+
const found = deepQuery(node.shadowRoot, selector);
|
|
108
|
+
if (found) return found;
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
return null;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
for (const selector of sels) {
|
|
115
|
+
const el = deepQuery(document, selector);
|
|
116
|
+
if (el) {
|
|
117
|
+
const rect = el.getBoundingClientRect();
|
|
118
|
+
if (rect.width > 0 && rect.height > 0) {
|
|
119
|
+
el.click();
|
|
120
|
+
return { found: true, clicked: true, selector, x: rect.x + rect.width / 2, y: rect.y + rect.height / 2 };
|
|
121
|
+
}
|
|
122
|
+
return { found: true, clicked: false, selector, x: 0, y: 0 };
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
return { found: false, clicked: false, selector: null, x: 0, y: 0 };
|
|
126
|
+
}, selectors);
|
|
127
|
+
|
|
128
|
+
return result;
|
|
129
|
+
}
|
|
130
|
+
|
|
66
131
|
/**
|
|
67
132
|
* Error categories for better handling
|
|
68
133
|
*/
|
|
@@ -306,12 +371,12 @@ function categorizeError(error) {
|
|
|
306
371
|
/**
|
|
307
372
|
* Implements exponential backoff delay
|
|
308
373
|
*/
|
|
309
|
-
|
|
374
|
+
function getRetryDelay(attempt) {
|
|
310
375
|
const delay = Math.min(
|
|
311
376
|
RETRY_CONFIG.baseDelay * Math.pow(RETRY_CONFIG.backoffMultiplier, attempt - 1),
|
|
312
377
|
RETRY_CONFIG.maxDelay
|
|
313
378
|
);
|
|
314
|
-
return
|
|
379
|
+
return delay;
|
|
315
380
|
}
|
|
316
381
|
|
|
317
382
|
/**
|
|
@@ -341,32 +406,6 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
|
|
|
341
406
|
throw new Error('Page URL access failed - likely detached');
|
|
342
407
|
}
|
|
343
408
|
|
|
344
|
-
// Quick execution context validation with timeout
|
|
345
|
-
const contextValid = await Promise.race([
|
|
346
|
-
page.evaluate(() => {
|
|
347
|
-
try {
|
|
348
|
-
// Quick context validation
|
|
349
|
-
if (typeof window === 'undefined' || !document) {
|
|
350
|
-
return false;
|
|
351
|
-
}
|
|
352
|
-
// Check if document is ready for interaction
|
|
353
|
-
if (document.readyState === 'uninitialized') {
|
|
354
|
-
return false;
|
|
355
|
-
}
|
|
356
|
-
return true;
|
|
357
|
-
} catch (e) {
|
|
358
|
-
return false;
|
|
359
|
-
}
|
|
360
|
-
}),
|
|
361
|
-
new Promise((_, reject) => {
|
|
362
|
-
setTimeout(() => reject(new Error('Context validation timeout')), 3500);
|
|
363
|
-
})
|
|
364
|
-
]).catch(() => false);
|
|
365
|
-
|
|
366
|
-
if (!contextValid) {
|
|
367
|
-
throw new Error('Page execution context is invalid');
|
|
368
|
-
}
|
|
369
|
-
|
|
370
409
|
const result = await Promise.race([
|
|
371
410
|
page.evaluate(func),
|
|
372
411
|
new Promise((_, reject) => {
|
|
@@ -418,7 +457,7 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
|
|
|
418
457
|
}
|
|
419
458
|
|
|
420
459
|
// Wait before retrying with exponential backoff
|
|
421
|
-
await getRetryDelay(attempt);
|
|
460
|
+
await new Promise(resolve => setTimeout(resolve, getRetryDelay(attempt)));
|
|
422
461
|
}
|
|
423
462
|
}
|
|
424
463
|
|
|
@@ -440,15 +479,18 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
|
|
|
440
479
|
* Safe element clicking with timeout protection
|
|
441
480
|
*/
|
|
442
481
|
async function safeClick(page, selector, timeout = TIMEOUTS.CLICK_TIMEOUT) {
|
|
482
|
+
let timeoutId;
|
|
443
483
|
try {
|
|
444
484
|
return await Promise.race([
|
|
445
485
|
page.click(selector, { timeout: timeout }),
|
|
446
486
|
new Promise((_, reject) => {
|
|
447
|
-
setTimeout(() => reject(new Error('Click timeout')), timeout + TIMEOUTS.CLICK_TIMEOUT_BUFFER);
|
|
487
|
+
timeoutId = setTimeout(() => reject(new Error('Click timeout')), timeout + TIMEOUTS.CLICK_TIMEOUT_BUFFER);
|
|
448
488
|
})
|
|
449
489
|
]);
|
|
450
490
|
} catch (error) {
|
|
451
491
|
throw new Error(`Click failed: ${error.message}`);
|
|
492
|
+
} finally {
|
|
493
|
+
if (timeoutId) clearTimeout(timeoutId);
|
|
452
494
|
}
|
|
453
495
|
}
|
|
454
496
|
|
|
@@ -456,16 +498,18 @@ async function safeClick(page, selector, timeout = TIMEOUTS.CLICK_TIMEOUT) {
|
|
|
456
498
|
* Safe navigation waiting with timeout protection
|
|
457
499
|
*/
|
|
458
500
|
async function safeWaitForNavigation(page, timeout = TIMEOUTS.NAVIGATION_TIMEOUT) {
|
|
501
|
+
let timeoutId;
|
|
459
502
|
try {
|
|
460
503
|
return await Promise.race([
|
|
461
504
|
page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: timeout }),
|
|
462
505
|
new Promise((_, reject) => {
|
|
463
|
-
setTimeout(() => reject(new Error('Navigation timeout')), timeout + TIMEOUTS.NAVIGATION_TIMEOUT_BUFFER);
|
|
506
|
+
timeoutId = setTimeout(() => reject(new Error('Navigation timeout')), timeout + TIMEOUTS.NAVIGATION_TIMEOUT_BUFFER);
|
|
464
507
|
})
|
|
465
508
|
]);
|
|
466
509
|
} catch (error) {
|
|
467
510
|
console.warn(formatLogMessage('cloudflare', `Navigation wait failed: ${error.message}`));
|
|
468
|
-
|
|
511
|
+
} finally {
|
|
512
|
+
if (timeoutId) clearTimeout(timeoutId);
|
|
469
513
|
}
|
|
470
514
|
}
|
|
471
515
|
|
|
@@ -563,7 +607,14 @@ async function quickCloudflareDetection(page, forceDebug = false) {
|
|
|
563
607
|
*/
|
|
564
608
|
async function analyzeCloudflareChallenge(page) {
|
|
565
609
|
try {
|
|
566
|
-
|
|
610
|
+
// CDP-level frame check — bypasses closed shadow roots
|
|
611
|
+
const frames = page.frames();
|
|
612
|
+
const hasChallengeFrame = frames.some(f => {
|
|
613
|
+
const url = f.url();
|
|
614
|
+
return url.includes('challenges.cloudflare.com') || url.includes('/cdn-cgi/challenge-platform/');
|
|
615
|
+
});
|
|
616
|
+
|
|
617
|
+
const result = await safePageEvaluate(page, () => {
|
|
567
618
|
const title = document.title || '';
|
|
568
619
|
const bodyText = document.body ? document.body.textContent : '';
|
|
569
620
|
|
|
@@ -635,6 +686,15 @@ async function analyzeCloudflareChallenge(page) {
|
|
|
635
686
|
bodySnippet: bodyText.substring(0, 200)
|
|
636
687
|
};
|
|
637
688
|
}, TIMEOUTS.PAGE_EVALUATION);
|
|
689
|
+
|
|
690
|
+
// Merge CDP frame detection — catches iframes behind closed shadow roots
|
|
691
|
+
if (hasChallengeFrame && !result.hasTurnstileIframe) {
|
|
692
|
+
result.hasTurnstileIframe = true;
|
|
693
|
+
result.isTurnstile = true;
|
|
694
|
+
result.isChallengePresent = true;
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
return result;
|
|
638
698
|
} catch (error) {
|
|
639
699
|
return {
|
|
640
700
|
isChallengePresent: false,
|
|
@@ -842,7 +902,7 @@ async function handleVerificationChallengeWithRetries(page, currentUrl, siteConf
|
|
|
842
902
|
|
|
843
903
|
// If this wasn't the last attempt, wait before retrying
|
|
844
904
|
if (attempt < retryConfig.maxAttempts) {
|
|
845
|
-
const delay =
|
|
905
|
+
const delay = getRetryDelay(attempt);
|
|
846
906
|
if (forceDebug) {
|
|
847
907
|
console.log(formatLogMessage('cloudflare', `Challenge attempt ${attempt} failed, retrying in ${delay}ms: ${result.error}`));
|
|
848
908
|
}
|
|
@@ -884,7 +944,7 @@ async function handleVerificationChallengeWithRetries(page, currentUrl, siteConf
|
|
|
884
944
|
|
|
885
945
|
// Wait before retrying with exponential backoff
|
|
886
946
|
if (attempt < retryConfig.maxAttempts) {
|
|
887
|
-
await getRetryDelay(attempt);
|
|
947
|
+
await new Promise(resolve => setTimeout(resolve, getRetryDelay(attempt)));
|
|
888
948
|
}
|
|
889
949
|
}
|
|
890
950
|
}
|
|
@@ -925,7 +985,7 @@ async function handlePhishingWarningWithRetries(page, currentUrl, siteConfig, fo
|
|
|
925
985
|
|
|
926
986
|
// If this wasn't the last attempt, wait before retrying
|
|
927
987
|
if (attempt < retryConfig.maxAttempts) {
|
|
928
|
-
const delay =
|
|
988
|
+
const delay = getRetryDelay(attempt);
|
|
929
989
|
if (forceDebug) {
|
|
930
990
|
console.log(formatLogMessage('cloudflare', `Phishing warning attempt ${attempt} failed, retrying in ${delay}ms: ${result.error}`));
|
|
931
991
|
}
|
|
@@ -955,7 +1015,7 @@ async function handlePhishingWarningWithRetries(page, currentUrl, siteConfig, fo
|
|
|
955
1015
|
|
|
956
1016
|
// Wait before retrying with exponential backoff
|
|
957
1017
|
if (attempt < retryConfig.maxAttempts) {
|
|
958
|
-
await getRetryDelay(attempt);
|
|
1018
|
+
await new Promise(resolve => setTimeout(resolve, getRetryDelay(attempt)));
|
|
959
1019
|
}
|
|
960
1020
|
}
|
|
961
1021
|
}
|
|
@@ -1026,6 +1086,23 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
|
|
|
1026
1086
|
|
|
1027
1087
|
const jsResult = await waitForJSChallengeCompletion(page, forceDebug);
|
|
1028
1088
|
if (jsResult.success) {
|
|
1089
|
+
// Wait for redirect after challenge completion
|
|
1090
|
+
try {
|
|
1091
|
+
const startUrl = await page.url();
|
|
1092
|
+
await page.waitForFunction(
|
|
1093
|
+
(origUrl) => {
|
|
1094
|
+
const bodyText = document.body?.textContent || '';
|
|
1095
|
+
return document.title !== 'Just a moment...' ||
|
|
1096
|
+
window.location.href !== origUrl ||
|
|
1097
|
+
bodyText.includes('Verification successful');
|
|
1098
|
+
},
|
|
1099
|
+
{ timeout: 10000 },
|
|
1100
|
+
startUrl
|
|
1101
|
+
);
|
|
1102
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge page cleared for ${currentUrl}`));
|
|
1103
|
+
} catch (_) {
|
|
1104
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge page not cleared after 10s � continuing`));
|
|
1105
|
+
}
|
|
1029
1106
|
result.success = true;
|
|
1030
1107
|
result.method = 'js_challenge_wait';
|
|
1031
1108
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `JS challenge completed successfully for ${currentUrl}`));
|
|
@@ -1034,6 +1111,8 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
|
|
|
1034
1111
|
} catch (jsError) {
|
|
1035
1112
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `JS challenge wait failed for ${currentUrl}: ${jsError.message}`));
|
|
1036
1113
|
}
|
|
1114
|
+
} else if (forceDebug) {
|
|
1115
|
+
console.log(formatLogMessage('cloudflare', `Skipping JS challenge method (not detected)`));
|
|
1037
1116
|
}
|
|
1038
1117
|
|
|
1039
1118
|
// Method 2: Handle Turnstile challenges (interactive)
|
|
@@ -1051,6 +1130,8 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
|
|
|
1051
1130
|
} catch (turnstileError) {
|
|
1052
1131
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile method failed for ${currentUrl}: ${turnstileError.message}`));
|
|
1053
1132
|
}
|
|
1133
|
+
} else if (forceDebug) {
|
|
1134
|
+
console.log(formatLogMessage('cloudflare', `Skipping Turnstile method (not detected)`));
|
|
1054
1135
|
}
|
|
1055
1136
|
|
|
1056
1137
|
// Method 3: Legacy checkbox interaction (fallback)
|
|
@@ -1068,10 +1149,23 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
|
|
|
1068
1149
|
} catch (legacyError) {
|
|
1069
1150
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `Legacy checkbox method failed for ${currentUrl}: ${legacyError.message}`));
|
|
1070
1151
|
}
|
|
1152
|
+
} else if (forceDebug) {
|
|
1153
|
+
console.log(formatLogMessage('cloudflare', `Skipping legacy checkbox method (not detected)`));
|
|
1071
1154
|
}
|
|
1072
1155
|
|
|
1073
1156
|
if (!result.success) {
|
|
1074
1157
|
result.error = result.error || 'All challenge bypass methods failed';
|
|
1158
|
+
if (forceDebug) {
|
|
1159
|
+
try {
|
|
1160
|
+
const postState = await page.evaluate(() => ({
|
|
1161
|
+
title: document.title,
|
|
1162
|
+
url: window.location.href,
|
|
1163
|
+
body: (document.body?.textContent || '').substring(0, 300)
|
|
1164
|
+
}));
|
|
1165
|
+
console.log(formatLogMessage('cloudflare', `Post-attempt page state: title="${postState.title}" url=${postState.url}`));
|
|
1166
|
+
console.log(formatLogMessage('cloudflare', `Post-attempt body: ${postState.body}`));
|
|
1167
|
+
} catch (_) {}
|
|
1168
|
+
}
|
|
1075
1169
|
}
|
|
1076
1170
|
|
|
1077
1171
|
return result;
|
|
@@ -1089,88 +1183,57 @@ async function handleEmbeddedIframeChallenge(page, forceDebug = false) {
|
|
|
1089
1183
|
try {
|
|
1090
1184
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `Checking for embedded iframe challenges`));
|
|
1091
1185
|
|
|
1092
|
-
//
|
|
1093
|
-
const
|
|
1094
|
-
|
|
1095
|
-
'
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
];
|
|
1099
|
-
|
|
1100
|
-
// Wait for iframe to appear
|
|
1101
|
-
let iframeFound = false;
|
|
1102
|
-
for (const selector of iframeSelectors) {
|
|
1103
|
-
try {
|
|
1104
|
-
await Promise.race([
|
|
1105
|
-
page.waitForSelector(selector, { timeout: FAST_TIMEOUTS.SELECTOR_WAIT }),
|
|
1106
|
-
new Promise((_, reject) => setTimeout(() => reject(new Error('Timeout')), FAST_TIMEOUTS.SELECTOR_WAIT + 1000))
|
|
1107
|
-
]);
|
|
1108
|
-
iframeFound = true;
|
|
1109
|
-
if (forceDebug) console.log(formatLogMessage('cloudflare', `Found iframe: ${selector}`));
|
|
1110
|
-
break;
|
|
1111
|
-
} catch (e) {
|
|
1112
|
-
continue;
|
|
1186
|
+
// Use CDP-level frame detection — bypasses closed shadow roots
|
|
1187
|
+
const frames = page.frames();
|
|
1188
|
+
if (forceDebug) {
|
|
1189
|
+
console.log(formatLogMessage('cloudflare', `Available frames (${frames.length}):`));
|
|
1190
|
+
for (const f of frames) {
|
|
1191
|
+
console.log(formatLogMessage('cloudflare', ` ${f.url()}`));
|
|
1113
1192
|
}
|
|
1114
1193
|
}
|
|
1115
|
-
|
|
1116
|
-
if (!iframeFound) {
|
|
1117
|
-
result.error = 'No embedded iframe found';
|
|
1118
|
-
return result;
|
|
1119
|
-
}
|
|
1120
|
-
|
|
1121
|
-
// Find challenge frame using existing frame detection logic
|
|
1122
|
-
const frames = await page.frames();
|
|
1123
1194
|
const challengeFrame = frames.find(frame => {
|
|
1124
1195
|
const frameUrl = frame.url();
|
|
1125
1196
|
return frameUrl.includes('challenges.cloudflare.com') ||
|
|
1197
|
+
frameUrl.includes('/cdn-cgi/challenge-platform/') ||
|
|
1126
1198
|
frameUrl.includes('/turnstile/if/') ||
|
|
1127
|
-
frameUrl.includes('captcha-delivery.com') ||
|
|
1128
|
-
frameUrl.includes('/challenge-platform/') ||
|
|
1129
1199
|
frameUrl.includes('turnstile');
|
|
1130
1200
|
});
|
|
1131
1201
|
|
|
1132
1202
|
if (!challengeFrame) {
|
|
1133
|
-
result.error = '
|
|
1203
|
+
result.error = 'No challenge frame found via CDP';
|
|
1134
1204
|
return result;
|
|
1135
1205
|
}
|
|
1136
1206
|
|
|
1137
1207
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `Interacting with iframe: ${challengeFrame.url()}`));
|
|
1138
1208
|
|
|
1139
|
-
|
|
1140
|
-
|
|
1209
|
+
await waitForTimeout(page, 500);
|
|
1210
|
+
|
|
1211
|
+
let checkboxInteractionSuccess = false;
|
|
1212
|
+
try {
|
|
1213
|
+
const shadowResult = await clickInShadowDOM(challengeFrame, [
|
|
1141
1214
|
'input[type="checkbox"]',
|
|
1142
1215
|
'.ctp-checkbox',
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
try {
|
|
1151
|
-
await Promise.race([
|
|
1152
|
-
challengeFrame.waitForSelector(selector, { timeout: FAST_TIMEOUTS.SELECTOR_WAIT }),
|
|
1153
|
-
new Promise((_, reject) => setTimeout(() => reject(new Error('Timeout')), FAST_TIMEOUTS.SELECTOR_WAIT + 1000))
|
|
1154
|
-
]);
|
|
1155
|
-
|
|
1156
|
-
await waitForTimeout(page, FAST_TIMEOUTS.ELEMENT_INTERACTION_DELAY);
|
|
1157
|
-
await challengeFrame.click(selector);
|
|
1158
|
-
|
|
1159
|
-
if (forceDebug) console.log(formatLogMessage('cloudflare', `Clicked iframe element: ${selector}`));
|
|
1216
|
+
'.ctp-checkbox-label',
|
|
1217
|
+
'[role="checkbox"]',
|
|
1218
|
+
'label.cb-lb',
|
|
1219
|
+
'label'
|
|
1220
|
+
], forceDebug);
|
|
1221
|
+
|
|
1222
|
+
if (shadowResult.clicked) {
|
|
1160
1223
|
checkboxInteractionSuccess = true;
|
|
1161
|
-
|
|
1162
|
-
}
|
|
1163
|
-
|
|
1224
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM click succeeded: ${shadowResult.selector}`));
|
|
1225
|
+
} else if (shadowResult.found && shadowResult.x > 0) {
|
|
1226
|
+
await page.mouse.click(shadowResult.x, shadowResult.y);
|
|
1227
|
+
checkboxInteractionSuccess = true;
|
|
1228
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM mouse fallback at (${shadowResult.x}, ${shadowResult.y})`));
|
|
1164
1229
|
}
|
|
1230
|
+
} catch (shadowErr) {
|
|
1231
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM click failed: ${shadowErr.message}`));
|
|
1165
1232
|
}
|
|
1166
1233
|
|
|
1167
|
-
// Try alternative interaction only if standard selectors failed
|
|
1168
1234
|
if (!checkboxInteractionSuccess) {
|
|
1169
|
-
if (forceDebug) console.log(formatLogMessage('cloudflare', `Checkbox interactions failed, trying container fallback`));
|
|
1170
|
-
await waitForTimeout(page, 1000);
|
|
1171
1235
|
|
|
1172
1236
|
try {
|
|
1173
|
-
// Try clicking on the iframe container itself as fallback
|
|
1174
1237
|
const iframeElement = await page.$('iframe[src*="challenges.cloudflare.com"]');
|
|
1175
1238
|
if (iframeElement) {
|
|
1176
1239
|
await iframeElement.click();
|
|
@@ -1179,8 +1242,6 @@ async function handleEmbeddedIframeChallenge(page, forceDebug = false) {
|
|
|
1179
1242
|
} catch (containerClickError) {
|
|
1180
1243
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `Container click failed: ${containerClickError.message}`));
|
|
1181
1244
|
}
|
|
1182
|
-
} else {
|
|
1183
|
-
if (forceDebug) console.log(formatLogMessage('cloudflare', `Checkbox interaction successful, skipping container fallback`));
|
|
1184
1245
|
}
|
|
1185
1246
|
|
|
1186
1247
|
// Reuse existing completion check pattern with error handling
|
|
@@ -1237,8 +1298,10 @@ async function waitForJSChallengeCompletion(page, forceDebug = false) {
|
|
|
1237
1298
|
await Promise.race([
|
|
1238
1299
|
page.waitForFunction(
|
|
1239
1300
|
() => {
|
|
1240
|
-
|
|
1241
|
-
|
|
1301
|
+
const bodyText = document.body.textContent;
|
|
1302
|
+
if (bodyText.includes('Verification successful')) return true;
|
|
1303
|
+
return !bodyText.includes('Checking your browser') &&
|
|
1304
|
+
!bodyText.includes('Please wait while we verify') &&
|
|
1242
1305
|
!document.querySelector('.cf-challenge-running') &&
|
|
1243
1306
|
!document.querySelector('[data-cf-challenge]');
|
|
1244
1307
|
},
|
|
@@ -1322,28 +1385,26 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
|
|
|
1322
1385
|
console.log(formatLogMessage('cloudflare', `Found Turnstile iframe with URL: ${turnstileFrame.url()}`));
|
|
1323
1386
|
}
|
|
1324
1387
|
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
if (forceDebug) console.log(formatLogMessage('cloudflare', `
|
|
1342
|
-
break;
|
|
1343
|
-
} catch (e) {
|
|
1344
|
-
if (forceDebug) console.log(formatLogMessage('cloudflare', `Checkbox selector ${selector} not found or failed to click`));
|
|
1345
|
-
continue;
|
|
1388
|
+
await waitForTimeout(page, FAST_TIMEOUTS.ELEMENT_INTERACTION_DELAY);
|
|
1389
|
+
|
|
1390
|
+
try {
|
|
1391
|
+
const shadowResult = await clickInShadowDOM(turnstileFrame, [
|
|
1392
|
+
'input[type="checkbox"]',
|
|
1393
|
+
'.ctp-checkbox',
|
|
1394
|
+
'.ctp-checkbox-label',
|
|
1395
|
+
'[role="checkbox"]',
|
|
1396
|
+
'label.cb-lb',
|
|
1397
|
+
'label'
|
|
1398
|
+
], forceDebug);
|
|
1399
|
+
|
|
1400
|
+
if (shadowResult.clicked) {
|
|
1401
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile shadow DOM click succeeded: ${shadowResult.selector}`));
|
|
1402
|
+
} else if (shadowResult.found && shadowResult.x > 0) {
|
|
1403
|
+
await page.mouse.click(shadowResult.x, shadowResult.y);
|
|
1404
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile shadow DOM mouse fallback at (${shadowResult.x}, ${shadowResult.y})`));
|
|
1346
1405
|
}
|
|
1406
|
+
} catch (shadowErr) {
|
|
1407
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM fallback failed: ${shadowErr.message}`));
|
|
1347
1408
|
}
|
|
1348
1409
|
|
|
1349
1410
|
// Wait for Turnstile completion with reduced timeout
|
|
@@ -1531,7 +1592,11 @@ async function checkChallengeCompletion(page) {
|
|
|
1531
1592
|
* }
|
|
1532
1593
|
*/
|
|
1533
1594
|
async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDebug = false) {
|
|
1534
|
-
|
|
1595
|
+
const cfDebug = forceDebug || siteConfig.cloudflare_bypass === 'debug' || siteConfig.cloudflare_phish === 'debug';
|
|
1596
|
+
const cfBypassEnabled = siteConfig.cloudflare_bypass === true || siteConfig.cloudflare_bypass === 'debug';
|
|
1597
|
+
const cfPhishEnabled = siteConfig.cloudflare_phish === true || siteConfig.cloudflare_phish === 'debug';
|
|
1598
|
+
|
|
1599
|
+
if (cfDebug) {
|
|
1535
1600
|
console.log(formatLogMessage('cloudflare', `Using Cloudflare module v${CLOUDFLARE_MODULE_VERSION} for ${currentUrl}`));
|
|
1536
1601
|
}
|
|
1537
1602
|
|
|
@@ -1561,7 +1626,7 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
|
|
|
1561
1626
|
// Sets attempted: false, success: true for both protection types
|
|
1562
1627
|
|
|
1563
1628
|
// Only proceed if we have indicators OR explicit config enables Cloudflare handling
|
|
1564
|
-
if (!quickDetection.hasIndicators && !
|
|
1629
|
+
if (!quickDetection.hasIndicators && !cfPhishEnabled && !cfBypassEnabled) {
|
|
1565
1630
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `No Cloudflare indicators found and no explicit config, skipping protection handling for ${currentUrl}`));
|
|
1566
1631
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `Quick detection details: title="${quickDetection.title}", bodySnippet="${quickDetection.bodySnippet}"`));
|
|
1567
1632
|
return {
|
|
@@ -1586,7 +1651,7 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
|
|
|
1586
1651
|
try {
|
|
1587
1652
|
// Adaptive timeout based on detection results and explicit config
|
|
1588
1653
|
let adaptiveTimeout;
|
|
1589
|
-
if (
|
|
1654
|
+
if (cfPhishEnabled || cfBypassEnabled) {
|
|
1590
1655
|
// Explicit config - give more time
|
|
1591
1656
|
adaptiveTimeout = quickDetection.hasIndicators ? TIMEOUTS.ADAPTIVE_TIMEOUT_WITH_INDICATORS : TIMEOUTS.ADAPTIVE_TIMEOUT_WITHOUT_INDICATORS;
|
|
1592
1657
|
} else {
|
|
@@ -1599,7 +1664,7 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
|
|
|
1599
1664
|
}
|
|
1600
1665
|
|
|
1601
1666
|
return await Promise.race([
|
|
1602
|
-
performCloudflareHandling(page, currentUrl, siteConfig,
|
|
1667
|
+
performCloudflareHandling(page, currentUrl, siteConfig, cfDebug),
|
|
1603
1668
|
new Promise((resolve) => {
|
|
1604
1669
|
setTimeout(() => {
|
|
1605
1670
|
console.warn(formatLogMessage('cloudflare', `Adaptive timeout (${adaptiveTimeout}ms) for ${currentUrl} - continuing with scan`));
|
|
@@ -1631,6 +1696,9 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
|
|
|
1631
1696
|
* @returns {Promise<Object>} Same structure as handleCloudflareProtection()
|
|
1632
1697
|
*/
|
|
1633
1698
|
async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebug = false) {
|
|
1699
|
+
const cfBypassEnabled = siteConfig.cloudflare_bypass === true || siteConfig.cloudflare_bypass === 'debug';
|
|
1700
|
+
const cfPhishEnabled = siteConfig.cloudflare_phish === true || siteConfig.cloudflare_phish === 'debug';
|
|
1701
|
+
|
|
1634
1702
|
const result = {
|
|
1635
1703
|
phishingWarning: { attempted: false, success: false },
|
|
1636
1704
|
verificationChallenge: { attempted: false, success: false },
|
|
@@ -1643,7 +1711,7 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
|
|
|
1643
1711
|
// Handle phishing warnings first - updates result.phishingWarning
|
|
1644
1712
|
// Only runs if siteConfig.cloudflare_phish === true
|
|
1645
1713
|
// Handle phishing warnings if enabled
|
|
1646
|
-
if (
|
|
1714
|
+
if (cfPhishEnabled) {
|
|
1647
1715
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `Phishing warning bypass enabled for ${currentUrl}`));
|
|
1648
1716
|
|
|
1649
1717
|
const phishingResult = await handlePhishingWarningWithRetries(page, currentUrl, siteConfig, forceDebug);
|
|
@@ -1678,7 +1746,7 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
|
|
|
1678
1746
|
// Only runs if siteConfig.cloudflare_bypass === true
|
|
1679
1747
|
// Sets requiresHuman: true if CAPTCHA detected (no bypass attempted)
|
|
1680
1748
|
// Handle verification challenges if enabled
|
|
1681
|
-
if (
|
|
1749
|
+
if (cfBypassEnabled) {
|
|
1682
1750
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge bypass enabled for ${currentUrl}`));
|
|
1683
1751
|
|
|
1684
1752
|
const challengeResult = await handleVerificationChallengeWithRetries(page, currentUrl, siteConfig, forceDebug);
|
|
@@ -1731,55 +1799,28 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
|
|
|
1731
1799
|
* Performs parallel detection of multiple challenge types for better performance
|
|
1732
1800
|
*/
|
|
1733
1801
|
async function parallelChallengeDetection(page, forceDebug = false) {
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
1740
|
-
|
|
1741
|
-
detected: document.querySelector('
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
|
|
1749
|
-
|
|
1750
|
-
|
|
1751
|
-
|
|
1752
|
-
|
|
1753
|
-
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
};
|
|
1757
|
-
}).catch(err => ({ type: 'turnstile', detected: false, error: err.message }))
|
|
1758
|
-
);
|
|
1759
|
-
|
|
1760
|
-
// Check for phishing warning
|
|
1761
|
-
detectionPromises.push(
|
|
1762
|
-
page.evaluate(() => {
|
|
1763
|
-
return {
|
|
1764
|
-
type: 'phishing',
|
|
1765
|
-
detected: document.body?.textContent?.includes('This website has been reported for potential phishing') ||
|
|
1766
|
-
document.querySelector('a[href*="continue"]') !== null
|
|
1767
|
-
};
|
|
1768
|
-
}).catch(err => ({ type: 'phishing', detected: false, error: err.message }))
|
|
1769
|
-
);
|
|
1770
|
-
|
|
1771
|
-
// Check for managed challenge
|
|
1772
|
-
detectionPromises.push(
|
|
1773
|
-
page.evaluate(() => {
|
|
1774
|
-
return {
|
|
1775
|
-
type: 'managed',
|
|
1776
|
-
detected: document.querySelector('.cf-managed-challenge') !== null ||
|
|
1777
|
-
document.querySelector('[data-cf-managed]') !== null
|
|
1778
|
-
};
|
|
1779
|
-
}).catch(err => ({ type: 'managed', detected: false, error: err.message }))
|
|
1780
|
-
);
|
|
1781
|
-
|
|
1782
|
-
const results = await Promise.all(detectionPromises);
|
|
1802
|
+
let results;
|
|
1803
|
+
try {
|
|
1804
|
+
results = await page.evaluate(() => {
|
|
1805
|
+
const bodyText = document.body?.textContent || '';
|
|
1806
|
+
return [
|
|
1807
|
+
{ type: 'js', detected: document.querySelector('script[src*="/cdn-cgi/challenge-platform/"]') !== null ||
|
|
1808
|
+
bodyText.includes('Checking your browser') || bodyText.includes('Please wait while we verify') },
|
|
1809
|
+
{ type: 'turnstile', detected: document.querySelector('.cf-turnstile') !== null ||
|
|
1810
|
+
document.querySelector('iframe[src*="challenges.cloudflare.com"]') !== null ||
|
|
1811
|
+
document.querySelector('.ctp-checkbox-container') !== null },
|
|
1812
|
+
{ type: 'phishing', detected: bodyText.includes('This website has been reported for potential phishing') ||
|
|
1813
|
+
document.querySelector('a[href*="continue"]') !== null },
|
|
1814
|
+
{ type: 'managed', detected: document.querySelector('.cf-managed-challenge') !== null ||
|
|
1815
|
+
document.querySelector('[data-cf-managed]') !== null }
|
|
1816
|
+
];
|
|
1817
|
+
});
|
|
1818
|
+
} catch (err) {
|
|
1819
|
+
results = [
|
|
1820
|
+
{ type: 'js', detected: false }, { type: 'turnstile', detected: false },
|
|
1821
|
+
{ type: 'phishing', detected: false }, { type: 'managed', detected: false }
|
|
1822
|
+
];
|
|
1823
|
+
}
|
|
1783
1824
|
|
|
1784
1825
|
const detectedChallenges = results.filter(r => r.detected).map(r => r.type);
|
|
1785
1826
|
|
package/lib/proxy.js
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Proxy Module for NWSS Network Scanner
|
|
3
|
+
* ======================================
|
|
4
|
+
* Routes specific site URLs through SOCKS5, SOCKS4, HTTP, or HTTPS proxies.
|
|
5
|
+
*
|
|
6
|
+
* Chromium's --proxy-server flag is browser-wide, so sites requiring a proxy
|
|
7
|
+
* need a separate browser instance. This module handles:
|
|
8
|
+
* - Parsing proxy URLs (all supported protocols)
|
|
9
|
+
* - Generating Chromium launch args
|
|
10
|
+
* - Per-page authentication via Puppeteer
|
|
11
|
+
* - Proxy bypass lists
|
|
12
|
+
* - Proxy health checks
|
|
13
|
+
*
|
|
14
|
+
* CONFIG EXAMPLES:
|
|
15
|
+
*
|
|
16
|
+
* SOCKS5 (no auth):
|
|
17
|
+
* "proxy": "socks5://127.0.0.1:1080"
|
|
18
|
+
*
|
|
19
|
+
* SOCKS5 with auth:
|
|
20
|
+
* "proxy": "socks5://user:pass@127.0.0.1:1080"
|
|
21
|
+
*
|
|
22
|
+
* HTTP proxy (corporate):
|
|
23
|
+
* "proxy": "http://proxy.corp.com:3128"
|
|
24
|
+
*
|
|
25
|
+
* HTTP proxy with auth:
|
|
26
|
+
* "proxy": "http://user:pass@proxy.corp.com:8080"
|
|
27
|
+
*
|
|
28
|
+
* HTTPS proxy:
|
|
29
|
+
* "proxy": "https://secure-proxy.example.com:8443"
|
|
30
|
+
*
|
|
31
|
+
* With bypass list and remote DNS:
|
|
32
|
+
* "proxy": "socks5://127.0.0.1:1080",
|
|
33
|
+
* "proxy_bypass": ["localhost", "127.0.0.1", "*.local"],
|
|
34
|
+
* "proxy_remote_dns": true
|
|
35
|
+
*
|
|
36
|
+
* Debug mode:
|
|
37
|
+
* "proxy": "socks5://127.0.0.1:1080",
|
|
38
|
+
* "proxy_debug": true
|
|
39
|
+
*
|
|
40
|
+
* Legacy key (backwards compatible):
|
|
41
|
+
* "socks5_proxy": "socks5://127.0.0.1:1080"
|
|
42
|
+
*
|
|
43
|
+
* INTEGRATION (in nwss.js):
|
|
44
|
+
* const { needsProxy, getProxyArgs, applyProxyAuth, getProxyInfo } = require('./lib/proxy');
|
|
45
|
+
*
|
|
46
|
+
* // Before browser launch
|
|
47
|
+
* if (needsProxy(siteConfig)) {
|
|
48
|
+
* const proxyArgs = getProxyArgs(siteConfig, forceDebug);
|
|
49
|
+
* browserArgs.push(...proxyArgs);
|
|
50
|
+
* }
|
|
51
|
+
*
|
|
52
|
+
* // After page creation, before page.goto()
|
|
53
|
+
* await applyProxyAuth(page, siteConfig, forceDebug);
|
|
54
|
+
*
|
|
55
|
+
* @version 1.1.0
|
|
56
|
+
*/
|
|
57
|
+
|
|
58
|
+
const { formatLogMessage } = require('./colorize');
|
|
59
|
+
|
|
60
|
+
// Version string reported by getModuleInfo() and exported for callers.
const PROXY_MODULE_VERSION = '1.1.0';

// Proxy URL schemes accepted by parseProxyUrl(); anything else is rejected.
const SUPPORTED_PROTOCOLS = [
  'socks5',
  'socks4',
  'http',
  'https'
];

// Port assumed for each scheme when the proxy URL does not carry one.
const DEFAULT_PORTS = { socks5: 1080, socks4: 1080, http: 8080, https: 8443 };
|
|
69
|
+
|
|
70
|
+
/**
 * Looks up the proxy URL string configured for a site.
 * The "proxy" key wins; the legacy "socks5_proxy" key is used only when
 * "proxy" is missing or falsy.
 *
 * @param {object} siteConfig - Site configuration block
 * @returns {string|null} The configured proxy value, or null when neither key is set
 */
function getConfiguredProxy(siteConfig) {
  const preferred = siteConfig.proxy;
  if (preferred) {
    return preferred;
  }

  const legacy = siteConfig.socks5_proxy;
  return legacy ? legacy : null;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Parses a proxy URL into components.
 * Accepts: protocol://host:port, protocol://user:pass@host:port, bare host:port
 * (a value without "://" is treated as socks5://).
 *
 * When no port is given, the scheme's entry in DEFAULT_PORTS is used
 * (falling back to 1080). An explicitly written port is always honoured,
 * including 80 for http:// and 443 for https://.
 *
 * @param {string} proxyUrl - Proxy URL string
 * @returns {{protocol: string, host: string, port: number, username: (string|null), password: (string|null)}|null}
 *   Parsed proxy, or null if the input is empty, not a string, unparsable,
 *   or uses a scheme outside SUPPORTED_PROTOCOLS
 */
function parseProxyUrl(proxyUrl) {
  if (!proxyUrl || typeof proxyUrl !== 'string') return null;

  let cleaned = proxyUrl.trim();

  // Normalise bare host:port to socks5:// URL
  if (!cleaned.includes('://')) {
    cleaned = `socks5://${cleaned}`;
  }

  try {
    const url = new URL(cleaned);
    const protocol = url.protocol.replace(':', '');

    if (!SUPPORTED_PROTOCOLS.includes(protocol)) return null;

    const host = url.hostname;
    if (!host) return null;

    // The WHATWG URL parser reports port '' when the written port equals the
    // scheme default (80 for http, 443 for https). Without this recovery step
    // "http://proxy:80" would silently become DEFAULT_PORTS.http (8080).
    let portText = url.port;
    if (!portText) {
      const authority = cleaned.slice(cleaned.indexOf('://') + 3).split(/[/\\?#]/, 1)[0];
      const explicitPort = /:(\d+)$/.exec(authority);
      if (explicitPort) portText = explicitPort[1];
    }

    const port = parseInt(portText, 10) || DEFAULT_PORTS[protocol] || 1080;
    // Credentials arrive percent-encoded; a malformed escape throws and is
    // reported as an invalid URL by the catch below.
    const username = url.username ? decodeURIComponent(url.username) : null;
    const password = url.password ? decodeURIComponent(url.password) : null;

    return { protocol, host, port, username, password };
  } catch (_) {
    return null;
  }
}
|
|
116
|
+
|
|
117
|
+
/**
 * Tells whether a site config asks for proxy routing, i.e. whether
 * getConfiguredProxy() yields a truthy value for it.
 * The value is not validated here; an unparsable proxy string still counts.
 *
 * @param {object} siteConfig - Site configuration block
 * @returns {boolean} True when a proxy value is configured
 */
function needsProxy(siteConfig) {
  const configured = getConfiguredProxy(siteConfig);
  return Boolean(configured);
}
|
|
126
|
+
|
|
127
|
+
/**
 * Returns Chromium launch arguments for the configured proxy.
 *
 * Produces, in order:
 *   --proxy-server=<protocol>://<host>:<port>   (credentials are never included)
 *   --host-resolver-rules=...                   (SOCKS only, unless remote DNS is disabled)
 *   --proxy-bypass-list=a;b;c                   (only when a bypass list is configured)
 *
 * Config keys read: proxy / socks5_proxy (via getConfiguredProxy),
 * proxy_remote_dns / socks5_remote_dns, proxy_bypass / socks5_bypass,
 * proxy_debug / socks5_debug.
 *
 * @param {object} siteConfig - Site configuration block
 * @param {boolean} forceDebug - Log the generated args even if the site has no debug flag
 * @returns {string[]} Array of Chromium args (empty if no proxy configured or the URL is invalid)
 */
function getProxyArgs(siteConfig, forceDebug = false) {
  const proxyUrl = getConfiguredProxy(siteConfig);
  if (!proxyUrl) return [];

  const parsed = parseProxyUrl(proxyUrl);
  if (!parsed) {
    console.warn(formatLogMessage('proxy', `Invalid proxy URL: ${proxyUrl}`));
    return [];
  }

  const args = [
    `--proxy-server=${parsed.protocol}://${parsed.host}:${parsed.port}`
  ];

  // Remote DNS: resolve hostnames through the proxy (prevents DNS leaks)
  // Only meaningful for SOCKS proxies; HTTP proxies resolve remotely by default
  const remoteDns = siteConfig.proxy_remote_dns ?? siteConfig.socks5_remote_dns;
  if ((parsed.protocol === 'socks5' || parsed.protocol === 'socks4') && remoteDns !== false) {
    // "MAP * ~NOTFOUND" blocks every local lookup, so the proxy's own host
    // must be excluded or Chromium cannot reach a proxy given by hostname.
    // 127.0.0.1 stays excluded as before, keeping the output unchanged for
    // the common local-proxy case.
    const excludedHosts = ['127.0.0.1'];
    if (parsed.host !== '127.0.0.1') excludedHosts.push(parsed.host);
    const excludeRules = excludedHosts.map((host) => `EXCLUDE ${host}`).join(' , ');
    // NOTE(review): for socks4:// Chromium appears to resolve target hosts on
    // the client side, in which case this rule would block all hostname
    // lookups; confirm against Chromium's proxy docs before relying on it.
    args.push(`--host-resolver-rules=MAP * ~NOTFOUND , ${excludeRules}`);
  }

  // Bypass list: domains that skip the proxy.
  // A single string is accepted as a one-entry list instead of crashing on .join().
  const bypassConfig = siteConfig.proxy_bypass || siteConfig.socks5_bypass || [];
  let bypass = [];
  if (Array.isArray(bypassConfig)) {
    bypass = bypassConfig;
  } else if (typeof bypassConfig === 'string') {
    bypass = [bypassConfig];
  }
  if (bypass.length > 0) {
    args.push(`--proxy-bypass-list=${bypass.join(';')}`);
  }

  const debug = forceDebug || siteConfig.proxy_debug || siteConfig.socks5_debug;
  if (debug) {
    console.log(formatLogMessage('proxy', `[${parsed.protocol}] Args: ${args.join(' ')}`));
  }

  return args;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Hands the proxy credentials embedded in the configured proxy URL to the
 * page through page.authenticate(). Call this BEFORE page.goto().
 *
 * Nothing is done (and false is returned) when no proxy is configured, the
 * URL cannot be parsed, or it carries no username. A missing password is
 * sent as an empty string. Failures of page.authenticate() are logged as a
 * warning and reported as false rather than thrown.
 *
 * NOTE(review): page.authenticate() answers HTTP authentication challenges;
 * whether Chromium honours these credentials for SOCKS proxies should be
 * confirmed before relying on "socks5://user:pass@..." configs.
 *
 * @param {object} page - Puppeteer page instance
 * @param {object} siteConfig - Site configuration block
 * @param {boolean} forceDebug - Log the applied username/host even without a site debug flag
 * @returns {Promise<boolean>} True if auth was applied
 */
async function applyProxyAuth(page, siteConfig, forceDebug = false) {
  const configured = getConfiguredProxy(siteConfig);
  const proxy = configured ? parseProxyUrl(configured) : null;

  if (!proxy || !proxy.username) {
    return false;
  }

  const credentials = {
    username: proxy.username,
    password: proxy.password || ''
  };

  try {
    await page.authenticate(credentials);

    // Only the username and endpoint are logged; the password never is.
    if (forceDebug || siteConfig.proxy_debug || siteConfig.socks5_debug) {
      console.log(formatLogMessage('proxy', `Auth set for ${proxy.username}@${proxy.host}:${proxy.port}`));
    }

    return true;
  } catch (authErr) {
    console.warn(formatLogMessage('proxy', `Failed to set proxy auth: ${authErr.message}`));
    return false;
  }
}
|
|
202
|
+
|
|
203
|
+
/**
|
|
204
|
+
* Tests proxy connectivity by attempting a TCP connection.
|
|
205
|
+
*
|
|
206
|
+
* @param {object} siteConfig
|
|
207
|
+
* @param {number} timeoutMs - Connection timeout (default 5000ms)
|
|
208
|
+
* @returns {Promise<object>} { reachable, latencyMs, error }
|
|
209
|
+
*/
|
|
210
|
+
async function testProxy(siteConfig, timeoutMs = 5000) {
|
|
211
|
+
const proxyUrl = getConfiguredProxy(siteConfig);
|
|
212
|
+
if (!proxyUrl) {
|
|
213
|
+
return { reachable: false, latencyMs: 0, error: 'No proxy configured' };
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
const parsed = parseProxyUrl(proxyUrl);
|
|
217
|
+
if (!parsed) {
|
|
218
|
+
return { reachable: false, latencyMs: 0, error: 'Invalid proxy URL' };
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
const net = require('net');
|
|
222
|
+
const start = Date.now();
|
|
223
|
+
|
|
224
|
+
return new Promise((resolve) => {
|
|
225
|
+
const socket = new net.Socket();
|
|
226
|
+
|
|
227
|
+
const onError = (err) => {
|
|
228
|
+
socket.destroy();
|
|
229
|
+
resolve({ reachable: false, latencyMs: Date.now() - start, error: err.message });
|
|
230
|
+
};
|
|
231
|
+
|
|
232
|
+
socket.setTimeout(timeoutMs);
|
|
233
|
+
socket.on('error', onError);
|
|
234
|
+
socket.on('timeout', () => onError(new Error('Connection timeout')));
|
|
235
|
+
|
|
236
|
+
socket.connect(parsed.port, parsed.host, () => {
|
|
237
|
+
const latency = Date.now() - start;
|
|
238
|
+
socket.destroy();
|
|
239
|
+
resolve({ reachable: true, latencyMs: latency, error: null });
|
|
240
|
+
});
|
|
241
|
+
});
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
/**
 * Builds a short, log-friendly description of the configured proxy.
 *
 * Result is one of:
 *   'none'                               - no proxy configured
 *   'invalid'                            - a value is configured but cannot be parsed
 *   '<protocol>://[<username>@]<host>:<port>'
 * The password is never part of the string.
 *
 * @param {object} siteConfig - Site configuration block
 * @returns {string}
 */
function getProxyInfo(siteConfig) {
  const configured = getConfiguredProxy(siteConfig);
  if (!configured) {
    return 'none';
  }

  const details = parseProxyUrl(configured);
  if (!details) {
    return 'invalid';
  }

  const { protocol, username, host, port } = details;
  const userPrefix = username ? `${username}@` : '';
  return `${protocol}://${userPrefix}${host}:${port}`;
}
|
|
260
|
+
|
|
261
|
+
/**
 * Describes this module for diagnostics.
 *
 * @returns {{version: string, name: string}} PROXY_MODULE_VERSION plus the
 *   fixed display name 'Proxy Handler'
 */
function getModuleInfo() {
  const info = {
    version: PROXY_MODULE_VERSION,
    name: 'Proxy Handler'
  };
  return info;
}
|
|
267
|
+
|
|
268
|
+
module.exports = {
|
|
269
|
+
parseProxyUrl,
|
|
270
|
+
needsProxy,
|
|
271
|
+
getProxyArgs,
|
|
272
|
+
applyProxyAuth,
|
|
273
|
+
testProxy,
|
|
274
|
+
getProxyInfo,
|
|
275
|
+
getModuleInfo,
|
|
276
|
+
getConfiguredProxy,
|
|
277
|
+
PROXY_MODULE_VERSION,
|
|
278
|
+
SUPPORTED_PROTOCOLS
|
|
279
|
+
};
|
package/nwss.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// === Network scanner script (nwss.js) v2.0.
|
|
1
|
+
// === Network scanner script (nwss.js) v2.0.51 ===
|
|
2
2
|
|
|
3
3
|
// puppeteer for browser automation, fs for file system operations, psl for domain parsing.
|
|
4
4
|
// const pLimit = require('p-limit'); // Will be dynamically imported
|
|
@@ -44,6 +44,7 @@ const { performPageInteraction, createInteractionConfig } = require('./lib/inter
|
|
|
44
44
|
const { createGlobalHelpers, getTotalDomainsSkipped, getDetectedDomainsCount } = require('./lib/domain-cache');
|
|
45
45
|
const { createSmartCache } = require('./lib/smart-cache'); // Smart cache system
|
|
46
46
|
const { clearPersistentCache } = require('./lib/smart-cache');
|
|
47
|
+
const { needsProxy, getProxyArgs, applyProxyAuth, getProxyInfo, testProxy } = require('./lib/proxy');
|
|
47
48
|
// Dry run functionality
|
|
48
49
|
const { initializeDryRunCollections, addDryRunMatch, addDryRunNetTools, processDryRunResults, writeDryRunOutput } = require('./lib/dry-run');
|
|
49
50
|
// Enhanced site data clearing functionality
|
|
@@ -1354,7 +1355,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1354
1355
|
* Uses system Chrome and temporary directories to minimize disk usage
|
|
1355
1356
|
* @returns {Promise<import('puppeteer').Browser>} Browser instance
|
|
1356
1357
|
*/
|
|
1357
|
-
async function createBrowser() {
|
|
1358
|
+
async function createBrowser(extraArgs = []) {
|
|
1358
1359
|
// Create temporary user data directory that we can fully control and clean up
|
|
1359
1360
|
const tempUserDataDir = `/tmp/puppeteer-${Date.now()}-${Math.random().toString(36).substring(7)}`;
|
|
1360
1361
|
userDataDir = tempUserDataDir; // Store for cleanup tracking (use outer scope variable)
|
|
@@ -1458,6 +1459,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1458
1459
|
'--disable-background-timer-throttling',
|
|
1459
1460
|
'--disable-features=site-per-process', // Better for single-site scanning
|
|
1460
1461
|
'--no-zygote', // Better process isolation
|
|
1462
|
+
...extraArgs,
|
|
1461
1463
|
],
|
|
1462
1464
|
// Optimized timeouts for Puppeteer 23.x performance
|
|
1463
1465
|
protocolTimeout: TIMEOUTS.PROTOCOL_TIMEOUT,
|
|
@@ -2130,6 +2132,11 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2130
2132
|
}
|
|
2131
2133
|
}
|
|
2132
2134
|
|
|
2135
|
+
// --- Apply proxy authentication if configured ---
|
|
2136
|
+
if (needsProxy(siteConfig)) {
|
|
2137
|
+
await applyProxyAuth(page, siteConfig, forceDebug);
|
|
2138
|
+
}
|
|
2139
|
+
|
|
2133
2140
|
// --- Apply all fingerprint spoofing (user agent, Brave, fingerprint protection) ---
|
|
2134
2141
|
try {
|
|
2135
2142
|
await applyAllFingerprintSpoofing(page, siteConfig, forceDebug, currentUrl);
|
|
@@ -3337,6 +3344,25 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3337
3344
|
siteCounter++;
|
|
3338
3345
|
// Continue processing with the redirected URL instead of throwing error
|
|
3339
3346
|
} else {
|
|
3347
|
+
// Detect proxy-specific failures and provide clear diagnostics
|
|
3348
|
+
if (needsProxy(siteConfig) && err.message) {
|
|
3349
|
+
const proxyErrors = [
|
|
3350
|
+
'ERR_PROXY_CONNECTION_FAILED',
|
|
3351
|
+
'ERR_SOCKS_CONNECTION_FAILED',
|
|
3352
|
+
'ERR_TUNNEL_CONNECTION_FAILED',
|
|
3353
|
+
'ERR_PROXY_AUTH_UNSUPPORTED',
|
|
3354
|
+
'ERR_PROXY_AUTH_REQUESTED',
|
|
3355
|
+
'ERR_SOCKS_CONNECTION_HOST_UNREACHABLE',
|
|
3356
|
+
'ERR_PROXY_CERTIFICATE_INVALID',
|
|
3357
|
+
'ERR_NO_SUPPORTED_PROXIES'
|
|
3358
|
+
];
|
|
3359
|
+
const proxyErr = proxyErrors.find(e => err.message.includes(e));
|
|
3360
|
+
if (proxyErr) {
|
|
3361
|
+
const info = getProxyInfo(siteConfig);
|
|
3362
|
+
console.error(formatLogMessage('error', `[proxy] ${proxyErr} — proxy: ${info} — URL: ${currentUrl}`));
|
|
3363
|
+
console.error(formatLogMessage('error', `[proxy] Check: is the proxy running? Are credentials correct? Is the target reachable from the proxy?`));
|
|
3364
|
+
}
|
|
3365
|
+
}
|
|
3340
3366
|
console.error(formatLogMessage('error', `Failed on ${currentUrl}: ${err.message}`));
|
|
3341
3367
|
throw err;
|
|
3342
3368
|
}
|
|
@@ -3662,6 +3688,26 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3662
3688
|
}
|
|
3663
3689
|
|
|
3664
3690
|
} catch (err) {
|
|
3691
|
+
// Detect proxy-specific failures at top level
|
|
3692
|
+
if (needsProxy(siteConfig) && err.message) {
|
|
3693
|
+
const proxyErrors = [
|
|
3694
|
+
'ERR_PROXY_CONNECTION_FAILED',
|
|
3695
|
+
'ERR_SOCKS_CONNECTION_FAILED',
|
|
3696
|
+
'ERR_TUNNEL_CONNECTION_FAILED',
|
|
3697
|
+
'ERR_PROXY_AUTH_UNSUPPORTED',
|
|
3698
|
+
'ERR_PROXY_AUTH_REQUESTED',
|
|
3699
|
+
'ERR_SOCKS_CONNECTION_HOST_UNREACHABLE',
|
|
3700
|
+
'ERR_PROXY_CERTIFICATE_INVALID',
|
|
3701
|
+
'ERR_NO_SUPPORTED_PROXIES'
|
|
3702
|
+
];
|
|
3703
|
+
const proxyErr = proxyErrors.find(e => err.message.includes(e));
|
|
3704
|
+
if (proxyErr) {
|
|
3705
|
+
const info = getProxyInfo(siteConfig);
|
|
3706
|
+
console.error(formatLogMessage('error', `[proxy] ${proxyErr} — proxy: ${info} — URL: ${currentUrl}`));
|
|
3707
|
+
console.error(formatLogMessage('error', `[proxy] Check: is the proxy running? Are credentials correct? Is the target reachable from the proxy?`));
|
|
3708
|
+
}
|
|
3709
|
+
}
|
|
3710
|
+
|
|
3665
3711
|
// Only restart for truly fatal browser errors
|
|
3666
3712
|
const isFatalError = CRITICAL_BROWSER_ERRORS.some(errorType =>
|
|
3667
3713
|
err.message.includes(errorType)
|
|
@@ -3789,6 +3835,14 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3789
3835
|
}
|
|
3790
3836
|
}
|
|
3791
3837
|
|
|
3838
|
+
// Grouping key used to decide which tasks can share one browser instance:
// '' for direct connections, otherwise the getProxyInfo() string.
// getProxyInfo() formats protocol, optional username, host and port only, so
// configs that differ solely in password or bypass list map to the same key.
const proxyKeyFor = (siteConfig) => (
  needsProxy(siteConfig) ? getProxyInfo(siteConfig) : ''
);
|
|
3843
|
+
|
|
3844
|
+
// Sort tasks so proxy groups are contiguous — direct connections first, then each proxy
|
|
3845
|
+
allTasks.sort((a, b) => proxyKeyFor(a.config).localeCompare(proxyKeyFor(b.config)));
|
|
3792
3846
|
|
|
3793
3847
|
let results = [];
|
|
3794
3848
|
let processedUrlCount = 0;
|
|
@@ -3832,6 +3886,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3832
3886
|
|
|
3833
3887
|
// Process URLs in batches with exception handling
|
|
3834
3888
|
let siteGroupIndex = 0;
|
|
3889
|
+
let currentProxyKey = ''; // Track active proxy config — '' means direct connection
|
|
3835
3890
|
try {
|
|
3836
3891
|
for (let batchStart = 0; batchStart < totalUrls; batchStart += RESOURCE_CLEANUP_INTERVAL) {
|
|
3837
3892
|
const batchEnd = Math.min(batchStart + RESOURCE_CLEANUP_INTERVAL, totalUrls);
|
|
@@ -3952,14 +4007,67 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3952
4007
|
if (forceDebug) console.log(formatLogMessage('debug', `Browser cleanup warning: ${browserCloseErr.message}`));
|
|
3953
4008
|
}
|
|
3954
4009
|
|
|
3955
|
-
// Create new browser for next batch
|
|
3956
|
-
|
|
4010
|
+
// Create new browser for next batch (preserve current proxy config)
|
|
4011
|
+
const restartProxyArgs = currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : [];
|
|
4012
|
+
browser = await createBrowser(restartProxyArgs);
|
|
3957
4013
|
if (forceDebug) console.log(formatLogMessage('debug', `New browser instance created for batch ${Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL) + 1}`));
|
|
3958
4014
|
|
|
3959
4015
|
// Reset cleanup counter and add delay
|
|
3960
4016
|
urlsSinceLastCleanup = 0;
|
|
3961
4017
|
await fastTimeout(TIMEOUTS.BROWSER_STABILIZE_DELAY);
|
|
3962
4018
|
}
|
|
4019
|
+
|
|
4020
|
+
// --- Proxy-aware browser restart ---
|
|
4021
|
+
// --proxy-server is browser-wide, so if the batch needs a different proxy we must restart
|
|
4022
|
+
const batchProxyKey = proxyKeyFor(currentBatch[0].config);
|
|
4023
|
+
if (batchProxyKey !== currentProxyKey) {
|
|
4024
|
+
const debug = forceDebug || currentBatch[0].config.proxy_debug || currentBatch[0].config.socks5_debug;
|
|
4025
|
+
if (debug) {
|
|
4026
|
+
const from = currentProxyKey || 'direct';
|
|
4027
|
+
const to = batchProxyKey || 'direct';
|
|
4028
|
+
console.log(formatLogMessage('proxy', `Switching proxy: ${from} → ${to}`));
|
|
4029
|
+
}
|
|
4030
|
+
|
|
4031
|
+
try {
|
|
4032
|
+
await handleBrowserExit(browser, {
|
|
4033
|
+
forceDebug, timeout: 10000, exitOnFailure: false,
|
|
4034
|
+
cleanTempFiles: true, comprehensiveCleanup: removeTempFiles
|
|
4035
|
+
});
|
|
4036
|
+
if (userDataDir && fs.existsSync(userDataDir)) {
|
|
4037
|
+
fs.rmSync(userDataDir, { recursive: true, force: true });
|
|
4038
|
+
}
|
|
4039
|
+
} catch (proxyRestartErr) {
|
|
4040
|
+
if (forceDebug) console.log(formatLogMessage('debug', `Proxy switch browser cleanup: ${proxyRestartErr.message}`));
|
|
4041
|
+
}
|
|
4042
|
+
|
|
4043
|
+
const proxyArgs = batchProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : [];
|
|
4044
|
+
|
|
4045
|
+
// Pre-flight: verify proxy is reachable before launching browser
|
|
4046
|
+
if (proxyArgs.length > 0) {
|
|
4047
|
+
const health = await testProxy(currentBatch[0].config, 5000);
|
|
4048
|
+
if (!health.reachable) {
|
|
4049
|
+
const info = getProxyInfo(currentBatch[0].config);
|
|
4050
|
+
console.error(formatLogMessage('error', `[proxy] Unreachable: ${info} — ${health.error}`));
|
|
4051
|
+
console.error(formatLogMessage('error', `[proxy] Skipping ${currentBatch.length} URL(s) in this batch`));
|
|
4052
|
+
const skipResults = currentBatch.map(task => ({
|
|
4053
|
+
success: false, url: task.url, rules: [],
|
|
4054
|
+
error: `Proxy unreachable: ${health.error}`
|
|
4055
|
+
}));
|
|
4056
|
+
results.push(...skipResults);
|
|
4057
|
+
processedUrlCount += currentBatch.length;
|
|
4058
|
+
urlsSinceLastCleanup += currentBatch.length;
|
|
4059
|
+
continue;
|
|
4060
|
+
}
|
|
4061
|
+
if (forceDebug) {
|
|
4062
|
+
console.log(formatLogMessage('proxy', `Proxy reachable (${health.latencyMs}ms)`));
|
|
4063
|
+
}
|
|
4064
|
+
}
|
|
4065
|
+
|
|
4066
|
+
browser = await createBrowser(proxyArgs);
|
|
4067
|
+
currentProxyKey = batchProxyKey;
|
|
4068
|
+
urlsSinceLastCleanup = 0;
|
|
4069
|
+
await fastTimeout(TIMEOUTS.BROWSER_STABILIZE_DELAY);
|
|
4070
|
+
}
|
|
3963
4071
|
|
|
3964
4072
|
if (forceDebug) {
|
|
3965
4073
|
console.log(formatLogMessage('debug', `Processing batch ${Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL) + 1}: ${batchSize} URL(s) (total processed: ${processedUrlCount})`));
|
|
@@ -3986,7 +4094,8 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3986
4094
|
console.log(formatLogMessage('error', `[TIMEOUT] Batch hung. Restarting browser.`));
|
|
3987
4095
|
try {
|
|
3988
4096
|
await handleBrowserExit(browser, { forceDebug, timeout: 5000, exitOnFailure: false });
|
|
3989
|
-
|
|
4097
|
+
const timeoutProxyArgs = currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : [];
|
|
4098
|
+
browser = await createBrowser(timeoutProxyArgs);
|
|
3990
4099
|
urlsSinceLastCleanup = 0;
|
|
3991
4100
|
} catch (restartErr) {
|
|
3992
4101
|
throw restartErr;
|
|
@@ -4104,7 +4213,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4104
4213
|
comprehensive: true
|
|
4105
4214
|
});
|
|
4106
4215
|
}
|
|
4107
|
-
browser = await createBrowser();
|
|
4216
|
+
browser = await createBrowser(currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : []);
|
|
4108
4217
|
urlsSinceLastCleanup = 0; // Reset counter
|
|
4109
4218
|
await fastTimeout(TIMEOUTS.EMERGENCY_RESTART_DELAY); // Give browser time to stabilize
|
|
4110
4219
|
} catch (emergencyRestartErr) {
|
|
@@ -4116,7 +4225,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4116
4225
|
console.log(`\n${messageColors.fileOp('🔄 Emergency hang detection restart:')} Browser appears hung, forcing restart`);
|
|
4117
4226
|
try {
|
|
4118
4227
|
await handleBrowserExit(browser, { forceDebug, timeout: 5000, exitOnFailure: false, cleanTempFiles: true });
|
|
4119
|
-
browser = await createBrowser();
|
|
4228
|
+
browser = await createBrowser(currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : []);
|
|
4120
4229
|
urlsSinceLastCleanup = 0;
|
|
4121
4230
|
forceRestartFlag = false; // Reset flag
|
|
4122
4231
|
await fastTimeout(TIMEOUTS.EMERGENCY_RESTART_DELAY);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.51",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|
|
@@ -48,7 +48,7 @@
|
|
|
48
48
|
},
|
|
49
49
|
"homepage": "https://github.com/ryanbr/network-scanner",
|
|
50
50
|
"devDependencies": {
|
|
51
|
-
"eslint": "^
|
|
51
|
+
"eslint": "^10.0.2",
|
|
52
52
|
"globals": "^16.3.0"
|
|
53
53
|
}
|
|
54
54
|
}
|