@fanboynz/network-scanner 2.0.49 → 2.0.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +81 -0
- package/lib/cloudflare.js +218 -197
- package/lib/proxy.js +279 -0
- package/nwss.js +116 -7
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -17,6 +17,7 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
|
|
|
17
17
|
- Subdomain handling (collapse to root or full subdomain)
|
|
18
18
|
- Optionally match only first-party, third-party, or both
|
|
19
19
|
- Enhanced redirect handling with JavaScript and meta refresh detection
|
|
20
|
+
- Per-site proxy routing (SOCKS5, SOCKS4, HTTP, HTTPS) with pre-flight health checks
|
|
20
21
|
|
|
21
22
|
---
|
|
22
23
|
|
|
@@ -315,6 +316,86 @@ Route traffic through a VPN for specific sites. Requires `sudo` privileges. The
|
|
|
315
316
|
|
|
316
317
|
> **Authentication:** If the `.ovpn` file already contains credentials (via `auth-user-pass /path/to/file` or an inline `<auth-user-pass>` block), no additional config is needed — just provide the config path. The `username`/`password` fields are only needed when the `.ovpn` file has a bare `auth-user-pass` directive that expects interactive input.
|
|
317
318
|
|
|
319
|
+
### Proxy Options
|
|
320
|
+
|
|
321
|
+
Route traffic through a proxy for specific sites. Supports SOCKS5, SOCKS4, HTTP, and HTTPS proxies. Unlike VPN, proxy routing is per-site-group — only URLs in the same config block use the proxy; other sites connect directly.
|
|
322
|
+
|
|
323
|
+
> **Note:** Chromium's `--proxy-server` flag is browser-wide. Sites requiring different proxies (or direct vs proxied) are automatically separated into different browser instances. Tasks are sorted so proxy groups are contiguous to minimise restarts.
|
|
324
|
+
|
|
325
|
+
| Field | Values | Default | Description |
|
|
326
|
+
|:---------------------|:-------|:-------:|:------------|
|
|
327
|
+
| `proxy` | String | - | Proxy URL: `socks5://host:port`, `http://host:port`, `https://host:port`, or `http://user:pass@host:port` |
|
|
328
|
+
| `proxy_bypass` | Array | `[]` | Domains that skip the proxy (e.g. `["localhost", "127.0.0.1", "*.local"]`) |
|
|
329
|
+
| `proxy_remote_dns` | Boolean | `true` | Resolve DNS through the proxy (SOCKS only — prevents DNS leaks) |
|
|
330
|
+
| `proxy_debug` | Boolean | `false` | Print proxy diagnostics: launch args, auth, health checks, error codes |
|
|
331
|
+
|
|
332
|
+
Legacy aliases (`socks5_proxy`, `socks5_bypass`, `socks5_remote_dns`, `socks5_debug`) are supported for backwards compatibility.
|
|
333
|
+
|
|
334
|
+
#### Proxy Examples
|
|
335
|
+
|
|
336
|
+
**SOCKS5 — no auth:**
|
|
337
|
+
```json
|
|
338
|
+
{
|
|
339
|
+
"url": ["https://blocked-site.com/", "https://another-blocked.com/"],
|
|
340
|
+
"proxy": "socks5://127.0.0.1:1080",
|
|
341
|
+
"search_string": ["tracking.js"]
|
|
342
|
+
}
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
**HTTP proxy with credentials:**
|
|
346
|
+
```json
|
|
347
|
+
{
|
|
348
|
+
"url": ["https://geo-restricted.com/"],
|
|
349
|
+
"proxy": "http://user:pass@proxy.corp.com:3128",
|
|
350
|
+
"search_string": ["analytics"]
|
|
351
|
+
}
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
**SOCKS5 with bypass list and debug:**
|
|
355
|
+
```json
|
|
356
|
+
{
|
|
357
|
+
"url": ["https://target-site.com/"],
|
|
358
|
+
"proxy": "socks5://user:pass@proxy.example.com:9050",
|
|
359
|
+
"proxy_bypass": ["localhost", "127.0.0.1", "*.internal.corp"],
|
|
360
|
+
"proxy_remote_dns": true,
|
|
361
|
+
"proxy_debug": true,
|
|
362
|
+
"search_string": ["tracker"]
|
|
363
|
+
}
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
**Mixed direct + proxied in one config:**
|
|
367
|
+
```json
|
|
368
|
+
[
|
|
369
|
+
{
|
|
370
|
+
"url": ["https://direct-site.com/"],
|
|
371
|
+
"search_string": ["ads"]
|
|
372
|
+
},
|
|
373
|
+
{
|
|
374
|
+
"url": ["https://blocked-site.com/"],
|
|
375
|
+
"proxy": "socks5://127.0.0.1:1080",
|
|
376
|
+
"search_string": ["ads"]
|
|
377
|
+
}
|
|
378
|
+
]
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
#### Proxy Error Handling
|
|
382
|
+
|
|
383
|
+
If a proxy is unreachable, the batch is skipped with a clear error before any navigation is attempted:
|
|
384
|
+
|
|
385
|
+
```
|
|
386
|
+
[error] [proxy] Unreachable: socks5://127.0.0.1:1080 — Connection refused
|
|
387
|
+
[error] [proxy] Skipping 5 URL(s) in this batch
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
If a proxy fails mid-scan, Chromium's error code is detected and diagnosed:
|
|
391
|
+
|
|
392
|
+
```
|
|
393
|
+
[error] [proxy] ERR_SOCKS_CONNECTION_FAILED — proxy: socks5://127.0.0.1:1080 — URL: https://example.com/
|
|
394
|
+
[error] [proxy] Check: is the proxy running? Are credentials correct? Is the target reachable from the proxy?
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
Detected error codes: `ERR_PROXY_CONNECTION_FAILED`, `ERR_SOCKS_CONNECTION_FAILED`, `ERR_TUNNEL_CONNECTION_FAILED`, `ERR_PROXY_AUTH_UNSUPPORTED`, `ERR_PROXY_AUTH_REQUESTED`, `ERR_SOCKS_CONNECTION_HOST_UNREACHABLE`, `ERR_PROXY_CERTIFICATE_INVALID`, `ERR_NO_SUPPORTED_PROXIES`.
|
|
398
|
+
|
|
318
399
|
### Global Configuration Options
|
|
319
400
|
|
|
320
401
|
These options go at the root level of your config.json:
|
package/lib/cloudflare.js
CHANGED
|
@@ -58,11 +58,76 @@ const FAST_TIMEOUTS = {
|
|
|
58
58
|
ELEMENT_INTERACTION_DELAY: 250, // Fast element interactions
|
|
59
59
|
SELECTOR_WAIT: 3000, // Fast selector waits
|
|
60
60
|
TURNSTILE_OPERATION: 6000, // Fast Turnstile operations
|
|
61
|
-
JS_CHALLENGE:
|
|
61
|
+
JS_CHALLENGE: 10000, // Fast JS challenge completion
|
|
62
62
|
CHALLENGE_SOLVING: 30000, // Fast overall challenge solving
|
|
63
63
|
CHALLENGE_COMPLETION: 8000 // Fast completion check
|
|
64
64
|
};
|
|
65
65
|
|
|
66
|
+
/**
|
|
67
|
+
* Finds and clicks an element inside shadow DOM trees: tries Puppeteer's pierce/ selector first, then falls back to an in-page evaluate() traversal of open shadow roots
|
|
68
|
+
* Returns {found, clicked, selector, x, y} - coordinates allow fallback mouse.click
|
|
69
|
+
*/
|
|
70
|
+
async function clickInShadowDOM(context, selectors, forceDebug = false, waitMs = 1500) {
|
|
71
|
+
// Try Puppeteer's pierce/ selector first — handles CLOSED shadow roots via CDP
|
|
72
|
+
for (const selector of selectors) {
|
|
73
|
+
try {
|
|
74
|
+
// Wait for element to appear (handles delayed rendering)
|
|
75
|
+
const start = Date.now();
|
|
76
|
+
const element = await context.waitForSelector(`pierce/${selector}`, { timeout: waitMs });
|
|
77
|
+
if (element) {
|
|
78
|
+
const box = await element.boundingBox();
|
|
79
|
+
if (box && box.width > 0 && box.height > 0) {
|
|
80
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} matched in ${Date.now() - start}ms � box: ${box.width}x${box.height} at (${box.x},${box.y})`));
|
|
81
|
+
await element.click();
|
|
82
|
+
await element.dispose();
|
|
83
|
+
return { found: true, clicked: true, selector, x: box.x + box.width / 2, y: box.y + box.height / 2 };
|
|
84
|
+
}
|
|
85
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} found but not visible (0x0)`));
|
|
86
|
+
await element.dispose();
|
|
87
|
+
// Element found but not visible
|
|
88
|
+
return { found: true, clicked: false, selector, x: 0, y: 0 };
|
|
89
|
+
}
|
|
90
|
+
} catch (e) {
|
|
91
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} timeout after ${waitMs}ms`));
|
|
92
|
+
continue;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
// Fallback: manual traversal for open shadow roots
|
|
97
|
+
const result = await context.evaluate((sels) => {
|
|
98
|
+
function deepQuery(root, selector) {
|
|
99
|
+
// Try direct query first
|
|
100
|
+
const el = root.querySelector(selector);
|
|
101
|
+
if (el) return el;
|
|
102
|
+
|
|
103
|
+
// Traverse shadow roots
|
|
104
|
+
const allElements = root.querySelectorAll('*');
|
|
105
|
+
for (const node of allElements) {
|
|
106
|
+
if (node.shadowRoot) {
|
|
107
|
+
const found = deepQuery(node.shadowRoot, selector);
|
|
108
|
+
if (found) return found;
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
return null;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
for (const selector of sels) {
|
|
115
|
+
const el = deepQuery(document, selector);
|
|
116
|
+
if (el) {
|
|
117
|
+
const rect = el.getBoundingClientRect();
|
|
118
|
+
if (rect.width > 0 && rect.height > 0) {
|
|
119
|
+
el.click();
|
|
120
|
+
return { found: true, clicked: true, selector, x: rect.x + rect.width / 2, y: rect.y + rect.height / 2 };
|
|
121
|
+
}
|
|
122
|
+
return { found: true, clicked: false, selector, x: 0, y: 0 };
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
return { found: false, clicked: false, selector: null, x: 0, y: 0 };
|
|
126
|
+
}, selectors);
|
|
127
|
+
|
|
128
|
+
return result;
|
|
129
|
+
}
|
|
130
|
+
|
|
66
131
|
/**
|
|
67
132
|
* Error categories for better handling
|
|
68
133
|
*/
|
|
@@ -306,12 +371,12 @@ function categorizeError(error) {
|
|
|
306
371
|
/**
|
|
307
372
|
* Implements exponential backoff delay
|
|
308
373
|
*/
|
|
309
|
-
|
|
374
|
+
function getRetryDelay(attempt) {
|
|
310
375
|
const delay = Math.min(
|
|
311
376
|
RETRY_CONFIG.baseDelay * Math.pow(RETRY_CONFIG.backoffMultiplier, attempt - 1),
|
|
312
377
|
RETRY_CONFIG.maxDelay
|
|
313
378
|
);
|
|
314
|
-
return
|
|
379
|
+
return delay;
|
|
315
380
|
}
|
|
316
381
|
|
|
317
382
|
/**
|
|
@@ -341,49 +406,8 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
|
|
|
341
406
|
throw new Error('Page URL access failed - likely detached');
|
|
342
407
|
}
|
|
343
408
|
|
|
344
|
-
// Quick execution context validation with timeout
|
|
345
|
-
const contextValid = await Promise.race([
|
|
346
|
-
page.evaluate(() => {
|
|
347
|
-
try {
|
|
348
|
-
// Quick context validation
|
|
349
|
-
if (typeof window === 'undefined' || !document) {
|
|
350
|
-
return false;
|
|
351
|
-
}
|
|
352
|
-
// Check if document is ready for interaction
|
|
353
|
-
if (document.readyState === 'uninitialized') {
|
|
354
|
-
return false;
|
|
355
|
-
}
|
|
356
|
-
return true;
|
|
357
|
-
} catch (e) {
|
|
358
|
-
return false;
|
|
359
|
-
}
|
|
360
|
-
}),
|
|
361
|
-
new Promise((_, reject) => {
|
|
362
|
-
setTimeout(() => reject(new Error('Context validation timeout')), 3500);
|
|
363
|
-
})
|
|
364
|
-
]).catch(() => false);
|
|
365
|
-
|
|
366
|
-
if (!contextValid) {
|
|
367
|
-
throw new Error('Page execution context is invalid');
|
|
368
|
-
}
|
|
369
|
-
|
|
370
409
|
const result = await Promise.race([
|
|
371
|
-
page.evaluate(
|
|
372
|
-
// Additional runtime validation inside evaluation
|
|
373
|
-
try {
|
|
374
|
-
if (typeof window === 'undefined' || !document) {
|
|
375
|
-
throw new Error('Execution context invalid during evaluation');
|
|
376
|
-
}
|
|
377
|
-
return func();
|
|
378
|
-
} catch (evalError) {
|
|
379
|
-
// Return error info instead of throwing to avoid unhandled promise rejections
|
|
380
|
-
return {
|
|
381
|
-
__evaluation_error: true,
|
|
382
|
-
message: evalError.message,
|
|
383
|
-
type: 'evaluation_error'
|
|
384
|
-
};
|
|
385
|
-
}
|
|
386
|
-
}),
|
|
410
|
+
page.evaluate(func),
|
|
387
411
|
new Promise((_, reject) => {
|
|
388
412
|
timeoutId = setTimeout(() => reject(new Error('Page evaluation timeout')), timeout);
|
|
389
413
|
})
|
|
@@ -394,11 +418,6 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
|
|
|
394
418
|
clearTimeout(timeoutId);
|
|
395
419
|
}
|
|
396
420
|
|
|
397
|
-
// Check if evaluation returned an error
|
|
398
|
-
if (result && result.__evaluation_error) {
|
|
399
|
-
throw new Error(`Evaluation failed: ${result.message}`);
|
|
400
|
-
}
|
|
401
|
-
|
|
402
421
|
if (forceDebug && attempt > 1) {
|
|
403
422
|
console.log(formatLogMessage('cloudflare', `Page evaluation succeeded on attempt ${attempt}`));
|
|
404
423
|
}
|
|
@@ -438,7 +457,7 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
|
|
|
438
457
|
}
|
|
439
458
|
|
|
440
459
|
// Wait before retrying with exponential backoff
|
|
441
|
-
await getRetryDelay(attempt);
|
|
460
|
+
await new Promise(resolve => setTimeout(resolve, getRetryDelay(attempt)));
|
|
442
461
|
}
|
|
443
462
|
}
|
|
444
463
|
|
|
@@ -460,15 +479,18 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
|
|
|
460
479
|
* Safe element clicking with timeout protection
|
|
461
480
|
*/
|
|
462
481
|
async function safeClick(page, selector, timeout = TIMEOUTS.CLICK_TIMEOUT) {
|
|
482
|
+
let timeoutId;
|
|
463
483
|
try {
|
|
464
484
|
return await Promise.race([
|
|
465
485
|
page.click(selector, { timeout: timeout }),
|
|
466
486
|
new Promise((_, reject) => {
|
|
467
|
-
setTimeout(() => reject(new Error('Click timeout')), timeout + TIMEOUTS.CLICK_TIMEOUT_BUFFER);
|
|
487
|
+
timeoutId = setTimeout(() => reject(new Error('Click timeout')), timeout + TIMEOUTS.CLICK_TIMEOUT_BUFFER);
|
|
468
488
|
})
|
|
469
489
|
]);
|
|
470
490
|
} catch (error) {
|
|
471
491
|
throw new Error(`Click failed: ${error.message}`);
|
|
492
|
+
} finally {
|
|
493
|
+
if (timeoutId) clearTimeout(timeoutId);
|
|
472
494
|
}
|
|
473
495
|
}
|
|
474
496
|
|
|
@@ -476,16 +498,18 @@ async function safeClick(page, selector, timeout = TIMEOUTS.CLICK_TIMEOUT) {
|
|
|
476
498
|
* Safe navigation waiting with timeout protection
|
|
477
499
|
*/
|
|
478
500
|
async function safeWaitForNavigation(page, timeout = TIMEOUTS.NAVIGATION_TIMEOUT) {
|
|
501
|
+
let timeoutId;
|
|
479
502
|
try {
|
|
480
503
|
return await Promise.race([
|
|
481
504
|
page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: timeout }),
|
|
482
505
|
new Promise((_, reject) => {
|
|
483
|
-
setTimeout(() => reject(new Error('Navigation timeout')), timeout + TIMEOUTS.NAVIGATION_TIMEOUT_BUFFER);
|
|
506
|
+
timeoutId = setTimeout(() => reject(new Error('Navigation timeout')), timeout + TIMEOUTS.NAVIGATION_TIMEOUT_BUFFER);
|
|
484
507
|
})
|
|
485
508
|
]);
|
|
486
509
|
} catch (error) {
|
|
487
510
|
console.warn(formatLogMessage('cloudflare', `Navigation wait failed: ${error.message}`));
|
|
488
|
-
|
|
511
|
+
} finally {
|
|
512
|
+
if (timeoutId) clearTimeout(timeoutId);
|
|
489
513
|
}
|
|
490
514
|
}
|
|
491
515
|
|
|
@@ -583,7 +607,14 @@ async function quickCloudflareDetection(page, forceDebug = false) {
|
|
|
583
607
|
*/
|
|
584
608
|
async function analyzeCloudflareChallenge(page) {
|
|
585
609
|
try {
|
|
586
|
-
|
|
610
|
+
// CDP-level frame check — bypasses closed shadow roots
|
|
611
|
+
const frames = page.frames();
|
|
612
|
+
const hasChallengeFrame = frames.some(f => {
|
|
613
|
+
const url = f.url();
|
|
614
|
+
return url.includes('challenges.cloudflare.com') || url.includes('/cdn-cgi/challenge-platform/');
|
|
615
|
+
});
|
|
616
|
+
|
|
617
|
+
const result = await safePageEvaluate(page, () => {
|
|
587
618
|
const title = document.title || '';
|
|
588
619
|
const bodyText = document.body ? document.body.textContent : '';
|
|
589
620
|
|
|
@@ -655,6 +686,15 @@ async function analyzeCloudflareChallenge(page) {
|
|
|
655
686
|
bodySnippet: bodyText.substring(0, 200)
|
|
656
687
|
};
|
|
657
688
|
}, TIMEOUTS.PAGE_EVALUATION);
|
|
689
|
+
|
|
690
|
+
// Merge CDP frame detection — catches iframes behind closed shadow roots
|
|
691
|
+
if (hasChallengeFrame && !result.hasTurnstileIframe) {
|
|
692
|
+
result.hasTurnstileIframe = true;
|
|
693
|
+
result.isTurnstile = true;
|
|
694
|
+
result.isChallengePresent = true;
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
return result;
|
|
658
698
|
} catch (error) {
|
|
659
699
|
return {
|
|
660
700
|
isChallengePresent: false,
|
|
@@ -862,7 +902,7 @@ async function handleVerificationChallengeWithRetries(page, currentUrl, siteConf
|
|
|
862
902
|
|
|
863
903
|
// If this wasn't the last attempt, wait before retrying
|
|
864
904
|
if (attempt < retryConfig.maxAttempts) {
|
|
865
|
-
const delay =
|
|
905
|
+
const delay = getRetryDelay(attempt);
|
|
866
906
|
if (forceDebug) {
|
|
867
907
|
console.log(formatLogMessage('cloudflare', `Challenge attempt ${attempt} failed, retrying in ${delay}ms: ${result.error}`));
|
|
868
908
|
}
|
|
@@ -904,7 +944,7 @@ async function handleVerificationChallengeWithRetries(page, currentUrl, siteConf
|
|
|
904
944
|
|
|
905
945
|
// Wait before retrying with exponential backoff
|
|
906
946
|
if (attempt < retryConfig.maxAttempts) {
|
|
907
|
-
await getRetryDelay(attempt);
|
|
947
|
+
await new Promise(resolve => setTimeout(resolve, getRetryDelay(attempt)));
|
|
908
948
|
}
|
|
909
949
|
}
|
|
910
950
|
}
|
|
@@ -945,7 +985,7 @@ async function handlePhishingWarningWithRetries(page, currentUrl, siteConfig, fo
|
|
|
945
985
|
|
|
946
986
|
// If this wasn't the last attempt, wait before retrying
|
|
947
987
|
if (attempt < retryConfig.maxAttempts) {
|
|
948
|
-
const delay =
|
|
988
|
+
const delay = getRetryDelay(attempt);
|
|
949
989
|
if (forceDebug) {
|
|
950
990
|
console.log(formatLogMessage('cloudflare', `Phishing warning attempt ${attempt} failed, retrying in ${delay}ms: ${result.error}`));
|
|
951
991
|
}
|
|
@@ -975,7 +1015,7 @@ async function handlePhishingWarningWithRetries(page, currentUrl, siteConfig, fo
|
|
|
975
1015
|
|
|
976
1016
|
// Wait before retrying with exponential backoff
|
|
977
1017
|
if (attempt < retryConfig.maxAttempts) {
|
|
978
|
-
await getRetryDelay(attempt);
|
|
1018
|
+
await new Promise(resolve => setTimeout(resolve, getRetryDelay(attempt)));
|
|
979
1019
|
}
|
|
980
1020
|
}
|
|
981
1021
|
}
|
|
@@ -1046,6 +1086,23 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
|
|
|
1046
1086
|
|
|
1047
1087
|
const jsResult = await waitForJSChallengeCompletion(page, forceDebug);
|
|
1048
1088
|
if (jsResult.success) {
|
|
1089
|
+
// Wait for redirect after challenge completion
|
|
1090
|
+
try {
|
|
1091
|
+
const startUrl = await page.url();
|
|
1092
|
+
await page.waitForFunction(
|
|
1093
|
+
(origUrl) => {
|
|
1094
|
+
const bodyText = document.body?.textContent || '';
|
|
1095
|
+
return document.title !== 'Just a moment...' ||
|
|
1096
|
+
window.location.href !== origUrl ||
|
|
1097
|
+
bodyText.includes('Verification successful');
|
|
1098
|
+
},
|
|
1099
|
+
{ timeout: 10000 },
|
|
1100
|
+
startUrl
|
|
1101
|
+
);
|
|
1102
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge page cleared for ${currentUrl}`));
|
|
1103
|
+
} catch (_) {
|
|
1104
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge page not cleared after 10s � continuing`));
|
|
1105
|
+
}
|
|
1049
1106
|
result.success = true;
|
|
1050
1107
|
result.method = 'js_challenge_wait';
|
|
1051
1108
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `JS challenge completed successfully for ${currentUrl}`));
|
|
@@ -1054,6 +1111,8 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
|
|
|
1054
1111
|
} catch (jsError) {
|
|
1055
1112
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `JS challenge wait failed for ${currentUrl}: ${jsError.message}`));
|
|
1056
1113
|
}
|
|
1114
|
+
} else if (forceDebug) {
|
|
1115
|
+
console.log(formatLogMessage('cloudflare', `Skipping JS challenge method (not detected)`));
|
|
1057
1116
|
}
|
|
1058
1117
|
|
|
1059
1118
|
// Method 2: Handle Turnstile challenges (interactive)
|
|
@@ -1071,6 +1130,8 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
|
|
|
1071
1130
|
} catch (turnstileError) {
|
|
1072
1131
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile method failed for ${currentUrl}: ${turnstileError.message}`));
|
|
1073
1132
|
}
|
|
1133
|
+
} else if (forceDebug) {
|
|
1134
|
+
console.log(formatLogMessage('cloudflare', `Skipping Turnstile method (not detected)`));
|
|
1074
1135
|
}
|
|
1075
1136
|
|
|
1076
1137
|
// Method 3: Legacy checkbox interaction (fallback)
|
|
@@ -1088,10 +1149,23 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
|
|
|
1088
1149
|
} catch (legacyError) {
|
|
1089
1150
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `Legacy checkbox method failed for ${currentUrl}: ${legacyError.message}`));
|
|
1090
1151
|
}
|
|
1152
|
+
} else if (forceDebug) {
|
|
1153
|
+
console.log(formatLogMessage('cloudflare', `Skipping legacy checkbox method (not detected)`));
|
|
1091
1154
|
}
|
|
1092
1155
|
|
|
1093
1156
|
if (!result.success) {
|
|
1094
1157
|
result.error = result.error || 'All challenge bypass methods failed';
|
|
1158
|
+
if (forceDebug) {
|
|
1159
|
+
try {
|
|
1160
|
+
const postState = await page.evaluate(() => ({
|
|
1161
|
+
title: document.title,
|
|
1162
|
+
url: window.location.href,
|
|
1163
|
+
body: (document.body?.textContent || '').substring(0, 300)
|
|
1164
|
+
}));
|
|
1165
|
+
console.log(formatLogMessage('cloudflare', `Post-attempt page state: title="${postState.title}" url=${postState.url}`));
|
|
1166
|
+
console.log(formatLogMessage('cloudflare', `Post-attempt body: ${postState.body}`));
|
|
1167
|
+
} catch (_) {}
|
|
1168
|
+
}
|
|
1095
1169
|
}
|
|
1096
1170
|
|
|
1097
1171
|
return result;
|
|
@@ -1109,88 +1183,57 @@ async function handleEmbeddedIframeChallenge(page, forceDebug = false) {
|
|
|
1109
1183
|
try {
|
|
1110
1184
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `Checking for embedded iframe challenges`));
|
|
1111
1185
|
|
|
1112
|
-
//
|
|
1113
|
-
const
|
|
1114
|
-
|
|
1115
|
-
'
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
];
|
|
1119
|
-
|
|
1120
|
-
// Wait for iframe to appear
|
|
1121
|
-
let iframeFound = false;
|
|
1122
|
-
for (const selector of iframeSelectors) {
|
|
1123
|
-
try {
|
|
1124
|
-
await Promise.race([
|
|
1125
|
-
page.waitForSelector(selector, { timeout: FAST_TIMEOUTS.SELECTOR_WAIT }),
|
|
1126
|
-
new Promise((_, reject) => setTimeout(() => reject(new Error('Timeout')), FAST_TIMEOUTS.SELECTOR_WAIT + 1000))
|
|
1127
|
-
]);
|
|
1128
|
-
iframeFound = true;
|
|
1129
|
-
if (forceDebug) console.log(formatLogMessage('cloudflare', `Found iframe: ${selector}`));
|
|
1130
|
-
break;
|
|
1131
|
-
} catch (e) {
|
|
1132
|
-
continue;
|
|
1186
|
+
// Use CDP-level frame detection — bypasses closed shadow roots
|
|
1187
|
+
const frames = page.frames();
|
|
1188
|
+
if (forceDebug) {
|
|
1189
|
+
console.log(formatLogMessage('cloudflare', `Available frames (${frames.length}):`));
|
|
1190
|
+
for (const f of frames) {
|
|
1191
|
+
console.log(formatLogMessage('cloudflare', ` ${f.url()}`));
|
|
1133
1192
|
}
|
|
1134
1193
|
}
|
|
1135
|
-
|
|
1136
|
-
if (!iframeFound) {
|
|
1137
|
-
result.error = 'No embedded iframe found';
|
|
1138
|
-
return result;
|
|
1139
|
-
}
|
|
1140
|
-
|
|
1141
|
-
// Find challenge frame using existing frame detection logic
|
|
1142
|
-
const frames = await page.frames();
|
|
1143
1194
|
const challengeFrame = frames.find(frame => {
|
|
1144
1195
|
const frameUrl = frame.url();
|
|
1145
1196
|
return frameUrl.includes('challenges.cloudflare.com') ||
|
|
1197
|
+
frameUrl.includes('/cdn-cgi/challenge-platform/') ||
|
|
1146
1198
|
frameUrl.includes('/turnstile/if/') ||
|
|
1147
|
-
frameUrl.includes('captcha-delivery.com') ||
|
|
1148
|
-
frameUrl.includes('/challenge-platform/') ||
|
|
1149
1199
|
frameUrl.includes('turnstile');
|
|
1150
1200
|
});
|
|
1151
1201
|
|
|
1152
1202
|
if (!challengeFrame) {
|
|
1153
|
-
result.error = '
|
|
1203
|
+
result.error = 'No challenge frame found via CDP';
|
|
1154
1204
|
return result;
|
|
1155
1205
|
}
|
|
1156
1206
|
|
|
1157
1207
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `Interacting with iframe: ${challengeFrame.url()}`));
|
|
1158
1208
|
|
|
1159
|
-
|
|
1160
|
-
|
|
1209
|
+
await waitForTimeout(page, 500);
|
|
1210
|
+
|
|
1211
|
+
let checkboxInteractionSuccess = false;
|
|
1212
|
+
try {
|
|
1213
|
+
const shadowResult = await clickInShadowDOM(challengeFrame, [
|
|
1161
1214
|
'input[type="checkbox"]',
|
|
1162
1215
|
'.ctp-checkbox',
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
try {
|
|
1171
|
-
await Promise.race([
|
|
1172
|
-
challengeFrame.waitForSelector(selector, { timeout: FAST_TIMEOUTS.SELECTOR_WAIT }),
|
|
1173
|
-
new Promise((_, reject) => setTimeout(() => reject(new Error('Timeout')), FAST_TIMEOUTS.SELECTOR_WAIT + 1000))
|
|
1174
|
-
]);
|
|
1175
|
-
|
|
1176
|
-
await waitForTimeout(page, FAST_TIMEOUTS.ELEMENT_INTERACTION_DELAY);
|
|
1177
|
-
await challengeFrame.click(selector);
|
|
1178
|
-
|
|
1179
|
-
if (forceDebug) console.log(formatLogMessage('cloudflare', `Clicked iframe element: ${selector}`));
|
|
1216
|
+
'.ctp-checkbox-label',
|
|
1217
|
+
'[role="checkbox"]',
|
|
1218
|
+
'label.cb-lb',
|
|
1219
|
+
'label'
|
|
1220
|
+
], forceDebug);
|
|
1221
|
+
|
|
1222
|
+
if (shadowResult.clicked) {
|
|
1180
1223
|
checkboxInteractionSuccess = true;
|
|
1181
|
-
|
|
1182
|
-
}
|
|
1183
|
-
|
|
1224
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM click succeeded: ${shadowResult.selector}`));
|
|
1225
|
+
} else if (shadowResult.found && shadowResult.x > 0) {
|
|
1226
|
+
await page.mouse.click(shadowResult.x, shadowResult.y);
|
|
1227
|
+
checkboxInteractionSuccess = true;
|
|
1228
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM mouse fallback at (${shadowResult.x}, ${shadowResult.y})`));
|
|
1184
1229
|
}
|
|
1230
|
+
} catch (shadowErr) {
|
|
1231
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM click failed: ${shadowErr.message}`));
|
|
1185
1232
|
}
|
|
1186
1233
|
|
|
1187
|
-
// Try alternative interaction only if standard selectors failed
|
|
1188
1234
|
if (!checkboxInteractionSuccess) {
|
|
1189
|
-
if (forceDebug) console.log(formatLogMessage('cloudflare', `Checkbox interactions failed, trying container fallback`));
|
|
1190
|
-
await waitForTimeout(page, 1000);
|
|
1191
1235
|
|
|
1192
1236
|
try {
|
|
1193
|
-
// Try clicking on the iframe container itself as fallback
|
|
1194
1237
|
const iframeElement = await page.$('iframe[src*="challenges.cloudflare.com"]');
|
|
1195
1238
|
if (iframeElement) {
|
|
1196
1239
|
await iframeElement.click();
|
|
@@ -1199,8 +1242,6 @@ async function handleEmbeddedIframeChallenge(page, forceDebug = false) {
|
|
|
1199
1242
|
} catch (containerClickError) {
|
|
1200
1243
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `Container click failed: ${containerClickError.message}`));
|
|
1201
1244
|
}
|
|
1202
|
-
} else {
|
|
1203
|
-
if (forceDebug) console.log(formatLogMessage('cloudflare', `Checkbox interaction successful, skipping container fallback`));
|
|
1204
1245
|
}
|
|
1205
1246
|
|
|
1206
1247
|
// Reuse existing completion check pattern with error handling
|
|
@@ -1257,8 +1298,10 @@ async function waitForJSChallengeCompletion(page, forceDebug = false) {
|
|
|
1257
1298
|
await Promise.race([
|
|
1258
1299
|
page.waitForFunction(
|
|
1259
1300
|
() => {
|
|
1260
|
-
|
|
1261
|
-
|
|
1301
|
+
const bodyText = document.body.textContent;
|
|
1302
|
+
if (bodyText.includes('Verification successful')) return true;
|
|
1303
|
+
return !bodyText.includes('Checking your browser') &&
|
|
1304
|
+
!bodyText.includes('Please wait while we verify') &&
|
|
1262
1305
|
!document.querySelector('.cf-challenge-running') &&
|
|
1263
1306
|
!document.querySelector('[data-cf-challenge]');
|
|
1264
1307
|
},
|
|
@@ -1342,28 +1385,26 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
|
|
|
1342
1385
|
console.log(formatLogMessage('cloudflare', `Found Turnstile iframe with URL: ${turnstileFrame.url()}`));
|
|
1343
1386
|
}
|
|
1344
1387
|
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
if (forceDebug) console.log(formatLogMessage('cloudflare', `
|
|
1362
|
-
break;
|
|
1363
|
-
} catch (e) {
|
|
1364
|
-
if (forceDebug) console.log(formatLogMessage('cloudflare', `Checkbox selector ${selector} not found or failed to click`));
|
|
1365
|
-
continue;
|
|
1388
|
+
await waitForTimeout(page, FAST_TIMEOUTS.ELEMENT_INTERACTION_DELAY);
|
|
1389
|
+
|
|
1390
|
+
try {
|
|
1391
|
+
const shadowResult = await clickInShadowDOM(turnstileFrame, [
|
|
1392
|
+
'input[type="checkbox"]',
|
|
1393
|
+
'.ctp-checkbox',
|
|
1394
|
+
'.ctp-checkbox-label',
|
|
1395
|
+
'[role="checkbox"]',
|
|
1396
|
+
'label.cb-lb',
|
|
1397
|
+
'label'
|
|
1398
|
+
], forceDebug);
|
|
1399
|
+
|
|
1400
|
+
if (shadowResult.clicked) {
|
|
1401
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile shadow DOM click succeeded: ${shadowResult.selector}`));
|
|
1402
|
+
} else if (shadowResult.found && shadowResult.x > 0) {
|
|
1403
|
+
await page.mouse.click(shadowResult.x, shadowResult.y);
|
|
1404
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile shadow DOM mouse fallback at (${shadowResult.x}, ${shadowResult.y})`));
|
|
1366
1405
|
}
|
|
1406
|
+
} catch (shadowErr) {
|
|
1407
|
+
if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM fallback failed: ${shadowErr.message}`));
|
|
1367
1408
|
}
|
|
1368
1409
|
|
|
1369
1410
|
// Wait for Turnstile completion with reduced timeout
|
|
@@ -1551,7 +1592,11 @@ async function checkChallengeCompletion(page) {
|
|
|
1551
1592
|
* }
|
|
1552
1593
|
*/
|
|
1553
1594
|
async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDebug = false) {
|
|
1554
|
-
|
|
1595
|
+
const cfDebug = forceDebug || siteConfig.cloudflare_bypass === 'debug' || siteConfig.cloudflare_phish === 'debug';
|
|
1596
|
+
const cfBypassEnabled = siteConfig.cloudflare_bypass === true || siteConfig.cloudflare_bypass === 'debug';
|
|
1597
|
+
const cfPhishEnabled = siteConfig.cloudflare_phish === true || siteConfig.cloudflare_phish === 'debug';
|
|
1598
|
+
|
|
1599
|
+
if (cfDebug) {
|
|
1555
1600
|
console.log(formatLogMessage('cloudflare', `Using Cloudflare module v${CLOUDFLARE_MODULE_VERSION} for ${currentUrl}`));
|
|
1556
1601
|
}
|
|
1557
1602
|
|
|
@@ -1581,7 +1626,7 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
|
|
|
1581
1626
|
// Sets attempted: false, success: true for both protection types
|
|
1582
1627
|
|
|
1583
1628
|
// Only proceed if we have indicators OR explicit config enables Cloudflare handling
|
|
1584
|
-
if (!quickDetection.hasIndicators && !
|
|
1629
|
+
if (!quickDetection.hasIndicators && !cfPhishEnabled && !cfBypassEnabled) {
|
|
1585
1630
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `No Cloudflare indicators found and no explicit config, skipping protection handling for ${currentUrl}`));
|
|
1586
1631
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `Quick detection details: title="${quickDetection.title}", bodySnippet="${quickDetection.bodySnippet}"`));
|
|
1587
1632
|
return {
|
|
@@ -1606,7 +1651,7 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
|
|
|
1606
1651
|
try {
|
|
1607
1652
|
// Adaptive timeout based on detection results and explicit config
|
|
1608
1653
|
let adaptiveTimeout;
|
|
1609
|
-
if (
|
|
1654
|
+
if (cfPhishEnabled || cfBypassEnabled) {
|
|
1610
1655
|
// Explicit config - give more time
|
|
1611
1656
|
adaptiveTimeout = quickDetection.hasIndicators ? TIMEOUTS.ADAPTIVE_TIMEOUT_WITH_INDICATORS : TIMEOUTS.ADAPTIVE_TIMEOUT_WITHOUT_INDICATORS;
|
|
1612
1657
|
} else {
|
|
@@ -1619,7 +1664,7 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
|
|
|
1619
1664
|
}
|
|
1620
1665
|
|
|
1621
1666
|
return await Promise.race([
|
|
1622
|
-
performCloudflareHandling(page, currentUrl, siteConfig,
|
|
1667
|
+
performCloudflareHandling(page, currentUrl, siteConfig, cfDebug),
|
|
1623
1668
|
new Promise((resolve) => {
|
|
1624
1669
|
setTimeout(() => {
|
|
1625
1670
|
console.warn(formatLogMessage('cloudflare', `Adaptive timeout (${adaptiveTimeout}ms) for ${currentUrl} - continuing with scan`));
|
|
@@ -1651,6 +1696,9 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
|
|
|
1651
1696
|
* @returns {Promise<Object>} Same structure as handleCloudflareProtection()
|
|
1652
1697
|
*/
|
|
1653
1698
|
async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebug = false) {
|
|
1699
|
+
const cfBypassEnabled = siteConfig.cloudflare_bypass === true || siteConfig.cloudflare_bypass === 'debug';
|
|
1700
|
+
const cfPhishEnabled = siteConfig.cloudflare_phish === true || siteConfig.cloudflare_phish === 'debug';
|
|
1701
|
+
|
|
1654
1702
|
const result = {
|
|
1655
1703
|
phishingWarning: { attempted: false, success: false },
|
|
1656
1704
|
verificationChallenge: { attempted: false, success: false },
|
|
@@ -1663,7 +1711,7 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
|
|
|
1663
1711
|
// Handle phishing warnings first - updates result.phishingWarning
|
|
1664
1712
|
// Only runs if siteConfig.cloudflare_phish is true or 'debug'
|
|
1665
1713
|
// Handle phishing warnings if enabled
|
|
1666
|
-
if (
|
|
1714
|
+
if (cfPhishEnabled) {
|
|
1667
1715
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `Phishing warning bypass enabled for ${currentUrl}`));
|
|
1668
1716
|
|
|
1669
1717
|
const phishingResult = await handlePhishingWarningWithRetries(page, currentUrl, siteConfig, forceDebug);
|
|
@@ -1698,7 +1746,7 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
|
|
|
1698
1746
|
// Only runs if siteConfig.cloudflare_bypass is true or 'debug'
|
|
1699
1747
|
// Sets requiresHuman: true if CAPTCHA detected (no bypass attempted)
|
|
1700
1748
|
// Handle verification challenges if enabled
|
|
1701
|
-
if (
|
|
1749
|
+
if (cfBypassEnabled) {
|
|
1702
1750
|
if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge bypass enabled for ${currentUrl}`));
|
|
1703
1751
|
|
|
1704
1752
|
const challengeResult = await handleVerificationChallengeWithRetries(page, currentUrl, siteConfig, forceDebug);
|
|
@@ -1751,55 +1799,28 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
|
|
|
1751
1799
|
* Performs parallel detection of multiple challenge types for better performance
|
|
1752
1800
|
*/
|
|
1753
1801
|
async function parallelChallengeDetection(page, forceDebug = false) {
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
|
|
1757
|
-
|
|
1758
|
-
|
|
1759
|
-
|
|
1760
|
-
|
|
1761
|
-
detected: document.querySelector('
|
|
1762
|
-
|
|
1763
|
-
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
|
|
1772
|
-
|
|
1773
|
-
|
|
1774
|
-
|
|
1775
|
-
|
|
1776
|
-
};
|
|
1777
|
-
}).catch(err => ({ type: 'turnstile', detected: false, error: err.message }))
|
|
1778
|
-
);
|
|
1779
|
-
|
|
1780
|
-
// Check for phishing warning
|
|
1781
|
-
detectionPromises.push(
|
|
1782
|
-
page.evaluate(() => {
|
|
1783
|
-
return {
|
|
1784
|
-
type: 'phishing',
|
|
1785
|
-
detected: document.body?.textContent?.includes('This website has been reported for potential phishing') ||
|
|
1786
|
-
document.querySelector('a[href*="continue"]') !== null
|
|
1787
|
-
};
|
|
1788
|
-
}).catch(err => ({ type: 'phishing', detected: false, error: err.message }))
|
|
1789
|
-
);
|
|
1790
|
-
|
|
1791
|
-
// Check for managed challenge
|
|
1792
|
-
detectionPromises.push(
|
|
1793
|
-
page.evaluate(() => {
|
|
1794
|
-
return {
|
|
1795
|
-
type: 'managed',
|
|
1796
|
-
detected: document.querySelector('.cf-managed-challenge') !== null ||
|
|
1797
|
-
document.querySelector('[data-cf-managed]') !== null
|
|
1798
|
-
};
|
|
1799
|
-
}).catch(err => ({ type: 'managed', detected: false, error: err.message }))
|
|
1800
|
-
);
|
|
1801
|
-
|
|
1802
|
-
const results = await Promise.all(detectionPromises);
|
|
1802
|
+
let results;
|
|
1803
|
+
try {
|
|
1804
|
+
results = await page.evaluate(() => {
|
|
1805
|
+
const bodyText = document.body?.textContent || '';
|
|
1806
|
+
return [
|
|
1807
|
+
{ type: 'js', detected: document.querySelector('script[src*="/cdn-cgi/challenge-platform/"]') !== null ||
|
|
1808
|
+
bodyText.includes('Checking your browser') || bodyText.includes('Please wait while we verify') },
|
|
1809
|
+
{ type: 'turnstile', detected: document.querySelector('.cf-turnstile') !== null ||
|
|
1810
|
+
document.querySelector('iframe[src*="challenges.cloudflare.com"]') !== null ||
|
|
1811
|
+
document.querySelector('.ctp-checkbox-container') !== null },
|
|
1812
|
+
{ type: 'phishing', detected: bodyText.includes('This website has been reported for potential phishing') ||
|
|
1813
|
+
document.querySelector('a[href*="continue"]') !== null },
|
|
1814
|
+
{ type: 'managed', detected: document.querySelector('.cf-managed-challenge') !== null ||
|
|
1815
|
+
document.querySelector('[data-cf-managed]') !== null }
|
|
1816
|
+
];
|
|
1817
|
+
});
|
|
1818
|
+
} catch (err) {
|
|
1819
|
+
results = [
|
|
1820
|
+
{ type: 'js', detected: false }, { type: 'turnstile', detected: false },
|
|
1821
|
+
{ type: 'phishing', detected: false }, { type: 'managed', detected: false }
|
|
1822
|
+
];
|
|
1823
|
+
}
|
|
1803
1824
|
|
|
1804
1825
|
const detectedChallenges = results.filter(r => r.detected).map(r => r.type);
|
|
1805
1826
|
|
package/lib/proxy.js
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Proxy Module for NWSS Network Scanner
|
|
3
|
+
* ======================================
|
|
4
|
+
* Routes specific site URLs through SOCKS5, SOCKS4, HTTP, or HTTPS proxies.
|
|
5
|
+
*
|
|
6
|
+
* Chromium's --proxy-server flag is browser-wide, so sites requiring a proxy
|
|
7
|
+
* need a separate browser instance. This module handles:
|
|
8
|
+
* - Parsing proxy URLs (all supported protocols)
|
|
9
|
+
* - Generating Chromium launch args
|
|
10
|
+
* - Per-page authentication via Puppeteer
|
|
11
|
+
* - Proxy bypass lists
|
|
12
|
+
* - Proxy health checks
|
|
13
|
+
*
|
|
14
|
+
* CONFIG EXAMPLES:
|
|
15
|
+
*
|
|
16
|
+
* SOCKS5 (no auth):
|
|
17
|
+
* "proxy": "socks5://127.0.0.1:1080"
|
|
18
|
+
*
|
|
19
|
+
* SOCKS5 with auth:
|
|
20
|
+
* "proxy": "socks5://user:pass@127.0.0.1:1080"
|
|
21
|
+
*
|
|
22
|
+
* HTTP proxy (corporate):
|
|
23
|
+
* "proxy": "http://proxy.corp.com:3128"
|
|
24
|
+
*
|
|
25
|
+
* HTTP proxy with auth:
|
|
26
|
+
* "proxy": "http://user:pass@proxy.corp.com:8080"
|
|
27
|
+
*
|
|
28
|
+
* HTTPS proxy:
|
|
29
|
+
* "proxy": "https://secure-proxy.example.com:8443"
|
|
30
|
+
*
|
|
31
|
+
* With bypass list and remote DNS:
|
|
32
|
+
* "proxy": "socks5://127.0.0.1:1080",
|
|
33
|
+
* "proxy_bypass": ["localhost", "127.0.0.1", "*.local"],
|
|
34
|
+
* "proxy_remote_dns": true
|
|
35
|
+
*
|
|
36
|
+
* Debug mode:
|
|
37
|
+
* "proxy": "socks5://127.0.0.1:1080",
|
|
38
|
+
* "proxy_debug": true
|
|
39
|
+
*
|
|
40
|
+
* Legacy key (backwards compatible):
|
|
41
|
+
* "socks5_proxy": "socks5://127.0.0.1:1080"
|
|
42
|
+
*
|
|
43
|
+
* INTEGRATION (in nwss.js):
|
|
44
|
+
* const { needsProxy, getProxyArgs, applyProxyAuth, getProxyInfo } = require('./lib/proxy');
|
|
45
|
+
*
|
|
46
|
+
* // Before browser launch
|
|
47
|
+
* if (needsProxy(siteConfig)) {
|
|
48
|
+
* const proxyArgs = getProxyArgs(siteConfig, forceDebug);
|
|
49
|
+
* browserArgs.push(...proxyArgs);
|
|
50
|
+
* }
|
|
51
|
+
*
|
|
52
|
+
* // After page creation, before page.goto()
|
|
53
|
+
* await applyProxyAuth(page, siteConfig, forceDebug);
|
|
54
|
+
*
|
|
55
|
+
* @version 1.1.0
|
|
56
|
+
*/
|
|
57
|
+
|
|
58
|
+
const { formatLogMessage } = require('./colorize');
|
|
59
|
+
|
|
60
|
+
const PROXY_MODULE_VERSION = '1.1.0';
|
|
61
|
+
const SUPPORTED_PROTOCOLS = ['socks5', 'socks4', 'http', 'https'];
|
|
62
|
+
|
|
63
|
+
const DEFAULT_PORTS = {
|
|
64
|
+
socks5: 1080,
|
|
65
|
+
socks4: 1080,
|
|
66
|
+
http: 8080,
|
|
67
|
+
https: 8443
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
/**
 * Looks up the proxy URL string configured for a site.
 * The "proxy" key takes precedence; the legacy "socks5_proxy" key is used
 * only when "proxy" is missing or falsy.
 *
 * @param {object} siteConfig - Site configuration block
 * @returns {string|null} The configured proxy value, or null when neither key is set
 */
function getConfiguredProxy(siteConfig) {
  const preferred = siteConfig.proxy;
  if (preferred) return preferred;

  const legacy = siteConfig.socks5_proxy;
  return legacy ? legacy : null;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Parses a proxy URL into components.
 * Accepts: protocol://host:port, protocol://user:pass@host:port, bare host:port
 * (a value without "://" is treated as socks5).
 *
 * When no port is written, the protocol default from DEFAULT_PORTS is used
 * (falling back to 1080). A port that IS written is always honoured, including
 * ":80" on http:// and ":443" on https:// — the URL parser reports those as an
 * empty `port` because they match the scheme default, so they are recovered
 * from the raw authority instead of being replaced by 8080/8443.
 *
 * @param {string} proxyUrl - Proxy URL string
 * @returns {object|null} { protocol, host, port, username, password }, or null if
 *   the input is not a non-empty string, cannot be parsed, or uses an
 *   unsupported protocol
 */
function parseProxyUrl(proxyUrl) {
  if (!proxyUrl || typeof proxyUrl !== 'string') return null;

  let cleaned = proxyUrl.trim();

  // Normalise bare host:port to socks5:// URL
  if (!cleaned.includes('://')) {
    cleaned = `socks5://${cleaned}`;
  }

  try {
    const url = new URL(cleaned);
    const protocol = url.protocol.replace(':', '');

    if (!SUPPORTED_PROTOCOLS.includes(protocol)) return null;

    const host = url.hostname;
    if (!host) return null;

    let port = Number.parseInt(url.port, 10);
    if (!port) {
      // url.port is '' both when no port was written and when the written port
      // equals the scheme default (80 for http, 443 for https). Look at the raw
      // authority to tell the two cases apart before applying our own defaults.
      const authority = cleaned.slice(cleaned.indexOf('://') + 3).split(/[/\\?#]/, 1)[0];
      const explicit = /:(\d+)$/.exec(authority);
      const explicitPort = explicit ? Number.parseInt(explicit[1], 10) : 0;
      port = explicitPort || DEFAULT_PORTS[protocol] || 1080;
    }

    // Credentials come back percent-encoded; a malformed escape throws and is
    // reported as an invalid URL by the catch below.
    const username = url.username ? decodeURIComponent(url.username) : null;
    const password = url.password ? decodeURIComponent(url.password) : null;

    return { protocol, host, port, username, password };
  } catch (_) {
    return null;
  }
}
|
|
116
|
+
|
|
117
|
+
/**
 * Reports whether a site config has a proxy configured.
 * Only checks that getConfiguredProxy() yields a truthy value; the value is
 * not parsed or validated here.
 *
 * @param {object} siteConfig - Site configuration block
 * @returns {boolean} True when a proxy value is present
 */
function needsProxy(siteConfig) {
  const configured = getConfiguredProxy(siteConfig);
  return Boolean(configured);
}
|
|
126
|
+
|
|
127
|
+
/**
 * Returns Chromium launch arguments for the configured proxy.
 *
 * Produces, in order:
 *   --proxy-server=<protocol>://<host>:<port>
 *   --host-resolver-rules=...   (socks5 only, unless remote DNS is set to false)
 *   --proxy-bypass-list=...     (when a bypass list is configured)
 *
 * @param {object} siteConfig - Site configuration block. Reads proxy/socks5_proxy,
 *   proxy_remote_dns/socks5_remote_dns, proxy_bypass/socks5_bypass (array of
 *   patterns or a single string) and proxy_debug/socks5_debug.
 * @param {boolean} forceDebug - Log the generated args when true
 * @returns {string[]} Array of Chromium args (empty if no proxy is configured or
 *   the proxy URL is invalid)
 */
function getProxyArgs(siteConfig, forceDebug = false) {
  const proxyUrl = getConfiguredProxy(siteConfig);
  if (!proxyUrl) return [];

  const parsed = parseProxyUrl(proxyUrl);
  if (!parsed) {
    console.warn(formatLogMessage('proxy', `Invalid proxy URL: ${proxyUrl}`));
    return [];
  }

  const args = [
    `--proxy-server=${parsed.protocol}://${parsed.host}:${parsed.port}`
  ];

  // Remote DNS: block local resolution so hostnames are resolved by the proxy
  // (prevents DNS leaks). Applied to socks5 only: HTTP proxies resolve remotely
  // by default, and Chromium's proxy documentation describes SOCKSv4 target
  // resolution as always client-side, so blocking the local resolver there
  // would make every lookup fail.
  const remoteDns = siteConfig.proxy_remote_dns ?? siteConfig.socks5_remote_dns;
  if (parsed.protocol === 'socks5' && remoteDns !== false) {
    // The proxy's own host must stay resolvable, otherwise a proxy given by
    // hostname could never be reached.
    args.push(`--host-resolver-rules=MAP * ~NOTFOUND , EXCLUDE ${parsed.host}`);
  }

  // Bypass list: domains that skip the proxy. Accept a single string as well as
  // an array so a scalar config value does not throw on join().
  const bypass = siteConfig.proxy_bypass || siteConfig.socks5_bypass || [];
  const bypassEntries = (Array.isArray(bypass) ? bypass : [bypass])
    .map((entry) => String(entry).trim())
    .filter((entry) => entry.length > 0);
  if (bypassEntries.length > 0) {
    args.push(`--proxy-bypass-list=${bypassEntries.join(';')}`);
  }

  const debug = forceDebug || siteConfig.proxy_debug || siteConfig.socks5_debug;
  if (debug) {
    console.log(formatLogMessage('proxy', `[${parsed.protocol}] Args: ${args.join(' ')}`));
  }

  return args;
}
|
|
168
|
+
|
|
169
|
+
/**
 * Hands the proxy credentials embedded in the configured proxy URL to the page
 * through page.authenticate(). Must be called BEFORE page.goto().
 *
 * Nothing is done (and false is returned) when no proxy is configured, the URL
 * cannot be parsed, or the URL carries no username. A missing password is sent
 * as an empty string. Failures from page.authenticate() are logged as warnings
 * and reported as false rather than thrown.
 *
 * NOTE(review): Chromium's proxy documentation lists SOCKS authentication as
 * unsupported — confirm whether credentials on socks4/socks5 URLs have any effect.
 *
 * @param {object} page - Puppeteer page instance
 * @param {object} siteConfig - Site configuration block
 * @param {boolean} forceDebug - Log a confirmation line when true
 * @returns {Promise<boolean>} True if auth was applied
 */
async function applyProxyAuth(page, siteConfig, forceDebug = false) {
  const configured = getConfiguredProxy(siteConfig);
  if (!configured) {
    return false;
  }

  const details = parseProxyUrl(configured);
  const hasCredentials = Boolean(details && details.username);
  if (!hasCredentials) {
    return false;
  }

  try {
    const credentials = {
      username: details.username,
      password: details.password || ''
    };
    await page.authenticate(credentials);

    const verbose = forceDebug || siteConfig.proxy_debug || siteConfig.socks5_debug;
    if (verbose) {
      const message = `Auth set for ${details.username}@${details.host}:${details.port}`;
      console.log(formatLogMessage('proxy', message));
    }

    return true;
  } catch (authErr) {
    console.warn(formatLogMessage('proxy', `Failed to set proxy auth: ${authErr.message}`));
    return false;
  }
}
|
|
202
|
+
|
|
203
|
+
/**
 * Tests proxy connectivity by attempting a TCP connection to the proxy's
 * host and port. Only the TCP connect is checked; no proxy handshake or
 * authentication is performed.
 *
 * The returned promise always resolves (never rejects): connection errors and
 * timeouts are reported through the `error` field.
 *
 * @param {object} siteConfig - Site configuration block
 * @param {number} timeoutMs - Connection timeout (default 5000ms)
 * @returns {Promise<object>} { reachable, latencyMs, error }
 */
async function testProxy(siteConfig, timeoutMs = 5000) {
  const proxyUrl = getConfiguredProxy(siteConfig);
  if (!proxyUrl) {
    return { reachable: false, latencyMs: 0, error: 'No proxy configured' };
  }

  const parsed = parseProxyUrl(proxyUrl);
  if (!parsed) {
    return { reachable: false, latencyMs: 0, error: 'Invalid proxy URL' };
  }

  const net = require('net');

  // URL.hostname keeps the square brackets around IPv6 literals ("[::1]").
  // net.Socket expects the bare address and would otherwise treat the
  // bracketed form as a DNS name and fail the lookup.
  const connectHost = parsed.host.replace(/^\[(.*)\]$/, '$1');

  const start = Date.now();

  return new Promise((resolve) => {
    const socket = new net.Socket();

    // Shared failure path for socket errors and timeouts. resolve() is a no-op
    // after the first call, so a late second event cannot change the result.
    const onError = (err) => {
      socket.destroy();
      resolve({ reachable: false, latencyMs: Date.now() - start, error: err.message });
    };

    socket.setTimeout(timeoutMs);
    socket.on('error', onError);
    socket.on('timeout', () => onError(new Error('Connection timeout')));

    socket.connect(parsed.port, connectHost, () => {
      const latency = Date.now() - start;
      socket.destroy();
      resolve({ reachable: true, latencyMs: latency, error: null });
    });
  });
}
|
|
243
|
+
|
|
244
|
+
/**
 * Builds a short, log-friendly description of the configured proxy.
 * The username is included when present; the password is never included.
 *
 * @param {object} siteConfig - Site configuration block
 * @returns {string} 'none' when no proxy is configured, 'invalid' when the URL
 *   cannot be parsed, otherwise "protocol://[user@]host:port"
 */
function getProxyInfo(siteConfig) {
  const configured = getConfiguredProxy(siteConfig);
  if (!configured) {
    return 'none';
  }

  const details = parseProxyUrl(configured);
  if (!details) {
    return 'invalid';
  }

  const { protocol, host, port, username } = details;
  const userPrefix = username ? `${username}@` : '';
  return `${protocol}://${userPrefix}${host}:${port}`;
}
|
|
260
|
+
|
|
261
|
+
/**
 * Describes this module for version reporting.
 *
 * @returns {{version: string, name: string}} A new object on every call holding
 *   PROXY_MODULE_VERSION and the module's display name
 */
function getModuleInfo() {
  const info = {
    version: PROXY_MODULE_VERSION,
    name: 'Proxy Handler'
  };
  return info;
}
|
|
267
|
+
|
|
268
|
+
module.exports = {
|
|
269
|
+
parseProxyUrl,
|
|
270
|
+
needsProxy,
|
|
271
|
+
getProxyArgs,
|
|
272
|
+
applyProxyAuth,
|
|
273
|
+
testProxy,
|
|
274
|
+
getProxyInfo,
|
|
275
|
+
getModuleInfo,
|
|
276
|
+
getConfiguredProxy,
|
|
277
|
+
PROXY_MODULE_VERSION,
|
|
278
|
+
SUPPORTED_PROTOCOLS
|
|
279
|
+
};
|
package/nwss.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// === Network scanner script (nwss.js) v2.0.
|
|
1
|
+
// === Network scanner script (nwss.js) v2.0.51 ===
|
|
2
2
|
|
|
3
3
|
// puppeteer for browser automation, fs for file system operations, psl for domain parsing.
|
|
4
4
|
// const pLimit = require('p-limit'); // Will be dynamically imported
|
|
@@ -44,6 +44,7 @@ const { performPageInteraction, createInteractionConfig } = require('./lib/inter
|
|
|
44
44
|
const { createGlobalHelpers, getTotalDomainsSkipped, getDetectedDomainsCount } = require('./lib/domain-cache');
|
|
45
45
|
const { createSmartCache } = require('./lib/smart-cache'); // Smart cache system
|
|
46
46
|
const { clearPersistentCache } = require('./lib/smart-cache');
|
|
47
|
+
const { needsProxy, getProxyArgs, applyProxyAuth, getProxyInfo, testProxy } = require('./lib/proxy');
|
|
47
48
|
// Dry run functionality
|
|
48
49
|
const { initializeDryRunCollections, addDryRunMatch, addDryRunNetTools, processDryRunResults, writeDryRunOutput } = require('./lib/dry-run');
|
|
49
50
|
// Enhanced site data clearing functionality
|
|
@@ -1354,7 +1355,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1354
1355
|
* Uses system Chrome and temporary directories to minimize disk usage
|
|
1355
1356
|
* @returns {Promise<import('puppeteer').Browser>} Browser instance
|
|
1356
1357
|
*/
|
|
1357
|
-
async function createBrowser() {
|
|
1358
|
+
async function createBrowser(extraArgs = []) {
|
|
1358
1359
|
// Create temporary user data directory that we can fully control and clean up
|
|
1359
1360
|
const tempUserDataDir = `/tmp/puppeteer-${Date.now()}-${Math.random().toString(36).substring(7)}`;
|
|
1360
1361
|
userDataDir = tempUserDataDir; // Store for cleanup tracking (use outer scope variable)
|
|
@@ -1458,6 +1459,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1458
1459
|
'--disable-background-timer-throttling',
|
|
1459
1460
|
'--disable-features=site-per-process', // Better for single-site scanning
|
|
1460
1461
|
'--no-zygote', // Better process isolation
|
|
1462
|
+
...extraArgs,
|
|
1461
1463
|
],
|
|
1462
1464
|
// Optimized timeouts for Puppeteer 23.x performance
|
|
1463
1465
|
protocolTimeout: TIMEOUTS.PROTOCOL_TIMEOUT,
|
|
@@ -2130,6 +2132,11 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2130
2132
|
}
|
|
2131
2133
|
}
|
|
2132
2134
|
|
|
2135
|
+
// --- Apply proxy authentication if configured ---
|
|
2136
|
+
if (needsProxy(siteConfig)) {
|
|
2137
|
+
await applyProxyAuth(page, siteConfig, forceDebug);
|
|
2138
|
+
}
|
|
2139
|
+
|
|
2133
2140
|
// --- Apply all fingerprint spoofing (user agent, Brave, fingerprint protection) ---
|
|
2134
2141
|
try {
|
|
2135
2142
|
await applyAllFingerprintSpoofing(page, siteConfig, forceDebug, currentUrl);
|
|
@@ -3337,6 +3344,25 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3337
3344
|
siteCounter++;
|
|
3338
3345
|
// Continue processing with the redirected URL instead of throwing error
|
|
3339
3346
|
} else {
|
|
3347
|
+
// Detect proxy-specific failures and provide clear diagnostics
|
|
3348
|
+
if (needsProxy(siteConfig) && err.message) {
|
|
3349
|
+
const proxyErrors = [
|
|
3350
|
+
'ERR_PROXY_CONNECTION_FAILED',
|
|
3351
|
+
'ERR_SOCKS_CONNECTION_FAILED',
|
|
3352
|
+
'ERR_TUNNEL_CONNECTION_FAILED',
|
|
3353
|
+
'ERR_PROXY_AUTH_UNSUPPORTED',
|
|
3354
|
+
'ERR_PROXY_AUTH_REQUESTED',
|
|
3355
|
+
'ERR_SOCKS_CONNECTION_HOST_UNREACHABLE',
|
|
3356
|
+
'ERR_PROXY_CERTIFICATE_INVALID',
|
|
3357
|
+
'ERR_NO_SUPPORTED_PROXIES'
|
|
3358
|
+
];
|
|
3359
|
+
const proxyErr = proxyErrors.find(e => err.message.includes(e));
|
|
3360
|
+
if (proxyErr) {
|
|
3361
|
+
const info = getProxyInfo(siteConfig);
|
|
3362
|
+
console.error(formatLogMessage('error', `[proxy] ${proxyErr} — proxy: ${info} — URL: ${currentUrl}`));
|
|
3363
|
+
console.error(formatLogMessage('error', `[proxy] Check: is the proxy running? Are credentials correct? Is the target reachable from the proxy?`));
|
|
3364
|
+
}
|
|
3365
|
+
}
|
|
3340
3366
|
console.error(formatLogMessage('error', `Failed on ${currentUrl}: ${err.message}`));
|
|
3341
3367
|
throw err;
|
|
3342
3368
|
}
|
|
@@ -3662,6 +3688,26 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3662
3688
|
}
|
|
3663
3689
|
|
|
3664
3690
|
} catch (err) {
|
|
3691
|
+
// Detect proxy-specific failures at top level
|
|
3692
|
+
if (needsProxy(siteConfig) && err.message) {
|
|
3693
|
+
const proxyErrors = [
|
|
3694
|
+
'ERR_PROXY_CONNECTION_FAILED',
|
|
3695
|
+
'ERR_SOCKS_CONNECTION_FAILED',
|
|
3696
|
+
'ERR_TUNNEL_CONNECTION_FAILED',
|
|
3697
|
+
'ERR_PROXY_AUTH_UNSUPPORTED',
|
|
3698
|
+
'ERR_PROXY_AUTH_REQUESTED',
|
|
3699
|
+
'ERR_SOCKS_CONNECTION_HOST_UNREACHABLE',
|
|
3700
|
+
'ERR_PROXY_CERTIFICATE_INVALID',
|
|
3701
|
+
'ERR_NO_SUPPORTED_PROXIES'
|
|
3702
|
+
];
|
|
3703
|
+
const proxyErr = proxyErrors.find(e => err.message.includes(e));
|
|
3704
|
+
if (proxyErr) {
|
|
3705
|
+
const info = getProxyInfo(siteConfig);
|
|
3706
|
+
console.error(formatLogMessage('error', `[proxy] ${proxyErr} — proxy: ${info} — URL: ${currentUrl}`));
|
|
3707
|
+
console.error(formatLogMessage('error', `[proxy] Check: is the proxy running? Are credentials correct? Is the target reachable from the proxy?`));
|
|
3708
|
+
}
|
|
3709
|
+
}
|
|
3710
|
+
|
|
3665
3711
|
// Only restart for truly fatal browser errors
|
|
3666
3712
|
const isFatalError = CRITICAL_BROWSER_ERRORS.some(errorType =>
|
|
3667
3713
|
err.message.includes(errorType)
|
|
@@ -3789,6 +3835,14 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3789
3835
|
}
|
|
3790
3836
|
}
|
|
3791
3837
|
|
|
3838
|
+
// Grouping key for browser instances: '' for a direct connection, otherwise the
// getProxyInfo() description of the site's proxy.
const proxyKeyFor = (siteConfig) => (needsProxy(siteConfig) ? getProxyInfo(siteConfig) : '');
|
|
3843
|
+
|
|
3844
|
+
// Sort tasks so proxy groups are contiguous — direct connections first, then each proxy
|
|
3845
|
+
allTasks.sort((a, b) => proxyKeyFor(a.config).localeCompare(proxyKeyFor(b.config)));
|
|
3792
3846
|
|
|
3793
3847
|
let results = [];
|
|
3794
3848
|
let processedUrlCount = 0;
|
|
@@ -3832,6 +3886,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3832
3886
|
|
|
3833
3887
|
// Process URLs in batches with exception handling
|
|
3834
3888
|
let siteGroupIndex = 0;
|
|
3889
|
+
let currentProxyKey = ''; // Track active proxy config — '' means direct connection
|
|
3835
3890
|
try {
|
|
3836
3891
|
for (let batchStart = 0; batchStart < totalUrls; batchStart += RESOURCE_CLEANUP_INTERVAL) {
|
|
3837
3892
|
const batchEnd = Math.min(batchStart + RESOURCE_CLEANUP_INTERVAL, totalUrls);
|
|
@@ -3952,14 +4007,67 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3952
4007
|
if (forceDebug) console.log(formatLogMessage('debug', `Browser cleanup warning: ${browserCloseErr.message}`));
|
|
3953
4008
|
}
|
|
3954
4009
|
|
|
3955
|
-
// Create new browser for next batch
|
|
3956
|
-
|
|
4010
|
+
// Create new browser for next batch (preserve current proxy config)
|
|
4011
|
+
const restartProxyArgs = currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : [];
|
|
4012
|
+
browser = await createBrowser(restartProxyArgs);
|
|
3957
4013
|
if (forceDebug) console.log(formatLogMessage('debug', `New browser instance created for batch ${Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL) + 1}`));
|
|
3958
4014
|
|
|
3959
4015
|
// Reset cleanup counter and add delay
|
|
3960
4016
|
urlsSinceLastCleanup = 0;
|
|
3961
4017
|
await fastTimeout(TIMEOUTS.BROWSER_STABILIZE_DELAY);
|
|
3962
4018
|
}
|
|
4019
|
+
|
|
4020
|
+
// --- Proxy-aware browser restart ---
|
|
4021
|
+
// --proxy-server is browser-wide, so if the batch needs a different proxy we must restart
|
|
4022
|
+
const batchProxyKey = proxyKeyFor(currentBatch[0].config);
|
|
4023
|
+
if (batchProxyKey !== currentProxyKey) {
|
|
4024
|
+
const debug = forceDebug || currentBatch[0].config.proxy_debug || currentBatch[0].config.socks5_debug;
|
|
4025
|
+
if (debug) {
|
|
4026
|
+
const from = currentProxyKey || 'direct';
|
|
4027
|
+
const to = batchProxyKey || 'direct';
|
|
4028
|
+
console.log(formatLogMessage('proxy', `Switching proxy: ${from} → ${to}`));
|
|
4029
|
+
}
|
|
4030
|
+
|
|
4031
|
+
try {
|
|
4032
|
+
await handleBrowserExit(browser, {
|
|
4033
|
+
forceDebug, timeout: 10000, exitOnFailure: false,
|
|
4034
|
+
cleanTempFiles: true, comprehensiveCleanup: removeTempFiles
|
|
4035
|
+
});
|
|
4036
|
+
if (userDataDir && fs.existsSync(userDataDir)) {
|
|
4037
|
+
fs.rmSync(userDataDir, { recursive: true, force: true });
|
|
4038
|
+
}
|
|
4039
|
+
} catch (proxyRestartErr) {
|
|
4040
|
+
if (forceDebug) console.log(formatLogMessage('debug', `Proxy switch browser cleanup: ${proxyRestartErr.message}`));
|
|
4041
|
+
}
|
|
4042
|
+
|
|
4043
|
+
const proxyArgs = batchProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : [];
|
|
4044
|
+
|
|
4045
|
+
// Pre-flight: verify proxy is reachable before launching browser
|
|
4046
|
+
if (proxyArgs.length > 0) {
|
|
4047
|
+
const health = await testProxy(currentBatch[0].config, 5000);
|
|
4048
|
+
if (!health.reachable) {
|
|
4049
|
+
const info = getProxyInfo(currentBatch[0].config);
|
|
4050
|
+
console.error(formatLogMessage('error', `[proxy] Unreachable: ${info} — ${health.error}`));
|
|
4051
|
+
console.error(formatLogMessage('error', `[proxy] Skipping ${currentBatch.length} URL(s) in this batch`));
|
|
4052
|
+
const skipResults = currentBatch.map(task => ({
|
|
4053
|
+
success: false, url: task.url, rules: [],
|
|
4054
|
+
error: `Proxy unreachable: ${health.error}`
|
|
4055
|
+
}));
|
|
4056
|
+
results.push(...skipResults);
|
|
4057
|
+
processedUrlCount += currentBatch.length;
|
|
4058
|
+
urlsSinceLastCleanup += currentBatch.length;
|
|
4059
|
+
continue;
|
|
4060
|
+
}
|
|
4061
|
+
if (forceDebug) {
|
|
4062
|
+
console.log(formatLogMessage('proxy', `Proxy reachable (${health.latencyMs}ms)`));
|
|
4063
|
+
}
|
|
4064
|
+
}
|
|
4065
|
+
|
|
4066
|
+
browser = await createBrowser(proxyArgs);
|
|
4067
|
+
currentProxyKey = batchProxyKey;
|
|
4068
|
+
urlsSinceLastCleanup = 0;
|
|
4069
|
+
await fastTimeout(TIMEOUTS.BROWSER_STABILIZE_DELAY);
|
|
4070
|
+
}
|
|
3963
4071
|
|
|
3964
4072
|
if (forceDebug) {
|
|
3965
4073
|
console.log(formatLogMessage('debug', `Processing batch ${Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL) + 1}: ${batchSize} URL(s) (total processed: ${processedUrlCount})`));
|
|
@@ -3986,7 +4094,8 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3986
4094
|
console.log(formatLogMessage('error', `[TIMEOUT] Batch hung. Restarting browser.`));
|
|
3987
4095
|
try {
|
|
3988
4096
|
await handleBrowserExit(browser, { forceDebug, timeout: 5000, exitOnFailure: false });
|
|
3989
|
-
|
|
4097
|
+
const timeoutProxyArgs = currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : [];
|
|
4098
|
+
browser = await createBrowser(timeoutProxyArgs);
|
|
3990
4099
|
urlsSinceLastCleanup = 0;
|
|
3991
4100
|
} catch (restartErr) {
|
|
3992
4101
|
throw restartErr;
|
|
@@ -4104,7 +4213,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4104
4213
|
comprehensive: true
|
|
4105
4214
|
});
|
|
4106
4215
|
}
|
|
4107
|
-
browser = await createBrowser();
|
|
4216
|
+
browser = await createBrowser(currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : []);
|
|
4108
4217
|
urlsSinceLastCleanup = 0; // Reset counter
|
|
4109
4218
|
await fastTimeout(TIMEOUTS.EMERGENCY_RESTART_DELAY); // Give browser time to stabilize
|
|
4110
4219
|
} catch (emergencyRestartErr) {
|
|
@@ -4116,7 +4225,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
4116
4225
|
console.log(`\n${messageColors.fileOp('🔄 Emergency hang detection restart:')} Browser appears hung, forcing restart`);
|
|
4117
4226
|
try {
|
|
4118
4227
|
await handleBrowserExit(browser, { forceDebug, timeout: 5000, exitOnFailure: false, cleanTempFiles: true });
|
|
4119
|
-
browser = await createBrowser();
|
|
4228
|
+
browser = await createBrowser(currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : []);
|
|
4120
4229
|
urlsSinceLastCleanup = 0;
|
|
4121
4230
|
forceRestartFlag = false; // Reset flag
|
|
4122
4231
|
await fastTimeout(TIMEOUTS.EMERGENCY_RESTART_DELAY);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.51",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|
|
@@ -48,7 +48,7 @@
|
|
|
48
48
|
},
|
|
49
49
|
"homepage": "https://github.com/ryanbr/network-scanner",
|
|
50
50
|
"devDependencies": {
|
|
51
|
-
"eslint": "^
|
|
51
|
+
"eslint": "^10.0.2",
|
|
52
52
|
"globals": "^16.3.0"
|
|
53
53
|
}
|
|
54
54
|
}
|