@fanboynz/network-scanner 2.0.49 → 2.0.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -17,6 +17,7 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
17
17
  - Subdomain handling (collapse to root or full subdomain)
18
18
  - Optionally match only first-party, third-party, or both
19
19
  - Enhanced redirect handling with JavaScript and meta refresh detection
20
+ - Per-site proxy routing (SOCKS5, SOCKS4, HTTP, HTTPS) with pre-flight health checks
20
21
 
21
22
  ---
22
23
 
@@ -315,6 +316,86 @@ Route traffic through a VPN for specific sites. Requires `sudo` privileges. The
315
316
 
316
317
  > **Authentication:** If the `.ovpn` file already contains credentials (via `auth-user-pass /path/to/file` or an inline `<auth-user-pass>` block), no additional config is needed — just provide the config path. The `username`/`password` fields are only needed when the `.ovpn` file has a bare `auth-user-pass` directive that expects interactive input.
317
318
 
319
+ ### Proxy Options
320
+
321
+ Route traffic through a proxy for specific sites. Supports SOCKS5, SOCKS4, HTTP, and HTTPS proxies. Unlike VPN, proxy routing is per-site-group — only URLs in the same config block use the proxy; other sites connect directly.
322
+
323
+ > **Note:** Chromium's `--proxy-server` flag is browser-wide. Sites requiring different proxies (or direct vs proxied) are automatically separated into different browser instances. Tasks are sorted so proxy groups are contiguous to minimise restarts.
324
+
325
+ | Field | Values | Default | Description |
326
+ |:---------------------|:-------|:-------:|:------------|
327
+ | `proxy` | String | - | Proxy URL: `socks5://host:port`, `http://host:port`, `https://host:port`, or `http://user:pass@host:port` |
328
+ | `proxy_bypass` | Array | `[]` | Domains that skip the proxy (e.g. `["localhost", "127.0.0.1", "*.local"]`) |
329
+ | `proxy_remote_dns` | Boolean | `true` | Resolve DNS through the proxy (SOCKS only — prevents DNS leaks) |
330
+ | `proxy_debug` | Boolean | `false` | Print proxy diagnostics: launch args, auth, health checks, error codes |
331
+
332
+ Legacy aliases (`socks5_proxy`, `socks5_bypass`, `socks5_remote_dns`, `socks5_debug`) are supported for backwards compatibility.
333
+
334
+ #### Proxy Examples
335
+
336
+ **SOCKS5 — no auth:**
337
+ ```json
338
+ {
339
+ "url": ["https://blocked-site.com/", "https://another-blocked.com/"],
340
+ "proxy": "socks5://127.0.0.1:1080",
341
+ "search_string": ["tracking.js"]
342
+ }
343
+ ```
344
+
345
+ **HTTP proxy with credentials:**
346
+ ```json
347
+ {
348
+ "url": ["https://geo-restricted.com/"],
349
+ "proxy": "http://user:pass@proxy.corp.com:3128",
350
+ "search_string": ["analytics"]
351
+ }
352
+ ```
353
+
354
+ **SOCKS5 with bypass list and debug:**
355
+ ```json
356
+ {
357
+ "url": ["https://target-site.com/"],
358
+ "proxy": "socks5://user:pass@proxy.example.com:9050",
359
+ "proxy_bypass": ["localhost", "127.0.0.1", "*.internal.corp"],
360
+ "proxy_remote_dns": true,
361
+ "proxy_debug": true,
362
+ "search_string": ["tracker"]
363
+ }
364
+ ```
365
+
366
+ **Mixed direct + proxied in one config:**
367
+ ```json
368
+ [
369
+ {
370
+ "url": ["https://direct-site.com/"],
371
+ "search_string": ["ads"]
372
+ },
373
+ {
374
+ "url": ["https://blocked-site.com/"],
375
+ "proxy": "socks5://127.0.0.1:1080",
376
+ "search_string": ["ads"]
377
+ }
378
+ ]
379
+ ```
380
+
381
+ #### Proxy Error Handling
382
+
383
+ If a proxy is unreachable, the batch is skipped with a clear error before any navigation is attempted:
384
+
385
+ ```
386
+ [error] [proxy] Unreachable: socks5://127.0.0.1:1080 — Connection refused
387
+ [error] [proxy] Skipping 5 URL(s) in this batch
388
+ ```
389
+
390
+ If a proxy fails mid-scan, Chromium's error code is detected and diagnosed:
391
+
392
+ ```
393
+ [error] [proxy] ERR_SOCKS_CONNECTION_FAILED — proxy: socks5://127.0.0.1:1080 — URL: https://example.com/
394
+ [error] [proxy] Check: is the proxy running? Are credentials correct? Is the target reachable from the proxy?
395
+ ```
396
+
397
+ Detected error codes: `ERR_PROXY_CONNECTION_FAILED`, `ERR_SOCKS_CONNECTION_FAILED`, `ERR_TUNNEL_CONNECTION_FAILED`, `ERR_PROXY_AUTH_UNSUPPORTED`, `ERR_PROXY_AUTH_REQUESTED`, `ERR_SOCKS_CONNECTION_HOST_UNREACHABLE`, `ERR_PROXY_CERTIFICATE_INVALID`, `ERR_NO_SUPPORTED_PROXIES`.
398
+
318
399
  ### Global Configuration Options
319
400
 
320
401
  These options go at the root level of your config.json:
package/lib/cloudflare.js CHANGED
@@ -58,11 +58,76 @@ const FAST_TIMEOUTS = {
58
58
  ELEMENT_INTERACTION_DELAY: 250, // Fast element interactions
59
59
  SELECTOR_WAIT: 3000, // Fast selector waits
60
60
  TURNSTILE_OPERATION: 6000, // Fast Turnstile operations
61
- JS_CHALLENGE: 19000, // Fast JS challenge completion
61
+ JS_CHALLENGE: 10000, // Fast JS challenge completion
62
62
  CHALLENGE_SOLVING: 30000, // Fast overall challenge solving
63
63
  CHALLENGE_COMPLETION: 8000 // Fast completion check
64
64
  };
65
65
 
66
+ /**
67
+ * Finds and clicks an element inside shadow DOM trees via page.evaluate
68
+ * Returns {found, clicked, x, y} - coordinates allow fallback mouse.click
69
+ */
70
+ async function clickInShadowDOM(context, selectors, forceDebug = false, waitMs = 1500) {
71
+ // Try Puppeteer's pierce/ selector first — handles CLOSED shadow roots via CDP
72
+ for (const selector of selectors) {
73
+ try {
74
+ // Wait for element to appear (handles delayed rendering)
75
+ const start = Date.now();
76
+ const element = await context.waitForSelector(`pierce/${selector}`, { timeout: waitMs });
77
+ if (element) {
78
+ const box = await element.boundingBox();
79
+ if (box && box.width > 0 && box.height > 0) {
80
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} matched in ${Date.now() - start}ms — box: ${box.width}x${box.height} at (${box.x},${box.y})`));
81
+ await element.click();
82
+ await element.dispose();
83
+ return { found: true, clicked: true, selector, x: box.x + box.width / 2, y: box.y + box.height / 2 };
84
+ }
85
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} found but not visible (0x0)`));
86
+ await element.dispose();
87
+ // Element found but not visible
88
+ return { found: true, clicked: false, selector, x: 0, y: 0 };
89
+ }
90
+ } catch (e) {
91
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} timeout after ${waitMs}ms`));
92
+ continue;
93
+ }
94
+ }
95
+
96
+ // Fallback: manual traversal for open shadow roots
97
+ const result = await context.evaluate((sels) => {
98
+ function deepQuery(root, selector) {
99
+ // Try direct query first
100
+ const el = root.querySelector(selector);
101
+ if (el) return el;
102
+
103
+ // Traverse shadow roots
104
+ const allElements = root.querySelectorAll('*');
105
+ for (const node of allElements) {
106
+ if (node.shadowRoot) {
107
+ const found = deepQuery(node.shadowRoot, selector);
108
+ if (found) return found;
109
+ }
110
+ }
111
+ return null;
112
+ }
113
+
114
+ for (const selector of sels) {
115
+ const el = deepQuery(document, selector);
116
+ if (el) {
117
+ const rect = el.getBoundingClientRect();
118
+ if (rect.width > 0 && rect.height > 0) {
119
+ el.click();
120
+ return { found: true, clicked: true, selector, x: rect.x + rect.width / 2, y: rect.y + rect.height / 2 };
121
+ }
122
+ return { found: true, clicked: false, selector, x: 0, y: 0 };
123
+ }
124
+ }
125
+ return { found: false, clicked: false, selector: null, x: 0, y: 0 };
126
+ }, selectors);
127
+
128
+ return result;
129
+ }
130
+
66
131
  /**
67
132
  * Error categories for better handling
68
133
  */
@@ -306,12 +371,12 @@ function categorizeError(error) {
306
371
  /**
307
372
  * Implements exponential backoff delay
308
373
  */
309
- async function getRetryDelay(attempt) {
374
+ function getRetryDelay(attempt) {
310
375
  const delay = Math.min(
311
376
  RETRY_CONFIG.baseDelay * Math.pow(RETRY_CONFIG.backoffMultiplier, attempt - 1),
312
377
  RETRY_CONFIG.maxDelay
313
378
  );
314
- return new Promise(resolve => setTimeout(resolve, delay));
379
+ return delay;
315
380
  }
316
381
 
317
382
  /**
@@ -341,49 +406,8 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
341
406
  throw new Error('Page URL access failed - likely detached');
342
407
  }
343
408
 
344
- // Quick execution context validation with timeout
345
- const contextValid = await Promise.race([
346
- page.evaluate(() => {
347
- try {
348
- // Quick context validation
349
- if (typeof window === 'undefined' || !document) {
350
- return false;
351
- }
352
- // Check if document is ready for interaction
353
- if (document.readyState === 'uninitialized') {
354
- return false;
355
- }
356
- return true;
357
- } catch (e) {
358
- return false;
359
- }
360
- }),
361
- new Promise((_, reject) => {
362
- setTimeout(() => reject(new Error('Context validation timeout')), 3500);
363
- })
364
- ]).catch(() => false);
365
-
366
- if (!contextValid) {
367
- throw new Error('Page execution context is invalid');
368
- }
369
-
370
409
  const result = await Promise.race([
371
- page.evaluate(() => {
372
- // Additional runtime validation inside evaluation
373
- try {
374
- if (typeof window === 'undefined' || !document) {
375
- throw new Error('Execution context invalid during evaluation');
376
- }
377
- return func();
378
- } catch (evalError) {
379
- // Return error info instead of throwing to avoid unhandled promise rejections
380
- return {
381
- __evaluation_error: true,
382
- message: evalError.message,
383
- type: 'evaluation_error'
384
- };
385
- }
386
- }),
410
+ page.evaluate(func),
387
411
  new Promise((_, reject) => {
388
412
  timeoutId = setTimeout(() => reject(new Error('Page evaluation timeout')), timeout);
389
413
  })
@@ -394,11 +418,6 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
394
418
  clearTimeout(timeoutId);
395
419
  }
396
420
 
397
- // Check if evaluation returned an error
398
- if (result && result.__evaluation_error) {
399
- throw new Error(`Evaluation failed: ${result.message}`);
400
- }
401
-
402
421
  if (forceDebug && attempt > 1) {
403
422
  console.log(formatLogMessage('cloudflare', `Page evaluation succeeded on attempt ${attempt}`));
404
423
  }
@@ -438,7 +457,7 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
438
457
  }
439
458
 
440
459
  // Wait before retrying with exponential backoff
441
- await getRetryDelay(attempt);
460
+ await new Promise(resolve => setTimeout(resolve, getRetryDelay(attempt)));
442
461
  }
443
462
  }
444
463
 
@@ -460,15 +479,18 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
460
479
  * Safe element clicking with timeout protection
461
480
  */
462
481
  async function safeClick(page, selector, timeout = TIMEOUTS.CLICK_TIMEOUT) {
482
+ let timeoutId;
463
483
  try {
464
484
  return await Promise.race([
465
485
  page.click(selector, { timeout: timeout }),
466
486
  new Promise((_, reject) => {
467
- setTimeout(() => reject(new Error('Click timeout')), timeout + TIMEOUTS.CLICK_TIMEOUT_BUFFER);
487
+ timeoutId = setTimeout(() => reject(new Error('Click timeout')), timeout + TIMEOUTS.CLICK_TIMEOUT_BUFFER);
468
488
  })
469
489
  ]);
470
490
  } catch (error) {
471
491
  throw new Error(`Click failed: ${error.message}`);
492
+ } finally {
493
+ if (timeoutId) clearTimeout(timeoutId);
472
494
  }
473
495
  }
474
496
 
@@ -476,16 +498,18 @@ async function safeClick(page, selector, timeout = TIMEOUTS.CLICK_TIMEOUT) {
476
498
  * Safe navigation waiting with timeout protection
477
499
  */
478
500
  async function safeWaitForNavigation(page, timeout = TIMEOUTS.NAVIGATION_TIMEOUT) {
501
+ let timeoutId;
479
502
  try {
480
503
  return await Promise.race([
481
504
  page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: timeout }),
482
505
  new Promise((_, reject) => {
483
- setTimeout(() => reject(new Error('Navigation timeout')), timeout + TIMEOUTS.NAVIGATION_TIMEOUT_BUFFER);
506
+ timeoutId = setTimeout(() => reject(new Error('Navigation timeout')), timeout + TIMEOUTS.NAVIGATION_TIMEOUT_BUFFER);
484
507
  })
485
508
  ]);
486
509
  } catch (error) {
487
510
  console.warn(formatLogMessage('cloudflare', `Navigation wait failed: ${error.message}`));
488
- // Don't throw - just continue
511
+ } finally {
512
+ if (timeoutId) clearTimeout(timeoutId);
489
513
  }
490
514
  }
491
515
 
@@ -583,7 +607,14 @@ async function quickCloudflareDetection(page, forceDebug = false) {
583
607
  */
584
608
  async function analyzeCloudflareChallenge(page) {
585
609
  try {
586
- return await safePageEvaluate(page, () => {
610
+ // CDP-level frame check bypasses closed shadow roots
611
+ const frames = page.frames();
612
+ const hasChallengeFrame = frames.some(f => {
613
+ const url = f.url();
614
+ return url.includes('challenges.cloudflare.com') || url.includes('/cdn-cgi/challenge-platform/');
615
+ });
616
+
617
+ const result = await safePageEvaluate(page, () => {
587
618
  const title = document.title || '';
588
619
  const bodyText = document.body ? document.body.textContent : '';
589
620
 
@@ -655,6 +686,15 @@ async function analyzeCloudflareChallenge(page) {
655
686
  bodySnippet: bodyText.substring(0, 200)
656
687
  };
657
688
  }, TIMEOUTS.PAGE_EVALUATION);
689
+
690
+ // Merge CDP frame detection — catches iframes behind closed shadow roots
691
+ if (hasChallengeFrame && !result.hasTurnstileIframe) {
692
+ result.hasTurnstileIframe = true;
693
+ result.isTurnstile = true;
694
+ result.isChallengePresent = true;
695
+ }
696
+
697
+ return result;
658
698
  } catch (error) {
659
699
  return {
660
700
  isChallengePresent: false,
@@ -862,7 +902,7 @@ async function handleVerificationChallengeWithRetries(page, currentUrl, siteConf
862
902
 
863
903
  // If this wasn't the last attempt, wait before retrying
864
904
  if (attempt < retryConfig.maxAttempts) {
865
- const delay = await getRetryDelay(attempt);
905
+ const delay = getRetryDelay(attempt);
866
906
  if (forceDebug) {
867
907
  console.log(formatLogMessage('cloudflare', `Challenge attempt ${attempt} failed, retrying in ${delay}ms: ${result.error}`));
868
908
  }
@@ -904,7 +944,7 @@ async function handleVerificationChallengeWithRetries(page, currentUrl, siteConf
904
944
 
905
945
  // Wait before retrying with exponential backoff
906
946
  if (attempt < retryConfig.maxAttempts) {
907
- await getRetryDelay(attempt);
947
+ await new Promise(resolve => setTimeout(resolve, getRetryDelay(attempt)));
908
948
  }
909
949
  }
910
950
  }
@@ -945,7 +985,7 @@ async function handlePhishingWarningWithRetries(page, currentUrl, siteConfig, fo
945
985
 
946
986
  // If this wasn't the last attempt, wait before retrying
947
987
  if (attempt < retryConfig.maxAttempts) {
948
- const delay = await getRetryDelay(attempt);
988
+ const delay = getRetryDelay(attempt);
949
989
  if (forceDebug) {
950
990
  console.log(formatLogMessage('cloudflare', `Phishing warning attempt ${attempt} failed, retrying in ${delay}ms: ${result.error}`));
951
991
  }
@@ -975,7 +1015,7 @@ async function handlePhishingWarningWithRetries(page, currentUrl, siteConfig, fo
975
1015
 
976
1016
  // Wait before retrying with exponential backoff
977
1017
  if (attempt < retryConfig.maxAttempts) {
978
- await getRetryDelay(attempt);
1018
+ await new Promise(resolve => setTimeout(resolve, getRetryDelay(attempt)));
979
1019
  }
980
1020
  }
981
1021
  }
@@ -1046,6 +1086,23 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
1046
1086
 
1047
1087
  const jsResult = await waitForJSChallengeCompletion(page, forceDebug);
1048
1088
  if (jsResult.success) {
1089
+ // Wait for redirect after challenge completion
1090
+ try {
1091
+ const startUrl = await page.url();
1092
+ await page.waitForFunction(
1093
+ (origUrl) => {
1094
+ const bodyText = document.body?.textContent || '';
1095
+ return document.title !== 'Just a moment...' ||
1096
+ window.location.href !== origUrl ||
1097
+ bodyText.includes('Verification successful');
1098
+ },
1099
+ { timeout: 10000 },
1100
+ startUrl
1101
+ );
1102
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge page cleared for ${currentUrl}`));
1103
+ } catch (_) {
1104
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge page not cleared after 10s — continuing`));
1105
+ }
1049
1106
  result.success = true;
1050
1107
  result.method = 'js_challenge_wait';
1051
1108
  if (forceDebug) console.log(formatLogMessage('cloudflare', `JS challenge completed successfully for ${currentUrl}`));
@@ -1054,6 +1111,8 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
1054
1111
  } catch (jsError) {
1055
1112
  if (forceDebug) console.log(formatLogMessage('cloudflare', `JS challenge wait failed for ${currentUrl}: ${jsError.message}`));
1056
1113
  }
1114
+ } else if (forceDebug) {
1115
+ console.log(formatLogMessage('cloudflare', `Skipping JS challenge method (not detected)`));
1057
1116
  }
1058
1117
 
1059
1118
  // Method 2: Handle Turnstile challenges (interactive)
@@ -1071,6 +1130,8 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
1071
1130
  } catch (turnstileError) {
1072
1131
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile method failed for ${currentUrl}: ${turnstileError.message}`));
1073
1132
  }
1133
+ } else if (forceDebug) {
1134
+ console.log(formatLogMessage('cloudflare', `Skipping Turnstile method (not detected)`));
1074
1135
  }
1075
1136
 
1076
1137
  // Method 3: Legacy checkbox interaction (fallback)
@@ -1088,10 +1149,23 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
1088
1149
  } catch (legacyError) {
1089
1150
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Legacy checkbox method failed for ${currentUrl}: ${legacyError.message}`));
1090
1151
  }
1152
+ } else if (forceDebug) {
1153
+ console.log(formatLogMessage('cloudflare', `Skipping legacy checkbox method (not detected)`));
1091
1154
  }
1092
1155
 
1093
1156
  if (!result.success) {
1094
1157
  result.error = result.error || 'All challenge bypass methods failed';
1158
+ if (forceDebug) {
1159
+ try {
1160
+ const postState = await page.evaluate(() => ({
1161
+ title: document.title,
1162
+ url: window.location.href,
1163
+ body: (document.body?.textContent || '').substring(0, 300)
1164
+ }));
1165
+ console.log(formatLogMessage('cloudflare', `Post-attempt page state: title="${postState.title}" url=${postState.url}`));
1166
+ console.log(formatLogMessage('cloudflare', `Post-attempt body: ${postState.body}`));
1167
+ } catch (_) {}
1168
+ }
1095
1169
  }
1096
1170
 
1097
1171
  return result;
@@ -1109,88 +1183,57 @@ async function handleEmbeddedIframeChallenge(page, forceDebug = false) {
1109
1183
  try {
1110
1184
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Checking for embedded iframe challenges`));
1111
1185
 
1112
- // Enhanced iframe selectors including challenges.cloudflare.com
1113
- const iframeSelectors = [
1114
- 'iframe[src*="challenges.cloudflare.com"]',
1115
- 'iframe[title*="Verify you are human"]',
1116
- 'iframe[title*="Cloudflare security challenge"]',
1117
- 'iframe[title*="Widget containing a Cloudflare"]'
1118
- ];
1119
-
1120
- // Wait for iframe to appear
1121
- let iframeFound = false;
1122
- for (const selector of iframeSelectors) {
1123
- try {
1124
- await Promise.race([
1125
- page.waitForSelector(selector, { timeout: FAST_TIMEOUTS.SELECTOR_WAIT }),
1126
- new Promise((_, reject) => setTimeout(() => reject(new Error('Timeout')), FAST_TIMEOUTS.SELECTOR_WAIT + 1000))
1127
- ]);
1128
- iframeFound = true;
1129
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Found iframe: ${selector}`));
1130
- break;
1131
- } catch (e) {
1132
- continue;
1186
+ // Use CDP-level frame detection — bypasses closed shadow roots
1187
+ const frames = page.frames();
1188
+ if (forceDebug) {
1189
+ console.log(formatLogMessage('cloudflare', `Available frames (${frames.length}):`));
1190
+ for (const f of frames) {
1191
+ console.log(formatLogMessage('cloudflare', ` ${f.url()}`));
1133
1192
  }
1134
1193
  }
1135
-
1136
- if (!iframeFound) {
1137
- result.error = 'No embedded iframe found';
1138
- return result;
1139
- }
1140
-
1141
- // Find challenge frame using existing frame detection logic
1142
- const frames = await page.frames();
1143
1194
  const challengeFrame = frames.find(frame => {
1144
1195
  const frameUrl = frame.url();
1145
1196
  return frameUrl.includes('challenges.cloudflare.com') ||
1197
+ frameUrl.includes('/cdn-cgi/challenge-platform/') ||
1146
1198
  frameUrl.includes('/turnstile/if/') ||
1147
- frameUrl.includes('captcha-delivery.com') ||
1148
- frameUrl.includes('/challenge-platform/') ||
1149
1199
  frameUrl.includes('turnstile');
1150
1200
  });
1151
1201
 
1152
1202
  if (!challengeFrame) {
1153
- result.error = 'Challenge iframe not accessible';
1203
+ result.error = 'No challenge frame found via CDP';
1154
1204
  return result;
1155
1205
  }
1156
1206
 
1157
1207
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Interacting with iframe: ${challengeFrame.url()}`));
1158
1208
 
1159
- // Reuse existing checkbox interaction logic
1160
- const checkboxSelectors = [
1209
+ await waitForTimeout(page, 500);
1210
+
1211
+ let checkboxInteractionSuccess = false;
1212
+ try {
1213
+ const shadowResult = await clickInShadowDOM(challengeFrame, [
1161
1214
  'input[type="checkbox"]',
1162
1215
  '.ctp-checkbox',
1163
- 'input.ctp-checkbox',
1164
- '.cf-turnstile input',
1165
- '.ctp-checkbox-label'
1166
- ];
1167
-
1168
- let checkboxInteractionSuccess = false;
1169
- for (const selector of checkboxSelectors) {
1170
- try {
1171
- await Promise.race([
1172
- challengeFrame.waitForSelector(selector, { timeout: FAST_TIMEOUTS.SELECTOR_WAIT }),
1173
- new Promise((_, reject) => setTimeout(() => reject(new Error('Timeout')), FAST_TIMEOUTS.SELECTOR_WAIT + 1000))
1174
- ]);
1175
-
1176
- await waitForTimeout(page, FAST_TIMEOUTS.ELEMENT_INTERACTION_DELAY);
1177
- await challengeFrame.click(selector);
1178
-
1179
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Clicked iframe element: ${selector}`));
1216
+ '.ctp-checkbox-label',
1217
+ '[role="checkbox"]',
1218
+ 'label.cb-lb',
1219
+ 'label'
1220
+ ], forceDebug);
1221
+
1222
+ if (shadowResult.clicked) {
1180
1223
  checkboxInteractionSuccess = true;
1181
- break;
1182
- } catch (e) {
1183
- continue;
1224
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM click succeeded: ${shadowResult.selector}`));
1225
+ } else if (shadowResult.found && shadowResult.x > 0) {
1226
+ await page.mouse.click(shadowResult.x, shadowResult.y);
1227
+ checkboxInteractionSuccess = true;
1228
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM mouse fallback at (${shadowResult.x}, ${shadowResult.y})`));
1184
1229
  }
1230
+ } catch (shadowErr) {
1231
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM click failed: ${shadowErr.message}`));
1185
1232
  }
1186
1233
 
1187
- // Try alternative interaction only if standard selectors failed
1188
1234
  if (!checkboxInteractionSuccess) {
1189
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Checkbox interactions failed, trying container fallback`));
1190
- await waitForTimeout(page, 1000);
1191
1235
 
1192
1236
  try {
1193
- // Try clicking on the iframe container itself as fallback
1194
1237
  const iframeElement = await page.$('iframe[src*="challenges.cloudflare.com"]');
1195
1238
  if (iframeElement) {
1196
1239
  await iframeElement.click();
@@ -1199,8 +1242,6 @@ async function handleEmbeddedIframeChallenge(page, forceDebug = false) {
1199
1242
  } catch (containerClickError) {
1200
1243
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Container click failed: ${containerClickError.message}`));
1201
1244
  }
1202
- } else {
1203
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Checkbox interaction successful, skipping container fallback`));
1204
1245
  }
1205
1246
 
1206
1247
  // Reuse existing completion check pattern with error handling
@@ -1257,8 +1298,10 @@ async function waitForJSChallengeCompletion(page, forceDebug = false) {
1257
1298
  await Promise.race([
1258
1299
  page.waitForFunction(
1259
1300
  () => {
1260
- return !document.body.textContent.includes('Checking your browser') &&
1261
- !document.body.textContent.includes('Please wait while we verify') &&
1301
+ const bodyText = document.body.textContent;
1302
+ if (bodyText.includes('Verification successful')) return true;
1303
+ return !bodyText.includes('Checking your browser') &&
1304
+ !bodyText.includes('Please wait while we verify') &&
1262
1305
  !document.querySelector('.cf-challenge-running') &&
1263
1306
  !document.querySelector('[data-cf-challenge]');
1264
1307
  },
@@ -1342,28 +1385,26 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
1342
1385
  console.log(formatLogMessage('cloudflare', `Found Turnstile iframe with URL: ${turnstileFrame.url()}`));
1343
1386
  }
1344
1387
 
1345
- const checkboxSelectors = [
1346
- 'input[type="checkbox"].ctp-checkbox',
1347
- '.ctp-checkbox-label',
1348
- '.ctp-checkbox'
1349
- ];
1350
-
1351
- for (const selector of checkboxSelectors) {
1352
- try {
1353
- await Promise.race([
1354
- turnstileFrame.waitForSelector(selector, { timeout: FAST_TIMEOUTS.SELECTOR_WAIT }),
1355
- new Promise((_, reject) => setTimeout(() => reject(new Error('Checkbox timeout')), FAST_TIMEOUTS.SELECTOR_WAIT + 500))
1356
- ]);
1357
-
1358
- await waitForTimeout(page, FAST_TIMEOUTS.ELEMENT_INTERACTION_DELAY);
1359
- await turnstileFrame.click(selector);
1360
-
1361
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Clicked Turnstile checkbox: ${selector}`));
1362
- break;
1363
- } catch (e) {
1364
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Checkbox selector ${selector} not found or failed to click`));
1365
- continue;
1388
+ await waitForTimeout(page, FAST_TIMEOUTS.ELEMENT_INTERACTION_DELAY);
1389
+
1390
+ try {
1391
+ const shadowResult = await clickInShadowDOM(turnstileFrame, [
1392
+ 'input[type="checkbox"]',
1393
+ '.ctp-checkbox',
1394
+ '.ctp-checkbox-label',
1395
+ '[role="checkbox"]',
1396
+ 'label.cb-lb',
1397
+ 'label'
1398
+ ], forceDebug);
1399
+
1400
+ if (shadowResult.clicked) {
1401
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile shadow DOM click succeeded: ${shadowResult.selector}`));
1402
+ } else if (shadowResult.found && shadowResult.x > 0) {
1403
+ await page.mouse.click(shadowResult.x, shadowResult.y);
1404
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile shadow DOM mouse fallback at (${shadowResult.x}, ${shadowResult.y})`));
1366
1405
  }
1406
+ } catch (shadowErr) {
1407
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM fallback failed: ${shadowErr.message}`));
1367
1408
  }
1368
1409
 
1369
1410
  // Wait for Turnstile completion with reduced timeout
@@ -1551,7 +1592,11 @@ async function checkChallengeCompletion(page) {
1551
1592
  * }
1552
1593
  */
1553
1594
  async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDebug = false) {
1554
- if (forceDebug) {
1595
+ const cfDebug = forceDebug || siteConfig.cloudflare_bypass === 'debug' || siteConfig.cloudflare_phish === 'debug';
1596
+ const cfBypassEnabled = siteConfig.cloudflare_bypass === true || siteConfig.cloudflare_bypass === 'debug';
1597
+ const cfPhishEnabled = siteConfig.cloudflare_phish === true || siteConfig.cloudflare_phish === 'debug';
1598
+
1599
+ if (cfDebug) {
1555
1600
  console.log(formatLogMessage('cloudflare', `Using Cloudflare module v${CLOUDFLARE_MODULE_VERSION} for ${currentUrl}`));
1556
1601
  }
1557
1602
 
@@ -1581,7 +1626,7 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1581
1626
  // Sets attempted: false, success: true for both protection types
1582
1627
 
1583
1628
  // Only proceed if we have indicators OR explicit config enables Cloudflare handling
1584
- if (!quickDetection.hasIndicators && !siteConfig.cloudflare_phish && !siteConfig.cloudflare_bypass) {
1629
+ if (!quickDetection.hasIndicators && !cfPhishEnabled && !cfBypassEnabled) {
1585
1630
  if (forceDebug) console.log(formatLogMessage('cloudflare', `No Cloudflare indicators found and no explicit config, skipping protection handling for ${currentUrl}`));
1586
1631
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Quick detection details: title="${quickDetection.title}", bodySnippet="${quickDetection.bodySnippet}"`));
1587
1632
  return {
@@ -1606,7 +1651,7 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1606
1651
  try {
1607
1652
  // Adaptive timeout based on detection results and explicit config
1608
1653
  let adaptiveTimeout;
1609
- if (siteConfig.cloudflare_phish || siteConfig.cloudflare_bypass) {
1654
+ if (cfPhishEnabled || cfBypassEnabled) {
1610
1655
  // Explicit config - give more time
1611
1656
  adaptiveTimeout = quickDetection.hasIndicators ? TIMEOUTS.ADAPTIVE_TIMEOUT_WITH_INDICATORS : TIMEOUTS.ADAPTIVE_TIMEOUT_WITHOUT_INDICATORS;
1612
1657
  } else {
@@ -1619,7 +1664,7 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1619
1664
  }
1620
1665
 
1621
1666
  return await Promise.race([
1622
- performCloudflareHandling(page, currentUrl, siteConfig, forceDebug),
1667
+ performCloudflareHandling(page, currentUrl, siteConfig, cfDebug),
1623
1668
  new Promise((resolve) => {
1624
1669
  setTimeout(() => {
1625
1670
  console.warn(formatLogMessage('cloudflare', `Adaptive timeout (${adaptiveTimeout}ms) for ${currentUrl} - continuing with scan`));
@@ -1651,6 +1696,9 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1651
1696
  * @returns {Promise<Object>} Same structure as handleCloudflareProtection()
1652
1697
  */
1653
1698
  async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebug = false) {
1699
+ const cfBypassEnabled = siteConfig.cloudflare_bypass === true || siteConfig.cloudflare_bypass === 'debug';
1700
+ const cfPhishEnabled = siteConfig.cloudflare_phish === true || siteConfig.cloudflare_phish === 'debug';
1701
+
1654
1702
  const result = {
1655
1703
  phishingWarning: { attempted: false, success: false },
1656
1704
  verificationChallenge: { attempted: false, success: false },
@@ -1663,7 +1711,7 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
1663
1711
  // Handle phishing warnings first - updates result.phishingWarning
1664
1712
  // Only runs if siteConfig.cloudflare_phish === true
1665
1713
  // Handle phishing warnings if enabled
1666
- if (siteConfig.cloudflare_phish === true) {
1714
+ if (cfPhishEnabled) {
1667
1715
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Phishing warning bypass enabled for ${currentUrl}`));
1668
1716
 
1669
1717
  const phishingResult = await handlePhishingWarningWithRetries(page, currentUrl, siteConfig, forceDebug);
@@ -1698,7 +1746,7 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
1698
1746
  // Only runs if siteConfig.cloudflare_bypass === true
1699
1747
  // Sets requiresHuman: true if CAPTCHA detected (no bypass attempted)
1700
1748
  // Handle verification challenges if enabled
1701
- if (siteConfig.cloudflare_bypass === true) {
1749
+ if (cfBypassEnabled) {
1702
1750
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge bypass enabled for ${currentUrl}`));
1703
1751
 
1704
1752
  const challengeResult = await handleVerificationChallengeWithRetries(page, currentUrl, siteConfig, forceDebug);
@@ -1751,55 +1799,28 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
1751
1799
  * Performs parallel detection of multiple challenge types for better performance
1752
1800
  */
1753
1801
  async function parallelChallengeDetection(page, forceDebug = false) {
1754
- const detectionPromises = [];
1755
-
1756
- // Check for JS challenge
1757
- detectionPromises.push(
1758
- page.evaluate(() => {
1759
- return {
1760
- type: 'js',
1761
- detected: document.querySelector('script[src*="/cdn-cgi/challenge-platform/"]') !== null ||
1762
- document.body?.textContent?.includes('Checking your browser') ||
1763
- document.body?.textContent?.includes('Please wait while we verify')
1764
- };
1765
- }).catch(err => ({ type: 'js', detected: false, error: err.message }))
1766
- );
1767
-
1768
- // Check for Turnstile
1769
- detectionPromises.push(
1770
- page.evaluate(() => {
1771
- return {
1772
- type: 'turnstile',
1773
- detected: document.querySelector('.cf-turnstile') !== null ||
1774
- document.querySelector('iframe[src*="challenges.cloudflare.com"]') !== null ||
1775
- document.querySelector('.ctp-checkbox-container') !== null
1776
- };
1777
- }).catch(err => ({ type: 'turnstile', detected: false, error: err.message }))
1778
- );
1779
-
1780
- // Check for phishing warning
1781
- detectionPromises.push(
1782
- page.evaluate(() => {
1783
- return {
1784
- type: 'phishing',
1785
- detected: document.body?.textContent?.includes('This website has been reported for potential phishing') ||
1786
- document.querySelector('a[href*="continue"]') !== null
1787
- };
1788
- }).catch(err => ({ type: 'phishing', detected: false, error: err.message }))
1789
- );
1790
-
1791
- // Check for managed challenge
1792
- detectionPromises.push(
1793
- page.evaluate(() => {
1794
- return {
1795
- type: 'managed',
1796
- detected: document.querySelector('.cf-managed-challenge') !== null ||
1797
- document.querySelector('[data-cf-managed]') !== null
1798
- };
1799
- }).catch(err => ({ type: 'managed', detected: false, error: err.message }))
1800
- );
1801
-
1802
- const results = await Promise.all(detectionPromises);
1802
+ let results;
1803
+ try {
1804
+ results = await page.evaluate(() => {
1805
+ const bodyText = document.body?.textContent || '';
1806
+ return [
1807
+ { type: 'js', detected: document.querySelector('script[src*="/cdn-cgi/challenge-platform/"]') !== null ||
1808
+ bodyText.includes('Checking your browser') || bodyText.includes('Please wait while we verify') },
1809
+ { type: 'turnstile', detected: document.querySelector('.cf-turnstile') !== null ||
1810
+ document.querySelector('iframe[src*="challenges.cloudflare.com"]') !== null ||
1811
+ document.querySelector('.ctp-checkbox-container') !== null },
1812
+ { type: 'phishing', detected: bodyText.includes('This website has been reported for potential phishing') ||
1813
+ document.querySelector('a[href*="continue"]') !== null },
1814
+ { type: 'managed', detected: document.querySelector('.cf-managed-challenge') !== null ||
1815
+ document.querySelector('[data-cf-managed]') !== null }
1816
+ ];
1817
+ });
1818
+ } catch (err) {
1819
+ results = [
1820
+ { type: 'js', detected: false }, { type: 'turnstile', detected: false },
1821
+ { type: 'phishing', detected: false }, { type: 'managed', detected: false }
1822
+ ];
1823
+ }
1803
1824
 
1804
1825
  const detectedChallenges = results.filter(r => r.detected).map(r => r.type);
1805
1826
 
package/lib/proxy.js ADDED
@@ -0,0 +1,279 @@
1
+ /**
2
+ * Proxy Module for NWSS Network Scanner
3
+ * ======================================
4
+ * Routes specific site URLs through SOCKS5, SOCKS4, HTTP, or HTTPS proxies.
5
+ *
6
+ * Chromium's --proxy-server flag is browser-wide, so sites requiring a proxy
7
+ * need a separate browser instance. This module handles:
8
+ * - Parsing proxy URLs (all supported protocols)
9
+ * - Generating Chromium launch args
10
+ * - Per-page authentication via Puppeteer
11
+ * - Proxy bypass lists
12
+ * - Proxy health checks
13
+ *
14
+ * CONFIG EXAMPLES:
15
+ *
16
+ * SOCKS5 (no auth):
17
+ * "proxy": "socks5://127.0.0.1:1080"
18
+ *
19
+ * SOCKS5 with auth:
20
+ * "proxy": "socks5://user:pass@127.0.0.1:1080"
21
+ *
22
+ * HTTP proxy (corporate):
23
+ * "proxy": "http://proxy.corp.com:3128"
24
+ *
25
+ * HTTP proxy with auth:
26
+ * "proxy": "http://user:pass@proxy.corp.com:8080"
27
+ *
28
+ * HTTPS proxy:
29
+ * "proxy": "https://secure-proxy.example.com:8443"
30
+ *
31
+ * With bypass list and remote DNS:
32
+ * "proxy": "socks5://127.0.0.1:1080",
33
+ * "proxy_bypass": ["localhost", "127.0.0.1", "*.local"],
34
+ * "proxy_remote_dns": true
35
+ *
36
+ * Debug mode:
37
+ * "proxy": "socks5://127.0.0.1:1080",
38
+ * "proxy_debug": true
39
+ *
40
+ * Legacy key (backwards compatible):
41
+ * "socks5_proxy": "socks5://127.0.0.1:1080"
42
+ *
43
+ * INTEGRATION (in nwss.js):
44
+ * const { needsProxy, getProxyArgs, applyProxyAuth, getProxyInfo } = require('./lib/proxy');
45
+ *
46
+ * // Before browser launch
47
+ * if (needsProxy(siteConfig)) {
48
+ * const proxyArgs = getProxyArgs(siteConfig, forceDebug);
49
+ * browserArgs.push(...proxyArgs);
50
+ * }
51
+ *
52
+ * // After page creation, before page.goto()
53
+ * await applyProxyAuth(page, siteConfig, forceDebug);
54
+ *
55
+ * @version 1.1.0
56
+ */
57
+
58
+ const { formatLogMessage } = require('./colorize');
59
+
60
+ const PROXY_MODULE_VERSION = '1.1.0';
61
+ const SUPPORTED_PROTOCOLS = ['socks5', 'socks4', 'http', 'https'];
62
+
63
+ const DEFAULT_PORTS = {
64
+ socks5: 1080,
65
+ socks4: 1080,
66
+ http: 8080,
67
+ https: 8443
68
+ };
69
+
/**
 * Looks up the proxy URL configured for a site.
 * The preferred "proxy" key wins; the legacy "socks5_proxy" key is the fallback.
 *
 * @param {object} siteConfig - Site configuration block
 * @returns {string|null} First truthy proxy value, or null when neither key is set
 */
function getConfiguredProxy(siteConfig) {
  const { proxy, socks5_proxy: legacyProxy } = siteConfig;
  if (proxy) {
    return proxy;
  }
  if (legacyProxy) {
    return legacyProxy;
  }
  return null;
}
80
+
/**
 * Parses a proxy URL into its components.
 * Accepts protocol://host:port, protocol://user:pass@host:port and bare
 * host:port (which is treated as socks5://host:port).
 *
 * @param {string} proxyUrl - Proxy URL string
 * @returns {{protocol: string, host: string, port: number, username: (string|null), password: (string|null)}|null}
 *   Parsed proxy, or null when the input is empty, not a string, malformed,
 *   or uses a protocol outside SUPPORTED_PROTOCOLS
 */
function parseProxyUrl(proxyUrl) {
  if (!proxyUrl || typeof proxyUrl !== 'string') return null;

  const trimmed = proxyUrl.trim();

  // Normalise bare host:port to a socks5:// URL
  const cleaned = trimmed.includes('://') ? trimmed : `socks5://${trimmed}`;

  try {
    const url = new URL(cleaned);
    const protocol = url.protocol.replace(':', '');

    if (!SUPPORTED_PROTOCOLS.includes(protocol)) return null;

    const host = url.hostname;
    if (!host) return null;

    // The URL parser reports an empty port when the written port equals the
    // scheme default (http:80, https:443). Without recovering it from the raw
    // text, "http://proxy:80" would silently become DEFAULT_PORTS.http.
    let portText = url.port;
    if (!portText) {
      const afterScheme = cleaned.slice(cleaned.indexOf('://') + 3);
      const authority = afterScheme.split(/[/?#]/, 1)[0];
      const hostAndPort = authority.slice(authority.lastIndexOf('@') + 1);
      const explicitPort = /:(\d+)$/.exec(hostAndPort);
      if (explicitPort) portText = explicitPort[1];
    }

    const port = Number.parseInt(portText, 10) || DEFAULT_PORTS[protocol] || 1080;
    const username = url.username ? decodeURIComponent(url.username) : null;
    const password = url.password ? decodeURIComponent(url.password) : null;

    return { protocol, host, port, username, password };
  } catch (_) {
    // Malformed URL or undecodable credentials: report as invalid
    return null;
  }
}
116
+
/**
 * Tells whether a site config has a proxy configured.
 *
 * @param {object} siteConfig - Site configuration block
 * @returns {boolean} True when getConfiguredProxy() yields a truthy value
 */
function needsProxy(siteConfig) {
  const configured = getConfiguredProxy(siteConfig);
  return Boolean(configured);
}
126
+
/**
 * Returns Chromium launch arguments for the configured proxy.
 *
 * Emits, in order:
 *   --proxy-server=<protocol>://<host>:<port>   (credentials are not included)
 *   --host-resolver-rules=...                   (SOCKS5 only, unless remote DNS is set to false)
 *   --proxy-bypass-list=a;b;c                   (when a bypass list is configured)
 *
 * @param {object} siteConfig - Site configuration block
 * @param {boolean} forceDebug - Log the generated args regardless of per-site debug keys
 * @returns {string[]} Array of Chromium args (empty if no proxy is configured or the URL is invalid)
 */
function getProxyArgs(siteConfig, forceDebug = false) {
  const proxyUrl = getConfiguredProxy(siteConfig);
  if (!proxyUrl) return [];

  const parsed = parseProxyUrl(proxyUrl);
  if (!parsed) {
    console.warn(formatLogMessage('proxy', `Invalid proxy URL: ${proxyUrl}`));
    return [];
  }

  const { protocol, host, port } = parsed;
  const args = [`--proxy-server=${protocol}://${host}:${port}`];

  // Remote DNS: block local resolution so hostnames are resolved by the proxy
  // (prevents DNS leaks). The proxy's own host must stay resolvable, so it is
  // the EXCLUDE target rather than a fixed loopback address.
  // Applied to SOCKS5 only: Chromium resolves SOCKS4 targets client-side, so
  // mapping every host to NOTFOUND would make all SOCKS4 requests fail.
  // HTTP/HTTPS proxies resolve remotely by default.
  const remoteDns = siteConfig.proxy_remote_dns ?? siteConfig.socks5_remote_dns;
  if (protocol === 'socks5' && remoteDns !== false) {
    args.push(`--host-resolver-rules=MAP * ~NOTFOUND , EXCLUDE ${host}`);
  }

  // Bypass list: domains that skip the proxy. A single string is accepted as
  // a one-entry list so a config typo does not throw on .join().
  const configuredBypass = siteConfig.proxy_bypass || siteConfig.socks5_bypass || [];
  const bypass = typeof configuredBypass === 'string' ? [configuredBypass] : configuredBypass;
  if (Array.isArray(bypass) && bypass.length > 0) {
    args.push(`--proxy-bypass-list=${bypass.join(';')}`);
  }

  const debug = forceDebug || siteConfig.proxy_debug || siteConfig.socks5_debug;
  if (debug) {
    console.log(formatLogMessage('proxy', `[${protocol}] Args: ${args.join(' ')}`));
  }

  return args;
}
168
+
/**
 * Hands the proxy credentials from the site config to the page through
 * page.authenticate(). Intended to run before page.goto().
 *
 * @param {object} page - Puppeteer page instance
 * @param {object} siteConfig - Site configuration block
 * @param {boolean} forceDebug - Log the applied credentials' user/host regardless of per-site debug keys
 * @returns {Promise<boolean>} True if credentials were applied; false when no
 *   proxy is configured, the URL is invalid, it carries no username, or
 *   page.authenticate() fails
 */
async function applyProxyAuth(page, siteConfig, forceDebug = false) {
  const configured = getConfiguredProxy(siteConfig);
  if (!configured) {
    return false;
  }

  const proxy = parseProxyUrl(configured);
  const hasCredentials = Boolean(proxy && proxy.username);
  if (!hasCredentials) {
    return false;
  }

  const { username, password, host, port } = proxy;
  try {
    // A missing password is sent as an empty string
    await page.authenticate({ username, password: password || '' });

    if (forceDebug || siteConfig.proxy_debug || siteConfig.socks5_debug) {
      console.log(formatLogMessage('proxy', `Auth set for ${username}@${host}:${port}`));
    }

    return true;
  } catch (authErr) {
    console.warn(formatLogMessage('proxy', `Failed to set proxy auth: ${authErr.message}`));
    return false;
  }
}
202
+
/**
 * Tests proxy connectivity by attempting a plain TCP connection to the
 * configured proxy host and port. No proxy handshake is performed.
 *
 * @param {object} siteConfig - Site configuration block
 * @param {number} timeoutMs - Connection timeout (default 5000ms)
 * @returns {Promise<object>} { reachable, latencyMs, error } — never rejects;
 *   failures are reported through reachable: false and an error message
 */
async function testProxy(siteConfig, timeoutMs = 5000) {
  const proxyUrl = getConfiguredProxy(siteConfig);
  if (!proxyUrl) {
    return { reachable: false, latencyMs: 0, error: 'No proxy configured' };
  }

  const parsed = parseProxyUrl(proxyUrl);
  if (!parsed) {
    return { reachable: false, latencyMs: 0, error: 'Invalid proxy URL' };
  }

  // URL hostnames keep the brackets around IPv6 literals ("[::1]"); the socket
  // API expects the bare address, otherwise it attempts a DNS lookup and fails.
  const connectHost = parsed.host.replace(/^\[|\]$/g, '');

  const net = require('net');
  const start = Date.now();

  return new Promise((resolve) => {
    const socket = new net.Socket();
    let settled = false;

    // Report exactly one outcome and always release the socket
    const finish = (outcome) => {
      if (settled) return;
      settled = true;
      socket.destroy();
      resolve(outcome);
    };

    const onError = (err) => {
      finish({ reachable: false, latencyMs: Date.now() - start, error: err.message });
    };

    socket.setTimeout(timeoutMs);
    socket.on('error', onError);
    socket.on('timeout', () => onError(new Error('Connection timeout')));

    socket.connect(parsed.port, connectHost, () => {
      finish({ reachable: true, latencyMs: Date.now() - start, error: null });
    });
  });
}
243
+
/**
 * Builds a short proxy description for log output.
 * The password is never included; the username is kept when present.
 *
 * @param {object} siteConfig - Site configuration block
 * @returns {string} 'none' when no proxy is configured, 'invalid' when the URL
 *   cannot be parsed, otherwise "<protocol>://[user@]<host>:<port>"
 */
function getProxyInfo(siteConfig) {
  const configured = getConfiguredProxy(siteConfig);
  if (!configured) {
    return 'none';
  }

  const proxy = parseProxyUrl(configured);
  if (!proxy) {
    return 'invalid';
  }

  const { protocol, username, host, port } = proxy;
  const userPrefix = username ? `${username}@` : '';
  return `${protocol}://${userPrefix}${host}:${port}`;
}
260
+
/**
 * Describes this module.
 *
 * @returns {{version: string, name: string}} PROXY_MODULE_VERSION and the display name
 */
function getModuleInfo() {
  const info = {
    version: PROXY_MODULE_VERSION,
    name: 'Proxy Handler',
  };
  return info;
}
267
+
268
+ module.exports = {
269
+ parseProxyUrl,
270
+ needsProxy,
271
+ getProxyArgs,
272
+ applyProxyAuth,
273
+ testProxy,
274
+ getProxyInfo,
275
+ getModuleInfo,
276
+ getConfiguredProxy,
277
+ PROXY_MODULE_VERSION,
278
+ SUPPORTED_PROTOCOLS
279
+ };
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v2.0.33 ===
1
+ // === Network scanner script (nwss.js) v2.0.51 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -44,6 +44,7 @@ const { performPageInteraction, createInteractionConfig } = require('./lib/inter
44
44
  const { createGlobalHelpers, getTotalDomainsSkipped, getDetectedDomainsCount } = require('./lib/domain-cache');
45
45
  const { createSmartCache } = require('./lib/smart-cache'); // Smart cache system
46
46
  const { clearPersistentCache } = require('./lib/smart-cache');
47
+ const { needsProxy, getProxyArgs, applyProxyAuth, getProxyInfo, testProxy } = require('./lib/proxy');
47
48
  // Dry run functionality
48
49
  const { initializeDryRunCollections, addDryRunMatch, addDryRunNetTools, processDryRunResults, writeDryRunOutput } = require('./lib/dry-run');
49
50
  // Enhanced site data clearing functionality
@@ -1354,7 +1355,7 @@ function setupFrameHandling(page, forceDebug) {
1354
1355
  * Uses system Chrome and temporary directories to minimize disk usage
1355
1356
  * @returns {Promise<import('puppeteer').Browser>} Browser instance
1356
1357
  */
1357
- async function createBrowser() {
1358
+ async function createBrowser(extraArgs = []) {
1358
1359
  // Create temporary user data directory that we can fully control and clean up
1359
1360
  const tempUserDataDir = `/tmp/puppeteer-${Date.now()}-${Math.random().toString(36).substring(7)}`;
1360
1361
  userDataDir = tempUserDataDir; // Store for cleanup tracking (use outer scope variable)
@@ -1458,6 +1459,7 @@ function setupFrameHandling(page, forceDebug) {
1458
1459
  '--disable-background-timer-throttling',
1459
1460
  '--disable-features=site-per-process', // Better for single-site scanning
1460
1461
  '--no-zygote', // Better process isolation
1462
+ ...extraArgs,
1461
1463
  ],
1462
1464
  // Optimized timeouts for Puppeteer 23.x performance
1463
1465
  protocolTimeout: TIMEOUTS.PROTOCOL_TIMEOUT,
@@ -2130,6 +2132,11 @@ function setupFrameHandling(page, forceDebug) {
2130
2132
  }
2131
2133
  }
2132
2134
 
2135
+ // --- Apply proxy authentication if configured ---
2136
+ if (needsProxy(siteConfig)) {
2137
+ await applyProxyAuth(page, siteConfig, forceDebug);
2138
+ }
2139
+
2133
2140
  // --- Apply all fingerprint spoofing (user agent, Brave, fingerprint protection) ---
2134
2141
  try {
2135
2142
  await applyAllFingerprintSpoofing(page, siteConfig, forceDebug, currentUrl);
@@ -3337,6 +3344,25 @@ function setupFrameHandling(page, forceDebug) {
3337
3344
  siteCounter++;
3338
3345
  // Continue processing with the redirected URL instead of throwing error
3339
3346
  } else {
3347
+ // Detect proxy-specific failures and provide clear diagnostics
3348
+ if (needsProxy(siteConfig) && err.message) {
3349
+ const proxyErrors = [
3350
+ 'ERR_PROXY_CONNECTION_FAILED',
3351
+ 'ERR_SOCKS_CONNECTION_FAILED',
3352
+ 'ERR_TUNNEL_CONNECTION_FAILED',
3353
+ 'ERR_PROXY_AUTH_UNSUPPORTED',
3354
+ 'ERR_PROXY_AUTH_REQUESTED',
3355
+ 'ERR_SOCKS_CONNECTION_HOST_UNREACHABLE',
3356
+ 'ERR_PROXY_CERTIFICATE_INVALID',
3357
+ 'ERR_NO_SUPPORTED_PROXIES'
3358
+ ];
3359
+ const proxyErr = proxyErrors.find(e => err.message.includes(e));
3360
+ if (proxyErr) {
3361
+ const info = getProxyInfo(siteConfig);
3362
+ console.error(formatLogMessage('error', `[proxy] ${proxyErr} — proxy: ${info} — URL: ${currentUrl}`));
3363
+ console.error(formatLogMessage('error', `[proxy] Check: is the proxy running? Are credentials correct? Is the target reachable from the proxy?`));
3364
+ }
3365
+ }
3340
3366
  console.error(formatLogMessage('error', `Failed on ${currentUrl}: ${err.message}`));
3341
3367
  throw err;
3342
3368
  }
@@ -3662,6 +3688,26 @@ function setupFrameHandling(page, forceDebug) {
3662
3688
  }
3663
3689
 
3664
3690
  } catch (err) {
3691
+ // Detect proxy-specific failures at top level
3692
+ if (needsProxy(siteConfig) && err.message) {
3693
+ const proxyErrors = [
3694
+ 'ERR_PROXY_CONNECTION_FAILED',
3695
+ 'ERR_SOCKS_CONNECTION_FAILED',
3696
+ 'ERR_TUNNEL_CONNECTION_FAILED',
3697
+ 'ERR_PROXY_AUTH_UNSUPPORTED',
3698
+ 'ERR_PROXY_AUTH_REQUESTED',
3699
+ 'ERR_SOCKS_CONNECTION_HOST_UNREACHABLE',
3700
+ 'ERR_PROXY_CERTIFICATE_INVALID',
3701
+ 'ERR_NO_SUPPORTED_PROXIES'
3702
+ ];
3703
+ const proxyErr = proxyErrors.find(e => err.message.includes(e));
3704
+ if (proxyErr) {
3705
+ const info = getProxyInfo(siteConfig);
3706
+ console.error(formatLogMessage('error', `[proxy] ${proxyErr} — proxy: ${info} — URL: ${currentUrl}`));
3707
+ console.error(formatLogMessage('error', `[proxy] Check: is the proxy running? Are credentials correct? Is the target reachable from the proxy?`));
3708
+ }
3709
+ }
3710
+
3665
3711
  // Only restart for truly fatal browser errors
3666
3712
  const isFatalError = CRITICAL_BROWSER_ERRORS.some(errorType =>
3667
3713
  err.message.includes(errorType)
@@ -3789,6 +3835,14 @@ function setupFrameHandling(page, forceDebug) {
3789
3835
  }
3790
3836
  }
3791
3837
 
3838
+ // Helper to get a stable proxy key for grouping browser instances
3839
+ const proxyKeyFor = (siteConfig) => {
3840
+ if (!needsProxy(siteConfig)) return '';
3841
+ return getProxyInfo(siteConfig);
3842
+ };
3843
+
3844
+ // Sort tasks so proxy groups are contiguous — direct connections first, then each proxy
3845
+ allTasks.sort((a, b) => proxyKeyFor(a.config).localeCompare(proxyKeyFor(b.config)));
3792
3846
 
3793
3847
  let results = [];
3794
3848
  let processedUrlCount = 0;
@@ -3832,6 +3886,7 @@ function setupFrameHandling(page, forceDebug) {
3832
3886
 
3833
3887
  // Process URLs in batches with exception handling
3834
3888
  let siteGroupIndex = 0;
3889
+ let currentProxyKey = ''; // Track active proxy config — '' means direct connection
3835
3890
  try {
3836
3891
  for (let batchStart = 0; batchStart < totalUrls; batchStart += RESOURCE_CLEANUP_INTERVAL) {
3837
3892
  const batchEnd = Math.min(batchStart + RESOURCE_CLEANUP_INTERVAL, totalUrls);
@@ -3952,14 +4007,67 @@ function setupFrameHandling(page, forceDebug) {
3952
4007
  if (forceDebug) console.log(formatLogMessage('debug', `Browser cleanup warning: ${browserCloseErr.message}`));
3953
4008
  }
3954
4009
 
3955
- // Create new browser for next batch
3956
- browser = await createBrowser();
4010
+ // Create new browser for next batch (preserve current proxy config)
4011
+ const restartProxyArgs = currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : [];
4012
+ browser = await createBrowser(restartProxyArgs);
3957
4013
  if (forceDebug) console.log(formatLogMessage('debug', `New browser instance created for batch ${Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL) + 1}`));
3958
4014
 
3959
4015
  // Reset cleanup counter and add delay
3960
4016
  urlsSinceLastCleanup = 0;
3961
4017
  await fastTimeout(TIMEOUTS.BROWSER_STABILIZE_DELAY);
3962
4018
  }
4019
+
4020
+ // --- Proxy-aware browser restart ---
4021
+ // --proxy-server is browser-wide, so if the batch needs a different proxy we must restart
4022
+ const batchProxyKey = proxyKeyFor(currentBatch[0].config);
4023
+ if (batchProxyKey !== currentProxyKey) {
4024
+ const debug = forceDebug || currentBatch[0].config.proxy_debug || currentBatch[0].config.socks5_debug;
4025
+ if (debug) {
4026
+ const from = currentProxyKey || 'direct';
4027
+ const to = batchProxyKey || 'direct';
4028
+ console.log(formatLogMessage('proxy', `Switching proxy: ${from} → ${to}`));
4029
+ }
4030
+
4031
+ try {
4032
+ await handleBrowserExit(browser, {
4033
+ forceDebug, timeout: 10000, exitOnFailure: false,
4034
+ cleanTempFiles: true, comprehensiveCleanup: removeTempFiles
4035
+ });
4036
+ if (userDataDir && fs.existsSync(userDataDir)) {
4037
+ fs.rmSync(userDataDir, { recursive: true, force: true });
4038
+ }
4039
+ } catch (proxyRestartErr) {
4040
+ if (forceDebug) console.log(formatLogMessage('debug', `Proxy switch browser cleanup: ${proxyRestartErr.message}`));
4041
+ }
4042
+
4043
+ const proxyArgs = batchProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : [];
4044
+
4045
+ // Pre-flight: verify proxy is reachable before launching browser
4046
+ if (proxyArgs.length > 0) {
4047
+ const health = await testProxy(currentBatch[0].config, 5000);
4048
+ if (!health.reachable) {
4049
+ const info = getProxyInfo(currentBatch[0].config);
4050
+ console.error(formatLogMessage('error', `[proxy] Unreachable: ${info} — ${health.error}`));
4051
+ console.error(formatLogMessage('error', `[proxy] Skipping ${currentBatch.length} URL(s) in this batch`));
4052
+ const skipResults = currentBatch.map(task => ({
4053
+ success: false, url: task.url, rules: [],
4054
+ error: `Proxy unreachable: ${health.error}`
4055
+ }));
4056
+ results.push(...skipResults);
4057
+ processedUrlCount += currentBatch.length;
4058
+ urlsSinceLastCleanup += currentBatch.length;
4059
+ continue;
4060
+ }
4061
+ if (forceDebug) {
4062
+ console.log(formatLogMessage('proxy', `Proxy reachable (${health.latencyMs}ms)`));
4063
+ }
4064
+ }
4065
+
4066
+ browser = await createBrowser(proxyArgs);
4067
+ currentProxyKey = batchProxyKey;
4068
+ urlsSinceLastCleanup = 0;
4069
+ await fastTimeout(TIMEOUTS.BROWSER_STABILIZE_DELAY);
4070
+ }
3963
4071
 
3964
4072
  if (forceDebug) {
3965
4073
  console.log(formatLogMessage('debug', `Processing batch ${Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL) + 1}: ${batchSize} URL(s) (total processed: ${processedUrlCount})`));
@@ -3986,7 +4094,8 @@ function setupFrameHandling(page, forceDebug) {
3986
4094
  console.log(formatLogMessage('error', `[TIMEOUT] Batch hung. Restarting browser.`));
3987
4095
  try {
3988
4096
  await handleBrowserExit(browser, { forceDebug, timeout: 5000, exitOnFailure: false });
3989
- browser = await createBrowser();
4097
+ const timeoutProxyArgs = currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : [];
4098
+ browser = await createBrowser(timeoutProxyArgs);
3990
4099
  urlsSinceLastCleanup = 0;
3991
4100
  } catch (restartErr) {
3992
4101
  throw restartErr;
@@ -4104,7 +4213,7 @@ function setupFrameHandling(page, forceDebug) {
4104
4213
  comprehensive: true
4105
4214
  });
4106
4215
  }
4107
- browser = await createBrowser();
4216
+ browser = await createBrowser(currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : []);
4108
4217
  urlsSinceLastCleanup = 0; // Reset counter
4109
4218
  await fastTimeout(TIMEOUTS.EMERGENCY_RESTART_DELAY); // Give browser time to stabilize
4110
4219
  } catch (emergencyRestartErr) {
@@ -4116,7 +4225,7 @@ function setupFrameHandling(page, forceDebug) {
4116
4225
  console.log(`\n${messageColors.fileOp('🔄 Emergency hang detection restart:')} Browser appears hung, forcing restart`);
4117
4226
  try {
4118
4227
  await handleBrowserExit(browser, { forceDebug, timeout: 5000, exitOnFailure: false, cleanTempFiles: true });
4119
- browser = await createBrowser();
4228
+ browser = await createBrowser(currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : []);
4120
4229
  urlsSinceLastCleanup = 0;
4121
4230
  forceRestartFlag = false; // Reset flag
4122
4231
  await fastTimeout(TIMEOUTS.EMERGENCY_RESTART_DELAY);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "2.0.49",
3
+ "version": "2.0.51",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {
@@ -48,7 +48,7 @@
48
48
  },
49
49
  "homepage": "https://github.com/ryanbr/network-scanner",
50
50
  "devDependencies": {
51
- "eslint": "^9.32.0",
51
+ "eslint": "^10.0.2",
52
52
  "globals": "^16.3.0"
53
53
  }
54
54
  }