@fanboynz/network-scanner 2.0.50 → 2.0.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -17,6 +17,7 @@ A Puppeteer-based tool for scanning websites to find third-party (or optionally
17
17
  - Subdomain handling (collapse to root or full subdomain)
18
18
  - Optionally match only first-party, third-party, or both
19
19
  - Enhanced redirect handling with JavaScript and meta refresh detection
20
+ - Per-site proxy routing (SOCKS5, SOCKS4, HTTP, HTTPS) with pre-flight health checks
20
21
 
21
22
  ---
22
23
 
@@ -315,6 +316,86 @@ Route traffic through a VPN for specific sites. Requires `sudo` privileges. The
315
316
 
316
317
  > **Authentication:** If the `.ovpn` file already contains credentials (via `auth-user-pass /path/to/file` or an inline `<auth-user-pass>` block), no additional config is needed — just provide the config path. The `username`/`password` fields are only needed when the `.ovpn` file has a bare `auth-user-pass` directive that expects interactive input.
317
318
 
319
+ ### Proxy Options
320
+
321
+ Route traffic through a proxy for specific sites. Supports SOCKS5, SOCKS4, HTTP, and HTTPS proxies. Unlike VPN, proxy routing is per-site-group — only URLs in the same config block use the proxy; other sites connect directly.
322
+
323
+ > **Note:** Chromium's `--proxy-server` flag is browser-wide. Sites requiring different proxies (or direct vs proxied) are automatically separated into different browser instances. Tasks are sorted so proxy groups are contiguous to minimise restarts.
324
+
325
+ | Field | Values | Default | Description |
326
+ |:---------------------|:-------|:-------:|:------------|
327
+ | `proxy` | String | - | Proxy URL: `socks5://host:port`, `http://host:port`, `https://host:port`, or `http://user:pass@host:port` |
328
+ | `proxy_bypass` | Array | `[]` | Domains that skip the proxy (e.g. `["localhost", "127.0.0.1", "*.local"]`) |
329
+ | `proxy_remote_dns` | Boolean | `true` | Resolve DNS through the proxy (SOCKS only — prevents DNS leaks) |
330
+ | `proxy_debug` | Boolean | `false` | Print proxy diagnostics: launch args, auth, health checks, error codes |
331
+
332
+ Legacy aliases (`socks5_proxy`, `socks5_bypass`, `socks5_remote_dns`, `socks5_debug`) are supported for backwards compatibility.
333
+
334
+ #### Proxy Examples
335
+
336
+ **SOCKS5 — no auth:**
337
+ ```json
338
+ {
339
+ "url": ["https://blocked-site.com/", "https://another-blocked.com/"],
340
+ "proxy": "socks5://127.0.0.1:1080",
341
+ "search_string": ["tracking.js"]
342
+ }
343
+ ```
344
+
345
+ **HTTP proxy with credentials:**
346
+ ```json
347
+ {
348
+ "url": ["https://geo-restricted.com/"],
349
+ "proxy": "http://user:pass@proxy.corp.com:3128",
350
+ "search_string": ["analytics"]
351
+ }
352
+ ```
353
+
354
+ **SOCKS5 with bypass list and debug:**
355
+ ```json
356
+ {
357
+ "url": ["https://target-site.com/"],
358
+ "proxy": "socks5://user:pass@proxy.example.com:9050",
359
+ "proxy_bypass": ["localhost", "127.0.0.1", "*.internal.corp"],
360
+ "proxy_remote_dns": true,
361
+ "proxy_debug": true,
362
+ "search_string": ["tracker"]
363
+ }
364
+ ```
365
+
366
+ **Mixed direct + proxied in one config:**
367
+ ```json
368
+ [
369
+ {
370
+ "url": ["https://direct-site.com/"],
371
+ "search_string": ["ads"]
372
+ },
373
+ {
374
+ "url": ["https://blocked-site.com/"],
375
+ "proxy": "socks5://127.0.0.1:1080",
376
+ "search_string": ["ads"]
377
+ }
378
+ ]
379
+ ```
380
+
381
+ #### Proxy Error Handling
382
+
383
+ If a proxy is unreachable, the batch is skipped with a clear error before any navigation is attempted:
384
+
385
+ ```
386
+ [error] [proxy] Unreachable: socks5://127.0.0.1:1080 — Connection refused
387
+ [error] [proxy] Skipping 5 URL(s) in this batch
388
+ ```
389
+
390
+ If a proxy fails mid-scan, Chromium's error code is detected and diagnosed:
391
+
392
+ ```
393
+ [error] [proxy] ERR_SOCKS_CONNECTION_FAILED — proxy: socks5://127.0.0.1:1080 — URL: https://example.com/
394
+ [error] [proxy] Check: is the proxy running? Are credentials correct? Is the target reachable from the proxy?
395
+ ```
396
+
397
+ Detected error codes: `ERR_PROXY_CONNECTION_FAILED`, `ERR_SOCKS_CONNECTION_FAILED`, `ERR_TUNNEL_CONNECTION_FAILED`, `ERR_PROXY_AUTH_UNSUPPORTED`, `ERR_PROXY_AUTH_REQUESTED`, `ERR_SOCKS_CONNECTION_HOST_UNREACHABLE`, `ERR_PROXY_CERTIFICATE_INVALID`, `ERR_NO_SUPPORTED_PROXIES`.
398
+
318
399
  ### Global Configuration Options
319
400
 
320
401
  These options go at the root level of your config.json:
package/lib/cloudflare.js CHANGED
@@ -58,11 +58,76 @@ const FAST_TIMEOUTS = {
58
58
  ELEMENT_INTERACTION_DELAY: 250, // Fast element interactions
59
59
  SELECTOR_WAIT: 3000, // Fast selector waits
60
60
  TURNSTILE_OPERATION: 6000, // Fast Turnstile operations
61
- JS_CHALLENGE: 19000, // Fast JS challenge completion
61
+ JS_CHALLENGE: 10000, // Fast JS challenge completion
62
62
  CHALLENGE_SOLVING: 30000, // Fast overall challenge solving
63
63
  CHALLENGE_COMPLETION: 8000 // Fast completion check
64
64
  };
65
65
 
66
+ /**
67
+ * Finds and clicks an element inside shadow DOM trees via page.evaluate
68
+ * Returns {found, clicked, x, y} - coordinates allow fallback mouse.click
69
+ */
70
+ async function clickInShadowDOM(context, selectors, forceDebug = false, waitMs = 1500) {
71
+ // Try Puppeteer's pierce/ selector first — handles CLOSED shadow roots via CDP
72
+ for (const selector of selectors) {
73
+ try {
74
+ // Wait for element to appear (handles delayed rendering)
75
+ const start = Date.now();
76
+ const element = await context.waitForSelector(`pierce/${selector}`, { timeout: waitMs });
77
+ if (element) {
78
+ const box = await element.boundingBox();
79
+ if (box && box.width > 0 && box.height > 0) {
80
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} matched in ${Date.now() - start}ms � box: ${box.width}x${box.height} at (${box.x},${box.y})`));
81
+ await element.click();
82
+ await element.dispose();
83
+ return { found: true, clicked: true, selector, x: box.x + box.width / 2, y: box.y + box.height / 2 };
84
+ }
85
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} found but not visible (0x0)`));
86
+ await element.dispose();
87
+ // Element found but not visible
88
+ return { found: true, clicked: false, selector, x: 0, y: 0 };
89
+ }
90
+ } catch (e) {
91
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} timeout after ${waitMs}ms`));
92
+ continue;
93
+ }
94
+ }
95
+
96
+ // Fallback: manual traversal for open shadow roots
97
+ const result = await context.evaluate((sels) => {
98
+ function deepQuery(root, selector) {
99
+ // Try direct query first
100
+ const el = root.querySelector(selector);
101
+ if (el) return el;
102
+
103
+ // Traverse shadow roots
104
+ const allElements = root.querySelectorAll('*');
105
+ for (const node of allElements) {
106
+ if (node.shadowRoot) {
107
+ const found = deepQuery(node.shadowRoot, selector);
108
+ if (found) return found;
109
+ }
110
+ }
111
+ return null;
112
+ }
113
+
114
+ for (const selector of sels) {
115
+ const el = deepQuery(document, selector);
116
+ if (el) {
117
+ const rect = el.getBoundingClientRect();
118
+ if (rect.width > 0 && rect.height > 0) {
119
+ el.click();
120
+ return { found: true, clicked: true, selector, x: rect.x + rect.width / 2, y: rect.y + rect.height / 2 };
121
+ }
122
+ return { found: true, clicked: false, selector, x: 0, y: 0 };
123
+ }
124
+ }
125
+ return { found: false, clicked: false, selector: null, x: 0, y: 0 };
126
+ }, selectors);
127
+
128
+ return result;
129
+ }
130
+
66
131
  /**
67
132
  * Error categories for better handling
68
133
  */
@@ -306,12 +371,12 @@ function categorizeError(error) {
306
371
  /**
307
372
  * Implements exponential backoff delay
308
373
  */
309
- async function getRetryDelay(attempt) {
374
+ function getRetryDelay(attempt) {
310
375
  const delay = Math.min(
311
376
  RETRY_CONFIG.baseDelay * Math.pow(RETRY_CONFIG.backoffMultiplier, attempt - 1),
312
377
  RETRY_CONFIG.maxDelay
313
378
  );
314
- return new Promise(resolve => setTimeout(resolve, delay));
379
+ return delay;
315
380
  }
316
381
 
317
382
  /**
@@ -341,32 +406,6 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
341
406
  throw new Error('Page URL access failed - likely detached');
342
407
  }
343
408
 
344
- // Quick execution context validation with timeout
345
- const contextValid = await Promise.race([
346
- page.evaluate(() => {
347
- try {
348
- // Quick context validation
349
- if (typeof window === 'undefined' || !document) {
350
- return false;
351
- }
352
- // Check if document is ready for interaction
353
- if (document.readyState === 'uninitialized') {
354
- return false;
355
- }
356
- return true;
357
- } catch (e) {
358
- return false;
359
- }
360
- }),
361
- new Promise((_, reject) => {
362
- setTimeout(() => reject(new Error('Context validation timeout')), 3500);
363
- })
364
- ]).catch(() => false);
365
-
366
- if (!contextValid) {
367
- throw new Error('Page execution context is invalid');
368
- }
369
-
370
409
  const result = await Promise.race([
371
410
  page.evaluate(func),
372
411
  new Promise((_, reject) => {
@@ -418,7 +457,7 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
418
457
  }
419
458
 
420
459
  // Wait before retrying with exponential backoff
421
- await getRetryDelay(attempt);
460
+ await new Promise(resolve => setTimeout(resolve, getRetryDelay(attempt)));
422
461
  }
423
462
  }
424
463
 
@@ -440,15 +479,18 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
440
479
  * Safe element clicking with timeout protection
441
480
  */
442
481
  async function safeClick(page, selector, timeout = TIMEOUTS.CLICK_TIMEOUT) {
482
+ let timeoutId;
443
483
  try {
444
484
  return await Promise.race([
445
485
  page.click(selector, { timeout: timeout }),
446
486
  new Promise((_, reject) => {
447
- setTimeout(() => reject(new Error('Click timeout')), timeout + TIMEOUTS.CLICK_TIMEOUT_BUFFER);
487
+ timeoutId = setTimeout(() => reject(new Error('Click timeout')), timeout + TIMEOUTS.CLICK_TIMEOUT_BUFFER);
448
488
  })
449
489
  ]);
450
490
  } catch (error) {
451
491
  throw new Error(`Click failed: ${error.message}`);
492
+ } finally {
493
+ if (timeoutId) clearTimeout(timeoutId);
452
494
  }
453
495
  }
454
496
 
@@ -456,16 +498,18 @@ async function safeClick(page, selector, timeout = TIMEOUTS.CLICK_TIMEOUT) {
456
498
  * Safe navigation waiting with timeout protection
457
499
  */
458
500
  async function safeWaitForNavigation(page, timeout = TIMEOUTS.NAVIGATION_TIMEOUT) {
501
+ let timeoutId;
459
502
  try {
460
503
  return await Promise.race([
461
504
  page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: timeout }),
462
505
  new Promise((_, reject) => {
463
- setTimeout(() => reject(new Error('Navigation timeout')), timeout + TIMEOUTS.NAVIGATION_TIMEOUT_BUFFER);
506
+ timeoutId = setTimeout(() => reject(new Error('Navigation timeout')), timeout + TIMEOUTS.NAVIGATION_TIMEOUT_BUFFER);
464
507
  })
465
508
  ]);
466
509
  } catch (error) {
467
510
  console.warn(formatLogMessage('cloudflare', `Navigation wait failed: ${error.message}`));
468
- // Don't throw - just continue
511
+ } finally {
512
+ if (timeoutId) clearTimeout(timeoutId);
469
513
  }
470
514
  }
471
515
 
@@ -563,7 +607,14 @@ async function quickCloudflareDetection(page, forceDebug = false) {
563
607
  */
564
608
  async function analyzeCloudflareChallenge(page) {
565
609
  try {
566
- return await safePageEvaluate(page, () => {
610
+ // CDP-level frame check bypasses closed shadow roots
611
+ const frames = page.frames();
612
+ const hasChallengeFrame = frames.some(f => {
613
+ const url = f.url();
614
+ return url.includes('challenges.cloudflare.com') || url.includes('/cdn-cgi/challenge-platform/');
615
+ });
616
+
617
+ const result = await safePageEvaluate(page, () => {
567
618
  const title = document.title || '';
568
619
  const bodyText = document.body ? document.body.textContent : '';
569
620
 
@@ -635,6 +686,15 @@ async function analyzeCloudflareChallenge(page) {
635
686
  bodySnippet: bodyText.substring(0, 200)
636
687
  };
637
688
  }, TIMEOUTS.PAGE_EVALUATION);
689
+
690
+ // Merge CDP frame detection — catches iframes behind closed shadow roots
691
+ if (hasChallengeFrame && !result.hasTurnstileIframe) {
692
+ result.hasTurnstileIframe = true;
693
+ result.isTurnstile = true;
694
+ result.isChallengePresent = true;
695
+ }
696
+
697
+ return result;
638
698
  } catch (error) {
639
699
  return {
640
700
  isChallengePresent: false,
@@ -842,7 +902,7 @@ async function handleVerificationChallengeWithRetries(page, currentUrl, siteConf
842
902
 
843
903
  // If this wasn't the last attempt, wait before retrying
844
904
  if (attempt < retryConfig.maxAttempts) {
845
- const delay = await getRetryDelay(attempt);
905
+ const delay = getRetryDelay(attempt);
846
906
  if (forceDebug) {
847
907
  console.log(formatLogMessage('cloudflare', `Challenge attempt ${attempt} failed, retrying in ${delay}ms: ${result.error}`));
848
908
  }
@@ -884,7 +944,7 @@ async function handleVerificationChallengeWithRetries(page, currentUrl, siteConf
884
944
 
885
945
  // Wait before retrying with exponential backoff
886
946
  if (attempt < retryConfig.maxAttempts) {
887
- await getRetryDelay(attempt);
947
+ await new Promise(resolve => setTimeout(resolve, getRetryDelay(attempt)));
888
948
  }
889
949
  }
890
950
  }
@@ -925,7 +985,7 @@ async function handlePhishingWarningWithRetries(page, currentUrl, siteConfig, fo
925
985
 
926
986
  // If this wasn't the last attempt, wait before retrying
927
987
  if (attempt < retryConfig.maxAttempts) {
928
- const delay = await getRetryDelay(attempt);
988
+ const delay = getRetryDelay(attempt);
929
989
  if (forceDebug) {
930
990
  console.log(formatLogMessage('cloudflare', `Phishing warning attempt ${attempt} failed, retrying in ${delay}ms: ${result.error}`));
931
991
  }
@@ -955,7 +1015,7 @@ async function handlePhishingWarningWithRetries(page, currentUrl, siteConfig, fo
955
1015
 
956
1016
  // Wait before retrying with exponential backoff
957
1017
  if (attempt < retryConfig.maxAttempts) {
958
- await getRetryDelay(attempt);
1018
+ await new Promise(resolve => setTimeout(resolve, getRetryDelay(attempt)));
959
1019
  }
960
1020
  }
961
1021
  }
@@ -1026,6 +1086,23 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
1026
1086
 
1027
1087
  const jsResult = await waitForJSChallengeCompletion(page, forceDebug);
1028
1088
  if (jsResult.success) {
1089
+ // Wait for redirect after challenge completion
1090
+ try {
1091
+ const startUrl = await page.url();
1092
+ await page.waitForFunction(
1093
+ (origUrl) => {
1094
+ const bodyText = document.body?.textContent || '';
1095
+ return document.title !== 'Just a moment...' ||
1096
+ window.location.href !== origUrl ||
1097
+ bodyText.includes('Verification successful');
1098
+ },
1099
+ { timeout: 10000 },
1100
+ startUrl
1101
+ );
1102
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge page cleared for ${currentUrl}`));
1103
+ } catch (_) {
1104
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge page not cleared after 10s � continuing`));
1105
+ }
1029
1106
  result.success = true;
1030
1107
  result.method = 'js_challenge_wait';
1031
1108
  if (forceDebug) console.log(formatLogMessage('cloudflare', `JS challenge completed successfully for ${currentUrl}`));
@@ -1034,6 +1111,8 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
1034
1111
  } catch (jsError) {
1035
1112
  if (forceDebug) console.log(formatLogMessage('cloudflare', `JS challenge wait failed for ${currentUrl}: ${jsError.message}`));
1036
1113
  }
1114
+ } else if (forceDebug) {
1115
+ console.log(formatLogMessage('cloudflare', `Skipping JS challenge method (not detected)`));
1037
1116
  }
1038
1117
 
1039
1118
  // Method 2: Handle Turnstile challenges (interactive)
@@ -1051,6 +1130,8 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
1051
1130
  } catch (turnstileError) {
1052
1131
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile method failed for ${currentUrl}: ${turnstileError.message}`));
1053
1132
  }
1133
+ } else if (forceDebug) {
1134
+ console.log(formatLogMessage('cloudflare', `Skipping Turnstile method (not detected)`));
1054
1135
  }
1055
1136
 
1056
1137
  // Method 3: Legacy checkbox interaction (fallback)
@@ -1068,10 +1149,23 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
1068
1149
  } catch (legacyError) {
1069
1150
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Legacy checkbox method failed for ${currentUrl}: ${legacyError.message}`));
1070
1151
  }
1152
+ } else if (forceDebug) {
1153
+ console.log(formatLogMessage('cloudflare', `Skipping legacy checkbox method (not detected)`));
1071
1154
  }
1072
1155
 
1073
1156
  if (!result.success) {
1074
1157
  result.error = result.error || 'All challenge bypass methods failed';
1158
+ if (forceDebug) {
1159
+ try {
1160
+ const postState = await page.evaluate(() => ({
1161
+ title: document.title,
1162
+ url: window.location.href,
1163
+ body: (document.body?.textContent || '').substring(0, 300)
1164
+ }));
1165
+ console.log(formatLogMessage('cloudflare', `Post-attempt page state: title="${postState.title}" url=${postState.url}`));
1166
+ console.log(formatLogMessage('cloudflare', `Post-attempt body: ${postState.body}`));
1167
+ } catch (_) {}
1168
+ }
1075
1169
  }
1076
1170
 
1077
1171
  return result;
@@ -1089,88 +1183,57 @@ async function handleEmbeddedIframeChallenge(page, forceDebug = false) {
1089
1183
  try {
1090
1184
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Checking for embedded iframe challenges`));
1091
1185
 
1092
- // Enhanced iframe selectors including challenges.cloudflare.com
1093
- const iframeSelectors = [
1094
- 'iframe[src*="challenges.cloudflare.com"]',
1095
- 'iframe[title*="Verify you are human"]',
1096
- 'iframe[title*="Cloudflare security challenge"]',
1097
- 'iframe[title*="Widget containing a Cloudflare"]'
1098
- ];
1099
-
1100
- // Wait for iframe to appear
1101
- let iframeFound = false;
1102
- for (const selector of iframeSelectors) {
1103
- try {
1104
- await Promise.race([
1105
- page.waitForSelector(selector, { timeout: FAST_TIMEOUTS.SELECTOR_WAIT }),
1106
- new Promise((_, reject) => setTimeout(() => reject(new Error('Timeout')), FAST_TIMEOUTS.SELECTOR_WAIT + 1000))
1107
- ]);
1108
- iframeFound = true;
1109
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Found iframe: ${selector}`));
1110
- break;
1111
- } catch (e) {
1112
- continue;
1186
+ // Use CDP-level frame detection — bypasses closed shadow roots
1187
+ const frames = page.frames();
1188
+ if (forceDebug) {
1189
+ console.log(formatLogMessage('cloudflare', `Available frames (${frames.length}):`));
1190
+ for (const f of frames) {
1191
+ console.log(formatLogMessage('cloudflare', ` ${f.url()}`));
1113
1192
  }
1114
1193
  }
1115
-
1116
- if (!iframeFound) {
1117
- result.error = 'No embedded iframe found';
1118
- return result;
1119
- }
1120
-
1121
- // Find challenge frame using existing frame detection logic
1122
- const frames = await page.frames();
1123
1194
  const challengeFrame = frames.find(frame => {
1124
1195
  const frameUrl = frame.url();
1125
1196
  return frameUrl.includes('challenges.cloudflare.com') ||
1197
+ frameUrl.includes('/cdn-cgi/challenge-platform/') ||
1126
1198
  frameUrl.includes('/turnstile/if/') ||
1127
- frameUrl.includes('captcha-delivery.com') ||
1128
- frameUrl.includes('/challenge-platform/') ||
1129
1199
  frameUrl.includes('turnstile');
1130
1200
  });
1131
1201
 
1132
1202
  if (!challengeFrame) {
1133
- result.error = 'Challenge iframe not accessible';
1203
+ result.error = 'No challenge frame found via CDP';
1134
1204
  return result;
1135
1205
  }
1136
1206
 
1137
1207
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Interacting with iframe: ${challengeFrame.url()}`));
1138
1208
 
1139
- // Reuse existing checkbox interaction logic
1140
- const checkboxSelectors = [
1209
+ await waitForTimeout(page, 500);
1210
+
1211
+ let checkboxInteractionSuccess = false;
1212
+ try {
1213
+ const shadowResult = await clickInShadowDOM(challengeFrame, [
1141
1214
  'input[type="checkbox"]',
1142
1215
  '.ctp-checkbox',
1143
- 'input.ctp-checkbox',
1144
- '.cf-turnstile input',
1145
- '.ctp-checkbox-label'
1146
- ];
1147
-
1148
- let checkboxInteractionSuccess = false;
1149
- for (const selector of checkboxSelectors) {
1150
- try {
1151
- await Promise.race([
1152
- challengeFrame.waitForSelector(selector, { timeout: FAST_TIMEOUTS.SELECTOR_WAIT }),
1153
- new Promise((_, reject) => setTimeout(() => reject(new Error('Timeout')), FAST_TIMEOUTS.SELECTOR_WAIT + 1000))
1154
- ]);
1155
-
1156
- await waitForTimeout(page, FAST_TIMEOUTS.ELEMENT_INTERACTION_DELAY);
1157
- await challengeFrame.click(selector);
1158
-
1159
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Clicked iframe element: ${selector}`));
1216
+ '.ctp-checkbox-label',
1217
+ '[role="checkbox"]',
1218
+ 'label.cb-lb',
1219
+ 'label'
1220
+ ], forceDebug);
1221
+
1222
+ if (shadowResult.clicked) {
1160
1223
  checkboxInteractionSuccess = true;
1161
- break;
1162
- } catch (e) {
1163
- continue;
1224
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM click succeeded: ${shadowResult.selector}`));
1225
+ } else if (shadowResult.found && shadowResult.x > 0) {
1226
+ await page.mouse.click(shadowResult.x, shadowResult.y);
1227
+ checkboxInteractionSuccess = true;
1228
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM mouse fallback at (${shadowResult.x}, ${shadowResult.y})`));
1164
1229
  }
1230
+ } catch (shadowErr) {
1231
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM click failed: ${shadowErr.message}`));
1165
1232
  }
1166
1233
 
1167
- // Try alternative interaction only if standard selectors failed
1168
1234
  if (!checkboxInteractionSuccess) {
1169
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Checkbox interactions failed, trying container fallback`));
1170
- await waitForTimeout(page, 1000);
1171
1235
 
1172
1236
  try {
1173
- // Try clicking on the iframe container itself as fallback
1174
1237
  const iframeElement = await page.$('iframe[src*="challenges.cloudflare.com"]');
1175
1238
  if (iframeElement) {
1176
1239
  await iframeElement.click();
@@ -1179,8 +1242,6 @@ async function handleEmbeddedIframeChallenge(page, forceDebug = false) {
1179
1242
  } catch (containerClickError) {
1180
1243
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Container click failed: ${containerClickError.message}`));
1181
1244
  }
1182
- } else {
1183
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Checkbox interaction successful, skipping container fallback`));
1184
1245
  }
1185
1246
 
1186
1247
  // Reuse existing completion check pattern with error handling
@@ -1237,8 +1298,10 @@ async function waitForJSChallengeCompletion(page, forceDebug = false) {
1237
1298
  await Promise.race([
1238
1299
  page.waitForFunction(
1239
1300
  () => {
1240
- return !document.body.textContent.includes('Checking your browser') &&
1241
- !document.body.textContent.includes('Please wait while we verify') &&
1301
+ const bodyText = document.body.textContent;
1302
+ if (bodyText.includes('Verification successful')) return true;
1303
+ return !bodyText.includes('Checking your browser') &&
1304
+ !bodyText.includes('Please wait while we verify') &&
1242
1305
  !document.querySelector('.cf-challenge-running') &&
1243
1306
  !document.querySelector('[data-cf-challenge]');
1244
1307
  },
@@ -1322,28 +1385,26 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
1322
1385
  console.log(formatLogMessage('cloudflare', `Found Turnstile iframe with URL: ${turnstileFrame.url()}`));
1323
1386
  }
1324
1387
 
1325
- const checkboxSelectors = [
1326
- 'input[type="checkbox"].ctp-checkbox',
1327
- '.ctp-checkbox-label',
1328
- '.ctp-checkbox'
1329
- ];
1330
-
1331
- for (const selector of checkboxSelectors) {
1332
- try {
1333
- await Promise.race([
1334
- turnstileFrame.waitForSelector(selector, { timeout: FAST_TIMEOUTS.SELECTOR_WAIT }),
1335
- new Promise((_, reject) => setTimeout(() => reject(new Error('Checkbox timeout')), FAST_TIMEOUTS.SELECTOR_WAIT + 500))
1336
- ]);
1337
-
1338
- await waitForTimeout(page, FAST_TIMEOUTS.ELEMENT_INTERACTION_DELAY);
1339
- await turnstileFrame.click(selector);
1340
-
1341
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Clicked Turnstile checkbox: ${selector}`));
1342
- break;
1343
- } catch (e) {
1344
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Checkbox selector ${selector} not found or failed to click`));
1345
- continue;
1388
+ await waitForTimeout(page, FAST_TIMEOUTS.ELEMENT_INTERACTION_DELAY);
1389
+
1390
+ try {
1391
+ const shadowResult = await clickInShadowDOM(turnstileFrame, [
1392
+ 'input[type="checkbox"]',
1393
+ '.ctp-checkbox',
1394
+ '.ctp-checkbox-label',
1395
+ '[role="checkbox"]',
1396
+ 'label.cb-lb',
1397
+ 'label'
1398
+ ], forceDebug);
1399
+
1400
+ if (shadowResult.clicked) {
1401
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile shadow DOM click succeeded: ${shadowResult.selector}`));
1402
+ } else if (shadowResult.found && shadowResult.x > 0) {
1403
+ await page.mouse.click(shadowResult.x, shadowResult.y);
1404
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile shadow DOM mouse fallback at (${shadowResult.x}, ${shadowResult.y})`));
1346
1405
  }
1406
+ } catch (shadowErr) {
1407
+ if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM fallback failed: ${shadowErr.message}`));
1347
1408
  }
1348
1409
 
1349
1410
  // Wait for Turnstile completion with reduced timeout
@@ -1531,7 +1592,11 @@ async function checkChallengeCompletion(page) {
1531
1592
  * }
1532
1593
  */
1533
1594
  async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDebug = false) {
1534
- if (forceDebug) {
1595
+ const cfDebug = forceDebug || siteConfig.cloudflare_bypass === 'debug' || siteConfig.cloudflare_phish === 'debug';
1596
+ const cfBypassEnabled = siteConfig.cloudflare_bypass === true || siteConfig.cloudflare_bypass === 'debug';
1597
+ const cfPhishEnabled = siteConfig.cloudflare_phish === true || siteConfig.cloudflare_phish === 'debug';
1598
+
1599
+ if (cfDebug) {
1535
1600
  console.log(formatLogMessage('cloudflare', `Using Cloudflare module v${CLOUDFLARE_MODULE_VERSION} for ${currentUrl}`));
1536
1601
  }
1537
1602
 
@@ -1561,7 +1626,7 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1561
1626
  // Sets attempted: false, success: true for both protection types
1562
1627
 
1563
1628
  // Only proceed if we have indicators OR explicit config enables Cloudflare handling
1564
- if (!quickDetection.hasIndicators && !siteConfig.cloudflare_phish && !siteConfig.cloudflare_bypass) {
1629
+ if (!quickDetection.hasIndicators && !cfPhishEnabled && !cfBypassEnabled) {
1565
1630
  if (forceDebug) console.log(formatLogMessage('cloudflare', `No Cloudflare indicators found and no explicit config, skipping protection handling for ${currentUrl}`));
1566
1631
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Quick detection details: title="${quickDetection.title}", bodySnippet="${quickDetection.bodySnippet}"`));
1567
1632
  return {
@@ -1586,7 +1651,7 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1586
1651
  try {
1587
1652
  // Adaptive timeout based on detection results and explicit config
1588
1653
  let adaptiveTimeout;
1589
- if (siteConfig.cloudflare_phish || siteConfig.cloudflare_bypass) {
1654
+ if (cfPhishEnabled || cfBypassEnabled) {
1590
1655
  // Explicit config - give more time
1591
1656
  adaptiveTimeout = quickDetection.hasIndicators ? TIMEOUTS.ADAPTIVE_TIMEOUT_WITH_INDICATORS : TIMEOUTS.ADAPTIVE_TIMEOUT_WITHOUT_INDICATORS;
1592
1657
  } else {
@@ -1599,7 +1664,7 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1599
1664
  }
1600
1665
 
1601
1666
  return await Promise.race([
1602
- performCloudflareHandling(page, currentUrl, siteConfig, forceDebug),
1667
+ performCloudflareHandling(page, currentUrl, siteConfig, cfDebug),
1603
1668
  new Promise((resolve) => {
1604
1669
  setTimeout(() => {
1605
1670
  console.warn(formatLogMessage('cloudflare', `Adaptive timeout (${adaptiveTimeout}ms) for ${currentUrl} - continuing with scan`));
@@ -1631,6 +1696,9 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1631
1696
  * @returns {Promise<Object>} Same structure as handleCloudflareProtection()
1632
1697
  */
1633
1698
  async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebug = false) {
1699
+ const cfBypassEnabled = siteConfig.cloudflare_bypass === true || siteConfig.cloudflare_bypass === 'debug';
1700
+ const cfPhishEnabled = siteConfig.cloudflare_phish === true || siteConfig.cloudflare_phish === 'debug';
1701
+
1634
1702
  const result = {
1635
1703
  phishingWarning: { attempted: false, success: false },
1636
1704
  verificationChallenge: { attempted: false, success: false },
@@ -1643,7 +1711,7 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
1643
1711
  // Handle phishing warnings first - updates result.phishingWarning
1644
1712
  // Only runs if siteConfig.cloudflare_phish === true
1645
1713
  // Handle phishing warnings if enabled
1646
- if (siteConfig.cloudflare_phish === true) {
1714
+ if (cfPhishEnabled) {
1647
1715
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Phishing warning bypass enabled for ${currentUrl}`));
1648
1716
 
1649
1717
  const phishingResult = await handlePhishingWarningWithRetries(page, currentUrl, siteConfig, forceDebug);
@@ -1678,7 +1746,7 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
1678
1746
  // Only runs if siteConfig.cloudflare_bypass === true
1679
1747
  // Sets requiresHuman: true if CAPTCHA detected (no bypass attempted)
1680
1748
  // Handle verification challenges if enabled
1681
- if (siteConfig.cloudflare_bypass === true) {
1749
+ if (cfBypassEnabled) {
1682
1750
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge bypass enabled for ${currentUrl}`));
1683
1751
 
1684
1752
  const challengeResult = await handleVerificationChallengeWithRetries(page, currentUrl, siteConfig, forceDebug);
@@ -1731,55 +1799,28 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
1731
1799
  * Performs parallel detection of multiple challenge types for better performance
1732
1800
  */
1733
1801
  async function parallelChallengeDetection(page, forceDebug = false) {
1734
- const detectionPromises = [];
1735
-
1736
- // Check for JS challenge
1737
- detectionPromises.push(
1738
- page.evaluate(() => {
1739
- return {
1740
- type: 'js',
1741
- detected: document.querySelector('script[src*="/cdn-cgi/challenge-platform/"]') !== null ||
1742
- document.body?.textContent?.includes('Checking your browser') ||
1743
- document.body?.textContent?.includes('Please wait while we verify')
1744
- };
1745
- }).catch(err => ({ type: 'js', detected: false, error: err.message }))
1746
- );
1747
-
1748
- // Check for Turnstile
1749
- detectionPromises.push(
1750
- page.evaluate(() => {
1751
- return {
1752
- type: 'turnstile',
1753
- detected: document.querySelector('.cf-turnstile') !== null ||
1754
- document.querySelector('iframe[src*="challenges.cloudflare.com"]') !== null ||
1755
- document.querySelector('.ctp-checkbox-container') !== null
1756
- };
1757
- }).catch(err => ({ type: 'turnstile', detected: false, error: err.message }))
1758
- );
1759
-
1760
- // Check for phishing warning
1761
- detectionPromises.push(
1762
- page.evaluate(() => {
1763
- return {
1764
- type: 'phishing',
1765
- detected: document.body?.textContent?.includes('This website has been reported for potential phishing') ||
1766
- document.querySelector('a[href*="continue"]') !== null
1767
- };
1768
- }).catch(err => ({ type: 'phishing', detected: false, error: err.message }))
1769
- );
1770
-
1771
- // Check for managed challenge
1772
- detectionPromises.push(
1773
- page.evaluate(() => {
1774
- return {
1775
- type: 'managed',
1776
- detected: document.querySelector('.cf-managed-challenge') !== null ||
1777
- document.querySelector('[data-cf-managed]') !== null
1778
- };
1779
- }).catch(err => ({ type: 'managed', detected: false, error: err.message }))
1780
- );
1781
-
1782
- const results = await Promise.all(detectionPromises);
1802
+ let results;
1803
+ try {
1804
+ results = await page.evaluate(() => {
1805
+ const bodyText = document.body?.textContent || '';
1806
+ return [
1807
+ { type: 'js', detected: document.querySelector('script[src*="/cdn-cgi/challenge-platform/"]') !== null ||
1808
+ bodyText.includes('Checking your browser') || bodyText.includes('Please wait while we verify') },
1809
+ { type: 'turnstile', detected: document.querySelector('.cf-turnstile') !== null ||
1810
+ document.querySelector('iframe[src*="challenges.cloudflare.com"]') !== null ||
1811
+ document.querySelector('.ctp-checkbox-container') !== null },
1812
+ { type: 'phishing', detected: bodyText.includes('This website has been reported for potential phishing') ||
1813
+ document.querySelector('a[href*="continue"]') !== null },
1814
+ { type: 'managed', detected: document.querySelector('.cf-managed-challenge') !== null ||
1815
+ document.querySelector('[data-cf-managed]') !== null }
1816
+ ];
1817
+ });
1818
+ } catch (err) {
1819
+ results = [
1820
+ { type: 'js', detected: false }, { type: 'turnstile', detected: false },
1821
+ { type: 'phishing', detected: false }, { type: 'managed', detected: false }
1822
+ ];
1823
+ }
1783
1824
 
1784
1825
  const detectedChallenges = results.filter(r => r.detected).map(r => r.type);
1785
1826
 
package/lib/proxy.js ADDED
@@ -0,0 +1,279 @@
1
+ /**
2
+ * Proxy Module for NWSS Network Scanner
3
+ * ======================================
4
+ * Routes specific site URLs through SOCKS5, SOCKS4, HTTP, or HTTPS proxies.
5
+ *
6
+ * Chromium's --proxy-server flag is browser-wide, so sites requiring a proxy
7
+ * need a separate browser instance. This module handles:
8
+ * - Parsing proxy URLs (all supported protocols)
9
+ * - Generating Chromium launch args
10
+ * - Per-page authentication via Puppeteer
11
+ * - Proxy bypass lists
12
+ * - Proxy health checks
13
+ *
14
+ * CONFIG EXAMPLES:
15
+ *
16
+ * SOCKS5 (no auth):
17
+ * "proxy": "socks5://127.0.0.1:1080"
18
+ *
19
+ * SOCKS5 with auth:
20
+ * "proxy": "socks5://user:pass@127.0.0.1:1080"
21
+ *
22
+ * HTTP proxy (corporate):
23
+ * "proxy": "http://proxy.corp.com:3128"
24
+ *
25
+ * HTTP proxy with auth:
26
+ * "proxy": "http://user:pass@proxy.corp.com:8080"
27
+ *
28
+ * HTTPS proxy:
29
+ * "proxy": "https://secure-proxy.example.com:8443"
30
+ *
31
+ * With bypass list and remote DNS:
32
+ * "proxy": "socks5://127.0.0.1:1080",
33
+ * "proxy_bypass": ["localhost", "127.0.0.1", "*.local"],
34
+ * "proxy_remote_dns": true
35
+ *
36
+ * Debug mode:
37
+ * "proxy": "socks5://127.0.0.1:1080",
38
+ * "proxy_debug": true
39
+ *
40
+ * Legacy key (backwards compatible):
41
+ * "socks5_proxy": "socks5://127.0.0.1:1080"
42
+ *
43
+ * INTEGRATION (in nwss.js):
44
+ * const { needsProxy, getProxyArgs, applyProxyAuth, getProxyInfo } = require('./lib/proxy');
45
+ *
46
+ * // Before browser launch
47
+ * if (needsProxy(siteConfig)) {
48
+ * const proxyArgs = getProxyArgs(siteConfig, forceDebug);
49
+ * browserArgs.push(...proxyArgs);
50
+ * }
51
+ *
52
+ * // After page creation, before page.goto()
53
+ * await applyProxyAuth(page, siteConfig, forceDebug);
54
+ *
55
+ * @version 1.1.0
56
+ */
57
+
58
+ const { formatLogMessage } = require('./colorize');
59
+
60
+ const PROXY_MODULE_VERSION = '1.1.0';
61
+ const SUPPORTED_PROTOCOLS = ['socks5', 'socks4', 'http', 'https'];
62
+
63
+ const DEFAULT_PORTS = {
64
+ socks5: 1080,
65
+ socks4: 1080,
66
+ http: 8080,
67
+ https: 8443
68
+ };
69
+
70
/**
 * Returns the configured proxy URL string from siteConfig.
 * Supports both "proxy" (preferred) and "socks5_proxy" (legacy) keys;
 * "proxy" wins when both are set.
 *
 * A missing (null/undefined) siteConfig is treated as "no proxy configured"
 * so the lookup yields null instead of throwing.
 *
 * @param {object|null|undefined} siteConfig - Site configuration block
 * @returns {string|null} The configured proxy value, or null when neither key is set
 */
function getConfiguredProxy(siteConfig) {
  return siteConfig?.proxy || siteConfig?.socks5_proxy || null;
}
80
+
81
/**
 * Parses a proxy URL into components.
 * Accepts: protocol://host:port, protocol://user:pass@host:port, bare host:port
 * (a value without a scheme is treated as socks5://).
 *
 * Port resolution order:
 *   1. the port reported by the URL parser,
 *   2. a port written explicitly in the input that the URL parser dropped
 *      because it equals the scheme default (http :80, https :443),
 *   3. DEFAULT_PORTS for the protocol, then 1080.
 *
 * @param {string} proxyUrl - Proxy URL string
 * @returns {{protocol: string, host: string, port: number, username: (string|null), password: (string|null)}|null}
 *          Parsed proxy, or null if the input is empty, not a string, malformed,
 *          or uses a protocol outside SUPPORTED_PROTOCOLS
 */
function parseProxyUrl(proxyUrl) {
  if (!proxyUrl || typeof proxyUrl !== 'string') return null;

  let cleaned = proxyUrl.trim();

  // Normalise bare host:port to socks5:// URL
  if (!cleaned.includes('://')) {
    cleaned = `socks5://${cleaned}`;
  }

  try {
    const url = new URL(cleaned);
    const protocol = url.protocol.replace(':', '');

    if (!SUPPORTED_PROTOCOLS.includes(protocol)) return null;

    const host = url.hostname;
    if (!host) return null;

    // The WHATWG URL parser reports an empty port when the written port equals
    // the scheme default ("http://proxy:80" -> port ""). Without this recovery
    // step such a proxy would silently be redirected to DEFAULT_PORTS (8080/8443).
    let portText = url.port;
    if (portText === '') {
      const authority = cleaned.slice(cleaned.indexOf('://') + 3).split(/[\/?#]/, 1)[0];
      const hostAndPort = authority.slice(authority.lastIndexOf('@') + 1);
      const explicitPort = /:(\d+)$/.exec(hostAndPort);
      if (explicitPort) portText = explicitPort[1];
    }

    const port = Number.parseInt(portText, 10) || DEFAULT_PORTS[protocol] || 1080;
    const username = url.username ? decodeURIComponent(url.username) : null;
    const password = url.password ? decodeURIComponent(url.password) : null;

    return { protocol, host, port, username, password };
  } catch (_) {
    // Malformed URL or malformed percent-encoding in the credentials
    return null;
  }
}
116
+
117
/**
 * Reports whether a site config has a proxy configured.
 * The answer is the truthiness of whatever getConfiguredProxy() returns
 * for the config; the value itself is not validated here.
 *
 * @param {object} siteConfig - Site configuration block
 * @returns {boolean} True when a proxy value is present
 */
function needsProxy(siteConfig) {
  const configured = getConfiguredProxy(siteConfig);
  return Boolean(configured);
}
126
+
127
/**
 * Returns Chromium launch arguments for the configured proxy.
 *
 * Produces, in order:
 *   --proxy-server=<protocol>://<host>:<port>
 *   --host-resolver-rules=...   (SOCKS5 only, unless remote DNS is disabled)
 *   --proxy-bypass-list=...     (when a bypass list is configured)
 *
 * @param {object} siteConfig - Site configuration block. Reads proxy_remote_dns /
 *        socks5_remote_dns, proxy_bypass / socks5_bypass (array of patterns or a
 *        single string) and proxy_debug / socks5_debug.
 * @param {boolean} forceDebug - Log the generated args regardless of site config
 * @returns {string[]} Array of Chromium args (empty if no proxy configured or the URL is invalid)
 */
function getProxyArgs(siteConfig, forceDebug = false) {
  const proxyUrl = getConfiguredProxy(siteConfig);
  if (!proxyUrl) return [];

  const parsed = parseProxyUrl(proxyUrl);
  if (!parsed) {
    console.warn(formatLogMessage('proxy', `Invalid proxy URL: ${proxyUrl}`));
    return [];
  }

  const args = [
    `--proxy-server=${parsed.protocol}://${parsed.host}:${parsed.port}`
  ];

  // Remote DNS: block local resolution so hostnames are resolved by the proxy
  // (prevents DNS leaks). Applied to SOCKS5 only:
  //   - HTTP/HTTPS proxies resolve remotely by default.
  //   - Chromium's SOCKS4 support resolves the target on the client, so a
  //     catch-all NOTFOUND rule would break every hostname lookup.
  const remoteDns = siteConfig.proxy_remote_dns ?? siteConfig.socks5_remote_dns;
  if (parsed.protocol === 'socks5' && remoteDns !== false) {
    // The proxy's own host must stay resolvable, otherwise Chromium cannot
    // reach a proxy that is configured by hostname. IPv6 brackets are dropped
    // because they are URL syntax, not part of the host name.
    const proxyHost = parsed.host.replace(/^\[(.*)\]$/, '$1');
    const exclusions = ['127.0.0.1'];
    if (!exclusions.includes(proxyHost)) exclusions.push(proxyHost);
    const excludeRules = exclusions.map((h) => `EXCLUDE ${h}`).join(' , ');
    args.push(`--host-resolver-rules=MAP * ~NOTFOUND , ${excludeRules}`);
  }

  // Bypass list: domains that skip the proxy. A single string is accepted as
  // well as an array so a scalar config value does not crash on .join().
  const rawBypass = siteConfig.proxy_bypass || siteConfig.socks5_bypass || [];
  let bypass = [];
  if (Array.isArray(rawBypass)) {
    bypass = rawBypass;
  } else if (typeof rawBypass === 'string') {
    bypass = [rawBypass];
  }
  if (bypass.length > 0) {
    args.push(`--proxy-bypass-list=${bypass.join(';')}`);
  }

  const debug = forceDebug || siteConfig.proxy_debug || siteConfig.socks5_debug;
  if (debug) {
    console.log(formatLogMessage('proxy', `[${parsed.protocol}] Args: ${args.join(' ')}`));
  }

  return args;
}
168
+
169
/**
 * Hands the proxy credentials from the configured proxy URL to the page
 * through Puppeteer's page.authenticate().
 * Must be called BEFORE page.goto().
 *
 * Nothing is done (and false is returned) when no proxy is configured, the
 * proxy URL does not parse, or the URL carries no username. A missing password
 * is sent as an empty string.
 *
 * NOTE(review): page.authenticate() supplies HTTP authentication credentials;
 * whether Chromium honours them for SOCKS proxies should be confirmed.
 *
 * @param {object} page - Puppeteer page instance
 * @param {object} siteConfig - Site configuration block
 * @param {boolean} forceDebug - Log the applied credentials' user/host regardless of site config
 * @returns {Promise<boolean>} True if auth was applied, false if skipped or if authenticate() threw
 */
async function applyProxyAuth(page, siteConfig, forceDebug = false) {
  const configuredUrl = getConfiguredProxy(siteConfig);
  const proxy = configuredUrl ? parseProxyUrl(configuredUrl) : null;
  if (!proxy || !proxy.username) {
    return false;
  }

  const credentials = {
    username: proxy.username,
    password: proxy.password || ''
  };

  try {
    await page.authenticate(credentials);

    if (forceDebug || siteConfig.proxy_debug || siteConfig.socks5_debug) {
      console.log(formatLogMessage('proxy', `Auth set for ${proxy.username}@${proxy.host}:${proxy.port}`));
    }

    return true;
  } catch (authErr) {
    // Best effort: report the failure and let the caller continue without auth
    console.warn(formatLogMessage('proxy', `Failed to set proxy auth: ${authErr.message}`));
    return false;
  }
}
202
+
203
/**
 * Tests proxy connectivity by attempting a TCP connection to the proxy's
 * host and port. Only the TCP connect is checked; no proxy handshake or
 * authentication is performed.
 *
 * The returned promise always resolves (never rejects); failures are reported
 * through the `reachable` / `error` fields.
 *
 * @param {object} siteConfig - Site configuration block
 * @param {number} timeoutMs - Connection timeout (default 5000ms)
 * @returns {Promise<{reachable: boolean, latencyMs: number, error: (string|null)}>}
 */
async function testProxy(siteConfig, timeoutMs = 5000) {
  const proxyUrl = getConfiguredProxy(siteConfig);
  if (!proxyUrl) {
    return { reachable: false, latencyMs: 0, error: 'No proxy configured' };
  }

  const parsed = parseProxyUrl(proxyUrl);
  if (!parsed) {
    return { reachable: false, latencyMs: 0, error: 'Invalid proxy URL' };
  }

  const net = require('net');

  // parseProxyUrl() takes the host from URL.hostname, which keeps the square
  // brackets around IPv6 literals ("[::1]"). net.Socket.connect() expects the
  // bare address, so strip them before connecting.
  const connectHost = parsed.host.replace(/^\[(.*)\]$/, '$1');
  const start = Date.now();

  return new Promise((resolve) => {
    const socket = new net.Socket();

    const onError = (err) => {
      socket.destroy();
      resolve({ reachable: false, latencyMs: Date.now() - start, error: err.message });
    };

    socket.setTimeout(timeoutMs);
    socket.on('error', onError);
    // 'timeout' only signals inactivity; the socket has to be torn down explicitly
    socket.on('timeout', () => onError(new Error('Connection timeout')));

    socket.connect(parsed.port, connectHost, () => {
      const latency = Date.now() - start;
      socket.destroy();
      resolve({ reachable: true, latencyMs: latency, error: null });
    });
  });
}
243
+
244
/**
 * Builds a short, human-readable description of the configured proxy for logging.
 *
 * The result is 'none' when no proxy is configured, 'invalid' when the
 * configured value cannot be parsed, and otherwise
 * protocol://[username@]host:port. The password is never included.
 *
 * @param {object} siteConfig - Site configuration block
 * @returns {string}
 */
function getProxyInfo(siteConfig) {
  const configuredUrl = getConfiguredProxy(siteConfig);
  if (!configuredUrl) {
    return 'none';
  }

  const proxy = parseProxyUrl(configuredUrl);
  if (!proxy) {
    return 'invalid';
  }

  const { protocol, username, host, port } = proxy;
  const userPrefix = username ? `${username}@` : '';
  return `${protocol}://${userPrefix}${host}:${port}`;
}
260
+
261
/**
 * Returns module version information.
 *
 * @returns {{version: string, name: string}} PROXY_MODULE_VERSION plus the module's display name
 */
function getModuleInfo() {
  const info = {
    version: PROXY_MODULE_VERSION,
    name: 'Proxy Handler'
  };
  return info;
}
267
+
268
+ module.exports = {
269
+ parseProxyUrl,
270
+ needsProxy,
271
+ getProxyArgs,
272
+ applyProxyAuth,
273
+ testProxy,
274
+ getProxyInfo,
275
+ getModuleInfo,
276
+ getConfiguredProxy,
277
+ PROXY_MODULE_VERSION,
278
+ SUPPORTED_PROTOCOLS
279
+ };
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v2.0.33 ===
1
+ // === Network scanner script (nwss.js) v2.0.51 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -44,6 +44,7 @@ const { performPageInteraction, createInteractionConfig } = require('./lib/inter
44
44
  const { createGlobalHelpers, getTotalDomainsSkipped, getDetectedDomainsCount } = require('./lib/domain-cache');
45
45
  const { createSmartCache } = require('./lib/smart-cache'); // Smart cache system
46
46
  const { clearPersistentCache } = require('./lib/smart-cache');
47
+ const { needsProxy, getProxyArgs, applyProxyAuth, getProxyInfo, testProxy } = require('./lib/proxy');
47
48
  // Dry run functionality
48
49
  const { initializeDryRunCollections, addDryRunMatch, addDryRunNetTools, processDryRunResults, writeDryRunOutput } = require('./lib/dry-run');
49
50
  // Enhanced site data clearing functionality
@@ -1354,7 +1355,7 @@ function setupFrameHandling(page, forceDebug) {
1354
1355
  * Uses system Chrome and temporary directories to minimize disk usage
1355
1356
  * @returns {Promise<import('puppeteer').Browser>} Browser instance
1356
1357
  */
1357
- async function createBrowser() {
1358
+ async function createBrowser(extraArgs = []) {
1358
1359
  // Create temporary user data directory that we can fully control and clean up
1359
1360
  const tempUserDataDir = `/tmp/puppeteer-${Date.now()}-${Math.random().toString(36).substring(7)}`;
1360
1361
  userDataDir = tempUserDataDir; // Store for cleanup tracking (use outer scope variable)
@@ -1458,6 +1459,7 @@ function setupFrameHandling(page, forceDebug) {
1458
1459
  '--disable-background-timer-throttling',
1459
1460
  '--disable-features=site-per-process', // Better for single-site scanning
1460
1461
  '--no-zygote', // Better process isolation
1462
+ ...extraArgs,
1461
1463
  ],
1462
1464
  // Optimized timeouts for Puppeteer 23.x performance
1463
1465
  protocolTimeout: TIMEOUTS.PROTOCOL_TIMEOUT,
@@ -2130,6 +2132,11 @@ function setupFrameHandling(page, forceDebug) {
2130
2132
  }
2131
2133
  }
2132
2134
 
2135
+ // --- Apply proxy authentication if configured ---
2136
+ if (needsProxy(siteConfig)) {
2137
+ await applyProxyAuth(page, siteConfig, forceDebug);
2138
+ }
2139
+
2133
2140
  // --- Apply all fingerprint spoofing (user agent, Brave, fingerprint protection) ---
2134
2141
  try {
2135
2142
  await applyAllFingerprintSpoofing(page, siteConfig, forceDebug, currentUrl);
@@ -3337,6 +3344,25 @@ function setupFrameHandling(page, forceDebug) {
3337
3344
  siteCounter++;
3338
3345
  // Continue processing with the redirected URL instead of throwing error
3339
3346
  } else {
3347
+ // Detect proxy-specific failures and provide clear diagnostics
3348
+ if (needsProxy(siteConfig) && err.message) {
3349
+ const proxyErrors = [
3350
+ 'ERR_PROXY_CONNECTION_FAILED',
3351
+ 'ERR_SOCKS_CONNECTION_FAILED',
3352
+ 'ERR_TUNNEL_CONNECTION_FAILED',
3353
+ 'ERR_PROXY_AUTH_UNSUPPORTED',
3354
+ 'ERR_PROXY_AUTH_REQUESTED',
3355
+ 'ERR_SOCKS_CONNECTION_HOST_UNREACHABLE',
3356
+ 'ERR_PROXY_CERTIFICATE_INVALID',
3357
+ 'ERR_NO_SUPPORTED_PROXIES'
3358
+ ];
3359
+ const proxyErr = proxyErrors.find(e => err.message.includes(e));
3360
+ if (proxyErr) {
3361
+ const info = getProxyInfo(siteConfig);
3362
+ console.error(formatLogMessage('error', `[proxy] ${proxyErr} — proxy: ${info} — URL: ${currentUrl}`));
3363
+ console.error(formatLogMessage('error', `[proxy] Check: is the proxy running? Are credentials correct? Is the target reachable from the proxy?`));
3364
+ }
3365
+ }
3340
3366
  console.error(formatLogMessage('error', `Failed on ${currentUrl}: ${err.message}`));
3341
3367
  throw err;
3342
3368
  }
@@ -3662,6 +3688,26 @@ function setupFrameHandling(page, forceDebug) {
3662
3688
  }
3663
3689
 
3664
3690
  } catch (err) {
3691
+ // Detect proxy-specific failures at top level
3692
+ if (needsProxy(siteConfig) && err.message) {
3693
+ const proxyErrors = [
3694
+ 'ERR_PROXY_CONNECTION_FAILED',
3695
+ 'ERR_SOCKS_CONNECTION_FAILED',
3696
+ 'ERR_TUNNEL_CONNECTION_FAILED',
3697
+ 'ERR_PROXY_AUTH_UNSUPPORTED',
3698
+ 'ERR_PROXY_AUTH_REQUESTED',
3699
+ 'ERR_SOCKS_CONNECTION_HOST_UNREACHABLE',
3700
+ 'ERR_PROXY_CERTIFICATE_INVALID',
3701
+ 'ERR_NO_SUPPORTED_PROXIES'
3702
+ ];
3703
+ const proxyErr = proxyErrors.find(e => err.message.includes(e));
3704
+ if (proxyErr) {
3705
+ const info = getProxyInfo(siteConfig);
3706
+ console.error(formatLogMessage('error', `[proxy] ${proxyErr} — proxy: ${info} — URL: ${currentUrl}`));
3707
+ console.error(formatLogMessage('error', `[proxy] Check: is the proxy running? Are credentials correct? Is the target reachable from the proxy?`));
3708
+ }
3709
+ }
3710
+
3665
3711
  // Only restart for truly fatal browser errors
3666
3712
  const isFatalError = CRITICAL_BROWSER_ERRORS.some(errorType =>
3667
3713
  err.message.includes(errorType)
@@ -3789,6 +3835,14 @@ function setupFrameHandling(page, forceDebug) {
3789
3835
  }
3790
3836
  }
3791
3837
 
3838
+ // Helper to get a stable proxy key for grouping browser instances
3839
+ const proxyKeyFor = (siteConfig) => {
3840
+ if (!needsProxy(siteConfig)) return '';
3841
+ return getProxyInfo(siteConfig);
3842
+ };
3843
+
3844
+ // Sort tasks so proxy groups are contiguous — direct connections first, then each proxy
3845
+ allTasks.sort((a, b) => proxyKeyFor(a.config).localeCompare(proxyKeyFor(b.config)));
3792
3846
 
3793
3847
  let results = [];
3794
3848
  let processedUrlCount = 0;
@@ -3832,6 +3886,7 @@ function setupFrameHandling(page, forceDebug) {
3832
3886
 
3833
3887
  // Process URLs in batches with exception handling
3834
3888
  let siteGroupIndex = 0;
3889
+ let currentProxyKey = ''; // Track active proxy config — '' means direct connection
3835
3890
  try {
3836
3891
  for (let batchStart = 0; batchStart < totalUrls; batchStart += RESOURCE_CLEANUP_INTERVAL) {
3837
3892
  const batchEnd = Math.min(batchStart + RESOURCE_CLEANUP_INTERVAL, totalUrls);
@@ -3952,14 +4007,67 @@ function setupFrameHandling(page, forceDebug) {
3952
4007
  if (forceDebug) console.log(formatLogMessage('debug', `Browser cleanup warning: ${browserCloseErr.message}`));
3953
4008
  }
3954
4009
 
3955
- // Create new browser for next batch
3956
- browser = await createBrowser();
4010
+ // Create new browser for next batch (preserve current proxy config)
4011
+ const restartProxyArgs = currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : [];
4012
+ browser = await createBrowser(restartProxyArgs);
3957
4013
  if (forceDebug) console.log(formatLogMessage('debug', `New browser instance created for batch ${Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL) + 1}`));
3958
4014
 
3959
4015
  // Reset cleanup counter and add delay
3960
4016
  urlsSinceLastCleanup = 0;
3961
4017
  await fastTimeout(TIMEOUTS.BROWSER_STABILIZE_DELAY);
3962
4018
  }
4019
+
4020
+ // --- Proxy-aware browser restart ---
4021
+ // --proxy-server is browser-wide, so if the batch needs a different proxy we must restart
4022
+ const batchProxyKey = proxyKeyFor(currentBatch[0].config);
4023
+ if (batchProxyKey !== currentProxyKey) {
4024
+ const debug = forceDebug || currentBatch[0].config.proxy_debug || currentBatch[0].config.socks5_debug;
4025
+ if (debug) {
4026
+ const from = currentProxyKey || 'direct';
4027
+ const to = batchProxyKey || 'direct';
4028
+ console.log(formatLogMessage('proxy', `Switching proxy: ${from} → ${to}`));
4029
+ }
4030
+
4031
+ try {
4032
+ await handleBrowserExit(browser, {
4033
+ forceDebug, timeout: 10000, exitOnFailure: false,
4034
+ cleanTempFiles: true, comprehensiveCleanup: removeTempFiles
4035
+ });
4036
+ if (userDataDir && fs.existsSync(userDataDir)) {
4037
+ fs.rmSync(userDataDir, { recursive: true, force: true });
4038
+ }
4039
+ } catch (proxyRestartErr) {
4040
+ if (forceDebug) console.log(formatLogMessage('debug', `Proxy switch browser cleanup: ${proxyRestartErr.message}`));
4041
+ }
4042
+
4043
+ const proxyArgs = batchProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : [];
4044
+
4045
+ // Pre-flight: verify proxy is reachable before launching browser
4046
+ if (proxyArgs.length > 0) {
4047
+ const health = await testProxy(currentBatch[0].config, 5000);
4048
+ if (!health.reachable) {
4049
+ const info = getProxyInfo(currentBatch[0].config);
4050
+ console.error(formatLogMessage('error', `[proxy] Unreachable: ${info} — ${health.error}`));
4051
+ console.error(formatLogMessage('error', `[proxy] Skipping ${currentBatch.length} URL(s) in this batch`));
4052
+ const skipResults = currentBatch.map(task => ({
4053
+ success: false, url: task.url, rules: [],
4054
+ error: `Proxy unreachable: ${health.error}`
4055
+ }));
4056
+ results.push(...skipResults);
4057
+ processedUrlCount += currentBatch.length;
4058
+ urlsSinceLastCleanup += currentBatch.length;
4059
+ continue;
4060
+ }
4061
+ if (forceDebug) {
4062
+ console.log(formatLogMessage('proxy', `Proxy reachable (${health.latencyMs}ms)`));
4063
+ }
4064
+ }
4065
+
4066
+ browser = await createBrowser(proxyArgs);
4067
+ currentProxyKey = batchProxyKey;
4068
+ urlsSinceLastCleanup = 0;
4069
+ await fastTimeout(TIMEOUTS.BROWSER_STABILIZE_DELAY);
4070
+ }
3963
4071
 
3964
4072
  if (forceDebug) {
3965
4073
  console.log(formatLogMessage('debug', `Processing batch ${Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL) + 1}: ${batchSize} URL(s) (total processed: ${processedUrlCount})`));
@@ -3986,7 +4094,8 @@ function setupFrameHandling(page, forceDebug) {
3986
4094
  console.log(formatLogMessage('error', `[TIMEOUT] Batch hung. Restarting browser.`));
3987
4095
  try {
3988
4096
  await handleBrowserExit(browser, { forceDebug, timeout: 5000, exitOnFailure: false });
3989
- browser = await createBrowser();
4097
+ const timeoutProxyArgs = currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : [];
4098
+ browser = await createBrowser(timeoutProxyArgs);
3990
4099
  urlsSinceLastCleanup = 0;
3991
4100
  } catch (restartErr) {
3992
4101
  throw restartErr;
@@ -4104,7 +4213,7 @@ function setupFrameHandling(page, forceDebug) {
4104
4213
  comprehensive: true
4105
4214
  });
4106
4215
  }
4107
- browser = await createBrowser();
4216
+ browser = await createBrowser(currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : []);
4108
4217
  urlsSinceLastCleanup = 0; // Reset counter
4109
4218
  await fastTimeout(TIMEOUTS.EMERGENCY_RESTART_DELAY); // Give browser time to stabilize
4110
4219
  } catch (emergencyRestartErr) {
@@ -4116,7 +4225,7 @@ function setupFrameHandling(page, forceDebug) {
4116
4225
  console.log(`\n${messageColors.fileOp('🔄 Emergency hang detection restart:')} Browser appears hung, forcing restart`);
4117
4226
  try {
4118
4227
  await handleBrowserExit(browser, { forceDebug, timeout: 5000, exitOnFailure: false, cleanTempFiles: true });
4119
- browser = await createBrowser();
4228
+ browser = await createBrowser(currentProxyKey ? getProxyArgs(currentBatch[0].config, forceDebug) : []);
4120
4229
  urlsSinceLastCleanup = 0;
4121
4230
  forceRestartFlag = false; // Reset flag
4122
4231
  await fastTimeout(TIMEOUTS.EMERGENCY_RESTART_DELAY);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "2.0.50",
3
+ "version": "2.0.51",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {
@@ -48,7 +48,7 @@
48
48
  },
49
49
  "homepage": "https://github.com/ryanbr/network-scanner",
50
50
  "devDependencies": {
51
- "eslint": "^9.32.0",
51
+ "eslint": "^10.0.2",
52
52
  "globals": "^16.3.0"
53
53
  }
54
54
  }