@fanboynz/network-scanner 2.0.66 → 3.0.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/lib/cloudflare.js CHANGED
@@ -30,8 +30,11 @@
30
30
  */
31
31
 
32
32
  // Import color utilities
33
- const { formatLogMessage } = require('./colorize');
33
+ const { formatLogMessage, messageColors } = require('./colorize');
34
+ const URL_VALIDATION_TAG = messageColors.processing('[url-validation]');
34
35
 
36
+
37
+ const CLOUDFLARE_TAG = messageColors.cloudflare('[cloudflare]');
35
38
  /**
36
39
  * Module version information
37
40
  */
@@ -54,15 +57,15 @@ const TIMEOUTS = {
54
57
  CLICK_TIMEOUT_BUFFER: 1000, // Click timeout safety buffer
55
58
  NAVIGATION_TIMEOUT: 15000, // Standard navigation timeout
56
59
  NAVIGATION_TIMEOUT_BUFFER: 2000, // Navigation timeout safety buffer
57
- ADAPTIVE_TIMEOUT_WITH_INDICATORS: 25000, // Adaptive timeout when indicators found + explicit config
58
- ADAPTIVE_TIMEOUT_WITHOUT_INDICATORS: 20000, // Adaptive timeout with explicit config only
59
- ADAPTIVE_TIMEOUT_AUTO_WITH_INDICATORS: 15000, // Adaptive timeout for auto-detected with indicators
60
- ADAPTIVE_TIMEOUT_AUTO_WITHOUT_INDICATORS: 10000, // Adaptive timeout for auto-detected without indicators
61
- // New timeouts for enhanced functionality
62
- RETRY_DELAY: 1000, // Delay between retry attempts
63
- MAX_RETRIES: 2, // Maximum retry attempts (only 2 fit within 25s outer timeout)
64
- CHALLENGE_POLL_INTERVAL: 500, // Interval for polling challenge completion
65
- CHALLENGE_MAX_POLLS: 20 // Maximum polling attempts
60
+ // Adaptive timeouts are only consulted AFTER the no-indicators early
61
+ // return in handleCloudflareProtection, so the WITHOUT_INDICATORS
62
+ // variants were unreachable and have been removed.
63
+ ADAPTIVE_TIMEOUT_WITH_INDICATORS: 25000, // Indicators present + explicit config
64
+ ADAPTIVE_TIMEOUT_AUTO_WITH_INDICATORS: 15000, // Indicators present, auto-detected only
65
+ // Removed: RETRY_DELAY, CHALLENGE_POLL_INTERVAL, CHALLENGE_MAX_POLLS --
66
+ // defined but never read. Backoff uses RETRY_CONFIG.baseDelay +
67
+ // getRetryDelay(); challenges aren't polled via fixed interval.
68
+ MAX_RETRIES: 2 // Maximum retry attempts (only 2 fit within 25s outer timeout)
66
69
  };
67
70
 
68
71
  // Fast timeout constants - optimized for speed
@@ -72,7 +75,8 @@ const FAST_TIMEOUTS = {
72
75
  CHALLENGE_WAIT: 500, // Fast challenge detection
73
76
  ELEMENT_INTERACTION_DELAY: 250, // Fast element interactions
74
77
  SELECTOR_WAIT: 3000, // Fast selector waits
75
- TURNSTILE_OPERATION: 6000, // Fast Turnstile operations
78
+ // Removed: TURNSTILE_OPERATION -- defined but never read. The
79
+ // turnstileTimeout local var that referenced it was also dead.
76
80
  JS_CHALLENGE: 10000, // Fast JS challenge completion
77
81
  CHALLENGE_SOLVING: 12000, // Overall challenge solving -- fits within 15s adaptive outer
78
82
  CHALLENGE_COMPLETION: 8000 // Fast completion check
@@ -92,18 +96,18 @@ async function clickInShadowDOM(context, selectors, forceDebug = false, waitMs =
92
96
  if (element) {
93
97
  const box = await element.boundingBox();
94
98
  if (box && box.width > 0 && box.height > 0) {
95
- if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} matched in ${Date.now() - start}ms -- box: ${box.width}x${box.height} at (${box.x},${box.y})`));
99
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} pierce/${selector} matched in ${Date.now() - start}ms -- box: ${box.width}x${box.height} at (${box.x},${box.y})`));
96
100
  await element.click();
97
101
  await element.dispose();
98
102
  return { found: true, clicked: true, selector, x: box.x + box.width / 2, y: box.y + box.height / 2 };
99
103
  }
100
- if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} found but not visible (0x0)`));
104
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} pierce/${selector} found but not visible (0x0)`));
101
105
  await element.dispose();
102
106
  // Element found but not visible
103
107
  return { found: true, clicked: false, selector, x: 0, y: 0 };
104
108
  }
105
109
  } catch (e) {
106
- if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} timeout after ${waitMs}ms`));
110
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} pierce/${selector} timeout after ${waitMs}ms`));
107
111
  continue;
108
112
  }
109
113
  }
@@ -197,17 +201,33 @@ function detectChallengeLoop(url, previousUrls = []) {
197
201
 
198
202
  if (!isChallengeUrl) return false;
199
203
 
200
- // Check if we've seen this exact URL or very similar challenge URLs
201
- const similarUrls = previousUrls.filter(prevUrl => {
202
- if (prevUrl === url) return true; // Exact match
203
- // Check for similar challenge URLs with different ray IDs
204
- if (prevUrl.includes('/cdn-cgi/challenge-platform/') && url.includes('/cdn-cgi/challenge-platform/')) {
205
- return true;
206
- }
207
- return false;
208
- });
204
+ // Two loop signals with different sensitivities:
205
+ //
206
+ // exactMatches — page reloaded between retries but came back to the
207
+ // identical URL. Strong signal: the reload didn't
208
+ // advance the challenge state. Trips on a single
209
+ // prior visit, which means it actually fires under
210
+ // the default RETRY_CONFIG.maxAttempts = 2 (where
211
+ // you only ever have one prior URL to compare to).
212
+ // Previously the threshold was a flat >= 2 which
213
+ // silently never fired with default config.
214
+ //
215
+ // cdnCgiMatches — both URLs are cdn-cgi challenge URLs (different
216
+ // ray IDs). Weaker signal: a reload that yields a
217
+ // fresh challenge is normal retry behavior, not a
218
+ // loop. Keep the original >= 2 threshold so this
219
+ // only trips with custom cloudflare_max_retries set
220
+ // to 3+ (i.e. you've seen 2 fresh challenges and
221
+ // the 3rd is still a challenge -- genuinely stuck).
222
+ const urlIsCdnCgi = url.includes('/cdn-cgi/challenge-platform/');
223
+ let exactMatches = 0;
224
+ let cdnCgiMatches = 0;
225
+ for (const prevUrl of previousUrls) {
226
+ if (prevUrl === url) exactMatches++;
227
+ else if (urlIsCdnCgi && prevUrl.includes('/cdn-cgi/challenge-platform/')) cdnCgiMatches++;
228
+ }
209
229
 
210
- return similarUrls.length >= 2; // Loop detected if we've seen similar URLs 2+ times
230
+ return exactMatches >= 1 || cdnCgiMatches >= 2;
211
231
  }
212
232
 
213
233
  /**
@@ -217,6 +237,10 @@ function detectChallengeLoop(url, previousUrls = []) {
217
237
  class CloudflareDetectionCache {
218
238
  constructor(ttl = 300000) { // 5 minutes TTL by default
219
239
  this.cache = new Map();
240
+ // Outcomes live in a separate Map so the 1000-entry eviction on the
241
+ // detection cache doesn't randomly drop "this domain timed out" entries
242
+ // and re-permit expensive retries. Same TTL applies to both.
243
+ this.outcomes = new Map();
220
244
  this.ttl = ttl;
221
245
  this.hits = 0;
222
246
  this.misses = 0;
@@ -240,16 +264,16 @@ class CloudflareDetectionCache {
240
264
  get(url) {
241
265
  const key = this.getCacheKey(url);
242
266
  const cached = this.cache.get(key);
243
-
267
+
244
268
  if (cached && Date.now() - cached.timestamp < this.ttl) {
245
269
  this.hits++;
246
270
  return cached.data;
247
271
  }
248
-
272
+
249
273
  if (cached) {
250
274
  this.cache.delete(key); // Remove expired entry
251
275
  }
252
-
276
+
253
277
  this.misses++;
254
278
  return null;
255
279
  }
@@ -260,7 +284,7 @@ class CloudflareDetectionCache {
260
284
  data,
261
285
  timestamp: Date.now()
262
286
  });
263
-
287
+
264
288
  // Prevent cache from growing too large
265
289
  if (this.cache.size > 1000) {
266
290
  const firstKey = this.cache.keys().next().value;
@@ -268,6 +292,29 @@ class CloudflareDetectionCache {
268
292
  }
269
293
  }
270
294
 
295
+ /**
296
+ * Per-domain handling-outcome cache. Used to skip subsequent URLs on a
297
+ * domain that already timed out, without polluting the detection cache.
298
+ * Returns the cached outcome data or null (TTL-checked).
299
+ */
300
+ getOutcome(url) {
301
+ const key = this.getCacheKey(url);
302
+ const entry = this.outcomes.get(key);
303
+ if (entry && Date.now() - entry.timestamp < this.ttl) {
304
+ return entry.data;
305
+ }
306
+ if (entry) this.outcomes.delete(key);
307
+ return null;
308
+ }
309
+
310
+ setOutcome(url, data) {
311
+ const key = this.getCacheKey(url);
312
+ this.outcomes.set(key, { data, timestamp: Date.now() });
313
+ if (this.outcomes.size > 1000) {
314
+ this.outcomes.delete(this.outcomes.keys().next().value);
315
+ }
316
+ }
317
+
271
318
  cleanupExpired() {
272
319
  const now = Date.now();
273
320
  for (const [key, value] of this.cache.entries()) {
@@ -275,6 +322,11 @@ class CloudflareDetectionCache {
275
322
  this.cache.delete(key);
276
323
  }
277
324
  }
325
+ for (const [key, value] of this.outcomes.entries()) {
326
+ if (now - value.timestamp >= this.ttl) {
327
+ this.outcomes.delete(key);
328
+ }
329
+ }
278
330
  }
279
331
 
280
332
  destroy() {
@@ -284,6 +336,7 @@ class CloudflareDetectionCache {
284
336
 
285
337
  clear() {
286
338
  this.cache.clear();
339
+ this.outcomes.clear();
287
340
  this.hits = 0;
288
341
  this.misses = 0;
289
342
  }
@@ -294,7 +347,8 @@ class CloudflareDetectionCache {
294
347
  hits: this.hits,
295
348
  misses: this.misses,
296
349
  hitRate: total > 0 ? (this.hits / total * 100).toFixed(2) + '%' : '0%',
297
- size: this.cache.size
350
+ size: this.cache.size,
351
+ outcomes: this.outcomes.size
298
352
  };
299
353
  }
300
354
  }
@@ -307,17 +361,82 @@ const detectionCache = new CloudflareDetectionCache();
307
361
  // produces N=URL-count copies for no useful signal beyond the first.
308
362
  let _moduleVersionLogged = false;
309
363
 
364
+ // Per-scan aggregate stats. Updated on every handleCloudflareProtection
365
+ // completion regardless of debug mode so nwss.js can print an end-of-scan
366
+ // summary ("Of 200 URLs: 47 challenged, 31 solved via JS, 12 via Turnstile,
367
+ // 4 timed out") without needing to thread the per-URL results back into the
368
+ // orchestration layer. Reset via resetAggregateStats() or implicitly by
369
+ // cleanup().
370
+ const aggregateStats = {
371
+ total: 0,
372
+ byOutcome: Object.create(null), // 'ok' -> N, 'solved(turnstile)' -> N, etc.
373
+ bySolveMethod: Object.create(null), // Includes BOTH verification-challenge
374
+ // methods ('js_challenge_wait',
375
+ // 'turnstile', 'legacy_checkbox') and
376
+ // the phishing-bypass method
377
+ // ('phishing_continue').
378
+ totalDurationMs: 0,
379
+ maxDurationMs: 0, // Cheap to track; surfaces the
380
+ // worst-case URL when avg gets
381
+ // dominated by timeouts.
382
+ failures: 0, // !overallSuccess count
383
+ timedOut: 0 // adaptive-timeout count (subset of failures)
384
+ };
385
+
386
+ function bumpAggregate(outcome, result, durationMs) {
387
+ aggregateStats.total++;
388
+ aggregateStats.byOutcome[outcome] = (aggregateStats.byOutcome[outcome] || 0) + 1;
389
+ aggregateStats.totalDurationMs += durationMs;
390
+ if (durationMs > aggregateStats.maxDurationMs) aggregateStats.maxDurationMs = durationMs;
391
+ if (!result.overallSuccess) aggregateStats.failures++;
392
+ if (result.timedOut) aggregateStats.timedOut++;
393
+ // Method-of-resolution tracking. Mirrors buildOutcomeString's branch
394
+ // order: prefer the verification-challenge method, fall back to the
395
+ // phishing-continue path. A URL where both succeeded gets counted under
396
+ // the challenge method (matches `solved(turnstile)` etc. in byOutcome).
397
+ const vMethod = result.verificationChallenge && result.verificationChallenge.method;
398
+ if (vMethod) {
399
+ aggregateStats.bySolveMethod[vMethod] = (aggregateStats.bySolveMethod[vMethod] || 0) + 1;
400
+ } else if (result.phishingWarning && result.phishingWarning.attempted && result.phishingWarning.success) {
401
+ aggregateStats.bySolveMethod['phishing_continue'] = (aggregateStats.bySolveMethod['phishing_continue'] || 0) + 1;
402
+ }
403
+ }
404
+
310
405
  /**
311
- * Gets module version information
312
- * @returns {object} Version information object
406
+ * Returns a snapshot of per-scan aggregate stats. nwss.js can call this at
407
+ * scan end to print a summary. Pass {reset:true} to atomically read+reset
408
+ * so multi-scan processes don't accumulate across runs.
313
409
  */
314
- function getModuleInfo() {
315
- return {
316
- version: CLOUDFLARE_MODULE_VERSION,
317
- name: 'Cloudflare Protection Handler'
410
+ function getAggregateStats({ reset = false } = {}) {
411
+ const snap = {
412
+ total: aggregateStats.total,
413
+ failures: aggregateStats.failures,
414
+ timedOut: aggregateStats.timedOut,
415
+ byOutcome: { ...aggregateStats.byOutcome },
416
+ bySolveMethod: { ...aggregateStats.bySolveMethod },
417
+ avgDurationMs: aggregateStats.total > 0
418
+ ? Math.round(aggregateStats.totalDurationMs / aggregateStats.total)
419
+ : 0,
420
+ maxDurationMs: aggregateStats.maxDurationMs
318
421
  };
422
+ if (reset) resetAggregateStats();
423
+ return snap;
319
424
  }
320
425
 
426
+ function resetAggregateStats() {
427
+ aggregateStats.total = 0;
428
+ aggregateStats.failures = 0;
429
+ aggregateStats.timedOut = 0;
430
+ aggregateStats.byOutcome = Object.create(null);
431
+ aggregateStats.bySolveMethod = Object.create(null);
432
+ aggregateStats.totalDurationMs = 0;
433
+ aggregateStats.maxDurationMs = 0;
434
+ }
435
+
436
+ // Note: getModuleInfo() helper was removed -- had zero callers internal
437
+ // or external. CLOUDFLARE_MODULE_VERSION stays as it's read by the
438
+ // once-per-process version banner in handleCloudflareProtection.
439
+
321
440
  /**
322
441
  * Validates if a URL should be processed by Cloudflare protection
323
442
  * Only allows HTTP/HTTPS URLs, skips browser-internal and special protocols
@@ -334,21 +453,21 @@ const HTTP_PROTO_RE = /^https?:\/\//i;
334
453
 
335
454
  function shouldProcessUrl(url, forceDebug = false) {
336
455
  if (!url || typeof url !== 'string') {
337
- if (forceDebug) console.log(formatLogMessage('cloudflare', `[url-validation] Skipping invalid URL: ${url}`));
456
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} ${URL_VALIDATION_TAG} Skipping invalid URL: ${url}`));
338
457
  return false;
339
458
  }
340
459
 
341
460
  const skipMatch = url.match(SKIP_PROTO_RE);
342
461
  if (skipMatch) {
343
462
  if (forceDebug) {
344
- console.log(formatLogMessage('cloudflare', `[url-validation] Skipping ${skipMatch[0].toLowerCase()} URL: ${url.substring(0, 100)}${url.length > 100 ? '...' : ''}`));
463
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} ${URL_VALIDATION_TAG} Skipping ${skipMatch[0].toLowerCase()} URL: ${url.substring(0, 100)}${url.length > 100 ? '...' : ''}`));
345
464
  }
346
465
  return false;
347
466
  }
348
467
 
349
468
  if (!HTTP_PROTO_RE.test(url)) {
350
469
  if (forceDebug) {
351
- console.log(formatLogMessage('cloudflare', `[url-validation] Skipping non-HTTP(S) URL: ${url.substring(0, 100)}${url.length > 100 ? '...' : ''}`));
470
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} ${URL_VALIDATION_TAG} Skipping non-HTTP(S) URL: ${url.substring(0, 100)}${url.length > 100 ? '...' : ''}`));
352
471
  }
353
472
  return false;
354
473
  }
@@ -357,13 +476,14 @@ function shouldProcessUrl(url, forceDebug = false) {
357
476
  }
358
477
 
359
478
  /**
360
- * Fast timeout helper for Puppeteer 22.x compatibility
361
- * Replaces deprecated page.waitForTimeout() with standard Promise-based approach
479
+ * Fast timeout helper for Puppeteer 22.x compatibility. Replaces deprecated
480
+ * page.waitForTimeout() with a standard Promise-based delay. The `page` arg
481
+ * used to be required for the deprecated API; it's been dropped now that
482
+ * every call site is just sleeping. Renamed from waitForTimeout to fastTimeout
483
+ * to match the CLAUDE.md convention used across the codebase.
362
484
  */
363
- async function waitForTimeout(page, timeout) {
364
- // Use fast Promise-based timeout for Puppeteer 22.x compatibility
365
- // This eliminates the deprecated API dependency and improves performance
366
- return new Promise(resolve => setTimeout(resolve, timeout));
485
+ function fastTimeout(ms) {
486
+ return new Promise(resolve => setTimeout(resolve, ms));
367
487
  }
368
488
 
369
489
  /**
@@ -493,7 +613,7 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
493
613
  }
494
614
 
495
615
  if (forceDebug && attempt > 1) {
496
- console.log(formatLogMessage('cloudflare', `Page evaluation succeeded on attempt ${attempt}`));
616
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Page evaluation succeeded on attempt ${attempt}`));
497
617
  }
498
618
 
499
619
  return result;
@@ -507,13 +627,13 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
507
627
  const errorType = categorizeError(error);
508
628
 
509
629
  if (forceDebug) {
510
- console.warn(formatLogMessage('cloudflare', `Page evaluation failed (attempt ${attempt}/${maxRetries}): ${error.message} [${errorType}]`));
630
+ console.warn(formatLogMessage('warn', `${CLOUDFLARE_TAG} Page evaluation failed (attempt ${attempt}/${maxRetries}): ${error.message} [${errorType}]`));
511
631
  }
512
632
 
513
633
  // Handle detached frame errors specifically
514
634
  if (errorType === ERROR_TYPES.DETACHED_FRAME) {
515
635
  if (forceDebug) {
516
- console.warn(formatLogMessage('cloudflare', `Detached frame detected on attempt ${attempt}/${maxRetries} - using longer delay`));
636
+ console.warn(formatLogMessage('warn', `${CLOUDFLARE_TAG} Detached frame detected on attempt ${attempt}/${maxRetries} - using longer delay`));
517
637
  }
518
638
  // For detached frames, brief delay before retry
519
639
  await new Promise(resolve => setTimeout(resolve, 1000));
@@ -569,9 +689,12 @@ async function safeClick(page, selector, timeout = TIMEOUTS.CLICK_TIMEOUT) {
569
689
  }
570
690
 
571
691
  /**
572
- * Safe navigation waiting with timeout protection
692
+ * Safe navigation waiting with timeout protection. The warn on timeout is
693
+ * forceDebug-gated to match the convention of the other warn sites in this
694
+ * file -- previously it fired unconditionally, which spammed stderr on every
695
+ * phishing-bypass click that didn't trigger a clean redirect.
573
696
  */
574
- async function safeWaitForNavigation(page, timeout = TIMEOUTS.NAVIGATION_TIMEOUT) {
697
+ async function safeWaitForNavigation(page, timeout = TIMEOUTS.NAVIGATION_TIMEOUT, forceDebug = false) {
575
698
  let timeoutId;
576
699
  try {
577
700
  return await Promise.race([
@@ -581,7 +704,7 @@ async function safeWaitForNavigation(page, timeout = TIMEOUTS.NAVIGATION_TIMEOUT
581
704
  })
582
705
  ]);
583
706
  } catch (error) {
584
- console.warn(formatLogMessage('cloudflare', `Navigation wait failed: ${error.message}`));
707
+ if (forceDebug) console.warn(formatLogMessage('warn', `${CLOUDFLARE_TAG} Navigation wait failed: ${error.message}`));
585
708
  } finally {
586
709
  if (timeoutId) clearTimeout(timeoutId);
587
710
  }
@@ -597,7 +720,7 @@ async function quickCloudflareDetection(page, forceDebug = false) {
597
720
 
598
721
  if (!shouldProcessUrl(currentPageUrl, forceDebug)) {
599
722
  if (forceDebug) {
600
- console.log(formatLogMessage('cloudflare', `Quick detection skipping non-HTTP(S) page: ${currentPageUrl}`));
723
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Quick detection skipping non-HTTP(S) page: ${currentPageUrl}`));
601
724
  }
602
725
  return { hasIndicators: false, skippedInvalidUrl: true };
603
726
  }
@@ -607,7 +730,7 @@ async function quickCloudflareDetection(page, forceDebug = false) {
607
730
  if (cachedResult !== null) {
608
731
  if (forceDebug) {
609
732
  const stats = detectionCache.getStats();
610
- console.log(formatLogMessage('cloudflare', `Using cached detection result (cache hit rate: ${stats.hitRate})`));
733
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Using cached detection result (cache hit rate: ${stats.hitRate})`));
611
734
  }
612
735
  // Return a fresh shallow copy tagged _fromCache so the handler's
613
736
  // logging can say "[cached]" instead of presenting cached title/body
@@ -699,7 +822,7 @@ async function quickCloudflareDetection(page, forceDebug = false) {
699
822
 
700
823
  if (forceDebug) {
701
824
  if (quickCheck.hasIndicators) {
702
- console.log(formatLogMessage('cloudflare', `Quick detection found Cloudflare indicators on ${quickCheck.url}`));
825
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Quick detection found Cloudflare indicators on ${quickCheck.url}`));
703
826
  }
704
827
  // hasErrorPage and no-indicators cases are deliberately silent here —
705
828
  // handleCloudflareProtection prints a clearer per-action line right
@@ -708,13 +831,13 @@ async function quickCloudflareDetection(page, forceDebug = false) {
708
831
  // here would just duplicate it.
709
832
 
710
833
  if (quickCheck.attempts && quickCheck.attempts > 1) {
711
- console.log(formatLogMessage('cloudflare', `Detection required ${quickCheck.attempts} attempts`));
834
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Detection required ${quickCheck.attempts} attempts`));
712
835
  }
713
836
  }
714
837
 
715
838
  return quickCheck;
716
839
  } catch (error) {
717
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Quick detection failed: ${error.message}`));
840
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Quick detection failed: ${error.message}`));
718
841
  return { hasIndicators: false, error: error.message };
719
842
  }
720
843
  }
@@ -760,37 +883,48 @@ async function analyzeCloudflareChallenge(page) {
760
883
  );
761
884
 
762
885
  const hasDataRay = !!document.querySelector('[data-ray], [data-cf-challenge]');
763
-
886
+
887
+ // Managed challenges (cf-managed). parallelChallengeDetection and the
888
+ // quick-detection slow path both look for these, but the main analyzer
889
+ // used to ignore them — a managed-challenge-only page would then slip
890
+ // past isChallengePresent. Now folded in below.
891
+ const hasManagedChallenge = !!document.querySelector(
892
+ '.cf-managed-challenge, [data-cf-managed]'
893
+ );
894
+
764
895
  const hasCaptcha = bodyText.includes('CAPTCHA') || bodyText.includes('captcha') ||
765
896
  bodyText.includes('hCaptcha') || bodyText.includes('reCAPTCHA');
766
-
897
+
767
898
  const hasJSChallenge = document.querySelector('script[src*="/cdn-cgi/challenge-platform/"]') !== null ||
768
899
  bodyText.includes('Checking your browser') ||
769
900
  bodyText.includes('Please wait while we verify');
770
-
901
+
771
902
  const hasPhishingWarning = bodyText.includes('This website has been reported for potential phishing') ||
772
903
  title.includes('Attention Required');
773
-
774
- const hasTurnstileResponse = document.querySelector('input[name="cf-turnstile-response"]') !== null;
775
-
776
- const isChallengeCompleted = hasTurnstileResponse &&
777
- document.querySelector('input[name="cf-turnstile-response"]')?.value;
778
-
904
+
905
+ // Cache the element once -- isChallengeCompleted used to re-query the
906
+ // same selector after hasTurnstileResponse had already located it.
907
+ const turnstileInput = document.querySelector('input[name="cf-turnstile-response"]');
908
+ const hasTurnstileResponse = turnstileInput !== null;
909
+ const isChallengeCompleted = hasTurnstileResponse && !!turnstileInput.value;
910
+
779
911
  const isChallengePresent = title.includes('Just a moment') ||
780
912
  title.includes('Checking your browser') ||
781
913
  bodyText.includes('Verify you are human') ||
782
- hasLegacyCheckbox ||
783
- hasChallengeRunning ||
914
+ hasLegacyCheckbox ||
915
+ hasChallengeRunning ||
784
916
  hasDataRay ||
785
917
  hasTurnstileIframe ||
786
918
  hasTurnstileContainer ||
787
- hasJSChallenge;
788
-
919
+ hasJSChallenge ||
920
+ hasManagedChallenge;
921
+
789
922
  return {
790
923
  isChallengePresent,
791
924
  isPhishingWarning: hasPhishingWarning,
792
925
  isTurnstile: hasTurnstileIframe || hasTurnstileContainer || hasTurnstileCheckbox,
793
926
  isJSChallenge: hasJSChallenge,
927
+ hasManagedChallenge,
794
928
  isChallengeCompleted,
795
929
  title,
796
930
  hasLegacyCheckbox,
@@ -849,10 +983,10 @@ async function handlePhishingWarning(page, currentUrl, forceDebug = false) {
849
983
  };
850
984
 
851
985
  try {
852
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Checking for phishing warning on ${currentUrl}`));
986
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Checking for phishing warning on ${currentUrl}`));
853
987
 
854
988
  // Shorter wait with timeout protection
855
- await waitForTimeout(page, FAST_TIMEOUTS.PHISHING_WAIT);
989
+ await fastTimeout(FAST_TIMEOUTS.PHISHING_WAIT);
856
990
 
857
991
  const challengeInfo = await analyzeCloudflareChallenge(page);
858
992
 
@@ -861,30 +995,29 @@ async function handlePhishingWarning(page, currentUrl, forceDebug = false) {
861
995
  result.details = challengeInfo;
862
996
 
863
997
  if (forceDebug) {
864
- console.log(formatLogMessage('cloudflare', `Phishing warning detected on ${currentUrl}:`));
865
- console.log(formatLogMessage('cloudflare', ` Page Title: "${challengeInfo.title}"`));
866
- console.log(formatLogMessage('cloudflare', ` Current URL: ${challengeInfo.url}`));
867
- console.log(formatLogMessage('cloudflare', ` Body snippet: ${challengeInfo.bodySnippet}`));
998
+ // One structured line; matches the collapsed Challenge-detected log.
999
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Phishing warning detected on ${currentUrl}: title="${challengeInfo.title}" url=${challengeInfo.url}`));
1000
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Body snippet: ${challengeInfo.bodySnippet}`));
868
1001
  }
869
1002
 
870
1003
  try {
871
1004
  // Use safe click with shorter timeout
872
1005
  await safeClick(page, 'a[href*="continue"]', TIMEOUTS.PHISHING_CLICK);
873
- await safeWaitForNavigation(page, TIMEOUTS.PHISHING_NAVIGATION);
874
-
1006
+ await safeWaitForNavigation(page, TIMEOUTS.PHISHING_NAVIGATION, forceDebug);
1007
+
875
1008
  result.success = true;
876
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Successfully bypassed phishing warning for ${currentUrl}`));
1009
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Successfully bypassed phishing warning for ${currentUrl}`));
877
1010
  } catch (clickError) {
878
1011
  result.error = `Failed to click continue button: ${clickError.message}`;
879
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Failed to bypass phishing warning: ${clickError.message}`));
1012
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Failed to bypass phishing warning: ${clickError.message}`));
880
1013
  }
881
1014
  } else {
882
- if (forceDebug) console.log(formatLogMessage('cloudflare', `No phishing warning detected on ${currentUrl}`));
1015
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} No phishing warning detected on ${currentUrl}`));
883
1016
  result.success = true; // No warning to handle
884
1017
  }
885
1018
  } catch (error) {
886
1019
  result.error = error.message;
887
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Phishing warning check failed for ${currentUrl}: ${error.message}`));
1020
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Phishing warning check failed for ${currentUrl}: ${error.message}`));
888
1021
  }
889
1022
 
890
1023
  return result;
@@ -917,10 +1050,10 @@ async function handleVerificationChallenge(page, currentUrl, forceDebug = false)
917
1050
  };
918
1051
 
919
1052
  try {
920
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Checking for verification challenge on ${currentUrl}`));
1053
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Checking for verification challenge on ${currentUrl}`));
921
1054
 
922
1055
  // Reduced wait time
923
- await waitForTimeout(page, FAST_TIMEOUTS.CHALLENGE_WAIT);
1056
+ await fastTimeout(FAST_TIMEOUTS.CHALLENGE_WAIT);
924
1057
 
925
1058
  const challengeInfo = await analyzeCloudflareChallenge(page);
926
1059
  result.details = challengeInfo;
@@ -929,27 +1062,21 @@ async function handleVerificationChallenge(page, currentUrl, forceDebug = false)
929
1062
  result.attempted = true;
930
1063
 
931
1064
  if (forceDebug) {
932
- console.log(formatLogMessage('cloudflare', `Challenge detected on ${currentUrl}:`));
933
- console.log(formatLogMessage('cloudflare', ` Page Title: "${challengeInfo.title}"`));
934
- console.log(formatLogMessage('cloudflare', ` Current URL: ${challengeInfo.url}`));
935
- console.log(formatLogMessage('cloudflare', ` Is Turnstile: ${challengeInfo.isTurnstile}`));
936
- console.log(formatLogMessage('cloudflare', ` Is JS Challenge: ${challengeInfo.isJSChallenge}`));
937
- console.log(formatLogMessage('cloudflare', ` Has Legacy Checkbox: ${challengeInfo.hasLegacyCheckbox}`));
938
- console.log(formatLogMessage('cloudflare', ` Has Turnstile Iframe: ${challengeInfo.hasTurnstileIframe}`));
939
- console.log(formatLogMessage('cloudflare', ` Has Turnstile Container: ${challengeInfo.hasTurnstileContainer}`));
940
- console.log(formatLogMessage('cloudflare', ` Has Turnstile Checkbox: ${challengeInfo.hasTurnstileCheckbox}`));
941
- console.log(formatLogMessage('cloudflare', ` Has CAPTCHA: ${challengeInfo.hasCaptcha}`));
942
- console.log(formatLogMessage('cloudflare', ` Has Challenge Running: ${challengeInfo.hasChallengeRunning}`));
943
- console.log(formatLogMessage('cloudflare', ` Has Data Ray: ${challengeInfo.hasDataRay}`));
944
- console.log(formatLogMessage('cloudflare', ` Has Turnstile Response: ${challengeInfo.hasTurnstileResponse}`));
945
- console.log(formatLogMessage('cloudflare', ` Body snippet: ${challengeInfo.bodySnippet}`));
1065
+ // One structured line instead of 14 separate log calls. Flags use
1066
+ // single-letter shorthand (t/f) to keep the line scannable; full
1067
+ // bodySnippet stays on its own line because it's the only field
1068
+ // that's worth more than a column-width of attention.
1069
+ const f = (v) => v ? 't' : 'f';
1070
+ const ci = challengeInfo;
1071
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Challenge detected on ${currentUrl}: title="${ci.title}" url=${ci.url} turnstile=${f(ci.isTurnstile)} js=${f(ci.isJSChallenge)} legacy=${f(ci.hasLegacyCheckbox)} iframe=${f(ci.hasTurnstileIframe)} container=${f(ci.hasTurnstileContainer)} checkbox=${f(ci.hasTurnstileCheckbox)} captcha=${f(ci.hasCaptcha)} running=${f(ci.hasChallengeRunning)} dataRay=${f(ci.hasDataRay)} tsResponse=${f(ci.hasTurnstileResponse)}`));
1072
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Body snippet: ${ci.bodySnippet}`));
946
1073
  }
947
1074
 
948
1075
  // Check for CAPTCHA that requires human intervention
949
1076
  if (challengeInfo.hasCaptcha) {
950
1077
  result.requiresHuman = true;
951
1078
  result.error = 'CAPTCHA detected - requires human intervention';
952
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Skipping automatic bypass due to CAPTCHA requirement`));
1079
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Skipping automatic bypass due to CAPTCHA requirement`));
953
1080
  return result;
954
1081
  }
955
1082
 
@@ -960,12 +1087,12 @@ async function handleVerificationChallenge(page, currentUrl, forceDebug = false)
960
1087
  result.method = solveResult.method;
961
1088
 
962
1089
  } else {
963
- if (forceDebug) console.log(formatLogMessage('cloudflare', `No verification challenge detected on ${currentUrl}`));
1090
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} No verification challenge detected on ${currentUrl}`));
964
1091
  result.success = true;
965
1092
  }
966
1093
  } catch (error) {
967
1094
  result.error = error.message;
968
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge check failed for ${currentUrl}: ${error.message}`));
1095
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Challenge check failed for ${currentUrl}: ${error.message}`));
969
1096
  }
970
1097
 
971
1098
  return result;
@@ -974,179 +1101,146 @@ async function handleVerificationChallenge(page, currentUrl, forceDebug = false)
974
1101
  /**
975
1102
  * Enhanced challenge handling with retry logic and loop detection
976
1103
  */
977
- async function handleVerificationChallengeWithRetries(page, currentUrl, siteConfig, forceDebug = false) {
978
- const retryConfig = getRetryConfig(siteConfig);
979
- const visitedUrls = []; // Track URLs to detect redirect loops
1104
+ /**
1105
+ * Generic retry harness shared by the verification-challenge and
1106
+ * phishing-warning paths (was ~150 lines of duplicated try/catch/backoff
1107
+ * before extraction). Resolves with the inner result + bookkeeping fields
1108
+ * (attempts, optional maxRetriesExceeded, optional errorType). Never
1109
+ * rejects — the inner attemptFn's exceptions are categorized and either
1110
+ * retried or bundled into a failure-result return.
1111
+ *
1112
+ * @param {object} cfg
1113
+ * @param {string} cfg.label - Human label for logs ("Challenge" / "Phishing warning")
1114
+ * @param {object} cfg.retryConfig - From getRetryConfig(siteConfig)
1115
+ * @param {boolean} cfg.forceDebug
1116
+ * @param {(attempt:number) => Promise<object>} cfg.attemptFn
1117
+ * @param {object} [cfg.failureShape] - Extra fields merged into the
1118
+ * error/exhaustion return objects (e.g. {requiresHuman:false,method:null}
1119
+ * for the challenge path so its callers always see those keys).
1120
+ * @param {(attempt:number) => Promise<object|null>} [cfg.preIteration]
1121
+ * Optional hook fired before each attempt. Return a result object to
1122
+ * short-circuit the harness (e.g. challenge loop-detected); return null
1123
+ * to proceed with the attempt.
1124
+ * @param {(attempt:number) => Promise<void>} [cfg.betweenAttempts]
1125
+ * Optional hook fired after a failed attempt but before the next one
1126
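+ * (e.g. page.reload() between challenge retries). Runs after the retry delay, and only when attemptFn returned a failed result; it is skipped when the attempt threw.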
1127
+ */
1128
+ async function runWithRetries(cfg) {
1129
+ const { label, retryConfig, forceDebug, attemptFn,
1130
+ failureShape = {}, preIteration, betweenAttempts } = cfg;
980
1131
  let lastError = null;
981
-
982
- if (forceDebug) {
983
- console.log(formatLogMessage('cloudflare', `Starting verification challenge with max ${retryConfig.maxAttempts} attempts`));
984
- }
985
-
1132
+
986
1133
  for (let attempt = 1; attempt <= retryConfig.maxAttempts; attempt++) {
987
1134
  try {
988
- const currentPageUrl = await page.url();
989
- visitedUrls.push(currentPageUrl);
990
-
991
- // Check for redirect loops
992
- if (detectChallengeLoop(currentPageUrl, visitedUrls)) {
993
- const error = `Challenge redirect loop detected after ${attempt} attempts. URLs: ${visitedUrls.slice(-3).join(' -> ')}`;
994
- if (forceDebug) {
995
- console.log(formatLogMessage('cloudflare', error));
996
- }
997
- return {
998
- success: false,
999
- attempted: true,
1000
- error: error,
1001
- details: null,
1002
- requiresHuman: false,
1003
- method: null,
1004
- attempts: attempt,
1005
- loopDetected: true
1006
- };
1135
+ if (preIteration) {
1136
+ const earlyReturn = await preIteration(attempt);
1137
+ if (earlyReturn) return earlyReturn;
1007
1138
  }
1008
-
1139
+
1009
1140
  if (forceDebug && attempt > 1) {
1010
- console.log(formatLogMessage('cloudflare', `Challenge attempt ${attempt}/${retryConfig.maxAttempts} for ${currentUrl}`));
1141
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} ${label} attempt ${attempt}/${retryConfig.maxAttempts}`));
1011
1142
  }
1012
-
1013
- const result = await handleVerificationChallenge(page, currentUrl, forceDebug);
1014
-
1143
+
1144
+ const result = await attemptFn(attempt);
1145
+
1015
1146
  if (result.success || result.requiresHuman || !retryConfig.retryOnError) {
1016
1147
  if (forceDebug && attempt > 1) {
1017
- console.log(`[debug][cloudflare] Challenge ${result.success ? 'succeeded' : 'failed'} on attempt ${attempt}`);
1148
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} ${label} ${result.success ? 'succeeded' : 'failed'} on attempt ${attempt}`));
1018
1149
  }
1019
1150
  return { ...result, attempts: attempt };
1020
1151
  }
1021
-
1022
- // If this wasn't the last attempt, wait before retrying
1152
+
1023
1153
  if (attempt < retryConfig.maxAttempts) {
1024
1154
  const delay = getRetryDelay(attempt);
1025
1155
  if (forceDebug) {
1026
- console.log(formatLogMessage('cloudflare', `Challenge attempt ${attempt} failed, retrying in ${delay}ms: ${result.error}`));
1156
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} ${label} attempt ${attempt} failed, retrying in ${delay}ms: ${result.error}`));
1027
1157
  }
1028
1158
  await new Promise(resolve => setTimeout(resolve, delay));
1029
-
1030
- // Refresh the page to get a fresh challenge
1031
- try {
1032
- await page.reload({ waitUntil: 'domcontentloaded', timeout: 10000 });
1033
- await waitForTimeout(page, 2000); // Give challenge time to load
1034
- } catch (reloadErr) {
1035
- if (forceDebug) {
1036
- console.log(formatLogMessage('cloudflare', `Page reload failed on attempt ${attempt}: ${reloadErr.message}`));
1037
- }
1038
- }
1159
+ if (betweenAttempts) await betweenAttempts(attempt);
1039
1160
  }
1040
-
1041
1161
  lastError = result.error;
1042
1162
  } catch (error) {
1043
1163
  lastError = error.message;
1044
1164
  const errorType = categorizeError(error);
1045
-
1165
+
1046
1166
  if (forceDebug) {
1047
- console.warn(formatLogMessage('cloudflare', `Challenge attempt ${attempt}/${retryConfig.maxAttempts} failed: ${error.message} [${errorType}]`));
1167
+ console.warn(formatLogMessage('warn', `${CLOUDFLARE_TAG} ${label} attempt ${attempt}/${retryConfig.maxAttempts} failed: ${error.message} [${errorType}]`));
1048
1168
  }
1049
-
1050
- // Don't retry if error type is not retryable or if it's the last attempt
1169
+
1051
1170
  if (!retryConfig.retryableErrors.includes(errorType) || attempt === retryConfig.maxAttempts) {
1052
1171
  return {
1053
- success: false,
1054
- attempted: true,
1055
- error: lastError,
1056
- details: null,
1057
- requiresHuman: false,
1058
- method: null,
1059
- attempts: attempt,
1060
- errorType: errorType
1172
+ success: false, attempted: true, error: lastError, details: null,
1173
+ attempts: attempt, errorType, ...failureShape
1061
1174
  };
1062
1175
  }
1063
-
1064
- // Wait before retrying with exponential backoff
1065
1176
  if (attempt < retryConfig.maxAttempts) {
1066
1177
  await new Promise(resolve => setTimeout(resolve, getRetryDelay(attempt)));
1067
1178
  }
1068
1179
  }
1069
1180
  }
1070
-
1181
+
1071
1182
  return {
1072
- success: false,
1073
- attempted: true,
1074
- error: `All ${retryConfig.maxAttempts} challenge attempts failed. Last error: ${lastError}`,
1075
- details: null,
1076
- requiresHuman: false,
1077
- method: null,
1078
- attempts: retryConfig.maxAttempts,
1079
- maxRetriesExceeded: true
1183
+ success: false, attempted: true,
1184
+ error: `All ${retryConfig.maxAttempts} ${label.toLowerCase()} attempts failed. Last error: ${lastError}`,
1185
+ details: null, attempts: retryConfig.maxAttempts, maxRetriesExceeded: true,
1186
+ ...failureShape
1080
1187
  };
1081
1188
  }
1082
1189
 
1083
- /**
1084
- * Enhanced phishing warning handling with retry logic
1085
- */
1086
- async function handlePhishingWarningWithRetries(page, currentUrl, siteConfig, forceDebug = false) {
1190
+ async function handleVerificationChallengeWithRetries(page, currentUrl, siteConfig, forceDebug = false) {
1087
1191
  const retryConfig = getRetryConfig(siteConfig);
1088
- let lastError = null;
1089
-
1090
- for (let attempt = 1; attempt <= retryConfig.maxAttempts; attempt++) {
1091
- try {
1092
- if (forceDebug && attempt > 1) {
1093
- console.log(formatLogMessage('cloudflare', `Phishing warning attempt ${attempt}/${retryConfig.maxAttempts} for ${currentUrl}`));
1094
- }
1095
-
1096
- const result = await handlePhishingWarning(page, currentUrl, forceDebug);
1097
-
1098
- if (result.success || !retryConfig.retryOnError) {
1099
- if (forceDebug && attempt > 1) {
1100
- console.log(`[debug][cloudflare] Phishing warning ${result.success ? 'succeeded' : 'failed'} on attempt ${attempt}`);
1101
- }
1102
- return { ...result, attempts: attempt };
1103
- }
1104
-
1105
- // If this wasn't the last attempt, wait before retrying
1106
- if (attempt < retryConfig.maxAttempts) {
1107
- const delay = getRetryDelay(attempt);
1108
- if (forceDebug) {
1109
- console.log(formatLogMessage('cloudflare', `Phishing warning attempt ${attempt} failed, retrying in ${delay}ms: ${result.error}`));
1110
- }
1111
- await new Promise(resolve => setTimeout(resolve, delay));
1112
- }
1113
-
1114
- lastError = result.error;
1115
- } catch (error) {
1116
- lastError = error.message;
1117
- const errorType = categorizeError(error);
1118
-
1119
- if (forceDebug) {
1120
- console.warn(formatLogMessage('cloudflare', `Phishing warning attempt ${attempt}/${retryConfig.maxAttempts} failed: ${error.message} [${errorType}]`));
1121
- }
1122
-
1123
- // Don't retry if error type is not retryable or if it's the last attempt
1124
- if (!retryConfig.retryableErrors.includes(errorType) || attempt === retryConfig.maxAttempts) {
1192
+ const visitedUrls = []; // Track URLs to detect redirect loops
1193
+
1194
+ if (forceDebug) {
1195
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Starting verification challenge with max ${retryConfig.maxAttempts} attempts`));
1196
+ }
1197
+
1198
+ return runWithRetries({
1199
+ label: 'Challenge',
1200
+ retryConfig,
1201
+ forceDebug,
1202
+ failureShape: { requiresHuman: false, method: null },
1203
+ preIteration: async (attempt) => {
1204
+ const currentPageUrl = await page.url();
1205
+ // Loop check BEFORE push — see detectChallengeLoop notes; the prior
1206
+ // ordering counted the just-pushed URL against itself.
1207
+ if (detectChallengeLoop(currentPageUrl, visitedUrls)) {
1208
+ const error = `Challenge redirect loop detected after ${attempt} attempts. URLs: ${visitedUrls.slice(-3).join(' -> ')}`;
1209
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} ${error}`));
1125
1210
  return {
1126
- success: false,
1127
- attempted: true,
1128
- error: lastError,
1129
- details: null,
1130
- attempts: attempt,
1131
- errorType: errorType
1211
+ success: false, attempted: true, error, details: null,
1212
+ requiresHuman: false, method: null, attempts: attempt, loopDetected: true
1132
1213
  };
1133
1214
  }
1134
-
1135
- // Wait before retrying with exponential backoff
1136
- if (attempt < retryConfig.maxAttempts) {
1137
- await new Promise(resolve => setTimeout(resolve, getRetryDelay(attempt)));
1215
+ visitedUrls.push(currentPageUrl);
1216
+ return null;
1217
+ },
1218
+ betweenAttempts: async (attempt) => {
1219
+ // Refresh the page to get a fresh challenge between retries.
1220
+ try {
1221
+ await page.reload({ waitUntil: 'domcontentloaded', timeout: 10000 });
1222
+ await fastTimeout(2000);
1223
+ } catch (reloadErr) {
1224
+ if (forceDebug) {
1225
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Page reload failed on attempt ${attempt}: ${reloadErr.message}`));
1226
+ }
1138
1227
  }
1139
- }
1140
- }
1141
-
1142
- return {
1143
- success: false,
1144
- attempted: true,
1145
- error: `All ${retryConfig.maxAttempts} phishing warning attempts failed. Last error: ${lastError}`,
1146
- details: null,
1147
- attempts: retryConfig.maxAttempts,
1148
- maxRetriesExceeded: true
1149
- };
1228
+ },
1229
+ attemptFn: () => handleVerificationChallenge(page, currentUrl, forceDebug)
1230
+ });
1231
+ }
1232
+
1233
+ /**
1234
+ * Enhanced phishing warning handling with retry logic
1235
+ */
1236
+ async function handlePhishingWarningWithRetries(page, currentUrl, siteConfig, forceDebug = false) {
1237
+ const retryConfig = getRetryConfig(siteConfig);
1238
+ return runWithRetries({
1239
+ label: 'Phishing warning',
1240
+ retryConfig,
1241
+ forceDebug,
1242
+ attemptFn: () => handlePhishingWarning(page, currentUrl, forceDebug)
1243
+ });
1150
1244
  }
1151
1245
 
1152
1246
 
@@ -1183,7 +1277,7 @@ async function attemptChallengeSolveWithTimeout(page, currentUrl, challengeInfo,
1183
1277
  clearTimeout(timeoutId);
1184
1278
  }
1185
1279
  result.error = `Challenge solving timed out: ${error.message}`;
1186
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge solving timeout for ${currentUrl}`));
1280
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Challenge solving timeout for ${currentUrl}`));
1187
1281
  return result;
1188
1282
  }
1189
1283
  }
@@ -1201,65 +1295,65 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
1201
1295
  // Method 1: Handle JS challenges (wait for automatic completion) - Most reliable
1202
1296
  if (challengeInfo.isJSChallenge) {
1203
1297
  try {
1204
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Attempting JS challenge wait for ${currentUrl}`));
1298
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Attempting JS challenge wait for ${currentUrl}`));
1205
1299
 
1206
1300
  const jsResult = await waitForJSChallengeCompletion(page, forceDebug);
1207
1301
  if (jsResult.success) {
1208
1302
  // Wait for redirect after challenge completion
1209
1303
  try {
1210
1304
  await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 10000 });
1211
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Post-challenge redirect completed for ${currentUrl}`));
1305
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Post-challenge redirect completed for ${currentUrl}`));
1212
1306
  } catch (navErr) {
1213
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Post-challenge redirect timeout (may already be on target page): ${navErr.message}`));
1307
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Post-challenge redirect timeout (may already be on target page): ${navErr.message}`));
1214
1308
  }
1215
1309
  result.success = true;
1216
1310
  result.method = 'js_challenge_wait';
1217
- if (forceDebug) console.log(formatLogMessage('cloudflare', `JS challenge completed successfully for ${currentUrl}`));
1311
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} JS challenge completed successfully for ${currentUrl}`));
1218
1312
  return result;
1219
1313
  }
1220
1314
  } catch (jsError) {
1221
- if (forceDebug) console.log(formatLogMessage('cloudflare', `JS challenge wait failed for ${currentUrl}: ${jsError.message}`));
1315
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} JS challenge wait failed for ${currentUrl}: ${jsError.message}`));
1222
1316
  }
1223
1317
  } else if (forceDebug) {
1224
- console.log(formatLogMessage('cloudflare', `Skipping JS challenge method (not detected)`));
1318
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Skipping JS challenge method (not detected)`));
1225
1319
  }
1226
1320
 
1227
1321
  // Method 2: Handle Turnstile challenges (interactive)
1228
1322
  if (challengeInfo.isTurnstile) {
1229
1323
  try {
1230
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Attempting Turnstile method for ${currentUrl}`));
1324
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Attempting Turnstile method for ${currentUrl}`));
1231
1325
 
1232
1326
  const turnstileResult = await handleTurnstileChallenge(page, forceDebug);
1233
1327
  if (turnstileResult.success) {
1234
1328
  result.success = true;
1235
1329
  result.method = 'turnstile';
1236
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile challenge solved successfully for ${currentUrl}`));
1330
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Turnstile challenge solved successfully for ${currentUrl}`));
1237
1331
  return result;
1238
1332
  }
1239
1333
  } catch (turnstileError) {
1240
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile method failed for ${currentUrl}: ${turnstileError.message}`));
1334
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Turnstile method failed for ${currentUrl}: ${turnstileError.message}`));
1241
1335
  }
1242
1336
  } else if (forceDebug) {
1243
- console.log(formatLogMessage('cloudflare', `Skipping Turnstile method (not detected)`));
1337
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Skipping Turnstile method (not detected)`));
1244
1338
  }
1245
1339
 
1246
1340
  // Method 3: Legacy checkbox interaction (fallback)
1247
1341
  if (challengeInfo.hasLegacyCheckbox) {
1248
1342
  try {
1249
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Attempting legacy checkbox method for ${currentUrl}`));
1343
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Attempting legacy checkbox method for ${currentUrl}`));
1250
1344
 
1251
1345
  const legacyResult = await handleLegacyCheckbox(page, forceDebug);
1252
1346
  if (legacyResult.success) {
1253
1347
  result.success = true;
1254
1348
  result.method = 'legacy_checkbox';
1255
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Legacy checkbox method succeeded for ${currentUrl}`));
1349
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Legacy checkbox method succeeded for ${currentUrl}`));
1256
1350
  return result;
1257
1351
  }
1258
1352
  } catch (legacyError) {
1259
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Legacy checkbox method failed for ${currentUrl}: ${legacyError.message}`));
1353
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Legacy checkbox method failed for ${currentUrl}: ${legacyError.message}`));
1260
1354
  }
1261
1355
  } else if (forceDebug) {
1262
- console.log(formatLogMessage('cloudflare', `Skipping legacy checkbox method (not detected)`));
1356
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Skipping legacy checkbox method (not detected)`));
1263
1357
  }
1264
1358
 
1265
1359
  if (!result.success) {
@@ -1271,8 +1365,8 @@ async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug
1271
1365
  url: window.location.href,
1272
1366
  body: (document.body?.textContent || '').substring(0, 300)
1273
1367
  }));
1274
- console.log(formatLogMessage('cloudflare', `Post-attempt page state: title="${postState.title}" url=${postState.url}`));
1275
- console.log(formatLogMessage('cloudflare', `Post-attempt body: ${postState.body}`));
1368
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Post-attempt page state: title="${postState.title}" url=${postState.url}`));
1369
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Post-attempt body: ${postState.body}`));
1276
1370
  } catch (_) {}
1277
1371
  }
1278
1372
  }
@@ -1290,22 +1384,27 @@ async function handleEmbeddedIframeChallenge(page, forceDebug = false) {
1290
1384
  };
1291
1385
 
1292
1386
  try {
1293
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Checking for embedded iframe challenges`));
1387
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Checking for embedded iframe challenges`));
1294
1388
 
1295
1389
  // Use CDP-level frame detection -- bypasses closed shadow roots
1296
1390
  const frames = page.frames();
1297
1391
  if (forceDebug) {
1298
- console.log(formatLogMessage('cloudflare', `Available frames (${frames.length}):`));
1392
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Available frames (${frames.length}):`));
1299
1393
  for (const f of frames) {
1300
- console.log(formatLogMessage('cloudflare', ` ${f.url()}`));
1394
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} ${f.url()}`));
1301
1395
  }
1302
1396
  }
1303
1397
  const challengeFrame = frames.find(frame => {
1304
1398
  const frameUrl = frame.url();
1399
+ // `/turnstile/if/` is the canonical Cloudflare path; the bare
1400
+ // `turnstile` substring check that used to also live here was a
1401
+ // strict superset of it, making the narrower check dead. Kept
1402
+ // the specific path so unrelated iframes whose URL happens to
1403
+ // contain "turnstile" elsewhere (e.g. third-party CAPTCHA
1404
+ // wrappers, query params) don't get picked up.
1305
1405
  return frameUrl.includes('challenges.cloudflare.com') ||
1306
1406
  frameUrl.includes('/cdn-cgi/challenge-platform/') ||
1307
- frameUrl.includes('/turnstile/if/') ||
1308
- frameUrl.includes('turnstile');
1407
+ frameUrl.includes('/turnstile/if/');
1309
1408
  });
1310
1409
 
1311
1410
  if (!challengeFrame) {
@@ -1313,9 +1412,9 @@ async function handleEmbeddedIframeChallenge(page, forceDebug = false) {
1313
1412
  return result;
1314
1413
  }
1315
1414
 
1316
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Interacting with iframe: ${challengeFrame.url()}`));
1415
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Interacting with iframe: ${challengeFrame.url()}`));
1317
1416
 
1318
- await waitForTimeout(page, 500);
1417
+ await fastTimeout(500);
1319
1418
 
1320
1419
  let checkboxInteractionSuccess = false;
1321
1420
  try {
@@ -1330,14 +1429,14 @@ async function handleEmbeddedIframeChallenge(page, forceDebug = false) {
1330
1429
 
1331
1430
  if (shadowResult.clicked) {
1332
1431
  checkboxInteractionSuccess = true;
1333
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM click succeeded: ${shadowResult.selector}`));
1432
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Shadow DOM click succeeded: ${shadowResult.selector}`));
1334
1433
  } else if (shadowResult.found && shadowResult.x > 0) {
1335
1434
  await page.mouse.click(shadowResult.x, shadowResult.y);
1336
1435
  checkboxInteractionSuccess = true;
1337
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM mouse fallback at (${shadowResult.x}, ${shadowResult.y})`));
1436
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Shadow DOM mouse fallback at (${shadowResult.x}, ${shadowResult.y})`));
1338
1437
  }
1339
1438
  } catch (shadowErr) {
1340
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM click failed: ${shadowErr.message}`));
1439
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Shadow DOM click failed: ${shadowErr.message}`));
1341
1440
  }
1342
1441
 
1343
1442
  if (!checkboxInteractionSuccess) {
@@ -1346,10 +1445,10 @@ async function handleEmbeddedIframeChallenge(page, forceDebug = false) {
1346
1445
  const iframeElement = await page.$('iframe[src*="challenges.cloudflare.com"]');
1347
1446
  if (iframeElement) {
1348
1447
  await iframeElement.click();
1349
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Clicked iframe container as fallback`));
1448
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Clicked iframe container as fallback`));
1350
1449
  }
1351
1450
  } catch (containerClickError) {
1352
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Container click failed: ${containerClickError.message}`));
1451
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Container click failed: ${containerClickError.message}`));
1353
1452
  }
1354
1453
  }
1355
1454
 
@@ -1371,15 +1470,15 @@ async function handleEmbeddedIframeChallenge(page, forceDebug = false) {
1371
1470
  ]);
1372
1471
 
1373
1472
  result.success = true;
1374
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Embedded iframe challenge completed`));
1473
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Embedded iframe challenge completed`));
1375
1474
  } catch (completionError) {
1376
1475
  result.error = `Challenge completion check failed: ${completionError.message}`;
1377
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Completion check failed: ${completionError.message}`));
1476
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Completion check failed: ${completionError.message}`));
1378
1477
  }
1379
1478
 
1380
1479
  } catch (error) {
1381
1480
  result.error = `Embedded iframe handling failed: ${error.message}`;
1382
- if (forceDebug) console.log(formatLogMessage('cloudflare', result.error));
1481
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} ${result.error}`));
1383
1482
  }
1384
1483
 
1385
1484
  return result;
@@ -1397,17 +1496,25 @@ async function waitForJSChallengeCompletion(page, forceDebug = false) {
1397
1496
  let timeoutId = null;
1398
1497
 
1399
1498
  try {
1400
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Waiting for JS challenge completion`));
1499
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Waiting for JS challenge completion`));
1401
1500
 
1402
1501
  const timeoutPromise = new Promise((_, reject) => {
1403
1502
  timeoutId = setTimeout(() => reject(new Error('JS challenge timeout')), TIMEOUTS.JS_CHALLENGE_BUFFER);
1404
1503
  });
1405
1504
 
1406
- // Reduced timeout for JS challenge completion
1505
+ // Reduced timeout for JS challenge completion.
1506
+ // Cap body.textContent to 2KB per poll -- same cap as
1507
+ // analyzeCloudflareChallenge / checkChallengeCompletion. waitForFunction
1508
+ // polls at ~100ms over up to 10s = ~100 evaluations; on a content-heavy
1509
+ // page that resolves the challenge and then renders the original page,
1510
+ // uncapped textContent could materialize MB of DOM text per poll. The
1511
+ // four substrings we're testing for ("Verification successful",
1512
+ // "Checking your browser", "Please wait while we verify") all appear
1513
+ // well within the first 2KB of CF challenge pages.
1407
1514
  await Promise.race([
1408
1515
  page.waitForFunction(
1409
1516
  () => {
1410
- const bodyText = document.body.textContent;
1517
+ const bodyText = document.body ? document.body.textContent.substring(0, 2000) : '';
1411
1518
  if (bodyText.includes('Verification successful')) return true;
1412
1519
  return !bodyText.includes('Checking your browser') &&
1413
1520
  !bodyText.includes('Please wait while we verify') &&
@@ -1425,7 +1532,7 @@ async function waitForJSChallengeCompletion(page, forceDebug = false) {
1425
1532
  }
1426
1533
 
1427
1534
  result.success = true;
1428
- if (forceDebug) console.log(formatLogMessage('cloudflare', `JS challenge completed automatically`));
1535
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} JS challenge completed automatically`));
1429
1536
  } catch (error) {
1430
1537
  // Clear timeout on error
1431
1538
  if (timeoutId) {
@@ -1433,7 +1540,7 @@ async function waitForJSChallengeCompletion(page, forceDebug = false) {
1433
1540
  }
1434
1541
 
1435
1542
  result.error = `JS challenge timeout: ${error.message}`;
1436
- if (forceDebug) console.log(formatLogMessage('cloudflare', `JS challenge wait failed: ${error.message}`));
1543
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} JS challenge wait failed: ${error.message}`));
1437
1544
  }
1438
1545
 
1439
1546
  return result;
@@ -1454,12 +1561,9 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
1454
1561
  return { ...result, success: true };
1455
1562
  }
1456
1563
 
1457
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Embedded iframe failed: ${iframeResult.error}, trying legacy method`));
1564
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Embedded iframe failed: ${iframeResult.error}, trying legacy method`));
1458
1565
 
1459
1566
  try {
1460
- // Use fast timeout for Turnstile operations
1461
- const turnstileTimeout = FAST_TIMEOUTS.TURNSTILE_OPERATION;
1462
-
1463
1567
  const turnstileSelectors = [
1464
1568
  'iframe[src*="challenges.cloudflare.com"]',
1465
1569
  'iframe[title*="Widget containing a Cloudflare"]',
@@ -1480,21 +1584,21 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
1480
1584
  frame.url().includes('turnstile')
1481
1585
  );
1482
1586
  if (turnstileFrame) {
1483
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Found Turnstile iframe using selector: ${selector}`));
1587
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Found Turnstile iframe using selector: ${selector}`));
1484
1588
  break;
1485
1589
  }
1486
1590
  } catch (e) {
1487
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Selector ${selector} not found or timed out`));
1591
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Selector ${selector} not found or timed out`));
1488
1592
  continue;
1489
1593
  }
1490
1594
  }
1491
1595
 
1492
1596
  if (turnstileFrame) {
1493
1597
  if (forceDebug) {
1494
- console.log(formatLogMessage('cloudflare', `Found Turnstile iframe with URL: ${turnstileFrame.url()}`));
1598
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Found Turnstile iframe with URL: ${turnstileFrame.url()}`));
1495
1599
  }
1496
1600
 
1497
- await waitForTimeout(page, FAST_TIMEOUTS.ELEMENT_INTERACTION_DELAY);
1601
+ await fastTimeout(FAST_TIMEOUTS.ELEMENT_INTERACTION_DELAY);
1498
1602
 
1499
1603
  try {
1500
1604
  const shadowResult = await clickInShadowDOM(turnstileFrame, [
@@ -1507,13 +1611,13 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
1507
1611
  ], forceDebug);
1508
1612
 
1509
1613
  if (shadowResult.clicked) {
1510
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile shadow DOM click succeeded: ${shadowResult.selector}`));
1614
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Turnstile shadow DOM click succeeded: ${shadowResult.selector}`));
1511
1615
  } else if (shadowResult.found && shadowResult.x > 0) {
1512
1616
  await page.mouse.click(shadowResult.x, shadowResult.y);
1513
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile shadow DOM mouse fallback at (${shadowResult.x}, ${shadowResult.y})`));
1617
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Turnstile shadow DOM mouse fallback at (${shadowResult.x}, ${shadowResult.y})`));
1514
1618
  }
1515
1619
  } catch (shadowErr) {
1516
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Shadow DOM fallback failed: ${shadowErr.message}`));
1620
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Shadow DOM fallback failed: ${shadowErr.message}`));
1517
1621
  }
1518
1622
 
1519
1623
  // Wait for Turnstile completion with reduced timeout
@@ -1528,11 +1632,11 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
1528
1632
  new Promise((_, reject) => setTimeout(() => reject(new Error('Turnstile completion timeout')), TIMEOUTS.TURNSTILE_COMPLETION_BUFFER))
1529
1633
  ]);
1530
1634
 
1531
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile response token generated successfully`));
1635
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Turnstile response token generated successfully`));
1532
1636
  result.success = true;
1533
1637
  } else {
1534
1638
  // Try container-based Turnstile (non-iframe)
1535
- if (forceDebug) console.log(formatLogMessage('cloudflare', `No Turnstile iframe found, trying container-based approach`));
1639
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} No Turnstile iframe found, trying container-based approach`));
1536
1640
 
1537
1641
  const containerSelectors = [
1538
1642
  '.cf-turnstile',
@@ -1547,32 +1651,32 @@ async function handleTurnstileChallenge(page, forceDebug = false) {
1547
1651
  new Promise((_, reject) => setTimeout(() => reject(new Error('Container timeout')), FAST_TIMEOUTS.SELECTOR_WAIT + 500))
1548
1652
  ]);
1549
1653
 
1550
- await waitForTimeout(page, FAST_TIMEOUTS.ELEMENT_INTERACTION_DELAY);
1654
+ await fastTimeout(FAST_TIMEOUTS.ELEMENT_INTERACTION_DELAY);
1551
1655
  await page.click(selector);
1552
1656
 
1553
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Clicked Turnstile container: ${selector}`));
1657
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Clicked Turnstile container: ${selector}`));
1554
1658
 
1555
1659
  const completionCheck = await checkChallengeCompletion(page);
1556
1660
  if (completionCheck.isCompleted) {
1557
1661
  result.success = true;
1558
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Container-based Turnstile completed successfully`));
1662
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Container-based Turnstile completed successfully`));
1559
1663
  break;
1560
1664
  }
1561
1665
  } catch (e) {
1562
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Container selector ${selector} not found or failed`));
1666
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Container selector ${selector} not found or failed`));
1563
1667
  continue;
1564
1668
  }
1565
1669
  }
1566
1670
 
1567
1671
  if (!result.success) {
1568
1672
  result.error = 'Turnstile iframe/container not found or not interactive';
1569
- if (forceDebug) console.log(formatLogMessage('cloudflare', result.error));
1673
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} ${result.error}`));
1570
1674
  }
1571
1675
  }
1572
1676
 
1573
1677
  } catch (error) {
1574
1678
  result.error = `Turnstile handling failed: ${error.message}`;
1575
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile handling error: ${error.message}`));
1679
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Turnstile handling error: ${error.message}`));
1576
1680
  }
1577
1681
 
1578
1682
  return result;
@@ -1588,12 +1692,22 @@ async function handleLegacyCheckbox(page, forceDebug = false) {
1588
1692
  };
1589
1693
 
1590
1694
  try {
1591
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Attempting legacy checkbox challenge`));
1592
-
1695
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Attempting legacy checkbox challenge`));
1696
+
1697
+ // Aligned with the two selectors analyzeCloudflareChallenge uses to
1698
+ // set hasLegacyCheckbox (line ~789). A third selector
1699
+ // `.cf-turnstile input[type="checkbox"]` used to live here as a
1700
+ // fallback, but it had no matching detection entry -- meaning the
1701
+ // analyzer would never set hasLegacyCheckbox=true on a Turnstile-
1702
+ // embedded-checkbox page, so this handler was never invoked for it
1703
+ // anyway. Turnstile-embedded checkboxes are handled by
1704
+ // handleTurnstileChallenge's container-click path (clicking
1705
+ // `.cf-turnstile` triggers the embedded checkbox via CF's widget
1706
+ // script). Keeping the orphan selector here created a phantom
1707
+ // fallback that only fired in unreachable-in-practice scenarios.
1593
1708
  const legacySelectors = [
1594
1709
  'input[type="checkbox"]#challenge-form',
1595
- 'input[type="checkbox"][name="cf_captcha_kind"]',
1596
- '.cf-turnstile input[type="checkbox"]'
1710
+ 'input[type="checkbox"][name="cf_captcha_kind"]'
1597
1711
  ];
1598
1712
 
1599
1713
  for (const selector of legacySelectors) {
@@ -1606,29 +1720,29 @@ async function handleLegacyCheckbox(page, forceDebug = false) {
1606
1720
  const checkbox = await page.$(selector);
1607
1721
  if (checkbox) {
1608
1722
  await checkbox.click();
1609
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Clicked legacy checkbox: ${selector}`));
1723
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Clicked legacy checkbox: ${selector}`));
1610
1724
 
1611
1725
  const completionCheck = await checkChallengeCompletion(page);
1612
1726
  if (completionCheck.isCompleted) {
1613
1727
  result.success = true;
1614
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Legacy checkbox challenge completed successfully`));
1728
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Legacy checkbox challenge completed successfully`));
1615
1729
  break;
1616
1730
  }
1617
1731
  }
1618
1732
  } catch (e) {
1619
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Legacy selector ${selector} failed: ${e.message}`));
1733
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Legacy selector ${selector} failed: ${e.message}`));
1620
1734
  continue;
1621
1735
  }
1622
1736
  }
1623
1737
 
1624
1738
  if (!result.success) {
1625
1739
  result.error = 'No interactive legacy checkbox found';
1626
- if (forceDebug) console.log(formatLogMessage('cloudflare', result.error));
1740
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} ${result.error}`));
1627
1741
  }
1628
1742
 
1629
1743
  } catch (error) {
1630
1744
  result.error = `Legacy checkbox handling failed: ${error.message}`;
1631
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Legacy checkbox error: ${error.message}`));
1745
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Legacy checkbox error: ${error.message}`));
1632
1746
  }
1633
1747
 
1634
1748
  return result;
@@ -1708,44 +1822,101 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1708
1822
  const cfBypassEnabled = siteConfig.cloudflare_bypass === true || siteConfig.cloudflare_bypass === 'debug';
1709
1823
  const cfPhishEnabled = siteConfig.cloudflare_phish === true || siteConfig.cloudflare_phish === 'debug';
1710
1824
 
1711
- // Outcome-summary bookkeeping. Only paid for in debug mode — page.cookies()
1712
- // is a real CDP round-trip we don't want on every URL in production.
1825
+ // Outcome-summary bookkeeping. Cookie state (page.cookies() = CDP
1826
+ // round-trip) is paid for only in debug mode. The structured outcome
1827
+ // line ALSO fires as a warn in production for !overallSuccess /
1828
+ // timedOut so a normal scan log captures every failure with cf-ray
1829
+ // for correlation, without needing a debug re-run.
1713
1830
  // navInfo carries httpStatus + cfRay captured at page.goto time by the
1714
1831
  // caller (response object isn't reachable from the page after navigation).
1715
1832
  const startMs = Date.now();
1833
+ // Per-stage timing breakdown surfaced in the outcome line. Helps pinpoint
1834
+ // which stage dominates wall-clock when scans get slow.
1835
+ const timings = { quick: 0, phish: 0, challenge: 0 };
1716
1836
  let cookiesBefore = { cf_clearance: false, cf_bm: false };
1717
1837
  if (forceDebug) cookiesBefore = await getCfCookieState(page);
1718
1838
  let errorCode = null; // populated once quickDetection runs
1719
1839
  const logOutcome = async (result) => {
1720
- if (forceDebug) {
1721
- try {
1840
+ // One try wraps EVERYTHING summary-related: aggregate bump, outcome
1841
+ // computation, cookie reads, and log emission. The original code wrapped
1842
+ // only the log block, so a future throw in bumpAggregate would have
1843
+ // propagated past the "never let summary logging affect the return"
1844
+ // intent. Returning `result` from inside the try is fine -- there's no
1845
+ // finally and no further work after the log site.
1846
+ try {
1847
+ const durationMs = Date.now() - startMs;
1848
+ const outcome = buildOutcomeString(result, errorCode);
1849
+ // Always update aggregates regardless of debug mode -- so
1850
+ // getAggregateStats() returns useful end-of-scan numbers even in
1851
+ // silent production runs.
1852
+ bumpAggregate(outcome, result, durationMs);
1853
+
1854
+ // Three-tier production severity:
1855
+ // isFailure -> warn (overallSuccess=false OR adaptive timeout
1856
+ // fired -- the scanner did NOT do its job)
1857
+ // isUpstreamError -> info (CF returned a 5xx origin-error page; the
1858
+ // scanner did its job, the origin is just
1859
+ // unreachable. Not bypass-actionable but
1860
+ // worth per-URL visibility against CF's
1861
+ // edge logs.)
1862
+ // otherwise -> silent in production (debug still gets everything)
1863
+ const isFailure = !result.overallSuccess || result.timedOut;
1864
+ const isUpstreamError = !!result.cloudflareErrorPage;
1865
+ if (!forceDebug && !isFailure && !isUpstreamError) return result;
1866
+
1867
+ // Build common tail once (DRY: shared between debug, warn, and info lines).
1868
+ const statusTag = navInfo.httpStatus != null ? ` | http=${navInfo.httpStatus}` : '';
1869
+ const rayTag = navInfo.cfRay ? ` | cf-ray=${navInfo.cfRay}` : '';
1870
+ // Emit only non-zero stages -- previously every line carried
1871
+ // `q=2400ms p=0ms c=0ms` even on phishing-only or quick-only paths,
1872
+ // which was visually noisy in production warn output.
1873
+ const stageParts = [];
1874
+ if (timings.quick > 0) stageParts.push(`q=${timings.quick}ms`);
1875
+ if (timings.phish > 0) stageParts.push(`p=${timings.phish}ms`);
1876
+ if (timings.challenge > 0) stageParts.push(`c=${timings.challenge}ms`);
1877
+ const timingTag = stageParts.length > 0 ? ` | ${stageParts.join(' ')}` : '';
1878
+ const tail = `${outcome}${statusTag}${rayTag} | duration=${durationMs}ms${timingTag}`;
1879
+
1880
+ if (forceDebug) {
1722
1881
  const cookiesAfter = await getCfCookieState(page);
1723
- const outcome = buildOutcomeString(result, errorCode);
1724
1882
  const clearanceTag = cookiesAfter.cf_clearance
1725
1883
  ? (cookiesBefore.cf_clearance ? 'clearance=preexisting' : 'clearance=gained')
1726
1884
  : 'clearance=no';
1727
1885
  const bmTag = cookiesAfter.cf_bm
1728
1886
  ? (cookiesBefore.cf_bm ? 'cf_bm=preexisting' : 'cf_bm=gained')
1729
1887
  : 'cf_bm=no';
1730
- const statusTag = navInfo.httpStatus != null ? ` | http=${navInfo.httpStatus}` : '';
1731
- const rayTag = navInfo.cfRay ? ` | cf-ray=${navInfo.cfRay}` : '';
1732
- console.log(formatLogMessage('cloudflare', `Outcome for ${currentUrl}: ${outcome} | ${clearanceTag} | ${bmTag}${statusTag}${rayTag} | duration=${Date.now() - startMs}ms`));
1733
- } catch (_) { /* never let summary logging affect the return */ }
1734
- }
1888
+ // Debug line splices cookie state in BETWEEN outcome and the rest of
1889
+ // the tail, so it can't share the tail string verbatim -- but every
1890
+ // other component is computed once above.
1891
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Outcome for ${currentUrl}: ${outcome} | ${clearanceTag} | ${bmTag}${statusTag}${rayTag} | duration=${durationMs}ms${timingTag}`));
1892
+ } else if (isFailure) {
1893
+ // Production failure: skip cookie state (avoid CDP round-trip) but
1894
+ // keep cf-ray / http / timings -- enough to correlate failures
1895
+ // against CF's edge logs and identify slow stages. isFailure takes
1896
+ // precedence over isUpstreamError because a bypass that failed for
1897
+ // both reasons (overall failure + error page) is more meaningfully
1898
+ // categorized as a failure than as an upstream issue.
1899
+ console.warn(formatLogMessage('warn', `${CLOUDFLARE_TAG} Outcome for ${currentUrl}: ${tail}`));
1900
+ } else {
1901
+ // Production upstream-error (5xx). Routed at info severity because
1902
+ // the scanner did everything it could -- the origin is just down.
1903
+ console.log(formatLogMessage('info', `${CLOUDFLARE_TAG} Outcome for ${currentUrl}: ${tail}`));
1904
+ }
1905
+ } catch (_) { /* never let summary logging affect the return */ }
1735
1906
  return result;
1736
1907
  };
1737
1908
 
1738
1909
  if (cfDebug && !_moduleVersionLogged) {
1739
1910
  // Print once per process; the version is global and doesn't change
1740
1911
  // between URLs. Subsequent calls stay silent.
1741
- console.log(formatLogMessage('cloudflare', `Using Cloudflare module v${CLOUDFLARE_MODULE_VERSION}`));
1912
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Using Cloudflare module v${CLOUDFLARE_MODULE_VERSION}`));
1742
1913
  _moduleVersionLogged = true;
1743
1914
  }
1744
1915
 
1745
1916
  // VALIDATE URL FIRST - Skip protection handling for non-HTTP(S) URLs
1746
1917
  if (!shouldProcessUrl(currentUrl, forceDebug)) {
1747
1918
  if (forceDebug) {
1748
- console.log(formatLogMessage('cloudflare', `Skipping protection handling for non-HTTP(S) URL: ${currentUrl}`));
1919
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Skipping protection handling for non-HTTP(S) URL: ${currentUrl}`));
1749
1920
  }
1750
1921
  return await logOutcome({
1751
1922
  phishingWarning: { attempted: false, success: true },
@@ -1757,7 +1928,9 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1757
1928
  }
1758
1929
 
1759
1930
  // Quick detection first - exit early if no Cloudflare detected and no explicit config
1931
+ const quickStart = Date.now();
1760
1932
  const quickDetection = await quickCloudflareDetection(page, forceDebug);
1933
+ timings.quick = Date.now() - quickStart;
1761
1934
  if (quickDetection && quickDetection.errorCode) errorCode = quickDetection.errorCode;
1762
1935
 
1763
1936
  // Safety check: ensure quickDetection is valid
@@ -1773,12 +1946,20 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1773
1946
  if (!quickDetection.hasIndicators) {
1774
1947
  if (forceDebug) {
1775
1948
  const cachedTag = quickDetection._fromCache ? ' [cached]' : '';
1776
- if (quickDetection.hasErrorPage) {
1777
- console.log(formatLogMessage('cloudflare', `Cloudflare error page detected${cachedTag} (origin unreachable, no bypass possible) for ${currentUrl}`));
1949
+ if (quickDetection.skippedInvalidUrl) {
1950
+ // Live page URL isn't HTTP(S) -- typically a popup/redirect dropped
1951
+ // the page to about:blank between page.goto() and our detection
1952
+ // call. quickCloudflareDetection short-circuits before the
1953
+ // page.evaluate, so title/bodySnippet are absent. The old code
1954
+ // logged them anyway and rendered literal "undefined" strings.
1955
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Live page URL is not HTTP(S) -- detection skipped for ${currentUrl} (likely popup/redirect to about:blank)`));
1956
+ } else if (quickDetection.hasErrorPage) {
1957
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Cloudflare error page detected${cachedTag} (origin unreachable, no bypass possible) for ${currentUrl}`));
1958
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Quick detection details${cachedTag}: title="${quickDetection.title}", bodySnippet="${quickDetection.bodySnippet}"`));
1778
1959
  } else {
1779
- console.log(formatLogMessage('cloudflare', `No Cloudflare indicators found${cachedTag}, skipping protection handling for ${currentUrl}`));
1960
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} No Cloudflare indicators found${cachedTag}, skipping protection handling for ${currentUrl}`));
1961
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Quick detection details${cachedTag}: title="${quickDetection.title}", bodySnippet="${quickDetection.bodySnippet}"`));
1780
1962
  }
1781
- console.log(formatLogMessage('cloudflare', `Quick detection details${cachedTag}: title="${quickDetection.title}", bodySnippet="${quickDetection.bodySnippet}"`));
1782
1963
  }
1783
1964
  return await logOutcome({
1784
1965
  phishingWarning: { attempted: false, success: true },
@@ -1786,6 +1967,12 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1786
1967
  overallSuccess: true,
1787
1968
  errors: [],
1788
1969
  skippedNoIndicators: true,
1970
+ // Propagate quickCloudflareDetection's internal short-circuit so
1971
+ // buildOutcomeString routes to 'skipped(non-http)' instead of the
1972
+ // misleading 'no_indicators'. The latter implies detection ran and
1973
+ // found nothing; the former honestly says "didn't run because the
1974
+ // live page URL wasn't HTTP(S)".
1975
+ skippedInvalidUrl: !!quickDetection.skippedInvalidUrl,
1789
1976
  cloudflareErrorPage: !!quickDetection.hasErrorPage
1790
1977
  });
1791
1978
  }
@@ -1801,39 +1988,39 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1801
1988
  };
1802
1989
 
1803
1990
  try {
1804
- // Adaptive timeout based on detection results and explicit config
1805
- let adaptiveTimeout;
1806
- if (cfPhishEnabled || cfBypassEnabled) {
1807
- // Explicit config - give more time
1808
- adaptiveTimeout = quickDetection.hasIndicators ? TIMEOUTS.ADAPTIVE_TIMEOUT_WITH_INDICATORS : TIMEOUTS.ADAPTIVE_TIMEOUT_WITHOUT_INDICATORS;
1809
- } else {
1810
- // Auto-detected only - shorter timeout
1811
- adaptiveTimeout = quickDetection.hasIndicators ? TIMEOUTS.ADAPTIVE_TIMEOUT_AUTO_WITH_INDICATORS : TIMEOUTS.ADAPTIVE_TIMEOUT_AUTO_WITHOUT_INDICATORS;
1812
- }
1991
+ // Adaptive timeout based on explicit config. hasIndicators is guaranteed
1992
+ // truthy here: the early-return above this block filters out the
1993
+ // no-indicators path, so the WITHOUT_INDICATORS branches that used to
1994
+ // sit here were dead code.
1995
+ const adaptiveTimeout = (cfPhishEnabled || cfBypassEnabled)
1996
+ ? TIMEOUTS.ADAPTIVE_TIMEOUT_WITH_INDICATORS
1997
+ : TIMEOUTS.ADAPTIVE_TIMEOUT_AUTO_WITH_INDICATORS;
1813
1998
 
1814
1999
  if (forceDebug) {
1815
- console.log(formatLogMessage('cloudflare', `Using adaptive timeout of ${adaptiveTimeout}ms for ${currentUrl} (indicators: ${quickDetection.hasIndicators}, explicit config: ${!!(siteConfig.cloudflare_phish || siteConfig.cloudflare_bypass)})`));
2000
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Using adaptive timeout of ${adaptiveTimeout}ms for ${currentUrl} (indicators: ${quickDetection.hasIndicators}, explicit config: ${!!(siteConfig.cloudflare_phish || siteConfig.cloudflare_bypass)})`));
1816
2001
  }
1817
2002
 
1818
- // Check if this domain already timed out -- skip immediately
1819
- try {
1820
- const outcomeCacheKey = 'outcome:' + new URL(currentUrl).hostname;
1821
- const cachedOutcome = detectionCache.cache.get(outcomeCacheKey);
1822
- if (cachedOutcome && cachedOutcome.data && cachedOutcome.data.timedOut && Date.now() - cachedOutcome.timestamp < detectionCache.ttl) {
1823
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Skipping ${currentUrl} -- domain already timed out on a previous URL`));
1824
- return await logOutcome(cachedOutcome.data);
1825
- }
1826
- } catch (e) { /* malformed URL, proceed normally */ }
2003
+ // Check if this domain already timed out -- skip immediately.
2004
+ // getOutcome handles TTL + the malformed-URL fallback internally.
2005
+ const cachedOutcome = detectionCache.getOutcome(currentUrl);
2006
+ if (cachedOutcome && cachedOutcome.timedOut) {
2007
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Skipping ${currentUrl} -- domain already timed out on a previous URL`));
2008
+ return await logOutcome(cachedOutcome);
2009
+ }
1827
2010
 
1828
2011
  let adaptiveTimeoutId = null;
1829
2012
  const handlingResult = await Promise.race([
1830
- performCloudflareHandling(page, currentUrl, siteConfig, cfDebug).then(r => {
2013
+ // Pass the pre-derived bypass/phish flags so performCloudflareHandling
2014
+ // doesn't re-parse siteConfig.cloudflare_bypass strings a second time.
2015
+ // `timings` is mutated in place by performCloudflareHandling so the
2016
+ // outcome line can report per-stage durations.
2017
+ performCloudflareHandling(page, currentUrl, siteConfig, cfDebug, { cfBypassEnabled, cfPhishEnabled }, timings).then(r => {
1831
2018
  if (adaptiveTimeoutId) clearTimeout(adaptiveTimeoutId);
1832
2019
  return r;
1833
2020
  }),
1834
2021
  new Promise((resolve) => {
1835
2022
  adaptiveTimeoutId = setTimeout(() => {
1836
- console.warn(formatLogMessage('cloudflare', `Adaptive timeout (${adaptiveTimeout}ms) for ${currentUrl} - continuing with scan`));
2023
+ console.warn(formatLogMessage('warn', `${CLOUDFLARE_TAG} Adaptive timeout (${adaptiveTimeout}ms) for ${currentUrl} - continuing with scan`));
1837
2024
  resolve({
1838
2025
  phishingWarning: { attempted: false, success: true },
1839
2026
  verificationChallenge: { attempted: false, success: true },
@@ -1847,33 +2034,36 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1847
2034
 
1848
2035
  // Cache timeout results at domain level so subsequent URLs skip immediately
1849
2036
  if (handlingResult.timedOut) {
1850
- try {
1851
- const outcomeCacheKey = 'outcome:' + new URL(currentUrl).hostname;
1852
- detectionCache.cache.set(outcomeCacheKey, { data: handlingResult, timestamp: Date.now() });
1853
- } catch (e) { /* malformed URL, skip caching */ }
2037
+ detectionCache.setOutcome(currentUrl, handlingResult);
1854
2038
  }
1855
2039
 
1856
2040
  return await logOutcome(handlingResult);
1857
2041
  } catch (error) {
1858
2042
  result.overallSuccess = false;
1859
2043
  result.errors.push(`Cloudflare handling failed: ${error.message}`);
1860
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Overall handling failed: ${error.message}`));
2044
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Overall handling failed: ${error.message}`));
1861
2045
  return await logOutcome(result);
1862
2046
  }
1863
2047
  }
1864
2048
 
1865
2049
  /**
1866
- * Performs the actual Cloudflare handling with enhanced debug logging
1867
- *
1868
- * @param {Object} page - Puppeteer page instance
2050
+ * Performs the actual Cloudflare handling with enhanced debug logging.
2051
+ * Only ever called from handleCloudflareProtection's adaptive-timeout race,
2052
+ * so the bypass/phish flags arrive pre-derived in `flags` to avoid reparsing
2053
+ * siteConfig string variants twice per URL.
2054
+ *
2055
+ * @param {Object} page - Puppeteer page instance
1869
2056
  * @param {string} currentUrl - URL being processed
1870
- * @param {Object} siteConfig - Configuration flags
2057
+ * @param {Object} siteConfig - Forwarded to the retry harnesses for getRetryConfig
1871
2058
  * @param {boolean} forceDebug - Debug logging flag
2059
+ * @param {{cfBypassEnabled:boolean, cfPhishEnabled:boolean}} flags
2060
+ * @param {{quick:number, phish:number, challenge:number}} [timings] - In-place
2061
+ * timing accumulator from the caller. Populated as each stage completes so
2062
+ * the outer outcome line can report per-stage durations.
1872
2063
  * @returns {Promise<Object>} Same structure as handleCloudflareProtection()
1873
2064
  */
1874
- async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebug = false) {
1875
- const cfBypassEnabled = siteConfig.cloudflare_bypass === true || siteConfig.cloudflare_bypass === 'debug';
1876
- const cfPhishEnabled = siteConfig.cloudflare_phish === true || siteConfig.cloudflare_phish === 'debug';
2065
+ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebug, flags, timings = {}) {
2066
+ const { cfBypassEnabled, cfPhishEnabled } = flags;
1877
2067
 
1878
2068
  const result = {
1879
2069
  phishingWarning: { attempted: false, success: false },
@@ -1882,22 +2072,24 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
1882
2072
  errors: []
1883
2073
  };
1884
2074
 
1885
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Starting Cloudflare protection handling for ${currentUrl}`));
2075
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Starting Cloudflare protection handling for ${currentUrl}`));
1886
2076
 
1887
2077
  // Handle phishing warnings first - updates result.phishingWarning
1888
2078
  // Only runs if cfPhishEnabled (siteConfig.cloudflare_phish is true or 'debug')
1889
2079
  // Handle phishing warnings if enabled
1890
2080
  if (cfPhishEnabled) {
1891
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Phishing warning bypass enabled for ${currentUrl}`));
1892
-
2081
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Phishing warning bypass enabled for ${currentUrl}`));
2082
+
2083
+ const phishStart = Date.now();
1893
2084
  const phishingResult = await handlePhishingWarningWithRetries(page, currentUrl, siteConfig, forceDebug);
2085
+ timings.phish = Date.now() - phishStart;
1894
2086
  result.phishingWarning = phishingResult;
1895
2087
 
1896
2088
  // Check for max retries exceeded
1897
2089
  if (phishingResult.maxRetriesExceeded) {
1898
2090
  result.overallSuccess = false;
1899
2091
  result.errors.push(`Phishing warning bypass exceeded max retries (${phishingResult.attempts}): ${phishingResult.error}`);
1900
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Phishing warning max retries exceeded: ${phishingResult.error}`));
2092
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Phishing warning max retries exceeded: ${phishingResult.error}`));
1901
2093
  // Exit early if max retries exceeded
1902
2094
  return result;
1903
2095
  }
@@ -1906,16 +2098,16 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
1906
2098
  result.overallSuccess = false;
1907
2099
  if (phishingResult.loopDetected) {
1908
2100
  result.errors.push(`Phishing warning bypass failed (redirect loop): ${phishingResult.error}`);
1909
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Phishing warning redirect loop detected: ${phishingResult.error}`));
2101
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Phishing warning redirect loop detected: ${phishingResult.error}`));
1910
2102
  } else {
1911
2103
  result.errors.push(`Phishing warning bypass failed: ${phishingResult.error}`);
1912
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Phishing warning handling failed: ${phishingResult.error}`));
2104
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Phishing warning handling failed: ${phishingResult.error}`));
1913
2105
  }
1914
2106
  } else if (phishingResult.attempted && phishingResult.success) {
1915
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Phishing warning handled successfully`));
2107
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Phishing warning handled successfully`));
1916
2108
  }
1917
2109
  } else if (forceDebug) {
1918
- console.log(formatLogMessage('cloudflare', `Phishing warning bypass disabled for ${currentUrl}`));
2110
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Phishing warning bypass disabled for ${currentUrl}`));
1919
2111
  }
1920
2112
 
1921
2113
  // Handle verification challenges second - updates result.verificationChallenge
@@ -1923,16 +2115,18 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
1923
2115
  // Sets requiresHuman: true if CAPTCHA detected (no bypass attempted)
1924
2116
  // Handle verification challenges if enabled
1925
2117
  if (cfBypassEnabled) {
1926
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge bypass enabled for ${currentUrl}`));
1927
-
2118
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Challenge bypass enabled for ${currentUrl}`));
2119
+
2120
+ const challengeStart = Date.now();
1928
2121
  const challengeResult = await handleVerificationChallengeWithRetries(page, currentUrl, siteConfig, forceDebug);
2122
+ timings.challenge = Date.now() - challengeStart;
1929
2123
  result.verificationChallenge = challengeResult;
1930
2124
 
1931
2125
  // Check for max retries exceeded
1932
2126
  if (challengeResult.maxRetriesExceeded) {
1933
2127
  result.overallSuccess = false;
1934
2128
  result.errors.push(`Challenge bypass exceeded max retries (${challengeResult.attempts}): ${challengeResult.error}`);
1935
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge bypass max retries exceeded: ${challengeResult.error}`));
2129
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Challenge bypass max retries exceeded: ${challengeResult.error}`));
1936
2130
  // Exit early if max retries exceeded
1937
2131
  return result;
1938
2132
  }
@@ -1941,31 +2135,31 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
1941
2135
  result.overallSuccess = false;
1942
2136
  if (challengeResult.requiresHuman) {
1943
2137
  result.errors.push(`Human intervention required: ${challengeResult.error}`);
1944
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Human intervention required: ${challengeResult.error}`));
2138
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Human intervention required: ${challengeResult.error}`));
1945
2139
  } else if (challengeResult.loopDetected) {
1946
2140
  result.errors.push(`Challenge bypass failed (redirect loop): ${challengeResult.error}`);
1947
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge redirect loop detected: ${challengeResult.error}`));
2141
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Challenge redirect loop detected: ${challengeResult.error}`));
1948
2142
  } else {
1949
2143
  result.errors.push(`Challenge bypass failed: ${challengeResult.error}`);
1950
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge bypass failed: ${challengeResult.error}`));
2144
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Challenge bypass failed: ${challengeResult.error}`));
1951
2145
  }
1952
2146
  } else if (challengeResult.attempted && challengeResult.success) {
1953
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge handled successfully using method: ${challengeResult.method || 'unknown'}`));
2147
+ if (forceDebug) console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Challenge handled successfully using method: ${challengeResult.method || 'unknown'}`));
1954
2148
  }
1955
2149
  } else if (forceDebug) {
1956
- console.log(formatLogMessage('cloudflare', `Challenge bypass disabled for ${currentUrl}`));
2150
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Challenge bypass disabled for ${currentUrl}`));
1957
2151
  }
1958
2152
 
1959
2153
  // Log overall result
1960
2154
  if (!result.overallSuccess && forceDebug) {
1961
- console.log(formatLogMessage('cloudflare', `Overall Cloudflare handling failed for ${currentUrl}:`));
2155
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Overall Cloudflare handling failed for ${currentUrl}:`));
1962
2156
  result.errors.forEach(error => {
1963
- console.log(formatLogMessage('cloudflare', ` - ${error}`));
2157
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} - ${error}`));
1964
2158
  });
1965
2159
  } else if ((result.phishingWarning.attempted || result.verificationChallenge.attempted) && forceDebug) {
1966
- console.log(formatLogMessage('cloudflare', `Successfully handled Cloudflare protections for ${currentUrl}`));
2160
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Successfully handled Cloudflare protections for ${currentUrl}`));
1967
2161
  } else if (forceDebug) {
1968
- console.log(formatLogMessage('cloudflare', `No Cloudflare protections detected or enabled for ${currentUrl}`));
2162
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} No Cloudflare protections detected or enabled for ${currentUrl}`));
1969
2163
  }
1970
2164
 
1971
2165
  return result;
@@ -2000,7 +2194,7 @@ async function parallelChallengeDetection(page, forceDebug = false) {
2000
2194
  const detectedChallenges = results.filter(r => r.detected).map(r => r.type);
2001
2195
 
2002
2196
  if (forceDebug && detectedChallenges.length > 0) {
2003
- console.log(formatLogMessage('cloudflare', `Parallel detection found challenges: ${detectedChallenges.join(', ')}`));
2197
+ console.log(formatLogMessage('debug', `${CLOUDFLARE_TAG} Parallel detection found challenges: ${detectedChallenges.join(', ')}`));
2004
2198
  }
2005
2199
 
2006
2200
  return {
@@ -2010,29 +2204,6 @@ async function parallelChallengeDetection(page, forceDebug = false) {
2010
2204
  };
2011
2205
  }
2012
2206
 
2013
- /**
2014
- * Enhanced parallel detection including embedded iframe challenges
2015
- */
2016
- async function enhancedParallelChallengeDetection(page, forceDebug = false) {
2017
- const existingDetection = await parallelChallengeDetection(page, forceDebug);
2018
-
2019
- try {
2020
- const hasEmbeddedIframe = await page.evaluate(() => {
2021
- return document.querySelector('iframe[src*="challenges.cloudflare.com"]') !== null ||
2022
- document.querySelector('iframe[title*="Verify you are human"]') !== null;
2023
- });
2024
-
2025
- if (hasEmbeddedIframe && !existingDetection.challenges.includes('embedded_iframe')) {
2026
- existingDetection.challenges.push('embedded_iframe');
2027
- existingDetection.hasAnyChallenge = true;
2028
- }
2029
- } catch (e) {
2030
- // Ignore detection errors
2031
- }
2032
-
2033
- return existingDetection;
2034
- }
2035
-
2036
2207
  /**
2037
2208
  * Gets cache statistics for performance monitoring
2038
2209
  */
@@ -2048,38 +2219,37 @@ function clearDetectionCache() {
2048
2219
  }
2049
2220
 
2050
2221
  /**
2051
- * Cleanup function to prevent memory leaks in long-running processes
2222
+ * Cleanup function to prevent memory leaks in long-running processes.
2223
+ * Also resets aggregate stats so a re-init of the module starts fresh.
2052
2224
  */
2053
2225
  function cleanup() {
2054
2226
  if (detectionCache) {
2055
2227
  detectionCache.destroy();
2056
2228
  }
2229
+ resetAggregateStats();
2057
2230
  }
2058
2231
 
2232
+ // Public surface kept narrow on purpose: only what nwss.js actually imports.
2233
+ // Internal helpers (analyzeCloudflareChallenge, handlePhishingWarning,
2234
+ // handleVerificationChallenge, handleTurnstileChallenge, handleLegacyCheckbox,
2235
+ // handleEmbeddedIframeChallenge, waitForJSChallengeCompletion,
2236
+ // checkChallengeCompletion, quickCloudflareDetection, fastTimeout,
2237
+ // runWithRetries, categorizeError, getRetryConfig, detectChallengeLoop,
2238
+ // ERROR_TYPES, RETRY_CONFIG, CLOUDFLARE_MODULE_VERSION) stay as
2239
+ // module-local helpers — move them back to module.exports only if a new
2240
+ // external consumer appears.
2059
2241
  module.exports = {
2060
- analyzeCloudflareChallenge,
2061
- handlePhishingWarning,
2062
- handleVerificationChallenge,
2063
2242
  handleCloudflareProtection,
2064
- waitForTimeout,
2065
- handleTurnstileChallenge,
2066
- waitForJSChallengeCompletion,
2067
- handleLegacyCheckbox,
2068
- checkChallengeCompletion,
2069
- handleEmbeddedIframeChallenge,
2070
- enhancedParallelChallengeDetection,
2071
- quickCloudflareDetection,
2072
- getModuleInfo,
2073
- CLOUDFLARE_MODULE_VERSION,
2074
- // New exports
2075
2243
  parallelChallengeDetection,
2076
2244
  getCacheStats,
2077
2245
  clearDetectionCache,
2078
- categorizeError,
2079
- ERROR_TYPES,
2080
- RETRY_CONFIG,
2081
- getRetryConfig,
2082
- detectChallengeLoop,
2246
+ // End-of-scan aggregate diagnostics. nwss.js can call
2247
+ // getAggregateStats({reset:true}) after the scan loop to print a summary
2248
+ // ("Of 200 URLs: 47 challenged, 31 solved via JS, 12 via Turnstile,
2249
+ // 4 timed out, avg 1.8s") without threading per-URL results back through
2250
+ // its orchestration layer.
2251
+ getAggregateStats,
2252
+ resetAggregateStats,
2083
2253
  // Memory management
2084
2254
  cleanup
2085
2255
  };