@fanboynz/network-scanner 2.0.64 → 2.0.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/cloudflare.js CHANGED
@@ -156,6 +156,20 @@ const ERROR_TYPES = {
156
156
  UNKNOWN: 'unknown'
157
157
  };
158
158
 
159
+ /**
160
+ * Retry configuration with exponential backoff.
161
+ * Defined before getRetryConfig so the reference order is structurally
162
+ * sound — previously getRetryConfig was hoisted above this const and only
163
+ * worked because the function was never called during module load.
164
+ */
165
+ const RETRY_CONFIG = {
166
+ maxAttempts: 2, // Only 2 attempts fit within 25s outer timeout
167
+ baseDelay: 800, // Slightly faster retry delay
168
+ maxDelay: 5000, // Lower max delay cap
169
+ backoffMultiplier: 2,
170
+ retryableErrors: [ERROR_TYPES.NETWORK, ERROR_TYPES.TIMEOUT, ERROR_TYPES.ELEMENT_NOT_FOUND, ERROR_TYPES.DETACHED_FRAME]
171
+ };
172
+
159
173
  /**
160
174
  * Gets the retry configuration for a site, merging site-specific and global settings
161
175
  * @param {Object} siteConfig - Site configuration object
@@ -180,9 +194,9 @@ function detectChallengeLoop(url, previousUrls = []) {
180
194
  const isChallengeUrl = url.includes('/cdn-cgi/challenge-platform/') ||
181
195
  url.includes('challenges.cloudflare.com') ||
182
196
  url.includes('cf-ray');
183
-
197
+
184
198
  if (!isChallengeUrl) return false;
185
-
199
+
186
200
  // Check if we've seen this exact URL or very similar challenge URLs
187
201
  const similarUrls = previousUrls.filter(prevUrl => {
188
202
  if (prevUrl === url) return true; // Exact match
@@ -192,21 +206,10 @@ function detectChallengeLoop(url, previousUrls = []) {
192
206
  }
193
207
  return false;
194
208
  });
195
-
209
+
196
210
  return similarUrls.length >= 2; // Loop detected if we've seen similar URLs 2+ times
197
211
  }
198
212
 
199
- /**
200
- * Retry configuration with exponential backoff
201
- */
202
- const RETRY_CONFIG = {
203
- maxAttempts: 2, // Only 2 attempts fit within 25s outer timeout
204
- baseDelay: 800, // Slightly faster retry delay
205
- maxDelay: 5000, // Lower max delay cap
206
- backoffMultiplier: 2,
207
- retryableErrors: [ERROR_TYPES.NETWORK, ERROR_TYPES.TIMEOUT, ERROR_TYPES.ELEMENT_NOT_FOUND, ERROR_TYPES.DETACHED_FRAME]
208
- };
209
-
210
213
  /**
211
214
  * Performance cache for detection results
212
215
  * Stores detection results per domain to avoid redundant checks
@@ -217,8 +220,12 @@ class CloudflareDetectionCache {
217
220
  this.ttl = ttl;
218
221
  this.hits = 0;
219
222
  this.misses = 0;
220
- // Prevent memory buildup in long-running processes
223
+ // Prevent memory buildup in long-running processes. unref() so the
224
+ // interval never prevents the Node process from exiting on its own —
225
+ // nwss.js calls cleanup() explicitly on scan completion, but any other
226
+ // consumer of this module that forgets to is still safe.
221
227
  this.cleanupInterval = setInterval(() => this.cleanupExpired(), ttl / 10);
228
+ this.cleanupInterval.unref();
222
229
  }
223
230
 
224
231
  getCacheKey(url) {
@@ -295,6 +302,11 @@ class CloudflareDetectionCache {
295
302
  // Initialize cache singleton
296
303
  const detectionCache = new CloudflareDetectionCache();
297
304
 
305
+ // One-shot flag for the per-process module-version banner. Was previously
306
+ // logged once per URL in handleCloudflareProtection's debug header, which
307
+ // produces N=URL-count copies for no useful signal beyond the first.
308
+ let _moduleVersionLogged = false;
309
+
298
310
  /**
299
311
  * Gets module version information
300
312
  * @returns {object} Version information object
@@ -313,31 +325,28 @@ function getModuleInfo() {
313
325
  * @param {boolean} forceDebug - Debug logging flag
314
326
  * @returns {boolean} True if URL should be processed
315
327
  */
328
+ // Precompiled regexes anchored to URL start. SKIP_PROTO_RE matches any of the
329
+ // browser-internal / special protocols we want to skip; HTTP_PROTO_RE matches
330
+ // http(s):// for the inverse check below. Faster than running 17 sequential
331
+ // startsWith comparisons per URL.
332
+ const SKIP_PROTO_RE = /^(?:about|chrome|chrome-extension|chrome-error|chrome-search|devtools|edge|moz-extension|safari-extension|webkit|data|blob|javascript|vbscript|file|ftp|ftps):/i;
333
+ const HTTP_PROTO_RE = /^https?:\/\//i;
334
+
316
335
  function shouldProcessUrl(url, forceDebug = false) {
317
336
  if (!url || typeof url !== 'string') {
318
337
  if (forceDebug) console.log(formatLogMessage('cloudflare', `[url-validation] Skipping invalid URL: ${url}`));
319
338
  return false;
320
339
  }
321
340
 
322
- // Skip browser-internal and special protocol URLs
323
- const skipPatterns = [
324
- 'about:', 'chrome:', 'chrome-extension:', 'chrome-error:', 'chrome-search:',
325
- 'devtools:', 'edge:', 'moz-extension:', 'safari-extension:', 'webkit:',
326
- 'data:', 'blob:', 'javascript:', 'vbscript:', 'file:', 'ftp:', 'ftps:'
327
- ];
328
-
329
- const urlLower = url.toLowerCase();
330
- for (const pattern of skipPatterns) {
331
- if (urlLower.startsWith(pattern)) {
332
- if (forceDebug) {
333
- console.log(formatLogMessage('cloudflare', `[url-validation] Skipping ${pattern} URL: ${url.substring(0, 100)}${url.length > 100 ? '...' : ''}`));
334
- }
335
- return false;
341
+ const skipMatch = url.match(SKIP_PROTO_RE);
342
+ if (skipMatch) {
343
+ if (forceDebug) {
344
+ console.log(formatLogMessage('cloudflare', `[url-validation] Skipping ${skipMatch[0].toLowerCase()} URL: ${url.substring(0, 100)}${url.length > 100 ? '...' : ''}`));
336
345
  }
346
+ return false;
337
347
  }
338
348
 
339
- // Only process HTTP/HTTPS URLs
340
- if (!urlLower.startsWith('http://') && !urlLower.startsWith('https://')) {
349
+ if (!HTTP_PROTO_RE.test(url)) {
341
350
  if (forceDebug) {
342
351
  console.log(formatLogMessage('cloudflare', `[url-validation] Skipping non-HTTP(S) URL: ${url.substring(0, 100)}${url.length > 100 ? '...' : ''}`));
343
352
  }
@@ -357,10 +366,60 @@ async function waitForTimeout(page, timeout) {
357
366
  return new Promise(resolve => setTimeout(resolve, timeout));
358
367
  }
359
368
 
369
+ /**
370
+ * Captures whether the page currently has Cloudflare's two key cookies.
371
+ * cf_clearance is the post-challenge clearance token — its presence is the
372
+ * single most reliable "did the bypass actually succeed" signal, beating
373
+ * any DOM-side completion check. __cf_bm is the bot-mitigation cookie
374
+ * (typically set on every request that goes through CF's edge).
375
+ * Errors swallowed: cookie read failures should not affect bypass logic.
376
+ */
377
+ async function getCfCookieState(page) {
378
+ try {
379
+ const cookies = await page.cookies();
380
+ let cf_clearance = false;
381
+ let cf_bm = false;
382
+ for (const c of cookies) {
383
+ if (c.name === 'cf_clearance') cf_clearance = true;
384
+ else if (c.name === '__cf_bm') cf_bm = true;
385
+ }
386
+ return { cf_clearance, cf_bm };
387
+ } catch {
388
+ return { cf_clearance: false, cf_bm: false };
389
+ }
390
+ }
391
+
392
+ /**
393
+ * Maps a handleCloudflareProtection result back to a short outcome tag
394
+ * for the per-URL summary log. The tag is grep-friendly (no spaces) so
395
+ * users can post-process scan logs by outcome category.
396
+ */
397
+ function buildOutcomeString(result, errorCode) {
398
+ if (!result) return 'unknown';
399
+ if (result.skippedInvalidUrl) return 'skipped(non-http)';
400
+ if (result.quickDetectionFailed) return 'detection_failed';
401
+ if (result.cloudflareErrorPage) return `error_page(${errorCode || '5xx'})`;
402
+ if (result.timedOut) return 'timeout';
403
+ if (result.verificationChallenge?.requiresHuman) return 'captcha_required';
404
+ if (result.verificationChallenge?.attempted && result.verificationChallenge?.success) {
405
+ return `solved(${result.verificationChallenge.method || 'unknown'})`;
406
+ }
407
+ if (result.phishingWarning?.attempted && result.phishingWarning?.success) {
408
+ return 'solved(phishing_continue)';
409
+ }
410
+ if (result.skippedNoIndicators) return 'no_indicators';
411
+ if (!result.overallSuccess) return 'failed';
412
+ return 'ok';
413
+ }
414
+
360
415
  /**
361
416
  * Categorizes errors for better handling
362
417
  */
363
418
  function categorizeError(error) {
419
+ // Guard against null/undefined error so callers using categorizeError in
420
+ // safe-defaults return paths (e.g. safePageEvaluate's final fallback when
421
+ // lastError was never assigned) don't blow up reading .message.
422
+ if (!error) return ERROR_TYPES.UNKNOWN;
364
423
  const errorMessage = error.message || '';
365
424
 
366
425
  if (errorMessage.includes('detached Frame') || errorMessage.includes('Attempted to use detached')) {
@@ -550,38 +609,50 @@ async function quickCloudflareDetection(page, forceDebug = false) {
550
609
  const stats = detectionCache.getStats();
551
610
  console.log(formatLogMessage('cloudflare', `Using cached detection result (cache hit rate: ${stats.hitRate})`));
552
611
  }
553
- return cachedResult;
612
+ // Return a fresh shallow copy tagged _fromCache so the handler's
613
+ // logging can say "[cached]" instead of presenting cached title/body
614
+ // details as if they were fresh.
615
+ return { ...cachedResult, _fromCache: true };
554
616
  }
555
617
 
556
618
  // Perform actual detection with enhanced error handling
557
619
  const quickCheck = await safePageEvaluate(page, () => {
558
620
  const title = document.title || '';
559
621
  const url = window.location.href;
560
-
622
+
623
+ // Cloudflare-served 5xx origin-error pages (522/523/524/525/526/527/530).
624
+ // Title format is reliable: "<domain> | 5xx: <reason>". These are NOT
625
+ // bypass-able challenges — the origin is unreachable. Mark as
626
+ // recognized (hasErrorPage) but NOT as a bypass target (hasIndicators
627
+ // stays false) so the early-skip path still fires and the log can say
628
+ // "Cloudflare error page" instead of the misleading "No Cloudflare
629
+ // indicators found". errorCode is the captured 5xx status code so outcome
630
+ // logs can grep by specific error type.
631
+ const titleErrorMatch = title.match(/\|\s*(5\d\d):/);
632
+ if (titleErrorMatch) {
633
+ return { hasIndicators: false, hasErrorPage: true, errorCode: parseInt(titleErrorMatch[1], 10), title, url, bodySnippet: '' };
634
+ }
635
+
561
636
  // FAST PATH: Check title + URL first (string ops, no DOM traversal)
562
- const titleMatch =
637
+ const titleMatch =
563
638
  title.includes('Just a moment') ||
564
639
  title.includes('Checking your browser') ||
565
640
  title.includes('Attention Required') ||
566
641
  title.includes('Security check');
567
-
642
+
568
643
  const urlMatch =
569
644
  url.includes('/cdn-cgi/challenge-platform/') ||
570
645
  url.includes('cloudflare.com');
571
-
646
+
572
647
  if (titleMatch || urlMatch) {
573
648
  return { hasIndicators: true, title, url, bodySnippet: '' };
574
649
  }
575
650
 
576
- // MEDIUM PATH: Check a few fast selectors before expensive text extraction
577
- const selectorMatch =
578
- document.querySelector('[data-ray]') ||
579
- document.querySelector('[data-cf-challenge]') ||
580
- document.querySelector('.cf-challenge-running') ||
581
- document.querySelector('.cf-turnstile') ||
582
- document.querySelector('.cf-managed-challenge') ||
583
- document.querySelector('[data-cf-managed]') ||
584
- document.querySelector('script[src*="/cdn-cgi/challenge-platform/"]');
651
+ // MEDIUM PATH: Combine fast-path selectors into one query — one DOM
652
+ // walk for all 7 alternatives instead of up to 7 separate walks.
653
+ const selectorMatch = document.querySelector(
654
+ '[data-ray], [data-cf-challenge], .cf-challenge-running, .cf-turnstile, .cf-managed-challenge, [data-cf-managed], script[src*="/cdn-cgi/challenge-platform/"]'
655
+ );
585
656
 
586
657
  if (selectorMatch) {
587
658
  return { hasIndicators: true, title, url, bodySnippet: '' };
@@ -602,14 +673,19 @@ async function quickCloudflareDetection(page, forceDebug = false) {
602
673
  bodyText.includes('This website has been reported for potential phishing') ||
603
674
  bodyText.includes('Please wait while we verify') ||
604
675
  bodyText.includes('Checking if the site connection is secure');
605
-
606
- // Remaining slower selectors
607
- const slowSelectorMatch =
608
- document.querySelector('.cf-challenge-container') ||
609
- document.querySelector('.ctp-checkbox-container') ||
610
- document.querySelector('iframe[src*="challenges.cloudflare.com"]') ||
611
- document.querySelector('iframe[title*="Cloudflare security challenge"]');
612
-
676
+
677
+ // Remaining slower selectors — combined into one query for the same reason.
678
+ const slowSelectorMatch = document.querySelector(
679
+ '.cf-challenge-container, .ctp-checkbox-container, iframe[src*="challenges.cloudflare.com"], iframe[title*="Cloudflare security challenge"]'
680
+ );
681
+
682
+ // Body-text fallback for error pages with non-standard titles.
683
+ // Same rationale as the early title check: recognize but don't bypass.
684
+ const bodyErrorMatch = bodyText.match(/Error code (5\d\d)/);
685
+ if (bodyErrorMatch && !textMatch && !slowSelectorMatch) {
686
+ return { hasIndicators: false, hasErrorPage: true, errorCode: parseInt(bodyErrorMatch[1], 10), title, url, bodySnippet: bodyText.substring(0, 200) };
687
+ }
688
+
613
689
  return {
614
690
  hasIndicators: !!(textMatch || slowSelectorMatch),
615
691
  title,
@@ -624,10 +700,13 @@ async function quickCloudflareDetection(page, forceDebug = false) {
624
700
  if (forceDebug) {
625
701
  if (quickCheck.hasIndicators) {
626
702
  console.log(formatLogMessage('cloudflare', `Quick detection found Cloudflare indicators on ${quickCheck.url}`));
627
- } else {
628
- console.log(formatLogMessage('cloudflare', `Quick detection found no Cloudflare indicators on ${quickCheck.url}`));
629
703
  }
630
-
704
+ // hasErrorPage and no-indicators cases are deliberately silent here —
705
+ // handleCloudflareProtection prints a clearer per-action line right
706
+ // after ("Cloudflare error page detected..." or "No Cloudflare
707
+ // indicators found, skipping protection handling..."), so logging
708
+ // here would just duplicate it.
709
+
631
710
  if (quickCheck.attempts && quickCheck.attempts > 1) {
632
711
  console.log(formatLogMessage('cloudflare', `Detection required ${quickCheck.attempts} attempts`));
633
712
  }
@@ -657,28 +736,30 @@ async function analyzeCloudflareChallenge(page) {
657
736
  // Cap text extraction -- on content-heavy pages body.textContent can be megabytes
658
737
  const bodyText = document.body ? document.body.textContent.substring(0, 2000) : '';
659
738
 
660
- // Updated selectors for 2025 Cloudflare challenges
661
- const hasTurnstileIframe = document.querySelector('iframe[title*="Cloudflare security challenge"]') !== null ||
662
- document.querySelector('iframe[src*="challenges.cloudflare.com"]') !== null ||
663
- document.querySelector('iframe[title*="Widget containing a Cloudflare"]') !== null;
664
-
665
- const hasTurnstileContainer = document.querySelector('.cf-turnstile') !== null ||
666
- document.querySelector('.ctp-checkbox-container') !== null ||
667
- document.querySelector('.ctp-checkbox-label') !== null;
668
-
669
- const hasTurnstileCheckbox = document.querySelector('input[type="checkbox"].ctp-checkbox') !== null ||
670
- document.querySelector('.ctp-checkbox') !== null;
671
-
672
- const hasLegacyCheckbox = document.querySelector('input[type="checkbox"]#challenge-form') !== null ||
673
- document.querySelector('input[type="checkbox"][name="cf_captcha_kind"]') !== null;
674
-
675
- const hasChallengeRunning = document.querySelector('.cf-challenge-running') !== null ||
676
- document.querySelector('.cf-challenge-container') !== null ||
677
- document.querySelector('.challenge-stage') !== null ||
678
- document.querySelector('.challenge-form') !== null;
679
-
680
- const hasDataRay = document.querySelector('[data-ray]') !== null ||
681
- document.querySelector('[data-cf-challenge]') !== null;
739
+ // Updated selectors for 2025 Cloudflare challenges. Each category groups
740
+ // its alternatives into a single comma-separated selector so the browser
741
+ // walks the DOM once per category instead of once per alternative.
742
+ const hasTurnstileIframe = !!document.querySelector(
743
+ 'iframe[title*="Cloudflare security challenge"], iframe[src*="challenges.cloudflare.com"], iframe[title*="Widget containing a Cloudflare"]'
744
+ );
745
+
746
+ const hasTurnstileContainer = !!document.querySelector(
747
+ '.cf-turnstile, .ctp-checkbox-container, .ctp-checkbox-label'
748
+ );
749
+
750
+ const hasTurnstileCheckbox = !!document.querySelector(
751
+ 'input[type="checkbox"].ctp-checkbox, .ctp-checkbox'
752
+ );
753
+
754
+ const hasLegacyCheckbox = !!document.querySelector(
755
+ 'input[type="checkbox"]#challenge-form, input[type="checkbox"][name="cf_captcha_kind"]'
756
+ );
757
+
758
+ const hasChallengeRunning = !!document.querySelector(
759
+ '.cf-challenge-running, .cf-challenge-container, .challenge-stage, .challenge-form'
760
+ );
761
+
762
+ const hasDataRay = !!document.querySelector('[data-ray], [data-cf-challenge]');
682
763
 
683
764
  const hasCaptcha = bodyText.includes('CAPTCHA') || bodyText.includes('captcha') ||
684
765
  bodyText.includes('hCaptcha') || bodyText.includes('reCAPTCHA');
@@ -1561,13 +1642,16 @@ async function checkChallengeCompletion(page) {
1561
1642
  const isCompleted = await safePageEvaluate(page, () => {
1562
1643
  const noChallengeRunning = !document.querySelector('.cf-challenge-running');
1563
1644
  const noChallengeContainer = !document.querySelector('.cf-challenge-container');
1564
- const noChallengePage = !document.body.textContent.includes('Checking your browser') &&
1565
- !document.body.textContent.includes('Just a moment') &&
1566
- !document.body.textContent.includes('Verify you are human');
1567
-
1645
+ // Read body.textContent once — each access re-walks the DOM tree to
1646
+ // materialize the string. The cap matches analyzeCloudflareChallenge.
1647
+ const bodyText = document.body ? document.body.textContent.substring(0, 2000) : '';
1648
+ const noChallengePage = !bodyText.includes('Checking your browser') &&
1649
+ !bodyText.includes('Just a moment') &&
1650
+ !bodyText.includes('Verify you are human');
1651
+
1568
1652
  const hasClearanceCookie = document.cookie.includes('cf_clearance');
1569
1653
  const hasTurnstileResponse = document.querySelector('input[name="cf-turnstile-response"]')?.value;
1570
-
1654
+
1571
1655
  return (noChallengeRunning && noChallengeContainer && noChallengePage) ||
1572
1656
  hasClearanceCookie ||
1573
1657
  hasTurnstileResponse;
@@ -1619,35 +1703,66 @@ async function checkChallengeCompletion(page) {
1619
1703
  * console.log(`Challenge solved using: ${result.verificationChallenge.method}`);
1620
1704
  * }
1621
1705
  */
1622
- async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDebug = false) {
1706
+ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDebug = false, navInfo = {}) {
1623
1707
  const cfDebug = forceDebug || siteConfig.cloudflare_bypass === 'debug' || siteConfig.cloudflare_phish === 'debug';
1624
1708
  const cfBypassEnabled = siteConfig.cloudflare_bypass === true || siteConfig.cloudflare_bypass === 'debug';
1625
1709
  const cfPhishEnabled = siteConfig.cloudflare_phish === true || siteConfig.cloudflare_phish === 'debug';
1626
1710
 
1627
- if (cfDebug) {
1628
- console.log(formatLogMessage('cloudflare', `Using Cloudflare module v${CLOUDFLARE_MODULE_VERSION} for ${currentUrl}`));
1711
+ // Outcome-summary bookkeeping. Only paid for in debug mode — page.cookies()
1712
+ // is a real CDP round-trip we don't want on every URL in production.
1713
+ // navInfo carries httpStatus + cfRay captured at page.goto time by the
1714
+ // caller (response object isn't reachable from the page after navigation).
1715
+ const startMs = Date.now();
1716
+ let cookiesBefore = { cf_clearance: false, cf_bm: false };
1717
+ if (forceDebug) cookiesBefore = await getCfCookieState(page);
1718
+ let errorCode = null; // populated once quickDetection runs
1719
+ const logOutcome = async (result) => {
1720
+ if (forceDebug) {
1721
+ try {
1722
+ const cookiesAfter = await getCfCookieState(page);
1723
+ const outcome = buildOutcomeString(result, errorCode);
1724
+ const clearanceTag = cookiesAfter.cf_clearance
1725
+ ? (cookiesBefore.cf_clearance ? 'clearance=preexisting' : 'clearance=gained')
1726
+ : 'clearance=no';
1727
+ const bmTag = cookiesAfter.cf_bm
1728
+ ? (cookiesBefore.cf_bm ? 'cf_bm=preexisting' : 'cf_bm=gained')
1729
+ : 'cf_bm=no';
1730
+ const statusTag = navInfo.httpStatus != null ? ` | http=${navInfo.httpStatus}` : '';
1731
+ const rayTag = navInfo.cfRay ? ` | cf-ray=${navInfo.cfRay}` : '';
1732
+ console.log(formatLogMessage('cloudflare', `Outcome for ${currentUrl}: ${outcome} | ${clearanceTag} | ${bmTag}${statusTag}${rayTag} | duration=${Date.now() - startMs}ms`));
1733
+ } catch (_) { /* never let summary logging affect the return */ }
1734
+ }
1735
+ return result;
1736
+ };
1737
+
1738
+ if (cfDebug && !_moduleVersionLogged) {
1739
+ // Print once per process; the version is global and doesn't change
1740
+ // between URLs. Subsequent calls stay silent.
1741
+ console.log(formatLogMessage('cloudflare', `Using Cloudflare module v${CLOUDFLARE_MODULE_VERSION}`));
1742
+ _moduleVersionLogged = true;
1629
1743
  }
1630
-
1744
+
1631
1745
  // VALIDATE URL FIRST - Skip protection handling for non-HTTP(S) URLs
1632
1746
  if (!shouldProcessUrl(currentUrl, forceDebug)) {
1633
1747
  if (forceDebug) {
1634
1748
  console.log(formatLogMessage('cloudflare', `Skipping protection handling for non-HTTP(S) URL: ${currentUrl}`));
1635
1749
  }
1636
- return {
1750
+ return await logOutcome({
1637
1751
  phishingWarning: { attempted: false, success: true },
1638
1752
  verificationChallenge: { attempted: false, success: true },
1639
1753
  overallSuccess: true,
1640
1754
  errors: [],
1641
1755
  skippedInvalidUrl: true
1642
- };
1756
+ });
1643
1757
  }
1644
-
1758
+
1645
1759
  // Quick detection first - exit early if no Cloudflare detected and no explicit config
1646
1760
  const quickDetection = await quickCloudflareDetection(page, forceDebug);
1647
-
1761
+ if (quickDetection && quickDetection.errorCode) errorCode = quickDetection.errorCode;
1762
+
1648
1763
  // Safety check: ensure quickDetection is valid
1649
1764
  if (!quickDetection) {
1650
- return { phishingWarning: { attempted: false, success: true }, verificationChallenge: { attempted: false, success: true }, overallSuccess: true, errors: [], quickDetectionFailed: true };
1765
+ return await logOutcome({ phishingWarning: { attempted: false, success: true }, verificationChallenge: { attempted: false, success: true }, overallSuccess: true, errors: [], quickDetectionFailed: true });
1651
1766
  }
1652
1767
 
1653
1768
  // Early return structure when no Cloudflare indicators found
@@ -1656,15 +1771,23 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1656
1771
  // Trust the detection -- explicit config only matters when indicators ARE found
1657
1772
  // This avoids a 10s adaptive timeout on non-Cloudflare sites
1658
1773
  if (!quickDetection.hasIndicators) {
1659
- if (forceDebug) console.log(formatLogMessage('cloudflare', `No Cloudflare indicators found, skipping protection handling for ${currentUrl}`));
1660
- if (forceDebug) console.log(formatLogMessage('cloudflare', `Quick detection details: title="${quickDetection.title}", bodySnippet="${quickDetection.bodySnippet}"`));
1661
- return {
1774
+ if (forceDebug) {
1775
+ const cachedTag = quickDetection._fromCache ? ' [cached]' : '';
1776
+ if (quickDetection.hasErrorPage) {
1777
+ console.log(formatLogMessage('cloudflare', `Cloudflare error page detected${cachedTag} (origin unreachable, no bypass possible) for ${currentUrl}`));
1778
+ } else {
1779
+ console.log(formatLogMessage('cloudflare', `No Cloudflare indicators found${cachedTag}, skipping protection handling for ${currentUrl}`));
1780
+ }
1781
+ console.log(formatLogMessage('cloudflare', `Quick detection details${cachedTag}: title="${quickDetection.title}", bodySnippet="${quickDetection.bodySnippet}"`));
1782
+ }
1783
+ return await logOutcome({
1662
1784
  phishingWarning: { attempted: false, success: true },
1663
1785
  verificationChallenge: { attempted: false, success: true },
1664
1786
  overallSuccess: true,
1665
1787
  errors: [],
1666
- skippedNoIndicators: true
1667
- };
1788
+ skippedNoIndicators: true,
1789
+ cloudflareErrorPage: !!quickDetection.hasErrorPage
1790
+ });
1668
1791
  }
1669
1792
 
1670
1793
  // Standard return structure for all processing paths
@@ -1698,7 +1821,7 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1698
1821
  const cachedOutcome = detectionCache.cache.get(outcomeCacheKey);
1699
1822
  if (cachedOutcome && cachedOutcome.data && cachedOutcome.data.timedOut && Date.now() - cachedOutcome.timestamp < detectionCache.ttl) {
1700
1823
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Skipping ${currentUrl} -- domain already timed out on a previous URL`));
1701
- return cachedOutcome.data;
1824
+ return await logOutcome(cachedOutcome.data);
1702
1825
  }
1703
1826
  } catch (e) { /* malformed URL, proceed normally */ }
1704
1827
 
@@ -1725,17 +1848,17 @@ async function handleCloudflareProtection(page, currentUrl, siteConfig, forceDeb
1725
1848
  // Cache timeout results at domain level so subsequent URLs skip immediately
1726
1849
  if (handlingResult.timedOut) {
1727
1850
  try {
1728
- const setOutcomeKey = 'outcome:' + new URL(currentUrl).hostname;
1729
- detectionCache.cache.set(setOutcomeKey, { data: handlingResult, timestamp: Date.now() });
1851
+ const outcomeCacheKey = 'outcome:' + new URL(currentUrl).hostname;
1852
+ detectionCache.cache.set(outcomeCacheKey, { data: handlingResult, timestamp: Date.now() });
1730
1853
  } catch (e) { /* malformed URL, skip caching */ }
1731
1854
  }
1732
1855
 
1733
- return handlingResult;
1856
+ return await logOutcome(handlingResult);
1734
1857
  } catch (error) {
1735
1858
  result.overallSuccess = false;
1736
1859
  result.errors.push(`Cloudflare handling failed: ${error.message}`);
1737
1860
  if (forceDebug) console.log(formatLogMessage('cloudflare', `Overall handling failed: ${error.message}`));
1738
- return result;
1861
+ return await logOutcome(result);
1739
1862
  }
1740
1863
  }
1741
1864