@fanboynz/network-scanner 2.0.66 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/flowproxy.js CHANGED
@@ -10,10 +10,12 @@
10
10
  * - Requires specific handling for security scanning tools
11
11
  */
12
12
 
13
- /**
14
- * Module version information
15
- */
16
- const FLOWPROXY_MODULE_VERSION = '1.0.0';
13
+ const { formatLogMessage, messageColors } = require('./colorize');
14
+
15
+ // Precomputed colored '[flowproxy]' subsystem prefix. formatLogMessage only
16
+ // colors the [severity] tag; this constant colors the subsystem prefix so
17
+ // '[debug] [flowproxy] X' has both tags visually distinct.
18
+ const FLOWPROXY_TAG = messageColors.processing('[flowproxy]');
17
19
 
18
20
  /**
19
21
  * Timeout constants for FlowProxy operations (in milliseconds)
@@ -22,25 +24,173 @@ const FLOWPROXY_MODULE_VERSION = '1.0.0';
22
24
  const TIMEOUTS = {
23
25
  PAGE_EVALUATION_SAFE: 10000, // Safe page evaluation timeout
24
26
  // FlowProxy-specific timeouts
25
- PAGE_LOAD_WAIT: 2000, // Initial wait for page to load
26
27
  JS_CHALLENGE_DEFAULT: 15000, // Default JavaScript challenge timeout
27
28
  RATE_LIMIT_DEFAULT: 30000, // Default rate limit delay
28
- ADDITIONAL_DELAY_DEFAULT: 5000, // Default additional processing delay
29
29
  PAGE_TIMEOUT_DEFAULT: 45000, // Default page timeout
30
- NAVIGATION_TIMEOUT_DEFAULT: 45000, // Default navigation timeout
30
+ NAVIGATION_TIMEOUT_DEFAULT: 45000 // Default navigation timeout
31
31
  };
32
32
 
33
+ // Default-false detection shape returned by the catch paths in
34
+ // safePageEvaluate / analyzeFlowProxyProtection. Hoisted so the two
35
+ // catch branches don't drift if a new detection flag is added.
36
+ const DEFAULT_DETECTION = Object.freeze({
37
+ isFlowProxyDetected: false,
38
+ hasSpecificSignal: false,
39
+ hasFlowProxyDomain: false,
40
+ hasFlowProxyElements: false,
41
+ hasFlowProxyScripts: false,
42
+ hasFlowProxyBrandText: false,
43
+ hasFlowProxyHeaders: false,
44
+ hasFlowProxyCookies: false,
45
+ matchedHeader: null,
46
+ matchedCookie: null,
47
+ hasProtectionPage: false,
48
+ hasChallengeElements: false,
49
+ isRateLimited: false,
50
+ hasJSChallenge: false,
51
+ isProcessing: false
52
+ });
53
+
54
+ // === HTTP RESPONSE HEADER / COOKIE DETECTION =================================
55
+ // Per-page accumulator for vendor-specific HTTP response signals. Populated
56
+ // by the response listener attached via attachFlowProxyHeaderListener();
57
+ // read by analyzeFlowProxyProtection() to merge with the DOM/text scan.
58
+ //
59
+ // WeakMap so the entry is released when Puppeteer drops the page reference —
60
+ // no manual cleanup needed.
61
+ const pageHeaderState = new WeakMap();
62
+
63
+ // Header/cookie tokens that uniquely identify FlowProxy/Aurologic. Lowercase
64
+ // for case-insensitive matching (response.headers() returns lowercase keys
65
+ // but values keep their case).
66
+ const VENDOR_TOKENS = ['flowproxy', 'aurologic'];
67
+
68
+ // Header names where a vendor token in the VALUE is a strong signal.
69
+ // (Server, Via, X-Powered-By, X-Cache, X-CDN are all places a CDN/proxy
70
+ // commonly self-identifies.)
71
+ const VENDOR_VALUE_HEADERS = ['server', 'via', 'x-powered-by', 'x-cache', 'x-cdn'];
72
+
73
+ // Single source of truth for "is this cookie name vendor-namespaced?"
74
+ // Used both by the Set-Cookie listener (parsing header text) and the jar
75
+ // check in analyzeFlowProxyProtection (cookies()-API results), so the two
76
+ // sides can't disagree about what counts.
77
+ function isVendorCookieName(name) {
78
+ if (!name) return false;
79
+ const n = name.toLowerCase();
80
+ return n === 'flowproxy' || n === 'aurologic' ||
81
+ n.startsWith('flowproxy_') || n.startsWith('aurologic_') ||
82
+ n.startsWith('flowproxy-') || n.startsWith('aurologic-');
83
+ }
84
+
85
+ // Default-empty listener state. One source of truth for the shape, used
86
+ // at attach time (initial state) and at read time (fallback when the
87
+ // listener was never attached). Add a new signal field once, applied
88
+ // everywhere.
89
+ function emptyHeaderState() {
90
+ return {
91
+ hasFlowProxyHeaders: false,
92
+ hasFlowProxyCookies: false,
93
+ matchedHeader: null,
94
+ matchedCookie: null
95
+ };
96
+ }
97
+
98
+ /**
99
+ * Attach a response listener to a page that watches for FlowProxy/Aurologic
100
+ * HTTP response headers + cookies. Idempotent — safe to call multiple times.
101
+ *
102
+ * Headers are the most reliable signal: DOM scraping can be fooled by any
103
+ * "Please wait" / "Loading" string, but a `Server: flowProxy` header is
104
+ * uniquely the vendor's. Cookies likewise — `flowproxy_*` / `aurologic_*`
105
+ * names don't collide with anything else in practice.
106
+ *
107
+ * Call BEFORE page.goto() so the navigation response itself is observed.
108
+ * State is read later via analyzeFlowProxyProtection().
109
+ *
110
+ * @param {import('puppeteer').Page} page - Puppeteer page instance
111
+ */
112
+ function attachFlowProxyHeaderListener(page) {
113
+ if (pageHeaderState.has(page)) return; // idempotent
114
+
115
+ const state = emptyHeaderState();
116
+ pageHeaderState.set(page, state);
117
+
118
+ page.on('response', (response) => {
119
+ // Once both signals are found there's nothing more to learn — bail
120
+ // immediately to keep per-response overhead near zero on long pages.
121
+ if (state.hasFlowProxyHeaders && state.hasFlowProxyCookies) return;
122
+
123
+ try {
124
+ const headers = response.headers();
125
+ if (!headers) return;
126
+
127
+ // 1) Vendor-token search across the well-known value-bearing headers.
128
+ if (!state.hasFlowProxyHeaders) {
129
+ for (const h of VENDOR_VALUE_HEADERS) {
130
+ const v = headers[h];
131
+ if (!v) continue;
132
+ const vl = v.toLowerCase();
133
+ for (const tok of VENDOR_TOKENS) {
134
+ if (vl.includes(tok)) {
135
+ state.hasFlowProxyHeaders = true;
136
+ state.matchedHeader = `${h}: ${v}`;
137
+ break;
138
+ }
139
+ }
140
+ if (state.hasFlowProxyHeaders) break;
141
+ }
142
+ }
143
+
144
+ // 2) Any X-FlowProxy-* or X-Aurologic-* custom header name — those
145
+ // are vendor-namespaced by convention and don't collide.
146
+ if (!state.hasFlowProxyHeaders) {
147
+ for (const key of Object.keys(headers)) {
148
+ // key is already lowercase per Puppeteer's headers() contract
149
+ if (key.startsWith('x-flowproxy-') || key.startsWith('x-aurologic-')) {
150
+ state.hasFlowProxyHeaders = true;
151
+ state.matchedHeader = `${key}: ${headers[key]}`;
152
+ break;
153
+ }
154
+ }
155
+ }
156
+
157
+ // 3) Set-Cookie parsing — extract each cookie's NAME (substring
158
+ // before the first '=') and apply the shared vendor-name
159
+ // predicate. The old substring-on-value match could false-fire
160
+ // on names like `__flowproxy=` or `notaurologic_x=` that contain
161
+ // the token without being vendor cookies; the name-level check
162
+ // matches the jar inspection in analyzeFlowProxyProtection
163
+ // exactly. Puppeteer joins multi-Set-Cookie with '\n'.
164
+ if (!state.hasFlowProxyCookies) {
165
+ const setCookie = headers['set-cookie'];
166
+ if (setCookie) {
167
+ const lines = setCookie.split('\n');
168
+ for (let i = 0; i < lines.length; i++) {
169
+ const line = lines[i];
170
+ const eq = line.indexOf('=');
171
+ if (eq <= 0) continue;
172
+ const name = line.slice(0, eq).trim();
173
+ if (isVendorCookieName(name)) {
174
+ state.hasFlowProxyCookies = true;
175
+ state.matchedCookie = name;
176
+ break;
177
+ }
178
+ }
179
+ }
180
+ }
181
+ } catch (_) {
182
+ // Observation-only — never let a header read throw into Puppeteer's
183
+ // event-emitter chain.
184
+ }
185
+ });
186
+ }
187
+
33
188
  // Fast timeout constants - optimized for speed while respecting FlowProxy delays
34
189
  const FAST_TIMEOUTS = {
35
190
  PAGE_LOAD_WAIT: 1500, // Reduced from 2000ms
36
- ADDITIONAL_DELAY_DEFAULT: 3000, // Reduced from 5000ms
37
- FALLBACK_TIMEOUT: 3000 // Reduced from 5000ms
191
+ ADDITIONAL_DELAY_DEFAULT: 3000 // Reduced from 5000ms
38
192
  };
39
193
 
40
- /**
41
- * Gets module version information
42
- * @returns {object} Version information object
43
- */
44
194
  // Protocols to skip — FlowProxy only protects web traffic
45
195
  const SKIP_PATTERNS = [
46
196
  'about:', 'chrome:', 'chrome-extension:', 'chrome-error:', 'chrome-search:',
@@ -48,13 +198,6 @@ const SKIP_PATTERNS = [
48
198
  'data:', 'blob:', 'javascript:', 'vbscript:', 'file:', 'ftp:', 'ftps:'
49
199
  ];
50
200
 
51
- function getModuleInfo() {
52
- return {
53
- version: FLOWPROXY_MODULE_VERSION,
54
- name: 'FlowProxy Protection Handler'
55
- };
56
- }
57
-
58
201
  /**
59
202
  * Validates if a URL should be processed by FlowProxy protection
60
203
  * Only allows HTTP/HTTPS URLs, skips browser-internal and special protocols
@@ -75,7 +218,7 @@ function getModuleInfo() {
75
218
  */
76
219
  function shouldProcessUrl(url, forceDebug = false) {
77
220
  if (!url || typeof url !== 'string') {
78
- if (forceDebug) console.log(`[flowproxy][url-validation] Skipping invalid URL: ${url}`);
221
+ if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG}[url-validation] Skipping invalid URL: ${url}`));
79
222
  return false;
80
223
  }
81
224
 
@@ -84,7 +227,7 @@ function shouldProcessUrl(url, forceDebug = false) {
84
227
  for (const pattern of SKIP_PATTERNS) {
85
228
  if (urlLower.startsWith(pattern)) {
86
229
  if (forceDebug) {
87
- console.log(`[flowproxy][url-validation] Skipping ${pattern} URL: ${url.substring(0, 100)}${url.length > 100 ? '...' : ''}`);
230
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG}[url-validation] Skipping ${pattern} URL: ${url.substring(0, 100)}${url.length > 100 ? '...' : ''}`));
88
231
  }
89
232
  return false;
90
233
  }
@@ -93,7 +236,7 @@ function shouldProcessUrl(url, forceDebug = false) {
93
236
  // Only process HTTP/HTTPS URLs - FlowProxy only protects web traffic
94
237
  if (!urlLower.startsWith('http://') && !urlLower.startsWith('https://')) {
95
238
  if (forceDebug) {
96
- console.log(`[flowproxy][url-validation] Skipping non-HTTP(S) URL: ${url.substring(0, 100)}${url.length > 100 ? '...' : ''}`);
239
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG}[url-validation] Skipping non-HTTP(S) URL: ${url.substring(0, 100)}${url.length > 100 ? '...' : ''}`));
97
240
  }
98
241
  return false;
99
242
  }
@@ -118,19 +261,22 @@ async function waitForTimeout(page, timeout) {
118
261
  * Safe page evaluation with timeout protection for FlowProxy analysis
119
262
  */
120
263
  async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_SAFE) {
264
+ let timer;
121
265
  try {
122
266
  return await Promise.race([
123
267
  page.evaluate(func),
124
- new Promise((_, reject) =>
125
- setTimeout(() => reject(new Error('FlowProxy page evaluation timeout')), timeout)
126
- )
268
+ new Promise((_, reject) => {
269
+ timer = setTimeout(() => reject(new Error('FlowProxy page evaluation timeout')), timeout);
270
+ })
127
271
  ]);
128
272
  } catch (error) {
129
- // Return safe defaults if evaluation fails
130
- return {
131
- isFlowProxyDetected: false,
132
- error: error.message
133
- };
273
+ // Return full default-false shape so downstream `.hasProtectionPage`
274
+ // etc. read as `false` instead of `undefined` — keeps debug logs
275
+ // honest and conditional branches in handleFlowProxyProtection
276
+ // deterministic.
277
+ return { ...DEFAULT_DETECTION, error: error.message };
278
+ } finally {
279
+ if (timer) clearTimeout(timer);
134
280
  }
135
281
  }
136
282
 
@@ -159,9 +305,10 @@ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_S
159
305
  */
160
306
  async function analyzeFlowProxyProtection(page) {
161
307
  try {
162
- // Get current page URL and validate it first
163
- const currentPageUrl = await page.url();
164
-
308
+ // Get current page URL and validate it first.
309
+ // page.url() is synchronous in Puppeteer 20+; no await needed.
310
+ const currentPageUrl = page.url();
311
+
165
312
  if (!shouldProcessUrl(currentPageUrl, false)) {
166
313
  return {
167
314
  isFlowProxyDetected: false,
@@ -170,73 +317,103 @@ async function analyzeFlowProxyProtection(page) {
170
317
  };
171
318
  }
172
319
 
320
+ // Pull HTTP-layer signals collected by the response listener (populated
321
+ // by attachFlowProxyHeaderListener if the caller wired it up before
322
+ // navigation). Falls back to empty-defaults if the listener was never
323
+ // attached, so DOM-only detection still works.
324
+ const httpState = pageHeaderState.get(page) || emptyHeaderState();
325
+
173
326
  // Continue with comprehensive FlowProxy detection for valid HTTP(S) URLs
174
- return await safePageEvaluate(page, () => {
327
+ const domResult = await safePageEvaluate(page, () => {
175
328
  const title = document.title || '';
176
329
  const bodyText = document.body ? document.body.textContent : '';
177
330
  const url = window.location.href;
178
-
179
- // Check for flowProxy/aurologic specific domain indicators
180
- // FlowProxy services often redirect to aurologic domains or use flowproxy subdomains
181
- const hasFlowProxyDomain = url.includes('aurologic') ||
182
- url.includes('flowproxy') ||
183
- url.includes('ddos-protection');
184
-
185
- // Check for flowProxy challenge page indicators
186
- // These are common titles and text patterns used by FlowProxy protection pages
187
- const hasProtectionPage = title.includes('DDoS Protection') ||
188
- title.includes('Please wait') ||
189
- title.includes('Checking your browser') ||
190
- bodyText.includes('DDoS protection by aurologic') ||
191
- bodyText.includes('flowProxy') ||
192
- bodyText.includes('Verifying your browser');
193
-
194
- // Check for specific flowProxy DOM elements
195
- // FlowProxy typically adds custom data attributes and CSS classes
331
+
332
+ // === VENDOR-SPECIFIC SIGNALS (high-confidence FlowProxy markers) ===
333
+ // Anything here is unambiguous: it names FlowProxy or its parent
334
+ // company Aurologic. At least ONE of these must be present for the
335
+ // primary detection to fire — generic loaders / Cloudflare's
336
+ // "Checking your browser" / SPA spinners alone do NOT count.
337
+
338
+ // Note on URL signals: 'ddos-protection' was moved out of this set;
339
+ // it's too broad (matches docs/blog URLs about DDoS protection).
340
+ const hasFlowProxyDomain = url.includes('aurologic') ||
341
+ url.includes('flowproxy');
342
+
343
+ // DOM signals tied to the vendor's class/id/data-attribute namespace
344
+ // or its uniquely named challenge input.
196
345
  const hasFlowProxyElements = document.querySelector('[data-flowproxy]') !== null ||
197
- document.querySelector('.flowproxy-challenge') !== null ||
198
- document.querySelector('#flowproxy-container') !== null ||
199
- document.querySelector('.aurologic-protection') !== null;
200
-
201
- // Check for challenge indicators
202
- // FlowProxy uses various elements to indicate active challenges
203
- const hasChallengeElements = document.querySelector('.challenge-running') !== null ||
204
- document.querySelector('.verification-container') !== null ||
205
- document.querySelector('input[name="flowproxy-response"]') !== null;
206
-
207
- // Check for rate limiting indicators
208
- // Rate limiting is a common FlowProxy feature that shows specific messages
346
+ document.querySelector('.flowproxy-challenge') !== null ||
347
+ document.querySelector('#flowproxy-container') !== null ||
348
+ document.querySelector('.aurologic-protection') !== null ||
349
+ document.querySelector('input[name="flowproxy-response"]') !== null;
350
+
351
+ // Script src patterns from the vendor.
352
+ const hasFlowProxyScripts = document.querySelector('script[src*="flowproxy"]') !== null ||
353
+ document.querySelector('script[src*="aurologic"]') !== null;
354
+
355
+ // Brand-name strings — "flowProxy" (cased) and the canonical
356
+ // Aurologic attribution line.
357
+ const hasFlowProxyBrandText = bodyText.includes('DDoS protection by aurologic') ||
358
+ bodyText.includes('flowProxy');
359
+
360
+ // DOM-side specific signals only. The Node caller below merges this
361
+ // with HTTP-header / cookie signals (which live outside the page
362
+ // context) to produce the final hasSpecificSignal.
363
+ const domSpecificSignal = hasFlowProxyDomain ||
364
+ hasFlowProxyElements ||
365
+ hasFlowProxyScripts ||
366
+ hasFlowProxyBrandText;
367
+
368
+ // === GENERIC SIGNALS (low-confidence; used for sub-handling only) ===
369
+ // These flags help the handler decide WHICH delay to apply once
370
+ // FlowProxy presence is already confirmed by a specific signal.
371
+ // They are NOT inputs to isFlowProxyDetected — by themselves they
372
+ // collide with Cloudflare, Sucuri, generic SPA loaders, etc.
373
+
374
+ // Generic protection-page text (kept for verification-step semantics
375
+ // and debug logging — exposed as `hasProtectionPage` for backward
376
+ // compat with the rest of the module).
377
+ const hasProtectionPage = hasFlowProxyBrandText ||
378
+ title.includes('DDoS Protection') ||
379
+ title.includes('Please wait') ||
380
+ title.includes('Checking your browser') ||
381
+ bodyText.includes('Verifying your browser') ||
382
+ url.includes('ddos-protection');
383
+
384
+ // Generic challenge-element markers (still exposed for the handler's
385
+ // sub-decisions; hasFlowProxyElements above is the strong subset).
386
+ const hasChallengeElements = hasFlowProxyElements ||
387
+ document.querySelector('.challenge-running') !== null ||
388
+ document.querySelector('.verification-container') !== null;
389
+
209
390
  const isRateLimited = bodyText.includes('Rate limited') ||
210
- bodyText.includes('Too many requests') ||
211
- bodyText.includes('Please try again later') ||
212
- title.includes('429') ||
213
- title.includes('Rate Limit');
214
-
215
- // Check for JavaScript challenge indicators
216
- // FlowProxy often requires JavaScript to be enabled and uses specific scripts
217
- const hasJSChallenge = document.querySelector('script[src*="flowproxy"]') !== null ||
218
- document.querySelector('script[src*="aurologic"]') !== null ||
219
- bodyText.includes('JavaScript is required') ||
220
- bodyText.includes('Please enable JavaScript');
221
-
222
- // Check for loading/processing indicators
223
- // FlowProxy shows these while performing browser verification
391
+ bodyText.includes('Too many requests') ||
392
+ bodyText.includes('Please try again later') ||
393
+ title.includes('429') ||
394
+ title.includes('Rate Limit');
395
+
396
+ // hasJSChallenge gates the wait-for-challenge-completion path. The
397
+ // vendor script-src patterns are strong; the JS-required strings are
398
+ // generic but only matter when hasSpecificSignal already gated us in.
399
+ const hasJSChallenge = hasFlowProxyScripts ||
400
+ bodyText.includes('JavaScript is required') ||
401
+ bodyText.includes('Please enable JavaScript');
402
+
224
403
  const isProcessing = bodyText.includes('Processing') ||
225
- bodyText.includes('Loading') ||
226
- document.querySelector('.loading-spinner') !== null ||
227
- document.querySelector('.processing-indicator') !== null;
228
-
229
- // Main detection logic - any of these primary indicators suggest FlowProxy presence
230
- const isFlowProxyDetected = hasFlowProxyDomain ||
231
- hasProtectionPage ||
232
- hasFlowProxyElements ||
233
- hasChallengeElements;
234
-
404
+ bodyText.includes('Loading') ||
405
+ document.querySelector('.loading-spinner') !== null ||
406
+ document.querySelector('.processing-indicator') !== null;
407
+
408
+ // The Node-side caller merges this with HTTP signals to compute
409
+ // the final hasSpecificSignal / isFlowProxyDetected.
235
410
  return {
236
- isFlowProxyDetected,
411
+ domSpecificSignal,
237
412
  hasFlowProxyDomain,
238
- hasProtectionPage,
239
413
  hasFlowProxyElements,
414
+ hasFlowProxyScripts,
415
+ hasFlowProxyBrandText,
416
+ hasProtectionPage,
240
417
  hasChallengeElements,
241
418
  isRateLimited,
242
419
  hasJSChallenge,
@@ -246,19 +423,61 @@ async function analyzeFlowProxyProtection(page) {
246
423
  bodySnippet: bodyText.substring(0, 200) // First 200 chars for debugging
247
424
  };
248
425
  });
249
- } catch (error) {
250
- // Return safe defaults if page evaluation fails
426
+
427
+ // Cookie-jar check: complements the Set-Cookie response-header listener
428
+ // by reading what's ACTUALLY persisted in the browser jar. Catches:
429
+ // - cookies set on prior visits (session-reuse scenarios)
430
+ // - cookies set via document.cookie = '...' from page JS
431
+ // - Set-Cookie emitted before the listener attached (defensive)
432
+ // Uses isVendorCookieName so the predicate matches the listener.
433
+ // Try/catch because page.cookies() throws on closed/detached pages.
434
+ let hasJarCookie = false;
435
+ let jarMatchedCookie = null;
436
+ try {
437
+ const cookies = await page.cookies();
438
+ if (Array.isArray(cookies)) {
439
+ for (let i = 0; i < cookies.length; i++) {
440
+ if (isVendorCookieName(cookies[i].name)) {
441
+ hasJarCookie = true;
442
+ jarMatchedCookie = cookies[i].name;
443
+ break;
444
+ }
445
+ }
446
+ }
447
+ } catch (_) {
448
+ // Observation-only — never fail detection because the jar read errored.
449
+ }
450
+
451
+ // Unified merge: works for both the success path AND the DOM-error
452
+ // path. On error, domResult is `{...DEFAULT_DETECTION, error}` so
453
+ // domSpecificSignal is undefined; isFlowProxyDetected still becomes
454
+ // true if HTTP signals fired. Previously the error path returned
455
+ // early before recomputing primary detection, silently dropping
456
+ // header/cookie evidence whenever the DOM eval errored.
457
+ const hasFlowProxyCookies = httpState.hasFlowProxyCookies || hasJarCookie;
458
+ const hasSpecificSignal = (domResult && domResult.domSpecificSignal) ||
459
+ httpState.hasFlowProxyHeaders ||
460
+ hasFlowProxyCookies;
461
+
251
462
  return {
252
- isFlowProxyDetected: false,
253
- hasFlowProxyDomain: false,
254
- hasProtectionPage: false,
255
- hasFlowProxyElements: false,
256
- hasChallengeElements: false,
257
- isRateLimited: false,
258
- hasJSChallenge: false,
259
- isProcessing: false,
260
- error: error.message
463
+ ...domResult, // includes .error on the safePageEvaluate failure path
464
+ hasFlowProxyHeaders: httpState.hasFlowProxyHeaders,
465
+ hasFlowProxyCookies,
466
+ matchedHeader: httpState.matchedHeader,
467
+ // Listener-captured name wins over jar name when both fire — the
468
+ // listener saw the cookie at the moment it was set, which is the
469
+ // more informative time-of-event for debug output.
470
+ matchedCookie: httpState.matchedCookie || jarMatchedCookie,
471
+ hasSpecificSignal,
472
+ // PRIMARY DETECTION: at least one vendor-specific signal across DOM
473
+ // OR HTTP layer. Headers are the most reliable signal; cookies
474
+ // close behind. DOM markers remain the fallback for sites where
475
+ // the listener wasn't wired up before navigation.
476
+ isFlowProxyDetected: hasSpecificSignal
261
477
  };
478
+ } catch (error) {
479
+ // Return safe defaults if page evaluation fails
480
+ return { ...DEFAULT_DETECTION, error: error.message };
262
481
  }
263
482
  }
264
483
 
@@ -314,7 +533,7 @@ async function handleFlowProxyProtection(page, currentUrl, siteConfig, forceDebu
314
533
  // FlowProxy only protects web traffic, so other protocols should be skipped
315
534
  if (!shouldProcessUrl(currentUrl, forceDebug)) {
316
535
  if (forceDebug) {
317
- console.log(`[debug][flowproxy] Skipping protection handling for non-HTTP(S) URL: ${currentUrl}`);
536
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Skipping protection handling for non-HTTP(S) URL: ${currentUrl}`));
318
537
  }
319
538
  return {
320
539
  flowProxyDetection: { attempted: false, detected: false },
@@ -336,7 +555,7 @@ async function handleFlowProxyProtection(page, currentUrl, siteConfig, forceDebu
336
555
  };
337
556
 
338
557
  try {
339
- if (forceDebug) console.log(`[debug][flowproxy] Checking for flowProxy protection on ${currentUrl}`);
558
+ if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Checking for flowProxy protection on ${currentUrl}`));
340
559
 
341
560
  // Wait for initial page load before analyzing
342
561
  // FlowProxy protection pages need time to fully render their elements
@@ -355,15 +574,23 @@ async function handleFlowProxyProtection(page, currentUrl, siteConfig, forceDebu
355
574
  result.handlingResult.attempted = true;
356
575
 
357
576
  if (forceDebug) {
358
- console.log(`[debug][flowproxy] FlowProxy protection detected on ${currentUrl}:`);
359
- console.log(`[debug][flowproxy] Page Title: "${detectionInfo.title}"`);
360
- console.log(`[debug][flowproxy] Current URL: ${detectionInfo.url}`);
361
- console.log(`[debug][flowproxy] Has Protection Page: ${detectionInfo.hasProtectionPage}`);
362
- console.log(`[debug][flowproxy] Has Challenge Elements: ${detectionInfo.hasChallengeElements}`);
363
- console.log(`[debug][flowproxy] Is Rate Limited: ${detectionInfo.isRateLimited}`);
364
- console.log(`[debug][flowproxy] Has JS Challenge: ${detectionInfo.hasJSChallenge}`);
365
- console.log(`[debug][flowproxy] Is Processing: ${detectionInfo.isProcessing}`);
366
- console.log(`[debug][flowproxy] Body Snippet: "${detectionInfo.bodySnippet}"`);
577
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} FlowProxy protection detected on ${currentUrl}:`));
578
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Page Title: "${detectionInfo.title}"`));
579
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Current URL: ${detectionInfo.url}`));
580
+ // Specific-signal breakdown — which vendor-specific marker(s) fired
581
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Specific signals: domain=${detectionInfo.hasFlowProxyDomain} elements=${detectionInfo.hasFlowProxyElements} scripts=${detectionInfo.hasFlowProxyScripts} brandText=${detectionInfo.hasFlowProxyBrandText} headers=${detectionInfo.hasFlowProxyHeaders} cookies=${detectionInfo.hasFlowProxyCookies}`));
582
+ if (detectionInfo.matchedHeader) {
583
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Matched header: ${detectionInfo.matchedHeader}`));
584
+ }
585
+ if (detectionInfo.matchedCookie) {
586
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Matched cookie: ${detectionInfo.matchedCookie}`));
587
+ }
588
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Has Protection Page: ${detectionInfo.hasProtectionPage}`));
589
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Has Challenge Elements: ${detectionInfo.hasChallengeElements}`));
590
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Is Rate Limited: ${detectionInfo.isRateLimited}`));
591
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Has JS Challenge: ${detectionInfo.hasJSChallenge}`));
592
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Is Processing: ${detectionInfo.isProcessing}`));
593
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Body Snippet: "${detectionInfo.bodySnippet}"`));
367
594
  }
368
595
 
369
596
  // HANDLE RATE LIMITING - Highest priority as it blocks all requests
@@ -371,7 +598,7 @@ async function handleFlowProxyProtection(page, currentUrl, siteConfig, forceDebu
371
598
  if (detectionInfo.isRateLimited) {
372
599
  const rateLimitDelay = siteConfig.flowproxy_delay || TIMEOUTS.RATE_LIMIT_DEFAULT;
373
600
  result.warnings.push(`Rate limiting detected - implementing ${rateLimitDelay}ms delay`);
374
- if (forceDebug) console.log(`[debug][flowproxy] Rate limiting detected, waiting ${rateLimitDelay}ms`);
601
+ if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Rate limiting detected, waiting ${rateLimitDelay}ms`));
375
602
  await waitForTimeout(page, rateLimitDelay);
376
603
  }
377
604
 
@@ -379,40 +606,39 @@ async function handleFlowProxyProtection(page, currentUrl, siteConfig, forceDebu
379
606
  // FlowProxy uses JS challenges to verify browser legitimacy
380
607
  if (detectionInfo.hasJSChallenge || detectionInfo.isProcessing) {
381
608
  const jsWaitTime = siteConfig.flowproxy_js_timeout || TIMEOUTS.JS_CHALLENGE_DEFAULT;
382
- if (forceDebug) console.log(`[debug][flowproxy] JavaScript challenge detected, waiting up to ${jsWaitTime}ms for completion`);
609
+ if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} JavaScript challenge detected, waiting up to ${jsWaitTime}ms for completion`));
383
610
 
384
611
  try {
385
- // Wait for challenge completion indicators to disappear
386
- // These conditions indicate the JS challenge has finished
387
- await Promise.race([
388
- page.waitForFunction(
612
+ // Wait for challenge completion indicators to disappear.
613
+ // page.waitForFunction has its own { timeout } — the previous
614
+ // outer Promise.race added a setTimeout that fired 5s LATER,
615
+ // leaked its timer on the success path, and never won the race
616
+ // in practice. Dropped: waitForFunction's own timeout is the
617
+ // single source of truth.
618
+ await page.waitForFunction(
389
619
  () => {
390
620
  const bodyText = document.body ? document.body.textContent : '';
391
- return !bodyText.includes('Processing') &&
621
+ return !bodyText.includes('Processing') &&
392
622
  !bodyText.includes('Checking your browser') &&
393
623
  !bodyText.includes('Please wait') &&
394
624
  !document.querySelector('.loading-spinner') &&
395
625
  !document.querySelector('.processing-indicator');
396
626
  },
397
627
  { timeout: jsWaitTime }
398
- ),
399
- new Promise((_, reject) =>
400
- setTimeout(() => reject(new Error('JS challenge timeout')), jsWaitTime + 5000)
401
- )
402
- ]);
403
-
404
- if (forceDebug) console.log(`[debug][flowproxy] JavaScript challenge appears to have completed`);
628
+ );
629
+
630
+ if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} JavaScript challenge appears to have completed`));
405
631
  } catch (timeoutErr) {
406
632
  // Continue even if timeout occurs - some challenges may take longer
407
633
  result.warnings.push(`JavaScript challenge timeout after ${jsWaitTime}ms`);
408
- if (forceDebug) console.log(`[debug][flowproxy] JavaScript challenge timeout - continuing anyway`);
634
+ if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} JavaScript challenge timeout - continuing anyway`));
409
635
  }
410
636
  }
411
637
 
412
638
  // IMPLEMENT ADDITIONAL DELAY - Final step to ensure all processing completes
413
639
  // FlowProxy may need extra time even after challenges complete
414
640
  const additionalDelay = siteConfig.flowproxy_additional_delay || FAST_TIMEOUTS.ADDITIONAL_DELAY_DEFAULT;
415
- if (forceDebug) console.log(`[debug][flowproxy] Implementing additional ${additionalDelay}ms delay for flowProxy processing`);
641
+ if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Implementing additional ${additionalDelay}ms delay for flowProxy processing`));
416
642
  await waitForTimeout(page, additionalDelay);
417
643
 
418
644
  // VERIFY SUCCESSFUL BYPASS - Check if we're still on a protection page
@@ -420,17 +646,17 @@ async function handleFlowProxyProtection(page, currentUrl, siteConfig, forceDebu
420
646
  const finalCheck = await analyzeFlowProxyProtection(page);
421
647
  if (finalCheck.isFlowProxyDetected && finalCheck.hasProtectionPage) {
422
648
  result.warnings.push('Still on flowProxy protection page after handling attempts');
423
- if (forceDebug) console.log(`[debug][flowproxy] Warning: Still appears to be on protection page`);
649
+ if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Warning: Still appears to be on protection page`));
424
650
  // Don't mark as failure - protection page may persist but still allow access
425
651
  } else {
426
652
  result.handlingResult.success = true;
427
- if (forceDebug) console.log(`[debug][flowproxy] Successfully handled flowProxy protection for ${currentUrl}`);
653
+ if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Successfully handled flowProxy protection for ${currentUrl}`));
428
654
  }
429
655
 
430
656
  } else {
431
- // No FlowProxy protection detected - mark as successful (nothing to handle)
432
- if (forceDebug) console.log(`[debug][flowproxy] No flowProxy protection detected on ${currentUrl}`);
433
- result.overallSuccess = true;
657
+ // No FlowProxy protection detected — nothing to handle.
658
+ // result.overallSuccess is already true from initialization.
659
+ if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} No flowProxy protection detected on ${currentUrl}`));
434
660
  }
435
661
 
436
662
  } catch (error) {
@@ -438,86 +664,63 @@ async function handleFlowProxyProtection(page, currentUrl, siteConfig, forceDebu
438
664
  result.errors.push(`FlowProxy handling error: ${error.message}`);
439
665
  result.overallSuccess = false;
440
666
  if (forceDebug) {
441
- console.log(`[debug][flowproxy] FlowProxy handling failed for ${currentUrl}:`);
442
- console.log(`[debug][flowproxy] Error: ${error.message}`);
443
- console.log(`[debug][flowproxy] Stack: ${error.stack}`);
667
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} FlowProxy handling failed for ${currentUrl}:`));
668
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Error: ${error.message}`));
669
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Stack: ${error.stack}`));
444
670
  }
445
671
  }
446
672
 
447
673
  // LOG COMPREHENSIVE RESULTS for debugging and monitoring
448
674
  if (result.errors.length > 0 && forceDebug) {
449
- console.log(`[debug][flowproxy] FlowProxy handling completed with errors for ${currentUrl}:`);
675
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} FlowProxy handling completed with errors for ${currentUrl}:`));
450
676
  result.errors.forEach(error => {
451
- console.log(`[debug][flowproxy] - ${error}`);
677
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} - ${error}`));
452
678
  });
453
679
  } else if (result.warnings.length > 0 && forceDebug) {
454
- console.log(`[debug][flowproxy] FlowProxy handling completed with warnings for ${currentUrl}:`);
680
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} FlowProxy handling completed with warnings for ${currentUrl}:`));
455
681
  result.warnings.forEach(warning => {
456
- console.log(`[debug][flowproxy] - ${warning}`);
682
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} - ${warning}`));
457
683
  });
458
684
  } else if (result.flowProxyDetection.attempted && forceDebug) {
459
- console.log(`[debug][flowproxy] FlowProxy handling completed successfully for ${currentUrl}`);
685
+ console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} FlowProxy handling completed successfully for ${currentUrl}`));
460
686
  }
461
687
 
462
688
  return result;
463
689
  }
464
690
 
465
691
  /**
466
- * Quick check to determine if the current page might be behind flowProxy protection
467
- * This is a lightweight alternative to full analysis for simple detection needs
468
- *
469
- * @param {import('puppeteer').Page} page - Puppeteer page instance
470
- * @returns {Promise<boolean>} True if flowProxy protection is suspected
471
- *
692
+ * Gets page-level timeout values for flowProxy-protected sites. Used by
693
+ * nwss.js to call page.setDefaultTimeout/setDefaultNavigationTimeout
694
+ * before navigating. The handler itself reads challenge/rate-limit/
695
+ * additional-delay values directly from siteConfig (with TIMEOUTS
696
+ * fallbacks), so those don't need to round-trip through this function.
697
+ *
698
+ * @param {object} siteConfig - Site configuration object
699
+ * @returns {{ pageTimeout: number, navigationTimeout: number }}
700
+ *
472
701
  * @example
473
- * if (await isFlowProxyProtected(page)) {
474
- * console.log('FlowProxy protection detected - implementing handling');
475
- * await handleFlowProxyProtection(page, url, config);
476
- * }
477
- */
478
- async function isFlowProxyProtected(page) {
479
- try {
480
- const detection = await analyzeFlowProxyProtection(page);
481
- return detection.isFlowProxyDetected;
482
- } catch (error) {
483
- // Return false if detection fails - assume no protection
484
- return false;
485
- }
486
- }
487
-
488
- /**
489
- * Gets recommended timeout values for flowProxy protected sites
490
- * Provides sensible defaults while allowing site-specific customization
491
- *
492
- * @param {object} siteConfig - Site configuration object with optional FlowProxy settings
493
- * @returns {object} Recommended timeout values for FlowProxy handling
494
- *
495
- * @example
496
- * const timeouts = getFlowProxyTimeouts({
497
- * flowproxy_delay: 60000, // Custom rate limit delay
498
- * flowproxy_js_timeout: 25000 // Custom JS challenge timeout
499
- * });
500
- *
501
- * // Use timeouts in page operations
502
- * await page.goto(url, { timeout: timeouts.pageTimeout });
702
+ * const { pageTimeout, navigationTimeout } = getFlowProxyTimeouts(siteConfig);
703
+ * page.setDefaultTimeout(pageTimeout);
704
+ * page.setDefaultNavigationTimeout(navigationTimeout);
503
705
  */
504
706
  function getFlowProxyTimeouts(siteConfig) {
505
707
  return {
506
708
  pageTimeout: siteConfig.flowproxy_page_timeout || TIMEOUTS.PAGE_TIMEOUT_DEFAULT,
507
- navigationTimeout: siteConfig.flowproxy_nav_timeout || TIMEOUTS.NAVIGATION_TIMEOUT_DEFAULT,
508
- challengeTimeout: siteConfig.flowproxy_js_timeout || TIMEOUTS.JS_CHALLENGE_DEFAULT,
509
- rateLimit: siteConfig.flowproxy_delay || TIMEOUTS.RATE_LIMIT_DEFAULT,
510
- additionalDelay: siteConfig.flowproxy_additional_delay || TIMEOUTS.ADDITIONAL_DELAY_DEFAULT
709
+ navigationTimeout: siteConfig.flowproxy_nav_timeout || TIMEOUTS.NAVIGATION_TIMEOUT_DEFAULT
511
710
  };
512
711
  }
513
712
 
514
- // Export all public functions for use in other modules
713
+ // Public surface used by nwss.js. Internal helpers (waitForTimeout,
714
+ // safePageEvaluate, analyzeFlowProxyProtection, shouldProcessUrl) stay
715
+ // module-private — the old export list included several functions no
716
+ // caller imported.
717
+ //
718
+ // attachFlowProxyHeaderListener should be called by the caller BEFORE
719
+ // navigation so the response listener observes the document response's
720
+ // own headers. Without it, header/cookie detection silently no-ops and
721
+ // the module falls back to DOM-only detection.
515
722
  module.exports = {
516
- analyzeFlowProxyProtection,
517
723
  handleFlowProxyProtection,
518
- isFlowProxyProtected,
519
724
  getFlowProxyTimeouts,
520
- waitForTimeout,
521
- getModuleInfo,
522
- FLOWPROXY_MODULE_VERSION
725
+ attachFlowProxyHeaderListener
523
726
  };