@fanboynz/network-scanner 1.0.71 → 1.0.73

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/lib/cloudflare.js +314 -34
  2. package/nwss.js +114 -23
  3. package/package.json +1 -1
package/lib/cloudflare.js CHANGED
@@ -1,13 +1,13 @@
1
1
  /**
2
2
  * Cloudflare bypass and challenge handling module - Optimized with smart detection and adaptive timeouts
3
- * Version: 2.1.0 - Enhanced with quick detection, adaptive timeouts, and comprehensive debug logging
3
+ * Version: 2.2.0 - Enhanced with retry logic, caching, and improved error handling
4
4
  * Handles phishing warnings, Turnstile challenges, and modern Cloudflare protections
5
5
  */
6
6
 
7
7
  /**
8
8
  * Module version information
9
9
  */
10
- const CLOUDFLARE_MODULE_VERSION = '2.1.0';
10
+ const CLOUDFLARE_MODULE_VERSION = '2.2.0';
11
11
 
12
12
  /**
13
13
  * Timeout constants for various operations (in milliseconds)
@@ -29,7 +29,12 @@ const TIMEOUTS = {
29
29
  ADAPTIVE_TIMEOUT_WITH_INDICATORS: 25000, // Adaptive timeout when indicators found + explicit config
30
30
  ADAPTIVE_TIMEOUT_WITHOUT_INDICATORS: 20000, // Adaptive timeout with explicit config only
31
31
  ADAPTIVE_TIMEOUT_AUTO_WITH_INDICATORS: 15000, // Adaptive timeout for auto-detected with indicators
32
- ADAPTIVE_TIMEOUT_AUTO_WITHOUT_INDICATORS: 10000 // Adaptive timeout for auto-detected without indicators
32
+ ADAPTIVE_TIMEOUT_AUTO_WITHOUT_INDICATORS: 10000, // Adaptive timeout for auto-detected without indicators
33
+ // New timeouts for enhanced functionality
34
+ RETRY_DELAY: 1000, // Delay between retry attempts
35
+ MAX_RETRIES: 3, // Maximum retry attempts for operations
36
+ CHALLENGE_POLL_INTERVAL: 500, // Interval for polling challenge completion
37
+ CHALLENGE_MAX_POLLS: 20 // Maximum polling attempts
33
38
  };
34
39
 
35
40
  // Fast timeout constants - optimized for speed
@@ -45,6 +50,101 @@ const FAST_TIMEOUTS = {
45
50
  CHALLENGE_COMPLETION: 3000 // Fast completion check
46
51
  };
47
52
 
53
/**
 * Error categories for better handling.
 *
 * These are the category strings produced by categorizeError() and compared
 * against RETRY_CONFIG.retryableErrors. The object is frozen because it is
 * exported as an enum: letting a consumer reassign a member would silently
 * change which errors are treated as retryable everywhere else.
 */
const ERROR_TYPES = Object.freeze({
  NETWORK: 'network',
  TIMEOUT: 'timeout',
  ELEMENT_NOT_FOUND: 'element_not_found',
  EVALUATION_FAILED: 'evaluation_failed',
  NAVIGATION_FAILED: 'navigation_failed',
  UNKNOWN: 'unknown'
});

/**
 * Retry configuration with exponential backoff.
 *
 * - maxAttempts:       default number of attempts for a retried operation
 * - baseDelay:         delay before the first retry, in milliseconds
 * - maxDelay:          upper bound for any single retry delay, in milliseconds
 * - backoffMultiplier: factor applied to the delay for each further attempt
 * - retryableErrors:   ERROR_TYPES values that are worth retrying
 *
 * Deliberately left unfrozen: it is exported, so callers may tune it.
 */
const RETRY_CONFIG = {
  maxAttempts: 3,
  baseDelay: 1000,
  maxDelay: 8000,
  backoffMultiplier: 2,
  retryableErrors: [ERROR_TYPES.NETWORK, ERROR_TYPES.TIMEOUT, ERROR_TYPES.ELEMENT_NOT_FOUND]
};
75
+
76
/**
 * Performance cache for detection results.
 *
 * Entries are keyed by hostname + pathname (query string, protocol and port
 * are ignored), expire after `ttl` milliseconds, and the cache never holds
 * more than `maxSize` entries: once full, the least recently written entry
 * is dropped.
 */
class CloudflareDetectionCache {
  /**
   * @param {number} [ttl=300000] - Entry lifetime in milliseconds (5 minutes by default)
   * @param {number} [maxSize=1000] - Maximum number of entries kept
   */
  constructor(ttl = 300000, maxSize = 1000) {
    this.cache = new Map();
    this.ttl = ttl;
    this.maxSize = maxSize;
    this.hits = 0;
    this.misses = 0;
  }

  /**
   * Builds the cache key for a URL.
   * @param {string} url - URL to derive the key from
   * @returns {string} `hostname + pathname`, or the input unchanged when it is not a parseable URL
   */
  getCacheKey(url) {
    try {
      const { hostname, pathname } = new URL(url);
      return `${hostname}${pathname}`;
    } catch {
      return url;
    }
  }

  /**
   * Looks up a cached result and updates the hit/miss counters.
   * @param {string} url - URL whose result is wanted
   * @returns {*} The cached data, or null when absent or expired
   */
  get(url) {
    const key = this.getCacheKey(url);
    const cached = this.cache.get(key);

    if (cached !== undefined) {
      if (Date.now() - cached.timestamp < this.ttl) {
        this.hits++;
        return cached.data;
      }
      this.cache.delete(key); // Remove expired entry
    }

    this.misses++;
    return null;
  }

  /**
   * Stores a result for a URL, evicting the oldest entries when over capacity.
   * @param {string} url - URL the result belongs to
   * @param {*} data - Result to cache
   */
  set(url, data) {
    const key = this.getCacheKey(url);

    // Map.set() on an existing key keeps its original insertion position, so
    // without this delete a freshly rewritten entry would still be the first
    // candidate for eviction.
    this.cache.delete(key);
    this.cache.set(key, {
      data,
      timestamp: Date.now()
    });

    // Prevent cache from growing too large. The size > 0 guard keeps the loop
    // finite even if maxSize was configured as a negative number.
    while (this.cache.size > this.maxSize && this.cache.size > 0) {
      const oldestKey = this.cache.keys().next().value;
      this.cache.delete(oldestKey);
    }
  }

  /** Empties the cache and resets the hit/miss counters. */
  clear() {
    this.cache.clear();
    this.hits = 0;
    this.misses = 0;
  }

  /**
   * @returns {{hits: number, misses: number, hitRate: string, size: number}}
   *   Counters plus the hit rate formatted as a percentage string (e.g. "66.67%")
   */
  getStats() {
    const total = this.hits + this.misses;
    return {
      hits: this.hits,
      misses: this.misses,
      hitRate: total > 0 ? (this.hits / total * 100).toFixed(2) + '%' : '0%',
      size: this.cache.size
    };
  }
}

// Initialize cache singleton
const detectionCache = new CloudflareDetectionCache();
147
+
48
148
  /**
49
149
  * Gets module version information
50
150
  * @returns {object} Version information object
@@ -108,27 +208,96 @@ async function waitForTimeout(page, timeout) {
108
208
  }
109
209
 
110
210
  /**
111
- * Safe page evaluation with timeout protection
211
+ * Categorizes errors for better handling
112
212
  */
113
- async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_SAFE) {
114
- try {
115
- return await Promise.race([
116
- page.evaluate(func),
117
- new Promise((_, reject) =>
118
- setTimeout(() => reject(new Error('Page evaluation timeout')), timeout)
119
- )
120
- ]);
121
- } catch (error) {
122
- console.warn(`[cloudflare] Page evaluation failed: ${error.message}`);
123
- return {
124
- isChallengePresent: false,
125
- isPhishingWarning: false,
126
- isTurnstile: false,
127
- isJSChallenge: false,
128
- isChallengeCompleted: false,
129
- error: error.message
130
- };
213
/**
 * Maps an error onto one of the ERROR_TYPES categories by inspecting its
 * message (and, for timeouts, its name).
 *
 * Checks run in a fixed order - timeout, network, evaluation, navigation -
 * so a message such as "Page evaluation timeout" is reported as a timeout.
 *
 * NOTE(review): no branch here yields ERROR_TYPES.ELEMENT_NOT_FOUND, although
 * RETRY_CONFIG lists it as retryable - confirm which messages should map to it.
 *
 * @param {Error|string|null|undefined} error - The failure to classify
 * @returns {string} One of the ERROR_TYPES values; UNKNOWN when nothing matches
 */
function categorizeError(error) {
  // Callers may hand over null (no error was recorded) or a bare string, so
  // never dereference `error` unguarded.
  const rawMessage = typeof error === 'string' ? error : error?.message;
  const message = String(rawMessage ?? '').toLowerCase();

  // `TimeoutError` is the name Puppeteer gives its timeout errors; some of
  // their messages ("Waiting failed: ...ms exceeded") lack the word "timeout".
  if (error?.name === 'TimeoutError' || message.includes('timeout')) {
    return ERROR_TYPES.TIMEOUT;
  }
  if (message.includes('protocol error') || message.includes('target closed')) {
    return ERROR_TYPES.NETWORK;
  }
  if (message.includes('evaluation')) {
    return ERROR_TYPES.EVALUATION_FAILED;
  }
  if (message.includes('navigation')) {
    return ERROR_TYPES.NAVIGATION_FAILED;
  }

  return ERROR_TYPES.UNKNOWN;
}
231
+
232
/**
 * Waits for the exponential-backoff delay that belongs to a retry attempt.
 *
 * The delay is RETRY_CONFIG.baseDelay * backoffMultiplier^(attempt - 1),
 * capped at RETRY_CONFIG.maxDelay. Despite the name this function sleeps;
 * it resolves once the delay has elapsed.
 *
 * @param {number} attempt - 1-based number of the attempt that just failed
 * @returns {Promise<number>} Resolves with the delay (ms) that was waited
 */
async function getRetryDelay(attempt) {
  // A missing or invalid attempt number would otherwise produce NaN, which
  // setTimeout treats as "fire immediately" - i.e. no backoff at all.
  const attemptNumber = Number.isFinite(attempt) && attempt >= 1 ? attempt : 1;

  const delay = Math.min(
    RETRY_CONFIG.baseDelay * Math.pow(RETRY_CONFIG.backoffMultiplier, attemptNumber - 1),
    RETRY_CONFIG.maxDelay
  );

  await new Promise(resolve => setTimeout(resolve, delay));
  return delay;
}
242
+
243
/**
 * Enhanced safe page evaluation with retry logic and better error handling.
 *
 * Runs `page.evaluate(func)` raced against a timeout. Failures never throw:
 * retryable error categories (RETRY_CONFIG.retryableErrors) are retried with
 * backoff, and once retries are exhausted - or the error is not retryable -
 * an "all flags false" result describing the failure is returned.
 *
 * @param {object} page - Object exposing `evaluate(func)` (a Puppeteer page)
 * @param {Function} func - Function to evaluate in the page
 * @param {number} [timeout=TIMEOUTS.PAGE_EVALUATION_SAFE] - Per-attempt timeout in milliseconds
 * @param {object} [options]
 * @param {number} [options.maxRetries=RETRY_CONFIG.maxAttempts] - Total attempts; values below 1 are treated as 1
 * @param {boolean} [options.forceDebug=false] - Log failed attempts and late successes
 * @returns {Promise<*>} The evaluation result, or a failure object with
 *   `error`, `errorType` and `attempts` plus the challenge flags set to false
 */
async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_SAFE, options = {}) {
  const { maxRetries = RETRY_CONFIG.maxAttempts, forceDebug = false } = options;

  // Always make at least one attempt. With zero attempts there would be no
  // error to report and nothing meaningful to return.
  const totalAttempts = Math.max(1, Math.floor(Number(maxRetries)) || 1);

  for (let attempt = 1; attempt <= totalAttempts; attempt++) {
    let timer;

    try {
      const result = await Promise.race([
        page.evaluate(func),
        new Promise((_, reject) => {
          timer = setTimeout(() => reject(new Error('Page evaluation timeout')), timeout);
        })
      ]);

      if (forceDebug && attempt > 1) {
        console.log(`[cloudflare] Page evaluation succeeded on attempt ${attempt}`);
      }

      return result;
    } catch (error) {
      // Normalise exotic throws (`throw null`, `throw 'text'`) so the code
      // below can rely on a message being present.
      const failure = error instanceof Error ? error : new Error(String(error ?? 'Unknown error'));
      const errorType = categorizeError(failure);

      if (forceDebug) {
        console.warn(`[cloudflare] Page evaluation failed (attempt ${attempt}/${totalAttempts}): ${failure.message} [${errorType}]`);
      }

      // Don't retry if error type is not retryable or if it's the last attempt
      if (!RETRY_CONFIG.retryableErrors.includes(errorType) || attempt === totalAttempts) {
        return {
          isChallengePresent: false,
          isPhishingWarning: false,
          isTurnstile: false,
          isJSChallenge: false,
          isChallengeCompleted: false,
          error: failure.message,
          errorType: errorType,
          attempts: attempt
        };
      }
    } finally {
      // Without this the losing timeout timer stays pending for every call,
      // keeping the event loop busy until `timeout` elapses.
      clearTimeout(timer);
    }

    // Only reached after a retryable failure: wait with exponential backoff
    await getRetryDelay(attempt);
  }
}
133
302
 
134
303
  /**
@@ -165,7 +334,7 @@ async function safeWaitForNavigation(page, timeout = TIMEOUTS.NAVIGATION_TIMEOUT
165
334
  }
166
335
 
167
336
  /**
168
- * Quick Cloudflare detection - faster initial check to avoid unnecessary waiting
337
+ * Quick Cloudflare detection with caching for performance
169
338
  */
170
339
  async function quickCloudflareDetection(page, forceDebug = false) {
171
340
  try {
@@ -179,23 +348,34 @@ async function quickCloudflareDetection(page, forceDebug = false) {
179
348
  return { hasIndicators: false, skippedInvalidUrl: true };
180
349
  }
181
350
 
182
- // Continue with existing detection logic only for valid HTTP(S) URLs
183
-
351
+ // Check cache first
352
+ const cachedResult = detectionCache.get(currentPageUrl);
353
+ if (cachedResult !== null) {
354
+ if (forceDebug) {
355
+ const stats = detectionCache.getStats();
356
+ console.log(`[debug][cloudflare] Using cached detection result (cache hit rate: ${stats.hitRate})`);
357
+ }
358
+ return cachedResult;
359
+ }
360
+
361
+ // Perform actual detection with enhanced error handling
184
362
  const quickCheck = await safePageEvaluate(page, () => {
185
363
  const title = document.title || '';
186
364
  const bodyText = document.body ? document.body.textContent.substring(0, 500) : '';
187
365
  const url = window.location.href;
188
366
 
189
- // Quick indicators of Cloudflare presence
367
+ // Enhanced indicators with 2025 patterns
190
368
  const hasCloudflareIndicators =
191
369
  title.includes('Just a moment') ||
192
370
  title.includes('Checking your browser') ||
193
371
  title.includes('Attention Required') ||
372
+ title.includes('Security check') || // New pattern
194
373
  bodyText.includes('Cloudflare') ||
195
374
  bodyText.includes('cf-ray') ||
196
375
  bodyText.includes('Verify you are human') ||
197
376
  bodyText.includes('This website has been reported for potential phishing') ||
198
377
  bodyText.includes('Please wait while we verify') ||
378
+ bodyText.includes('Checking if the site connection is secure') || // New pattern
199
379
  url.includes('/cdn-cgi/challenge-platform/') ||
200
380
  url.includes('cloudflare.com') ||
201
381
  document.querySelector('[data-ray]') ||
@@ -207,7 +387,10 @@ async function quickCloudflareDetection(page, forceDebug = false) {
207
387
  document.querySelector('iframe[src*="challenges.cloudflare.com"]') ||
208
388
  document.querySelector('iframe[title*="Cloudflare security challenge"]') ||
209
389
  document.querySelector('script[src*="/cdn-cgi/challenge-platform/"]') ||
210
- document.querySelector('a[href*="continue"]');
390
+ document.querySelector('a[href*="continue"]') ||
391
+ // New selectors for 2025
392
+ document.querySelector('.cf-managed-challenge') ||
393
+ document.querySelector('[data-cf-managed]');
211
394
 
212
395
  return {
213
396
  hasIndicators: hasCloudflareIndicators,
@@ -215,12 +398,21 @@ async function quickCloudflareDetection(page, forceDebug = false) {
215
398
  url,
216
399
  bodySnippet: bodyText.substring(0, 200)
217
400
  };
218
- }, FAST_TIMEOUTS.QUICK_DETECTION);
401
+ }, FAST_TIMEOUTS.QUICK_DETECTION, { maxRetries: 2, forceDebug });
402
+
403
+ // Cache the result
404
+ detectionCache.set(currentPageUrl, quickCheck);
219
405
 
220
- if (forceDebug && quickCheck.hasIndicators) {
221
- console.log(`[debug][cloudflare] Quick detection found Cloudflare indicators on ${quickCheck.url}`);
222
- } else if (forceDebug && !quickCheck.hasIndicators) {
223
- console.log(`[debug][cloudflare] Quick detection found no Cloudflare indicators on ${quickCheck.url}`);
406
+ if (forceDebug) {
407
+ if (quickCheck.hasIndicators) {
408
+ console.log(`[debug][cloudflare] Quick detection found Cloudflare indicators on ${quickCheck.url}`);
409
+ } else {
410
+ console.log(`[debug][cloudflare] Quick detection found no Cloudflare indicators on ${quickCheck.url}`);
411
+ }
412
+
413
+ if (quickCheck.attempts && quickCheck.attempts > 1) {
414
+ console.log(`[debug][cloudflare] Detection required ${quickCheck.attempts} attempts`);
415
+ }
224
416
  }
225
417
 
226
418
  return quickCheck;
@@ -1017,6 +1209,87 @@ async function performCloudflareHandling(page, currentUrl, siteConfig, forceDebu
1017
1209
  return result;
1018
1210
  }
1019
1211
 
1212
/**
 * Performs parallel detection of multiple challenge types for better performance.
 *
 * Four independent probes (JS challenge, Turnstile, phishing warning, managed
 * challenge) are evaluated in the page concurrently. A probe that fails is
 * reported as "not detected" with the error message attached; it never makes
 * the whole detection reject.
 *
 * @param {object} page - Object exposing `evaluate(func)` (a Puppeteer page)
 * @param {boolean} [forceDebug=false] - Log detected challenges and probe errors
 * @returns {Promise<{challenges: string[], hasAnyChallenge: boolean, details: object[]}>}
 *   `challenges` lists the detected types; `details` holds one
 *   `{ type, detected, error? }` entry per probe, in probe order
 */
async function parallelChallengeDetection(page, forceDebug = false) {
  // Each probe is shipped to the page by page.evaluate(), so it must be
  // self-contained: no references to variables from this scope.
  const probes = [
    {
      type: 'js',
      probe: () => {
        const bodyText = document.body?.textContent ?? '';
        return document.querySelector('script[src*="/cdn-cgi/challenge-platform/"]') !== null ||
          bodyText.includes('Checking your browser') ||
          bodyText.includes('Please wait while we verify');
      }
    },
    {
      type: 'turnstile',
      probe: () =>
        document.querySelector('.cf-turnstile') !== null ||
        document.querySelector('iframe[src*="challenges.cloudflare.com"]') !== null ||
        document.querySelector('.ctp-checkbox-container') !== null
    },
    {
      type: 'phishing',
      probe: () => {
        const bodyText = document.body?.textContent ?? '';
        return bodyText.includes('This website has been reported for potential phishing') ||
          document.querySelector('a[href*="continue"]') !== null;
      }
    },
    {
      type: 'managed',
      probe: () =>
        document.querySelector('.cf-managed-challenge') !== null ||
        document.querySelector('[data-cf-managed]') !== null
    }
  ];

  // try/catch (rather than .catch on the returned promise) also covers an
  // evaluate() that throws synchronously, e.g. on an already closed page.
  const runProbe = async ({ type, probe }) => {
    try {
      const detected = await page.evaluate(probe);
      return { type, detected: Boolean(detected) };
    } catch (err) {
      return { type, detected: false, error: err?.message ?? String(err) };
    }
  };

  const results = await Promise.all(probes.map(runProbe));

  const detectedChallenges = results.filter(r => r.detected).map(r => r.type);

  if (forceDebug) {
    if (detectedChallenges.length > 0) {
      console.log(`[debug][cloudflare] Parallel detection found challenges: ${detectedChallenges.join(', ')}`);
    }
    for (const { type, error } of results) {
      if (error !== undefined) {
        console.log(`[debug][cloudflare] Parallel detection probe '${type}' failed: ${error}`);
      }
    }
  }

  return {
    challenges: detectedChallenges,
    hasAnyChallenge: detectedChallenges.length > 0,
    details: results
  };
}
1278
+
1279
/**
 * Exposes the statistics of the shared Cloudflare detection cache so callers
 * can monitor how effective it is.
 * @returns {object} Whatever `detectionCache.getStats()` reports
 */
function getCacheStats() {
  const stats = detectionCache.getStats();
  return stats;
}
1285
+
1286
/**
 * Resets the shared Cloudflare detection cache by delegating to
 * `detectionCache.clear()`.
 * @returns {void}
 */
function clearDetectionCache() {
  const sharedCache = detectionCache;
  sharedCache.clear();
}
1292
+
1020
1293
  module.exports = {
1021
1294
  analyzeCloudflareChallenge,
1022
1295
  handlePhishingWarning,
@@ -1029,5 +1302,12 @@ module.exports = {
1029
1302
  checkChallengeCompletion,
1030
1303
  quickCloudflareDetection,
1031
1304
  getModuleInfo,
1032
- CLOUDFLARE_MODULE_VERSION
1033
- };
1305
+ CLOUDFLARE_MODULE_VERSION,
1306
+ // New exports
1307
+ parallelChallengeDetection,
1308
+ getCacheStats,
1309
+ clearDetectionCache,
1310
+ categorizeError,
1311
+ ERROR_TYPES,
1312
+ RETRY_CONFIG
1313
+ };
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v1.0.69 ===
1
+ // === Network scanner script (nwss.js) v1.0.73 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -14,7 +14,12 @@ const { formatRules, handleOutput, getFormatDescription } = require('./lib/outpu
14
14
  // Rule validation
15
15
  const { validateRulesetFile, validateFullConfig, testDomainValidation, cleanRulesetFile } = require('./lib/validate_rules');
16
16
  // CF Bypass
17
- const { handleCloudflareProtection } = require('./lib/cloudflare');
17
+ const {
18
+ handleCloudflareProtection,
19
+ getCacheStats,
20
+ clearDetectionCache,
21
+ parallelChallengeDetection
22
+ } = require('./lib/cloudflare');
18
23
  // FP Bypass
19
24
  const { handleFlowProxyProtection, getFlowProxyTimeouts } = require('./lib/flowproxy');
20
25
  // ignore_similar rules
@@ -83,7 +88,7 @@ function detectPuppeteerVersion() {
83
88
  try {
84
89
  const puppeteer = require('puppeteer');
85
90
  let versionString = null;
86
-
91
+
87
92
  // Try multiple methods to get version
88
93
  if (puppeteer.version) {
89
94
  versionString = puppeteer.version;
@@ -118,7 +123,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
118
123
  const { monitorBrowserHealth, isBrowserHealthy } = require('./lib/browserhealth');
119
124
 
120
125
  // --- Script Configuration & Constants ---
121
- const VERSION = '1.0.69'; // Script version
126
+ const VERSION = '1.0.73'; // Script version
122
127
 
123
128
  // get startTime
124
129
  const startTime = Date.now();
@@ -311,6 +316,10 @@ if (clearCache && !dryRunMode) {
311
316
  forceDebug,
312
317
  cachePath: CACHE_LIMITS.DEFAULT_CACHE_PATH // Default path, will be updated after config loads if needed
313
318
  });
319
+
320
+ // Also clear Cloudflare detection cache
321
+ clearDetectionCache();
322
+ if (forceDebug) console.log(formatLogMessage('debug', 'Cleared Cloudflare detection cache'));
314
323
  }
315
324
 
316
325
  // Handle validation-only operations before main help
@@ -465,6 +474,8 @@ Global config.json options:
465
474
  Per-site config.json options:
466
475
  url: "site" or ["site1", "site2"] Single URL or list of URLs
467
476
  filterRegex: "regex" or ["regex1", "regex2"] Patterns to match requests
477
+ regex_and: true/false Use AND logic for multiple filterRegex patterns (default: false)
478
+ When true, ALL regex patterns must match the same URL
468
479
 
469
480
  Redirect Handling Options:
470
481
  follow_redirects: true/false Follow redirects to new domains (default: true)
@@ -517,6 +528,10 @@ Redirect Handling Options:
517
528
  Cloudflare Protection Options:
518
529
  cloudflare_phish: true/false Auto-click through Cloudflare phishing warnings (default: false)
519
530
  cloudflare_bypass: true/false Auto-solve Cloudflare "Verify you are human" challenges (default: false)
531
+ cloudflare_parallel_detection: true/false Use parallel detection for faster Cloudflare checks (default: true)
532
+ cloudflare_max_retries: <number> Maximum retry attempts for Cloudflare operations (default: 3)
533
+ cloudflare_cache_ttl: <milliseconds> TTL for Cloudflare detection cache (default: 300000 - 5 minutes)
534
+ cloudflare_retry_on_error: true/false Enable retry logic for Cloudflare operations (default: true)
520
535
 
521
536
  FlowProxy Protection Options:
522
537
  flowproxy_detection: true/false Enable flowProxy protection detection and handling (default: false)
@@ -1633,6 +1648,9 @@ function setupFrameHandling(page, forceDebug) {
1633
1648
  ? [new RegExp(siteConfig.filterRegex.replace(/^\/(.*)\/$/, '$1'))]
1634
1649
  : [];
1635
1650
 
1651
+ // NEW: Get regex_and setting (defaults to false for backward compatibility)
1652
+ const useRegexAnd = siteConfig.regex_and === true;
1653
+
1636
1654
  // Parse searchstring patterns using module
1637
1655
  const { searchStrings, searchStringsAnd, hasSearchString, hasSearchStringAnd } = parseSearchStrings(siteConfig.searchstring, siteConfig.searchstring_and);
1638
1656
  const useCurl = siteConfig.curl === true; // Use curl if enabled, regardless of searchstring
@@ -1708,6 +1726,11 @@ function setupFrameHandling(page, forceDebug) {
1708
1726
  patterns.forEach((pattern, idx) => {
1709
1727
  console.log(` [${idx + 1}] ${pattern}`);
1710
1728
  });
1729
+ if (useRegexAnd && patterns.length > 1) {
1730
+ console.log(formatLogMessage('info', ` Logic: AND (all patterns must match same URL)`));
1731
+ } else if (patterns.length > 1) {
1732
+ console.log(formatLogMessage('info', ` Logic: OR (any pattern can match)`));
1733
+ }
1711
1734
  }
1712
1735
 
1713
1736
  if (siteConfig.verbose === 1 && (hasSearchString || hasSearchStringAnd)) {
@@ -1974,7 +1997,7 @@ function setupFrameHandling(page, forceDebug) {
1974
1997
  if (!allowedResourceTypes || !Array.isArray(allowedResourceTypes) || allowedResourceTypes.includes(resourceType)) {
1975
1998
  if (dryRunMode) {
1976
1999
  matchedDomains.get('dryRunMatches').push({
1977
- regex: re.source,
2000
+ regex: matchedRegexPattern,
1978
2001
  domain: reqDomain,
1979
2002
  resourceType: resourceType,
1980
2003
  fullUrl: reqUrl,
@@ -1988,7 +2011,7 @@ function setupFrameHandling(page, forceDebug) {
1988
2011
  const simplifiedUrl = getRootDomain(currentUrl);
1989
2012
  if (siteConfig.verbose === 1) {
1990
2013
  const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
1991
- console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${re} and resourceType: ${resourceType}${resourceInfo} [BLOCKED BUT ADDED]`));
2014
+ console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
1992
2015
  }
1993
2016
  if (dumpUrls) {
1994
2017
  const timestamp = new Date().toISOString();
@@ -2024,8 +2047,34 @@ function setupFrameHandling(page, forceDebug) {
2024
2047
  return;
2025
2048
  }
2026
2049
 
2027
- for (const re of regexes) {
2028
- if (re.test(reqUrl)) {
2050
+ // === ENHANCED REGEX MATCHING WITH AND/OR LOGIC ===
2051
+ let regexMatched = false;
2052
+ let matchedRegexPattern = null;
2053
+
2054
+ if (regexes.length > 0) {
2055
+ if (useRegexAnd) {
2056
+ // AND logic: ALL regex patterns must match the same URL
2057
+ const allMatch = regexes.every(re => re.test(reqUrl));
2058
+ if (allMatch) {
2059
+ regexMatched = true;
2060
+ matchedRegexPattern = regexes.map(re => re.source).join(' AND ');
2061
+ if (forceDebug) {
2062
+ console.log(formatLogMessage('debug', `URL ${reqUrl} matched ALL regex patterns (AND logic)`));
2063
+ }
2064
+ }
2065
+ } else {
2066
+ // OR logic: ANY regex pattern can match (original behavior)
2067
+ for (const re of regexes) {
2068
+ if (re.test(reqUrl)) {
2069
+ regexMatched = true;
2070
+ matchedRegexPattern = re.source;
2071
+ break;
2072
+ }
2073
+ }
2074
+ }
2075
+ }
2076
+
2077
+ if (regexMatched) {
2029
2078
  const resourceType = request.resourceType();
2030
2079
 
2031
2080
  // *** UNIVERSAL RESOURCE TYPE FILTER ***
@@ -2036,7 +2085,9 @@ function setupFrameHandling(page, forceDebug) {
2036
2085
  if (forceDebug) {
2037
2086
  console.log(formatLogMessage('debug', `URL ${reqUrl} matches regex but resourceType '${resourceType}' not in allowed types [${allowedResourceTypes.join(', ')}]. Skipping ALL processing.`));
2038
2087
  }
2039
- break; // Skip this URL entirely - doesn't match required resource types
2088
+ // Skip this URL entirely - doesn't match required resource types
2089
+ request.continue();
2090
+ return;
2040
2091
  }
2041
2092
  }
2042
2093
 
@@ -2045,13 +2096,17 @@ function setupFrameHandling(page, forceDebug) {
2045
2096
  if (forceDebug) {
2046
2097
  console.log(formatLogMessage('debug', `Skipping first-party match: ${reqUrl} (firstParty disabled)`));
2047
2098
  }
2048
- break; // Skip this URL - it's first-party but firstParty is disabled
2099
+ // Skip this URL - it's first-party but firstParty is disabled
2100
+ request.continue();
2101
+ return;
2049
2102
  }
2050
2103
  if (!isFirstParty && siteConfig.thirdParty === false) {
2051
2104
  if (forceDebug) {
2052
2105
  console.log(formatLogMessage('debug', `Skipping third-party match: ${reqUrl} (thirdParty disabled)`));
2053
2106
  }
2054
- break; // Skip this URL - it's third-party but thirdParty is disabled
2107
+ // Skip this URL - it's third-party but thirdParty is disabled
2108
+ request.continue();
2109
+ return;
2055
2110
  }
2056
2111
 
2057
2112
  // REMOVED: Check if this URL matches any blocked patterns - if so, skip detection but still continue browser blocking
@@ -2062,7 +2117,7 @@ function setupFrameHandling(page, forceDebug) {
2062
2117
  if (!hasSearchString && !hasSearchStringAnd && !hasNetTools) {
2063
2118
  if (dryRunMode) {
2064
2119
  matchedDomains.get('dryRunMatches').push({
2065
- regex: re.source,
2120
+ regex: matchedRegexPattern,
2066
2121
  domain: reqDomain,
2067
2122
  resourceType: resourceType,
2068
2123
  fullUrl: reqUrl,
@@ -2074,7 +2129,7 @@ function setupFrameHandling(page, forceDebug) {
2074
2129
  const simplifiedUrl = getRootDomain(currentUrl);
2075
2130
  if (siteConfig.verbose === 1) {
2076
2131
  const resourceInfo = (adblockRulesMode || siteConfig.adblock_rules) ? ` (${resourceType})` : '';
2077
- console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${re} and resourceType: ${resourceType}${resourceInfo}`));
2132
+ console.log(formatLogMessage('match', `[${simplifiedUrl}] ${reqUrl} matched regex: ${matchedRegexPattern} and resourceType: ${resourceType}${resourceInfo}`));
2078
2133
  }
2079
2134
  if (dumpUrls) {
2080
2135
  const timestamp = new Date().toISOString();
@@ -2088,17 +2143,19 @@ function setupFrameHandling(page, forceDebug) {
2088
2143
  if (forceDebug) {
2089
2144
  console.log(formatLogMessage('debug', `Skipping nettools check for already detected subdomain: ${fullSubdomain}`));
2090
2145
  }
2091
- break; // Skip to next URL
2146
+ // Skip to next URL
2147
+ request.continue();
2148
+ return;
2092
2149
  }
2093
2150
 
2094
2151
  if (forceDebug) {
2095
- console.log(formatLogMessage('debug', `${reqUrl} matched regex ${re} and resourceType ${resourceType}, queued for nettools check`));
2152
+ console.log(formatLogMessage('debug', `${reqUrl} matched regex ${matchedRegexPattern} and resourceType ${resourceType}, queued for nettools check`));
2096
2153
  }
2097
2154
 
2098
2155
  if (dryRunMode) {
2099
2156
  // For dry run, we'll collect the domain for nettools checking
2100
2157
  matchedDomains.get('dryRunMatches').push({
2101
- regex: re.source,
2158
+ regex: matchedRegexPattern,
2102
2159
  domain: reqDomain,
2103
2160
  resourceType: resourceType,
2104
2161
  fullUrl: reqUrl,
@@ -2162,15 +2219,17 @@ function setupFrameHandling(page, forceDebug) {
2162
2219
  if (forceDebug) {
2163
2220
  console.log(formatLogMessage('debug', `Skipping searchstring check for already detected subdomain: ${fullSubdomain}`));
2164
2221
  }
2165
- break; // Skip to next URL
2222
+ // Skip to next URL
2223
+ request.continue();
2224
+ return;
2166
2225
  }
2167
2226
  if (forceDebug) {
2168
2227
  const searchType = hasSearchStringAnd ? 'searchstring_and' : 'searchstring';
2169
- console.log(formatLogMessage('debug', `${reqUrl} matched regex ${re} and resourceType ${resourceType}, queued for ${searchType} content search`));
2228
+ console.log(formatLogMessage('debug', `${reqUrl} matched regex ${matchedRegexPattern} and resourceType ${resourceType}, queued for ${searchType} content search`));
2170
2229
  }
2171
2230
  if (dryRunMode) {
2172
2231
  matchedDomains.get('dryRunMatches').push({
2173
- regex: re.source,
2232
+ regex: matchedRegexPattern,
2174
2233
  domain: reqDomain,
2175
2234
  resourceType: resourceType,
2176
2235
  fullUrl: reqUrl,
@@ -2257,8 +2316,7 @@ function setupFrameHandling(page, forceDebug) {
2257
2316
  }
2258
2317
  }
2259
2318
 
2260
- break;
2261
- }
2319
+ // No break needed since we've already determined if regex matched
2262
2320
  }
2263
2321
  request.continue();
2264
2322
  });
@@ -2409,15 +2467,41 @@ function setupFrameHandling(page, forceDebug) {
2409
2467
 
2410
2468
  siteCounter++;
2411
2469
 
2412
- // Handle all Cloudflare protections using the dedicated module
2470
+ // Enhanced Cloudflare handling with parallel detection
2471
+ if (siteConfig.cloudflare_parallel_detection !== false) { // Enable by default
2472
+ try {
2473
+ const parallelResult = await parallelChallengeDetection(page, forceDebug);
2474
+ if (parallelResult.hasAnyChallenge && forceDebug) {
2475
+ console.log(formatLogMessage('debug', `[cloudflare] Parallel detection found: ${parallelResult.challenges.join(', ')}`));
2476
+ }
2477
+ } catch (parallelErr) {
2478
+ if (forceDebug) {
2479
+ console.log(formatLogMessage('debug', `[cloudflare] Parallel detection failed: ${parallelErr.message}`));
2480
+ }
2481
+ }
2482
+ }
2483
+
2484
+ // Handle all Cloudflare protections using the enhanced module
2413
2485
  const cloudflareResult = await handleCloudflareProtection(page, currentUrl, siteConfig, forceDebug);
2414
-
2486
+ // Check for retry recommendations
2487
+ if (cloudflareResult.errors && cloudflareResult.errors.length > 0) {
2488
+ const hasRetryableErrors = cloudflareResult.errors.some(err =>
2489
+ err.includes('timeout') || err.includes('network')
2490
+ );
2491
+
2492
+ if (hasRetryableErrors && forceDebug) {
2493
+ console.log(formatLogMessage('debug', '[cloudflare] Errors may be retryable - consider enabling retry logic'));
2494
+ }
2495
+ }
2496
+
2415
2497
  if (!cloudflareResult.overallSuccess) {
2416
2498
  console.warn(`⚠ [cloudflare] Protection handling failed for ${currentUrl}:`);
2417
2499
  cloudflareResult.errors.forEach(error => {
2418
2500
  console.warn(` - ${error}`);
2419
2501
  });
2420
2502
  // Continue with scan despite Cloudflare issues
2503
+ } else if (cloudflareResult.verificationChallenge?.success && forceDebug) {
2504
+ console.log(formatLogMessage('debug', `[cloudflare] Challenge solved using: ${cloudflareResult.verificationChallenge.method}`));
2421
2505
  }
2422
2506
 
2423
2507
  // Handle flowProxy protection if enabled
@@ -2959,6 +3043,13 @@ function setupFrameHandling(page, forceDebug) {
2959
3043
  console.log(formatLogMessage('debug', `Output format: ${getFormatDescription(globalOptions)}`));
2960
3044
  console.log(formatLogMessage('debug', `Generated ${outputResult.totalRules} rules from ${outputResult.successfulPageLoads} successful page loads`));
2961
3045
  console.log(formatLogMessage('debug', `Performance: ${totalDomainsSkipped} domains skipped (already detected), ${detectedDomainsCount} unique domains cached`));
3046
+ // Cloudflare cache statistics
3047
+ const cloudflareStats = getCacheStats();
3048
+ if (cloudflareStats.size > 0) {
3049
+ console.log(formatLogMessage('debug', '=== Cloudflare Cache Statistics ==='));
3050
+ console.log(formatLogMessage('debug', `Cache hit rate: ${cloudflareStats.hitRate}, Total hits: ${cloudflareStats.hits}, Misses: ${cloudflareStats.misses}`));
3051
+ console.log(formatLogMessage('debug', `Cached detections: ${cloudflareStats.size}`));
3052
+ }
2962
3053
  // Log smart cache statistics (if cache is enabled)
2963
3054
  if (smartCache) {
2964
3055
  const cacheStats = smartCache.getStats();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "1.0.71",
3
+ "version": "1.0.73",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {