@fanboynz/network-scanner 1.0.57 → 1.0.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,7 +18,7 @@ jobs:
18
18
  - name: Setup Node.js
19
19
  uses: actions/setup-node@v4
20
20
  with:
21
- node-version: '18'
21
+ node-version: '20'
22
22
  registry-url: 'https://registry.npmjs.org'
23
23
 
24
24
  - run: npm ci
@@ -4,6 +4,13 @@
4
4
  const fs = require('fs');
5
5
  const { spawnSync } = require('child_process');
6
6
 
7
// Configuration constants for search logic.
// Frozen so that no code path can accidentally change the limits at runtime.
const SEARCH_CONFIG = Object.freeze({
  MAX_CONTENT_SIZE: 50 * 1024 * 1024, // 50MB max content size (in string length units)
  MAX_SEARCH_STRING_LENGTH: 1000, // longest accepted search string, in characters
  XML_ENTITY_TIMEOUT: 5000 // 5 second timeout for XML processing (milliseconds)
});
13
+
7
14
  /**
8
15
  * Parses searchstring configuration into a normalized format
9
16
  * @param {string|Array<string>|undefined} searchstring - The searchstring config value (OR logic)
@@ -83,6 +90,8 @@ async function downloadWithCurl(url, userAgent = '', timeout = 30) {
83
90
  '--max-time', timeout.toString(),
84
91
  '--max-redirs', '5',
85
92
  '--fail-with-body', // Return body even on HTTP errors
93
+ '--max-filesize', '52428800', // 50MB limit
94
+ '--range', '0-52428799', // Limit download size
86
95
  '--compressed', // Accept compressed responses
87
96
  ];
88
97
 
@@ -105,7 +114,8 @@ async function downloadWithCurl(url, userAgent = '', timeout = 30) {
105
114
  const result = spawnSync('curl', curlArgs, {
106
115
  encoding: 'utf8',
107
116
  timeout: timeout * 1000,
108
- maxBuffer: 10 * 1024 * 1024 // 10MB max buffer
117
+ maxBuffer: 10 * 1024 * 1024, // 10MB max buffer
118
+ killSignal: 'SIGTERM'
109
119
  });
110
120
 
111
121
  if (result.error) {
@@ -123,6 +133,113 @@ async function downloadWithCurl(url, userAgent = '', timeout = 30) {
123
133
  });
124
134
  }
125
135
 
136
/**
 * Downloads content with retry logic for transient failures.
 *
 * Each attempt is delegated to downloadWithCurl. A failed attempt is retried
 * only when the error text contains one of the transient markers below
 * (timeout, connection refused/reset, HTTP 502/503); any other failure is
 * rethrown immediately. Waits 1s, 2s, 4s, ... between attempts.
 *
 * @param {string} url - The URL to download
 * @param {string} userAgent - User agent string to use
 * @param {number} timeout - Timeout in seconds
 * @param {number} retries - Number of retry attempts after the first try (default: 2).
 *   Fractional values are rounded down; negative or non-numeric values mean "no retries".
 * @returns {Promise<string>} The downloaded content
 * @throws Rethrows, unchanged, the value the last failed attempt rejected with
 */
async function downloadWithRetry(url, userAgent = '', timeout = 30, retries = 2) {
  const transientMarkers = ['timeout', 'Connection refused', '502', '503', 'Connection reset'];

  // Normalise the retry budget. With a fractional, negative or string value the
  // strict "attempt === retries" comparison could never be true, so the loop
  // used to finish silently and the function resolved to undefined.
  const requested = Number(retries);
  const maxRetries = Number.isNaN(requested) ? 0 : Math.max(0, Math.floor(requested));

  for (let attempt = 0; ; attempt++) {
    try {
      return await downloadWithCurl(url, userAgent, timeout);
    } catch (err) {
      // Out of attempts: surface the real failure to the caller
      if (attempt >= maxRetries) throw err;

      // Rejection values are not guaranteed to be Error objects; never let the
      // transient check itself throw and hide the original failure.
      const message = err && typeof err.message === 'string' ? err.message : String(err);

      // Only retry on specific transient errors
      if (!transientMarkers.some(marker => message.includes(marker))) throw err;

      // Exponential backoff: 1s, 2s, 4s...
      await new Promise(resolve => setTimeout(resolve, 1000 * Math.pow(2, attempt)));
    }
  }
}
166
+
167
/**
 * Safely decodes XML entities in a single pass.
 *
 * Handles the five predefined named entities (&lt; &gt; &amp; &quot; &apos;)
 * plus decimal (&#NNN;) and hexadecimal (&#xHHH;) character references.
 * Numeric references outside the Unicode range 0..0x10FFFF are left as-is.
 *
 * Every entity is decoded exactly once: text produced by decoding is never
 * re-scanned, so "&amp;#60;" becomes "&#60;" rather than "<". Because the whole
 * job is one linear replace there are no intermediate stages between which a
 * time budget could be checked.
 *
 * @param {string} content - Content to decode
 * @returns {string} Decoded content or original if processing fails
 */
function safeDecodeXmlEntities(content) {
  const namedEntities = { lt: '<', gt: '>', amp: '&', quot: '"', apos: "'" };

  try {
    return content.replace(
      /&(?:(lt|gt|amp|quot|apos)|#(\d+)|#[xX]([0-9a-fA-F]+));/g,
      (match, name, dec, hex) => {
        if (name !== undefined) {
          return namedEntities[name];
        }

        const codePoint = dec !== undefined ? parseInt(dec, 10) : parseInt(hex, 16);

        // Validate range for safety (valid Unicode range). fromCodePoint is
        // required here: fromCharCode keeps only the low 16 bits and would
        // turn astral characters (e.g. emoji) into unrelated BMP characters.
        if (codePoint >= 0 && codePoint <= 0x10FFFF) {
          return String.fromCodePoint(codePoint);
        }
        return match; // Keep original if invalid
      }
    );
  } catch (xmlErr) {
    console.warn(`[warn] XML entity decoding failed: ${xmlErr.message}`);
    return content; // Return original content if decoding fails
  }
}
222
+
223
+ /**
224
+ * Safely strips XML/HTML tags with size limits
225
+ * @param {string} content - Content to strip tags from
226
+ * @returns {string} Content with tags removed
227
+ */
228
+ function safeStripTags(content) {
229
+ try {
230
+ // Limit content size for tag stripping to prevent excessive memory usage
231
+ const limitedContent = content.length > SEARCH_CONFIG.MAX_CONTENT_SIZE
232
+ ? content.substring(0, SEARCH_CONFIG.MAX_CONTENT_SIZE)
233
+ : content;
234
+
235
+ // Replace tags with spaces to preserve word boundaries
236
+ return limitedContent.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ');
237
+ } catch (stripErr) {
238
+ console.warn(`[warn] Tag stripping failed: ${stripErr.message}`);
239
+ return content;
240
+ }
241
+ }
242
+
126
243
  /**
127
244
  * Checks if response content contains any of the search strings (OR logic)
128
245
  * or all of the AND search strings (AND logic)
@@ -131,58 +248,118 @@ async function downloadWithCurl(url, userAgent = '', timeout = 30) {
131
248
  * @param {Array<string>} searchStrings - Array of strings to search for (OR logic)
132
249
  * @param {Array<string>} searchStringsAnd - Array of strings that must all be present (AND logic)
133
250
  * @param {string} contentType - Content type for specialized handling
251
+ * @param {string} url - URL for debugging context (optional)
134
252
  * @returns {object} Object with found boolean, matchedString/matchedStrings, allMatches array, and logic type
135
253
  */
136
- function searchContent(content, searchStrings, searchStringsAnd = [], contentType = '') {
254
+ function searchContent(content, searchStrings, searchStringsAnd = [], contentType = '', url = '') {
255
+ // Input validation
256
+ if (!content || typeof content !== 'string') {
257
+ return {
258
+ found: false,
259
+ matchedString: null,
260
+ matchedStrings: [],
261
+ allMatches: [],
262
+ logicType: 'NONE',
263
+ error: 'Invalid or empty content'
264
+ };
265
+ }
266
+
267
+ // Size check and truncation with warning
268
+ const originalLength = content.length;
269
+ if (originalLength > SEARCH_CONFIG.MAX_CONTENT_SIZE) {
270
+ content = content.substring(0, SEARCH_CONFIG.MAX_CONTENT_SIZE);
271
+ console.warn(`[warn] Content truncated from ${originalLength} to ${SEARCH_CONFIG.MAX_CONTENT_SIZE} chars for ${url || 'unknown URL'}`);
272
+ }
137
273
  let searchableContent = content;
274
+
275
+ const isXmlContent = contentType.toLowerCase().includes('xml') ||
276
+ contentType.toLowerCase().includes('html');
138
277
 
139
- // For XML content, also search decoded entities and stripped tags for better matching
140
- if (contentType.includes('xml')) {
141
- // Decode common XML entities
142
- const decodedContent = content
143
- .replace(/&lt;/g, '<')
144
- .replace(/&gt;/g, '>')
145
- .replace(/&amp;/g, '&')
146
- .replace(/&quot;/g, '"')
147
- .replace(/&#39;/g, "'");
148
-
149
- // Create version with XML tags stripped for text content search
150
- const strippedContent = decodedContent.replace(/<[^>]*>/g, ' ');
151
-
152
- // Search in: original + decoded + stripped content
153
- searchableContent = content + '\n' + decodedContent + '\n' + strippedContent;
278
+ if (isXmlContent) {
279
+ try {
280
+ // Safely decode XML entities
281
+ const decodedContent = safeDecodeXmlEntities(content);
282
+
283
+ // Safely strip tags to extract text content
284
+ const strippedContent = safeStripTags(decodedContent);
285
+
286
+ // Search in: original + decoded + stripped content
287
+ // Use newlines as separators to prevent false matches across content types
288
+ searchableContent = [content, decodedContent, strippedContent].join('\n');
289
+
290
+ } catch (xmlProcessingErr) {
291
+ console.warn(`[warn] XML processing failed for ${url || 'unknown URL'}: ${xmlProcessingErr.message}`);
292
+ // Fall back to original content
293
+ searchableContent = content;
294
+ }
295
+ }
296
+
297
+ // Input validation for search strings
298
+ const validSearchStrings = searchStrings.filter(str =>
299
+ str && typeof str === 'string' && str.length > 0 && str.length <= SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH
300
+ );
301
+ const validSearchStringsAnd = searchStringsAnd.filter(str =>
302
+ str && typeof str === 'string' && str.length > 0 && str.length <= SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH
303
+ );
304
+
305
+ // Warn about filtered search strings
306
+ if (validSearchStrings.length !== searchStrings.length) {
307
+ console.warn(`[warn] Filtered ${searchStrings.length - validSearchStrings.length} invalid search strings`);
308
+ }
309
+ if (validSearchStringsAnd.length !== searchStringsAnd.length) {
310
+ console.warn(`[warn] Filtered ${searchStringsAnd.length - validSearchStringsAnd.length} invalid AND search strings`);
154
311
  }
155
312
 
156
- // Check AND logic first (more restrictive)
157
- if (searchStringsAnd && searchStringsAnd.length > 0) {
158
- const lowerContent = searchableContent.toLowerCase();
313
+ // Early return if no valid search strings
314
+ if (validSearchStrings.length === 0 && validSearchStringsAnd.length === 0) {
315
+ return {
316
+ found: false,
317
+ matchedString: null,
318
+ matchedStrings: [],
319
+ allMatches: [],
320
+ logicType: 'NONE',
321
+ error: 'No valid search strings provided'
322
+ };
323
+ }
324
+
325
+ // Pre-compute lowercase content once for better performance
326
+ const lowerContent = searchableContent.toLowerCase();
327
+
328
+ // Check AND logic first (more restrictive) - ALL strings must be present
329
+ if (validSearchStringsAnd && validSearchStringsAnd.length > 0) {
159
330
  const foundAndStrings = [];
160
331
 
161
- for (const searchStr of searchStringsAnd) {
162
- if (lowerContent.includes(searchStr.toLowerCase())) {
332
+ for (const searchStr of validSearchStringsAnd) {
333
+ const lowerSearchStr = searchStr.toLowerCase();
334
+ if (lowerContent.includes(lowerSearchStr)) {
163
335
  foundAndStrings.push(searchStr);
336
+ } else {
337
+ // Early exit if any AND string is not found
338
+ break;
164
339
  }
165
340
  }
166
341
 
167
- // AND logic: ALL strings must be found
168
- if (foundAndStrings.length === searchStringsAnd.length) {
342
+ // AND logic: ALL valid strings must be found
343
+ if (foundAndStrings.length === validSearchStringsAnd.length) {
169
344
  return {
170
345
  found: true,
171
- matchedString: foundAndStrings.join(' AND '), // Show all matched strings
346
+ matchedString: foundAndStrings.join(' AND '),
172
347
  matchedStrings: foundAndStrings,
173
348
  allMatches: foundAndStrings,
174
- logicType: 'AND'
349
+ logicType: 'AND',
350
+ contentSize: originalLength,
351
+ searchableSize: searchableContent.length
175
352
  };
176
353
  }
177
354
  }
178
355
 
179
- // Fall back to OR logic if AND logic didn't match or wasn't specified
180
- const lowerContent = searchableContent.toLowerCase();
356
+ // OR logic: ANY string can match
181
357
  const allMatches = [];
182
358
  let firstMatch = null;
183
359
 
184
- for (const searchStr of searchStrings) {
185
- if (lowerContent.includes(searchStr.toLowerCase())) {
360
+ for (const searchStr of validSearchStrings) {
361
+ const lowerSearchStr = searchStr.toLowerCase();
362
+ if (lowerContent.includes(lowerSearchStr)) {
186
363
  allMatches.push(searchStr);
187
364
  if (!firstMatch) {
188
365
  firstMatch = searchStr;
@@ -195,7 +372,10 @@ function searchContent(content, searchStrings, searchStringsAnd = [], contentTyp
195
372
  matchedString: firstMatch,
196
373
  matchedStrings: allMatches,
197
374
  allMatches: allMatches,
198
- logicType: 'OR'
375
+ logicType: validSearchStrings.length > 0 ? 'OR' : 'NONE',
376
+ contentSize: originalLength,
377
+ searchableSize: searchableContent.length,
378
+ processedAsXml: isXmlContent
199
379
  };
200
380
  }
201
381
 
@@ -206,6 +386,9 @@ function searchContent(content, searchStrings, searchStringsAnd = [], contentTyp
206
386
  */
207
387
  function shouldAnalyzeContentType(contentType) {
208
388
  if (!contentType) return false;
389
+
390
+ // Normalize content type (remove charset and other parameters)
391
+ const normalizedType = contentType.toLowerCase().split(';')[0].trim();
209
392
 
210
393
  const textTypes = [
211
394
  'text/', // text/html, text/plain, text/xml, etc.
@@ -216,10 +399,16 @@ function shouldAnalyzeContentType(contentType) {
216
399
  'application/soap+xml', // SOAP XML
217
400
  'application/rss+xml', // RSS feeds
218
401
  'application/atom+xml', // Atom feeds
219
- 'application/xhtml+xml' // XHTML
402
+ 'application/xhtml+xml', // XHTML
403
+ 'application/ld+json', // JSON-LD structured data
404
+ 'application/manifest+json', // Web App Manifest
405
+ 'application/feed+xml', // Generic XML feeds
406
+ 'application/vnd.api+json', // JSON API specification
407
+ 'application/hal+json', // HAL (Hypertext Application Language)
408
+ 'application/problem+json' // Problem Details for HTTP APIs
220
409
  ];
221
410
 
222
- return textTypes.some(type => contentType.includes(type));
411
+ return textTypes.some(type => normalizedType.startsWith(type));
223
412
  }
224
413
 
225
414
  /**
@@ -319,10 +508,10 @@ function createCurlHandler(config) {
319
508
  }
320
509
 
321
510
  // If searchstring IS defined, download and search content
322
- const content = await downloadWithCurl(requestUrl, userAgent, 30);
511
+ const content = await downloadWithRetry(requestUrl, userAgent, 30);
323
512
 
324
513
  // Check if content contains search strings (OR or AND logic)
325
- const { found, matchedString, logicType } = searchContent(content, searchStrings, searchStringsAnd, '');
514
+ const { found, matchedString, logicType, error } = searchContent(content, searchStrings, searchStringsAnd, '', requestUrl);
326
515
 
327
516
  if (found) {
328
517
  if (!reqDomain || matchesIgnoreDomain(reqDomain, ignoreDomains)) {
@@ -352,6 +541,9 @@ function createCurlHandler(config) {
352
541
  } else if (forceDebug) {
353
542
  const partyType = isFirstParty ? 'first-party' : 'third-party';
354
543
  console.log(`[debug][curl] ${requestUrl} (${partyType}) matched regex but no searchstring found`);
544
+ if (error) {
545
+ console.log(`[debug][curl] Search error: ${error}`);
546
+ }
355
547
  }
356
548
 
357
549
  } catch (err) {
@@ -442,7 +634,7 @@ function createResponseHandler(config) {
442
634
  const content = await response.text();
443
635
 
444
636
  // Check if content contains search strings (OR or AND logic)
445
- const { found, matchedString, logicType } = searchContent(content, searchStrings, searchStringsAnd, contentType);
637
+ const { found, matchedString, logicType, error } = searchContent(content, searchStrings, searchStringsAnd, contentType, respUrl);
446
638
 
447
639
  if (found) {
448
640
  if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
@@ -471,6 +663,9 @@ function createResponseHandler(config) {
471
663
  } else if (forceDebug) {
472
664
  const partyType = isFirstParty ? 'first-party' : 'third-party';
473
665
  console.log(`[debug] ${respUrl} (${partyType}) matched regex but no searchstring found`);
666
+ if (error) {
667
+ console.log(`[debug] Search error: ${error}`);
668
+ }
474
669
  }
475
670
 
476
671
  } catch (err) {
@@ -541,9 +736,40 @@ function validateSearchString(searchstring, searchstringAnd) {
541
736
  }
542
737
 
543
738
  // Check that both searchstring and searchstring_and aren't defined simultaneously
544
- if (searchstring && searchstringAnd) {
739
+ if ((searchstring !== undefined && searchstring !== null) &&
740
+ (searchstringAnd !== undefined && searchstringAnd !== null)) {
545
741
  return { isValid: false, error: 'Cannot use both searchstring (OR) and searchstring_and (AND) simultaneously. Choose one logic type.' };
546
742
  }
743
+
744
+ // Additional validation for search string length limits
745
+ const validateStringLength = (str, fieldName) => {
746
+ if (str.length > SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH) {
747
+ return { isValid: false, error: `${fieldName} too long (max ${SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH} chars)` };
748
+ }
749
+ return { isValid: true };
750
+ };
751
+
752
+ // Validate search string lengths
753
+ if (typeof searchstring === 'string') {
754
+ const lengthCheck = validateStringLength(searchstring, 'searchstring');
755
+ if (!lengthCheck.isValid) return lengthCheck;
756
+ } else if (Array.isArray(searchstring)) {
757
+ for (let i = 0; i < searchstring.length; i++) {
758
+ const lengthCheck = validateStringLength(searchstring[i], `searchstring[${i}]`);
759
+ if (!lengthCheck.isValid) return lengthCheck;
760
+ }
761
+ }
762
+
763
+ // Validate AND search string lengths
764
+ if (typeof searchstringAnd === 'string') {
765
+ const lengthCheck = validateStringLength(searchstringAnd, 'searchstring_and');
766
+ if (!lengthCheck.isValid) return lengthCheck;
767
+ } else if (Array.isArray(searchstringAnd)) {
768
+ for (let i = 0; i < searchstringAnd.length; i++) {
769
+ const lengthCheck = validateStringLength(searchstringAnd[i], `searchstring_and[${i}]`);
770
+ if (!lengthCheck.isValid) return lengthCheck;
771
+ }
772
+ }
547
773
 
548
774
  return { isValid: false, error: 'searchstring must be string or array of strings' };
549
775
  }
@@ -567,11 +793,13 @@ function getSearchStats(matchedDomains, searchStrings) {
567
793
  module.exports = {
568
794
  parseSearchStrings,
569
795
  searchContent,
796
+ safeDecodeXmlEntities,
570
797
  shouldAnalyzeContentType,
571
798
  createResponseHandler,
572
799
  createCurlHandler,
573
800
  downloadWithCurl,
574
801
  validateSearchString,
575
802
  getSearchStats,
576
- addDomainToCollection
803
+ addDomainToCollection,
804
+ downloadWithRetry
577
805
  };
@@ -14,6 +14,11 @@ const { formatLogMessage } = require('./colorize');
14
14
  */
15
15
  class SmartCache {
16
16
  constructor(options = {}) {
17
+ // Calculate dynamic values first
18
+ const concurrency = options.concurrency || 6;
19
+ const optimalHeapLimit = this._calculateOptimalHeapLimit(concurrency);
20
+ const checkInterval = this._calculateCheckInterval(concurrency);
21
+
17
22
  this.options = {
18
23
  maxSize: options.maxSize || 5000,
19
24
  ttl: options.ttl || 1000 * 60 * 60, // 1 hour default
@@ -24,8 +29,18 @@ class SmartCache {
24
29
  persistencePath: options.persistencePath || '.cache',
25
30
  forceDebug: options.forceDebug || false,
26
31
  autoSave: options.autoSave !== false,
27
- autoSaveInterval: options.autoSaveInterval || 60000 // 1 minute
32
+ autoSaveInterval: options.autoSaveInterval || 60000, // 1 minute
33
+ maxHeapUsage: options.maxHeapUsage || optimalHeapLimit,
34
+ memoryCheckInterval: options.memoryCheckInterval || checkInterval,
35
+ concurrency: concurrency,
36
+ aggressiveMode: options.aggressiveMode || false
28
37
  };
38
+
39
+ // Add save debouncing
40
+ this.lastSaveTime = 0;
41
+ this.saveInProgress = false;
42
+ this.saveTimeout = null;
43
+ this.pendingSave = false;
29
44
 
30
45
  // Initialize cache layers
31
46
  this._initializeCaches();
@@ -42,6 +57,34 @@ class SmartCache {
42
57
  if (this.options.enablePersistence && this.options.autoSave) {
43
58
  this._setupAutoSave();
44
59
  }
60
+
61
+ // Set up memory monitoring
62
+ this.memoryCheckInterval = setInterval(() => {
63
+ this._checkMemoryPressure();
64
+ }, this.options.memoryCheckInterval);
65
+ }
66
+
67
+ /**
68
+ * Calculate optimal heap limit based on concurrency
69
+ * @private
70
+ */
71
+ _calculateOptimalHeapLimit(concurrency) {
72
+ // Base cache needs: 100MB
73
+ // Per concurrent connection: ~75MB average
74
+ // Safety margin: 50%
75
+ const baseCacheMemory = 100 * 1024 * 1024; // 100MB
76
+ const perConnectionMemory = 75 * 1024 * 1024; // 75MB
77
+ const totalEstimated = baseCacheMemory + (concurrency * perConnectionMemory);
78
+ return Math.round(totalEstimated * 0.4); // Cache should use max 40% of estimated total
79
+ }
80
+
81
+ /**
82
+ * Calculate check interval based on concurrency
83
+ * @private
84
+ */
85
+ _calculateCheckInterval(concurrency) {
86
+ // Higher concurrency = more frequent checks
87
+ return Math.max(5000, 30000 - (concurrency * 1000)); // 5s min, scales down with concurrency
45
88
  }
46
89
 
47
90
  /**
@@ -57,29 +100,43 @@ class SmartCache {
57
100
  updateAgeOnHas: false
58
101
  });
59
102
 
60
- // Pattern matching results cache
103
+ // Pattern matching results cache - reduce size for high concurrency
104
+ const patternCacheSize = this.options.concurrency > 10 ? 500 : 1000;
61
105
  this.patternCache = new LRUCache({
62
- max: 1000,
106
+ max: patternCacheSize,
63
107
  ttl: this.options.ttl * 2 // Patterns are more stable
64
108
  });
65
109
 
66
- // Response content cache for searchstring operations
110
+ // Response content cache - aggressive limits for high concurrency
111
+ const responseCacheSize = this.options.concurrency > 10 ? 50 : 200;
112
+ const responseCacheMemory = this.options.concurrency > 10 ? 20 * 1024 * 1024 : 50 * 1024 * 1024;
67
113
  this.responseCache = new LRUCache({
68
- max: 200,
114
+ max: responseCacheSize,
69
115
  ttl: 1000 * 60 * 30, // 30 minutes for response content
70
- maxSize: 50 * 1024 * 1024, // 50MB max cache size
116
+ maxSize: responseCacheMemory,
71
117
  sizeCalculation: (value) => value.length
72
118
  });
73
119
 
120
+ // Disable response cache entirely for very high concurrency
121
+ if (this.options.concurrency > 15 || this.options.aggressiveMode) {
122
+ this.options.enableResponseCache = false;
123
+ if (this.options.forceDebug) {
124
+ console.log(formatLogMessage('debug',
125
+ `[SmartCache] Response cache disabled for high concurrency (${this.options.concurrency})`
126
+ ));
127
+ }
128
+ }
129
+
74
130
  // WHOIS/DNS results cache
75
131
  this.netToolsCache = new LRUCache({
76
132
  max: 500,
77
133
  ttl: 1000 * 60 * 60 * 24 // 24 hours for WHOIS/DNS
78
134
  });
79
135
 
80
- // Similarity cache for expensive string comparisons
136
+ // Similarity cache - reduce for high concurrency
137
+ const similarityCacheSize = this.options.concurrency > 10 ? 1000 : 2000;
81
138
  this.similarityCache = new LRUCache({
82
- max: 2000,
139
+ max: similarityCacheSize,
83
140
  ttl: this.options.ttl
84
141
  });
85
142
 
@@ -107,6 +164,9 @@ class SmartCache {
107
164
  regexCacheHits: 0,
108
165
  persistenceLoads: 0,
109
166
  persistenceSaves: 0,
167
+ memoryPressureEvents: 0,
168
+ memoryWarnings: 0,
169
+ responseCacheSkips: 0,
110
170
  startTime: Date.now()
111
171
  };
112
172
  }
@@ -274,6 +334,21 @@ class SmartCache {
274
334
  cacheResponse(url, content) {
275
335
  if (!this.options.enableResponseCache) return;
276
336
 
337
+ // Skip response caching entirely for very high concurrency
338
+ if (this.options.concurrency > 12) {
339
+ this.stats.responseCacheSkips++;
340
+ return;
341
+ }
342
+
343
+ // Check memory before caching large content
344
+ const memUsage = process.memoryUsage();
345
+ const threshold = this.options.concurrency > 10 ? 0.7 : 0.8; // Lower threshold for high concurrency
346
+ if (memUsage.heapUsed > this.options.maxHeapUsage * threshold) {
347
+ this.stats.responseCacheSkips++;
348
+ this._logMemorySkip('response cache');
349
+ return;
350
+ }
351
+
277
352
  // Only cache if content is reasonable size
278
353
  if (content && content.length < 5 * 1024 * 1024) { // 5MB limit per response
279
354
  this.responseCache.set(url, content);
@@ -350,7 +425,85 @@ class SmartCache {
350
425
  this.stats.similarityMisses++;
351
426
  return null;
352
427
  }
428
+
429
+ /**
430
+ * Monitor memory usage and proactively manage caches
431
+ * @private
432
+ */
433
+ _checkMemoryPressure() {
434
+ const memUsage = process.memoryUsage();
435
+ const heapUsedMB = Math.round(memUsage.heapUsed / 1024 / 1024);
436
+ const maxHeapMB = Math.round(this.options.maxHeapUsage / 1024 / 1024);
437
+ const usagePercent = (memUsage.heapUsed / this.options.maxHeapUsage) * 100;
438
+
439
+ // Adjust thresholds based on concurrency
440
+ const criticalThreshold = this.options.concurrency > 10 ? 0.85 : 1.0;
441
+ const warningThreshold = this.options.concurrency > 10 ? 0.70 : 0.85;
442
+ const infoThreshold = this.options.concurrency > 10 ? 0.60 : 0.75;
443
+
444
+ // Critical threshold - aggressive cleanup
445
+ if (memUsage.heapUsed > this.options.maxHeapUsage * criticalThreshold) {
446
+ this._performMemoryCleanup('critical', heapUsedMB, maxHeapMB);
447
+ return true;
448
+ }
449
+
450
+ // Warning threshold - moderate cleanup
451
+ if (memUsage.heapUsed > this.options.maxHeapUsage * warningThreshold) {
452
+ this._performMemoryCleanup('warning', heapUsedMB, maxHeapMB);
453
+ return true;
454
+ }
455
+
456
+ // Info threshold - log only
457
+ if (memUsage.heapUsed > this.options.maxHeapUsage * infoThreshold) {
458
+ this.stats.memoryWarnings++;
459
+ if (this.options.forceDebug) {
460
+ console.log(formatLogMessage('debug',
461
+ `[SmartCache] Memory info: ${heapUsedMB}MB/${maxHeapMB}MB (${usagePercent.toFixed(1)}%)`
462
+ ));
463
+ }
464
+ }
465
+
466
+ return false;
467
+ }
353
468
 
469
+ /**
470
+ * Perform memory cleanup based on severity
471
+ * @private
472
+ */
473
+ _performMemoryCleanup(level, heapUsedMB, maxHeapMB) {
474
+ this.stats.memoryPressureEvents++;
475
+
476
+ if (this.options.forceDebug) {
477
+ console.log(formatLogMessage('debug',
478
+ `[SmartCache] Memory ${level}: ${heapUsedMB}MB/${maxHeapMB}MB, performing cleanup...`
479
+ ));
480
+ }
481
+
482
+ if (level === 'critical' || this.options.concurrency > 12) {
483
+ // Aggressive cleanup - clear volatile caches
484
+ this.responseCache.clear();
485
+ this.patternCache.clear();
486
+ this.similarityCache.clear();
487
+
488
+ // For very high concurrency, also trim domain cache
489
+ if (this.options.concurrency > 15) {
490
+ const currentSize = this.domainCache.size;
491
+ this.domainCache.clear();
492
+ if (this.options.forceDebug) {
493
+ console.log(formatLogMessage('debug', `[SmartCache] Cleared ${currentSize} domain cache entries`));
494
+ }
495
+ }
496
+ } else if (level === 'warning') {
497
+ // Moderate cleanup - clear largest cache
498
+ this.responseCache.clear();
499
+ }
500
+
501
+ // Force garbage collection if available
502
+ if (global.gc) {
503
+ global.gc();
504
+ }
505
+ }
506
+
354
507
  /**
355
508
  * Get cache statistics
356
509
  * @returns {Object} Statistics object
@@ -364,6 +517,9 @@ class SmartCache {
364
517
  (this.stats.responseHits + this.stats.responseMisses) || 0;
365
518
  const netToolsHitRate = this.stats.netToolsHits /
366
519
  (this.stats.netToolsHits + this.stats.netToolsMisses) || 0;
520
+
521
+
522
+ const memUsage = process.memoryUsage();
367
523
 
368
524
  return {
369
525
  ...this.stats,
@@ -380,7 +536,11 @@ class SmartCache {
380
536
  regexCacheSize: this.regexCache.size,
381
537
  totalCacheEntries: this.domainCache.size + this.patternCache.size +
382
538
  this.responseCache.size + this.netToolsCache.size +
383
- this.similarityCache.size + this.regexCache.size
539
+ this.similarityCache.size + this.regexCache.size,
540
+ memoryUsageMB: Math.round(memUsage.heapUsed / 1024 / 1024),
541
+ memoryMaxMB: Math.round(this.options.maxHeapUsage / 1024 / 1024),
542
+ memoryUsagePercent: ((memUsage.heapUsed / this.options.maxHeapUsage) * 100).toFixed(1) + '%',
543
+ responseCacheMemoryMB: Math.round((this.responseCache.calculatedSize || 0) / 1024 / 1024)
384
544
  };
385
545
  }
386
546
 
@@ -401,6 +561,18 @@ class SmartCache {
401
561
  }
402
562
  }
403
563
 
564
+ /**
565
+ * Helper method to log memory-related cache skips
566
+ * @private
567
+ */
568
+ _logMemorySkip(operation) {
569
+ if (this.options.forceDebug) {
570
+ console.log(formatLogMessage('debug',
571
+ `[SmartCache] Skipping ${operation} due to memory pressure`
572
+ ));
573
+ }
574
+ }
575
+
404
576
  /**
405
577
  * Load persistent cache from disk
406
578
  * @private
@@ -464,6 +636,34 @@ class SmartCache {
464
636
  */
465
637
  savePersistentCache() {
466
638
  if (!this.options.enablePersistence) return;
639
+
640
+ // Prevent concurrent saves
641
+ if (this.saveInProgress) {
642
+ this.pendingSave = true;
643
+ if (this.options.forceDebug) {
644
+ console.log(formatLogMessage('debug', '[SmartCache] Save in progress, marking pending...'));
645
+ }
646
+ return;
647
+ }
648
+
649
+ // Debounce saves - don't save more than once every 10 seconds
650
+ const now = Date.now();
651
+ if (now - this.lastSaveTime < 10000) {
652
+ // Schedule a delayed save if none is pending
653
+ if (!this.saveTimeout && !this.pendingSave) {
654
+ this.pendingSave = true;
655
+ this.saveTimeout = setTimeout(() => {
656
+ this.saveTimeout = null;
657
+ if (this.pendingSave) {
658
+ this.pendingSave = false;
659
+ this.savePersistentCache();
660
+ }
661
+ }, 10000 - (now - this.lastSaveTime));
662
+ }
663
+ return;
664
+ }
665
+ this.saveInProgress = true;
666
+ this.lastSaveTime = now;
467
667
 
468
668
  const cacheDir = this.options.persistencePath;
469
669
  const cacheFile = path.join(cacheDir, 'smart-cache.json');
@@ -475,7 +675,7 @@ class SmartCache {
475
675
  }
476
676
 
477
677
  const data = {
478
- timestamp: Date.now(),
678
+ timestamp: now,
479
679
  domainCache: Array.from(this.domainCache.entries()),
480
680
  netToolsCache: Array.from(this.netToolsCache.entries()),
481
681
  stats: this.stats
@@ -495,6 +695,14 @@ class SmartCache {
495
695
  `[SmartCache] Failed to save cache: ${err.message}`
496
696
  ));
497
697
  }
698
+ } finally {
699
+ this.saveInProgress = false;
700
+
701
+ // Process any pending saves
702
+ if (this.pendingSave && !this.saveTimeout) {
703
+ this.pendingSave = false;
704
+ setTimeout(() => this.savePersistentCache(), 1000);
705
+ }
498
706
  }
499
707
  }
500
708
 
@@ -512,9 +720,16 @@ class SmartCache {
512
720
  * Clean up resources
513
721
  */
514
722
  destroy() {
723
+ if (this.memoryCheckInterval) {
724
+ clearInterval(this.memoryCheckInterval);
725
+ }
515
726
  if (this.autoSaveInterval) {
516
727
  clearInterval(this.autoSaveInterval);
517
728
  }
729
+ if (this.saveTimeout) {
730
+ clearTimeout(this.saveTimeout);
731
+ this.saveTimeout = null;
732
+ }
518
733
 
519
734
  // Save cache one last time
520
735
  if (this.options.enablePersistence) {
@@ -523,6 +738,92 @@ class SmartCache {
523
738
 
524
739
  this.clear();
525
740
  }
741
+
742
+ /**
743
+ * Clear persistent cache files and directories
744
+ * @param {Object} options - Clear options
745
+ * @param {boolean} options.silent - Suppress console output
746
+ * @param {boolean} options.forceDebug - Enable debug logging
747
+ * @returns {Object} Clear operation results
748
+ */
749
+ static clearPersistentCache(options = {}) {
750
+ const { silent = false, forceDebug = false, cachePath = '.cache' } = options;
751
+
752
+ const cachePaths = [
753
+ cachePath,
754
+ path.join(cachePath, 'smart-cache.json'),
755
+ // Add other potential cache files here if needed
756
+ ];
757
+
758
+ let clearedItems = 0;
759
+ let totalSize = 0;
760
+ const clearedFiles = [];
761
+ const errors = [];
762
+
763
+ if (!silent) {
764
+ console.log(`\n??? Clearing cache...`);
765
+ }
766
+
767
+ for (const currentCachePath of cachePaths) {
768
+ if (fs.existsSync(currentCachePath)) {
769
+ try {
770
+ const stats = fs.statSync(currentCachePath);
771
+ if (stats.isDirectory()) {
772
+ // Calculate total size of directory contents
773
+ const files = fs.readdirSync(currentCachePath);
774
+ for (const file of files) {
775
+ const filePath = path.join(currentCachePath, file);
776
+ if (fs.existsSync(filePath)) {
777
+ totalSize += fs.statSync(filePath).size;
778
+ }
779
+ }
780
+ fs.rmSync(currentCachePath, { recursive: true, force: true });
781
+ clearedItems++;
782
+ clearedFiles.push({ type: 'directory', path: currentCachePath, size: totalSize });
783
+ if (forceDebug) {
784
+ console.log(formatLogMessage('debug', `Cleared cache directory: ${currentCachePath}`));
785
+ }
786
+ } else {
787
+ totalSize += stats.size;
788
+ fs.unlinkSync(currentCachePath);
789
+ clearedItems++;
790
+ clearedFiles.push({ type: 'file', path: currentCachePath, size: stats.size });
791
+ if (forceDebug) {
792
+ console.log(formatLogMessage('debug', `Cleared cache file: ${currentCachePath}`));
793
+ }
794
+ }
795
+ } catch (clearErr) {
796
+ errors.push({ path: currentCachePath, error: clearErr.message });
797
+ if (forceDebug) {
798
+ console.log(formatLogMessage('debug', `Failed to clear ${currentCachePath}: ${clearErr.message}`));
799
+ }
800
+ }
801
+ }
802
+ }
803
+
804
+ const result = {
805
+ success: errors.length === 0,
806
+ clearedItems,
807
+ totalSize,
808
+ sizeMB: (totalSize / 1024 / 1024).toFixed(2),
809
+ clearedFiles,
810
+ errors
811
+ };
812
+
813
+ if (!silent) {
814
+ if (clearedItems > 0) {
815
+ console.log(`? Cache cleared: ${clearedItems} item(s), ${result.sizeMB}MB freed`);
816
+ } else {
817
+ console.log(`?? No cache files found to clear`);
818
+ }
819
+
820
+ if (errors.length > 0) {
821
+ console.warn(`?? ${errors.length} error(s) occurred during cache clearing`);
822
+ }
823
+ }
824
+
825
+ return result;
826
+ }
526
827
  }
527
828
 
528
829
  /**
@@ -541,11 +842,16 @@ function createSmartCache(config = {}) {
541
842
  persistencePath: config.cache_path || '.cache',
542
843
  forceDebug: config.forceDebug || false,
543
844
  autoSave: config.cache_autosave !== false,
544
- autoSaveInterval: (config.cache_autosave_minutes || 1) * 60 * 1000
845
+ autoSaveInterval: (config.cache_autosave_minutes || 1) * 60 * 1000,
846
+ maxHeapUsage: config.cache_max_heap_mb ? config.cache_max_heap_mb * 1024 * 1024 : undefined,
847
+ memoryCheckInterval: (config.cache_memory_check_seconds || 30) * 1000,
848
+ concurrency: config.max_concurrent_sites || 6,
849
+ aggressiveMode: config.cache_aggressive_mode === true
545
850
  });
546
851
  }
547
852
 
548
853
  module.exports = {
549
854
  SmartCache,
550
- createSmartCache
855
+ createSmartCache,
856
+ clearPersistentCache: SmartCache.clearPersistentCache
551
857
  };
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v1.0.57 ===
1
+ // === Network scanner script (nwss.js) v1.0.60 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -34,13 +34,14 @@ const { performPageInteraction, createInteractionConfig } = require('./lib/inter
34
34
  // Domain detection cache for performance optimization
35
35
  const { createGlobalHelpers, getTotalDomainsSkipped, getDetectedDomainsCount } = require('./lib/domain-cache');
36
36
  const { createSmartCache } = require('./lib/smart-cache'); // Smart cache system
37
+ const { clearPersistentCache } = require('./lib/smart-cache');
37
38
  // Enhanced redirect handling
38
39
  const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/redirect');
39
40
  // Ensure web browser is working correctly
40
41
  const { monitorBrowserHealth, isBrowserHealthy } = require('./lib/browserhealth');
41
42
 
42
43
  // --- Script Configuration & Constants ---
43
- const VERSION = '1.0.57'; // Script version
44
+ const VERSION = '1.0.60'; // Script version
44
45
 
45
46
  // get startTime
46
47
  const startTime = Date.now();
@@ -102,6 +103,8 @@ const validateConfig = args.includes('--validate-config');
102
103
  const validateRules = args.includes('--validate-rules');
103
104
  const testValidation = args.includes('--test-validation');
104
105
  let cleanRules = args.includes('--clean-rules');
106
+ const clearCache = args.includes('--clear-cache');
107
+ const ignoreCache = args.includes('--ignore-cache');
105
108
 
106
109
  let validateRulesFile = null;
107
110
  const validateRulesIndex = args.findIndex(arg => arg === '--validate-rules');
@@ -224,6 +227,15 @@ if (args.includes('--version')) {
224
227
  process.exit(0);
225
228
  }
226
229
 
230
+ // Handle --clear-cache before config loading (uses default cache path)
231
+ if (clearCache && !dryRunMode) {
232
+ clearPersistentCache({
233
+ silent: silentMode,
234
+ forceDebug,
235
+ cachePath: '.cache' // Default path, will be updated after config loads if needed
236
+ });
237
+ }
238
+
227
239
  // Handle validation-only operations before main help
228
240
  if (testValidation) {
229
241
  console.log(`\n${messageColors.processing('Running domain validation tests...')}`);
@@ -360,6 +372,8 @@ Validation Options:
360
372
  --validate-rules [file] Validate rule file format (uses --output/--compare files if no file specified)
361
373
  --clean-rules [file] Clean rule files by removing invalid lines and optionally duplicates (uses --output/--compare files if no file specified)
362
374
  --test-validation Run domain validation tests and exit
375
+ --clear-cache Clear persistent cache before scanning (improves fresh start performance)
376
+ --ignore-cache Bypass all smart caching functionality during scanning
363
377
 
364
378
  Global config.json options:
365
379
  ignoreDomains: ["domain.com", "*.ads.com"] Domains to completely ignore (supports wildcards)
@@ -551,15 +565,40 @@ const RESOURCE_CLEANUP_INTERVAL = (() => {
551
565
  return 180;
552
566
  })();
553
567
 
554
- // Initialize smart cache system AFTER config is loaded
568
// Clear the configured cache location now that the config is available.
// Dry-run mode always clears here (the early pre-config clear is skipped in
// that mode); a normal run only needs a second pass when the configured
// cache_path differs from the default '.cache' that was cleared earlier.
if (clearCache) {
  const usesCustomCachePath = Boolean(config.cache_path) && config.cache_path !== '.cache';
  if (dryRunMode || usesCustomCachePath) {
    clearPersistentCache({
      silent: silentMode,
      forceDebug,
      cachePath: config.cache_path || '.cache'
    });
  }
}
585
+
586
+ // Initialize smart cache system AFTER config is loaded (unless --ignore-cache is used)
587
+ if (ignoreCache) {
588
+ smartCache = null;
589
+ if (forceDebug) console.log(formatLogMessage('debug', 'Smart cache disabled by --ignore-cache flag'));
590
+ } else {
555
591
  smartCache = createSmartCache({
556
592
  ...config,
557
593
  forceDebug,
558
- cache_persistence: config.cache_persistence !== false, // Enable by default
559
- cache_autosave: config.cache_autosave !== false,
594
+ max_concurrent_sites: MAX_CONCURRENT_SITES, // Pass concurrency info
595
+ cache_aggressive_mode: MAX_CONCURRENT_SITES > 12, // Auto-enable for high concurrency
596
+ cache_persistence: false, // Disable persistence completely
597
+ cache_autosave: false, // Disable auto-save completely
560
598
  cache_autosave_minutes: config.cache_autosave_minutes || 1,
561
599
  cache_max_size: config.cache_max_size || 5000
562
600
  });
601
+ }
563
602
 
564
603
  // Handle --clean-rules after config is loaded (so we have access to sites)
565
604
  if (cleanRules || cleanRulesFile) {
@@ -1503,7 +1542,7 @@ function setupFrameHandling(page, forceDebug) {
1503
1542
  const similarityThreshold = siteConfig.ignore_similar_threshold || ignore_similar_threshold;
1504
1543
  const ignoreSimilarIgnoredDomains = siteConfig.ignore_similar_ignored_domains !== undefined ? siteConfig.ignore_similar_ignored_domains : ignore_similar_ignored_domains;
1505
1544
 
1506
- // Use smart cache's similarity cache for performance
1545
+ // Use smart cache's similarity cache for performance (if cache is enabled)
1507
1546
  if (ignoreSimilarEnabled && smartCache) {
1508
1547
  const existingDomains = matchedDomains instanceof Map
1509
1548
  ? Array.from(matchedDomains.keys()).filter(key => !['dryRunMatches', 'dryRunNetTools', 'dryRunSearchString'].includes(key))
@@ -1522,14 +1561,14 @@ function setupFrameHandling(page, forceDebug) {
1522
1561
  // If no cached similarity exists, calculate and cache it
1523
1562
  if (cachedSimilarity === null) {
1524
1563
  const similarity = calculateSimilarity(domain, existingDomain);
1525
- if (smartCache) {
1564
+ if (smartCache && !ignoreCache) {
1526
1565
  smartCache.cacheSimilarity(domain, existingDomain, similarity);
1527
1566
  }
1528
1567
  }
1529
1568
  }
1530
1569
  }
1531
1570
 
1532
- // Check smart cache first
1571
+ // Check smart cache first (if cache is enabled)
1533
1572
  const context = {
1534
1573
  filterRegex: siteConfig.filterRegex,
1535
1574
  searchString: siteConfig.searchstring,
@@ -1581,7 +1620,7 @@ function setupFrameHandling(page, forceDebug) {
1581
1620
  // Mark full subdomain as detected for future reference
1582
1621
  markDomainAsDetected(cacheKey);
1583
1622
 
1584
- // Also mark in smart cache with context
1623
+ // Also mark in smart cache with context (if cache is enabled)
1585
1624
  if (smartCache) {
1586
1625
  smartCache.markDomainProcessed(domain, context, { resourceType, fullSubdomain });
1587
1626
  }
@@ -1831,7 +1870,7 @@ function setupFrameHandling(page, forceDebug) {
1831
1870
  }
1832
1871
 
1833
1872
  // Create and execute nettools handler
1834
- // Check smart cache for nettools results
1873
+ // Check smart cache for nettools results (if cache is enabled)
1835
1874
  const cachedWhois = smartCache ? smartCache.getCachedNetTools(reqDomain, 'whois') : null;
1836
1875
  const cachedDig = smartCache ? smartCache.getCachedNetTools(reqDomain, 'dig', digRecordType) : null;
1837
1876
 
@@ -1839,7 +1878,7 @@ function setupFrameHandling(page, forceDebug) {
1839
1878
  console.log(formatLogMessage('debug', `[SmartCache] Using cached nettools results for ${reqDomain}`));
1840
1879
  }
1841
1880
 
1842
- // Create nettools handler with cache callbacks
1881
+ // Create nettools handler with cache callbacks (if cache is enabled)
1843
1882
  const netToolsHandler = createNetToolsHandler({
1844
1883
  whoisTerms,
1845
1884
  whoisOrTerms,
@@ -1857,7 +1896,7 @@ function setupFrameHandling(page, forceDebug) {
1857
1896
  matchedDomains,
1858
1897
  addMatchedDomain,
1859
1898
  isDomainAlreadyDetected,
1860
- // Add cache callbacks if smart cache is available
1899
+ // Add cache callbacks if smart cache is available and caching is enabled
1861
1900
  onWhoisResult: smartCache ? (domain, result) => {
1862
1901
  smartCache.cacheNetTools(domain, 'whois', result);
1863
1902
  } : undefined,
@@ -1905,7 +1944,7 @@ function setupFrameHandling(page, forceDebug) {
1905
1944
 
1906
1945
  // If curl is enabled, download and analyze content immediately
1907
1946
  if (useCurl) {
1908
- // Check response cache first if smart cache is available
1947
+ // Check response cache first if smart cache is available and caching is enabled
1909
1948
  const cachedContent = smartCache ? smartCache.getCachedResponse(reqUrl) : null;
1910
1949
 
1911
1950
  if (cachedContent && forceDebug) {
@@ -1922,7 +1961,7 @@ function setupFrameHandling(page, forceDebug) {
1922
1961
  matchedDomains,
1923
1962
  addMatchedDomain, // Pass the helper function
1924
1963
  isDomainAlreadyDetected,
1925
- onContentFetched: smartCache ? (url, content) => {
1964
+ onContentFetched: smartCache && !ignoreCache ? (url, content) => {
1926
1965
  smartCache.cacheResponse(url, content);
1927
1966
  } : undefined,
1928
1967
  currentUrl,
@@ -2587,7 +2626,7 @@ function setupFrameHandling(page, forceDebug) {
2587
2626
  console.log(formatLogMessage('debug', `Output format: ${getFormatDescription(globalOptions)}`));
2588
2627
  console.log(formatLogMessage('debug', `Generated ${outputResult.totalRules} rules from ${outputResult.successfulPageLoads} successful page loads`));
2589
2628
  console.log(formatLogMessage('debug', `Performance: ${totalDomainsSkipped} domains skipped (already detected), ${detectedDomainsCount} unique domains cached`));
2590
- // Log smart cache statistics
2629
+ // Log smart cache statistics (if cache is enabled)
2591
2630
  if (smartCache) {
2592
2631
  const cacheStats = smartCache.getStats();
2593
2632
  console.log(formatLogMessage('debug', '=== Smart Cache Statistics ==='));
@@ -2677,7 +2716,7 @@ function setupFrameHandling(page, forceDebug) {
2677
2716
  const seconds = totalSeconds % 60;
2678
2717
 
2679
2718
  // Final summary report with timing and success statistics
2680
- // Clean up smart cache
2719
+ // Clean up smart cache (if it exists)
2681
2720
  if (smartCache) {
2682
2721
  smartCache.destroy();
2683
2722
  }
@@ -2699,6 +2738,9 @@ function setupFrameHandling(page, forceDebug) {
2699
2738
  if (totalDomainsSkipped > 0) {
2700
2739
  console.log(messageColors.info('Performance:') + ` ${totalDomainsSkipped} domains skipped (already detected)`);
2701
2740
  }
2741
+ if (ignoreCache && forceDebug) {
2742
+ console.log(messageColors.info('Cache:') + ` Smart caching was disabled`);
2743
+ }
2702
2744
  }
2703
2745
 
2704
2746
  // Clean process termination
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "1.0.57",
3
+ "version": "1.0.59",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {
@@ -10,9 +10,10 @@
10
10
  "lint": "eslint *.js lib/*.js"
11
11
  },
12
12
  "dependencies": {
13
+ "lru-cache": "^10.4.3",
13
14
  "p-limit": "^4.0.0",
14
15
  "psl": "^1.15.0",
15
- "puppeteer": "^23.10.0"
16
+ "puppeteer": "^23.11.1"
16
17
  },
17
18
  "keywords": [
18
19
  "puppeteer",
@@ -29,7 +30,7 @@
29
30
  "author": "FanboyNZ",
30
31
  "license": "GPL-3.0",
31
32
  "engines": {
32
- "node": ">=18.0.0"
33
+ "node": ">=20.0.0"
33
34
  },
34
35
  "repository": {
35
36
  "type": "git",