@fanboynz/network-scanner 1.0.56 → 1.0.58
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/smart-cache.js +857 -0
- package/nwss.js +158 -5
- package/package.json +1 -1
|
@@ -0,0 +1,857 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Smart Cache Module - Intelligent multi-layer caching system for network scanner
|
|
3
|
+
* Provides context-aware caching for domains, patterns, responses, and network tools
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
const { LRUCache } = require('lru-cache');
|
|
7
|
+
const fs = require('fs');
|
|
8
|
+
const path = require('path');
|
|
9
|
+
const { formatLogMessage } = require('./colorize');
|
|
10
|
+
|
|
11
|
+
/**
 * SmartCache - multi-layer cache used by the network scanner.
 *
 * Layers (all created in `_initializeCaches`):
 *  - domainCache:     processed-domain markers keyed by domain + scan context
 *  - patternCache:    url/pattern match results
 *  - responseCache:   response bodies, bounded by entry count and total length
 *  - netToolsCache:   WHOIS/DNS tool results
 *  - similarityCache: pairwise domain similarity scores
 *  - regexCache:      compiled RegExp objects (plain Map, unbounded)
 *
 * Optionally persists the domain and nettools layers to
 * `<persistencePath>/smart-cache.json`.
 * @class
 */
class SmartCache {
  /**
   * @param {Object} [options]
   * @param {number} [options.maxSize=5000] - Max entries in the domain cache
   * @param {number} [options.ttl=3600000] - Default TTL in ms
   * @param {boolean} [options.enablePatternCache=true]
   * @param {boolean} [options.enableResponseCache=true]
   * @param {boolean} [options.enableWhoisCache=true]
   * @param {boolean} [options.enablePersistence=false]
   * @param {string} [options.persistencePath='.cache']
   * @param {boolean} [options.forceDebug=false] - Emit debug log lines
   * @param {boolean} [options.autoSave=true] - Periodic save (needs persistence)
   * @param {number} [options.autoSaveInterval=60000] - Auto-save period in ms
   * @param {number} [options.maxHeapUsage] - Heap budget in bytes (default derived from concurrency)
   * @param {number} [options.memoryCheckInterval] - Memory check period in ms (default derived from concurrency)
   * @param {number} [options.concurrency=6]
   * @param {boolean} [options.aggressiveMode=false] - Disables the response cache
   */
  constructor(options = {}) {
    // Concurrency drives the default heap budget and memory-check period.
    const concurrency = options.concurrency || 6;
    const optimalHeapLimit = this._calculateOptimalHeapLimit(concurrency);
    const checkInterval = this._calculateCheckInterval(concurrency);

    this.options = {
      maxSize: options.maxSize || 5000,
      ttl: options.ttl || 1000 * 60 * 60, // 1 hour default
      enablePatternCache: options.enablePatternCache !== false,
      enableResponseCache: options.enableResponseCache !== false,
      enableWhoisCache: options.enableWhoisCache !== false,
      enablePersistence: options.enablePersistence === true,
      persistencePath: options.persistencePath || '.cache',
      forceDebug: options.forceDebug || false,
      autoSave: options.autoSave !== false,
      autoSaveInterval: options.autoSaveInterval || 60000, // 1 minute
      maxHeapUsage: options.maxHeapUsage || optimalHeapLimit,
      memoryCheckInterval: options.memoryCheckInterval || checkInterval,
      concurrency: concurrency,
      aggressiveMode: options.aggressiveMode || false
    };

    // Save debouncing state
    this.lastSaveTime = 0;
    this.saveInProgress = false;
    this.saveTimeout = null;
    this.pendingSave = false;
    this.autoSaveInterval = null;

    this._initializeCaches();
    this._initializeStats();

    if (this.options.enablePersistence) {
      this._loadPersistentCache();
    }

    if (this.options.enablePersistence && this.options.autoSave) {
      this._setupAutoSave();
    }

    // Periodic memory monitoring. unref() so this housekeeping timer alone
    // never keeps the Node process alive when destroy() was not called.
    this.memoryCheckInterval = setInterval(() => {
      this._checkMemoryPressure();
    }, this.options.memoryCheckInterval);
    this.memoryCheckInterval.unref();
  }

  /**
   * Calculate the default heap budget for the given concurrency.
   * Estimate = 100MB base + 75MB per concurrent connection; the cache budget
   * is 40% of that estimate.
   * @param {number} concurrency
   * @returns {number} Budget in bytes
   * @private
   */
  _calculateOptimalHeapLimit(concurrency) {
    const baseCacheMemory = 100 * 1024 * 1024; // 100MB
    const perConnectionMemory = 75 * 1024 * 1024; // 75MB
    const totalEstimated = baseCacheMemory + (concurrency * perConnectionMemory);
    return Math.round(totalEstimated * 0.4);
  }

  /**
   * Calculate the default memory-check period: 30s minus 1s per unit of
   * concurrency, never below 5s.
   * @param {number} concurrency
   * @returns {number} Period in ms
   * @private
   */
  _calculateCheckInterval(concurrency) {
    return Math.max(5000, 30000 - (concurrency * 1000));
  }

  /**
   * Create all cache layers. Sizes shrink when concurrency > 10, and the
   * response cache is switched off when concurrency > 15 or aggressiveMode.
   * @private
   */
  _initializeCaches() {
    const highConcurrency = this.options.concurrency > 10;

    // Domain detection cache with TTL
    this.domainCache = new LRUCache({
      max: this.options.maxSize,
      ttl: this.options.ttl,
      updateAgeOnGet: true,
      updateAgeOnHas: false
    });

    // Pattern matching results cache (kept twice as long as the default TTL)
    this.patternCache = new LRUCache({
      max: highConcurrency ? 500 : 1000,
      ttl: this.options.ttl * 2
    });

    // Response content cache, bounded by count and by summed content length
    this.responseCache = new LRUCache({
      max: highConcurrency ? 50 : 200,
      ttl: 1000 * 60 * 30, // 30 minutes for response content
      maxSize: highConcurrency ? 20 * 1024 * 1024 : 50 * 1024 * 1024,
      sizeCalculation: (value) => value.length
    });

    // Disable response cache entirely for very high concurrency
    if (this.options.concurrency > 15 || this.options.aggressiveMode) {
      this.options.enableResponseCache = false;
      if (this.options.forceDebug) {
        console.log(formatLogMessage('debug',
          `[SmartCache] Response cache disabled for high concurrency (${this.options.concurrency})`
        ));
      }
    }

    // WHOIS/DNS results cache
    this.netToolsCache = new LRUCache({
      max: 500,
      ttl: 1000 * 60 * 60 * 24 // 24 hours for WHOIS/DNS
    });

    // Similarity cache
    this.similarityCache = new LRUCache({
      max: highConcurrency ? 1000 : 2000,
      ttl: this.options.ttl
    });

    // Regex compilation cache
    this.regexCache = new Map();
  }

  /**
   * Reset all statistics counters and restart the runtime clock.
   * @private
   */
  _initializeStats() {
    this.stats = {
      hits: 0,
      misses: 0,
      patternHits: 0,
      patternMisses: 0,
      responseHits: 0,
      responseMisses: 0,
      netToolsHits: 0,
      netToolsMisses: 0,
      similarityHits: 0,
      similarityMisses: 0,
      regexCompilations: 0,
      regexCacheHits: 0,
      persistenceLoads: 0,
      persistenceSaves: 0,
      memoryPressureEvents: 0,
      memoryWarnings: 0,
      responseCacheSkips: 0,
      startTime: Date.now()
    };
  }

  /**
   * Check whether a domain was already processed in the given context.
   * @param {string} domain - Domain to check
   * @param {Object} context - Processing context
   * @returns {boolean} True if domain should be skipped
   */
  shouldSkipDomain(domain, context = {}) {
    const cacheKey = this._generateCacheKey(domain, context);

    if (!this.domainCache.has(cacheKey)) {
      this.stats.misses++;
      return false;
    }

    this.stats.hits++;
    if (this.options.forceDebug) {
      // peek() instead of get(): debug logging must not refresh the entry's
      // age/recency, otherwise expiry would differ between debug and normal runs.
      const cached = this.domainCache.peek(cacheKey);
      const age = cached ? Date.now() - cached.timestamp : 0;
      console.log(formatLogMessage('debug',
        `[SmartCache] Cache hit for ${domain} (age: ${Math.round(age/1000)}s, context: ${JSON.stringify(context)})`
      ));
    }
    return true;
  }

  /**
   * Mark domain as processed with context
   * @param {string} domain - Domain to mark
   * @param {Object} context - Processing context
   * @param {Object} metadata - Additional metadata to store
   */
  markDomainProcessed(domain, context = {}, metadata = {}) {
    const cacheKey = this._generateCacheKey(domain, context);
    this.domainCache.set(cacheKey, {
      timestamp: Date.now(),
      metadata,
      context,
      domain
    });

    if (this.options.forceDebug) {
      console.log(formatLogMessage('debug',
        `[SmartCache] Marked ${domain} as processed (context: ${JSON.stringify(context)})`
      ));
    }
  }

  /**
   * Generate a cache key from the domain and the context fields that affect
   * processing (filterRegex, searchString, resourceType, nettools).
   *
   * Every field keeps a fixed position and the list is JSON-encoded, so a
   * value in one field can never collide with the same value in another
   * (e.g. resourceType 'script' vs searchString 'script').
   * @param {string} domain - Domain
   * @param {Object} context - Context object
   * @returns {string} Cache key
   * @private
   */
  _generateCacheKey(domain, context) {
    const { filterRegex, searchString, resourceType, nettools } = context;
    const components = [
      domain,
      filterRegex || '',
      searchString || '',
      resourceType || '',
      nettools ? 'nt' : ''
    ].map(String);

    return JSON.stringify(components);
  }

  /**
   * Get or compile regex pattern with caching. A pattern written as
   * `/body/` is compiled from `body`. Failed compilations are not cached.
   * @param {string} pattern - Regex pattern string
   * @returns {RegExp|null} Compiled regex, or null if compilation failed
   */
  getCompiledRegex(pattern) {
    const cached = this.regexCache.get(pattern);
    if (cached !== undefined) {
      this.stats.regexCacheHits++;
      return cached;
    }

    this.stats.regexCompilations++;
    try {
      const regex = new RegExp(pattern.replace(/^\/(.*)\/$/, '$1'));
      this.regexCache.set(pattern, regex);
      return regex;
    } catch (err) {
      if (this.options.forceDebug) {
        console.log(formatLogMessage('debug',
          `[SmartCache] Failed to compile regex: ${pattern}`
        ));
      }
      return null;
    }
  }

  /**
   * Check pattern matching cache
   * @param {string} url - URL to check
   * @param {string} pattern - Regex pattern
   * @returns {boolean|null} Cached result or null if not cached
   */
  getCachedPatternMatch(url, pattern) {
    if (!this.options.enablePatternCache) return null;

    const cached = this.patternCache.get(`${url}:${pattern}`);
    if (cached === undefined) {
      this.stats.patternMisses++;
      return null;
    }

    this.stats.patternHits++;
    if (this.options.forceDebug) {
      console.log(formatLogMessage('debug',
        `[SmartCache] Pattern cache hit for ${url.substring(0, 50)}...`
      ));
    }
    return cached;
  }

  /**
   * Cache pattern matching result
   * @param {string} url - URL
   * @param {string} pattern - Regex pattern
   * @param {boolean} result - Match result
   */
  cachePatternMatch(url, pattern, result) {
    if (!this.options.enablePatternCache) return;

    this.patternCache.set(`${url}:${pattern}`, result);
  }

  /**
   * Get cached response content
   * @param {string} url - URL
   * @returns {string|null} Cached content or null
   */
  getCachedResponse(url) {
    if (!this.options.enableResponseCache) return null;

    const cached = this.responseCache.get(url);
    if (!cached) {
      this.stats.responseMisses++;
      return null;
    }

    this.stats.responseHits++;
    if (this.options.forceDebug) {
      console.log(formatLogMessage('debug',
        `[SmartCache] Response cache hit for ${url.substring(0, 50)}...`
      ));
    }
    return cached;
  }

  /**
   * Cache response content. Skipped when the response cache is disabled,
   * when concurrency > 12, when heap usage is above the threshold, or when
   * the content is empty or 5MB and larger.
   * @param {string} url - URL
   * @param {string} content - Response content
   */
  cacheResponse(url, content) {
    if (!this.options.enableResponseCache) return;

    // Skip response caching entirely for very high concurrency
    if (this.options.concurrency > 12) {
      this.stats.responseCacheSkips++;
      return;
    }

    // Check memory before caching large content; lower threshold for high concurrency
    const memUsage = process.memoryUsage();
    const threshold = this.options.concurrency > 10 ? 0.7 : 0.8;
    if (memUsage.heapUsed > this.options.maxHeapUsage * threshold) {
      this.stats.responseCacheSkips++;
      this._logMemorySkip('response cache');
      return;
    }

    // Only cache if content is reasonable size (5MB limit per response)
    if (content && content.length < 5 * 1024 * 1024) {
      this.responseCache.set(url, content);
    }
  }

  /**
   * Get cached WHOIS/DNS results
   * @param {string} domain - Domain
   * @param {string} tool - Tool name (whois/dig)
   * @param {string} recordType - Record type for dig
   * @returns {Object|null} Cached result or null
   */
  getCachedNetTools(domain, tool, recordType = null) {
    if (!this.options.enableWhoisCache) return null;

    const cacheKey = `${tool}:${domain}${recordType ? ':' + recordType : ''}`;
    const cached = this.netToolsCache.get(cacheKey);
    if (!cached) {
      this.stats.netToolsMisses++;
      return null;
    }

    this.stats.netToolsHits++;
    if (this.options.forceDebug) {
      console.log(formatLogMessage('debug',
        `[SmartCache] ${tool.toUpperCase()} cache hit for ${domain}`
      ));
    }
    return cached;
  }

  /**
   * Cache WHOIS/DNS results
   * @param {string} domain - Domain
   * @param {string} tool - Tool name
   * @param {Object} result - Result to cache
   * @param {string} recordType - Record type for dig
   */
  cacheNetTools(domain, tool, result, recordType = null) {
    if (!this.options.enableWhoisCache) return;

    const cacheKey = `${tool}:${domain}${recordType ? ':' + recordType : ''}`;
    this.netToolsCache.set(cacheKey, result);
  }

  /**
   * Cache similarity comparison result. The key is order-independent.
   * @param {string} domain1 - First domain
   * @param {string} domain2 - Second domain
   * @param {number} similarity - Similarity score
   */
  cacheSimilarity(domain1, domain2, similarity) {
    const key = [domain1, domain2].sort().join('|');
    this.similarityCache.set(key, similarity);
  }

  /**
   * Get cached similarity score
   * @param {string} domain1 - First domain
   * @param {string} domain2 - Second domain
   * @returns {number|null} Cached similarity or null
   */
  getCachedSimilarity(domain1, domain2) {
    const key = [domain1, domain2].sort().join('|');
    const cached = this.similarityCache.get(key);

    if (cached === undefined) {
      this.stats.similarityMisses++;
      return null;
    }

    this.stats.similarityHits++;
    return cached;
  }

  /**
   * Compare heap usage against the configured budget and clean up if needed.
   * Thresholds are lower when concurrency > 10.
   * @returns {boolean} True if a cleanup was performed
   * @private
   */
  _checkMemoryPressure() {
    const memUsage = process.memoryUsage();
    const budget = this.options.maxHeapUsage;
    const heapUsedMB = Math.round(memUsage.heapUsed / 1024 / 1024);
    const maxHeapMB = Math.round(budget / 1024 / 1024);
    const usagePercent = (memUsage.heapUsed / budget) * 100;

    const highConcurrency = this.options.concurrency > 10;
    const criticalThreshold = highConcurrency ? 0.85 : 1.0;
    const warningThreshold = highConcurrency ? 0.70 : 0.85;
    const infoThreshold = highConcurrency ? 0.60 : 0.75;

    // Critical threshold - aggressive cleanup
    if (memUsage.heapUsed > budget * criticalThreshold) {
      this._performMemoryCleanup('critical', heapUsedMB, maxHeapMB);
      return true;
    }

    // Warning threshold - moderate cleanup
    if (memUsage.heapUsed > budget * warningThreshold) {
      this._performMemoryCleanup('warning', heapUsedMB, maxHeapMB);
      return true;
    }

    // Info threshold - count and log only
    if (memUsage.heapUsed > budget * infoThreshold) {
      this.stats.memoryWarnings++;
      if (this.options.forceDebug) {
        console.log(formatLogMessage('debug',
          `[SmartCache] Memory info: ${heapUsedMB}MB/${maxHeapMB}MB (${usagePercent.toFixed(1)}%)`
        ));
      }
    }

    return false;
  }

  /**
   * Perform memory cleanup based on severity.
   * @param {string} level - 'critical' or 'warning'
   * @param {number} heapUsedMB - Current heap usage, for logging
   * @param {number} maxHeapMB - Heap budget, for logging
   * @private
   */
  _performMemoryCleanup(level, heapUsedMB, maxHeapMB) {
    this.stats.memoryPressureEvents++;

    if (this.options.forceDebug) {
      console.log(formatLogMessage('debug',
        `[SmartCache] Memory ${level}: ${heapUsedMB}MB/${maxHeapMB}MB, performing cleanup...`
      ));
    }

    if (level === 'critical' || this.options.concurrency > 12) {
      // Aggressive cleanup - clear volatile caches
      this.responseCache.clear();
      this.patternCache.clear();
      this.similarityCache.clear();

      // For very high concurrency, the whole domain cache is cleared as well
      if (this.options.concurrency > 15) {
        const currentSize = this.domainCache.size;
        this.domainCache.clear();
        if (this.options.forceDebug) {
          console.log(formatLogMessage('debug', `[SmartCache] Cleared ${currentSize} domain cache entries`));
        }
      }
    } else if (level === 'warning') {
      // Moderate cleanup - clear only the response cache
      this.responseCache.clear();
    }

    // Force garbage collection if available (node --expose-gc)
    if (global.gc) {
      global.gc();
    }
  }

  /**
   * Get cache statistics: raw counters plus derived hit rates, layer sizes
   * and memory figures.
   * @returns {Object} Statistics object
   */
  getStats() {
    // 0/0 yields NaN, which is reported as 0.00%.
    const asPercent = (hits, misses) =>
      ((hits / (hits + misses) || 0) * 100).toFixed(2) + '%';

    const runtime = Date.now() - this.stats.startTime;
    const memUsage = process.memoryUsage();

    return {
      ...this.stats,
      runtime: Math.round(runtime / 1000), // seconds
      hitRate: asPercent(this.stats.hits, this.stats.misses),
      patternHitRate: asPercent(this.stats.patternHits, this.stats.patternMisses),
      responseHitRate: asPercent(this.stats.responseHits, this.stats.responseMisses),
      netToolsHitRate: asPercent(this.stats.netToolsHits, this.stats.netToolsMisses),
      domainCacheSize: this.domainCache.size,
      patternCacheSize: this.patternCache.size,
      responseCacheSize: this.responseCache.size,
      netToolsCacheSize: this.netToolsCache.size,
      similarityCacheSize: this.similarityCache.size,
      regexCacheSize: this.regexCache.size,
      totalCacheEntries: this.domainCache.size + this.patternCache.size +
        this.responseCache.size + this.netToolsCache.size +
        this.similarityCache.size + this.regexCache.size,
      memoryUsageMB: Math.round(memUsage.heapUsed / 1024 / 1024),
      memoryMaxMB: Math.round(this.options.maxHeapUsage / 1024 / 1024),
      memoryUsagePercent: ((memUsage.heapUsed / this.options.maxHeapUsage) * 100).toFixed(1) + '%',
      responseCacheMemoryMB: Math.round((this.responseCache.calculatedSize || 0) / 1024 / 1024)
    };
  }

  /**
   * Clear all caches and reset statistics.
   */
  clear() {
    this.domainCache.clear();
    this.patternCache.clear();
    this.responseCache.clear();
    this.netToolsCache.clear();
    this.similarityCache.clear();
    this.regexCache.clear();
    this._initializeStats();

    if (this.options.forceDebug) {
      console.log(formatLogMessage('debug', '[SmartCache] All caches cleared'));
    }
  }

  /**
   * Helper method to log memory-related cache skips
   * @param {string} operation - Name of the skipped operation
   * @private
   */
  _logMemorySkip(operation) {
    if (this.options.forceDebug) {
      console.log(formatLogMessage('debug',
        `[SmartCache] Skipping ${operation} due to memory pressure`
      ));
    }
  }

  /**
   * Load persistent cache from disk. Files older than 24 hours are ignored.
   * Entries are restored with their remaining TTL rather than a fresh one,
   * so persisted data cannot outlive its TTL across restarts.
   * @private
   */
  _loadPersistentCache() {
    const cacheFile = path.join(this.options.persistencePath, 'smart-cache.json');

    if (!fs.existsSync(cacheFile)) {
      return;
    }

    try {
      const data = JSON.parse(fs.readFileSync(cacheFile, 'utf8'));
      const now = Date.now();

      // Validate cache age
      if (data.timestamp && now - data.timestamp > 24 * 60 * 60 * 1000) {
        if (this.options.forceDebug) {
          console.log(formatLogMessage('debug',
            '[SmartCache] Persistent cache too old, ignoring'
          ));
        }
        return;
      }

      // Domain entries carry their own creation timestamp.
      if (data.domainCache && Array.isArray(data.domainCache)) {
        const ttl = this.options.ttl;
        for (const entry of data.domainCache) {
          if (!Array.isArray(entry)) continue;
          const [key, value] = entry;
          // A missing timestamp gives NaN, which fails the check and is skipped.
          const age = value ? now - value.timestamp : NaN;
          if (age < ttl) {
            // Math.min guards against timestamps in the future (clock changes).
            this.domainCache.set(key, value, { ttl: Math.min(ttl, ttl - age) });
          }
        }
      }

      // Nettools entries are stored as [key, value, expiresAt]; files written
      // by older versions have no expiresAt and get the default TTL.
      if (data.netToolsCache && Array.isArray(data.netToolsCache)) {
        for (const entry of data.netToolsCache) {
          if (!Array.isArray(entry)) continue;
          const [key, value, expiresAt] = entry;
          if (typeof expiresAt !== 'number') {
            this.netToolsCache.set(key, value);
            continue;
          }
          const remaining = expiresAt - now;
          if (remaining > 0) {
            this.netToolsCache.set(key, value, { ttl: remaining });
          }
        }
      }

      this.stats.persistenceLoads++;

      if (this.options.forceDebug) {
        console.log(formatLogMessage('debug',
          `[SmartCache] Loaded persistent cache: ${this.domainCache.size} domains, ${this.netToolsCache.size} nettools`
        ));
      }
    } catch (err) {
      if (this.options.forceDebug) {
        console.log(formatLogMessage('debug',
          `[SmartCache] Failed to load persistent cache: ${err.message}`
        ));
      }
    }
  }

  /**
   * Save cache to disk, at most once every 10 seconds. A call inside that
   * window schedules one deferred save instead of writing immediately.
   */
  savePersistentCache() {
    if (!this.options.enablePersistence) return;

    // Prevent concurrent saves
    if (this.saveInProgress) {
      this.pendingSave = true;
      if (this.options.forceDebug) {
        console.log(formatLogMessage('debug', '[SmartCache] Save in progress, marking pending...'));
      }
      return;
    }

    const minSaveIntervalMs = 10000;
    const now = Date.now();
    const elapsed = now - this.lastSaveTime;
    if (elapsed < minSaveIntervalMs) {
      // Schedule a delayed save if none is pending
      if (!this.saveTimeout && !this.pendingSave) {
        this.pendingSave = true;
        this.saveTimeout = setTimeout(() => {
          this.saveTimeout = null;
          if (this.pendingSave) {
            this.pendingSave = false;
            this.savePersistentCache();
          }
        }, minSaveIntervalMs - elapsed);
      }
      return;
    }

    this._writeCacheToDisk(now);

    // Process any pending saves. The timer is tracked in saveTimeout so that
    // destroy() can cancel it.
    if (this.pendingSave && !this.saveTimeout) {
      this.pendingSave = false;
      this.saveTimeout = setTimeout(() => {
        this.saveTimeout = null;
        this.savePersistentCache();
      }, 1000);
    }
  }

  /**
   * Write the domain and nettools layers to disk immediately (no debounce).
   * Failures are logged in debug mode and otherwise ignored (best effort).
   * @param {number} now - Current time in ms; stored as the file timestamp
   * @private
   */
  _writeCacheToDisk(now) {
    this.saveInProgress = true;
    this.lastSaveTime = now;

    const cacheDir = this.options.persistencePath;
    const cacheFile = path.join(cacheDir, 'smart-cache.json');

    try {
      // Create cache directory if it doesn't exist
      if (!fs.existsSync(cacheDir)) {
        fs.mkdirSync(cacheDir, { recursive: true });
      }

      // Store an absolute expiry per nettools entry so that a later load can
      // restore the remaining TTL. Entries without a finite TTL store null.
      const netToolsEntries = Array.from(this.netToolsCache.entries(), ([key, value]) => {
        const remaining = this.netToolsCache.getRemainingTTL(key);
        return [key, value, Number.isFinite(remaining) ? now + Math.floor(remaining) : null];
      });

      const data = {
        timestamp: now,
        domainCache: Array.from(this.domainCache.entries()),
        netToolsCache: netToolsEntries,
        stats: this.stats
      };

      fs.writeFileSync(cacheFile, JSON.stringify(data, null, 2));
      this.stats.persistenceSaves++;

      if (this.options.forceDebug) {
        console.log(formatLogMessage('debug',
          `[SmartCache] Saved cache to disk: ${cacheFile}`
        ));
      }
    } catch (err) {
      if (this.options.forceDebug) {
        console.log(formatLogMessage('debug',
          `[SmartCache] Failed to save cache: ${err.message}`
        ));
      }
    } finally {
      this.saveInProgress = false;
    }
  }

  /**
   * Set up auto-save interval
   * @private
   */
  _setupAutoSave() {
    this.autoSaveInterval = setInterval(() => {
      this.savePersistentCache();
    }, this.options.autoSaveInterval);
    // Housekeeping timer: must not keep the process alive on its own.
    this.autoSaveInterval.unref();
  }

  /**
   * Clean up resources: stop all timers, flush the cache to disk when
   * persistence is enabled, then clear every layer.
   */
  destroy() {
    if (this.memoryCheckInterval) {
      clearInterval(this.memoryCheckInterval);
      this.memoryCheckInterval = null;
    }
    if (this.autoSaveInterval) {
      clearInterval(this.autoSaveInterval);
      this.autoSaveInterval = null;
    }
    if (this.saveTimeout) {
      clearTimeout(this.saveTimeout);
      this.saveTimeout = null;
    }
    this.pendingSave = false;

    // Final save bypasses the debounce: a deferred save would run after
    // clear() below and overwrite the file with empty caches.
    if (this.options.enablePersistence) {
      this._writeCacheToDisk(Date.now());
    }

    this.clear();
  }

  /**
   * Clear persistent cache files and directories.
   *
   * Note: when `cachePath` is a directory it is removed recursively with
   * everything in it, not only smart-cache.json.
   * @param {Object} options - Clear options
   * @param {boolean} options.silent - Suppress console output
   * @param {boolean} options.forceDebug - Enable debug logging
   * @param {string} options.cachePath - Cache directory (default '.cache')
   * @returns {Object} Clear operation results
   */
  static clearPersistentCache(options = {}) {
    const { silent = false, forceDebug = false, cachePath = '.cache' } = options;

    const cachePaths = [
      cachePath,
      path.join(cachePath, 'smart-cache.json'),
      // Add other potential cache files here if needed
    ];

    let clearedItems = 0;
    let totalSize = 0;
    const clearedFiles = [];
    const errors = [];

    if (!silent) {
      console.log(`\n??? Clearing cache...`);
    }

    for (const currentCachePath of cachePaths) {
      if (!fs.existsSync(currentCachePath)) continue;

      try {
        const stats = fs.statSync(currentCachePath);
        if (stats.isDirectory()) {
          // Sum the sizes of the directory's direct entries (not recursive)
          let directorySize = 0;
          for (const file of fs.readdirSync(currentCachePath)) {
            const filePath = path.join(currentCachePath, file);
            if (fs.existsSync(filePath)) {
              directorySize += fs.statSync(filePath).size;
            }
          }
          fs.rmSync(currentCachePath, { recursive: true, force: true });
          totalSize += directorySize;
          clearedItems++;
          clearedFiles.push({ type: 'directory', path: currentCachePath, size: directorySize });
          if (forceDebug) {
            console.log(formatLogMessage('debug', `Cleared cache directory: ${currentCachePath}`));
          }
        } else {
          fs.unlinkSync(currentCachePath);
          totalSize += stats.size;
          clearedItems++;
          clearedFiles.push({ type: 'file', path: currentCachePath, size: stats.size });
          if (forceDebug) {
            console.log(formatLogMessage('debug', `Cleared cache file: ${currentCachePath}`));
          }
        }
      } catch (clearErr) {
        errors.push({ path: currentCachePath, error: clearErr.message });
        if (forceDebug) {
          console.log(formatLogMessage('debug', `Failed to clear ${currentCachePath}: ${clearErr.message}`));
        }
      }
    }

    const result = {
      success: errors.length === 0,
      clearedItems,
      totalSize,
      sizeMB: (totalSize / 1024 / 1024).toFixed(2),
      clearedFiles,
      errors
    };

    if (!silent) {
      if (clearedItems > 0) {
        console.log(`? Cache cleared: ${clearedItems} item(s), ${result.sizeMB}MB freed`);
      } else {
        console.log(`?? No cache files found to clear`);
      }

      if (errors.length > 0) {
        console.warn(`?? ${errors.length} error(s) occurred during cache clearing`);
      }
    }

    return result;
  }
}
|
|
828
|
+
|
|
829
|
+
/**
 * Factory function to create a SmartCache instance from scanner config keys.
 *
 * Recognised keys: cache_max_size, cache_ttl_minutes, cache_patterns,
 * cache_responses, cache_nettools, cache_persistence, cache_path, forceDebug,
 * cache_autosave, cache_autosave_minutes, cache_max_heap_mb,
 * cache_memory_check_seconds, max_concurrent_sites, cache_aggressive_mode.
 *
 * cache_max_heap_mb and cache_memory_check_seconds are passed on as undefined
 * when not configured, so SmartCache derives them from the concurrency.
 * @param {Object} config - Configuration object
 * @returns {SmartCache} SmartCache instance
 */
function createSmartCache(config = {}) {
  const heapLimitMb = config.cache_max_heap_mb;
  const memoryCheckSeconds = config.cache_memory_check_seconds;

  return new SmartCache({
    maxSize: config.cache_max_size,
    ttl: (config.cache_ttl_minutes || 60) * 60 * 1000,
    enablePatternCache: config.cache_patterns !== false,
    enableResponseCache: config.cache_responses !== false,
    enableWhoisCache: config.cache_nettools !== false,
    enablePersistence: config.cache_persistence === true,
    persistencePath: config.cache_path || '.cache',
    forceDebug: config.forceDebug || false,
    autoSave: config.cache_autosave !== false,
    autoSaveInterval: (config.cache_autosave_minutes || 1) * 60 * 1000,
    maxHeapUsage: heapLimitMb ? heapLimitMb * 1024 * 1024 : undefined,
    // A hard-coded 30s fallback here would override the constructor's
    // concurrency-scaled default, so leave it undefined when not configured.
    memoryCheckInterval: memoryCheckSeconds ? memoryCheckSeconds * 1000 : undefined,
    concurrency: config.max_concurrent_sites || 6,
    aggressiveMode: config.cache_aggressive_mode === true
  });
}
|
|
852
|
+
|
|
853
|
+
// Public surface of the smart-cache module: the class and its config-driven factory.
module.exports = { SmartCache, createSmartCache };
// The static clearPersistentCache is also exposed as a standalone export.
module.exports.clearPersistentCache = SmartCache.clearPersistentCache;
|
package/nwss.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// === Network scanner script (nwss.js) v1.0.
|
|
1
|
+
// === Network scanner script (nwss.js) v1.0.58 ===
|
|
2
2
|
|
|
3
3
|
// puppeteer for browser automation, fs for file system operations, psl for domain parsing.
|
|
4
4
|
// const pLimit = require('p-limit'); // Will be dynamically imported
|
|
@@ -18,7 +18,7 @@ const { handleCloudflareProtection } = require('./lib/cloudflare');
|
|
|
18
18
|
// FP Bypass
|
|
19
19
|
const { handleFlowProxyProtection, getFlowProxyTimeouts } = require('./lib/flowproxy');
|
|
20
20
|
// ignore_similar rules
|
|
21
|
-
const { shouldIgnoreSimilarDomain } = require('./lib/ignore_similar');
|
|
21
|
+
const { shouldIgnoreSimilarDomain, calculateSimilarity } = require('./lib/ignore_similar');
|
|
22
22
|
// Graceful exit
|
|
23
23
|
const { handleBrowserExit, cleanupChromeTempFiles } = require('./lib/browserexit');
|
|
24
24
|
// Whois & Dig
|
|
@@ -33,13 +33,15 @@ const { colorize, colors, messageColors, tags, formatLogMessage } = require('./l
|
|
|
33
33
|
const { performPageInteraction, createInteractionConfig } = require('./lib/interaction');
|
|
34
34
|
// Domain detection cache for performance optimization
|
|
35
35
|
const { createGlobalHelpers, getTotalDomainsSkipped, getDetectedDomainsCount } = require('./lib/domain-cache');
|
|
36
|
+
const { createSmartCache } = require('./lib/smart-cache'); // Smart cache system
|
|
37
|
+
const { clearPersistentCache } = require('./lib/smart-cache');
|
|
36
38
|
// Enhanced redirect handling
|
|
37
39
|
const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/redirect');
|
|
38
40
|
// Ensure web browser is working correctly
|
|
39
41
|
const { monitorBrowserHealth, isBrowserHealthy } = require('./lib/browserhealth');
|
|
40
42
|
|
|
41
43
|
// --- Script Configuration & Constants ---
|
|
42
|
-
const VERSION = '1.0.
|
|
44
|
+
const VERSION = '1.0.58'; // Script version
|
|
43
45
|
|
|
44
46
|
// get startTime
|
|
45
47
|
const startTime = Date.now();
|
|
@@ -48,6 +50,9 @@ const startTime = Date.now();
|
|
|
48
50
|
const domainCacheOptions = { enableLogging: false }; // Set to true for cache debug logs
|
|
49
51
|
const { isDomainAlreadyDetected, markDomainAsDetected } = createGlobalHelpers(domainCacheOptions);
|
|
50
52
|
|
|
53
|
+
// Smart cache will be initialized after config is loaded
|
|
54
|
+
let smartCache = null;
|
|
55
|
+
|
|
51
56
|
// --- Command-Line Argument Parsing ---
|
|
52
57
|
const args = process.argv.slice(2);
|
|
53
58
|
|
|
@@ -98,6 +103,8 @@ const validateConfig = args.includes('--validate-config');
|
|
|
98
103
|
const validateRules = args.includes('--validate-rules');
|
|
99
104
|
const testValidation = args.includes('--test-validation');
|
|
100
105
|
let cleanRules = args.includes('--clean-rules');
|
|
106
|
+
const clearCache = args.includes('--clear-cache');
|
|
107
|
+
const ignoreCache = args.includes('--ignore-cache');
|
|
101
108
|
|
|
102
109
|
let validateRulesFile = null;
|
|
103
110
|
const validateRulesIndex = args.findIndex(arg => arg === '--validate-rules');
|
|
@@ -220,6 +227,15 @@ if (args.includes('--version')) {
|
|
|
220
227
|
process.exit(0);
|
|
221
228
|
}
|
|
222
229
|
|
|
230
|
+
// --clear-cache, first pass: this runs before config loading, so only the
// default '.cache' location is targeted here. Not executed in dry-run mode.
if (clearCache && !dryRunMode) {
  const earlyClearOptions = {
    silent: silentMode,
    forceDebug,
    cachePath: '.cache' // Default path; custom paths are handled once config is loaded
  };
  clearPersistentCache(earlyClearOptions);
}
|
|
238
|
+
|
|
223
239
|
// Handle validation-only operations before main help
|
|
224
240
|
if (testValidation) {
|
|
225
241
|
console.log(`\n${messageColors.processing('Running domain validation tests...')}`);
|
|
@@ -356,6 +372,8 @@ Validation Options:
|
|
|
356
372
|
--validate-rules [file] Validate rule file format (uses --output/--compare files if no file specified)
|
|
357
373
|
--clean-rules [file] Clean rule files by removing invalid lines and optionally duplicates (uses --output/--compare files if no file specified)
|
|
358
374
|
--test-validation Run domain validation tests and exit
|
|
375
|
+
--clear-cache Clear persistent cache before scanning (improves fresh start performance)
|
|
376
|
+
--ignore-cache Bypass all smart caching functionality during scanning
|
|
359
377
|
|
|
360
378
|
Global config.json options:
|
|
361
379
|
ignoreDomains: ["domain.com", "*.ads.com"] Domains to completely ignore (supports wildcards)
|
|
@@ -547,6 +565,41 @@ const RESOURCE_CLEANUP_INTERVAL = (() => {
|
|
|
547
565
|
return 180;
|
|
548
566
|
})();
|
|
549
567
|
|
|
568
|
+
// --clear-cache, second pass: config is now loaded, so a custom cache_path can be honoured.
//   - dry-run mode: clear the configured path, or '.cache' when none is set
//   - normal mode:  clear only a custom path that differs from '.cache'
if (clearCache) {
  const configuredCachePath = config.cache_path;
  let postConfigClearPath = null;

  if (dryRunMode) {
    postConfigClearPath = configuredCachePath || '.cache';
  } else if (configuredCachePath && configuredCachePath !== '.cache') {
    postConfigClearPath = configuredCachePath;
  }

  if (postConfigClearPath !== null) {
    clearPersistentCache({
      silent: silentMode,
      forceDebug,
      cachePath: postConfigClearPath
    });
  }
}
|
|
585
|
+
|
|
586
|
+
// Set up the smart cache now that config is available; --ignore-cache leaves it null.
if (!ignoreCache) {
  const smartCacheConfig = {
    ...config,
    forceDebug,
    // Concurrency info for the cache
    max_concurrent_sites: MAX_CONCURRENT_SITES,
    // Aggressive mode switches on automatically above 12 concurrent sites
    cache_aggressive_mode: MAX_CONCURRENT_SITES > 12,
    // These two come after the spread, so they override whatever config says:
    // persistence and auto-save are always disabled here.
    cache_persistence: false,
    cache_autosave: false,
    cache_autosave_minutes: config.cache_autosave_minutes || 1,
    cache_max_size: config.cache_max_size || 5000
  };
  smartCache = createSmartCache(smartCacheConfig);
} else {
  smartCache = null;
  if (forceDebug) {
    console.log(formatLogMessage('debug', 'Smart cache disabled by --ignore-cache flag'));
  }
}
|
|
602
|
+
|
|
550
603
|
// Handle --clean-rules after config is loaded (so we have access to sites)
|
|
551
604
|
if (cleanRules || cleanRulesFile) {
|
|
552
605
|
const filesToClean = cleanRulesFile ? [cleanRulesFile] : [outputFile, compareFile].filter(Boolean);
|
|
@@ -949,6 +1002,10 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
949
1002
|
// --- Main Asynchronous IIFE (Immediately Invoked Function Expression) ---
|
|
950
1003
|
// This is the main entry point and execution block for the network scanner script.
|
|
951
1004
|
(async () => {
|
|
1005
|
+
|
|
1006
|
+
// Declare userDataDir in outer scope for cleanup access
|
|
1007
|
+
let userDataDir = null;
|
|
1008
|
+
|
|
952
1009
|
/**
|
|
953
1010
|
* Creates a new browser instance with consistent configuration
|
|
954
1011
|
* Uses system Chrome and temporary directories to minimize disk usage
|
|
@@ -957,7 +1014,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
957
1014
|
async function createBrowser() {
|
|
958
1015
|
// Create temporary user data directory that we can fully control and clean up
|
|
959
1016
|
const tempUserDataDir = `/tmp/puppeteer-${Date.now()}-${Math.random().toString(36).substring(7)}`;
|
|
960
|
-
|
|
1017
|
+
userDataDir = tempUserDataDir; // Store for cleanup tracking (use outer scope variable)
|
|
961
1018
|
|
|
962
1019
|
// Try to find system Chrome installation to avoid Puppeteer downloads
|
|
963
1020
|
const systemChromePaths = [
|
|
@@ -1485,6 +1542,46 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1485
1542
|
const similarityThreshold = siteConfig.ignore_similar_threshold || ignore_similar_threshold;
|
|
1486
1543
|
const ignoreSimilarIgnoredDomains = siteConfig.ignore_similar_ignored_domains !== undefined ? siteConfig.ignore_similar_ignored_domains : ignore_similar_ignored_domains;
|
|
1487
1544
|
|
|
1545
|
+
// Use smart cache's similarity cache for performance (if cache is enabled)
|
|
1546
|
+
if (ignoreSimilarEnabled && smartCache) {
|
|
1547
|
+
const existingDomains = matchedDomains instanceof Map
|
|
1548
|
+
? Array.from(matchedDomains.keys()).filter(key => !['dryRunMatches', 'dryRunNetTools', 'dryRunSearchString'].includes(key))
|
|
1549
|
+
: Array.from(matchedDomains);
|
|
1550
|
+
|
|
1551
|
+
// Check cached similarity scores first
|
|
1552
|
+
for (const existingDomain of existingDomains) {
|
|
1553
|
+
const cachedSimilarity = smartCache.getCachedSimilarity(domain, existingDomain);
|
|
1554
|
+
if (cachedSimilarity !== null && cachedSimilarity >= similarityThreshold) {
|
|
1555
|
+
if (forceDebug) {
|
|
1556
|
+
console.log(formatLogMessage('debug', `[SmartCache] Used cached similarity: ${domain} ~= ${existingDomain} (${cachedSimilarity}%)`));
|
|
1557
|
+
}
|
|
1558
|
+
return; // Skip adding this domain
|
|
1559
|
+
}
|
|
1560
|
+
|
|
1561
|
+
// If no cached similarity exists, calculate and cache it
|
|
1562
|
+
if (cachedSimilarity === null) {
|
|
1563
|
+
const similarity = calculateSimilarity(domain, existingDomain);
|
|
1564
|
+
if (smartCache && !ignoreCache) {
|
|
1565
|
+
smartCache.cacheSimilarity(domain, existingDomain, similarity);
|
|
1566
|
+
}
|
|
1567
|
+
}
|
|
1568
|
+
}
|
|
1569
|
+
}
|
|
1570
|
+
|
|
1571
|
+
// Check smart cache first (if cache is enabled)
|
|
1572
|
+
const context = {
|
|
1573
|
+
filterRegex: siteConfig.filterRegex,
|
|
1574
|
+
searchString: siteConfig.searchstring,
|
|
1575
|
+
resourceType: resourceType
|
|
1576
|
+
};
|
|
1577
|
+
|
|
1578
|
+
if (smartCache && smartCache.shouldSkipDomain(domain, context)) {
|
|
1579
|
+
if (forceDebug) {
|
|
1580
|
+
console.log(formatLogMessage('debug', `[SmartCache] Skipping cached domain: ${domain}`));
|
|
1581
|
+
}
|
|
1582
|
+
return; // Skip adding this domain
|
|
1583
|
+
}
|
|
1584
|
+
|
|
1488
1585
|
if (ignoreSimilarEnabled) {
|
|
1489
1586
|
const existingDomains = matchedDomains instanceof Map
|
|
1490
1587
|
? Array.from(matchedDomains.keys()).filter(key => !['dryRunMatches', 'dryRunNetTools', 'dryRunSearchString'].includes(key))
|
|
@@ -1523,6 +1620,11 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1523
1620
|
// Mark full subdomain as detected for future reference
|
|
1524
1621
|
markDomainAsDetected(cacheKey);
|
|
1525
1622
|
|
|
1623
|
+
// Also mark in smart cache with context (if cache is enabled)
|
|
1624
|
+
if (smartCache) {
|
|
1625
|
+
smartCache.markDomainProcessed(domain, context, { resourceType, fullSubdomain });
|
|
1626
|
+
}
|
|
1627
|
+
|
|
1526
1628
|
if (matchedDomains instanceof Map) {
|
|
1527
1629
|
if (!matchedDomains.has(domain)) {
|
|
1528
1630
|
matchedDomains.set(domain, new Set());
|
|
@@ -1716,7 +1818,8 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1716
1818
|
|
|
1717
1819
|
// REMOVED: Check if this URL matches any blocked patterns - if so, skip detection but still continue browser blocking
|
|
1718
1820
|
// This check is no longer needed here since even_blocked handles it above
|
|
1719
|
-
|
|
1821
|
+
|
|
1822
|
+
|
|
1720
1823
|
// If NO searchstring AND NO nettools are defined, match immediately (existing behavior)
|
|
1721
1824
|
if (!hasSearchString && !hasSearchStringAnd && !hasNetTools) {
|
|
1722
1825
|
if (dryRunMode) {
|
|
@@ -1767,6 +1870,15 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1767
1870
|
}
|
|
1768
1871
|
|
|
1769
1872
|
// Create and execute nettools handler
|
|
1873
|
+
// Check smart cache for nettools results (if cache is enabled)
|
|
1874
|
+
const cachedWhois = smartCache ? smartCache.getCachedNetTools(reqDomain, 'whois') : null;
|
|
1875
|
+
const cachedDig = smartCache ? smartCache.getCachedNetTools(reqDomain, 'dig', digRecordType) : null;
|
|
1876
|
+
|
|
1877
|
+
if ((cachedWhois || cachedDig) && forceDebug) {
|
|
1878
|
+
console.log(formatLogMessage('debug', `[SmartCache] Using cached nettools results for ${reqDomain}`));
|
|
1879
|
+
}
|
|
1880
|
+
|
|
1881
|
+
// Create nettools handler with cache callbacks (if cache is enabled)
|
|
1770
1882
|
const netToolsHandler = createNetToolsHandler({
|
|
1771
1883
|
whoisTerms,
|
|
1772
1884
|
whoisOrTerms,
|
|
@@ -1784,6 +1896,15 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1784
1896
|
matchedDomains,
|
|
1785
1897
|
addMatchedDomain,
|
|
1786
1898
|
isDomainAlreadyDetected,
|
|
1899
|
+
// Add cache callbacks if smart cache is available and caching is enabled
|
|
1900
|
+
onWhoisResult: smartCache ? (domain, result) => {
|
|
1901
|
+
smartCache.cacheNetTools(domain, 'whois', result);
|
|
1902
|
+
} : undefined,
|
|
1903
|
+
onDigResult: smartCache ? (domain, result, recordType) => {
|
|
1904
|
+
smartCache.cacheNetTools(domain, 'dig', result, recordType);
|
|
1905
|
+
} : undefined,
|
|
1906
|
+
cachedWhois,
|
|
1907
|
+
cachedDig,
|
|
1787
1908
|
currentUrl,
|
|
1788
1909
|
getRootDomain,
|
|
1789
1910
|
siteConfig,
|
|
@@ -1823,6 +1944,13 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1823
1944
|
|
|
1824
1945
|
// If curl is enabled, download and analyze content immediately
|
|
1825
1946
|
if (useCurl) {
|
|
1947
|
+
// Check response cache first if smart cache is available and caching is enabled
|
|
1948
|
+
const cachedContent = smartCache ? smartCache.getCachedResponse(reqUrl) : null;
|
|
1949
|
+
|
|
1950
|
+
if (cachedContent && forceDebug) {
|
|
1951
|
+
console.log(formatLogMessage('debug', `[SmartCache] Using cached response content for ${reqUrl.substring(0, 50)}...`));
|
|
1952
|
+
// Process cached content instead of fetching
|
|
1953
|
+
} else {
|
|
1826
1954
|
try {
|
|
1827
1955
|
// Use grep handler if both grep and searchstring/searchstring_and are enabled
|
|
1828
1956
|
if (useGrep && (hasSearchString || hasSearchStringAnd)) {
|
|
@@ -1833,6 +1961,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1833
1961
|
matchedDomains,
|
|
1834
1962
|
addMatchedDomain, // Pass the helper function
|
|
1835
1963
|
isDomainAlreadyDetected,
|
|
1964
|
+
onContentFetched: smartCache && !ignoreCache ? (url, content) => {
|
|
1965
|
+
smartCache.cacheResponse(url, content);
|
|
1966
|
+
} : undefined,
|
|
1836
1967
|
currentUrl,
|
|
1837
1968
|
perSiteSubDomains,
|
|
1838
1969
|
ignoreDomains,
|
|
@@ -1885,6 +2016,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1885
2016
|
console.log(formatLogMessage('debug', `Curl handler failed for ${reqUrl}: ${curlErr.message}`));
|
|
1886
2017
|
}
|
|
1887
2018
|
}
|
|
2019
|
+
}
|
|
1888
2020
|
}
|
|
1889
2021
|
|
|
1890
2022
|
break;
|
|
@@ -2494,6 +2626,19 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2494
2626
|
console.log(formatLogMessage('debug', `Output format: ${getFormatDescription(globalOptions)}`));
|
|
2495
2627
|
console.log(formatLogMessage('debug', `Generated ${outputResult.totalRules} rules from ${outputResult.successfulPageLoads} successful page loads`));
|
|
2496
2628
|
console.log(formatLogMessage('debug', `Performance: ${totalDomainsSkipped} domains skipped (already detected), ${detectedDomainsCount} unique domains cached`));
|
|
2629
|
+
// Log smart cache statistics (if cache is enabled)
|
|
2630
|
+
if (smartCache) {
|
|
2631
|
+
const cacheStats = smartCache.getStats();
|
|
2632
|
+
console.log(formatLogMessage('debug', '=== Smart Cache Statistics ==='));
|
|
2633
|
+
console.log(formatLogMessage('debug', `Runtime: ${cacheStats.runtime}s, Total entries: ${cacheStats.totalCacheEntries}`));
|
|
2634
|
+
console.log(formatLogMessage('debug', `Hit Rates - Domain: ${cacheStats.hitRate}, Pattern: ${cacheStats.patternHitRate}`));
|
|
2635
|
+
console.log(formatLogMessage('debug', `Response: ${cacheStats.responseHitRate}, NetTools: ${cacheStats.netToolsHitRate}`));
|
|
2636
|
+
console.log(formatLogMessage('debug', `Regex compilations saved: ${cacheStats.regexCacheHits}`));
|
|
2637
|
+
console.log(formatLogMessage('debug', `Similarity cache hits: ${cacheStats.similarityHits}`));
|
|
2638
|
+
if (config.cache_persistence) {
|
|
2639
|
+
console.log(formatLogMessage('debug', `Persistence - Loads: ${cacheStats.persistenceLoads}, Saves: ${cacheStats.persistenceSaves}`));
|
|
2640
|
+
}
|
|
2641
|
+
}
|
|
2497
2642
|
}
|
|
2498
2643
|
|
|
2499
2644
|
// Compress log files if --compress-logs is enabled
|
|
@@ -2571,6 +2716,11 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2571
2716
|
const seconds = totalSeconds % 60;
|
|
2572
2717
|
|
|
2573
2718
|
// Final summary report with timing and success statistics
|
|
2719
|
+
// Clean up smart cache (if it exists)
|
|
2720
|
+
if (smartCache) {
|
|
2721
|
+
smartCache.destroy();
|
|
2722
|
+
}
|
|
2723
|
+
|
|
2574
2724
|
if (!silentMode) {
|
|
2575
2725
|
if (pagesWithMatches > outputResult.successfulPageLoads) {
|
|
2576
2726
|
console.log(`\n${messageColors.success(dryRunMode ? 'Dry run completed.' : 'Scan completed.')} ${outputResult.successfulPageLoads} of ${totalUrls} URLs loaded successfully, ${pagesWithMatches} had matches in ${messageColors.timing(`${hours}h ${minutes}m ${seconds}s`)}`);
|
|
@@ -2588,6 +2738,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2588
2738
|
if (totalDomainsSkipped > 0) {
|
|
2589
2739
|
console.log(messageColors.info('Performance:') + ` ${totalDomainsSkipped} domains skipped (already detected)`);
|
|
2590
2740
|
}
|
|
2741
|
+
if (ignoreCache && forceDebug) {
|
|
2742
|
+
console.log(messageColors.info('Cache:') + ` Smart caching was disabled`);
|
|
2743
|
+
}
|
|
2591
2744
|
}
|
|
2592
2745
|
|
|
2593
2746
|
// Clean process termination
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.58",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|