@fanboynz/network-scanner 1.0.55 → 1.0.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,18 +2,18 @@ name: Publish to NPM
2
2
  on:
3
3
  push:
4
4
  branches: [ main, master ]
5
- paths-ignore:
6
- - 'README.md'
7
- - 'docs/**'
8
- - '.gitignore'
9
5
 
10
6
  jobs:
11
7
  publish:
12
8
  runs-on: ubuntu-latest
9
+ permissions:
10
+ contents: write # Required so the job can push the version-bump commit and tag back to the repository
11
+
13
12
  steps:
14
13
  - uses: actions/checkout@v4
15
14
  with:
16
15
  token: ${{ secrets.GITHUB_TOKEN }}
16
+ fetch-depth: 0
17
17
 
18
18
  - name: Setup Node.js
19
19
  uses: actions/setup-node@v4
@@ -21,22 +21,22 @@ jobs:
21
21
  node-version: '18'
22
22
  registry-url: 'https://registry.npmjs.org'
23
23
 
24
- - name: Install dependencies
25
- run: npm ci
26
-
27
- - name: Run linting
28
- run: npm run lint
29
-
24
+ - run: npm ci
25
+ - run: npm run lint
26
+
30
27
  - name: Configure git
31
28
  run: |
32
- git config --global user.name "github-actions[bot]"
33
- git config --global user.email "github-actions[bot]@users.noreply.github.com"
29
+ git config user.name "github-actions[bot]"
30
+ git config user.email "github-actions[bot]@users.noreply.github.com"
34
31
 
35
- - name: Version bump and publish
32
+ - name: Version and publish
36
33
  run: |
37
34
  npm version patch
38
35
  npm publish
39
- git push --follow-tags
40
36
  env:
41
37
  NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
38
+
39
+ - name: Push changes
40
+ run: git push --follow-tags
41
+ env:
42
42
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -0,0 +1,551 @@
1
+ /**
2
+ * Smart Cache Module - Intelligent multi-layer caching system for network scanner
3
+ * Provides context-aware caching for domains, patterns, responses, and network tools
4
+ */
5
+
6
+ const { LRUCache } = require('lru-cache');
7
+ const fs = require('fs');
8
+ const path = require('path');
9
+ const { formatLogMessage } = require('./colorize');
10
+
11
/**
 * SmartCache - Intelligent caching system with multiple cache layers.
 *
 * Layers (all LRU + TTL unless noted):
 *  - domainCache:     domains already processed for a given scan context
 *  - patternCache:    url/pattern match results
 *  - responseCache:   fetched response bodies (bounded by total size)
 *  - netToolsCache:   WHOIS / dig results
 *  - similarityCache: pairwise domain similarity scores
 *  - regexCache:      compiled RegExp objects (plain Map, unbounded)
 *
 * The domain and nettools layers can optionally be persisted to disk.
 * @class
 */
class SmartCache {
  /**
   * @param {Object} [options]
   * @param {number} [options.maxSize=5000] - Max entries in the domain cache
   * @param {number} [options.ttl=3600000] - Base TTL in milliseconds
   * @param {boolean} [options.enablePatternCache=true]
   * @param {boolean} [options.enableResponseCache=true]
   * @param {boolean} [options.enableWhoisCache=true]
   * @param {boolean} [options.enablePersistence=false] - Load/save cache on disk
   * @param {string} [options.persistencePath='.cache'] - Directory for the cache file
   * @param {boolean} [options.forceDebug=false] - Emit debug log lines
   * @param {boolean} [options.autoSave=true] - Periodically save when persistence is on
   * @param {number} [options.autoSaveInterval=60000] - Auto-save period in milliseconds
   */
  constructor(options = {}) {
    this.options = {
      maxSize: options.maxSize || 5000,
      ttl: options.ttl || 1000 * 60 * 60, // 1 hour default
      enablePatternCache: options.enablePatternCache !== false,
      enableResponseCache: options.enableResponseCache !== false,
      enableWhoisCache: options.enableWhoisCache !== false,
      enablePersistence: options.enablePersistence === true,
      persistencePath: options.persistencePath || '.cache',
      forceDebug: options.forceDebug || false,
      autoSave: options.autoSave !== false,
      autoSaveInterval: options.autoSaveInterval || 60000 // 1 minute
    };

    this._initializeCaches();
    this._initializeStats();

    if (this.options.enablePersistence) {
      this._loadPersistentCache();
      if (this.options.autoSave) {
        this._setupAutoSave();
      }
    }
  }

  /**
   * Initialize all cache layers
   * @private
   */
  _initializeCaches() {
    // TTL of the WHOIS/DNS layer; kept on the instance because the
    // persistence loader needs it for files written without TTL data.
    this._netToolsTtl = 1000 * 60 * 60 * 24; // 24 hours

    // Domain detection cache with TTL
    this.domainCache = new LRUCache({
      max: this.options.maxSize,
      ttl: this.options.ttl,
      updateAgeOnGet: true,
      updateAgeOnHas: false
    });

    // Pattern matching results cache
    this.patternCache = new LRUCache({
      max: 1000,
      ttl: this.options.ttl * 2 // Patterns are more stable
    });

    // Response content cache for searchstring operations
    this.responseCache = new LRUCache({
      max: 200,
      ttl: 1000 * 60 * 30, // 30 minutes for response content
      maxSize: 50 * 1024 * 1024, // 50MB max cache size
      sizeCalculation: (value) => value.length
    });

    // WHOIS/DNS results cache
    this.netToolsCache = new LRUCache({
      max: 500,
      ttl: this._netToolsTtl
    });

    // Similarity cache for expensive string comparisons
    this.similarityCache = new LRUCache({
      max: 2000,
      ttl: this.options.ttl
    });

    // Regex compilation cache
    this.regexCache = new Map();
  }

  /**
   * Initialize (or reset) statistics tracking
   * @private
   */
  _initializeStats() {
    this.stats = {
      hits: 0,
      misses: 0,
      patternHits: 0,
      patternMisses: 0,
      responseHits: 0,
      responseMisses: 0,
      netToolsHits: 0,
      netToolsMisses: 0,
      similarityHits: 0,
      similarityMisses: 0,
      regexCompilations: 0,
      regexCacheHits: 0,
      persistenceLoads: 0,
      persistenceSaves: 0,
      startTime: Date.now()
    };
  }

  /**
   * Check if domain should be skipped based on smart caching
   * @param {string} domain - Domain to check
   * @param {Object} context - Processing context
   * @returns {boolean} True if domain should be skipped
   */
  shouldSkipDomain(domain, context = {}) {
    const cacheKey = this._generateCacheKey(domain, context);

    if (!this.domainCache.has(cacheKey)) {
      this.stats.misses++;
      return false;
    }

    this.stats.hits++;
    if (this.options.forceDebug) {
      // peek() reads the entry without touching its TTL or recency, so
      // turning debug logging on cannot change what the cache retains.
      const cached = this.domainCache.peek(cacheKey);
      const age = cached ? Date.now() - cached.timestamp : 0;
      console.log(formatLogMessage('debug',
        `[SmartCache] Cache hit for ${domain} (age: ${Math.round(age/1000)}s, context: ${JSON.stringify(context)})`
      ));
    }
    return true;
  }

  /**
   * Mark domain as processed with context
   * @param {string} domain - Domain to mark
   * @param {Object} context - Processing context
   * @param {Object} metadata - Additional metadata to store
   */
  markDomainProcessed(domain, context = {}, metadata = {}) {
    const cacheKey = this._generateCacheKey(domain, context);
    this.domainCache.set(cacheKey, {
      timestamp: Date.now(),
      metadata,
      context,
      domain
    });

    if (this.options.forceDebug) {
      console.log(formatLogMessage('debug',
        `[SmartCache] Marked ${domain} as processed (context: ${JSON.stringify(context)})`
      ));
    }
  }

  /**
   * Generate cache key with context awareness.
   *
   * Every context field keeps its own slot in the key, so the same value in
   * two different fields (e.g. filterRegex 'x' vs searchString 'x') yields
   * two different keys. JSON encoding keeps the key unambiguous even when a
   * field contains the separator or is an array.
   * @param {string} domain - Domain
   * @param {Object} context - Context object (null/undefined means "no context")
   * @returns {string} Cache key
   * @private
   */
  _generateCacheKey(domain, context) {
    const { filterRegex, searchString, resourceType, nettools } = context || {};
    const normalize = (part) => {
      if (!part) return '';
      return Array.isArray(part) ? part.map(String) : String(part);
    };

    return JSON.stringify([
      domain,
      normalize(filterRegex),
      normalize(searchString),
      normalize(resourceType),
      nettools ? 'nt' : ''
    ]);
  }

  /**
   * Get or compile regex pattern with caching
   * @param {string} pattern - Regex pattern string (optionally wrapped in /.../)
   * @returns {RegExp|null} Compiled regex, or null if the pattern is invalid
   */
  getCompiledRegex(pattern) {
    if (this.regexCache.has(pattern)) {
      this.stats.regexCacheHits++;
      return this.regexCache.get(pattern);
    }

    this.stats.regexCompilations++;
    try {
      const regex = new RegExp(pattern.replace(/^\/(.*)\/$/, '$1'));
      this.regexCache.set(pattern, regex);
      return regex;
    } catch (err) {
      if (this.options.forceDebug) {
        console.log(formatLogMessage('debug',
          `[SmartCache] Failed to compile regex: ${pattern}`
        ));
      }
      return null;
    }
  }

  /**
   * Check pattern matching cache
   * @param {string} url - URL to check
   * @param {string} pattern - Regex pattern
   * @returns {boolean|null} Cached result or null if not cached
   */
  getCachedPatternMatch(url, pattern) {
    if (!this.options.enablePatternCache) return null;

    const cacheKey = `${url}:${pattern}`;
    const cached = this.patternCache.get(cacheKey);

    if (cached !== undefined) {
      this.stats.patternHits++;
      if (this.options.forceDebug) {
        console.log(formatLogMessage('debug',
          `[SmartCache] Pattern cache hit for ${url.substring(0, 50)}...`
        ));
      }
      return cached;
    }

    this.stats.patternMisses++;
    return null;
  }

  /**
   * Cache pattern matching result
   * @param {string} url - URL
   * @param {string} pattern - Regex pattern
   * @param {boolean} result - Match result
   */
  cachePatternMatch(url, pattern, result) {
    if (!this.options.enablePatternCache) return;

    const cacheKey = `${url}:${pattern}`;
    this.patternCache.set(cacheKey, result);
  }

  /**
   * Get cached response content
   * @param {string} url - URL
   * @returns {string|null} Cached content or null
   */
  getCachedResponse(url) {
    if (!this.options.enableResponseCache) return null;

    const cached = this.responseCache.get(url);
    if (cached) {
      this.stats.responseHits++;
      if (this.options.forceDebug) {
        console.log(formatLogMessage('debug',
          `[SmartCache] Response cache hit for ${url.substring(0, 50)}...`
        ));
      }
      return cached;
    }

    this.stats.responseMisses++;
    return null;
  }

  /**
   * Cache response content
   * @param {string} url - URL
   * @param {string} content - Response content
   */
  cacheResponse(url, content) {
    if (!this.options.enableResponseCache) return;

    // Only cache non-empty content under 5MB per response. The lower bound
    // matters because the response cache sizes entries by `length` and the
    // LRU rejects entries whose size is not a positive integer.
    if (content && content.length > 0 && content.length < 5 * 1024 * 1024) {
      this.responseCache.set(url, content);
    }
  }

  /**
   * Get cached WHOIS/DNS results
   * @param {string} domain - Domain
   * @param {string} tool - Tool name (whois/dig)
   * @param {string} recordType - Record type for dig
   * @returns {Object|null} Cached result or null
   */
  getCachedNetTools(domain, tool, recordType = null) {
    if (!this.options.enableWhoisCache) return null;

    const cacheKey = `${tool}:${domain}${recordType ? ':' + recordType : ''}`;
    const cached = this.netToolsCache.get(cacheKey);

    if (cached) {
      this.stats.netToolsHits++;
      if (this.options.forceDebug) {
        console.log(formatLogMessage('debug',
          `[SmartCache] ${tool.toUpperCase()} cache hit for ${domain}`
        ));
      }
      return cached;
    }

    this.stats.netToolsMisses++;
    return null;
  }

  /**
   * Cache WHOIS/DNS results
   * @param {string} domain - Domain
   * @param {string} tool - Tool name
   * @param {Object} result - Result to cache
   * @param {string} recordType - Record type for dig
   */
  cacheNetTools(domain, tool, result, recordType = null) {
    if (!this.options.enableWhoisCache) return;

    const cacheKey = `${tool}:${domain}${recordType ? ':' + recordType : ''}`;
    this.netToolsCache.set(cacheKey, result);
  }

  /**
   * Cache similarity comparison result (order of the two domains is irrelevant)
   * @param {string} domain1 - First domain
   * @param {string} domain2 - Second domain
   * @param {number} similarity - Similarity score
   */
  cacheSimilarity(domain1, domain2, similarity) {
    const key = [domain1, domain2].sort().join('|');
    this.similarityCache.set(key, similarity);
  }

  /**
   * Get cached similarity score
   * @param {string} domain1 - First domain
   * @param {string} domain2 - Second domain
   * @returns {number|null} Cached similarity or null
   */
  getCachedSimilarity(domain1, domain2) {
    const key = [domain1, domain2].sort().join('|');
    const cached = this.similarityCache.get(key);

    if (cached !== undefined) {
      this.stats.similarityHits++;
      return cached;
    }

    this.stats.similarityMisses++;
    return null;
  }

  /**
   * Get cache statistics
   * @returns {Object} Raw counters plus derived hit rates (as 'NN.NN%' strings),
   *   runtime in seconds and per-layer entry counts
   */
  getStats() {
    const runtime = Date.now() - this.stats.startTime;
    // `|| 0` turns the NaN produced by 0/0 (no lookups yet) into 0.
    const hitRate = this.stats.hits / (this.stats.hits + this.stats.misses) || 0;
    const patternHitRate = this.stats.patternHits /
      (this.stats.patternHits + this.stats.patternMisses) || 0;
    const responseHitRate = this.stats.responseHits /
      (this.stats.responseHits + this.stats.responseMisses) || 0;
    const netToolsHitRate = this.stats.netToolsHits /
      (this.stats.netToolsHits + this.stats.netToolsMisses) || 0;

    return {
      ...this.stats,
      runtime: Math.round(runtime / 1000), // seconds
      hitRate: (hitRate * 100).toFixed(2) + '%',
      patternHitRate: (patternHitRate * 100).toFixed(2) + '%',
      responseHitRate: (responseHitRate * 100).toFixed(2) + '%',
      netToolsHitRate: (netToolsHitRate * 100).toFixed(2) + '%',
      domainCacheSize: this.domainCache.size,
      patternCacheSize: this.patternCache.size,
      responseCacheSize: this.responseCache.size,
      netToolsCacheSize: this.netToolsCache.size,
      similarityCacheSize: this.similarityCache.size,
      regexCacheSize: this.regexCache.size,
      totalCacheEntries: this.domainCache.size + this.patternCache.size +
        this.responseCache.size + this.netToolsCache.size +
        this.similarityCache.size + this.regexCache.size
    };
  }

  /**
   * Clear all caches and reset statistics
   */
  clear() {
    this.domainCache.clear();
    this.patternCache.clear();
    this.responseCache.clear();
    this.netToolsCache.clear();
    this.similarityCache.clear();
    this.regexCache.clear();
    this._initializeStats();

    if (this.options.forceDebug) {
      console.log(formatLogMessage('debug', '[SmartCache] All caches cleared'));
    }
  }

  /**
   * Serialize one cache layer as [key, value, remainingTtlMs] triples so the
   * time each entry has left survives a save/load cycle.
   * @param {Object} cache - Cache layer to snapshot
   * @returns {Array} Serializable entry list
   * @private
   */
  _snapshotEntries(cache) {
    return Array.from(cache.entries(), ([key, value]) =>
      [key, value, cache.getRemainingTTL(key)]
    );
  }

  /**
   * Restore persisted entries into a cache layer, giving each entry only the
   * lifetime it had left when saved minus the time spent on disk. Entries
   * that have run out of time, and malformed entries, are skipped.
   * @param {Object} cache - Target cache layer
   * @param {Array} entries - Persisted [key, value, remainingTtlMs?] entries
   * @param {number} elapsedMs - Time since the file was written
   * @param {Function} legacyTtlAtSave - (value) => TTL to assume at save time
   *   for files that were written without a remaining-TTL field
   * @private
   */
  _restoreEntries(cache, entries, elapsedMs, legacyTtlAtSave) {
    if (!Array.isArray(entries)) return;

    for (const entry of entries) {
      if (!Array.isArray(entry)) continue;

      const [key, value, remainingAtSave] = entry;
      const ttlAtSave = Number.isFinite(remainingAtSave)
        ? remainingAtSave
        : legacyTtlAtSave(value);
      const ttlLeft = Math.floor(ttlAtSave - elapsedMs);

      // NaN (e.g. missing timestamp) fails this comparison and is skipped.
      if (ttlLeft > 0) {
        cache.set(key, value, { ttl: ttlLeft });
      }
    }
  }

  /**
   * Load persistent cache from disk. Failures are non-fatal: the cache simply
   * starts empty.
   * @private
   */
  _loadPersistentCache() {
    const cacheFile = path.join(this.options.persistencePath, 'smart-cache.json');

    if (!fs.existsSync(cacheFile)) {
      return;
    }

    try {
      const data = JSON.parse(fs.readFileSync(cacheFile, 'utf8'));
      const now = Date.now();

      // Validate cache age
      if (data.timestamp && now - data.timestamp > 24 * 60 * 60 * 1000) {
        if (this.options.forceDebug) {
          console.log(formatLogMessage('debug',
            '[SmartCache] Persistent cache too old, ignoring'
          ));
        }
        return;
      }

      const savedAt = data.timestamp || now;
      // Clamp so a file stamped in the future cannot extend lifetimes.
      const elapsed = Math.max(0, now - savedAt);

      // Domain entries carry their own creation timestamp, which is used
      // when the file has no remaining-TTL field.
      this._restoreEntries(this.domainCache, data.domainCache, elapsed,
        (value) => this.options.ttl - (savedAt - (value ? value.timestamp : NaN))
      );

      // Nettools entries have no timestamp of their own; without a
      // remaining-TTL field, treat them as fresh at save time at best.
      this._restoreEntries(this.netToolsCache, data.netToolsCache, elapsed,
        () => this._netToolsTtl
      );

      this.stats.persistenceLoads++;

      if (this.options.forceDebug) {
        console.log(formatLogMessage('debug',
          `[SmartCache] Loaded persistent cache: ${this.domainCache.size} domains, ${this.netToolsCache.size} nettools`
        ));
      }
    } catch (err) {
      if (this.options.forceDebug) {
        console.log(formatLogMessage('debug',
          `[SmartCache] Failed to load persistent cache: ${err.message}`
        ));
      }
    }
  }

  /**
   * Save cache to disk (no-op unless persistence is enabled). Best effort:
   * failures are only reported in debug mode.
   */
  savePersistentCache() {
    if (!this.options.enablePersistence) return;

    const cacheDir = this.options.persistencePath;
    const cacheFile = path.join(cacheDir, 'smart-cache.json');
    const tempFile = `${cacheFile}.${process.pid}.tmp`;

    try {
      // recursive: true also makes this a no-op when the directory exists
      fs.mkdirSync(cacheDir, { recursive: true });

      const data = {
        timestamp: Date.now(),
        domainCache: this._snapshotEntries(this.domainCache),
        netToolsCache: this._snapshotEntries(this.netToolsCache),
        stats: this.stats
      };

      // Write to a temp file and rename over the target so an interrupted
      // save never leaves a truncated smart-cache.json behind.
      fs.writeFileSync(tempFile, JSON.stringify(data, null, 2));
      fs.renameSync(tempFile, cacheFile);
      this.stats.persistenceSaves++;

      if (this.options.forceDebug) {
        console.log(formatLogMessage('debug',
          `[SmartCache] Saved cache to disk: ${cacheFile}`
        ));
      }
    } catch (err) {
      if (this.options.forceDebug) {
        console.log(formatLogMessage('debug',
          `[SmartCache] Failed to save cache: ${err.message}`
        ));
      }
      try {
        fs.rmSync(tempFile, { force: true });
      } catch (cleanupErr) {
        // Nothing more can be done; a stray temp file is harmless.
      }
    }
  }

  /**
   * Set up auto-save interval
   * @private
   */
  _setupAutoSave() {
    this.autoSaveInterval = setInterval(() => {
      this.savePersistentCache();
    }, this.options.autoSaveInterval);

    // Do not let the periodic save keep the process alive when destroy()
    // is never reached (early exit, unhandled error path).
    if (typeof this.autoSaveInterval.unref === 'function') {
      this.autoSaveInterval.unref();
    }
  }

  /**
   * Clean up resources: stop auto-save, write a final snapshot (when
   * persistence is enabled) and empty all caches.
   */
  destroy() {
    if (this.autoSaveInterval) {
      clearInterval(this.autoSaveInterval);
      this.autoSaveInterval = null;
    }

    // Save cache one last time
    if (this.options.enablePersistence) {
      this.savePersistentCache();
    }

    this.clear();
  }
}
527
+
528
/**
 * Build a SmartCache from the scanner's snake_case configuration.
 *
 * Minute-based settings are converted to milliseconds; a missing or falsy
 * cache_ttl_minutes falls back to 60 and cache_autosave_minutes to 1.
 * Persistence is opt-in (must be exactly true); the other toggles are
 * enabled unless explicitly set to false.
 * @param {Object} config - Configuration object
 * @returns {SmartCache} SmartCache instance
 */
function createSmartCache(config = {}) {
  const ttlMinutes = config.cache_ttl_minutes || 60;
  const autoSaveMinutes = config.cache_autosave_minutes || 1;

  const cacheOptions = {
    maxSize: config.cache_max_size,
    ttl: ttlMinutes * 60 * 1000,
    enablePatternCache: config.cache_patterns !== false,
    enableResponseCache: config.cache_responses !== false,
    enableWhoisCache: config.cache_nettools !== false,
    enablePersistence: config.cache_persistence === true,
    persistencePath: config.cache_path || '.cache',
    forceDebug: config.forceDebug || false,
    autoSave: config.cache_autosave !== false,
    autoSaveInterval: autoSaveMinutes * 60 * 1000
  };

  return new SmartCache(cacheOptions);
}
547
+
548
+ module.exports = {
549
+ SmartCache,
550
+ createSmartCache
551
+ };
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v1.0.52 ===
1
+ // === Network scanner script (nwss.js) v1.0.57 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -18,7 +18,7 @@ const { handleCloudflareProtection } = require('./lib/cloudflare');
18
18
  // FP Bypass
19
19
  const { handleFlowProxyProtection, getFlowProxyTimeouts } = require('./lib/flowproxy');
20
20
  // ignore_similar rules
21
- const { shouldIgnoreSimilarDomain } = require('./lib/ignore_similar');
21
+ const { shouldIgnoreSimilarDomain, calculateSimilarity } = require('./lib/ignore_similar');
22
22
  // Graceful exit
23
23
  const { handleBrowserExit, cleanupChromeTempFiles } = require('./lib/browserexit');
24
24
  // Whois & Dig
@@ -33,13 +33,14 @@ const { colorize, colors, messageColors, tags, formatLogMessage } = require('./l
33
33
  const { performPageInteraction, createInteractionConfig } = require('./lib/interaction');
34
34
  // Domain detection cache for performance optimization
35
35
  const { createGlobalHelpers, getTotalDomainsSkipped, getDetectedDomainsCount } = require('./lib/domain-cache');
36
+ const { createSmartCache } = require('./lib/smart-cache'); // Smart cache system
36
37
  // Enhanced redirect handling
37
38
  const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/redirect');
38
39
  // Ensure web browser is working correctly
39
40
  const { monitorBrowserHealth, isBrowserHealthy } = require('./lib/browserhealth');
40
41
 
41
42
  // --- Script Configuration & Constants ---
42
- const VERSION = '1.0.52'; // Script version
43
+ const VERSION = '1.0.57'; // Script version
43
44
 
44
45
  // get startTime
45
46
  const startTime = Date.now();
@@ -48,6 +49,9 @@ const startTime = Date.now();
48
49
  const domainCacheOptions = { enableLogging: false }; // Set to true for cache debug logs
49
50
  const { isDomainAlreadyDetected, markDomainAsDetected } = createGlobalHelpers(domainCacheOptions);
50
51
 
52
+ // Smart cache will be initialized after config is loaded
53
+ let smartCache = null;
54
+
51
55
  // --- Command-Line Argument Parsing ---
52
56
  const args = process.argv.slice(2);
53
57
 
@@ -547,6 +551,16 @@ const RESOURCE_CLEANUP_INTERVAL = (() => {
547
551
  return 180;
548
552
  })();
549
553
 
554
+ // Initialize smart cache system AFTER config is loaded
555
+ smartCache = createSmartCache({
556
+ ...config,
557
+ forceDebug,
558
+ cache_persistence: config.cache_persistence !== false, // Enable by default
559
+ cache_autosave: config.cache_autosave !== false,
560
+ cache_autosave_minutes: config.cache_autosave_minutes || 1,
561
+ cache_max_size: config.cache_max_size || 5000
562
+ });
563
+
550
564
  // Handle --clean-rules after config is loaded (so we have access to sites)
551
565
  if (cleanRules || cleanRulesFile) {
552
566
  const filesToClean = cleanRulesFile ? [cleanRulesFile] : [outputFile, compareFile].filter(Boolean);
@@ -949,6 +963,10 @@ function setupFrameHandling(page, forceDebug) {
949
963
  // --- Main Asynchronous IIFE (Immediately Invoked Function Expression) ---
950
964
  // This is the main entry point and execution block for the network scanner script.
951
965
  (async () => {
966
+
967
+ // Declare userDataDir in outer scope for cleanup access
968
+ let userDataDir = null;
969
+
952
970
  /**
953
971
  * Creates a new browser instance with consistent configuration
954
972
  * Uses system Chrome and temporary directories to minimize disk usage
@@ -957,7 +975,7 @@ function setupFrameHandling(page, forceDebug) {
957
975
  async function createBrowser() {
958
976
  // Create temporary user data directory that we can fully control and clean up
959
977
  const tempUserDataDir = `/tmp/puppeteer-${Date.now()}-${Math.random().toString(36).substring(7)}`;
960
- let userDataDir = tempUserDataDir; // Store for cleanup tracking
978
+ userDataDir = tempUserDataDir; // Store for cleanup tracking (use outer scope variable)
961
979
 
962
980
  // Try to find system Chrome installation to avoid Puppeteer downloads
963
981
  const systemChromePaths = [
@@ -1485,6 +1503,46 @@ function setupFrameHandling(page, forceDebug) {
1485
1503
  const similarityThreshold = siteConfig.ignore_similar_threshold || ignore_similar_threshold;
1486
1504
  const ignoreSimilarIgnoredDomains = siteConfig.ignore_similar_ignored_domains !== undefined ? siteConfig.ignore_similar_ignored_domains : ignore_similar_ignored_domains;
1487
1505
 
1506
+ // Use smart cache's similarity cache for performance
1507
+ if (ignoreSimilarEnabled && smartCache) {
1508
+ const existingDomains = matchedDomains instanceof Map
1509
+ ? Array.from(matchedDomains.keys()).filter(key => !['dryRunMatches', 'dryRunNetTools', 'dryRunSearchString'].includes(key))
1510
+ : Array.from(matchedDomains);
1511
+
1512
+ // Check cached similarity scores first
1513
+ for (const existingDomain of existingDomains) {
1514
+ const cachedSimilarity = smartCache.getCachedSimilarity(domain, existingDomain);
1515
+ if (cachedSimilarity !== null && cachedSimilarity >= similarityThreshold) {
1516
+ if (forceDebug) {
1517
+ console.log(formatLogMessage('debug', `[SmartCache] Used cached similarity: ${domain} ~= ${existingDomain} (${cachedSimilarity}%)`));
1518
+ }
1519
+ return; // Skip adding this domain
1520
+ }
1521
+
1522
+ // If no cached similarity exists, calculate and cache it
1523
+ if (cachedSimilarity === null) {
1524
+ const similarity = calculateSimilarity(domain, existingDomain);
1525
+ if (smartCache) {
1526
+ smartCache.cacheSimilarity(domain, existingDomain, similarity);
1527
+ }
1528
+ }
1529
+ }
1530
+ }
1531
+
1532
+ // Check smart cache first
1533
+ const context = {
1534
+ filterRegex: siteConfig.filterRegex,
1535
+ searchString: siteConfig.searchstring,
1536
+ resourceType: resourceType
1537
+ };
1538
+
1539
+ if (smartCache && smartCache.shouldSkipDomain(domain, context)) {
1540
+ if (forceDebug) {
1541
+ console.log(formatLogMessage('debug', `[SmartCache] Skipping cached domain: ${domain}`));
1542
+ }
1543
+ return; // Skip adding this domain
1544
+ }
1545
+
1488
1546
  if (ignoreSimilarEnabled) {
1489
1547
  const existingDomains = matchedDomains instanceof Map
1490
1548
  ? Array.from(matchedDomains.keys()).filter(key => !['dryRunMatches', 'dryRunNetTools', 'dryRunSearchString'].includes(key))
@@ -1523,6 +1581,11 @@ function setupFrameHandling(page, forceDebug) {
1523
1581
  // Mark full subdomain as detected for future reference
1524
1582
  markDomainAsDetected(cacheKey);
1525
1583
 
1584
+ // Also mark in smart cache with context
1585
+ if (smartCache) {
1586
+ smartCache.markDomainProcessed(domain, context, { resourceType, fullSubdomain });
1587
+ }
1588
+
1526
1589
  if (matchedDomains instanceof Map) {
1527
1590
  if (!matchedDomains.has(domain)) {
1528
1591
  matchedDomains.set(domain, new Set());
@@ -1716,7 +1779,8 @@ function setupFrameHandling(page, forceDebug) {
1716
1779
 
1717
1780
  // REMOVED: Check if this URL matches any blocked patterns - if so, skip detection but still continue browser blocking
1718
1781
  // This check is no longer needed here since even_blocked handles it above
1719
-
1782
+
1783
+
1720
1784
  // If NO searchstring AND NO nettools are defined, match immediately (existing behavior)
1721
1785
  if (!hasSearchString && !hasSearchStringAnd && !hasNetTools) {
1722
1786
  if (dryRunMode) {
@@ -1767,6 +1831,15 @@ function setupFrameHandling(page, forceDebug) {
1767
1831
  }
1768
1832
 
1769
1833
  // Create and execute nettools handler
1834
+ // Check smart cache for nettools results
1835
+ const cachedWhois = smartCache ? smartCache.getCachedNetTools(reqDomain, 'whois') : null;
1836
+ const cachedDig = smartCache ? smartCache.getCachedNetTools(reqDomain, 'dig', digRecordType) : null;
1837
+
1838
+ if ((cachedWhois || cachedDig) && forceDebug) {
1839
+ console.log(formatLogMessage('debug', `[SmartCache] Using cached nettools results for ${reqDomain}`));
1840
+ }
1841
+
1842
+ // Create nettools handler with cache callbacks
1770
1843
  const netToolsHandler = createNetToolsHandler({
1771
1844
  whoisTerms,
1772
1845
  whoisOrTerms,
@@ -1784,6 +1857,15 @@ function setupFrameHandling(page, forceDebug) {
1784
1857
  matchedDomains,
1785
1858
  addMatchedDomain,
1786
1859
  isDomainAlreadyDetected,
1860
+ // Add cache callbacks if smart cache is available
1861
+ onWhoisResult: smartCache ? (domain, result) => {
1862
+ smartCache.cacheNetTools(domain, 'whois', result);
1863
+ } : undefined,
1864
+ onDigResult: smartCache ? (domain, result, recordType) => {
1865
+ smartCache.cacheNetTools(domain, 'dig', result, recordType);
1866
+ } : undefined,
1867
+ cachedWhois,
1868
+ cachedDig,
1787
1869
  currentUrl,
1788
1870
  getRootDomain,
1789
1871
  siteConfig,
@@ -1823,6 +1905,13 @@ function setupFrameHandling(page, forceDebug) {
1823
1905
 
1824
1906
  // If curl is enabled, download and analyze content immediately
1825
1907
  if (useCurl) {
1908
+ // Check response cache first if smart cache is available
1909
+ const cachedContent = smartCache ? smartCache.getCachedResponse(reqUrl) : null;
1910
+
1911
+ if (cachedContent && forceDebug) {
1912
+ console.log(formatLogMessage('debug', `[SmartCache] Using cached response content for ${reqUrl.substring(0, 50)}...`));
1913
+ // Process cached content instead of fetching
1914
+ } else {
1826
1915
  try {
1827
1916
  // Use grep handler if both grep and searchstring/searchstring_and are enabled
1828
1917
  if (useGrep && (hasSearchString || hasSearchStringAnd)) {
@@ -1833,6 +1922,9 @@ function setupFrameHandling(page, forceDebug) {
1833
1922
  matchedDomains,
1834
1923
  addMatchedDomain, // Pass the helper function
1835
1924
  isDomainAlreadyDetected,
1925
+ onContentFetched: smartCache ? (url, content) => {
1926
+ smartCache.cacheResponse(url, content);
1927
+ } : undefined,
1836
1928
  currentUrl,
1837
1929
  perSiteSubDomains,
1838
1930
  ignoreDomains,
@@ -1885,6 +1977,7 @@ function setupFrameHandling(page, forceDebug) {
1885
1977
  console.log(formatLogMessage('debug', `Curl handler failed for ${reqUrl}: ${curlErr.message}`));
1886
1978
  }
1887
1979
  }
1980
+ }
1888
1981
  }
1889
1982
 
1890
1983
  break;
@@ -2494,6 +2587,19 @@ function setupFrameHandling(page, forceDebug) {
2494
2587
  console.log(formatLogMessage('debug', `Output format: ${getFormatDescription(globalOptions)}`));
2495
2588
  console.log(formatLogMessage('debug', `Generated ${outputResult.totalRules} rules from ${outputResult.successfulPageLoads} successful page loads`));
2496
2589
  console.log(formatLogMessage('debug', `Performance: ${totalDomainsSkipped} domains skipped (already detected), ${detectedDomainsCount} unique domains cached`));
2590
+ // Log smart cache statistics
2591
+ if (smartCache) {
2592
+ const cacheStats = smartCache.getStats();
2593
+ console.log(formatLogMessage('debug', '=== Smart Cache Statistics ==='));
2594
+ console.log(formatLogMessage('debug', `Runtime: ${cacheStats.runtime}s, Total entries: ${cacheStats.totalCacheEntries}`));
2595
+ console.log(formatLogMessage('debug', `Hit Rates - Domain: ${cacheStats.hitRate}, Pattern: ${cacheStats.patternHitRate}`));
2596
+ console.log(formatLogMessage('debug', `Response: ${cacheStats.responseHitRate}, NetTools: ${cacheStats.netToolsHitRate}`));
2597
+ console.log(formatLogMessage('debug', `Regex compilations saved: ${cacheStats.regexCacheHits}`));
2598
+ console.log(formatLogMessage('debug', `Similarity cache hits: ${cacheStats.similarityHits}`));
2599
+ if (config.cache_persistence) {
2600
+ console.log(formatLogMessage('debug', `Persistence - Loads: ${cacheStats.persistenceLoads}, Saves: ${cacheStats.persistenceSaves}`));
2601
+ }
2602
+ }
2497
2603
  }
2498
2604
 
2499
2605
  // Compress log files if --compress-logs is enabled
@@ -2571,6 +2677,11 @@ function setupFrameHandling(page, forceDebug) {
2571
2677
  const seconds = totalSeconds % 60;
2572
2678
 
2573
2679
  // Final summary report with timing and success statistics
2680
+ // Clean up smart cache
2681
+ if (smartCache) {
2682
+ smartCache.destroy();
2683
+ }
2684
+
2574
2685
  if (!silentMode) {
2575
2686
  if (pagesWithMatches > outputResult.successfulPageLoads) {
2576
2687
  console.log(`\n${messageColors.success(dryRunMode ? 'Dry run completed.' : 'Scan completed.')} ${outputResult.successfulPageLoads} of ${totalUrls} URLs loaded successfully, ${pagesWithMatches} had matches in ${messageColors.timing(`${hours}h ${minutes}m ${seconds}s`)}`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "1.0.55",
3
+ "version": "1.0.57",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {