@fanboynz/network-scanner 1.0.57 → 1.0.59
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm-publish.yml +1 -1
- package/lib/searchstring.js +266 -38
- package/lib/smart-cache.js +318 -12
- package/nwss.js +58 -16
- package/package.json +4 -3
package/lib/searchstring.js
CHANGED
|
@@ -4,6 +4,13 @@
|
|
|
4
4
|
const fs = require('fs');
|
|
5
5
|
const { spawnSync } = require('child_process');
|
|
6
6
|
|
|
7
|
+
/**
 * Limits shared by the search helpers in this module.
 *
 * The object is frozen so that no caller can loosen a safety limit at runtime
 * by accident.
 *
 * - MAX_CONTENT_SIZE: upper bound applied to `content.length` before
 *   searching or tag stripping. It is a count of string units (what
 *   `String.prototype.length` reports), not bytes on the wire.
 * - MAX_SEARCH_STRING_LENGTH: longest accepted search string, in characters.
 * - XML_ENTITY_TIMEOUT: time budget for entity decoding, in milliseconds.
 */
const SEARCH_CONFIG = Object.freeze({
  MAX_CONTENT_SIZE: 50 * 1024 * 1024, // 50M string units (~50MB of ASCII text)
  MAX_SEARCH_STRING_LENGTH: 1000,
  XML_ENTITY_TIMEOUT: 5000 // 5 seconds
});
|
|
13
|
+
|
|
7
14
|
/**
|
|
8
15
|
* Parses searchstring configuration into a normalized format
|
|
9
16
|
* @param {string|Array<string>|undefined} searchstring - The searchstring config value (OR logic)
|
|
@@ -83,6 +90,8 @@ async function downloadWithCurl(url, userAgent = '', timeout = 30) {
|
|
|
83
90
|
'--max-time', timeout.toString(),
|
|
84
91
|
'--max-redirs', '5',
|
|
85
92
|
'--fail-with-body', // Return body even on HTTP errors
|
|
93
|
+
'--max-filesize', '52428800', // 50MB limit
|
|
94
|
+
'--range', '0-52428799', // Limit download size
|
|
86
95
|
'--compressed', // Accept compressed responses
|
|
87
96
|
];
|
|
88
97
|
|
|
@@ -105,7 +114,8 @@ async function downloadWithCurl(url, userAgent = '', timeout = 30) {
|
|
|
105
114
|
const result = spawnSync('curl', curlArgs, {
|
|
106
115
|
encoding: 'utf8',
|
|
107
116
|
timeout: timeout * 1000,
|
|
108
|
-
maxBuffer: 10 * 1024 * 1024 // 10MB max buffer
|
|
117
|
+
maxBuffer: 10 * 1024 * 1024, // 10MB max buffer
|
|
118
|
+
killSignal: 'SIGTERM'
|
|
109
119
|
});
|
|
110
120
|
|
|
111
121
|
if (result.error) {
|
|
@@ -123,6 +133,113 @@ async function downloadWithCurl(url, userAgent = '', timeout = 30) {
|
|
|
123
133
|
});
|
|
124
134
|
}
|
|
125
135
|
|
|
136
|
+
/**
 * Downloads content via downloadWithCurl, retrying transient failures.
 *
 * At least one attempt is always made. A failure is retried only when its
 * message contains one of a fixed set of markers (timeout, connection
 * refused/reset, HTTP 502/503) and retry attempts remain; any other failure,
 * or the failure of the final attempt, is rethrown unchanged.
 *
 * Waits between attempts use exponential backoff: 1s, 2s, 4s, ...
 *
 * @param {string} url - The URL to download
 * @param {string} userAgent - User agent string to use
 * @param {number} timeout - Timeout in seconds, passed through to downloadWithCurl
 * @param {number} retries - Number of retry attempts after the first try
 *   (default: 2). Negative, fractional or non-numeric values are normalised
 *   to a non-negative integer so the function can never finish without
 *   either returning content or throwing.
 * @returns {Promise<string>} The downloaded content
 * @throws Whatever downloadWithCurl threw on the last attempt made
 */
async function downloadWithRetry(url, userAgent = '', timeout = 30, retries = 2) {
  const TRANSIENT_MARKERS = ['timeout', 'Connection refused', '502', '503', 'Connection reset'];

  // Without normalisation a value such as -1, 1.5 or '2' makes the original
  // "attempt === retries" guard unreachable and the loop silently fall
  // through, resolving to undefined instead of reporting the failure.
  const requested = Number(retries);
  const maxRetries = Number.isFinite(requested) ? Math.max(0, Math.floor(requested)) : 0;

  for (let attempt = 0; ; attempt++) {
    try {
      return await downloadWithCurl(url, userAgent, timeout);
    } catch (err) {
      // Thrown values are not guaranteed to be Error instances; never let a
      // missing `message` replace the real failure with a TypeError.
      const message = err && typeof err.message === 'string' ? err.message : String(err);
      const isTransient = TRANSIENT_MARKERS.some((marker) => message.includes(marker));

      if (attempt >= maxRetries || !isTransient) throw err;

      // Exponential backoff: 1s, 2s, 4s...
      await new Promise((resolve) => setTimeout(resolve, 1000 * Math.pow(2, attempt)));
    }
  }
}
|
|
166
|
+
|
|
167
|
+
/**
 * Safely decodes XML character references with timeout protection.
 *
 * Handles the five predefined named references (lt, gt, amp, quot, apos) and
 * numeric references in decimal and hexadecimal form. Everything is decoded
 * in ONE left-to-right pass so that text produced by one replacement is never
 * decoded again; an escaped ampersand followed by "lt;" therefore stays as
 * the literal reference text instead of collapsing into an angle bracket.
 *
 * Numeric references are converted with String.fromCodePoint so that code
 * points above 0xFFFF (for example emoji) are produced correctly; values
 * outside 0..0x10FFFF are left untouched.
 *
 * The elapsed time is checked periodically while decoding. Once
 * SEARCH_CONFIG.XML_ENTITY_TIMEOUT milliseconds have passed, the remaining
 * references are left as-is and the partially decoded text is returned.
 *
 * @param {string} content - Content to decode
 * @returns {string} Decoded content, a partially decoded result on timeout,
 *   or the original value if decoding fails (e.g. content is not a string)
 */
function safeDecodeXmlEntities(content) {
  const NAMED_ENTITIES = { lt: '<', gt: '>', amp: '&', quot: '"', apos: "'" };
  // Group 1: decimal digits, group 2: hex digits, group 3: predefined name.
  const ENTITY_PATTERN = /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(lt|gt|amp|quot|apos));/g;
  const MAX_CODE_POINT = 0x10FFFF;
  // Reading the clock on every match would dominate the cost of decoding.
  const CLOCK_CHECK_INTERVAL = 1024;

  const startTime = Date.now();
  let matchesSeen = 0;
  let timedOut = false;

  try {
    const decoded = content.replace(ENTITY_PATTERN, (match, dec, hex, name) => {
      if (timedOut) return match;

      matchesSeen += 1;
      if (matchesSeen % CLOCK_CHECK_INTERVAL === 0 &&
          Date.now() - startTime > SEARCH_CONFIG.XML_ENTITY_TIMEOUT) {
        timedOut = true;
        return match;
      }

      if (name !== undefined) return NAMED_ENTITIES[name];

      const codePoint = dec !== undefined ? parseInt(dec, 10) : parseInt(hex, 16);
      // Validate range for safety (valid Unicode range); keep original if invalid
      if (codePoint >= 0 && codePoint <= MAX_CODE_POINT) {
        return String.fromCodePoint(codePoint);
      }
      return match;
    });

    if (timedOut) {
      console.warn('[warn] XML entity decoding timeout, using partial result');
    }
    return decoded;
  } catch (xmlErr) {
    console.warn(`[warn] XML entity decoding failed: ${xmlErr.message}`);
    return content; // Return original content if decoding fails
  }
}
|
|
222
|
+
|
|
223
|
+
/**
 * Safely strips XML/HTML tags with size limits.
 *
 * Content longer than SEARCH_CONFIG.MAX_CONTENT_SIZE is truncated first.
 * Every span from a "<" up to the next ">" is replaced by a single space so
 * word boundaries are preserved, then runs of whitespace are collapsed to one
 * space.
 *
 * The tag scan uses indexOf rather than the pattern /<[^>]*>/g. Both remove
 * exactly the same spans, but the pattern rescans to the end of the input for
 * every unclosed "<", which is quadratic on hostile or broken markup; the
 * scan below is linear and stops at the first "<" that has no closing ">".
 *
 * @param {string} content - Content to strip tags from
 * @returns {string} Content with tags removed, or the original value if
 *   stripping fails (e.g. content is not a string)
 */
function safeStripTags(content) {
  try {
    if (typeof content !== 'string') {
      throw new TypeError('content must be a string');
    }

    // Limit content size for tag stripping to prevent excessive memory usage
    const limitedContent = content.length > SEARCH_CONFIG.MAX_CONTENT_SIZE
      ? content.substring(0, SEARCH_CONFIG.MAX_CONTENT_SIZE)
      : content;

    const pieces = [];
    let cursor = 0;
    while (cursor < limitedContent.length) {
      const open = limitedContent.indexOf('<', cursor);
      if (open === -1) break;
      const close = limitedContent.indexOf('>', open + 1);
      // No ">" after this "<" means no later "<" can be closed either.
      if (close === -1) break;
      // Replace the tag with a space to preserve word boundaries
      pieces.push(limitedContent.slice(cursor, open), ' ');
      cursor = close + 1;
    }
    pieces.push(limitedContent.slice(cursor));

    return pieces.join('').replace(/\s+/g, ' ');
  } catch (stripErr) {
    console.warn(`[warn] Tag stripping failed: ${stripErr.message}`);
    return content;
  }
}
|
|
242
|
+
|
|
126
243
|
/**
|
|
127
244
|
* Checks if response content contains any of the search strings (OR logic)
|
|
128
245
|
* or all of the AND search strings (AND logic)
|
|
@@ -131,58 +248,118 @@ async function downloadWithCurl(url, userAgent = '', timeout = 30) {
|
|
|
131
248
|
* @param {Array<string>} searchStrings - Array of strings to search for (OR logic)
|
|
132
249
|
* @param {Array<string>} searchStringsAnd - Array of strings that must all be present (AND logic)
|
|
133
250
|
* @param {string} contentType - Content type for specialized handling
|
|
251
|
+
* @param {string} url - URL for debugging context (optional)
|
|
134
252
|
* @returns {object} Object with found boolean, matchedString/matchedStrings, allMatches array, and logic type
|
|
135
253
|
*/
|
|
136
|
-
function searchContent(content, searchStrings, searchStringsAnd = [], contentType = '') {
|
|
254
|
+
function searchContent(content, searchStrings, searchStringsAnd = [], contentType = '', url = '') {
|
|
255
|
+
// Input validation
|
|
256
|
+
if (!content || typeof content !== 'string') {
|
|
257
|
+
return {
|
|
258
|
+
found: false,
|
|
259
|
+
matchedString: null,
|
|
260
|
+
matchedStrings: [],
|
|
261
|
+
allMatches: [],
|
|
262
|
+
logicType: 'NONE',
|
|
263
|
+
error: 'Invalid or empty content'
|
|
264
|
+
};
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
// Size check and truncation with warning
|
|
268
|
+
const originalLength = content.length;
|
|
269
|
+
if (originalLength > SEARCH_CONFIG.MAX_CONTENT_SIZE) {
|
|
270
|
+
content = content.substring(0, SEARCH_CONFIG.MAX_CONTENT_SIZE);
|
|
271
|
+
console.warn(`[warn] Content truncated from ${originalLength} to ${SEARCH_CONFIG.MAX_CONTENT_SIZE} chars for ${url || 'unknown URL'}`);
|
|
272
|
+
}
|
|
137
273
|
let searchableContent = content;
|
|
274
|
+
|
|
275
|
+
const isXmlContent = contentType.toLowerCase().includes('xml') ||
|
|
276
|
+
contentType.toLowerCase().includes('html');
|
|
138
277
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
278
|
+
if (isXmlContent) {
|
|
279
|
+
try {
|
|
280
|
+
// Safely decode XML entities
|
|
281
|
+
const decodedContent = safeDecodeXmlEntities(content);
|
|
282
|
+
|
|
283
|
+
// Safely strip tags to extract text content
|
|
284
|
+
const strippedContent = safeStripTags(decodedContent);
|
|
285
|
+
|
|
286
|
+
// Search in: original + decoded + stripped content
|
|
287
|
+
// Use newlines as separators to prevent false matches across content types
|
|
288
|
+
searchableContent = [content, decodedContent, strippedContent].join('\n');
|
|
289
|
+
|
|
290
|
+
} catch (xmlProcessingErr) {
|
|
291
|
+
console.warn(`[warn] XML processing failed for ${url || 'unknown URL'}: ${xmlProcessingErr.message}`);
|
|
292
|
+
// Fall back to original content
|
|
293
|
+
searchableContent = content;
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
// Input validation for search strings
|
|
298
|
+
const validSearchStrings = searchStrings.filter(str =>
|
|
299
|
+
str && typeof str === 'string' && str.length > 0 && str.length <= SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH
|
|
300
|
+
);
|
|
301
|
+
const validSearchStringsAnd = searchStringsAnd.filter(str =>
|
|
302
|
+
str && typeof str === 'string' && str.length > 0 && str.length <= SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH
|
|
303
|
+
);
|
|
304
|
+
|
|
305
|
+
// Warn about filtered search strings
|
|
306
|
+
if (validSearchStrings.length !== searchStrings.length) {
|
|
307
|
+
console.warn(`[warn] Filtered ${searchStrings.length - validSearchStrings.length} invalid search strings`);
|
|
308
|
+
}
|
|
309
|
+
if (validSearchStringsAnd.length !== searchStringsAnd.length) {
|
|
310
|
+
console.warn(`[warn] Filtered ${searchStringsAnd.length - validSearchStringsAnd.length} invalid AND search strings`);
|
|
154
311
|
}
|
|
155
312
|
|
|
156
|
-
//
|
|
157
|
-
if (
|
|
158
|
-
|
|
313
|
+
// Early return if no valid search strings
|
|
314
|
+
if (validSearchStrings.length === 0 && validSearchStringsAnd.length === 0) {
|
|
315
|
+
return {
|
|
316
|
+
found: false,
|
|
317
|
+
matchedString: null,
|
|
318
|
+
matchedStrings: [],
|
|
319
|
+
allMatches: [],
|
|
320
|
+
logicType: 'NONE',
|
|
321
|
+
error: 'No valid search strings provided'
|
|
322
|
+
};
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
// Pre-compute lowercase content once for better performance
|
|
326
|
+
const lowerContent = searchableContent.toLowerCase();
|
|
327
|
+
|
|
328
|
+
// Check AND logic first (more restrictive) - ALL strings must be present
|
|
329
|
+
if (validSearchStringsAnd && validSearchStringsAnd.length > 0) {
|
|
159
330
|
const foundAndStrings = [];
|
|
160
331
|
|
|
161
|
-
for (const searchStr of
|
|
162
|
-
|
|
332
|
+
for (const searchStr of validSearchStringsAnd) {
|
|
333
|
+
const lowerSearchStr = searchStr.toLowerCase();
|
|
334
|
+
if (lowerContent.includes(lowerSearchStr)) {
|
|
163
335
|
foundAndStrings.push(searchStr);
|
|
336
|
+
} else {
|
|
337
|
+
// Early exit if any AND string is not found
|
|
338
|
+
break;
|
|
164
339
|
}
|
|
165
340
|
}
|
|
166
341
|
|
|
167
|
-
// AND logic: ALL strings must be found
|
|
168
|
-
if (foundAndStrings.length ===
|
|
342
|
+
// AND logic: ALL valid strings must be found
|
|
343
|
+
if (foundAndStrings.length === validSearchStringsAnd.length) {
|
|
169
344
|
return {
|
|
170
345
|
found: true,
|
|
171
|
-
matchedString: foundAndStrings.join(' AND '),
|
|
346
|
+
matchedString: foundAndStrings.join(' AND '),
|
|
172
347
|
matchedStrings: foundAndStrings,
|
|
173
348
|
allMatches: foundAndStrings,
|
|
174
|
-
logicType: 'AND'
|
|
349
|
+
logicType: 'AND',
|
|
350
|
+
contentSize: originalLength,
|
|
351
|
+
searchableSize: searchableContent.length
|
|
175
352
|
};
|
|
176
353
|
}
|
|
177
354
|
}
|
|
178
355
|
|
|
179
|
-
//
|
|
180
|
-
const lowerContent = searchableContent.toLowerCase();
|
|
356
|
+
// OR logic: ANY string can match
|
|
181
357
|
const allMatches = [];
|
|
182
358
|
let firstMatch = null;
|
|
183
359
|
|
|
184
|
-
for (const searchStr of
|
|
185
|
-
|
|
360
|
+
for (const searchStr of validSearchStrings) {
|
|
361
|
+
const lowerSearchStr = searchStr.toLowerCase();
|
|
362
|
+
if (lowerContent.includes(lowerSearchStr)) {
|
|
186
363
|
allMatches.push(searchStr);
|
|
187
364
|
if (!firstMatch) {
|
|
188
365
|
firstMatch = searchStr;
|
|
@@ -195,7 +372,10 @@ function searchContent(content, searchStrings, searchStringsAnd = [], contentTyp
|
|
|
195
372
|
matchedString: firstMatch,
|
|
196
373
|
matchedStrings: allMatches,
|
|
197
374
|
allMatches: allMatches,
|
|
198
|
-
logicType: 'OR'
|
|
375
|
+
logicType: validSearchStrings.length > 0 ? 'OR' : 'NONE',
|
|
376
|
+
contentSize: originalLength,
|
|
377
|
+
searchableSize: searchableContent.length,
|
|
378
|
+
processedAsXml: isXmlContent
|
|
199
379
|
};
|
|
200
380
|
}
|
|
201
381
|
|
|
@@ -206,6 +386,9 @@ function searchContent(content, searchStrings, searchStringsAnd = [], contentTyp
|
|
|
206
386
|
*/
|
|
207
387
|
function shouldAnalyzeContentType(contentType) {
|
|
208
388
|
if (!contentType) return false;
|
|
389
|
+
|
|
390
|
+
// Normalize content type (remove charset and other parameters)
|
|
391
|
+
const normalizedType = contentType.toLowerCase().split(';')[0].trim();
|
|
209
392
|
|
|
210
393
|
const textTypes = [
|
|
211
394
|
'text/', // text/html, text/plain, text/xml, etc.
|
|
@@ -216,10 +399,16 @@ function shouldAnalyzeContentType(contentType) {
|
|
|
216
399
|
'application/soap+xml', // SOAP XML
|
|
217
400
|
'application/rss+xml', // RSS feeds
|
|
218
401
|
'application/atom+xml', // Atom feeds
|
|
219
|
-
'application/xhtml+xml'
|
|
402
|
+
'application/xhtml+xml', // XHTML
|
|
403
|
+
'application/ld+json', // JSON-LD structured data
|
|
404
|
+
'application/manifest+json', // Web App Manifest
|
|
405
|
+
'application/feed+xml', // Generic XML feeds
|
|
406
|
+
'application/vnd.api+json', // JSON API specification
|
|
407
|
+
'application/hal+json', // HAL (Hypertext Application Language)
|
|
408
|
+
'application/problem+json' // Problem Details for HTTP APIs
|
|
220
409
|
];
|
|
221
410
|
|
|
222
|
-
return textTypes.some(type =>
|
|
411
|
+
return textTypes.some(type => normalizedType.startsWith(type));
|
|
223
412
|
}
|
|
224
413
|
|
|
225
414
|
/**
|
|
@@ -319,10 +508,10 @@ function createCurlHandler(config) {
|
|
|
319
508
|
}
|
|
320
509
|
|
|
321
510
|
// If searchstring IS defined, download and search content
|
|
322
|
-
const content = await
|
|
511
|
+
const content = await downloadWithRetry(requestUrl, userAgent, 30);
|
|
323
512
|
|
|
324
513
|
// Check if content contains search strings (OR or AND logic)
|
|
325
|
-
const { found, matchedString, logicType } = searchContent(content, searchStrings, searchStringsAnd, '');
|
|
514
|
+
const { found, matchedString, logicType, error } = searchContent(content, searchStrings, searchStringsAnd, '', requestUrl);
|
|
326
515
|
|
|
327
516
|
if (found) {
|
|
328
517
|
if (!reqDomain || matchesIgnoreDomain(reqDomain, ignoreDomains)) {
|
|
@@ -352,6 +541,9 @@ function createCurlHandler(config) {
|
|
|
352
541
|
} else if (forceDebug) {
|
|
353
542
|
const partyType = isFirstParty ? 'first-party' : 'third-party';
|
|
354
543
|
console.log(`[debug][curl] ${requestUrl} (${partyType}) matched regex but no searchstring found`);
|
|
544
|
+
if (error) {
|
|
545
|
+
console.log(`[debug][curl] Search error: ${error}`);
|
|
546
|
+
}
|
|
355
547
|
}
|
|
356
548
|
|
|
357
549
|
} catch (err) {
|
|
@@ -442,7 +634,7 @@ function createResponseHandler(config) {
|
|
|
442
634
|
const content = await response.text();
|
|
443
635
|
|
|
444
636
|
// Check if content contains search strings (OR or AND logic)
|
|
445
|
-
const { found, matchedString, logicType } = searchContent(content, searchStrings, searchStringsAnd, contentType);
|
|
637
|
+
const { found, matchedString, logicType, error } = searchContent(content, searchStrings, searchStringsAnd, contentType, respUrl);
|
|
446
638
|
|
|
447
639
|
if (found) {
|
|
448
640
|
if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
|
|
@@ -471,6 +663,9 @@ function createResponseHandler(config) {
|
|
|
471
663
|
} else if (forceDebug) {
|
|
472
664
|
const partyType = isFirstParty ? 'first-party' : 'third-party';
|
|
473
665
|
console.log(`[debug] ${respUrl} (${partyType}) matched regex but no searchstring found`);
|
|
666
|
+
if (error) {
|
|
667
|
+
console.log(`[debug] Search error: ${error}`);
|
|
668
|
+
}
|
|
474
669
|
}
|
|
475
670
|
|
|
476
671
|
} catch (err) {
|
|
@@ -541,9 +736,40 @@ function validateSearchString(searchstring, searchstringAnd) {
|
|
|
541
736
|
}
|
|
542
737
|
|
|
543
738
|
// Check that both searchstring and searchstring_and aren't defined simultaneously
|
|
544
|
-
if (searchstring &&
|
|
739
|
+
if ((searchstring !== undefined && searchstring !== null) &&
|
|
740
|
+
(searchstringAnd !== undefined && searchstringAnd !== null)) {
|
|
545
741
|
return { isValid: false, error: 'Cannot use both searchstring (OR) and searchstring_and (AND) simultaneously. Choose one logic type.' };
|
|
546
742
|
}
|
|
743
|
+
|
|
744
|
+
// Additional validation for search string length limits
|
|
745
|
+
const validateStringLength = (str, fieldName) => {
|
|
746
|
+
if (str.length > SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH) {
|
|
747
|
+
return { isValid: false, error: `${fieldName} too long (max ${SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH} chars)` };
|
|
748
|
+
}
|
|
749
|
+
return { isValid: true };
|
|
750
|
+
};
|
|
751
|
+
|
|
752
|
+
// Validate search string lengths
|
|
753
|
+
if (typeof searchstring === 'string') {
|
|
754
|
+
const lengthCheck = validateStringLength(searchstring, 'searchstring');
|
|
755
|
+
if (!lengthCheck.isValid) return lengthCheck;
|
|
756
|
+
} else if (Array.isArray(searchstring)) {
|
|
757
|
+
for (let i = 0; i < searchstring.length; i++) {
|
|
758
|
+
const lengthCheck = validateStringLength(searchstring[i], `searchstring[${i}]`);
|
|
759
|
+
if (!lengthCheck.isValid) return lengthCheck;
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
|
|
763
|
+
// Validate AND search string lengths
|
|
764
|
+
if (typeof searchstringAnd === 'string') {
|
|
765
|
+
const lengthCheck = validateStringLength(searchstringAnd, 'searchstring_and');
|
|
766
|
+
if (!lengthCheck.isValid) return lengthCheck;
|
|
767
|
+
} else if (Array.isArray(searchstringAnd)) {
|
|
768
|
+
for (let i = 0; i < searchstringAnd.length; i++) {
|
|
769
|
+
const lengthCheck = validateStringLength(searchstringAnd[i], `searchstring_and[${i}]`);
|
|
770
|
+
if (!lengthCheck.isValid) return lengthCheck;
|
|
771
|
+
}
|
|
772
|
+
}
|
|
547
773
|
|
|
548
774
|
return { isValid: false, error: 'searchstring must be string or array of strings' };
|
|
549
775
|
}
|
|
@@ -567,11 +793,13 @@ function getSearchStats(matchedDomains, searchStrings) {
|
|
|
567
793
|
module.exports = {
|
|
568
794
|
parseSearchStrings,
|
|
569
795
|
searchContent,
|
|
796
|
+
safeDecodeXmlEntities,
|
|
570
797
|
shouldAnalyzeContentType,
|
|
571
798
|
createResponseHandler,
|
|
572
799
|
createCurlHandler,
|
|
573
800
|
downloadWithCurl,
|
|
574
801
|
validateSearchString,
|
|
575
802
|
getSearchStats,
|
|
576
|
-
addDomainToCollection
|
|
803
|
+
addDomainToCollection,
|
|
804
|
+
downloadWithRetry
|
|
577
805
|
};
|
package/lib/smart-cache.js
CHANGED
|
@@ -14,6 +14,11 @@ const { formatLogMessage } = require('./colorize');
|
|
|
14
14
|
*/
|
|
15
15
|
class SmartCache {
|
|
16
16
|
constructor(options = {}) {
|
|
17
|
+
// Calculate dynamic values first
|
|
18
|
+
const concurrency = options.concurrency || 6;
|
|
19
|
+
const optimalHeapLimit = this._calculateOptimalHeapLimit(concurrency);
|
|
20
|
+
const checkInterval = this._calculateCheckInterval(concurrency);
|
|
21
|
+
|
|
17
22
|
this.options = {
|
|
18
23
|
maxSize: options.maxSize || 5000,
|
|
19
24
|
ttl: options.ttl || 1000 * 60 * 60, // 1 hour default
|
|
@@ -24,8 +29,18 @@ class SmartCache {
|
|
|
24
29
|
persistencePath: options.persistencePath || '.cache',
|
|
25
30
|
forceDebug: options.forceDebug || false,
|
|
26
31
|
autoSave: options.autoSave !== false,
|
|
27
|
-
autoSaveInterval: options.autoSaveInterval || 60000 // 1 minute
|
|
32
|
+
autoSaveInterval: options.autoSaveInterval || 60000, // 1 minute
|
|
33
|
+
maxHeapUsage: options.maxHeapUsage || optimalHeapLimit,
|
|
34
|
+
memoryCheckInterval: options.memoryCheckInterval || checkInterval,
|
|
35
|
+
concurrency: concurrency,
|
|
36
|
+
aggressiveMode: options.aggressiveMode || false
|
|
28
37
|
};
|
|
38
|
+
|
|
39
|
+
// Add save debouncing
|
|
40
|
+
this.lastSaveTime = 0;
|
|
41
|
+
this.saveInProgress = false;
|
|
42
|
+
this.saveTimeout = null;
|
|
43
|
+
this.pendingSave = false;
|
|
29
44
|
|
|
30
45
|
// Initialize cache layers
|
|
31
46
|
this._initializeCaches();
|
|
@@ -42,6 +57,34 @@ class SmartCache {
|
|
|
42
57
|
if (this.options.enablePersistence && this.options.autoSave) {
|
|
43
58
|
this._setupAutoSave();
|
|
44
59
|
}
|
|
60
|
+
|
|
61
|
+
// Set up memory monitoring
|
|
62
|
+
this.memoryCheckInterval = setInterval(() => {
|
|
63
|
+
this._checkMemoryPressure();
|
|
64
|
+
}, this.options.memoryCheckInterval);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
/**
|
|
68
|
+
* Calculate optimal heap limit based on concurrency
|
|
69
|
+
* @private
|
|
70
|
+
*/
|
|
71
|
+
_calculateOptimalHeapLimit(concurrency) {
|
|
72
|
+
// Base cache needs: 100MB
|
|
73
|
+
// Per concurrent connection: ~75MB average
|
|
74
|
+
// Safety margin: 50%
|
|
75
|
+
const baseCacheMemory = 100 * 1024 * 1024; // 100MB
|
|
76
|
+
const perConnectionMemory = 75 * 1024 * 1024; // 75MB
|
|
77
|
+
const totalEstimated = baseCacheMemory + (concurrency * perConnectionMemory);
|
|
78
|
+
return Math.round(totalEstimated * 0.4); // Cache should use max 40% of estimated total
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* Calculate check interval based on concurrency
|
|
83
|
+
* @private
|
|
84
|
+
*/
|
|
85
|
+
_calculateCheckInterval(concurrency) {
|
|
86
|
+
// Higher concurrency = more frequent checks
|
|
87
|
+
return Math.max(5000, 30000 - (concurrency * 1000)); // 5s min, scales down with concurrency
|
|
45
88
|
}
|
|
46
89
|
|
|
47
90
|
/**
|
|
@@ -57,29 +100,43 @@ class SmartCache {
|
|
|
57
100
|
updateAgeOnHas: false
|
|
58
101
|
});
|
|
59
102
|
|
|
60
|
-
// Pattern matching results cache
|
|
103
|
+
// Pattern matching results cache - reduce size for high concurrency
|
|
104
|
+
const patternCacheSize = this.options.concurrency > 10 ? 500 : 1000;
|
|
61
105
|
this.patternCache = new LRUCache({
|
|
62
|
-
max:
|
|
106
|
+
max: patternCacheSize,
|
|
63
107
|
ttl: this.options.ttl * 2 // Patterns are more stable
|
|
64
108
|
});
|
|
65
109
|
|
|
66
|
-
// Response content cache for
|
|
110
|
+
// Response content cache - aggressive limits for high concurrency
|
|
111
|
+
const responseCacheSize = this.options.concurrency > 10 ? 50 : 200;
|
|
112
|
+
const responseCacheMemory = this.options.concurrency > 10 ? 20 * 1024 * 1024 : 50 * 1024 * 1024;
|
|
67
113
|
this.responseCache = new LRUCache({
|
|
68
|
-
max:
|
|
114
|
+
max: responseCacheSize,
|
|
69
115
|
ttl: 1000 * 60 * 30, // 30 minutes for response content
|
|
70
|
-
maxSize:
|
|
116
|
+
maxSize: responseCacheMemory,
|
|
71
117
|
sizeCalculation: (value) => value.length
|
|
72
118
|
});
|
|
73
119
|
|
|
120
|
+
// Disable response cache entirely for very high concurrency
|
|
121
|
+
if (this.options.concurrency > 15 || this.options.aggressiveMode) {
|
|
122
|
+
this.options.enableResponseCache = false;
|
|
123
|
+
if (this.options.forceDebug) {
|
|
124
|
+
console.log(formatLogMessage('debug',
|
|
125
|
+
`[SmartCache] Response cache disabled for high concurrency (${this.options.concurrency})`
|
|
126
|
+
));
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
|
|
74
130
|
// WHOIS/DNS results cache
|
|
75
131
|
this.netToolsCache = new LRUCache({
|
|
76
132
|
max: 500,
|
|
77
133
|
ttl: 1000 * 60 * 60 * 24 // 24 hours for WHOIS/DNS
|
|
78
134
|
});
|
|
79
135
|
|
|
80
|
-
// Similarity cache for
|
|
136
|
+
// Similarity cache - reduce for high concurrency
|
|
137
|
+
const similarityCacheSize = this.options.concurrency > 10 ? 1000 : 2000;
|
|
81
138
|
this.similarityCache = new LRUCache({
|
|
82
|
-
max:
|
|
139
|
+
max: similarityCacheSize,
|
|
83
140
|
ttl: this.options.ttl
|
|
84
141
|
});
|
|
85
142
|
|
|
@@ -107,6 +164,9 @@ class SmartCache {
|
|
|
107
164
|
regexCacheHits: 0,
|
|
108
165
|
persistenceLoads: 0,
|
|
109
166
|
persistenceSaves: 0,
|
|
167
|
+
memoryPressureEvents: 0,
|
|
168
|
+
memoryWarnings: 0,
|
|
169
|
+
responseCacheSkips: 0,
|
|
110
170
|
startTime: Date.now()
|
|
111
171
|
};
|
|
112
172
|
}
|
|
@@ -274,6 +334,21 @@ class SmartCache {
|
|
|
274
334
|
cacheResponse(url, content) {
|
|
275
335
|
if (!this.options.enableResponseCache) return;
|
|
276
336
|
|
|
337
|
+
// Skip response caching entirely for very high concurrency
|
|
338
|
+
if (this.options.concurrency > 12) {
|
|
339
|
+
this.stats.responseCacheSkips++;
|
|
340
|
+
return;
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
// Check memory before caching large content
|
|
344
|
+
const memUsage = process.memoryUsage();
|
|
345
|
+
const threshold = this.options.concurrency > 10 ? 0.7 : 0.8; // Lower threshold for high concurrency
|
|
346
|
+
if (memUsage.heapUsed > this.options.maxHeapUsage * threshold) {
|
|
347
|
+
this.stats.responseCacheSkips++;
|
|
348
|
+
this._logMemorySkip('response cache');
|
|
349
|
+
return;
|
|
350
|
+
}
|
|
351
|
+
|
|
277
352
|
// Only cache if content is reasonable size
|
|
278
353
|
if (content && content.length < 5 * 1024 * 1024) { // 5MB limit per response
|
|
279
354
|
this.responseCache.set(url, content);
|
|
@@ -350,7 +425,85 @@ class SmartCache {
|
|
|
350
425
|
this.stats.similarityMisses++;
|
|
351
426
|
return null;
|
|
352
427
|
}
|
|
428
|
+
|
|
429
|
+
/**
|
|
430
|
+
* Monitor memory usage and proactively manage caches
|
|
431
|
+
* @private
|
|
432
|
+
*/
|
|
433
|
+
_checkMemoryPressure() {
|
|
434
|
+
const memUsage = process.memoryUsage();
|
|
435
|
+
const heapUsedMB = Math.round(memUsage.heapUsed / 1024 / 1024);
|
|
436
|
+
const maxHeapMB = Math.round(this.options.maxHeapUsage / 1024 / 1024);
|
|
437
|
+
const usagePercent = (memUsage.heapUsed / this.options.maxHeapUsage) * 100;
|
|
438
|
+
|
|
439
|
+
// Adjust thresholds based on concurrency
|
|
440
|
+
const criticalThreshold = this.options.concurrency > 10 ? 0.85 : 1.0;
|
|
441
|
+
const warningThreshold = this.options.concurrency > 10 ? 0.70 : 0.85;
|
|
442
|
+
const infoThreshold = this.options.concurrency > 10 ? 0.60 : 0.75;
|
|
443
|
+
|
|
444
|
+
// Critical threshold - aggressive cleanup
|
|
445
|
+
if (memUsage.heapUsed > this.options.maxHeapUsage * criticalThreshold) {
|
|
446
|
+
this._performMemoryCleanup('critical', heapUsedMB, maxHeapMB);
|
|
447
|
+
return true;
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
// Warning threshold - moderate cleanup
|
|
451
|
+
if (memUsage.heapUsed > this.options.maxHeapUsage * warningThreshold) {
|
|
452
|
+
this._performMemoryCleanup('warning', heapUsedMB, maxHeapMB);
|
|
453
|
+
return true;
|
|
454
|
+
}
|
|
455
|
+
|
|
456
|
+
// Info threshold - log only
|
|
457
|
+
if (memUsage.heapUsed > this.options.maxHeapUsage * infoThreshold) {
|
|
458
|
+
this.stats.memoryWarnings++;
|
|
459
|
+
if (this.options.forceDebug) {
|
|
460
|
+
console.log(formatLogMessage('debug',
|
|
461
|
+
`[SmartCache] Memory info: ${heapUsedMB}MB/${maxHeapMB}MB (${usagePercent.toFixed(1)}%)`
|
|
462
|
+
));
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
|
|
466
|
+
return false;
|
|
467
|
+
}
|
|
353
468
|
|
|
469
|
+
/**
|
|
470
|
+
* Perform memory cleanup based on severity
|
|
471
|
+
* @private
|
|
472
|
+
*/
|
|
473
|
+
_performMemoryCleanup(level, heapUsedMB, maxHeapMB) {
|
|
474
|
+
this.stats.memoryPressureEvents++;
|
|
475
|
+
|
|
476
|
+
if (this.options.forceDebug) {
|
|
477
|
+
console.log(formatLogMessage('debug',
|
|
478
|
+
`[SmartCache] Memory ${level}: ${heapUsedMB}MB/${maxHeapMB}MB, performing cleanup...`
|
|
479
|
+
));
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
if (level === 'critical' || this.options.concurrency > 12) {
|
|
483
|
+
// Aggressive cleanup - clear volatile caches
|
|
484
|
+
this.responseCache.clear();
|
|
485
|
+
this.patternCache.clear();
|
|
486
|
+
this.similarityCache.clear();
|
|
487
|
+
|
|
488
|
+
// For very high concurrency, also trim domain cache
|
|
489
|
+
if (this.options.concurrency > 15) {
|
|
490
|
+
const currentSize = this.domainCache.size;
|
|
491
|
+
this.domainCache.clear();
|
|
492
|
+
if (this.options.forceDebug) {
|
|
493
|
+
console.log(formatLogMessage('debug', `[SmartCache] Cleared ${currentSize} domain cache entries`));
|
|
494
|
+
}
|
|
495
|
+
}
|
|
496
|
+
} else if (level === 'warning') {
|
|
497
|
+
// Moderate cleanup - clear largest cache
|
|
498
|
+
this.responseCache.clear();
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
// Force garbage collection if available
|
|
502
|
+
if (global.gc) {
|
|
503
|
+
global.gc();
|
|
504
|
+
}
|
|
505
|
+
}
|
|
506
|
+
|
|
354
507
|
/**
|
|
355
508
|
* Get cache statistics
|
|
356
509
|
* @returns {Object} Statistics object
|
|
@@ -364,6 +517,9 @@ class SmartCache {
|
|
|
364
517
|
(this.stats.responseHits + this.stats.responseMisses) || 0;
|
|
365
518
|
const netToolsHitRate = this.stats.netToolsHits /
|
|
366
519
|
(this.stats.netToolsHits + this.stats.netToolsMisses) || 0;
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
const memUsage = process.memoryUsage();
|
|
367
523
|
|
|
368
524
|
return {
|
|
369
525
|
...this.stats,
|
|
@@ -380,7 +536,11 @@ class SmartCache {
|
|
|
380
536
|
regexCacheSize: this.regexCache.size,
|
|
381
537
|
totalCacheEntries: this.domainCache.size + this.patternCache.size +
|
|
382
538
|
this.responseCache.size + this.netToolsCache.size +
|
|
383
|
-
this.similarityCache.size + this.regexCache.size
|
|
539
|
+
this.similarityCache.size + this.regexCache.size,
|
|
540
|
+
memoryUsageMB: Math.round(memUsage.heapUsed / 1024 / 1024),
|
|
541
|
+
memoryMaxMB: Math.round(this.options.maxHeapUsage / 1024 / 1024),
|
|
542
|
+
memoryUsagePercent: ((memUsage.heapUsed / this.options.maxHeapUsage) * 100).toFixed(1) + '%',
|
|
543
|
+
responseCacheMemoryMB: Math.round((this.responseCache.calculatedSize || 0) / 1024 / 1024)
|
|
384
544
|
};
|
|
385
545
|
}
|
|
386
546
|
|
|
@@ -401,6 +561,18 @@ class SmartCache {
|
|
|
401
561
|
}
|
|
402
562
|
}
|
|
403
563
|
|
|
564
|
+
/**
|
|
565
|
+
* Helper method to log memory-related cache skips
|
|
566
|
+
* @private
|
|
567
|
+
*/
|
|
568
|
+
_logMemorySkip(operation) {
|
|
569
|
+
if (this.options.forceDebug) {
|
|
570
|
+
console.log(formatLogMessage('debug',
|
|
571
|
+
`[SmartCache] Skipping ${operation} due to memory pressure`
|
|
572
|
+
));
|
|
573
|
+
}
|
|
574
|
+
}
|
|
575
|
+
|
|
404
576
|
/**
|
|
405
577
|
* Load persistent cache from disk
|
|
406
578
|
* @private
|
|
@@ -464,6 +636,34 @@ class SmartCache {
|
|
|
464
636
|
*/
|
|
465
637
|
savePersistentCache() {
|
|
466
638
|
if (!this.options.enablePersistence) return;
|
|
639
|
+
|
|
640
|
+
// Prevent concurrent saves
|
|
641
|
+
if (this.saveInProgress) {
|
|
642
|
+
this.pendingSave = true;
|
|
643
|
+
if (this.options.forceDebug) {
|
|
644
|
+
console.log(formatLogMessage('debug', '[SmartCache] Save in progress, marking pending...'));
|
|
645
|
+
}
|
|
646
|
+
return;
|
|
647
|
+
}
|
|
648
|
+
|
|
649
|
+
// Debounce saves - don't save more than once every 10 seconds
|
|
650
|
+
const now = Date.now();
|
|
651
|
+
if (now - this.lastSaveTime < 10000) {
|
|
652
|
+
// Schedule a delayed save if none is pending
|
|
653
|
+
if (!this.saveTimeout && !this.pendingSave) {
|
|
654
|
+
this.pendingSave = true;
|
|
655
|
+
this.saveTimeout = setTimeout(() => {
|
|
656
|
+
this.saveTimeout = null;
|
|
657
|
+
if (this.pendingSave) {
|
|
658
|
+
this.pendingSave = false;
|
|
659
|
+
this.savePersistentCache();
|
|
660
|
+
}
|
|
661
|
+
}, 10000 - (now - this.lastSaveTime));
|
|
662
|
+
}
|
|
663
|
+
return;
|
|
664
|
+
}
|
|
665
|
+
this.saveInProgress = true;
|
|
666
|
+
this.lastSaveTime = now;
|
|
467
667
|
|
|
468
668
|
const cacheDir = this.options.persistencePath;
|
|
469
669
|
const cacheFile = path.join(cacheDir, 'smart-cache.json');
|
|
@@ -475,7 +675,7 @@ class SmartCache {
|
|
|
475
675
|
}
|
|
476
676
|
|
|
477
677
|
const data = {
|
|
478
|
-
timestamp: Date.now(),
|
|
678
|
+
timestamp: now,
|
|
479
679
|
domainCache: Array.from(this.domainCache.entries()),
|
|
480
680
|
netToolsCache: Array.from(this.netToolsCache.entries()),
|
|
481
681
|
stats: this.stats
|
|
@@ -495,6 +695,14 @@ class SmartCache {
|
|
|
495
695
|
`[SmartCache] Failed to save cache: ${err.message}`
|
|
496
696
|
));
|
|
497
697
|
}
|
|
698
|
+
} finally {
|
|
699
|
+
this.saveInProgress = false;
|
|
700
|
+
|
|
701
|
+
// Process any pending saves
|
|
702
|
+
if (this.pendingSave && !this.saveTimeout) {
|
|
703
|
+
this.pendingSave = false;
|
|
704
|
+
setTimeout(() => this.savePersistentCache(), 1000);
|
|
705
|
+
}
|
|
498
706
|
}
|
|
499
707
|
}
|
|
500
708
|
|
|
@@ -512,9 +720,16 @@ class SmartCache {
|
|
|
512
720
|
* Clean up resources
|
|
513
721
|
*/
|
|
514
722
|
destroy() {
|
|
723
|
+
if (this.memoryCheckInterval) {
|
|
724
|
+
clearInterval(this.memoryCheckInterval);
|
|
725
|
+
}
|
|
515
726
|
if (this.autoSaveInterval) {
|
|
516
727
|
clearInterval(this.autoSaveInterval);
|
|
517
728
|
}
|
|
729
|
+
if (this.saveTimeout) {
|
|
730
|
+
clearTimeout(this.saveTimeout);
|
|
731
|
+
this.saveTimeout = null;
|
|
732
|
+
}
|
|
518
733
|
|
|
519
734
|
// Save cache one last time
|
|
520
735
|
if (this.options.enablePersistence) {
|
|
@@ -523,6 +738,92 @@ class SmartCache {
|
|
|
523
738
|
|
|
524
739
|
this.clear();
|
|
525
740
|
}
|
|
741
|
+
|
|
742
|
+
/**
|
|
743
|
+
* Clear persistent cache files and directories
|
|
744
|
+
* @param {Object} options - Clear options
|
|
745
|
+
* @param {boolean} options.silent - Suppress console output
|
|
746
|
+
* @param {boolean} options.forceDebug - Enable debug logging
|
|
747
|
+
* @returns {Object} Clear operation results
|
|
748
|
+
*/
|
|
749
|
+
static clearPersistentCache(options = {}) {
|
|
750
|
+
const { silent = false, forceDebug = false, cachePath = '.cache' } = options;
|
|
751
|
+
|
|
752
|
+
const cachePaths = [
|
|
753
|
+
cachePath,
|
|
754
|
+
path.join(cachePath, 'smart-cache.json'),
|
|
755
|
+
// Add other potential cache files here if needed
|
|
756
|
+
];
|
|
757
|
+
|
|
758
|
+
let clearedItems = 0;
|
|
759
|
+
let totalSize = 0;
|
|
760
|
+
const clearedFiles = [];
|
|
761
|
+
const errors = [];
|
|
762
|
+
|
|
763
|
+
if (!silent) {
|
|
764
|
+
console.log(`\n??? Clearing cache...`);
|
|
765
|
+
}
|
|
766
|
+
|
|
767
|
+
for (const currentCachePath of cachePaths) {
|
|
768
|
+
if (fs.existsSync(currentCachePath)) {
|
|
769
|
+
try {
|
|
770
|
+
const stats = fs.statSync(currentCachePath);
|
|
771
|
+
if (stats.isDirectory()) {
|
|
772
|
+
// Calculate total size of directory contents
|
|
773
|
+
const files = fs.readdirSync(currentCachePath);
|
|
774
|
+
for (const file of files) {
|
|
775
|
+
const filePath = path.join(currentCachePath, file);
|
|
776
|
+
if (fs.existsSync(filePath)) {
|
|
777
|
+
totalSize += fs.statSync(filePath).size;
|
|
778
|
+
}
|
|
779
|
+
}
|
|
780
|
+
fs.rmSync(currentCachePath, { recursive: true, force: true });
|
|
781
|
+
clearedItems++;
|
|
782
|
+
clearedFiles.push({ type: 'directory', path: currentCachePath, size: totalSize });
|
|
783
|
+
if (forceDebug) {
|
|
784
|
+
console.log(formatLogMessage('debug', `Cleared cache directory: ${currentCachePath}`));
|
|
785
|
+
}
|
|
786
|
+
} else {
|
|
787
|
+
totalSize += stats.size;
|
|
788
|
+
fs.unlinkSync(currentCachePath);
|
|
789
|
+
clearedItems++;
|
|
790
|
+
clearedFiles.push({ type: 'file', path: currentCachePath, size: stats.size });
|
|
791
|
+
if (forceDebug) {
|
|
792
|
+
console.log(formatLogMessage('debug', `Cleared cache file: ${currentCachePath}`));
|
|
793
|
+
}
|
|
794
|
+
}
|
|
795
|
+
} catch (clearErr) {
|
|
796
|
+
errors.push({ path: currentCachePath, error: clearErr.message });
|
|
797
|
+
if (forceDebug) {
|
|
798
|
+
console.log(formatLogMessage('debug', `Failed to clear ${currentCachePath}: ${clearErr.message}`));
|
|
799
|
+
}
|
|
800
|
+
}
|
|
801
|
+
}
|
|
802
|
+
}
|
|
803
|
+
|
|
804
|
+
const result = {
|
|
805
|
+
success: errors.length === 0,
|
|
806
|
+
clearedItems,
|
|
807
|
+
totalSize,
|
|
808
|
+
sizeMB: (totalSize / 1024 / 1024).toFixed(2),
|
|
809
|
+
clearedFiles,
|
|
810
|
+
errors
|
|
811
|
+
};
|
|
812
|
+
|
|
813
|
+
if (!silent) {
|
|
814
|
+
if (clearedItems > 0) {
|
|
815
|
+
console.log(`? Cache cleared: ${clearedItems} item(s), ${result.sizeMB}MB freed`);
|
|
816
|
+
} else {
|
|
817
|
+
console.log(`?? No cache files found to clear`);
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
if (errors.length > 0) {
|
|
821
|
+
console.warn(`?? ${errors.length} error(s) occurred during cache clearing`);
|
|
822
|
+
}
|
|
823
|
+
}
|
|
824
|
+
|
|
825
|
+
return result;
|
|
826
|
+
}
|
|
526
827
|
}
|
|
527
828
|
|
|
528
829
|
/**
|
|
@@ -541,11 +842,16 @@ function createSmartCache(config = {}) {
|
|
|
541
842
|
persistencePath: config.cache_path || '.cache',
|
|
542
843
|
forceDebug: config.forceDebug || false,
|
|
543
844
|
autoSave: config.cache_autosave !== false,
|
|
544
|
-
autoSaveInterval: (config.cache_autosave_minutes || 1) * 60 * 1000
|
|
845
|
+
autoSaveInterval: (config.cache_autosave_minutes || 1) * 60 * 1000,
|
|
846
|
+
maxHeapUsage: config.cache_max_heap_mb ? config.cache_max_heap_mb * 1024 * 1024 : undefined,
|
|
847
|
+
memoryCheckInterval: (config.cache_memory_check_seconds || 30) * 1000,
|
|
848
|
+
concurrency: config.max_concurrent_sites || 6,
|
|
849
|
+
aggressiveMode: config.cache_aggressive_mode === true
|
|
545
850
|
});
|
|
546
851
|
}
|
|
547
852
|
|
|
548
853
|
module.exports = {
|
|
549
854
|
SmartCache,
|
|
550
|
-
createSmartCache
|
|
855
|
+
createSmartCache,
|
|
856
|
+
clearPersistentCache: SmartCache.clearPersistentCache
|
|
551
857
|
};
|
package/nwss.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// === Network scanner script (nwss.js) v1.0.
|
|
1
|
+
// === Network scanner script (nwss.js) v1.0.60 ===
|
|
2
2
|
|
|
3
3
|
// puppeteer for browser automation, fs for file system operations, psl for domain parsing.
|
|
4
4
|
// const pLimit = require('p-limit'); // Will be dynamically imported
|
|
@@ -34,13 +34,14 @@ const { performPageInteraction, createInteractionConfig } = require('./lib/inter
|
|
|
34
34
|
// Domain detection cache for performance optimization
|
|
35
35
|
const { createGlobalHelpers, getTotalDomainsSkipped, getDetectedDomainsCount } = require('./lib/domain-cache');
|
|
36
36
|
const { createSmartCache } = require('./lib/smart-cache'); // Smart cache system
|
|
37
|
+
const { clearPersistentCache } = require('./lib/smart-cache');
|
|
37
38
|
// Enhanced redirect handling
|
|
38
39
|
const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/redirect');
|
|
39
40
|
// Ensure web browser is working correctly
|
|
40
41
|
const { monitorBrowserHealth, isBrowserHealthy } = require('./lib/browserhealth');
|
|
41
42
|
|
|
42
43
|
// --- Script Configuration & Constants ---
|
|
43
|
-
const VERSION = '1.0.
|
|
44
|
+
const VERSION = '1.0.60'; // Script version
|
|
44
45
|
|
|
45
46
|
// get startTime
|
|
46
47
|
const startTime = Date.now();
|
|
@@ -102,6 +103,8 @@ const validateConfig = args.includes('--validate-config');
|
|
|
102
103
|
const validateRules = args.includes('--validate-rules');
|
|
103
104
|
const testValidation = args.includes('--test-validation');
|
|
104
105
|
let cleanRules = args.includes('--clean-rules');
|
|
106
|
+
const clearCache = args.includes('--clear-cache');
|
|
107
|
+
const ignoreCache = args.includes('--ignore-cache');
|
|
105
108
|
|
|
106
109
|
let validateRulesFile = null;
|
|
107
110
|
const validateRulesIndex = args.findIndex(arg => arg === '--validate-rules');
|
|
@@ -224,6 +227,15 @@ if (args.includes('--version')) {
|
|
|
224
227
|
process.exit(0);
|
|
225
228
|
}
|
|
226
229
|
|
|
230
|
+
// Handle --clear-cache before config loading (uses default cache path)
|
|
231
|
+
if (clearCache && !dryRunMode) {
|
|
232
|
+
clearPersistentCache({
|
|
233
|
+
silent: silentMode,
|
|
234
|
+
forceDebug,
|
|
235
|
+
cachePath: '.cache' // Default path, will be updated after config loads if needed
|
|
236
|
+
});
|
|
237
|
+
}
|
|
238
|
+
|
|
227
239
|
// Handle validation-only operations before main help
|
|
228
240
|
if (testValidation) {
|
|
229
241
|
console.log(`\n${messageColors.processing('Running domain validation tests...')}`);
|
|
@@ -360,6 +372,8 @@ Validation Options:
|
|
|
360
372
|
--validate-rules [file] Validate rule file format (uses --output/--compare files if no file specified)
|
|
361
373
|
--clean-rules [file] Clean rule files by removing invalid lines and optionally duplicates (uses --output/--compare files if no file specified)
|
|
362
374
|
--test-validation Run domain validation tests and exit
|
|
375
|
+
--clear-cache Clear persistent cache before scanning (improves fresh start performance)
|
|
376
|
+
--ignore-cache Bypass all smart caching functionality during scanning
|
|
363
377
|
|
|
364
378
|
Global config.json options:
|
|
365
379
|
ignoreDomains: ["domain.com", "*.ads.com"] Domains to completely ignore (supports wildcards)
|
|
@@ -551,15 +565,40 @@ const RESOURCE_CLEANUP_INTERVAL = (() => {
|
|
|
551
565
|
return 180;
|
|
552
566
|
})();
|
|
553
567
|
|
|
554
|
-
//
|
|
568
|
+
// Perform cache clear after config is loaded for custom cache paths
|
|
569
|
+
if (clearCache && dryRunMode) {
|
|
570
|
+
clearPersistentCache({
|
|
571
|
+
silent: silentMode,
|
|
572
|
+
forceDebug,
|
|
573
|
+
cachePath: config.cache_path || '.cache'
|
|
574
|
+
});
|
|
575
|
+
}
|
|
576
|
+
|
|
577
|
+
// Also clear for custom cache paths in normal mode if not already cleared
|
|
578
|
+
if (clearCache && !dryRunMode && config.cache_path && config.cache_path !== '.cache') {
|
|
579
|
+
clearPersistentCache({
|
|
580
|
+
silent: silentMode,
|
|
581
|
+
forceDebug,
|
|
582
|
+
cachePath: config.cache_path
|
|
583
|
+
});
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
// Initialize smart cache system AFTER config is loaded (unless --ignore-cache is used)
|
|
587
|
+
if (ignoreCache) {
|
|
588
|
+
smartCache = null;
|
|
589
|
+
if (forceDebug) console.log(formatLogMessage('debug', 'Smart cache disabled by --ignore-cache flag'));
|
|
590
|
+
} else {
|
|
555
591
|
smartCache = createSmartCache({
|
|
556
592
|
...config,
|
|
557
593
|
forceDebug,
|
|
558
|
-
|
|
559
|
-
|
|
594
|
+
max_concurrent_sites: MAX_CONCURRENT_SITES, // Pass concurrency info
|
|
595
|
+
cache_aggressive_mode: MAX_CONCURRENT_SITES > 12, // Auto-enable for high concurrency
|
|
596
|
+
cache_persistence: false, // Disable persistence completely
|
|
597
|
+
cache_autosave: false, // Disable auto-save completely
|
|
560
598
|
cache_autosave_minutes: config.cache_autosave_minutes || 1,
|
|
561
599
|
cache_max_size: config.cache_max_size || 5000
|
|
562
600
|
});
|
|
601
|
+
}
|
|
563
602
|
|
|
564
603
|
// Handle --clean-rules after config is loaded (so we have access to sites)
|
|
565
604
|
if (cleanRules || cleanRulesFile) {
|
|
@@ -1503,7 +1542,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1503
1542
|
const similarityThreshold = siteConfig.ignore_similar_threshold || ignore_similar_threshold;
|
|
1504
1543
|
const ignoreSimilarIgnoredDomains = siteConfig.ignore_similar_ignored_domains !== undefined ? siteConfig.ignore_similar_ignored_domains : ignore_similar_ignored_domains;
|
|
1505
1544
|
|
|
1506
|
-
// Use smart cache's similarity cache for performance
|
|
1545
|
+
// Use smart cache's similarity cache for performance (if cache is enabled)
|
|
1507
1546
|
if (ignoreSimilarEnabled && smartCache) {
|
|
1508
1547
|
const existingDomains = matchedDomains instanceof Map
|
|
1509
1548
|
? Array.from(matchedDomains.keys()).filter(key => !['dryRunMatches', 'dryRunNetTools', 'dryRunSearchString'].includes(key))
|
|
@@ -1522,14 +1561,14 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1522
1561
|
// If no cached similarity exists, calculate and cache it
|
|
1523
1562
|
if (cachedSimilarity === null) {
|
|
1524
1563
|
const similarity = calculateSimilarity(domain, existingDomain);
|
|
1525
|
-
if (smartCache) {
|
|
1564
|
+
if (smartCache && !ignoreCache) {
|
|
1526
1565
|
smartCache.cacheSimilarity(domain, existingDomain, similarity);
|
|
1527
1566
|
}
|
|
1528
1567
|
}
|
|
1529
1568
|
}
|
|
1530
1569
|
}
|
|
1531
1570
|
|
|
1532
|
-
// Check smart cache first
|
|
1571
|
+
// Check smart cache first (if cache is enabled)
|
|
1533
1572
|
const context = {
|
|
1534
1573
|
filterRegex: siteConfig.filterRegex,
|
|
1535
1574
|
searchString: siteConfig.searchstring,
|
|
@@ -1581,7 +1620,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1581
1620
|
// Mark full subdomain as detected for future reference
|
|
1582
1621
|
markDomainAsDetected(cacheKey);
|
|
1583
1622
|
|
|
1584
|
-
// Also mark in smart cache with context
|
|
1623
|
+
// Also mark in smart cache with context (if cache is enabled)
|
|
1585
1624
|
if (smartCache) {
|
|
1586
1625
|
smartCache.markDomainProcessed(domain, context, { resourceType, fullSubdomain });
|
|
1587
1626
|
}
|
|
@@ -1831,7 +1870,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1831
1870
|
}
|
|
1832
1871
|
|
|
1833
1872
|
// Create and execute nettools handler
|
|
1834
|
-
// Check smart cache for nettools results
|
|
1873
|
+
// Check smart cache for nettools results (if cache is enabled)
|
|
1835
1874
|
const cachedWhois = smartCache ? smartCache.getCachedNetTools(reqDomain, 'whois') : null;
|
|
1836
1875
|
const cachedDig = smartCache ? smartCache.getCachedNetTools(reqDomain, 'dig', digRecordType) : null;
|
|
1837
1876
|
|
|
@@ -1839,7 +1878,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1839
1878
|
console.log(formatLogMessage('debug', `[SmartCache] Using cached nettools results for ${reqDomain}`));
|
|
1840
1879
|
}
|
|
1841
1880
|
|
|
1842
|
-
// Create nettools handler with cache callbacks
|
|
1881
|
+
// Create nettools handler with cache callbacks (if cache is enabled)
|
|
1843
1882
|
const netToolsHandler = createNetToolsHandler({
|
|
1844
1883
|
whoisTerms,
|
|
1845
1884
|
whoisOrTerms,
|
|
@@ -1857,7 +1896,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1857
1896
|
matchedDomains,
|
|
1858
1897
|
addMatchedDomain,
|
|
1859
1898
|
isDomainAlreadyDetected,
|
|
1860
|
-
// Add cache callbacks if smart cache is available
|
|
1899
|
+
// Add cache callbacks if smart cache is available and caching is enabled
|
|
1861
1900
|
onWhoisResult: smartCache ? (domain, result) => {
|
|
1862
1901
|
smartCache.cacheNetTools(domain, 'whois', result);
|
|
1863
1902
|
} : undefined,
|
|
@@ -1905,7 +1944,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1905
1944
|
|
|
1906
1945
|
// If curl is enabled, download and analyze content immediately
|
|
1907
1946
|
if (useCurl) {
|
|
1908
|
-
// Check response cache first if smart cache is available
|
|
1947
|
+
// Check response cache first if smart cache is available and caching is enabled
|
|
1909
1948
|
const cachedContent = smartCache ? smartCache.getCachedResponse(reqUrl) : null;
|
|
1910
1949
|
|
|
1911
1950
|
if (cachedContent && forceDebug) {
|
|
@@ -1922,7 +1961,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1922
1961
|
matchedDomains,
|
|
1923
1962
|
addMatchedDomain, // Pass the helper function
|
|
1924
1963
|
isDomainAlreadyDetected,
|
|
1925
|
-
onContentFetched: smartCache ? (url, content) => {
|
|
1964
|
+
onContentFetched: smartCache && !ignoreCache ? (url, content) => {
|
|
1926
1965
|
smartCache.cacheResponse(url, content);
|
|
1927
1966
|
} : undefined,
|
|
1928
1967
|
currentUrl,
|
|
@@ -2587,7 +2626,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2587
2626
|
console.log(formatLogMessage('debug', `Output format: ${getFormatDescription(globalOptions)}`));
|
|
2588
2627
|
console.log(formatLogMessage('debug', `Generated ${outputResult.totalRules} rules from ${outputResult.successfulPageLoads} successful page loads`));
|
|
2589
2628
|
console.log(formatLogMessage('debug', `Performance: ${totalDomainsSkipped} domains skipped (already detected), ${detectedDomainsCount} unique domains cached`));
|
|
2590
|
-
// Log smart cache statistics
|
|
2629
|
+
// Log smart cache statistics (if cache is enabled)
|
|
2591
2630
|
if (smartCache) {
|
|
2592
2631
|
const cacheStats = smartCache.getStats();
|
|
2593
2632
|
console.log(formatLogMessage('debug', '=== Smart Cache Statistics ==='));
|
|
@@ -2677,7 +2716,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2677
2716
|
const seconds = totalSeconds % 60;
|
|
2678
2717
|
|
|
2679
2718
|
// Final summary report with timing and success statistics
|
|
2680
|
-
// Clean up smart cache
|
|
2719
|
+
// Clean up smart cache (if it exists)
|
|
2681
2720
|
if (smartCache) {
|
|
2682
2721
|
smartCache.destroy();
|
|
2683
2722
|
}
|
|
@@ -2699,6 +2738,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2699
2738
|
if (totalDomainsSkipped > 0) {
|
|
2700
2739
|
console.log(messageColors.info('Performance:') + ` ${totalDomainsSkipped} domains skipped (already detected)`);
|
|
2701
2740
|
}
|
|
2741
|
+
if (ignoreCache && forceDebug) {
|
|
2742
|
+
console.log(messageColors.info('Cache:') + ` Smart caching was disabled`);
|
|
2743
|
+
}
|
|
2702
2744
|
}
|
|
2703
2745
|
|
|
2704
2746
|
// Clean process termination
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "1.0.57",
|
|
3
|
+
"version": "1.0.59",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|
|
@@ -10,9 +10,10 @@
|
|
|
10
10
|
"lint": "eslint *.js lib/*.js"
|
|
11
11
|
},
|
|
12
12
|
"dependencies": {
|
|
13
|
+
"lru-cache": "^10.4.3",
|
|
13
14
|
"p-limit": "^4.0.0",
|
|
14
15
|
"psl": "^1.15.0",
|
|
15
|
-
"puppeteer": "^23.
|
|
16
|
+
"puppeteer": "^23.11.1"
|
|
16
17
|
},
|
|
17
18
|
"keywords": [
|
|
18
19
|
"puppeteer",
|
|
@@ -29,7 +30,7 @@
|
|
|
29
30
|
"author": "FanboyNZ",
|
|
30
31
|
"license": "GPL-3.0",
|
|
31
32
|
"engines": {
|
|
32
|
-
"node": ">=
|
|
33
|
+
"node": ">=20.0.0"
|
|
33
34
|
},
|
|
34
35
|
"repository": {
|
|
35
36
|
"type": "git",
|