@fanboynz/network-scanner 1.0.87 → 1.0.89
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/curl.js +442 -0
- package/lib/grep.js +56 -17
- package/nwss.js +141 -81
- package/package.json +1 -1
package/lib/curl.js
ADDED
|
@@ -0,0 +1,442 @@
|
|
|
1
|
+
// === curl.js - Curl-based Content Download Module ===
|
|
2
|
+
// Handles HTTP content downloading using curl for searchstring analysis
|
|
3
|
+
|
|
4
|
+
const fs = require('fs');
|
|
5
|
+
const { spawnSync } = require('child_process');
|
|
6
|
+
const { colorize, colors, messageColors, tags, formatLogMessage } = require('./colorize');
|
|
7
|
+
|
|
8
|
+
// === Constants ===
|
|
9
|
+
// Shared tunables for the curl helpers in this module.
const CURL_DEFAULTS = {
  // --- Transfer limits ---
  TIMEOUT_SECONDS: 30, // passed to curl --max-time (seconds)
  MAX_REDIRECTS: 5, // passed to curl --max-redirs
  MAX_SIZE_BYTES: 10 * 1024 ** 2, // 10MB, used as the spawnSync maxBuffer

  // --- Process timing (milliseconds) ---
  VALIDATION_TIMEOUT: 5 * 1000, // 5 seconds for `curl --version`
  SPAWN_TIMEOUT_MULTIPLIER: 1000, // seconds -> milliseconds factor

  // --- Expected result codes ---
  HTTP_SUCCESS_CODE: 200,
  CURL_SUCCESS_STATUS: 0, // curl process exit status on success

  // --- Output parsing ---
  METADATA_PIPE_PARTS: 3, // http_code|content_type|size_download
  VERSION_LINE_INDEX: 0 // first line of `curl --version` output
};
|
|
20
|
+
|
|
21
|
+
/**
 * Downloads a URL by running the system `curl` binary with browser-like headers.
 *
 * The transfer itself is performed with `spawnSync`, so although this function
 * is `async` it blocks until curl exits. It never rejects: every failure
 * (spawn error, non-zero curl exit status, unparsable metadata) is reported
 * through the returned object.
 *
 * @param {string} url - The URL to download
 * @param {string} userAgent - User agent string to send (omitted when empty)
 * @param {object} options - Download options
 * @param {number} [options.timeout] - Transfer timeout in seconds
 * @param {number} [options.maxRedirects] - Maximum number of redirects to follow
 * @param {number} [options.maxSize] - Maximum captured output size in bytes (spawnSync maxBuffer)
 * @param {boolean} [options.followRedirects=true] - Whether to pass -L to curl
 * @param {object} [options.customHeaders] - Extra request headers as name/value pairs
 * @returns {Promise<object>} `{ content, httpCode, contentType, downloadSize, success }`,
 *   plus `error` (message string) when `success` is false
 */
async function downloadWithCurl(url, userAgent = '', options = {}) {
  const {
    timeout = CURL_DEFAULTS.TIMEOUT_SECONDS,
    maxRedirects = CURL_DEFAULTS.MAX_REDIRECTS,
    maxSize = CURL_DEFAULTS.MAX_SIZE_BYTES,
    followRedirects = true,
    customHeaders = {}
  } = options;

  try {
    const curlArgs = [
      '-s', // Silent mode
      '--max-time', timeout.toString(),
      '--max-redirs', maxRedirects.toString(),
      // Keeps the body on HTTP errors; curl still exits non-zero for them,
      // so the status check below reports such responses as failures.
      '--fail-with-body',
      '--compressed', // Advertise and decode the encodings this curl build supports
      // curl appends this right after the body. The leading newline guarantees
      // the metadata sits on its own final line even when the body has no
      // trailing newline or its last line contains '|' characters.
      '--write-out', '\n%{http_code}|%{content_type}|%{size_download}'
    ];

    if (followRedirects) {
      curlArgs.push('-L'); // Follow redirects
    }

    // Add user agent if provided
    if (userAgent) {
      curlArgs.push('-H', `User-Agent: ${userAgent}`);
    }

    // Add common browser headers.
    // No explicit Accept-Encoding here: a hand-written value would override the
    // one --compressed generates and could request an encoding (e.g. br) that
    // the local curl build cannot decode.
    curlArgs.push(
      '-H', 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
      '-H', 'Accept-Language: en-US,en;q=0.5',
      '-H', 'Connection: keep-alive',
      '-H', 'Upgrade-Insecure-Requests: 1',
      '-H', 'Sec-Fetch-Dest: document',
      '-H', 'Sec-Fetch-Mode: navigate',
      '-H', 'Sec-Fetch-Site: none',
      '-H', 'Cache-Control: no-cache'
    );

    // Add custom headers (a null/undefined value is treated as "none")
    for (const [key, value] of Object.entries(customHeaders || {})) {
      curlArgs.push('-H', `${key}: ${value}`);
    }

    curlArgs.push(url);

    // Execute curl
    const curlResult = spawnSync('curl', curlArgs, {
      encoding: 'utf8',
      timeout: timeout * CURL_DEFAULTS.SPAWN_TIMEOUT_MULTIPLIER,
      maxBuffer: maxSize
    });

    if (curlResult.error) {
      throw curlResult.error;
    }

    if (curlResult.status !== CURL_DEFAULTS.CURL_SUCCESS_STATUS) {
      throw new Error(`Curl exited with status ${curlResult.status}: ${curlResult.stderr}`);
    }

    // Everything after the last newline is the write-out metadata;
    // everything before it is the response body, byte for byte.
    const output = curlResult.stdout;
    const separatorIndex = output.lastIndexOf('\n');
    const metadata = output.slice(separatorIndex + 1);
    const content = separatorIndex === -1 ? '' : output.slice(0, separatorIndex);

    // Parse metadata
    const metadataParts = metadata.split('|');
    if (metadataParts.length !== CURL_DEFAULTS.METADATA_PIPE_PARTS) {
      throw new Error(`Invalid metadata format: expected ${CURL_DEFAULTS.METADATA_PIPE_PARTS} parts, got ${metadataParts.length}`);
    }
    const [httpCode, contentType, downloadSize] = metadataParts;

    return {
      content,
      httpCode: Number.parseInt(httpCode, 10) || 0,
      contentType: contentType || 'unknown',
      downloadSize: Number.parseInt(downloadSize, 10) || content.length,
      success: true
    };

  } catch (error) {
    return {
      content: '',
      httpCode: 0,
      contentType: 'unknown',
      downloadSize: 0,
      success: false,
      error: error.message
    };
  }
}
|
|
122
|
+
|
|
123
|
+
/**
 * Searches content for patterns using plain case-insensitive substring matching.
 *
 * When `hasSearchStringAnd` is true and `searchStringsAnd` is non-empty, only
 * the AND list is evaluated (every pattern must be present) and the OR list is
 * ignored. Otherwise the first OR pattern found in the content wins.
 *
 * @param {string} content - Content to search
 * @param {Array<string>} searchStrings - OR patterns (any can match); null is treated as empty
 * @param {Array<string>} searchStringsAnd - AND patterns (all must match); null is treated as empty
 * @param {boolean} hasSearchStringAnd - Whether AND logic is being used
 * @returns {object} `{ found, matchedPattern, matchType }`; AND results also carry
 *   `foundPatterns` and `missingPatterns`
 */
function searchContent(content, searchStrings = [], searchStringsAnd = [], hasSearchStringAnd = false) {
  if (!content || content.length === 0) {
    return { found: false, matchedPattern: null, matchType: null };
  }

  // Default parameters only replace `undefined`; an explicit null would
  // otherwise throw on `.length` below.
  const orPatterns = searchStrings || [];
  const andPatterns = searchStringsAnd || [];

  const lowerContent = content.toLowerCase();
  const isPresent = (pattern) => lowerContent.includes(pattern.toLowerCase());

  // Handle AND logic searchstring_and (all patterns must be present)
  if (hasSearchStringAnd && andPatterns.length > 0) {
    const foundPatterns = [];
    const missingPatterns = [];

    for (const pattern of andPatterns) {
      (isPresent(pattern) ? foundPatterns : missingPatterns).push(pattern);
    }

    const allFound = missingPatterns.length === 0;
    return {
      found: allFound,
      matchedPattern: allFound ? foundPatterns.join(' AND ') : null,
      matchType: 'AND',
      foundPatterns,
      missingPatterns
    };
  }

  // Handle OR logic searchstring (any pattern can match)
  for (const pattern of orPatterns) {
    if (isPresent(pattern)) {
      return { found: true, matchedPattern: pattern, matchType: 'OR' };
    }
  }

  return { found: false, matchedPattern: null, matchType: null };
}
|
|
188
|
+
|
|
189
|
+
/**
 * Creates a curl-based URL handler for downloading and searching content.
 *
 * The returned async handler takes a request URL and:
 *  1. skips it when its hostname was already detected, when it matches none of
 *     `regexes`, or when first-/third-party filtering in `siteConfig` excludes it;
 *  2. records a match immediately when no search strings are configured;
 *  3. otherwise downloads the URL with curl, optionally hands the body to
 *     `onContentFetched`, and records a match only if the search patterns hit.
 * The handler never throws; failures are logged only when `forceDebug` is set.
 *
 * @param {object} config - Configuration object containing all necessary parameters
 * @param {Array<string>} config.searchStrings - OR patterns
 * @param {Array<string>} config.searchStringsAnd - AND patterns
 * @param {boolean} config.hasSearchStringAnd - Whether AND logic is being used
 * @param {Array<RegExp>} config.regexes - URL patterns a request must match
 * @param {Function} config.addMatchedDomain - Called as (domain, resourceType, fullSubdomain) on a match
 * @param {Function} config.isDomainAlreadyDetected - Called with the request hostname
 * @param {Function} [config.onContentFetched] - Called as (requestUrl, content) after a download
 * @param {string} config.currentUrl - URL of the page being scanned
 * @param {boolean} config.perSiteSubDomains - Report full hostname instead of root domain
 * @param {*} config.ignoreDomains - Passed through to `matchesIgnoreDomain`
 * @param {Function} config.matchesIgnoreDomain - Called as (domain, ignoreDomains)
 * @param {Function} config.getRootDomain - Maps a URL to its root domain
 * @param {object} config.siteConfig - Per-site settings (firstParty, thirdParty, verbose,
 *   custom_headers, referrer_headers are read here)
 * @param {boolean} config.dumpUrls - Whether matches are appended to `matchedUrlsLogFile`
 * @param {string} config.matchedUrlsLogFile - Path of the matched-URLs log
 * @param {boolean} config.forceDebug - Enables debug logging
 * @param {string} config.userAgent - User agent passed to curl
 * @param {string} config.resourceType - Resource type label used in logs and match records
 * @param {boolean} config.hasSearchString - Whether any search string is configured
 * @returns {Function} URL handler function for curl-based content analysis
 */
function createCurlHandler(config) {
  const {
    searchStrings,
    searchStringsAnd,
    hasSearchStringAnd,
    regexes,
    addMatchedDomain,
    isDomainAlreadyDetected,
    onContentFetched,
    currentUrl,
    perSiteSubDomains,
    ignoreDomains,
    matchesIgnoreDomain,
    getRootDomain,
    siteConfig,
    dumpUrls,
    matchedUrlsLogFile,
    forceDebug,
    userAgent,
    resourceType,
    hasSearchString
  } = config;

  // Emits a debug line only when debugging is enabled.
  const debugLog = (message) => {
    if (forceDebug) {
      console.log(formatLogMessage('debug', message));
    }
  };

  return async function curlHandler(requestUrl) {
    try {
      const respDomain = perSiteSubDomains ? (new URL(requestUrl)).hostname : getRootDomain(requestUrl);
      const fullSubdomain = (new URL(requestUrl)).hostname; // Always get full subdomain for cache tracking

      // Skip if already detected to avoid duplicates
      if (isDomainAlreadyDetected(fullSubdomain)) {
        debugLog(`[curl] Skipping already detected subdomain: ${fullSubdomain}`);
        return;
      }

      // Only process URLs that match our regex patterns
      if (!regexes.some(re => re.test(requestUrl))) {
        debugLog(`[curl] URL ${requestUrl} doesn't match any regex patterns`);
        return;
      }

      // First-party means same root domain as the URL being scanned
      const isFirstParty = getRootDomain(currentUrl) === getRootDomain(requestUrl);
      const partyType = isFirstParty ? 'first-party' : 'third-party';

      // Apply first-party/third-party filtering
      if (isFirstParty && (siteConfig.firstParty === false || siteConfig.firstParty === 0)) {
        debugLog(`[curl] Skipping first-party request (firstParty disabled): ${requestUrl}`);
        return;
      }

      if (!isFirstParty && (siteConfig.thirdParty === false || siteConfig.thirdParty === 0)) {
        debugLog(`[curl] Skipping third-party request (thirdParty disabled): ${requestUrl}`);
        return;
      }

      debugLog(`[curl] Processing ${partyType} request: ${requestUrl}`);

      const isIgnoredDomain = () => !respDomain || matchesIgnoreDomain(respDomain, ignoreDomains);

      // Registers the match and writes the console / log-file lines.
      // `consoleDetail` follows "(party, curl) " on the console line;
      // `fileDetail` is inserted before the closing parenthesis in the log file.
      const recordMatch = (consoleDetail, fileDetail) => {
        addMatchedDomain(respDomain, resourceType, fullSubdomain);
        const simplifiedUrl = getRootDomain(currentUrl);
        const resourceInfo = resourceType ? ` (${resourceType})` : '';

        if (siteConfig.verbose === 1) {
          console.log(formatLogMessage('match',
            `[${simplifiedUrl}] ${requestUrl} (${partyType}, curl) ${consoleDetail}${resourceInfo}`));
        }

        if (dumpUrls && matchedUrlsLogFile) {
          const timestamp = new Date().toISOString();
          try {
            fs.appendFileSync(matchedUrlsLogFile,
              `${timestamp} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, curl${fileDetail})${resourceInfo}\n`);
          } catch (logErr) {
            console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
          }
        }
      };

      // If NO searchstring is defined, match immediately (like browser behavior)
      const hasOrPatterns = Boolean(searchStrings && searchStrings.length);
      const hasAndPatterns = Boolean(searchStringsAnd && searchStringsAnd.length);
      if (!hasSearchString || (!hasOrPatterns && !hasAndPatterns)) {
        if (isIgnoredDomain()) {
          debugLog(`[curl] Domain ${respDomain} is in ignore list`);
          return;
        }
        recordMatch('matched regex', '');
        return;
      }

      // Searchstring IS defined: download and search content
      debugLog(`[curl] Downloading content for pattern matching: ${requestUrl}`);

      // Prepare custom headers from site config. Copy first: writing Referer
      // into siteConfig.custom_headers itself would leak the chosen referrer
      // into the shared site configuration for every later request.
      const customHeaders = { ...(siteConfig.custom_headers || {}) };
      if (siteConfig.referrer_headers) {
        const referrerUrl = Array.isArray(siteConfig.referrer_headers)
          ? siteConfig.referrer_headers[Math.floor(Math.random() * siteConfig.referrer_headers.length)]
          : siteConfig.referrer_headers;

        if (typeof referrerUrl === 'string' && referrerUrl.startsWith('http')) {
          customHeaders['Referer'] = referrerUrl;
        }
      }

      const downloadResult = await downloadWithCurl(requestUrl, userAgent, {
        timeout: CURL_DEFAULTS.TIMEOUT_SECONDS,
        maxRedirects: CURL_DEFAULTS.MAX_REDIRECTS,
        customHeaders
      });

      if (!downloadResult.success) {
        debugLog(`[curl] Failed to download ${requestUrl}: ${downloadResult.error}`);
        return;
      }

      // Cache the fetched content if callback provided; caching is best-effort
      if (onContentFetched) {
        try {
          onContentFetched(requestUrl, downloadResult.content);
        } catch (cacheErr) {
          debugLog(`[curl] Content caching failed: ${cacheErr.message}`);
        }
      }

      // Search content for patterns
      const searchResult = searchContent(
        downloadResult.content,
        searchStrings,
        searchStringsAnd,
        hasSearchStringAnd
      );

      if (!searchResult.found) {
        if (forceDebug) {
          if (searchResult.matchType === 'AND' && searchResult.missingPatterns) {
            console.log(formatLogMessage('debug',
              `[curl] ${requestUrl} (${partyType}) matched regex but missing AND patterns: ${searchResult.missingPatterns.join(', ')}`));
          } else {
            console.log(formatLogMessage('debug',
              `[curl] ${requestUrl} (${partyType}) matched regex but no search patterns found`));
          }
        }
        return;
      }

      if (isIgnoredDomain()) {
        debugLog(`[curl] Domain ${respDomain} matches but is in ignore list`);
        return;
      }

      const matchInfo = searchResult.matchType === 'AND'
        ? `patterns: ${searchResult.foundPatterns.length}/${searchStringsAnd.length}`
        : `pattern: "${searchResult.matchedPattern}"`;
      recordMatch(`contains ${matchInfo}`, `, ${matchInfo}`);

    } catch (err) {
      debugLog(`[curl] Handler failed for ${requestUrl}: ${err.message}`);
    }
  };
}
|
|
402
|
+
|
|
403
|
+
/**
 * Validates that curl is available on the system by running `curl --version`.
 *
 * `spawnSync` does not throw when the binary is missing or the call times out;
 * it reports that through `result.error`, so that field is checked explicitly
 * and its message is surfaced to the caller.
 *
 * @returns {object} `{ isAvailable, version, error }` - `version` is the first
 *   line of curl's version output when available, otherwise null; `error` is
 *   null on success, otherwise a description of the failure
 */
function validateCurlAvailability() {
  const unavailable = (error) => ({ isAvailable: false, version: null, error });

  try {
    const result = spawnSync('curl', ['--version'], {
      encoding: 'utf8',
      timeout: CURL_DEFAULTS.VALIDATION_TIMEOUT
    });

    // Spawn-level failure (e.g. ENOENT when curl is not installed, or a timeout)
    if (result.error) {
      const prefix = result.error.code === 'ENOENT' ? 'curl not found' : 'curl command failed';
      return unavailable(`${prefix}: ${result.error.message}`);
    }

    if (result.status !== CURL_DEFAULTS.CURL_SUCCESS_STATUS) {
      return unavailable('curl command failed');
    }

    const version = (result.stdout || '').split('\n')[CURL_DEFAULTS.VERSION_LINE_INDEX] || 'Unknown version';
    return {
      isAvailable: true,
      version: version.trim(),
      error: null
    };
  } catch (error) {
    return unavailable(`curl not found: ${error.message}`);
  }
}
|
|
436
|
+
|
|
437
|
+
module.exports = {
|
|
438
|
+
downloadWithCurl,
|
|
439
|
+
searchContent,
|
|
440
|
+
createCurlHandler,
|
|
441
|
+
validateCurlAvailability
|
|
442
|
+
};
|
package/lib/grep.js
CHANGED
|
@@ -7,6 +7,23 @@ const path = require('path');
|
|
|
7
7
|
const os = require('os');
|
|
8
8
|
const { colorize, colors, messageColors, tags, formatLogMessage } = require('./colorize');
|
|
9
9
|
|
|
10
|
+
// === Constants ===
|
|
11
|
+
const GREP_DEFAULTS = {
|
|
12
|
+
TIMEOUT_SECONDS: 30,
|
|
13
|
+
MAX_REDIRECTS: 5,
|
|
14
|
+
MAX_SIZE_BYTES: 10 * 1024 * 1024, // 10MB
|
|
15
|
+
VALIDATION_TIMEOUT: 5000, // 5 seconds
|
|
16
|
+
SPAWN_TIMEOUT_MULTIPLIER: 1000, // Convert seconds to milliseconds
|
|
17
|
+
GREP_TIMEOUT: 10000, // 10 seconds for grep operations
|
|
18
|
+
MAX_BUFFER_SIZE: 1024 * 1024, // 1MB max buffer
|
|
19
|
+
DEFAULT_MAX_MATCHES: 1000,
|
|
20
|
+
GREP_SUCCESS_STATUS: 0,
|
|
21
|
+
GREP_NOT_FOUND_STATUS: 1,
|
|
22
|
+
CURL_SUCCESS_STATUS: 0,
|
|
23
|
+
VERSION_LINE_INDEX: 0,
|
|
24
|
+
RANDOM_STRING_LENGTH: 9
|
|
25
|
+
};
|
|
26
|
+
|
|
10
27
|
/**
|
|
11
28
|
* Creates a temporary file with content for grep processing
|
|
12
29
|
* @param {string} content - The content to write to temp file
|
|
@@ -15,7 +32,7 @@ const { colorize, colors, messageColors, tags, formatLogMessage } = require('./c
|
|
|
15
32
|
*/
|
|
16
33
|
function createTempFile(content, prefix = 'scanner_grep') {
|
|
17
34
|
const tempDir = os.tmpdir();
|
|
18
|
-
const tempFile = path.join(tempDir, `${prefix}_${Date.now()}_${Math.random().toString(36).substr(2,
|
|
35
|
+
const tempFile = path.join(tempDir, `${prefix}_${Date.now()}_${Math.random().toString(36).substr(2, GREP_DEFAULTS.RANDOM_STRING_LENGTH)}.tmp`);
|
|
19
36
|
|
|
20
37
|
try {
|
|
21
38
|
fs.writeFileSync(tempFile, content, 'utf8');
|
|
@@ -37,7 +54,7 @@ async function grepContent(content, searchPatterns, options = {}) {
|
|
|
37
54
|
ignoreCase = true,
|
|
38
55
|
wholeWord = false,
|
|
39
56
|
regex = false,
|
|
40
|
-
maxMatches =
|
|
57
|
+
maxMatches = GREP_DEFAULTS.DEFAULT_MAX_MATCHES
|
|
41
58
|
} = options;
|
|
42
59
|
|
|
43
60
|
if (!content || searchPatterns.length === 0) {
|
|
@@ -71,12 +88,12 @@ async function grepContent(content, searchPatterns, options = {}) {
|
|
|
71
88
|
try {
|
|
72
89
|
const result = spawnSync('grep', grepArgs, {
|
|
73
90
|
encoding: 'utf8',
|
|
74
|
-
timeout:
|
|
75
|
-
maxBuffer:
|
|
91
|
+
timeout: GREP_DEFAULTS.GREP_TIMEOUT,
|
|
92
|
+
maxBuffer: GREP_DEFAULTS.MAX_BUFFER_SIZE
|
|
76
93
|
});
|
|
77
94
|
|
|
78
95
|
// grep returns 0 if found, 1 if not found, 2+ for errors
|
|
79
|
-
if (result.status ===
|
|
96
|
+
if (result.status === GREP_DEFAULTS.GREP_SUCCESS_STATUS && result.stdout) {
|
|
80
97
|
allMatches.push({
|
|
81
98
|
pattern: pattern,
|
|
82
99
|
matches: result.stdout.split('\n').filter(line => line.trim().length > 0).slice(0, maxMatches)
|
|
@@ -122,13 +139,13 @@ async function grepContent(content, searchPatterns, options = {}) {
|
|
|
122
139
|
* @param {number} timeout - Timeout in seconds (default: 30)
|
|
123
140
|
* @returns {Promise<object>} Object with found boolean, matchedPattern, and content
|
|
124
141
|
*/
|
|
125
|
-
async function downloadAndGrep(url, searchPatterns, userAgent = '', grepOptions = {}, timeout =
|
|
142
|
+
async function downloadAndGrep(url, searchPatterns, userAgent = '', grepOptions = {}, timeout = GREP_DEFAULTS.TIMEOUT_SECONDS) {
|
|
126
143
|
try {
|
|
127
144
|
const curlArgs = [
|
|
128
145
|
'-s', // Silent mode
|
|
129
146
|
'-L', // Follow redirects
|
|
130
147
|
'--max-time', timeout.toString(),
|
|
131
|
-
'--max-redirs',
|
|
148
|
+
'--max-redirs', GREP_DEFAULTS.MAX_REDIRECTS.toString(),
|
|
132
149
|
'--fail-with-body', // Return body even on HTTP errors
|
|
133
150
|
'--compressed', // Accept compressed responses
|
|
134
151
|
];
|
|
@@ -151,15 +168,15 @@ async function downloadAndGrep(url, searchPatterns, userAgent = '', grepOptions
|
|
|
151
168
|
// Download content with curl
|
|
152
169
|
const curlResult = spawnSync('curl', curlArgs, {
|
|
153
170
|
encoding: 'utf8',
|
|
154
|
-
timeout: timeout *
|
|
155
|
-
maxBuffer:
|
|
171
|
+
timeout: timeout * GREP_DEFAULTS.SPAWN_TIMEOUT_MULTIPLIER,
|
|
172
|
+
maxBuffer: GREP_DEFAULTS.MAX_SIZE_BYTES
|
|
156
173
|
});
|
|
157
174
|
|
|
158
175
|
if (curlResult.error) {
|
|
159
176
|
throw curlResult.error;
|
|
160
177
|
}
|
|
161
178
|
|
|
162
|
-
if (curlResult.status !==
|
|
179
|
+
if (curlResult.status !== GREP_DEFAULTS.CURL_SUCCESS_STATUS) {
|
|
163
180
|
throw new Error(`Curl exited with status ${curlResult.status}: ${curlResult.stderr}`);
|
|
164
181
|
}
|
|
165
182
|
|
|
@@ -191,6 +208,9 @@ function createGrepHandler(config) {
|
|
|
191
208
|
searchStrings,
|
|
192
209
|
regexes,
|
|
193
210
|
matchedDomains,
|
|
211
|
+
addMatchedDomain,
|
|
212
|
+
isDomainAlreadyDetected,
|
|
213
|
+
onContentFetched,
|
|
194
214
|
currentUrl,
|
|
195
215
|
perSiteSubDomains,
|
|
196
216
|
ignoreDomains,
|
|
@@ -201,13 +221,23 @@ function createGrepHandler(config) {
|
|
|
201
221
|
matchedUrlsLogFile,
|
|
202
222
|
forceDebug,
|
|
203
223
|
userAgent,
|
|
224
|
+
resourceType,
|
|
204
225
|
hasSearchString,
|
|
205
226
|
grepOptions = {}
|
|
206
227
|
} = config;
|
|
207
228
|
|
|
208
229
|
return async function grepHandler(requestUrl) {
|
|
209
230
|
const respDomain = perSiteSubDomains ? (new URL(requestUrl)).hostname : getRootDomain(requestUrl);
|
|
231
|
+
const fullSubdomain = (new URL(requestUrl)).hostname; // Always get full subdomain for cache tracking
|
|
210
232
|
|
|
233
|
+
// Skip if already detected to avoid duplicates
|
|
234
|
+
if (isDomainAlreadyDetected(fullSubdomain)) {
|
|
235
|
+
if (forceDebug) {
|
|
236
|
+
console.log(formatLogMessage('debug', `[grep] Skipping already detected subdomain: ${fullSubdomain}`));
|
|
237
|
+
}
|
|
238
|
+
return;
|
|
239
|
+
}
|
|
240
|
+
|
|
211
241
|
// Only process URLs that match our regex patterns
|
|
212
242
|
const matchesRegex = regexes.some(re => re.test(requestUrl));
|
|
213
243
|
if (!matchesRegex) return;
|
|
@@ -243,7 +273,7 @@ function createGrepHandler(config) {
|
|
|
243
273
|
return;
|
|
244
274
|
}
|
|
245
275
|
|
|
246
|
-
|
|
276
|
+
addMatchedDomain(respDomain, resourceType, fullSubdomain);
|
|
247
277
|
const simplifiedUrl = getRootDomain(currentUrl);
|
|
248
278
|
|
|
249
279
|
if (siteConfig.verbose === 1) {
|
|
@@ -265,14 +295,23 @@ function createGrepHandler(config) {
|
|
|
265
295
|
}
|
|
266
296
|
|
|
267
297
|
// If searchstring IS defined, download and grep content
|
|
268
|
-
const result = await downloadAndGrep(requestUrl, searchStrings, userAgent, grepOptions,
|
|
269
|
-
|
|
298
|
+
const result = await downloadAndGrep(requestUrl, searchStrings, userAgent, grepOptions, GREP_DEFAULTS.TIMEOUT_SECONDS);
|
|
299
|
+
|
|
300
|
+
// Cache the fetched content if callback provided
|
|
301
|
+
if (onContentFetched && result.content) {
|
|
302
|
+
try {
|
|
303
|
+
onContentFetched(requestUrl, result.content);
|
|
304
|
+
} catch (cacheErr) {
|
|
305
|
+
if (forceDebug) console.log(formatLogMessage('debug', `[grep] Content caching failed: ${cacheErr.message}`));
|
|
306
|
+
}
|
|
307
|
+
}
|
|
308
|
+
|
|
270
309
|
if (result.found) {
|
|
271
310
|
if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
|
|
272
311
|
return;
|
|
273
312
|
}
|
|
274
313
|
|
|
275
|
-
|
|
314
|
+
addMatchedDomain(respDomain, resourceType, fullSubdomain);
|
|
276
315
|
const simplifiedUrl = getRootDomain(currentUrl);
|
|
277
316
|
|
|
278
317
|
if (siteConfig.verbose === 1) {
|
|
@@ -313,11 +352,11 @@ function validateGrepAvailability() {
|
|
|
313
352
|
try {
|
|
314
353
|
const result = spawnSync('grep', ['--version'], {
|
|
315
354
|
encoding: 'utf8',
|
|
316
|
-
timeout:
|
|
355
|
+
timeout: GREP_DEFAULTS.VALIDATION_TIMEOUT
|
|
317
356
|
});
|
|
318
357
|
|
|
319
|
-
if (result.status ===
|
|
320
|
-
const version = result.stdout.split('\n')[
|
|
358
|
+
if (result.status === GREP_DEFAULTS.GREP_SUCCESS_STATUS) {
|
|
359
|
+
const version = result.stdout.split('\n')[GREP_DEFAULTS.VERSION_LINE_INDEX] || 'Unknown version';
|
|
321
360
|
return {
|
|
322
361
|
isAvailable: true,
|
|
323
362
|
version: version.trim(),
|
package/nwss.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
// === Network scanner script (nwss.js) v1.0.
|
|
1
|
+
// === Network scanner script (nwss.js) v1.0.89 ===
|
|
2
2
|
|
|
3
3
|
// puppeteer for browser automation, fs for file system operations, psl for domain parsing.
|
|
4
4
|
// const pLimit = require('p-limit'); // Will be dynamically imported
|
|
@@ -11,6 +11,8 @@ const { compressMultipleFiles, formatFileSize } = require('./lib/compress');
|
|
|
11
11
|
const { parseSearchStrings, createResponseHandler, createCurlHandler } = require('./lib/searchstring');
|
|
12
12
|
const { applyAllFingerprintSpoofing } = require('./lib/fingerprint');
|
|
13
13
|
const { formatRules, handleOutput, getFormatDescription } = require('./lib/output');
|
|
14
|
+
// Curl functionality (replace searchstring curl handler)
|
|
15
|
+
const { validateCurlAvailability, createCurlHandler: createCurlModuleHandler } = require('./lib/curl');
|
|
14
16
|
// Rule validation
|
|
15
17
|
const { validateRulesetFile, validateFullConfig, testDomainValidation, cleanRulesetFile } = require('./lib/validate_rules');
|
|
16
18
|
// CF Bypass
|
|
@@ -123,7 +125,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
|
|
|
123
125
|
const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive } = require('./lib/browserhealth');
|
|
124
126
|
|
|
125
127
|
// --- Script Configuration & Constants ---
|
|
126
|
-
const VERSION = '1.0.
|
|
128
|
+
const VERSION = '1.0.89'; // Script version
|
|
127
129
|
|
|
128
130
|
// get startTime
|
|
129
131
|
const startTime = Date.now();
|
|
@@ -1458,21 +1460,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1458
1460
|
];
|
|
1459
1461
|
|
|
1460
1462
|
try {
|
|
1461
|
-
|
|
1462
|
-
const isHealthy = await isBrowserHealthy(browserInstance);
|
|
1463
|
-
if (!isHealthy) {
|
|
1464
|
-
if (forceDebug) {
|
|
1465
|
-
console.log(formatLogMessage('debug', `Browser health degraded before processing ${currentUrl} - forcing immediate restart`));
|
|
1466
|
-
}
|
|
1467
|
-
// Return special code to trigger immediate browser restart
|
|
1468
|
-
return {
|
|
1469
|
-
url: currentUrl,
|
|
1470
|
-
rules: [],
|
|
1471
|
-
success: false,
|
|
1472
|
-
needsImmediateRestart: true,
|
|
1473
|
-
error: 'Browser health degraded - restart required'
|
|
1474
|
-
};
|
|
1475
|
-
}
|
|
1463
|
+
|
|
1476
1464
|
// Check for Protocol timeout errors that indicate browser is broken
|
|
1477
1465
|
if (browserInstance.process() && browserInstance.process().killed) {
|
|
1478
1466
|
throw new Error('Browser process was killed - restart required');
|
|
@@ -1484,22 +1472,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1484
1472
|
throw new Error('Failed to create valid page instance');
|
|
1485
1473
|
}
|
|
1486
1474
|
|
|
1487
|
-
|
|
1488
|
-
const stillHealthy = await isQuicklyResponsive(browserInstance, 3000);
|
|
1489
|
-
|
|
1490
|
-
if (!stillHealthy) {
|
|
1491
|
-
if (forceDebug) {
|
|
1492
|
-
console.log(formatLogMessage('debug', `Browser unresponsive during page setup for ${currentUrl} - triggering restart`));
|
|
1493
|
-
}
|
|
1494
|
-
return {
|
|
1495
|
-
url: currentUrl,
|
|
1496
|
-
rules: [],
|
|
1497
|
-
success: false,
|
|
1498
|
-
needsImmediateRestart: true,
|
|
1499
|
-
error: 'Browser became unresponsive during page setup - restart required'
|
|
1500
|
-
};
|
|
1501
|
-
}
|
|
1502
|
-
|
|
1475
|
+
|
|
1503
1476
|
// Set aggressive timeouts for problematic operations
|
|
1504
1477
|
// Optimized timeouts for Puppeteer 23.x responsiveness
|
|
1505
1478
|
page.setDefaultTimeout(Math.min(timeout, TIMEOUTS.DEFAULT_PAGE_REDUCED));
|
|
@@ -1580,6 +1553,8 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1580
1553
|
// --- START: evaluateOnNewDocument for Fetch/XHR Interception (Moved and Fixed) ---
|
|
1581
1554
|
// This script is injected if --eval-on-doc is used or siteConfig.evaluateOnNewDocument is true.
|
|
1582
1555
|
const shouldInjectEvalForPage = siteConfig.evaluateOnNewDocument === true || globalEvalOnDoc;
|
|
1556
|
+
let evalOnDocSuccess = false; // Track injection success for fallback logic
|
|
1557
|
+
|
|
1583
1558
|
if (shouldInjectEvalForPage) {
|
|
1584
1559
|
if (forceDebug) {
|
|
1585
1560
|
if (globalEvalOnDoc) {
|
|
@@ -1588,24 +1563,29 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1588
1563
|
console.log(formatLogMessage('debug', `[evalOnDoc] Site-specific Fetch/XHR interception enabled for: ${currentUrl}`));
|
|
1589
1564
|
}
|
|
1590
1565
|
}
|
|
1591
|
-
|
|
1566
|
+
|
|
1567
|
+
// Strategy 1: Try full injection with health check
|
|
1592
1568
|
let browserResponsive = false;
|
|
1593
1569
|
try {
|
|
1594
1570
|
await Promise.race([
|
|
1595
1571
|
browserInstance.version(), // Quick responsiveness test
|
|
1596
1572
|
new Promise((_, reject) =>
|
|
1597
|
-
setTimeout(() => reject(new Error('Browser health check timeout')),
|
|
1573
|
+
setTimeout(() => reject(new Error('Browser health check timeout')), 3000)
|
|
1598
1574
|
)
|
|
1599
1575
|
]);
|
|
1600
1576
|
browserResponsive = true;
|
|
1601
1577
|
} catch (healthErr) {
|
|
1602
|
-
|
|
1578
|
+
if (forceDebug) {
|
|
1579
|
+
console.log(formatLogMessage('debug', `[evalOnDoc] Browser health check failed: ${healthErr.message}`));
|
|
1580
|
+
}
|
|
1603
1581
|
browserResponsive = false;
|
|
1604
1582
|
}
|
|
1605
1583
|
|
|
1584
|
+
// Strategy 2: Try full injection (bounded by a timeout) if browser is responsive
|
|
1606
1585
|
if (browserResponsive) {
|
|
1607
|
-
|
|
1608
|
-
|
|
1586
|
+
try {
|
|
1587
|
+
await Promise.race([
|
|
1588
|
+
page.evaluateOnNewDocument(() => {
|
|
1609
1589
|
// Prevent infinite reload loops
|
|
1610
1590
|
let reloadCount = 0;
|
|
1611
1591
|
const MAX_RELOADS = 2;
|
|
@@ -1666,18 +1646,62 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1666
1646
|
return originalXHROpen.apply(this, arguments);
|
|
1667
1647
|
}
|
|
1668
1648
|
};
|
|
1669
|
-
})
|
|
1670
|
-
|
|
1671
|
-
|
|
1672
|
-
|
|
1673
|
-
|
|
1674
|
-
|
|
1649
|
+
}),
|
|
1650
|
+
new Promise((_, reject) =>
|
|
1651
|
+
setTimeout(() => reject(new Error('Injection timeout')), 8000)
|
|
1652
|
+
)
|
|
1653
|
+
]);
|
|
1654
|
+
evalOnDocSuccess = true;
|
|
1655
|
+
if (forceDebug) {
|
|
1656
|
+
console.log(formatLogMessage('debug', `[evalOnDoc] Full injection successful for ${currentUrl}`));
|
|
1675
1657
|
}
|
|
1676
|
-
|
|
1658
|
+
} catch (fullInjectionErr) {
|
|
1659
|
+
if (forceDebug) {
|
|
1660
|
+
console.log(formatLogMessage('debug', `[evalOnDoc] Full injection failed: ${fullInjectionErr.message}, trying simplified fallback`));
|
|
1661
|
+
}
|
|
1662
|
+
|
|
1663
|
+
// Strategy 3: Fallback - Try minimal injection (just fetch monitoring)
|
|
1664
|
+
try {
|
|
1665
|
+
await Promise.race([
|
|
1666
|
+
page.evaluateOnNewDocument(() => {
|
|
1667
|
+
// Minimal injection - just fetch monitoring
|
|
1668
|
+
if (window.fetch) {
|
|
1669
|
+
const originalFetch = window.fetch;
|
|
1670
|
+
window.fetch = (...args) => {
|
|
1671
|
+
try {
|
|
1672
|
+
console.log('[evalOnDoc][fetch-minimal]', args[0]);
|
|
1673
|
+
return originalFetch.apply(this, args);
|
|
1674
|
+
} catch (err) {
|
|
1675
|
+
return originalFetch.apply(this, args);
|
|
1676
|
+
}
|
|
1677
|
+
};
|
|
1678
|
+
}
|
|
1679
|
+
}),
|
|
1680
|
+
new Promise((_, reject) =>
|
|
1681
|
+
setTimeout(() => reject(new Error('Minimal injection timeout')), 3000)
|
|
1682
|
+
)
|
|
1683
|
+
]);
|
|
1684
|
+
evalOnDocSuccess = true;
|
|
1685
|
+
if (forceDebug) {
|
|
1686
|
+
console.log(formatLogMessage('debug', `[evalOnDoc] Minimal injection successful for ${currentUrl}`));
|
|
1687
|
+
}
|
|
1688
|
+
} catch (minimalInjectionErr) {
|
|
1689
|
+
if (forceDebug) {
|
|
1690
|
+
console.log(formatLogMessage('debug', `[evalOnDoc] Minimal injection also failed: ${minimalInjectionErr.message}`));
|
|
1691
|
+
}
|
|
1692
|
+
evalOnDocSuccess = false;
|
|
1693
|
+
}
|
|
1694
|
+
}
|
|
1677
1695
|
} else {
|
|
1678
1696
|
if (forceDebug) {
|
|
1679
|
-
console.log(formatLogMessage('debug', `[evalOnDoc]
|
|
1697
|
+
console.log(formatLogMessage('debug', `[evalOnDoc] Browser unresponsive, skipping injection for ${currentUrl}`));
|
|
1680
1698
|
}
|
|
1699
|
+
evalOnDocSuccess = false;
|
|
1700
|
+
}
|
|
1701
|
+
|
|
1702
|
+
// Final status logging
|
|
1703
|
+
if (!evalOnDocSuccess) {
|
|
1704
|
+
console.warn(formatLogMessage('warn', `[evalOnDoc] All injection strategies failed for ${currentUrl} - continuing with standard request monitoring only`));
|
|
1681
1705
|
}
|
|
1682
1706
|
}
|
|
1683
1707
|
// --- END: evaluateOnNewDocument for Fetch/XHR Interception ---
|
|
@@ -1825,6 +1849,18 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
1825
1849
|
console.log(formatLogMessage('debug', `Using grep: ${grepCheck.version}`));
|
|
1826
1850
|
}
|
|
1827
1851
|
}
|
|
1852
|
+
|
|
1853
|
+
// Validate curl availability if needed
|
|
1854
|
+
if (useCurl) {
|
|
1855
|
+
const curlCheck = validateCurlAvailability();
|
|
1856
|
+
if (!curlCheck.isAvailable) {
|
|
1857
|
+
console.warn(formatLogMessage('warn', `Curl not available for ${currentUrl}: ${curlCheck.error}. Skipping curl-based analysis.`));
|
|
1858
|
+
useCurl = false;
|
|
1859
|
+
useGrep = false; // Grep requires curl
|
|
1860
|
+
} else if (forceDebug) {
|
|
1861
|
+
console.log(formatLogMessage('debug', `Using curl: ${curlCheck.version}`));
|
|
1862
|
+
}
|
|
1863
|
+
}
|
|
1828
1864
|
|
|
1829
1865
|
// Parse whois and dig terms
|
|
1830
1866
|
const whoisTerms = siteConfig.whois && Array.isArray(siteConfig.whois) ? siteConfig.whois : null;
|
|
@@ -2473,9 +2509,9 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2473
2509
|
// Use grep handler if both grep and searchstring/searchstring_and are enabled
|
|
2474
2510
|
if (useGrep && (hasSearchString || hasSearchStringAnd)) {
|
|
2475
2511
|
const grepHandler = createGrepHandler({
|
|
2476
|
-
searchStrings,
|
|
2477
|
-
searchStringsAnd,
|
|
2478
2512
|
regexes,
|
|
2513
|
+
searchStrings,
|
|
2514
|
+
searchStringsAnd,
|
|
2479
2515
|
matchedDomains,
|
|
2480
2516
|
addMatchedDomain, // Pass the helper function
|
|
2481
2517
|
isDomainAlreadyDetected,
|
|
@@ -2496,8 +2532,7 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2496
2532
|
forceDebug,
|
|
2497
2533
|
userAgent: curlUserAgent,
|
|
2498
2534
|
resourceType,
|
|
2499
|
-
hasSearchString,
|
|
2500
|
-
hasSearchStringAnd,
|
|
2535
|
+
hasSearchString: hasSearchString || hasSearchStringAnd,
|
|
2501
2536
|
grepOptions: {
|
|
2502
2537
|
ignoreCase: true,
|
|
2503
2538
|
wholeWord: false,
|
|
@@ -2508,20 +2543,20 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2508
2543
|
setImmediate(() => grepHandler(reqUrl));
|
|
2509
2544
|
} else {
|
|
2510
2545
|
// Use regular curl handler
|
|
2511
|
-
const
|
|
2546
|
+
const curlHandlerFromCurlModule = createCurlModuleHandler({
|
|
2512
2547
|
searchStrings,
|
|
2513
2548
|
searchStringsAnd,
|
|
2514
2549
|
hasSearchStringAnd,
|
|
2515
2550
|
regexes,
|
|
2516
2551
|
matchedDomains,
|
|
2517
|
-
addMatchedDomain,
|
|
2552
|
+
addMatchedDomain,
|
|
2518
2553
|
isDomainAlreadyDetected,
|
|
2519
|
-
|
|
2520
|
-
|
|
2521
|
-
|
|
2522
|
-
|
|
2523
|
-
|
|
2524
|
-
|
|
2554
|
+
onContentFetched: smartCache && !ignoreCache ? (url, content) => {
|
|
2555
|
+
// Only cache if not bypassing cache
|
|
2556
|
+
if (!shouldBypassCacheForUrl(url, siteConfig)) {
|
|
2557
|
+
smartCache.cacheRequest(url, { method: 'GET', siteConfig }, { body: content, status: 200 });
|
|
2558
|
+
}
|
|
2559
|
+
} : undefined,
|
|
2525
2560
|
currentUrl,
|
|
2526
2561
|
perSiteSubDomains,
|
|
2527
2562
|
ignoreDomains,
|
|
@@ -2533,10 +2568,10 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
2533
2568
|
forceDebug,
|
|
2534
2569
|
userAgent: curlUserAgent,
|
|
2535
2570
|
resourceType,
|
|
2536
|
-
hasSearchString
|
|
2571
|
+
hasSearchString: hasSearchString || hasSearchStringAnd
|
|
2537
2572
|
});
|
|
2538
2573
|
|
|
2539
|
-
setImmediate(() =>
|
|
2574
|
+
setImmediate(() => curlHandlerFromCurlModule(reqUrl));
|
|
2540
2575
|
}
|
|
2541
2576
|
} catch (curlErr) {
|
|
2542
2577
|
if (forceDebug) {
|
|
@@ -3119,20 +3154,31 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3119
3154
|
const batchEnd = Math.min(batchStart + RESOURCE_CLEANUP_INTERVAL, totalUrls);
|
|
3120
3155
|
const currentBatch = allTasks.slice(batchStart, batchEnd);
|
|
3121
3156
|
|
|
3122
|
-
//
|
|
3123
|
-
|
|
3124
|
-
|
|
3125
|
-
|
|
3126
|
-
|
|
3127
|
-
|
|
3128
|
-
|
|
3129
|
-
|
|
3130
|
-
|
|
3131
|
-
|
|
3132
|
-
|
|
3133
|
-
|
|
3134
|
-
|
|
3135
|
-
|
|
3157
|
+
// IMPROVED: Only check health if we have indicators of problems
|
|
3158
|
+
let healthCheck = { shouldRestart: false, reason: null };
|
|
3159
|
+
const recentResults = results.slice(-8); // Check more results for better pattern detection
|
|
3160
|
+
const recentFailureRate = recentResults.length > 0 ?
|
|
3161
|
+
recentResults.filter(r => !r.success).length / recentResults.length : 0;
|
|
3162
|
+
const hasHighFailureRate = recentFailureRate > 0.75; // 75% failure threshold (more conservative)
|
|
3163
|
+
const hasCriticalErrors = recentResults.filter(r => r.needsImmediateRestart).length > 2;
|
|
3164
|
+
|
|
3165
|
+
// Only run health checks when we have STRONG indicators of problems
|
|
3166
|
+
if (urlsSinceLastCleanup > 15 && (
|
|
3167
|
+
(hasHighFailureRate && recentResults.length >= 5) || // Need sufficient sample size
|
|
3168
|
+
hasCriticalErrors ||
|
|
3169
|
+
urlsSinceLastCleanup > RESOURCE_CLEANUP_INTERVAL * 0.9 // Very close to cleanup limit
|
|
3170
|
+
)) {
|
|
3171
|
+
healthCheck = await monitorBrowserHealth(browser, {}, {
|
|
3172
|
+
siteIndex: Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL),
|
|
3173
|
+
totalSites: Math.ceil(totalUrls / RESOURCE_CLEANUP_INTERVAL),
|
|
3174
|
+
urlsSinceCleanup: urlsSinceLastCleanup,
|
|
3175
|
+
cleanupInterval: RESOURCE_CLEANUP_INTERVAL,
|
|
3176
|
+
forceDebug,
|
|
3177
|
+
silentMode
|
|
3178
|
+
});
|
|
3179
|
+
} else if (forceDebug && urlsSinceLastCleanup > 10) {
|
|
3180
|
+
console.log(formatLogMessage('debug', `Skipping health check: failure rate ${Math.round(recentFailureRate * 100)}%, critical errors: ${hasCriticalErrors ? 'yes' : 'no'}`));
|
|
3181
|
+
}
|
|
3136
3182
|
|
|
3137
3183
|
const batchSize = currentBatch.length;
|
|
3138
3184
|
|
|
@@ -3142,17 +3188,21 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3142
3188
|
// Check if processing this entire site would exceed cleanup interval OR health check suggests restart
|
|
3143
3189
|
const wouldExceedLimit = urlsSinceLastCleanup + batchSize >= Math.min(RESOURCE_CLEANUP_INTERVAL, 100);
|
|
3144
3190
|
const isNotLastBatch = batchEnd < totalUrls;
|
|
3191
|
+
// IMPROVED: More restrictive health-based restart conditions
|
|
3192
|
+
const shouldRestartFromHealth = healthCheck.shouldRestart &&
|
|
3193
|
+
!healthCheck.reason?.includes('Scheduled cleanup') &&
|
|
3194
|
+
(healthCheck.reason?.includes('Critical') || healthCheck.reason?.includes('disconnected'));
|
|
3145
3195
|
|
|
3146
3196
|
// Restart browser if we've processed enough URLs, health check suggests it, and this isn't the last site
|
|
3147
|
-
if ((wouldExceedLimit ||
|
|
3197
|
+
if ((wouldExceedLimit || shouldRestartFromHealth || (hasHighFailureRate && recentResults.length >= 6)) && urlsSinceLastCleanup > 8 && isNotLastBatch) {
|
|
3148
3198
|
|
|
3149
3199
|
let restartReason = 'Unknown';
|
|
3150
|
-
if (
|
|
3200
|
+
if (shouldRestartFromHealth) {
|
|
3151
3201
|
restartReason = healthCheck.reason;
|
|
3152
|
-
} else if (
|
|
3153
|
-
restartReason =
|
|
3202
|
+
} else if (hasHighFailureRate) {
|
|
3203
|
+
restartReason = `High failure rate: ${Math.round(recentFailureRate * 100)}% in recent batch`;
|
|
3154
3204
|
} else if (wouldExceedLimit) {
|
|
3155
|
-
restartReason = `Processed ${urlsSinceLastCleanup} URLs`;
|
|
3205
|
+
restartReason = `Processed ${urlsSinceLastCleanup} URLs (scheduled maintenance)`;
|
|
3156
3206
|
}
|
|
3157
3207
|
|
|
3158
3208
|
if (!silentMode) {
|
|
@@ -3220,8 +3270,18 @@ function setupFrameHandling(page, forceDebug) {
|
|
|
3220
3270
|
const batchTasks = currentBatch.map(task => originalLimit(() => processUrl(task.url, task.config, browser)));
|
|
3221
3271
|
const batchResults = await Promise.all(batchTasks);
|
|
3222
3272
|
|
|
3223
|
-
//
|
|
3224
|
-
const
|
|
3273
|
+
// IMPROVED: Much more conservative emergency restart logic
|
|
3274
|
+
const criticalRestartCount = batchResults.filter(r => r.needsImmediateRestart).length;
|
|
3275
|
+
// Require BOTH of the following (the threshold is the larger of the two):
|
|
3276
|
+
// - At least 50% of the batch (rounded down) has critical errors, AND
|
|
3277
|
+
// - At least 3 critical errors, regardless of batch size
|
|
3278
|
+
const restartThreshold = Math.max(3, Math.floor(batchSize * 0.5)); // 50% of batch or min 3
|
|
3279
|
+
const needsImmediateRestart = criticalRestartCount >= restartThreshold && criticalRestartCount >= 2;
|
|
3280
|
+
|
|
3281
|
+
// Log restart decision for debugging
|
|
3282
|
+
if (forceDebug && criticalRestartCount > 0) {
|
|
3283
|
+
console.log(formatLogMessage('debug', `Emergency restart decision: ${criticalRestartCount}/${batchSize} critical errors (threshold: ${restartThreshold}, restart: ${needsImmediateRestart ? 'YES' : 'NO'})`));
|
|
3284
|
+
}
|
|
3225
3285
|
|
|
3226
3286
|
// Log completion of concurrent processing
|
|
3227
3287
|
if (forceDebug) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fanboynz/network-scanner",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.89",
|
|
4
4
|
"description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
|
|
5
5
|
"main": "nwss.js",
|
|
6
6
|
"scripts": {
|