@fanboynz/network-scanner 1.0.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm-publish.yml +33 -0
- package/JSONMANUAL.md +121 -0
- package/LICENSE +674 -0
- package/README.md +357 -0
- package/config.json +74 -0
- package/lib/browserexit.js +522 -0
- package/lib/browserhealth.js +308 -0
- package/lib/cloudflare.js +660 -0
- package/lib/colorize.js +168 -0
- package/lib/compare.js +159 -0
- package/lib/compress.js +129 -0
- package/lib/fingerprint.js +613 -0
- package/lib/flowproxy.js +274 -0
- package/lib/grep.js +348 -0
- package/lib/ignore_similar.js +237 -0
- package/lib/nettools.js +1200 -0
- package/lib/output.js +633 -0
- package/lib/redirect.js +384 -0
- package/lib/searchstring.js +561 -0
- package/lib/validate_rules.js +1107 -0
- package/nwss.1 +824 -0
- package/nwss.js +2488 -0
- package/package.json +45 -0
- package/regex-samples.md +27 -0
- package/scanner-script-org.js +588 -0
package/lib/flowproxy.js
ADDED
@@ -0,0 +1,274 @@

```js
/**
 * FlowProxy protection detection and handling module
 * Detects flowProxy DDoS protection and handles it appropriately for security scanning
 */

/**
 * Cross-version compatible timeout function for Puppeteer
 * @param {import('puppeteer').Page} page - Puppeteer page instance
 * @param {number} timeout - Timeout in milliseconds
 * @returns {Promise<void>}
 */
async function waitForTimeout(page, timeout) {
  try {
    if (typeof page.waitForTimeout === 'function') {
      await page.waitForTimeout(timeout);
    } else if (typeof page.waitFor === 'function') {
      await page.waitFor(timeout);
    } else {
      await new Promise(resolve => setTimeout(resolve, timeout));
    }
  } catch (error) {
    await new Promise(resolve => setTimeout(resolve, timeout));
  }
}

/**
 * Analyzes the current page to detect flowProxy protection
 * @param {import('puppeteer').Page} page - Puppeteer page instance
 * @returns {Promise<object>} Detection information object
 */
async function analyzeFlowProxyProtection(page) {
  try {
    return await page.evaluate(() => {
      const title = document.title || '';
      const bodyText = document.body ? document.body.textContent : '';
      const url = window.location.href;

      // Check for flowProxy/aurologic specific indicators
      const hasFlowProxyDomain = url.includes('aurologic') ||
                                 url.includes('flowproxy') ||
                                 url.includes('ddos-protection');

      // Check for flowProxy challenge page indicators
      const hasProtectionPage = title.includes('DDoS Protection') ||
                                title.includes('Please wait') ||
                                title.includes('Checking your browser') ||
                                bodyText.includes('DDoS protection by aurologic') ||
                                bodyText.includes('flowProxy') ||
                                bodyText.includes('Verifying your browser');

      // Check for specific flowProxy elements
      const hasFlowProxyElements = document.querySelector('[data-flowproxy]') !== null ||
                                   document.querySelector('.flowproxy-challenge') !== null ||
                                   document.querySelector('#flowproxy-container') !== null ||
                                   document.querySelector('.aurologic-protection') !== null;

      // Check for challenge indicators
      const hasChallengeElements = document.querySelector('.challenge-running') !== null ||
                                   document.querySelector('.verification-container') !== null ||
                                   document.querySelector('input[name="flowproxy-response"]') !== null;

      // Check for rate limiting indicators
      const isRateLimited = bodyText.includes('Rate limited') ||
                            bodyText.includes('Too many requests') ||
                            bodyText.includes('Please try again later') ||
                            title.includes('429') ||
                            title.includes('Rate Limit');

      // Check for JavaScript challenge indicators
      const hasJSChallenge = document.querySelector('script[src*="flowproxy"]') !== null ||
                             document.querySelector('script[src*="aurologic"]') !== null ||
                             bodyText.includes('JavaScript is required') ||
                             bodyText.includes('Please enable JavaScript');

      // Check for loading/processing indicators
      const isProcessing = bodyText.includes('Processing') ||
                           bodyText.includes('Loading') ||
                           document.querySelector('.loading-spinner') !== null ||
                           document.querySelector('.processing-indicator') !== null;

      const isFlowProxyDetected = hasFlowProxyDomain ||
                                  hasProtectionPage ||
                                  hasFlowProxyElements ||
                                  hasChallengeElements;

      return {
        isFlowProxyDetected,
        hasFlowProxyDomain,
        hasProtectionPage,
        hasFlowProxyElements,
        hasChallengeElements,
        isRateLimited,
        hasJSChallenge,
        isProcessing,
        title,
        url,
        bodySnippet: bodyText.substring(0, 200) // First 200 chars for debugging
      };
    });
  } catch (error) {
    return {
      isFlowProxyDetected: false,
      hasFlowProxyDomain: false,
      hasProtectionPage: false,
      hasFlowProxyElements: false,
      hasChallengeElements: false,
      isRateLimited: false,
      hasJSChallenge: false,
      isProcessing: false,
      error: error.message
    };
  }
}

/**
 * Handles flowProxy protection by implementing appropriate delays and retry logic
 * @param {import('puppeteer').Page} page - Puppeteer page instance
 * @param {string} currentUrl - Current URL being processed
 * @param {object} siteConfig - Site configuration object
 * @param {boolean} forceDebug - Debug mode flag
 * @returns {Promise<object>} Result object with handling details
 */
async function handleFlowProxyProtection(page, currentUrl, siteConfig, forceDebug = false) {
  const result = {
    flowProxyDetection: { attempted: false, detected: false },
    handlingResult: { attempted: false, success: false },
    overallSuccess: true,
    errors: [],
    warnings: []
  };

  try {
    if (forceDebug) console.log(`[debug][flowproxy] Checking for flowProxy protection on ${currentUrl}`);

    // Wait a moment for the page to load
    await waitForTimeout(page, 2000);

    const detectionInfo = await analyzeFlowProxyProtection(page);
    result.flowProxyDetection = {
      attempted: true,
      detected: detectionInfo.isFlowProxyDetected,
      details: detectionInfo
    };

    if (detectionInfo.isFlowProxyDetected) {
      result.handlingResult.attempted = true;

      if (forceDebug) {
        console.log(`[debug][flowproxy] FlowProxy protection detected on ${currentUrl}:`);
        console.log(`[debug][flowproxy] Page Title: "${detectionInfo.title}"`);
        console.log(`[debug][flowproxy] Current URL: ${detectionInfo.url}`);
        console.log(`[debug][flowproxy] Has Protection Page: ${detectionInfo.hasProtectionPage}`);
        console.log(`[debug][flowproxy] Has Challenge Elements: ${detectionInfo.hasChallengeElements}`);
        console.log(`[debug][flowproxy] Is Rate Limited: ${detectionInfo.isRateLimited}`);
        console.log(`[debug][flowproxy] Has JS Challenge: ${detectionInfo.hasJSChallenge}`);
      }

      // Handle rate limiting
      if (detectionInfo.isRateLimited) {
        const rateLimitDelay = siteConfig.flowproxy_delay || 30000; // 30 second default
        result.warnings.push(`Rate limiting detected - implementing ${rateLimitDelay}ms delay`);
        if (forceDebug) console.log(`[debug][flowproxy] Rate limiting detected, waiting ${rateLimitDelay}ms`);
        await waitForTimeout(page, rateLimitDelay);
      }

      // Handle JavaScript challenges by waiting for completion
      if (detectionInfo.hasJSChallenge || detectionInfo.isProcessing) {
        const jsWaitTime = siteConfig.flowproxy_js_timeout || 15000; // 15 second default
        if (forceDebug) console.log(`[debug][flowproxy] JavaScript challenge detected, waiting up to ${jsWaitTime}ms for completion`);

        try {
          // Wait for challenge to complete or timeout
          await page.waitForFunction(
            () => {
              const bodyText = document.body ? document.body.textContent : '';
              return !bodyText.includes('Processing') &&
                     !bodyText.includes('Checking your browser') &&
                     !bodyText.includes('Please wait') &&
                     !document.querySelector('.loading-spinner') &&
                     !document.querySelector('.processing-indicator');
            },
            { timeout: jsWaitTime }
          );

          if (forceDebug) console.log(`[debug][flowproxy] JavaScript challenge appears to have completed`);
        } catch (timeoutErr) {
          result.warnings.push(`JavaScript challenge timeout after ${jsWaitTime}ms`);
          if (forceDebug) console.log(`[debug][flowproxy] JavaScript challenge timeout - continuing anyway`);
        }
      }

      // Implement additional delay for flowProxy processing
      const additionalDelay = siteConfig.flowproxy_additional_delay || 5000; // 5 second default
      if (forceDebug) console.log(`[debug][flowproxy] Implementing additional ${additionalDelay}ms delay for flowProxy processing`);
      await waitForTimeout(page, additionalDelay);

      // Check if we're still on a protection page
      const finalCheck = await analyzeFlowProxyProtection(page);
      if (finalCheck.isFlowProxyDetected && finalCheck.hasProtectionPage) {
        result.warnings.push('Still on flowProxy protection page after handling attempts');
        if (forceDebug) console.log(`[debug][flowproxy] Warning: Still appears to be on protection page`);
      } else {
        result.handlingResult.success = true;
        if (forceDebug) console.log(`[debug][flowproxy] Successfully handled flowProxy protection for ${currentUrl}`);
      }

    } else {
      if (forceDebug) console.log(`[debug][flowproxy] No flowProxy protection detected on ${currentUrl}`);
      result.overallSuccess = true; // No protection to handle
    }

  } catch (error) {
    result.errors.push(`FlowProxy handling error: ${error.message}`);
    result.overallSuccess = false;
    if (forceDebug) {
      console.log(`[debug][flowproxy] FlowProxy handling failed for ${currentUrl}:`);
      console.log(`[debug][flowproxy] Error: ${error.message}`);
    }
  }

  // Log overall result
  if (result.errors.length > 0 && forceDebug) {
    console.log(`[debug][flowproxy] FlowProxy handling completed with errors for ${currentUrl}:`);
    result.errors.forEach(error => {
      console.log(`[debug][flowproxy] - ${error}`);
    });
  } else if (result.warnings.length > 0 && forceDebug) {
    console.log(`[debug][flowproxy] FlowProxy handling completed with warnings for ${currentUrl}:`);
    result.warnings.forEach(warning => {
      console.log(`[debug][flowproxy] - ${warning}`);
    });
  } else if (result.flowProxyDetection.attempted && forceDebug) {
    console.log(`[debug][flowproxy] FlowProxy handling completed successfully for ${currentUrl}`);
  }

  return result;
}

/**
 * Checks if the current page might be behind flowProxy protection
 * @param {import('puppeteer').Page} page - Puppeteer page instance
 * @returns {Promise<boolean>} True if flowProxy protection is suspected
 */
async function isFlowProxyProtected(page) {
  try {
    const detection = await analyzeFlowProxyProtection(page);
    return detection.isFlowProxyDetected;
  } catch (error) {
    return false;
  }
}

/**
 * Gets recommended timeout values for flowProxy protected sites
 * @param {object} siteConfig - Site configuration object
 * @returns {object} Recommended timeout values
 */
function getFlowProxyTimeouts(siteConfig) {
  return {
    pageTimeout: siteConfig.flowproxy_page_timeout || 45000, // 45 seconds
    navigationTimeout: siteConfig.flowproxy_nav_timeout || 45000, // 45 seconds
    challengeTimeout: siteConfig.flowproxy_js_timeout || 15000, // 15 seconds
    rateLimit: siteConfig.flowproxy_delay || 30000, // 30 seconds
    additionalDelay: siteConfig.flowproxy_additional_delay || 5000 // 5 seconds
  };
}

module.exports = {
  analyzeFlowProxyProtection,
  handleFlowProxyProtection,
  isFlowProxyProtected,
  getFlowProxyTimeouts,
  waitForTimeout
};
```
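For context, a minimal sketch of how these exports might be wired into a Puppeteer scan loop. This is not code from the package: the `puppeteer` launch boilerplate, the `example.com` target, and the `siteConfig` shape are assumptions for illustration (the config keys mirror the `flowproxy_*` defaults above).

```js
// Hypothetical usage sketch - not part of the published package.
const puppeteer = require('puppeteer');
const {
  handleFlowProxyProtection,
  getFlowProxyTimeouts
} = require('./lib/flowproxy');

(async () => {
  const siteConfig = { flowproxy_delay: 10000 }; // assumed per-site config shape
  const timeouts = getFlowProxyTimeouts(siteConfig);

  const browser = await puppeteer.launch();
  const page = await browser.newPage();
  page.setDefaultNavigationTimeout(timeouts.navigationTimeout);

  await page.goto('https://example.com', { waitUntil: 'domcontentloaded' });

  // Detect and wait out any flowProxy challenge before scanning the page
  const result = await handleFlowProxyProtection(page, 'https://example.com', siteConfig, true);
  console.log('handled:', result.handlingResult.success, 'warnings:', result.warnings);

  await browser.close();
})();
```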
package/lib/grep.js
ADDED
@@ -0,0 +1,348 @@

```js
// === grep.js - Grep-based Content Search Module ===
// Alternative to searchstring.js using grep for pattern matching

const fs = require('fs');
const { spawnSync } = require('child_process');
const path = require('path');
const os = require('os');
const { colorize, colors, messageColors, tags, formatLogMessage } = require('./colorize');

/**
 * Creates a temporary file with content for grep processing
 * @param {string} content - The content to write to temp file
 * @param {string} prefix - Prefix for temp filename
 * @returns {string} Path to the created temporary file
 */
function createTempFile(content, prefix = 'scanner_grep') {
  const tempDir = os.tmpdir();
  const tempFile = path.join(tempDir, `${prefix}_${Date.now()}_${Math.random().toString(36).substr(2, 9)}.tmp`);

  try {
    fs.writeFileSync(tempFile, content, 'utf8');
    return tempFile;
  } catch (error) {
    throw new Error(`Failed to create temp file: ${error.message}`);
  }
}

/**
 * Searches content using grep with the provided patterns
 * @param {string} content - The content to search
 * @param {Array<string>} searchPatterns - Array of grep patterns to search for
 * @param {object} options - Grep options
 * @returns {Promise<object>} Object with found boolean, matchedPattern, and allMatches array
 */
async function grepContent(content, searchPatterns, options = {}) {
  const {
    ignoreCase = true,
    wholeWord = false,
    regex = false,
    maxMatches = 1000
  } = options;

  if (!content || searchPatterns.length === 0) {
    return { found: false, matchedPattern: null, allMatches: [] };
  }

  let tempFile = null;

  try {
    // Create temporary file with content
    tempFile = createTempFile(content, 'grep_search');

    const allMatches = [];
    let firstMatch = null;

    for (const pattern of searchPatterns) {
      if (!pattern || pattern.trim().length === 0) continue;

      const grepArgs = [
        '--text', // Treat file as text
        '--color=never', // Disable color output
      ];

      if (ignoreCase) grepArgs.push('-i');
      if (wholeWord) grepArgs.push('-w');
      if (!regex) grepArgs.push('-F'); // Fixed strings (literal)

      // Add pattern and file
      grepArgs.push(pattern, tempFile);

      try {
        const result = spawnSync('grep', grepArgs, {
          encoding: 'utf8',
          timeout: 10000, // 10 second timeout
          maxBuffer: 1024 * 1024 // 1MB max buffer
        });

        // grep returns 0 if found, 1 if not found, 2+ for errors
        if (result.status === 0 && result.stdout) {
          allMatches.push({
            pattern: pattern,
            matches: result.stdout.split('\n').filter(line => line.trim().length > 0).slice(0, maxMatches)
          });

          if (!firstMatch) {
            firstMatch = pattern;
          }
        }

      } catch (grepErr) {
        // Continue with next pattern if this one fails
        console.warn(formatLogMessage('warn', `[grep] Pattern "${pattern}" failed: ${grepErr.message}`));
      }
    }

    return {
      found: allMatches.length > 0,
      matchedPattern: firstMatch,
      allMatches: allMatches
    };

  } catch (error) {
    throw new Error(`Grep search failed: ${error.message}`);
  } finally {
    // Clean up temporary file
    if (tempFile) {
      try {
        fs.unlinkSync(tempFile);
      } catch (cleanupErr) {
        console.warn(formatLogMessage('warn', `[grep] Failed to cleanup temp file ${tempFile}: ${cleanupErr.message}`));
      }
    }
  }
}

/**
 * Downloads content using curl and searches with grep
 * @param {string} url - The URL to download
 * @param {Array<string>} searchPatterns - Grep patterns to search for
 * @param {string} userAgent - User agent string to use
 * @param {object} grepOptions - Grep search options
 * @param {number} timeout - Timeout in seconds (default: 30)
 * @returns {Promise<object>} Object with found boolean, matchedPattern, and content
 */
async function downloadAndGrep(url, searchPatterns, userAgent = '', grepOptions = {}, timeout = 30) {
  try {
    const curlArgs = [
      '-s', // Silent mode
      '-L', // Follow redirects
      '--max-time', timeout.toString(),
      '--max-redirs', '5',
      '--fail-with-body', // Return body even on HTTP errors
      '--compressed', // Accept compressed responses
    ];

    if (userAgent) {
      curlArgs.push('-H', `User-Agent: ${userAgent}`);
    }

    // Add common headers to appear more browser-like
    curlArgs.push(
      '-H', 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
      '-H', 'Accept-Language: en-US,en;q=0.5',
      '-H', 'Accept-Encoding: gzip, deflate',
      '-H', 'Connection: keep-alive',
      '-H', 'Upgrade-Insecure-Requests: 1'
    );

    curlArgs.push(url);

    // Download content with curl
    const curlResult = spawnSync('curl', curlArgs, {
      encoding: 'utf8',
      timeout: timeout * 1000,
      maxBuffer: 10 * 1024 * 1024 // 10MB max buffer
    });

    if (curlResult.error) {
      throw curlResult.error;
    }

    if (curlResult.status !== 0) {
      throw new Error(`Curl exited with status ${curlResult.status}: ${curlResult.stderr}`);
    }

    const content = curlResult.stdout;

    // Search content with grep
    const grepResult = await grepContent(content, searchPatterns, grepOptions);

    return {
      found: grepResult.found,
      matchedPattern: grepResult.matchedPattern,
      allMatches: grepResult.allMatches,
      content: content,
      contentLength: content.length
    };

  } catch (error) {
    throw new Error(`Download and grep failed for ${url}: ${error.message}`);
  }
}

/**
 * Creates a grep-based URL handler for downloading and searching content
 * @param {object} config - Configuration object containing all necessary parameters
 * @returns {Function} URL handler function for grep-based content analysis
 */
function createGrepHandler(config) {
  const {
    searchStrings,
    regexes,
    matchedDomains,
    currentUrl,
    perSiteSubDomains,
    ignoreDomains,
    matchesIgnoreDomain,
    getRootDomain,
    siteConfig,
    dumpUrls,
    matchedUrlsLogFile,
    forceDebug,
    userAgent,
    hasSearchString,
    grepOptions = {}
  } = config;

  return async function grepHandler(requestUrl) {
    const respDomain = perSiteSubDomains ? (new URL(requestUrl)).hostname : getRootDomain(requestUrl);

    // Only process URLs that match our regex patterns
    const matchesRegex = regexes.some(re => re.test(requestUrl));
    if (!matchesRegex) return;

    // Check if this is a first-party request (same domain as the URL being scanned)
    const currentUrlHostname = new URL(currentUrl).hostname;
    const requestHostname = new URL(requestUrl).hostname;
    const isFirstParty = currentUrlHostname === requestHostname;

    // Apply first-party/third-party filtering
    if (isFirstParty && siteConfig.firstParty === false) {
      if (forceDebug) {
        console.log(formatLogMessage('debug', `[grep] Skipping first-party request (firstParty=false): ${requestUrl}`));
      }
      return;
    }

    if (!isFirstParty && siteConfig.thirdParty === false) {
      if (forceDebug) {
        console.log(formatLogMessage('debug', `[grep] Skipping third-party request (thirdParty=false): ${requestUrl}`));
      }
      return;
    }

    try {
      if (forceDebug) {
        console.log(formatLogMessage('debug', `[grep] Downloading and searching content from: ${requestUrl}`));
      }

      // If NO searchstring is defined, match immediately (like browser behavior)
      if (!hasSearchString) {
        if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
          return;
        }

        matchedDomains.add(respDomain);
        const simplifiedUrl = getRootDomain(currentUrl);

        if (siteConfig.verbose === 1) {
          const partyType = isFirstParty ? 'first-party' : 'third-party';
          console.log(formatLogMessage('match', `[${simplifiedUrl}] ${requestUrl} (${partyType}, grep) matched regex`));
        }

        if (dumpUrls) {
          const timestamp = new Date().toISOString();
          const partyType = isFirstParty ? 'first-party' : 'third-party';
          try {
            fs.appendFileSync(matchedUrlsLogFile,
              `${timestamp} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, grep)\n`);
          } catch (logErr) {
            console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
          }
        }
        return;
      }

      // If searchstring IS defined, download and grep content
      const result = await downloadAndGrep(requestUrl, searchStrings, userAgent, grepOptions, 30);

      if (result.found) {
        if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
          return;
        }

        matchedDomains.add(respDomain);
        const simplifiedUrl = getRootDomain(currentUrl);

        if (siteConfig.verbose === 1) {
          const partyType = isFirstParty ? 'first-party' : 'third-party';
          const matchCount = result.allMatches.reduce((sum, match) => sum + match.matches.length, 0);
          console.log(formatLogMessage('match', `[${simplifiedUrl}] ${requestUrl} (${partyType}, grep) contains pattern: "${result.matchedPattern}" (${matchCount} matches)`));
        }

        if (dumpUrls) {
          const timestamp = new Date().toISOString();
          const partyType = isFirstParty ? 'first-party' : 'third-party';
          const matchCount = result.allMatches.reduce((sum, match) => sum + match.matches.length, 0);
          try {
            fs.appendFileSync(matchedUrlsLogFile,
              `${timestamp} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, grep, pattern: "${result.matchedPattern}", matches: ${matchCount})\n`);
          } catch (logErr) {
            console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
          }
        }
      } else if (forceDebug) {
        const partyType = isFirstParty ? 'first-party' : 'third-party';
        console.log(formatLogMessage('debug', `[grep] ${requestUrl} (${partyType}) matched regex but no patterns found`));
      }

    } catch (err) {
      if (forceDebug) {
        console.log(formatLogMessage('debug', `[grep] Failed to download/grep content for ${requestUrl}: ${err.message}`));
      }
    }
  };
}

/**
 * Validates that grep is available on the system
 * @returns {object} Validation result with isAvailable boolean and version info
 */
function validateGrepAvailability() {
  try {
    const result = spawnSync('grep', ['--version'], {
      encoding: 'utf8',
      timeout: 5000
    });

    if (result.status === 0) {
      const version = result.stdout.split('\n')[0] || 'Unknown version';
      return {
        isAvailable: true,
        version: version.trim(),
        error: null
      };
    } else {
      return {
        isAvailable: false,
        version: null,
        error: 'grep command failed'
      };
    }
  } catch (error) {
    return {
      isAvailable: false,
      version: null,
      error: `grep not found: ${error.message}`
    };
  }
}

module.exports = {
  grepContent,
  downloadAndGrep,
  createGrepHandler,
  validateGrepAvailability,
  createTempFile
};
```
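A minimal usage sketch for this module, assuming `grep` is on the system PATH. The sample HTML snippet, the `tracker.invalid` host, and the pattern list are invented for illustration; only the function names and option keys come from the module above.

```js
// Hypothetical usage sketch - not part of the published package.
const { validateGrepAvailability, grepContent } = require('./lib/grep');

(async () => {
  // Fail fast if the grep binary is missing (the module shells out via spawnSync)
  const grepCheck = validateGrepAvailability();
  if (!grepCheck.isAvailable) {
    console.error(`grep unavailable: ${grepCheck.error}`);
    return;
  }
  console.log(`Using ${grepCheck.version}`);

  // Literal (non-regex), case-insensitive search over in-memory content
  const result = await grepContent(
    '<script src="https://tracker.invalid/pixel.js"></script>',
    ['tracker.invalid', 'pixel'],
    { ignoreCase: true, regex: false }
  );
  console.log(result.found, result.matchedPattern, result.allMatches);
})();
```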