@fanboynz/network-scanner 1.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,274 @@
1
+ /**
2
+ * FlowProxy protection detection and handling module
3
+ * Detects flowProxy DDoS protection and handles it appropriately for security scanning
4
+ */
5
+
6
/**
 * Cross-version compatible timeout helper for Puppeteer.
 *
 * Newer Puppeteer releases expose page.waitForTimeout(), older ones only
 * page.waitFor(); when neither exists — or the page-based wait throws
 * (e.g. the page was closed mid-wait) — we degrade to a plain
 * setTimeout-backed delay so callers always get the pause they asked for.
 *
 * @param {import('puppeteer').Page} page - Puppeteer page instance
 * @param {number} timeout - Timeout in milliseconds
 * @returns {Promise<void>}
 */
async function waitForTimeout(page, timeout) {
  const plainDelay = () => new Promise((resolve) => setTimeout(resolve, timeout));

  try {
    if (typeof page.waitForTimeout === 'function') {
      await page.waitForTimeout(timeout);
    } else if (typeof page.waitFor === 'function') {
      await page.waitFor(timeout);
    } else {
      await plainDelay();
    }
  } catch (error) {
    // Any failure in the page-based wait falls back to a plain delay.
    await plainDelay();
  }
}
25
+
26
/**
 * Analyzes the current page to detect flowProxy / aurologic DDoS protection.
 *
 * All inspection happens inside page.evaluate(). If evaluation fails
 * (page closed, navigation in flight, ...) this resolves to a detection
 * object with every flag false plus the error message — it never throws.
 *
 * @param {import('puppeteer').Page} page - Puppeteer page instance
 * @returns {Promise<object>} Detection information object
 */
async function analyzeFlowProxyProtection(page) {
  try {
    return await page.evaluate(() => {
      const pageTitle = document.title || '';
      const pageText = document.body ? document.body.textContent : '';
      const pageUrl = window.location.href;

      // True when any of the given CSS selectors matches an element.
      const anySelector = (selectors) =>
        selectors.some((sel) => document.querySelector(sel) !== null);

      // flowProxy/aurologic specific URL fragments
      const hasFlowProxyDomain = ['aurologic', 'flowproxy', 'ddos-protection']
        .some((fragment) => pageUrl.includes(fragment));

      // flowProxy challenge page indicators (title or body text)
      const hasProtectionPage =
        ['DDoS Protection', 'Please wait', 'Checking your browser']
          .some((marker) => pageTitle.includes(marker)) ||
        ['DDoS protection by aurologic', 'flowProxy', 'Verifying your browser']
          .some((marker) => pageText.includes(marker));

      // Elements flowProxy injects into its interstitial pages
      const hasFlowProxyElements = anySelector([
        '[data-flowproxy]',
        '.flowproxy-challenge',
        '#flowproxy-container',
        '.aurologic-protection',
      ]);

      // Active challenge widgets
      const hasChallengeElements = anySelector([
        '.challenge-running',
        '.verification-container',
        'input[name="flowproxy-response"]',
      ]);

      // Rate limiting indicators
      const isRateLimited =
        ['Rate limited', 'Too many requests', 'Please try again later']
          .some((marker) => pageText.includes(marker)) ||
        ['429', 'Rate Limit'].some((marker) => pageTitle.includes(marker));

      // JavaScript challenge indicators
      const hasJSChallenge =
        anySelector(['script[src*="flowproxy"]', 'script[src*="aurologic"]']) ||
        ['JavaScript is required', 'Please enable JavaScript']
          .some((marker) => pageText.includes(marker));

      // Loading/processing indicators
      const isProcessing =
        ['Processing', 'Loading'].some((marker) => pageText.includes(marker)) ||
        anySelector(['.loading-spinner', '.processing-indicator']);

      const isFlowProxyDetected =
        hasFlowProxyDomain ||
        hasProtectionPage ||
        hasFlowProxyElements ||
        hasChallengeElements;

      return {
        isFlowProxyDetected,
        hasFlowProxyDomain,
        hasProtectionPage,
        hasFlowProxyElements,
        hasChallengeElements,
        isRateLimited,
        hasJSChallenge,
        isProcessing,
        title: pageTitle,
        url: pageUrl,
        bodySnippet: pageText.slice(0, 200) // First 200 chars for debugging
      };
    });
  } catch (error) {
    // Evaluation failed: report "nothing detected" and carry the reason.
    return {
      isFlowProxyDetected: false,
      hasFlowProxyDomain: false,
      hasProtectionPage: false,
      hasFlowProxyElements: false,
      hasChallengeElements: false,
      isRateLimited: false,
      hasJSChallenge: false,
      isProcessing: false,
      error: error.message
    };
  }
}
114
+
115
/**
 * Handles flowProxy protection by implementing appropriate delays and retry logic.
 *
 * Sequence: short settle delay -> detection -> (rate-limit delay when rate
 * limited, wait for JS challenge completion, extra processing delay) ->
 * re-detection to confirm the protection page cleared.
 *
 * Site config keys (all optional, milliseconds):
 *   flowproxy_delay            - delay after a rate-limit page (default 30000)
 *   flowproxy_js_timeout       - max wait for a JS challenge (default 15000)
 *   flowproxy_additional_delay - post-handling settle delay (default 5000)
 * An explicitly configured 0 now disables the corresponding delay; the
 * previous `||` fallbacks silently replaced 0 with the default.
 *
 * @param {import('puppeteer').Page} page - Puppeteer page instance
 * @param {string} currentUrl - Current URL being processed
 * @param {object} siteConfig - Site configuration object
 * @param {boolean} forceDebug - Debug mode flag
 * @returns {Promise<object>} Result object with handling details
 */
async function handleFlowProxyProtection(page, currentUrl, siteConfig, forceDebug = false) {
  // Fall back only when the setting is null/undefined so a configured 0 is honored.
  const configOr = (value, fallback) => (value == null ? fallback : value);

  const result = {
    flowProxyDetection: { attempted: false, detected: false },
    handlingResult: { attempted: false, success: false },
    overallSuccess: true,
    errors: [],
    warnings: []
  };

  try {
    if (forceDebug) console.log(`[debug][flowproxy] Checking for flowProxy protection on ${currentUrl}`);

    // Wait a moment for the page to load
    await waitForTimeout(page, 2000);

    const detectionInfo = await analyzeFlowProxyProtection(page);
    result.flowProxyDetection = {
      attempted: true,
      detected: detectionInfo.isFlowProxyDetected,
      details: detectionInfo
    };

    if (detectionInfo.isFlowProxyDetected) {
      result.handlingResult.attempted = true;

      if (forceDebug) {
        console.log(`[debug][flowproxy] FlowProxy protection detected on ${currentUrl}:`);
        console.log(`[debug][flowproxy] Page Title: "${detectionInfo.title}"`);
        console.log(`[debug][flowproxy] Current URL: ${detectionInfo.url}`);
        console.log(`[debug][flowproxy] Has Protection Page: ${detectionInfo.hasProtectionPage}`);
        console.log(`[debug][flowproxy] Has Challenge Elements: ${detectionInfo.hasChallengeElements}`);
        console.log(`[debug][flowproxy] Is Rate Limited: ${detectionInfo.isRateLimited}`);
        console.log(`[debug][flowproxy] Has JS Challenge: ${detectionInfo.hasJSChallenge}`);
      }

      // Handle rate limiting
      if (detectionInfo.isRateLimited) {
        const rateLimitDelay = configOr(siteConfig.flowproxy_delay, 30000); // 30 second default
        result.warnings.push(`Rate limiting detected - implementing ${rateLimitDelay}ms delay`);
        if (forceDebug) console.log(`[debug][flowproxy] Rate limiting detected, waiting ${rateLimitDelay}ms`);
        await waitForTimeout(page, rateLimitDelay);
      }

      // Handle JavaScript challenges by waiting for completion
      if (detectionInfo.hasJSChallenge || detectionInfo.isProcessing) {
        const jsWaitTime = configOr(siteConfig.flowproxy_js_timeout, 15000); // 15 second default
        if (forceDebug) console.log(`[debug][flowproxy] JavaScript challenge detected, waiting up to ${jsWaitTime}ms for completion`);

        try {
          // Wait until the page no longer shows challenge/processing markers.
          await page.waitForFunction(
            () => {
              const bodyText = document.body ? document.body.textContent : '';
              return !bodyText.includes('Processing') &&
                     !bodyText.includes('Checking your browser') &&
                     !bodyText.includes('Please wait') &&
                     !document.querySelector('.loading-spinner') &&
                     !document.querySelector('.processing-indicator');
            },
            { timeout: jsWaitTime }
          );

          if (forceDebug) console.log(`[debug][flowproxy] JavaScript challenge appears to have completed`);
        } catch (timeoutErr) {
          // A timeout here is non-fatal: record it and continue the scan.
          result.warnings.push(`JavaScript challenge timeout after ${jsWaitTime}ms`);
          if (forceDebug) console.log(`[debug][flowproxy] JavaScript challenge timeout - continuing anyway`);
        }
      }

      // Implement additional delay for flowProxy processing
      const additionalDelay = configOr(siteConfig.flowproxy_additional_delay, 5000); // 5 second default
      if (forceDebug) console.log(`[debug][flowproxy] Implementing additional ${additionalDelay}ms delay for flowProxy processing`);
      await waitForTimeout(page, additionalDelay);

      // Check if we're still on a protection page
      const finalCheck = await analyzeFlowProxyProtection(page);
      if (finalCheck.isFlowProxyDetected && finalCheck.hasProtectionPage) {
        result.warnings.push('Still on flowProxy protection page after handling attempts');
        if (forceDebug) console.log(`[debug][flowproxy] Warning: Still appears to be on protection page`);
      } else {
        result.handlingResult.success = true;
        if (forceDebug) console.log(`[debug][flowproxy] Successfully handled flowProxy protection for ${currentUrl}`);
      }

    } else if (forceDebug) {
      // No protection to handle; overallSuccess already defaults to true.
      console.log(`[debug][flowproxy] No flowProxy protection detected on ${currentUrl}`);
    }

  } catch (error) {
    result.errors.push(`FlowProxy handling error: ${error.message}`);
    result.overallSuccess = false;
    if (forceDebug) {
      console.log(`[debug][flowproxy] FlowProxy handling failed for ${currentUrl}:`);
      console.log(`[debug][flowproxy] Error: ${error.message}`);
    }
  }

  // Log overall result (debug mode only): errors take precedence over warnings.
  if (forceDebug) {
    if (result.errors.length > 0) {
      console.log(`[debug][flowproxy] FlowProxy handling completed with errors for ${currentUrl}:`);
      result.errors.forEach(error => {
        console.log(`[debug][flowproxy] - ${error}`);
      });
    } else if (result.warnings.length > 0) {
      console.log(`[debug][flowproxy] FlowProxy handling completed with warnings for ${currentUrl}:`);
      result.warnings.forEach(warning => {
        console.log(`[debug][flowproxy] - ${warning}`);
      });
    } else if (result.flowProxyDetection.attempted) {
      console.log(`[debug][flowproxy] FlowProxy handling completed successfully for ${currentUrl}`);
    }
  }

  return result;
}
238
+
239
/**
 * Checks if the current page might be behind flowProxy protection.
 *
 * Thin convenience wrapper around analyzeFlowProxyProtection() that
 * collapses the detection object to a single boolean.
 *
 * @param {import('puppeteer').Page} page - Puppeteer page instance
 * @returns {Promise<boolean>} True if flowProxy protection is suspected
 */
async function isFlowProxyProtected(page) {
  try {
    const { isFlowProxyDetected } = await analyzeFlowProxyProtection(page);
    return isFlowProxyDetected;
  } catch (error) {
    // Any detection failure is treated as "not protected".
    return false;
  }
}
252
+
253
/**
 * Gets recommended timeout values for flowProxy protected sites.
 *
 * A default is applied only when the setting is absent (null/undefined),
 * so an explicitly configured 0 is respected — the previous `||`
 * fallbacks silently replaced a configured 0 with the default.
 *
 * @param {object} siteConfig - Site configuration object
 * @returns {object} Recommended timeout values in milliseconds
 */
function getFlowProxyTimeouts(siteConfig) {
  // Fall back only on null/undefined, not on any falsy value.
  const configOr = (value, fallback) => (value == null ? fallback : value);

  return {
    pageTimeout: configOr(siteConfig.flowproxy_page_timeout, 45000), // 45 seconds
    navigationTimeout: configOr(siteConfig.flowproxy_nav_timeout, 45000), // 45 seconds
    challengeTimeout: configOr(siteConfig.flowproxy_js_timeout, 15000), // 15 seconds
    rateLimit: configOr(siteConfig.flowproxy_delay, 30000), // 30 seconds
    additionalDelay: configOr(siteConfig.flowproxy_additional_delay, 5000) // 5 seconds
  };
}
267
+
268
+ module.exports = {
269
+ analyzeFlowProxyProtection,
270
+ handleFlowProxyProtection,
271
+ isFlowProxyProtected,
272
+ getFlowProxyTimeouts,
273
+ waitForTimeout
274
+ };
package/lib/grep.js ADDED
@@ -0,0 +1,348 @@
1
+ // === grep.js - Grep-based Content Search Module ===
2
+ // Alternative to searchstring.js using grep for pattern matching
3
+
4
+ const fs = require('fs');
5
+ const { spawnSync } = require('child_process');
6
+ const path = require('path');
7
+ const os = require('os');
8
+ const { colorize, colors, messageColors, tags, formatLogMessage } = require('./colorize');
9
+
10
/**
 * Creates a temporary file with content for grep processing.
 *
 * The filename combines the caller-supplied prefix, a millisecond
 * timestamp and a 9-character random suffix to avoid collisions between
 * concurrent scans.
 *
 * @param {string} content - The content to write to temp file
 * @param {string} prefix - Prefix for temp filename
 * @returns {string} Path to the created temporary file
 * @throws {Error} If the file cannot be written
 */
function createTempFile(content, prefix = 'scanner_grep') {
  const tempDir = os.tmpdir();
  // String.prototype.substr is deprecated; slice(2, 11) produces the same
  // 9-character random suffix.
  const randomSuffix = Math.random().toString(36).slice(2, 11);
  const tempFile = path.join(tempDir, `${prefix}_${Date.now()}_${randomSuffix}.tmp`);

  try {
    fs.writeFileSync(tempFile, content, 'utf8');
    return tempFile;
  } catch (error) {
    throw new Error(`Failed to create temp file: ${error.message}`);
  }
}
27
+
28
/**
 * Searches content using grep with the provided patterns.
 *
 * The content is written to a temporary file, each pattern is run as a
 * separate grep invocation against that file, and the temp file is
 * removed afterwards regardless of outcome.
 *
 * @param {string} content - The content to search
 * @param {Array<string>} searchPatterns - Array of grep patterns to search for
 * @param {object} options - Grep options (ignoreCase, wholeWord, regex, maxMatches)
 * @returns {Promise<object>} Object with found boolean, matchedPattern, and allMatches array
 */
async function grepContent(content, searchPatterns, options = {}) {
  const {
    ignoreCase = true,
    wholeWord = false,
    regex = false,
    maxMatches = 1000
  } = options;

  if (!content || searchPatterns.length === 0) {
    return { found: false, matchedPattern: null, allMatches: [] };
  }

  // Build the grep argument vector for one pattern against one file.
  const buildGrepArgs = (pattern, file) => {
    const args = [
      '--text',         // Treat file as text
      '--color=never',  // Disable color output
    ];
    if (ignoreCase) args.push('-i');
    if (wholeWord) args.push('-w');
    if (!regex) args.push('-F'); // Fixed strings (literal)
    args.push(pattern, file);
    return args;
  };

  let tempFile = null;

  try {
    tempFile = createTempFile(content, 'grep_search');

    const allMatches = [];
    let firstMatch = null;

    for (const pattern of searchPatterns) {
      if (!pattern || pattern.trim().length === 0) continue;

      try {
        const result = spawnSync('grep', buildGrepArgs(pattern, tempFile), {
          encoding: 'utf8',
          timeout: 10000,          // 10 second timeout
          maxBuffer: 1024 * 1024   // 1MB max buffer
        });

        // grep exit codes: 0 = found, 1 = not found, >=2 = error.
        if (result.status === 0 && result.stdout) {
          const matchedLines = result.stdout
            .split('\n')
            .filter(line => line.trim().length > 0)
            .slice(0, maxMatches);
          allMatches.push({ pattern: pattern, matches: matchedLines });
          if (!firstMatch) firstMatch = pattern;
        }
      } catch (grepErr) {
        // A single failing pattern does not abort the remaining ones.
        console.warn(formatLogMessage('warn', `[grep] Pattern "${pattern}" failed: ${grepErr.message}`));
      }
    }

    return {
      found: allMatches.length > 0,
      matchedPattern: firstMatch,
      allMatches: allMatches
    };

  } catch (error) {
    throw new Error(`Grep search failed: ${error.message}`);
  } finally {
    // Always clean up the temporary file.
    if (tempFile) {
      try {
        fs.unlinkSync(tempFile);
      } catch (cleanupErr) {
        console.warn(formatLogMessage('warn', `[grep] Failed to cleanup temp file ${tempFile}: ${cleanupErr.message}`));
      }
    }
  }
}
115
+
116
/**
 * Downloads content using curl and searches with grep.
 *
 * @param {string} url - The URL to download
 * @param {Array<string>} searchPatterns - Grep patterns to search for
 * @param {string} userAgent - User agent string to use
 * @param {object} grepOptions - Grep search options
 * @param {number} timeout - Timeout in seconds (default: 30)
 * @returns {Promise<object>} Object with found boolean, matchedPattern, and content
 * @throws {Error} When curl fails or the grep search errors
 */
async function downloadAndGrep(url, searchPatterns, userAgent = '', grepOptions = {}, timeout = 30) {
  try {
    const curlArgs = [
      '-s',                              // Silent mode
      '-L',                              // Follow redirects
      '--max-time', String(timeout),
      '--max-redirs', '5',
      '--fail-with-body',                // Return body even on HTTP errors
      '--compressed',                    // Accept compressed responses
    ];

    if (userAgent) {
      curlArgs.push('-H', `User-Agent: ${userAgent}`);
    }

    // Common headers to appear more browser-like.
    const browserHeaders = [
      'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
      'Accept-Language: en-US,en;q=0.5',
      'Accept-Encoding: gzip, deflate',
      'Connection: keep-alive',
      'Upgrade-Insecure-Requests: 1'
    ];
    for (const header of browserHeaders) {
      curlArgs.push('-H', header);
    }

    curlArgs.push(url);

    // Download content with curl.
    const curlResult = spawnSync('curl', curlArgs, {
      encoding: 'utf8',
      timeout: timeout * 1000,
      maxBuffer: 10 * 1024 * 1024 // 10MB max buffer
    });

    if (curlResult.error) {
      throw curlResult.error;
    }
    if (curlResult.status !== 0) {
      throw new Error(`Curl exited with status ${curlResult.status}: ${curlResult.stderr}`);
    }

    const content = curlResult.stdout;

    // Search the downloaded content with grep.
    const grepResult = await grepContent(content, searchPatterns, grepOptions);

    return {
      found: grepResult.found,
      matchedPattern: grepResult.matchedPattern,
      allMatches: grepResult.allMatches,
      content: content,
      contentLength: content.length
    };

  } catch (error) {
    throw new Error(`Download and grep failed for ${url}: ${error.message}`);
  }
}
183
+
184
/**
 * Creates a grep-based URL handler for downloading and searching content.
 *
 * The returned async handler downloads each regex-matched request URL with
 * curl and greps the body for the configured search strings. When no
 * search string is configured, a regex match alone records the domain
 * (mirroring the browser-based handler's behavior).
 *
 * Fix: URL parsing previously ran outside the handler's try/catch, so a
 * malformed requestUrl rejected the returned promise instead of being
 * debug-logged; all per-request work now happens inside the try.
 *
 * @param {object} config - Configuration object containing all necessary parameters
 * @returns {Function} URL handler function for grep-based content analysis
 */
function createGrepHandler(config) {
  const {
    searchStrings,
    regexes,
    matchedDomains,
    currentUrl,
    perSiteSubDomains,
    ignoreDomains,
    matchesIgnoreDomain,
    getRootDomain,
    siteConfig,
    dumpUrls,
    matchedUrlsLogFile,
    forceDebug,
    userAgent,
    hasSearchString,
    grepOptions = {}
  } = config;

  // Append one line to the matched-URLs dump log, swallowing write errors.
  const appendUrlLog = (line) => {
    try {
      fs.appendFileSync(matchedUrlsLogFile, line);
    } catch (logErr) {
      console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
    }
  };

  return async function grepHandler(requestUrl) {
    try {
      const respDomain = perSiteSubDomains ? (new URL(requestUrl)).hostname : getRootDomain(requestUrl);

      // Only process URLs that match our regex patterns
      if (!regexes.some(re => re.test(requestUrl))) return;

      // First-party = same hostname as the URL being scanned.
      const currentUrlHostname = new URL(currentUrl).hostname;
      const requestHostname = new URL(requestUrl).hostname;
      const isFirstParty = currentUrlHostname === requestHostname;
      const partyType = isFirstParty ? 'first-party' : 'third-party';

      // Apply first-party/third-party filtering
      if (isFirstParty && siteConfig.firstParty === false) {
        if (forceDebug) {
          console.log(formatLogMessage('debug', `[grep] Skipping first-party request (firstParty=false): ${requestUrl}`));
        }
        return;
      }
      if (!isFirstParty && siteConfig.thirdParty === false) {
        if (forceDebug) {
          console.log(formatLogMessage('debug', `[grep] Skipping third-party request (thirdParty=false): ${requestUrl}`));
        }
        return;
      }

      if (forceDebug) {
        console.log(formatLogMessage('debug', `[grep] Downloading and searching content from: ${requestUrl}`));
      }

      // If NO searchstring is defined, match immediately (like browser behavior)
      if (!hasSearchString) {
        if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
          return;
        }

        matchedDomains.add(respDomain);
        const simplifiedUrl = getRootDomain(currentUrl);

        if (siteConfig.verbose === 1) {
          console.log(formatLogMessage('match', `[${simplifiedUrl}] ${requestUrl} (${partyType}, grep) matched regex`));
        }
        if (dumpUrls) {
          appendUrlLog(`${new Date().toISOString()} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, grep)\n`);
        }
        return;
      }

      // If searchstring IS defined, download and grep content
      const result = await downloadAndGrep(requestUrl, searchStrings, userAgent, grepOptions, 30);

      if (!result.found) {
        if (forceDebug) {
          console.log(formatLogMessage('debug', `[grep] ${requestUrl} (${partyType}) matched regex but no patterns found`));
        }
        return;
      }

      if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
        return;
      }

      matchedDomains.add(respDomain);
      const simplifiedUrl = getRootDomain(currentUrl);
      const matchCount = result.allMatches.reduce((sum, match) => sum + match.matches.length, 0);

      if (siteConfig.verbose === 1) {
        console.log(formatLogMessage('match', `[${simplifiedUrl}] ${requestUrl} (${partyType}, grep) contains pattern: "${result.matchedPattern}" (${matchCount} matches)`));
      }
      if (dumpUrls) {
        appendUrlLog(`${new Date().toISOString()} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, grep, pattern: "${result.matchedPattern}", matches: ${matchCount})\n`);
      }

    } catch (err) {
      if (forceDebug) {
        console.log(formatLogMessage('debug', `[grep] Failed to download/grep content for ${requestUrl}: ${err.message}`));
      }
    }
  };
}
307
+
308
/**
 * Validates that grep is available on the system.
 *
 * Runs `grep --version` and reports the first line of its output.
 *
 * @returns {object} { isAvailable, version, error } — version is null and
 *   error is set whenever grep cannot be executed successfully.
 */
function validateGrepAvailability() {
  const unavailable = (reason) => ({ isAvailable: false, version: null, error: reason });

  try {
    const probe = spawnSync('grep', ['--version'], {
      encoding: 'utf8',
      timeout: 5000
    });

    if (probe.status !== 0) {
      return unavailable('grep command failed');
    }

    const firstLine = probe.stdout.split('\n')[0] || 'Unknown version';
    return {
      isAvailable: true,
      version: firstLine.trim(),
      error: null
    };
  } catch (error) {
    return unavailable(`grep not found: ${error.message}`);
  }
}
341
+
342
+ module.exports = {
343
+ grepContent,
344
+ downloadAndGrep,
345
+ createGrepHandler,
346
+ validateGrepAvailability,
347
+ createTempFile
348
+ };