@fanboynz/network-scanner 1.0.87 → 1.0.88

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/lib/curl.js +442 -0
  2. package/lib/grep.js +56 -17
  3. package/nwss.js +29 -16
  4. package/package.json +1 -1
package/lib/curl.js ADDED
@@ -0,0 +1,442 @@
1
+ // === curl.js - Curl-based Content Download Module ===
2
+ // Handles HTTP content downloading using curl for searchstring analysis
3
+
4
+ const fs = require('fs');
5
+ const { spawnSync } = require('child_process');
6
+ const { colorize, colors, messageColors, tags, formatLogMessage } = require('./colorize');
7
+
8
// === Constants ===
// Default tuning values for curl invocations. Frozen so consumers of this
// module cannot accidentally mutate shared configuration at runtime.
const CURL_DEFAULTS = Object.freeze({
  TIMEOUT_SECONDS: 30,
  MAX_REDIRECTS: 5,
  MAX_SIZE_BYTES: 10 * 1024 * 1024, // 10MB
  VALIDATION_TIMEOUT: 5000, // 5 seconds
  SPAWN_TIMEOUT_MULTIPLIER: 1000, // Convert seconds to milliseconds
  HTTP_SUCCESS_CODE: 200,
  CURL_SUCCESS_STATUS: 0,
  METADATA_PIPE_PARTS: 3, // http_code|content_type|size_download
  VERSION_LINE_INDEX: 0
});
20
+
21
/**
 * Downloads content using curl with browser-like headers.
 *
 * @param {string} url - The URL to download
 * @param {string} [userAgent=''] - User agent string to use (header omitted when empty)
 * @param {object} [options] - Download options
 * @param {number} [options.timeout] - Overall transfer timeout in seconds
 * @param {number} [options.maxRedirects] - Maximum redirects curl may follow
 * @param {number} [options.maxSize] - Max response size in bytes (spawnSync maxBuffer)
 * @param {boolean} [options.followRedirects=true] - Whether to pass -L to curl
 * @param {object} [options.customHeaders] - Extra headers as {name: value}
 * @returns {Promise<object>} {content, httpCode, contentType, downloadSize, success[, error]}
 *   Never rejects: all failures (spawn error, non-zero exit, bad metadata)
 *   are reported via success=false plus an error message.
 */
async function downloadWithCurl(url, userAgent = '', options = {}) {
  const {
    timeout = CURL_DEFAULTS.TIMEOUT_SECONDS,
    maxRedirects = CURL_DEFAULTS.MAX_REDIRECTS,
    maxSize = CURL_DEFAULTS.MAX_SIZE_BYTES,
    followRedirects = true,
    customHeaders = {}
  } = options;

  try {
    const curlArgs = [
      '-s', // Silent mode
      '--max-time', timeout.toString(),
      '--max-redirs', maxRedirects.toString(),
      '--fail-with-body', // Return body even on HTTP errors
      '--compressed', // Accept compressed responses
      // The leading \n is deliberate: curl appends --write-out output directly
      // after the body with NO separator, so without it a body that does not
      // end in a newline would fuse with the metadata line and both the
      // content and the parsed http code would be corrupted.
      '--write-out', '\n%{http_code}|%{content_type}|%{size_download}',
    ];

    if (followRedirects) {
      curlArgs.push('-L'); // Follow redirects
    }

    // Add user agent if provided
    if (userAgent) {
      curlArgs.push('-H', `User-Agent: ${userAgent}`);
    }

    // Add common browser headers so the request resembles a real navigation
    curlArgs.push(
      '-H', 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
      '-H', 'Accept-Language: en-US,en;q=0.5',
      '-H', 'Accept-Encoding: gzip, deflate, br',
      '-H', 'Connection: keep-alive',
      '-H', 'Upgrade-Insecure-Requests: 1',
      '-H', 'Sec-Fetch-Dest: document',
      '-H', 'Sec-Fetch-Mode: navigate',
      '-H', 'Sec-Fetch-Site: none',
      '-H', 'Cache-Control: no-cache'
    );

    // Add custom headers (override/augment the defaults above)
    Object.entries(customHeaders).forEach(([key, value]) => {
      curlArgs.push('-H', `${key}: ${value}`);
    });

    curlArgs.push(url);

    // Execute curl synchronously; maxBuffer caps the response size
    const curlResult = spawnSync('curl', curlArgs, {
      encoding: 'utf8',
      timeout: timeout * CURL_DEFAULTS.SPAWN_TIMEOUT_MULTIPLIER,
      maxBuffer: maxSize
    });

    if (curlResult.error) {
      throw curlResult.error;
    }

    if (curlResult.status !== CURL_DEFAULTS.CURL_SUCCESS_STATUS) {
      throw new Error(`Curl exited with status ${curlResult.status}: ${curlResult.stderr}`);
    }

    // The metadata is everything after the final newline (guaranteed to exist
    // by the \n prefix in --write-out above); the content is everything before
    // it, with any trailing newline of the body preserved.
    const output = curlResult.stdout;
    const separatorIndex = output.lastIndexOf('\n');
    const metadata = output.slice(separatorIndex + 1);
    const content = output.slice(0, Math.max(separatorIndex, 0));

    // Parse metadata
    const metadataParts = metadata.split('|');
    if (metadataParts.length !== CURL_DEFAULTS.METADATA_PIPE_PARTS) {
      throw new Error(`Invalid metadata format: expected ${CURL_DEFAULTS.METADATA_PIPE_PARTS} parts, got ${metadataParts.length}`);
    }
    const [httpCode, contentType, downloadSize] = metadataParts;

    return {
      content,
      httpCode: parseInt(httpCode, 10) || 0,
      contentType: contentType || 'unknown',
      downloadSize: parseInt(downloadSize, 10) || content.length,
      success: true
    };

  } catch (error) {
    // Normalize every failure mode into a non-throwing result object
    return {
      content: '',
      httpCode: 0,
      contentType: 'unknown',
      downloadSize: 0,
      success: false,
      error: error.message
    };
  }
}
122
+
123
/**
 * Searches content for patterns using JavaScript string matching
 * (case-insensitive `includes`).
 *
 * @param {string} content - Content to search
 * @param {Array<string>} [searchStrings] - OR patterns (any one may match)
 * @param {Array<string>} [searchStringsAnd] - AND patterns (every one must match)
 * @param {boolean} [hasSearchStringAnd=false] - Whether AND logic applies
 * @returns {object} {found, matchedPattern, matchType} — AND results also
 *   carry {foundPatterns, missingPatterns}.
 */
function searchContent(content, searchStrings = [], searchStringsAnd = [], hasSearchStringAnd = false) {
  // Nothing to search in — report no match immediately.
  if (!content || content.length === 0) {
    return { found: false, matchedPattern: null, matchType: null };
  }

  const haystack = content.toLowerCase();
  // Case-insensitive substring membership test shared by both modes.
  const contains = (pattern) => haystack.includes(pattern.toLowerCase());

  // AND mode: every pattern in searchStringsAnd must be present.
  if (hasSearchStringAnd && searchStringsAnd.length > 0) {
    const foundPatterns = searchStringsAnd.filter(contains);
    const missingPatterns = searchStringsAnd.filter((pattern) => !contains(pattern));

    if (missingPatterns.length === 0) {
      return {
        found: true,
        matchedPattern: foundPatterns.join(' AND '),
        matchType: 'AND',
        foundPatterns,
        missingPatterns: []
      };
    }

    return {
      found: false,
      matchedPattern: null,
      matchType: 'AND',
      foundPatterns,
      missingPatterns
    };
  }

  // OR mode: the first pattern present in the content wins.
  const orMatch = searchStrings.find(contains);
  if (orMatch !== undefined) {
    return { found: true, matchedPattern: orMatch, matchType: 'OR' };
  }

  return { found: false, matchedPattern: null, matchType: null };
}
188
+
189
/**
 * Creates a curl-based URL handler for downloading and searching content.
 *
 * The returned async handler applies, in order: duplicate-subdomain skip,
 * regex URL filtering, first/third-party filtering, then either an immediate
 * match (no searchstring configured) or a curl download followed by
 * searchstring pattern matching. Matches are recorded via addMatchedDomain
 * and optionally logged to matchedUrlsLogFile.
 *
 * @param {object} config - Configuration object; notable fields:
 *   searchStrings / searchStringsAnd — OR / AND pattern lists,
 *   regexes — URL filters, addMatchedDomain / isDomainAlreadyDetected —
 *   match bookkeeping callbacks, onContentFetched — optional content-cache
 *   callback, matchesIgnoreDomain / getRootDomain — domain helpers,
 *   siteConfig — per-site options (firstParty/thirdParty/verbose/
 *   custom_headers/referrer_headers), forceDebug — debug logging toggle.
 * @returns {Function} async (requestUrl: string) => undefined — never throws;
 *   all errors are swallowed (optionally logged when forceDebug is set).
 */
function createCurlHandler(config) {
  const {
    searchStrings,
    searchStringsAnd,
    hasSearchStringAnd,
    regexes,
    matchedDomains,
    addMatchedDomain,
    isDomainAlreadyDetected,
    onContentFetched,
    currentUrl,
    perSiteSubDomains,
    ignoreDomains,
    matchesIgnoreDomain,
    getRootDomain,
    siteConfig,
    dumpUrls,
    matchedUrlsLogFile,
    forceDebug,
    userAgent,
    resourceType,
    hasSearchString
  } = config;

  return async function curlHandler(requestUrl) {
    try {
      // Domain reported in results: full hostname when per-site subdomains
      // are requested, otherwise the registrable root domain.
      const respDomain = perSiteSubDomains ? (new URL(requestUrl)).hostname : getRootDomain(requestUrl);
      const fullSubdomain = (new URL(requestUrl)).hostname; // Always get full subdomain for cache tracking

      // Skip if already detected to avoid duplicates
      if (isDomainAlreadyDetected(fullSubdomain)) {
        if (forceDebug) {
          console.log(formatLogMessage('debug', `[curl] Skipping already detected subdomain: ${fullSubdomain}`));
        }
        return;
      }

      // Only process URLs that match our regex patterns
      const matchesRegex = regexes.some(re => re.test(requestUrl));
      if (!matchesRegex) {
        if (forceDebug) {
          console.log(formatLogMessage('debug', `[curl] URL ${requestUrl} doesn't match any regex patterns`));
        }
        return;
      }

      // Check if this is a first-party request (same domain as the URL being scanned)
      // NOTE(review): currentUrlHostname/requestHostname are computed but never
      // used below — candidates for removal in a future cleanup.
      const currentUrlHostname = new URL(currentUrl).hostname;
      const requestHostname = new URL(requestUrl).hostname;
      const currentRootDomain = getRootDomain(currentUrl);
      const requestRootDomain = getRootDomain(requestUrl);
      const isFirstParty = currentRootDomain === requestRootDomain;

      // Apply first-party/third-party filtering (both false and 0 disable)
      if (isFirstParty && (siteConfig.firstParty === false || siteConfig.firstParty === 0)) {
        if (forceDebug) {
          console.log(formatLogMessage('debug', `[curl] Skipping first-party request (firstParty disabled): ${requestUrl}`));
        }
        return;
      }

      if (!isFirstParty && (siteConfig.thirdParty === false || siteConfig.thirdParty === 0)) {
        if (forceDebug) {
          console.log(formatLogMessage('debug', `[curl] Skipping third-party request (thirdParty disabled): ${requestUrl}`));
        }
        return;
      }

      if (forceDebug) {
        console.log(formatLogMessage('debug', `[curl] Processing ${isFirstParty ? 'first-party' : 'third-party'} request: ${requestUrl}`));
      }

      // If NO searchstring is defined, match immediately (like browser behavior)
      if (!hasSearchString || ((!searchStrings || !searchStrings.length) && (!searchStringsAnd || !searchStringsAnd.length))) {
        if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
          if (forceDebug) {
            console.log(formatLogMessage('debug', `[curl] Domain ${respDomain} is in ignore list`));
          }
          return;
        }

        addMatchedDomain(respDomain, resourceType, fullSubdomain);
        const simplifiedUrl = getRootDomain(currentUrl);

        if (siteConfig.verbose === 1) {
          const partyType = isFirstParty ? 'first-party' : 'third-party';
          const resourceInfo = resourceType ? ` (${resourceType})` : '';
          console.log(formatLogMessage('match', `[${simplifiedUrl}] ${requestUrl} (${partyType}, curl) matched regex${resourceInfo}`));
        }

        // Optionally append the match to the URL dump log; failures here are
        // reported as warnings but never abort the handler.
        if (dumpUrls && matchedUrlsLogFile) {
          const timestamp = new Date().toISOString();
          const partyType = isFirstParty ? 'first-party' : 'third-party';
          const resourceInfo = resourceType ? ` (${resourceType})` : '';
          try {
            fs.appendFileSync(matchedUrlsLogFile,
              `${timestamp} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, curl)${resourceInfo}\n`);
          } catch (logErr) {
            console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
          }
        }
        return;
      }

      // If searchstring IS defined, download and search content
      if (hasSearchString && ((searchStrings && searchStrings.length > 0) || (searchStringsAnd && searchStringsAnd.length > 0)) && forceDebug) {
        console.log(formatLogMessage('debug', `[curl] Downloading content for pattern matching: ${requestUrl}`));
      }

      // Prepare custom headers from site config; referrer_headers may be a
      // single URL or an array (one is picked at random per request).
      const customHeaders = siteConfig.custom_headers || {};
      if (siteConfig.referrer_headers) {
        const referrerUrl = Array.isArray(siteConfig.referrer_headers)
          ? siteConfig.referrer_headers[Math.floor(Math.random() * siteConfig.referrer_headers.length)]
          : siteConfig.referrer_headers;

        if (typeof referrerUrl === 'string' && referrerUrl.startsWith('http')) {
          customHeaders['Referer'] = referrerUrl;
        }
      }

      const downloadResult = await downloadWithCurl(requestUrl, userAgent, {
        timeout: CURL_DEFAULTS.TIMEOUT_SECONDS,
        maxRedirects: CURL_DEFAULTS.MAX_REDIRECTS,
        customHeaders
      });

      if (!downloadResult.success) {
        if (forceDebug) {
          console.log(formatLogMessage('debug', `[curl] Failed to download ${requestUrl}: ${downloadResult.error}`));
        }
        return;
      }

      // Cache the fetched content if callback provided; caching is
      // best-effort and must not break matching.
      if (onContentFetched) {
        try {
          onContentFetched(requestUrl, downloadResult.content);
        } catch (cacheErr) {
          if (forceDebug) {
            console.log(formatLogMessage('debug', `[curl] Content caching failed: ${cacheErr.message}`));
          }
        }
      }

      // Search content for patterns (OR and/or AND semantics)
      const searchResult = searchContent(
        downloadResult.content,
        searchStrings,
        searchStringsAnd,
        hasSearchStringAnd
      );

      if (searchResult.found) {
        // Ignore-list check happens after download on purpose: respDomain may
        // still be excluded even though the URL passed the regex filter.
        if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
          if (forceDebug) {
            console.log(formatLogMessage('debug', `[curl] Domain ${respDomain} matches but is in ignore list`));
          }
          return;
        }

        addMatchedDomain(respDomain, resourceType, fullSubdomain);
        const simplifiedUrl = getRootDomain(currentUrl);

        if (siteConfig.verbose === 1) {
          const partyType = isFirstParty ? 'first-party' : 'third-party';
          const resourceInfo = resourceType ? ` (${resourceType})` : '';
          const matchInfo = searchResult.matchType === 'AND'
            ? `patterns: ${searchResult.foundPatterns.length}/${searchStringsAnd.length}`
            : `pattern: "${searchResult.matchedPattern}"`;
          console.log(formatLogMessage('match',
            `[${simplifiedUrl}] ${requestUrl} (${partyType}, curl) contains ${matchInfo}${resourceInfo}`));
        }

        if (dumpUrls && matchedUrlsLogFile) {
          const timestamp = new Date().toISOString();
          const partyType = isFirstParty ? 'first-party' : 'third-party';
          const resourceInfo = resourceType ? ` (${resourceType})` : '';
          const matchInfo = searchResult.matchType === 'AND'
            ? `patterns: ${searchResult.foundPatterns.length}/${searchStringsAnd.length}`
            : `pattern: "${searchResult.matchedPattern}"`;
          try {
            fs.appendFileSync(matchedUrlsLogFile,
              `${timestamp} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, curl, ${matchInfo})${resourceInfo}\n`);
          } catch (logErr) {
            console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
          }
        }
      } else {
        // No pattern match: debug-log which AND patterns were missing, or a
        // generic message for the OR case.
        if (forceDebug) {
          const partyType = isFirstParty ? 'first-party' : 'third-party';
          if (searchResult.matchType === 'AND' && searchResult.missingPatterns) {
            console.log(formatLogMessage('debug',
              `[curl] ${requestUrl} (${partyType}) matched regex but missing AND patterns: ${searchResult.missingPatterns.join(', ')}`));
          } else {
            console.log(formatLogMessage('debug',
              `[curl] ${requestUrl} (${partyType}) matched regex but no search patterns found`));
          }
        }
      }

    } catch (err) {
      // NOTE(review): errors are intentionally swallowed so one bad request
      // cannot break the scan loop; they are only visible with forceDebug.
      if (forceDebug) {
        console.log(formatLogMessage('debug', `[curl] Handler failed for ${requestUrl}: ${err.message}`));
      }
    }
  };
}
402
+
403
/**
 * Validates that curl is available on the system.
 *
 * @returns {object} {isAvailable: boolean, version: string|null, error: string|null}
 *   isAvailable=true carries the first line of `curl --version`; failures
 *   carry a human-readable error string instead.
 */
function validateCurlAvailability() {
  try {
    const result = spawnSync('curl', ['--version'], {
      encoding: 'utf8',
      timeout: CURL_DEFAULTS.VALIDATION_TIMEOUT
    });

    // spawnSync does NOT throw when the binary is missing (e.g. ENOENT);
    // it reports the failure via result.error, so check that first —
    // otherwise a missing curl would be misreported as "curl command failed".
    if (result.error) {
      return {
        isAvailable: false,
        version: null,
        error: `curl not found: ${result.error.message}`
      };
    }

    if (result.status === CURL_DEFAULTS.CURL_SUCCESS_STATUS) {
      const version = result.stdout.split('\n')[CURL_DEFAULTS.VERSION_LINE_INDEX] || 'Unknown version';
      return {
        isAvailable: true,
        version: version.trim(),
        error: null
      };
    }

    return {
      isAvailable: false,
      version: null,
      error: 'curl command failed'
    };
  } catch (error) {
    // Defensive: any unexpected synchronous failure still yields a
    // structured result instead of propagating.
    return {
      isAvailable: false,
      version: null,
      error: `curl not found: ${error.message}`
    };
  }
}
436
+
437
// Public API: low-level download and search primitives, the request-handler
// factory consumed by nwss.js, and the curl availability probe.
module.exports = {
  downloadWithCurl,
  searchContent,
  createCurlHandler,
  validateCurlAvailability
};
package/lib/grep.js CHANGED
@@ -7,6 +7,23 @@ const path = require('path');
7
7
  const os = require('os');
8
8
  const { colorize, colors, messageColors, tags, formatLogMessage } = require('./colorize');
9
9
 
10
+ // === Constants ===
11
+ const GREP_DEFAULTS = {
12
+ TIMEOUT_SECONDS: 30,
13
+ MAX_REDIRECTS: 5,
14
+ MAX_SIZE_BYTES: 10 * 1024 * 1024, // 10MB
15
+ VALIDATION_TIMEOUT: 5000, // 5 seconds
16
+ SPAWN_TIMEOUT_MULTIPLIER: 1000, // Convert seconds to milliseconds
17
+ GREP_TIMEOUT: 10000, // 10 seconds for grep operations
18
+ MAX_BUFFER_SIZE: 1024 * 1024, // 1MB max buffer
19
+ DEFAULT_MAX_MATCHES: 1000,
20
+ GREP_SUCCESS_STATUS: 0,
21
+ GREP_NOT_FOUND_STATUS: 1,
22
+ CURL_SUCCESS_STATUS: 0,
23
+ VERSION_LINE_INDEX: 0,
24
+ RANDOM_STRING_LENGTH: 9
25
+ };
26
+
10
27
  /**
11
28
  * Creates a temporary file with content for grep processing
12
29
  * @param {string} content - The content to write to temp file
@@ -15,7 +32,7 @@ const { colorize, colors, messageColors, tags, formatLogMessage } = require('./c
15
32
  */
16
33
  function createTempFile(content, prefix = 'scanner_grep') {
17
34
  const tempDir = os.tmpdir();
18
- const tempFile = path.join(tempDir, `${prefix}_${Date.now()}_${Math.random().toString(36).substr(2, 9)}.tmp`);
35
+ const tempFile = path.join(tempDir, `${prefix}_${Date.now()}_${Math.random().toString(36).substr(2, GREP_DEFAULTS.RANDOM_STRING_LENGTH)}.tmp`);
19
36
 
20
37
  try {
21
38
  fs.writeFileSync(tempFile, content, 'utf8');
@@ -37,7 +54,7 @@ async function grepContent(content, searchPatterns, options = {}) {
37
54
  ignoreCase = true,
38
55
  wholeWord = false,
39
56
  regex = false,
40
- maxMatches = 1000
57
+ maxMatches = GREP_DEFAULTS.DEFAULT_MAX_MATCHES
41
58
  } = options;
42
59
 
43
60
  if (!content || searchPatterns.length === 0) {
@@ -71,12 +88,12 @@ async function grepContent(content, searchPatterns, options = {}) {
71
88
  try {
72
89
  const result = spawnSync('grep', grepArgs, {
73
90
  encoding: 'utf8',
74
- timeout: 10000, // 10 second timeout
75
- maxBuffer: 1024 * 1024 // 1MB max buffer
91
+ timeout: GREP_DEFAULTS.GREP_TIMEOUT,
92
+ maxBuffer: GREP_DEFAULTS.MAX_BUFFER_SIZE
76
93
  });
77
94
 
78
95
  // grep returns 0 if found, 1 if not found, 2+ for errors
79
- if (result.status === 0 && result.stdout) {
96
+ if (result.status === GREP_DEFAULTS.GREP_SUCCESS_STATUS && result.stdout) {
80
97
  allMatches.push({
81
98
  pattern: pattern,
82
99
  matches: result.stdout.split('\n').filter(line => line.trim().length > 0).slice(0, maxMatches)
@@ -122,13 +139,13 @@ async function grepContent(content, searchPatterns, options = {}) {
122
139
  * @param {number} timeout - Timeout in seconds (default: 30)
123
140
  * @returns {Promise<object>} Object with found boolean, matchedPattern, and content
124
141
  */
125
- async function downloadAndGrep(url, searchPatterns, userAgent = '', grepOptions = {}, timeout = 30) {
142
+ async function downloadAndGrep(url, searchPatterns, userAgent = '', grepOptions = {}, timeout = GREP_DEFAULTS.TIMEOUT_SECONDS) {
126
143
  try {
127
144
  const curlArgs = [
128
145
  '-s', // Silent mode
129
146
  '-L', // Follow redirects
130
147
  '--max-time', timeout.toString(),
131
- '--max-redirs', '5',
148
+ '--max-redirs', GREP_DEFAULTS.MAX_REDIRECTS.toString(),
132
149
  '--fail-with-body', // Return body even on HTTP errors
133
150
  '--compressed', // Accept compressed responses
134
151
  ];
@@ -151,15 +168,15 @@ async function downloadAndGrep(url, searchPatterns, userAgent = '', grepOptions
151
168
  // Download content with curl
152
169
  const curlResult = spawnSync('curl', curlArgs, {
153
170
  encoding: 'utf8',
154
- timeout: timeout * 1000,
155
- maxBuffer: 10 * 1024 * 1024 // 10MB max buffer
171
+ timeout: timeout * GREP_DEFAULTS.SPAWN_TIMEOUT_MULTIPLIER,
172
+ maxBuffer: GREP_DEFAULTS.MAX_SIZE_BYTES
156
173
  });
157
174
 
158
175
  if (curlResult.error) {
159
176
  throw curlResult.error;
160
177
  }
161
178
 
162
- if (curlResult.status !== 0) {
179
+ if (curlResult.status !== GREP_DEFAULTS.CURL_SUCCESS_STATUS) {
163
180
  throw new Error(`Curl exited with status ${curlResult.status}: ${curlResult.stderr}`);
164
181
  }
165
182
 
@@ -191,6 +208,9 @@ function createGrepHandler(config) {
191
208
  searchStrings,
192
209
  regexes,
193
210
  matchedDomains,
211
+ addMatchedDomain,
212
+ isDomainAlreadyDetected,
213
+ onContentFetched,
194
214
  currentUrl,
195
215
  perSiteSubDomains,
196
216
  ignoreDomains,
@@ -201,13 +221,23 @@ function createGrepHandler(config) {
201
221
  matchedUrlsLogFile,
202
222
  forceDebug,
203
223
  userAgent,
224
+ resourceType,
204
225
  hasSearchString,
205
226
  grepOptions = {}
206
227
  } = config;
207
228
 
208
229
  return async function grepHandler(requestUrl) {
209
230
  const respDomain = perSiteSubDomains ? (new URL(requestUrl)).hostname : getRootDomain(requestUrl);
231
+ const fullSubdomain = (new URL(requestUrl)).hostname; // Always get full subdomain for cache tracking
210
232
 
233
+ // Skip if already detected to avoid duplicates
234
+ if (isDomainAlreadyDetected(fullSubdomain)) {
235
+ if (forceDebug) {
236
+ console.log(formatLogMessage('debug', `[grep] Skipping already detected subdomain: ${fullSubdomain}`));
237
+ }
238
+ return;
239
+ }
240
+
211
241
  // Only process URLs that match our regex patterns
212
242
  const matchesRegex = regexes.some(re => re.test(requestUrl));
213
243
  if (!matchesRegex) return;
@@ -243,7 +273,7 @@ function createGrepHandler(config) {
243
273
  return;
244
274
  }
245
275
 
246
- matchedDomains.add(respDomain);
276
+ addMatchedDomain(respDomain, resourceType, fullSubdomain);
247
277
  const simplifiedUrl = getRootDomain(currentUrl);
248
278
 
249
279
  if (siteConfig.verbose === 1) {
@@ -265,14 +295,23 @@ function createGrepHandler(config) {
265
295
  }
266
296
 
267
297
  // If searchstring IS defined, download and grep content
268
- const result = await downloadAndGrep(requestUrl, searchStrings, userAgent, grepOptions, 30);
269
-
298
+ const result = await downloadAndGrep(requestUrl, searchStrings, userAgent, grepOptions, GREP_DEFAULTS.TIMEOUT_SECONDS);
299
+
300
+ // Cache the fetched content if callback provided
301
+ if (onContentFetched && result.content) {
302
+ try {
303
+ onContentFetched(requestUrl, result.content);
304
+ } catch (cacheErr) {
305
+ if (forceDebug) console.log(formatLogMessage('debug', `[grep] Content caching failed: ${cacheErr.message}`));
306
+ }
307
+ }
308
+
270
309
  if (result.found) {
271
310
  if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
272
311
  return;
273
312
  }
274
313
 
275
- matchedDomains.add(respDomain);
314
+ addMatchedDomain(respDomain, resourceType, fullSubdomain);
276
315
  const simplifiedUrl = getRootDomain(currentUrl);
277
316
 
278
317
  if (siteConfig.verbose === 1) {
@@ -313,11 +352,11 @@ function validateGrepAvailability() {
313
352
  try {
314
353
  const result = spawnSync('grep', ['--version'], {
315
354
  encoding: 'utf8',
316
- timeout: 5000
355
+ timeout: GREP_DEFAULTS.VALIDATION_TIMEOUT
317
356
  });
318
357
 
319
- if (result.status === 0) {
320
- const version = result.stdout.split('\n')[0] || 'Unknown version';
358
+ if (result.status === GREP_DEFAULTS.GREP_SUCCESS_STATUS) {
359
+ const version = result.stdout.split('\n')[GREP_DEFAULTS.VERSION_LINE_INDEX] || 'Unknown version';
321
360
  return {
322
361
  isAvailable: true,
323
362
  version: version.trim(),
package/nwss.js CHANGED
@@ -1,4 +1,4 @@
1
- // === Network scanner script (nwss.js) v1.0.87 ===
1
+ // === Network scanner script (nwss.js) v1.0.88 ===
2
2
 
3
3
  // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
4
4
  // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -11,6 +11,8 @@ const { compressMultipleFiles, formatFileSize } = require('./lib/compress');
11
11
  const { parseSearchStrings, createResponseHandler, createCurlHandler } = require('./lib/searchstring');
12
12
  const { applyAllFingerprintSpoofing } = require('./lib/fingerprint');
13
13
  const { formatRules, handleOutput, getFormatDescription } = require('./lib/output');
14
+ // Curl functionality (replace searchstring curl handler)
15
+ const { validateCurlAvailability, createCurlHandler: createCurlModuleHandler } = require('./lib/curl');
14
16
  // Rule validation
15
17
  const { validateRulesetFile, validateFullConfig, testDomainValidation, cleanRulesetFile } = require('./lib/validate_rules');
16
18
  // CF Bypass
@@ -123,7 +125,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
123
125
  const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive } = require('./lib/browserhealth');
124
126
 
125
127
  // --- Script Configuration & Constants ---
126
- const VERSION = '1.0.87'; // Script version
128
+ const VERSION = '1.0.88'; // Script version
127
129
 
128
130
  // get startTime
129
131
  const startTime = Date.now();
@@ -1825,6 +1827,18 @@ function setupFrameHandling(page, forceDebug) {
1825
1827
  console.log(formatLogMessage('debug', `Using grep: ${grepCheck.version}`));
1826
1828
  }
1827
1829
  }
1830
+
1831
+ // Validate curl availability if needed
1832
+ if (useCurl) {
1833
+ const curlCheck = validateCurlAvailability();
1834
+ if (!curlCheck.isAvailable) {
1835
+ console.warn(formatLogMessage('warn', `Curl not available for ${currentUrl}: ${curlCheck.error}. Skipping curl-based analysis.`));
1836
+ useCurl = false;
1837
+ useGrep = false; // Grep requires curl
1838
+ } else if (forceDebug) {
1839
+ console.log(formatLogMessage('debug', `Using curl: ${curlCheck.version}`));
1840
+ }
1841
+ }
1828
1842
 
1829
1843
  // Parse whois and dig terms
1830
1844
  const whoisTerms = siteConfig.whois && Array.isArray(siteConfig.whois) ? siteConfig.whois : null;
@@ -2473,9 +2487,9 @@ function setupFrameHandling(page, forceDebug) {
2473
2487
  // Use grep handler if both grep and searchstring/searchstring_and are enabled
2474
2488
  if (useGrep && (hasSearchString || hasSearchStringAnd)) {
2475
2489
  const grepHandler = createGrepHandler({
2476
- searchStrings,
2477
- searchStringsAnd,
2478
2490
  regexes,
2491
+ searchStrings,
2492
+ searchStringsAnd,
2479
2493
  matchedDomains,
2480
2494
  addMatchedDomain, // Pass the helper function
2481
2495
  isDomainAlreadyDetected,
@@ -2496,8 +2510,7 @@ function setupFrameHandling(page, forceDebug) {
2496
2510
  forceDebug,
2497
2511
  userAgent: curlUserAgent,
2498
2512
  resourceType,
2499
- hasSearchString,
2500
- hasSearchStringAnd,
2513
+ hasSearchString: hasSearchString || hasSearchStringAnd,
2501
2514
  grepOptions: {
2502
2515
  ignoreCase: true,
2503
2516
  wholeWord: false,
@@ -2508,20 +2521,20 @@ function setupFrameHandling(page, forceDebug) {
2508
2521
  setImmediate(() => grepHandler(reqUrl));
2509
2522
  } else {
2510
2523
  // Use regular curl handler
2511
- const curlHandler = createCurlHandler({
2524
+ const curlHandlerFromCurlModule = createCurlModuleHandler({
2512
2525
  searchStrings,
2513
2526
  searchStringsAnd,
2514
2527
  hasSearchStringAnd,
2515
2528
  regexes,
2516
2529
  matchedDomains,
2517
- addMatchedDomain, // Pass the helper function
2530
+ addMatchedDomain,
2518
2531
  isDomainAlreadyDetected,
2519
- onContentFetched: smartCache && !ignoreCache ? (url, content) => {
2520
- // Only cache if not bypassing cache
2521
- if (!shouldBypassCacheForUrl(url, siteConfig)) {
2522
- smartCache.cacheRequest(url, { method: 'GET', siteConfig }, { body: content, status: 200 });
2523
- }
2524
- } : undefined,
2532
+ onContentFetched: smartCache && !ignoreCache ? (url, content) => {
2533
+ // Only cache if not bypassing cache
2534
+ if (!shouldBypassCacheForUrl(url, siteConfig)) {
2535
+ smartCache.cacheRequest(url, { method: 'GET', siteConfig }, { body: content, status: 200 });
2536
+ }
2537
+ } : undefined,
2525
2538
  currentUrl,
2526
2539
  perSiteSubDomains,
2527
2540
  ignoreDomains,
@@ -2533,10 +2546,10 @@ function setupFrameHandling(page, forceDebug) {
2533
2546
  forceDebug,
2534
2547
  userAgent: curlUserAgent,
2535
2548
  resourceType,
2536
- hasSearchString
2549
+ hasSearchString: hasSearchString || hasSearchStringAnd
2537
2550
  });
2538
2551
 
2539
- setImmediate(() => curlHandler(reqUrl));
2552
+ setImmediate(() => curlHandlerFromCurlModule(reqUrl));
2540
2553
  }
2541
2554
  } catch (curlErr) {
2542
2555
  if (forceDebug) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fanboynz/network-scanner",
3
- "version": "1.0.87",
3
+ "version": "1.0.88",
4
4
  "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
5
5
  "main": "nwss.js",
6
6
  "scripts": {