npm - @fanboynz/network-scanner - Versions diffs - 1.0.87 → 1.0.89 - Mend

@fanboynz/network-scanner 1.0.87 → 1.0.89

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/lib/curl.js ADDED Viewed

@@ -0,0 +1,442 @@
+// === curl.js - Curl-based Content Download Module ===
+// Handles HTTP content downloading using curl for searchstring analysis
+const fs = require('fs');
+const { spawnSync } = require('child_process');
+const { colorize, colors, messageColors, tags, formatLogMessage } = require('./colorize');
+// === Constants ===
+const CURL_DEFAULTS = {
+  TIMEOUT_SECONDS: 30,
+  MAX_REDIRECTS: 5,
+  MAX_SIZE_BYTES: 10 * 1024 * 1024, // 10MB
+  VALIDATION_TIMEOUT: 5000, // 5 seconds
+  SPAWN_TIMEOUT_MULTIPLIER: 1000, // Convert seconds to milliseconds
+  HTTP_SUCCESS_CODE: 200,
+  CURL_SUCCESS_STATUS: 0,
+  METADATA_PIPE_PARTS: 3, // http_code|content_type|size_download
+  VERSION_LINE_INDEX: 0
+};
+/**
+ * Downloads content using curl with browser-like headers
+ * @param {string} url - The URL to download
+ * @param {string} userAgent - User agent string to use
+ * @param {object} options - Download options
+ * @returns {Promise<object>} Object with content, status, and metadata
+ */
+async function downloadWithCurl(url, userAgent = '', options = {}) {
+  const {
+    timeout = CURL_DEFAULTS.TIMEOUT_SECONDS,
+    maxRedirects = CURL_DEFAULTS.MAX_REDIRECTS,
+    maxSize = CURL_DEFAULTS.MAX_SIZE_BYTES,
+    followRedirects = true,
+    customHeaders = {}
+  } = options;
+  try {
+    const curlArgs = [
+      '-s', // Silent mode
+      '--max-time', timeout.toString(),
+      '--max-redirs', maxRedirects.toString(),
+      '--fail-with-body', // Return body even on HTTP errors
+      '--compressed', // Accept compressed responses
+      '--write-out', '%{http_code}|%{content_type}|%{size_download}', // Output metadata
+    ];
+    if (followRedirects) {
+      curlArgs.push('-L'); // Follow redirects
+    }
+    // Add user agent if provided
+    if (userAgent) {
+      curlArgs.push('-H', `User-Agent: ${userAgent}`);
+    }
+    // Add common browser headers
+    curlArgs.push(
+      '-H', 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+      '-H', 'Accept-Language: en-US,en;q=0.5',
+      '-H', 'Accept-Encoding: gzip, deflate, br',
+      '-H', 'Connection: keep-alive',
+      '-H', 'Upgrade-Insecure-Requests: 1',
+      '-H', 'Sec-Fetch-Dest: document',
+      '-H', 'Sec-Fetch-Mode: navigate',
+      '-H', 'Sec-Fetch-Site: none',
+      '-H', 'Cache-Control: no-cache'
+    );
+    // Add custom headers
+    Object.entries(customHeaders).forEach(([key, value]) => {
+      curlArgs.push('-H', `${key}: ${value}`);
+    });
+    curlArgs.push(url);
+    // Execute curl
+    const curlResult = spawnSync('curl', curlArgs, {
+      encoding: 'utf8',
+      timeout: timeout * CURL_DEFAULTS.SPAWN_TIMEOUT_MULTIPLIER,
+      maxBuffer: maxSize
+    });
+    if (curlResult.error) {
+      throw curlResult.error;
+    }
+    if (curlResult.status !== CURL_DEFAULTS.CURL_SUCCESS_STATUS) {
+      throw new Error(`Curl exited with status ${curlResult.status}: ${curlResult.stderr}`);
+    }
+    const output = curlResult.stdout;
+    const lines = output.split('\n');
+    const metadata = lines[lines.length - 1]; // Last line contains write-out data
+    const content = lines.slice(0, -1).join('\n'); // Everything except last line
+    // Parse metadata
+    const metadataParts = metadata.split('|');
+    if (metadataParts.length !== CURL_DEFAULTS.METADATA_PIPE_PARTS) {
+      throw new Error(`Invalid metadata format: expected ${CURL_DEFAULTS.METADATA_PIPE_PARTS} parts, got ${metadataParts.length}`);
+    }
+    const [httpCode, contentType, downloadSize] = metadataParts;
+    return {
+      content,
+      httpCode: parseInt(httpCode) || 0,
+      contentType: contentType || 'unknown',
+      downloadSize: parseInt(downloadSize) || content.length,
+      success: true
+    };
+  } catch (error) {
+    return {
+      content: '',
+      httpCode: 0,
+      contentType: 'unknown',
+      downloadSize: 0,
+      success: false,
+      error: error.message
+    };
+  }
+}
+/**
+ * Searches content for patterns using JavaScript (case-insensitive)
+ * @param {string} content - Content to search
+ * @param {Array<string>} searchStrings - OR patterns (any can match)
+ * @param {Array<string>} searchStringsAnd - AND patterns (all must match)
+ * @param {boolean} hasSearchStringAnd - Whether AND logic is being used
+ * @returns {object} Search result with found status and matched pattern
+ */
+function searchContent(content, searchStrings = [], searchStringsAnd = [], hasSearchStringAnd = false) {
+  if (!content || content.length === 0) {
+    return { found: false, matchedPattern: null, matchType: null };
+  }
+  const lowerContent = content.toLowerCase();
+  // Handle AND logic searchstring_and (all patterns must be present)
+  if (hasSearchStringAnd && searchStringsAnd.length > 0) {
+    const missingPatterns = [];
+    const foundPatterns = [];
+    for (const pattern of searchStringsAnd) {
+      const lowerPattern = pattern.toLowerCase();
+      if (lowerContent.includes(lowerPattern)) {
+        foundPatterns.push(pattern);
+      } else {
+        missingPatterns.push(pattern);
+      }
+    }
+    // All patterns must be found for AND logic
+    if (missingPatterns.length === 0) {
+      return {
+        found: true,
+        matchedPattern: foundPatterns.join(' AND '),
+        matchType: 'AND',
+        foundPatterns,
+        missingPatterns: []
+      };
+    } else {
+      return {
+        found: false,
+        matchedPattern: null,
+        matchType: 'AND',
+        foundPatterns,
+        missingPatterns
+      };
+    }
+  }
+  // Handle OR logic searchstring (any pattern can match)
+  if (searchStrings.length > 0) {
+    for (const pattern of searchStrings) {
+      const lowerPattern = pattern.toLowerCase();
+      if (lowerContent.includes(lowerPattern)) {
+        return {
+          found: true,
+          matchedPattern: pattern,
+          matchType: 'OR'
+        };
+      }
+    }
+  }
+  return { found: false, matchedPattern: null, matchType: null };
+}
+/**
+ * Creates a curl-based URL handler for downloading and searching content
+ * @param {object} config - Configuration object containing all necessary parameters
+ * @returns {Function} URL handler function for curl-based content analysis
+ */
+function createCurlHandler(config) {
+  const {
+    searchStrings,
+    searchStringsAnd,
+    hasSearchStringAnd,
+    regexes,
+    matchedDomains,
+    addMatchedDomain,
+    isDomainAlreadyDetected,
+    onContentFetched,
+    currentUrl,
+    perSiteSubDomains,
+    ignoreDomains,
+    matchesIgnoreDomain,
+    getRootDomain,
+    siteConfig,
+    dumpUrls,
+    matchedUrlsLogFile,
+    forceDebug,
+    userAgent,
+    resourceType,
+    hasSearchString
+  } = config;
+  return async function curlHandler(requestUrl) {
+    try {
+      const respDomain = perSiteSubDomains ? (new URL(requestUrl)).hostname : getRootDomain(requestUrl);
+      const fullSubdomain = (new URL(requestUrl)).hostname; // Always get full subdomain for cache tracking
+      // Skip if already detected to avoid duplicates
+      if (isDomainAlreadyDetected(fullSubdomain)) {
+        if (forceDebug) {
+          console.log(formatLogMessage('debug', `[curl] Skipping already detected subdomain: ${fullSubdomain}`));
+        }
+        return;
+      }
+      // Only process URLs that match our regex patterns
+      const matchesRegex = regexes.some(re => re.test(requestUrl));
+      if (!matchesRegex) {
+        if (forceDebug) {
+          console.log(formatLogMessage('debug', `[curl] URL ${requestUrl} doesn't match any regex patterns`));
+        }
+        return;
+      }
+      // Check if this is a first-party request (same domain as the URL being scanned)
+      const currentUrlHostname = new URL(currentUrl).hostname;
+      const requestHostname = new URL(requestUrl).hostname;
+      const currentRootDomain = getRootDomain(currentUrl);
+      const requestRootDomain = getRootDomain(requestUrl);
+      const isFirstParty = currentRootDomain === requestRootDomain;
+      // Apply first-party/third-party filtering
+      if (isFirstParty && (siteConfig.firstParty === false || siteConfig.firstParty === 0)) {
+        if (forceDebug) {
+          console.log(formatLogMessage('debug', `[curl] Skipping first-party request (firstParty disabled): ${requestUrl}`));
+        }
+        return;
+      }
+      if (!isFirstParty && (siteConfig.thirdParty === false || siteConfig.thirdParty === 0)) {
+        if (forceDebug) {
+          console.log(formatLogMessage('debug', `[curl] Skipping third-party request (thirdParty disabled): ${requestUrl}`));
+        }
+        return;
+      }
+      if (forceDebug) {
+        console.log(formatLogMessage('debug', `[curl] Processing ${isFirstParty ? 'first-party' : 'third-party'} request: ${requestUrl}`));
+      }
+      // If NO searchstring is defined, match immediately (like browser behavior)
+      if (!hasSearchString || ((!searchStrings || !searchStrings.length) && (!searchStringsAnd || !searchStringsAnd.length))) {
+        if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
+          if (forceDebug) {
+            console.log(formatLogMessage('debug', `[curl] Domain ${respDomain} is in ignore list`));
+          }
+          return;
+        }
+        addMatchedDomain(respDomain, resourceType, fullSubdomain);
+        const simplifiedUrl = getRootDomain(currentUrl);
+        if (siteConfig.verbose === 1) {
+          const partyType = isFirstParty ? 'first-party' : 'third-party';
+          const resourceInfo = resourceType ? ` (${resourceType})` : '';
+          console.log(formatLogMessage('match', `[${simplifiedUrl}] ${requestUrl} (${partyType}, curl) matched regex${resourceInfo}`));
+        }
+        if (dumpUrls && matchedUrlsLogFile) {
+          const timestamp = new Date().toISOString();
+          const partyType = isFirstParty ? 'first-party' : 'third-party';
+          const resourceInfo = resourceType ? ` (${resourceType})` : '';
+          try {
+            fs.appendFileSync(matchedUrlsLogFile,
+              `${timestamp} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, curl)${resourceInfo}\n`);
+          } catch (logErr) {
+            console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
+          }
+        }
+        return;
+      }
+      // If searchstring IS defined, download and search content
+      if (hasSearchString && ((searchStrings && searchStrings.length > 0) || (searchStringsAnd && searchStringsAnd.length > 0)) && forceDebug) {
+        console.log(formatLogMessage('debug', `[curl] Downloading content for pattern matching: ${requestUrl}`));
+      }
+      // Prepare custom headers from site config
+      const customHeaders = siteConfig.custom_headers || {};
+      if (siteConfig.referrer_headers) {
+        const referrerUrl = Array.isArray(siteConfig.referrer_headers)
+          ? siteConfig.referrer_headers[Math.floor(Math.random() * siteConfig.referrer_headers.length)]
+          : siteConfig.referrer_headers;
+        if (typeof referrerUrl === 'string' && referrerUrl.startsWith('http')) {
+          customHeaders['Referer'] = referrerUrl;
+        }
+      }
+      const downloadResult = await downloadWithCurl(requestUrl, userAgent, {
+        timeout: CURL_DEFAULTS.TIMEOUT_SECONDS,
+        maxRedirects: CURL_DEFAULTS.MAX_REDIRECTS,
+        customHeaders
+      });
+      if (!downloadResult.success) {
+        if (forceDebug) {
+          console.log(formatLogMessage('debug', `[curl] Failed to download ${requestUrl}: ${downloadResult.error}`));
+        }
+        return;
+      }
+      // Cache the fetched content if callback provided
+      if (onContentFetched) {
+        try {
+          onContentFetched(requestUrl, downloadResult.content);
+        } catch (cacheErr) {
+          if (forceDebug) {
+            console.log(formatLogMessage('debug', `[curl] Content caching failed: ${cacheErr.message}`));
+          }
+        }
+      }
+      // Search content for patterns
+      const searchResult = searchContent(
+        downloadResult.content,
+        searchStrings,
+        searchStringsAnd,
+        hasSearchStringAnd
+      );
+      if (searchResult.found) {
+        if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
+          if (forceDebug) {
+            console.log(formatLogMessage('debug', `[curl] Domain ${respDomain} matches but is in ignore list`));
+          }
+          return;
+        }
+        addMatchedDomain(respDomain, resourceType, fullSubdomain);
+        const simplifiedUrl = getRootDomain(currentUrl);
+        if (siteConfig.verbose === 1) {
+          const partyType = isFirstParty ? 'first-party' : 'third-party';
+          const resourceInfo = resourceType ? ` (${resourceType})` : '';
+          const matchInfo = searchResult.matchType === 'AND'
+            ? `patterns: ${searchResult.foundPatterns.length}/${searchStringsAnd.length}`
+            : `pattern: "${searchResult.matchedPattern}"`;
+          console.log(formatLogMessage('match',
+            `[${simplifiedUrl}] ${requestUrl} (${partyType}, curl) contains ${matchInfo}${resourceInfo}`));
+        }
+        if (dumpUrls && matchedUrlsLogFile) {
+          const timestamp = new Date().toISOString();
+          const partyType = isFirstParty ? 'first-party' : 'third-party';
+          const resourceInfo = resourceType ? ` (${resourceType})` : '';
+          const matchInfo = searchResult.matchType === 'AND'
+            ? `patterns: ${searchResult.foundPatterns.length}/${searchStringsAnd.length}`
+            : `pattern: "${searchResult.matchedPattern}"`;
+          try {
+            fs.appendFileSync(matchedUrlsLogFile,
+              `${timestamp} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, curl, ${matchInfo})${resourceInfo}\n`);
+          } catch (logErr) {
+            console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
+          }
+        }
+      } else {
+        if (forceDebug) {
+          const partyType = isFirstParty ? 'first-party' : 'third-party';
+          if (searchResult.matchType === 'AND' && searchResult.missingPatterns) {
+            console.log(formatLogMessage('debug',
+              `[curl] ${requestUrl} (${partyType}) matched regex but missing AND patterns: ${searchResult.missingPatterns.join(', ')}`));
+          } else {
+            console.log(formatLogMessage('debug',
+              `[curl] ${requestUrl} (${partyType}) matched regex but no search patterns found`));
+          }
+        }
+      }
+    } catch (err) {
+      if (forceDebug) {
+        console.log(formatLogMessage('debug', `[curl] Handler failed for ${requestUrl}: ${err.message}`));
+      }
+    }
+  };
+}
+/**
+ * Validates that curl is available on the system
+ * @returns {object} Validation result with isAvailable boolean and version info
+ */
+function validateCurlAvailability() {
+  try {
+    const result = spawnSync('curl', ['--version'], {
+      encoding: 'utf8',
+      timeout: CURL_DEFAULTS.VALIDATION_TIMEOUT
+    });
+    if (result.status === CURL_DEFAULTS.CURL_SUCCESS_STATUS) {
+      const version = result.stdout.split('\n')[CURL_DEFAULTS.VERSION_LINE_INDEX] || 'Unknown version';
+      return {
+        isAvailable: true,
+        version: version.trim(),
+        error: null
+      };
+    } else {
+      return {
+        isAvailable: false,
+        version: null,
+        error: 'curl command failed'
+      };
+    }
+  } catch (error) {
+    return {
+      isAvailable: false,
+      version: null,
+      error: `curl not found: ${error.message}`
+    };
+  }
+}
+module.exports = {
+  downloadWithCurl,
+  searchContent,
+  createCurlHandler,
+  validateCurlAvailability
+};

package/lib/grep.js CHANGED Viewed

@@ -7,6 +7,23 @@ const path = require('path');
 const os = require('os');
 const { colorize, colors, messageColors, tags, formatLogMessage } = require('./colorize');
+// === Constants === (limits, timeouts and exit codes used by the grep/curl helpers in this module)
+const GREP_DEFAULTS = {
+  TIMEOUT_SECONDS: 30, // Default download timeout, passed to curl --max-time
+  MAX_REDIRECTS: 5, // Passed to curl --max-redirs
+  MAX_SIZE_BYTES: 10 * 1024 * 1024, // 10MB, maxBuffer for the curl download
+  VALIDATION_TIMEOUT: 5000, // 5 seconds (milliseconds) for the `grep --version` probe
+  SPAWN_TIMEOUT_MULTIPLIER: 1000, // Convert seconds to milliseconds
+  GREP_TIMEOUT: 10000, // 10 seconds (milliseconds) for grep operations
+  MAX_BUFFER_SIZE: 1024 * 1024, // 1MB max buffer for grep output
+  DEFAULT_MAX_MATCHES: 1000, // Default cap on matched lines kept per pattern
+  GREP_SUCCESS_STATUS: 0, // grep exit status when a match was found
+  GREP_NOT_FOUND_STATUS: 1, // grep exit status when nothing matched (2+ means error)
+  CURL_SUCCESS_STATUS: 0, // curl exit status on success
+  VERSION_LINE_INDEX: 0, // Line of `grep --version` output used as the version string
+  RANDOM_STRING_LENGTH: 9 // Length of the random suffix in temp file names
+};
 /**
  * Creates a temporary file with content for grep processing
  * @param {string} content - The content to write to temp file
@@ -15,7 +32,7 @@ const { colorize, colors, messageColors, tags, formatLogMessage } = require('./c
  */
 function createTempFile(content, prefix = 'scanner_grep') {
   const tempDir = os.tmpdir();
-  const tempFile = path.join(tempDir, `${prefix}_${Date.now()}_${Math.random().toString(36).substr(2, 9)}.tmp`);
+  const tempFile = path.join(tempDir, `${prefix}_${Date.now()}_${Math.random().toString(36).substr(2, GREP_DEFAULTS.RANDOM_STRING_LENGTH)}.tmp`);
   try {
     fs.writeFileSync(tempFile, content, 'utf8');
@@ -37,7 +54,7 @@ async function grepContent(content, searchPatterns, options = {}) {
     ignoreCase = true,
     wholeWord = false,
     regex = false,
-    maxMatches = 1000
+    maxMatches = GREP_DEFAULTS.DEFAULT_MAX_MATCHES
   } = options;
   if (!content || searchPatterns.length === 0) {
@@ -71,12 +88,12 @@ async function grepContent(content, searchPatterns, options = {}) {
       try {
         const result = spawnSync('grep', grepArgs, {
           encoding: 'utf8',
-          timeout: 10000, // 10 second timeout
-          maxBuffer: 1024 * 1024 // 1MB max buffer
+          timeout: GREP_DEFAULTS.GREP_TIMEOUT,
+          maxBuffer: GREP_DEFAULTS.MAX_BUFFER_SIZE
         });
         // grep returns 0 if found, 1 if not found, 2+ for errors
-        if (result.status === 0 && result.stdout) {
+        if (result.status === GREP_DEFAULTS.GREP_SUCCESS_STATUS && result.stdout) {
           allMatches.push({
             pattern: pattern,
             matches: result.stdout.split('\n').filter(line => line.trim().length > 0).slice(0, maxMatches)
@@ -122,13 +139,13 @@ async function grepContent(content, searchPatterns, options = {}) {
  * @param {number} timeout - Timeout in seconds (default: 30)
  * @returns {Promise<object>} Object with found boolean, matchedPattern, and content
  */
-async function downloadAndGrep(url, searchPatterns, userAgent = '', grepOptions = {}, timeout = 30) {
+async function downloadAndGrep(url, searchPatterns, userAgent = '', grepOptions = {}, timeout = GREP_DEFAULTS.TIMEOUT_SECONDS) {
   try {
     const curlArgs = [
       '-s', // Silent mode
       '-L', // Follow redirects
       '--max-time', timeout.toString(),
-      '--max-redirs', '5',
+      '--max-redirs', GREP_DEFAULTS.MAX_REDIRECTS.toString(),
       '--fail-with-body', // Return body even on HTTP errors
       '--compressed', // Accept compressed responses
     ];
@@ -151,15 +168,15 @@ async function downloadAndGrep(url, searchPatterns, userAgent = '', grepOptions
     // Download content with curl
     const curlResult = spawnSync('curl', curlArgs, {
       encoding: 'utf8',
-      timeout: timeout * 1000,
-      maxBuffer: 10 * 1024 * 1024 // 10MB max buffer
+      timeout: timeout * GREP_DEFAULTS.SPAWN_TIMEOUT_MULTIPLIER,
+      maxBuffer: GREP_DEFAULTS.MAX_SIZE_BYTES
     });
     if (curlResult.error) {
       throw curlResult.error;
     }
-    if (curlResult.status !== 0) {
+    if (curlResult.status !== GREP_DEFAULTS.CURL_SUCCESS_STATUS) {
       throw new Error(`Curl exited with status ${curlResult.status}: ${curlResult.stderr}`);
     }
@@ -191,6 +208,9 @@ function createGrepHandler(config) {
     searchStrings,
     regexes,
     matchedDomains,
+    addMatchedDomain,
+    isDomainAlreadyDetected,
+    onContentFetched,
     currentUrl,
     perSiteSubDomains,
     ignoreDomains,
@@ -201,13 +221,23 @@ function createGrepHandler(config) {
     matchedUrlsLogFile,
     forceDebug,
     userAgent,
+    resourceType,
     hasSearchString,
     grepOptions = {}
   } = config;
   return async function grepHandler(requestUrl) {
     const respDomain = perSiteSubDomains ? (new URL(requestUrl)).hostname : getRootDomain(requestUrl);
+    const fullSubdomain = (new URL(requestUrl)).hostname; // Always get full subdomain for cache tracking
+    // Skip if already detected to avoid duplicates
+    if (isDomainAlreadyDetected(fullSubdomain)) {
+      if (forceDebug) {
+        console.log(formatLogMessage('debug', `[grep] Skipping already detected subdomain: ${fullSubdomain}`));
+      }
+      return;
+    }
     // Only process URLs that match our regex patterns
     const matchesRegex = regexes.some(re => re.test(requestUrl));
     if (!matchesRegex) return;
@@ -243,7 +273,7 @@ function createGrepHandler(config) {
           return;
         }
-        matchedDomains.add(respDomain);
+        addMatchedDomain(respDomain, resourceType, fullSubdomain);
         const simplifiedUrl = getRootDomain(currentUrl);
         if (siteConfig.verbose === 1) {
@@ -265,14 +295,23 @@ function createGrepHandler(config) {
       }
       // If searchstring IS defined, download and grep content
-      const result = await downloadAndGrep(requestUrl, searchStrings, userAgent, grepOptions, 30);
+      const result = await downloadAndGrep(requestUrl, searchStrings, userAgent, grepOptions, GREP_DEFAULTS.TIMEOUT_SECONDS);
+      // Cache the fetched content if callback provided
+      if (onContentFetched && result.content) {
+        try {
+          onContentFetched(requestUrl, result.content);
+        } catch (cacheErr) {
+          if (forceDebug) console.log(formatLogMessage('debug', `[grep] Content caching failed: ${cacheErr.message}`));
+        }
+      }
       if (result.found) {
         if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
           return;
         }
-        matchedDomains.add(respDomain);
+        addMatchedDomain(respDomain, resourceType, fullSubdomain);
         const simplifiedUrl = getRootDomain(currentUrl);
         if (siteConfig.verbose === 1) {
@@ -313,11 +352,11 @@ function validateGrepAvailability() {
   try {
     const result = spawnSync('grep', ['--version'], {
       encoding: 'utf8',
-      timeout: 5000
+      timeout: GREP_DEFAULTS.VALIDATION_TIMEOUT
     });
-    if (result.status === 0) {
-      const version = result.stdout.split('\n')[0] || 'Unknown version';
+    if (result.status === GREP_DEFAULTS.GREP_SUCCESS_STATUS) {
+      const version = result.stdout.split('\n')[GREP_DEFAULTS.VERSION_LINE_INDEX] || 'Unknown version';
       return {
         isAvailable: true,
         version: version.trim(),

package/nwss.js CHANGED Viewed

@@ -1,4 +1,4 @@
-// === Network scanner script (nwss.js) v1.0.87 ===
+// === Network scanner script (nwss.js) v1.0.89 ===
 // puppeteer for browser automation, fs for file system operations, psl for domain parsing.
 // const pLimit = require('p-limit'); // Will be dynamically imported
@@ -11,6 +11,8 @@ const { compressMultipleFiles, formatFileSize } = require('./lib/compress');
 const { parseSearchStrings, createResponseHandler, createCurlHandler } = require('./lib/searchstring');
 const { applyAllFingerprintSpoofing } = require('./lib/fingerprint');
 const { formatRules, handleOutput, getFormatDescription } = require('./lib/output');
+// Curl functionality (replace searchstring curl handler)
+const { validateCurlAvailability, createCurlHandler: createCurlModuleHandler } = require('./lib/curl');
 // Rule validation
 const { validateRulesetFile, validateFullConfig, testDomainValidation, cleanRulesetFile } = require('./lib/validate_rules');
 // CF Bypass
@@ -123,7 +125,7 @@ const { navigateWithRedirectHandling, handleRedirectTimeout } = require('./lib/r
 const { monitorBrowserHealth, isBrowserHealthy, isQuicklyResponsive } = require('./lib/browserhealth');
 // --- Script Configuration & Constants ---
-const VERSION = '1.0.87'; // Script version
+const VERSION = '1.0.89'; // Script version
 // get startTime
 const startTime = Date.now();
@@ -1458,21 +1460,7 @@ function setupFrameHandling(page, forceDebug) {
     ];
     try {
-      // Health check before creating new page
-      const isHealthy = await isBrowserHealthy(browserInstance);
-      if (!isHealthy) {
-        if (forceDebug) {
-          console.log(formatLogMessage('debug', `Browser health degraded before processing ${currentUrl} - forcing immediate restart`));
-        }
-        // Return special code to trigger immediate browser restart
-        return {
-          url: currentUrl,
-          rules: [],
-          success: false,
-          needsImmediateRestart: true,
-          error: 'Browser health degraded - restart required'
-        };
-      }
       // Check for Protocol timeout errors that indicate browser is broken
       if (browserInstance.process() && browserInstance.process().killed) {
         throw new Error('Browser process was killed - restart required');
@@ -1484,22 +1472,7 @@ function setupFrameHandling(page, forceDebug) {
         throw new Error('Failed to create valid page instance');
       }
-      // Additional health check after page creation but before critical setup
-      const stillHealthy = await isQuicklyResponsive(browserInstance, 3000);
-      if (!stillHealthy) {
-        if (forceDebug) {
-          console.log(formatLogMessage('debug', `Browser unresponsive during page setup for ${currentUrl} - triggering restart`));
-        }
-        return {
-          url: currentUrl,
-          rules: [],
-          success: false,
-          needsImmediateRestart: true,
-          error: 'Browser became unresponsive during page setup - restart required'
-        };
-      }
       // Set aggressive timeouts for problematic operations
       // Optimized timeouts for Puppeteer 23.x responsiveness
       page.setDefaultTimeout(Math.min(timeout, TIMEOUTS.DEFAULT_PAGE_REDUCED));
@@ -1580,6 +1553,8 @@ function setupFrameHandling(page, forceDebug) {
       // --- START: evaluateOnNewDocument for Fetch/XHR Interception (Moved and Fixed) ---
       // This script is injected if --eval-on-doc is used or siteConfig.evaluateOnNewDocument is true.
       const shouldInjectEvalForPage = siteConfig.evaluateOnNewDocument === true || globalEvalOnDoc;
+      let evalOnDocSuccess = false; // Track injection success for fallback logic
       if (shouldInjectEvalForPage) {
           if (forceDebug) {
               if (globalEvalOnDoc) {
@@ -1588,24 +1563,29 @@ function setupFrameHandling(page, forceDebug) {
                   console.log(formatLogMessage('debug', `[evalOnDoc] Site-specific Fetch/XHR interception enabled for: ${currentUrl}`));
               }
           }
-          // Quick browser health check before script injection
+          // Strategy 1: Try full injection with health check
           let browserResponsive = false;
           try {
               await Promise.race([
                   browserInstance.version(), // Quick responsiveness test
                   new Promise((_, reject) =>
-                      setTimeout(() => reject(new Error('Browser health check timeout')), 5000)
+                      setTimeout(() => reject(new Error('Browser health check timeout')), 3000)
                   )
               ]);
               browserResponsive = true;
           } catch (healthErr) {
-              console.warn(formatLogMessage('warn', `[evalOnDoc] Browser unresponsive for ${currentUrl}: ${healthErr.message} - skipping script injection`));
+              if (forceDebug) {
+                  console.log(formatLogMessage('debug', `[evalOnDoc] Browser health check failed: ${healthErr.message}`));
+              }
               browserResponsive = false;
           }
+          // Strategy 2: Try injection with reduced complexity if browser is responsive
           if (browserResponsive) {
-          try {
-              await page.evaluateOnNewDocument(() => {
+              try {
+                  await Promise.race([
+                      page.evaluateOnNewDocument(() => {
                   // Prevent infinite reload loops
                   let reloadCount = 0;
                   const MAX_RELOADS = 2;
@@ -1666,18 +1646,62 @@ function setupFrameHandling(page, forceDebug) {
                           return originalXHROpen.apply(this, arguments);
                       }
                   };
-              });
-          } catch (evalErr) {
-                  if (evalErr.message.includes('timed out') || evalErr.message.includes('ProtocolError')) {
-                      console.warn(formatLogMessage('warn', `[evalOnDoc] Script injection protocol timeout for ${currentUrl} - continuing without XHR/Fetch interception`));
-                  } else {
-                      console.warn(formatLogMessage('warn', `[evalOnDoc] Failed to set up Fetch/XHR interception for ${currentUrl}: ${evalErr.message}`));
+              }),
+                      new Promise((_, reject) =>
+                          setTimeout(() => reject(new Error('Injection timeout')), 8000)
+                      )
+                  ]);
+                  evalOnDocSuccess = true;
+                  if (forceDebug) {
+                      console.log(formatLogMessage('debug', `[evalOnDoc] Full injection successful for ${currentUrl}`));
                   }
-          }
+              } catch (fullInjectionErr) {
+                  if (forceDebug) {
+                      console.log(formatLogMessage('debug', `[evalOnDoc] Full injection failed: ${fullInjectionErr.message}, trying simplified fallback`));
+                  }
+                  // Strategy 3: Fallback - Try minimal injection (just fetch monitoring)
+                  try {
+                      await Promise.race([
+                          page.evaluateOnNewDocument(() => {
+                              // Minimal injection - just fetch monitoring
+                              if (window.fetch) {
+                                  const originalFetch = window.fetch;
+                                  window.fetch = (...args) => {
+                                      try {
+                                          console.log('[evalOnDoc][fetch-minimal]', args[0]);
+                                          return originalFetch.apply(this, args);
+                                      } catch (err) {
+                                          return originalFetch.apply(this, args);
+                                      }
+                                  };
+                              }
+                          }),
+                          new Promise((_, reject) =>
+                              setTimeout(() => reject(new Error('Minimal injection timeout')), 3000)
+                          )
+                      ]);
+                      evalOnDocSuccess = true;
+                      if (forceDebug) {
+                          console.log(formatLogMessage('debug', `[evalOnDoc] Minimal injection successful for ${currentUrl}`));
+                      }
+                  } catch (minimalInjectionErr) {
+                      if (forceDebug) {
+                          console.log(formatLogMessage('debug', `[evalOnDoc] Minimal injection also failed: ${minimalInjectionErr.message}`));
+                      }
+                      evalOnDocSuccess = false;
+                  }
+              }
           } else {
               if (forceDebug) {
-                  console.log(formatLogMessage('debug', `[evalOnDoc] Continuing ${currentUrl} without XHR/Fetch interception due to browser health`));
+                  console.log(formatLogMessage('debug', `[evalOnDoc] Browser unresponsive, skipping injection for ${currentUrl}`));
               }
+              evalOnDocSuccess = false;
+          }
+          // Final status logging
+          if (!evalOnDocSuccess) {
+              console.warn(formatLogMessage('warn', `[evalOnDoc] All injection strategies failed for ${currentUrl} - continuing with standard request monitoring only`));
           }
       }
       // --- END: evaluateOnNewDocument for Fetch/XHR Interception ---
@@ -1825,6 +1849,18 @@ function setupFrameHandling(page, forceDebug) {
        console.log(formatLogMessage('debug', `Using grep: ${grepCheck.version}`));
      }
    }
+   // Validate curl availability if needed
+   if (useCurl) {
+     const curlCheck = validateCurlAvailability();
+     if (!curlCheck.isAvailable) {
+       console.warn(formatLogMessage('warn', `Curl not available for ${currentUrl}: ${curlCheck.error}. Skipping curl-based analysis.`));
+       useCurl = false;
+       useGrep = false; // Grep requires curl
+     } else if (forceDebug) {
+       console.log(formatLogMessage('debug', `Using curl: ${curlCheck.version}`));
+     }
+   }
    // Parse whois and dig terms
    const whoisTerms = siteConfig.whois && Array.isArray(siteConfig.whois) ? siteConfig.whois : null;
@@ -2473,9 +2509,9 @@ function setupFrameHandling(page, forceDebug) {
                // Use grep handler if both grep and searchstring/searchstring_and are enabled
                if (useGrep && (hasSearchString || hasSearchStringAnd)) {
                  const grepHandler = createGrepHandler({
-                   searchStrings,
-				   searchStringsAnd,
                    regexes,
+                   searchStrings,
+                   searchStringsAnd,
                    matchedDomains,
                    addMatchedDomain, // Pass the helper function
                    isDomainAlreadyDetected,
@@ -2496,8 +2532,7 @@ function setupFrameHandling(page, forceDebug) {
                    forceDebug,
                    userAgent: curlUserAgent,
                    resourceType,
-                   hasSearchString,
-				   hasSearchStringAnd,
+                   hasSearchString: hasSearchString || hasSearchStringAnd,
                    grepOptions: {
                      ignoreCase: true,
                      wholeWord: false,
@@ -2508,20 +2543,20 @@ function setupFrameHandling(page, forceDebug) {
                  setImmediate(() => grepHandler(reqUrl));
                } else {
                  // Use regular curl handler
-                 const curlHandler = createCurlHandler({
+                 const curlHandlerFromCurlModule = createCurlModuleHandler({
                    searchStrings,
                    searchStringsAnd,
                    hasSearchStringAnd,
                    regexes,
                    matchedDomains,
-                   addMatchedDomain, // Pass the helper function
+                   addMatchedDomain,
                    isDomainAlreadyDetected,
-                 onContentFetched: smartCache && !ignoreCache ? (url, content) => {
-                   // Only cache if not bypassing cache
-                   if (!shouldBypassCacheForUrl(url, siteConfig)) {
-                     smartCache.cacheRequest(url, { method: 'GET', siteConfig }, { body: content, status: 200 });
-                   }
-                 } : undefined,
+                   onContentFetched: smartCache && !ignoreCache ? (url, content) => {
+                     // Only cache if not bypassing cache
+                     if (!shouldBypassCacheForUrl(url, siteConfig)) {
+                       smartCache.cacheRequest(url, { method: 'GET', siteConfig }, { body: content, status: 200 });
+                     }
+                   } : undefined,
                    currentUrl,
                    perSiteSubDomains,
                    ignoreDomains,
@@ -2533,10 +2568,10 @@ function setupFrameHandling(page, forceDebug) {
                    forceDebug,
                    userAgent: curlUserAgent,
                    resourceType,
-                   hasSearchString
+                   hasSearchString: hasSearchString || hasSearchStringAnd
                  });
-                 setImmediate(() => curlHandler(reqUrl));
+                 setImmediate(() => curlHandlerFromCurlModule(reqUrl));
                }
              } catch (curlErr) {
                if (forceDebug) {
@@ -3119,20 +3154,31 @@ function setupFrameHandling(page, forceDebug) {
     const batchEnd = Math.min(batchStart + RESOURCE_CLEANUP_INTERVAL, totalUrls);
     const currentBatch = allTasks.slice(batchStart, batchEnd);
-    // Check browser health before processing each site
-    const healthCheck = await monitorBrowserHealth(browser, {}, {
-      siteIndex: Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL),
-      totalSites: Math.ceil(totalUrls / RESOURCE_CLEANUP_INTERVAL),
-      urlsSinceCleanup: urlsSinceLastCleanup,
-      cleanupInterval: RESOURCE_CLEANUP_INTERVAL,
-      forceDebug,
-      silentMode
-    });
-    // Check if browser was unhealthy during recent processing
-    const recentResults = results.slice(-3);
-    const hasRecentFailures = recentResults.filter(r => !r.success).length >= 2;
-    const shouldRestartFromFailures = hasRecentFailures && urlsSinceLastCleanup > 3;
+    // IMPROVED: Only check health if we have indicators of problems
+    let healthCheck = { shouldRestart: false, reason: null };
+    const recentResults = results.slice(-8); // Check more results for better pattern detection
+    const recentFailureRate = recentResults.length > 0 ?
+      recentResults.filter(r => !r.success).length / recentResults.length : 0;
+    const hasHighFailureRate = recentFailureRate > 0.75; // 75% failure threshold (more conservative)
+    const hasCriticalErrors = recentResults.filter(r => r.needsImmediateRestart).length > 2;
+    // Only run health checks on strong indicators of problems, or when very close to the scheduled cleanup limit
+    if (urlsSinceLastCleanup > 15 && (
+        (hasHighFailureRate && recentResults.length >= 5) ||  // Need sufficient sample size
+        hasCriticalErrors ||
+        urlsSinceLastCleanup > RESOURCE_CLEANUP_INTERVAL * 0.9  // Very close to cleanup limit
+    )) {
+      healthCheck = await monitorBrowserHealth(browser, {}, {
+        siteIndex: Math.floor(batchStart / RESOURCE_CLEANUP_INTERVAL),
+        totalSites: Math.ceil(totalUrls / RESOURCE_CLEANUP_INTERVAL),
+        urlsSinceCleanup: urlsSinceLastCleanup,
+        cleanupInterval: RESOURCE_CLEANUP_INTERVAL,
+        forceDebug,
+        silentMode
+      });
+    } else if (forceDebug && urlsSinceLastCleanup > 10) {
+      console.log(formatLogMessage('debug', `Skipping health check: failure rate ${Math.round(recentFailureRate * 100)}%, critical errors: ${hasCriticalErrors ? 'yes' : 'no'}`));
+    }
     const batchSize = currentBatch.length;
@@ -3142,17 +3188,21 @@ function setupFrameHandling(page, forceDebug) {
     // Check if processing this entire site would exceed cleanup interval OR health check suggests restart
     const wouldExceedLimit = urlsSinceLastCleanup + batchSize >= Math.min(RESOURCE_CLEANUP_INTERVAL, 100);
     const isNotLastBatch = batchEnd < totalUrls;
+    // IMPROVED: More restrictive health-based restart conditions
+    const shouldRestartFromHealth = healthCheck.shouldRestart &&
+      !healthCheck.reason?.includes('Scheduled cleanup') &&
+      (healthCheck.reason?.includes('Critical') || healthCheck.reason?.includes('disconnected'));
     // Restart browser if we've processed enough URLs, health check suggests it, and this isn't the last site
-    if ((wouldExceedLimit || healthCheck.shouldRestart || shouldRestartFromFailures) && urlsSinceLastCleanup > 0 && isNotLastBatch) {
+    if ((wouldExceedLimit || shouldRestartFromHealth || (hasHighFailureRate && recentResults.length >= 6)) && urlsSinceLastCleanup > 8 && isNotLastBatch) {
       let restartReason = 'Unknown';
-      if (healthCheck.shouldRestart) {
+      if (shouldRestartFromHealth) {
         restartReason = healthCheck.reason;
-      } else if (shouldRestartFromFailures) {
-        restartReason = 'Multiple recent failures detected';
+      } else if (hasHighFailureRate) {
+        restartReason = `High failure rate: ${Math.round(recentFailureRate * 100)}% in recent batch`;
       } else if (wouldExceedLimit) {
-        restartReason = `Processed ${urlsSinceLastCleanup} URLs`;
+        restartReason = `Processed ${urlsSinceLastCleanup} URLs (scheduled maintenance)`;
       }
       if (!silentMode) {
@@ -3220,8 +3270,18 @@ function setupFrameHandling(page, forceDebug) {
     const batchTasks = currentBatch.map(task => originalLimit(() => processUrl(task.url, task.config, browser)));
     const batchResults = await Promise.all(batchTasks);
-    // Check if any results indicate immediate restart is needed
-    const needsImmediateRestart = batchResults.some(r => r.needsImmediateRestart);
+    // IMPROVED: Much more conservative emergency restart logic
+    const criticalRestartCount = batchResults.filter(r => r.needsImmediateRestart).length;
+    // Require BOTH (threshold is the larger of the two):
+    // - At least 50% of the batch (rounded down) has critical errors, AND
+    // - At least 3 critical errors, regardless of batch size
+    const restartThreshold = Math.max(3, Math.floor(batchSize * 0.5)); // 50% of batch or min 3
+    const needsImmediateRestart = criticalRestartCount >= restartThreshold && criticalRestartCount >= 2;
+    // Log restart decision for debugging
+    if (forceDebug && criticalRestartCount > 0) {
+      console.log(formatLogMessage('debug', `Emergency restart decision: ${criticalRestartCount}/${batchSize} critical errors (threshold: ${restartThreshold}, restart: ${needsImmediateRestart ? 'YES' : 'NO'})`));
+    }
     // Log completion of concurrent processing
     if (forceDebug) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@fanboynz/network-scanner",
-  "version": "1.0.87",
+  "version": "1.0.89",
   "description": "A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.",
   "main": "nwss.js",
   "scripts": {