npm - @fanboynz/network-scanner - Versions diffs - 2.0.66 → 3.0.0 - Mend

@fanboynz/network-scanner 2.0.66 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/.github/workflows/npm-publish.yml +134 -10
package/CHANGELOG.md +135 -0
package/CLAUDE.md +18 -7
package/README.md +12 -4
package/lib/adblock-rust.js +23 -18
package/lib/adblock.js +127 -82
package/lib/browserexit.js +210 -200
package/lib/browserhealth.js +84 -60
package/lib/cdp.js +103 -81
package/lib/clear_sitedata.js +61 -159
package/lib/cloudflare.js +579 -409
package/lib/colorize.js +29 -12
package/lib/compare.js +16 -8
package/lib/compress.js +2 -1
package/lib/curl.js +287 -220
package/lib/domain-cache.js +87 -40
package/lib/dry-run.js +137 -194
package/lib/fingerprint.js +20 -18
package/lib/flowproxy.js +391 -188
package/lib/ghost-cursor.js +8 -7
package/lib/grep.js +248 -171
package/lib/ignore_similar.js +70 -124
package/lib/interaction.js +132 -235
package/lib/nettools.js +309 -87
package/lib/openvpn_vpn.js +12 -11
package/lib/output.js +92 -59
package/lib/post-processing.js +216 -162
package/lib/redirect.js +46 -30
package/lib/referrer.js +158 -165
package/lib/searchstring.js +290 -381
package/lib/smart-cache.js +141 -91
package/lib/socks-relay.js +8 -7
package/lib/spawn-async.js +137 -0
package/lib/validate_rules.js +188 -176
package/lib/wireguard_vpn.js +111 -117
package/nwss.js +740 -156
package/package.json +4 -4

package/lib/searchstring.js CHANGED Viewed

@@ -2,14 +2,19 @@
 // Handles response content analysis for searchstring functionality
 const fs = require('fs');
-const { spawnSync } = require('child_process');
+const { formatLogMessage, messageColors } = require('./colorize');
+const CURL_TAG = messageColors.processing('[curl]');
+// responseHandler is a separate code path (Puppeteer response listener,
+// not curl) — its debug output gets its own subsystem prefix so it's
+// distinguishable from curl-handler logs.
+const SEARCHSTRING_TAG = messageColors.processing('[searchstring]');
+const { runProcess } = require('./spawn-async');
 const { grepContent } = require('./grep');
 // Configuration constants for search logic
 const SEARCH_CONFIG = {
   MAX_CONTENT_SIZE: 50 * 1024 * 1024, // 50MB max content size
-  MAX_SEARCH_STRING_LENGTH: 1000,
-  XML_ENTITY_TIMEOUT: 5000 // 5 second timeout for XML processing
+  MAX_SEARCH_STRING_LENGTH: 1000
 };
 /**
@@ -46,36 +51,6 @@ function parseSearchStrings(searchstring, searchstringAnd) {
   };
 }
-/**
- * Helper function to add domain to matched collection (handles both Set and Map)
- * @param {Set|Map} matchedDomains - The matched domains collection
- * @param {Function} addMatchedDomain - Optional helper function for adding domains
- * @param {string} domain - Domain to add
- * @param {string} resourceType - Resource type (for --adblock-rules mode)
- * @param {string} fullSubdomain - Full subdomain for cache tracking (optional)
- */
-function addDomainToCollection(matchedDomains, addMatchedDomain, domain, resourceType = null, fullSubdomain = null) {
-  // Use helper function if provided (preferred method)
-  if (typeof addMatchedDomain === 'function') {
-    addMatchedDomain(domain, resourceType, fullSubdomain);
-    return;
-  }
-  // Fallback: handle different collection types directly
-  if (matchedDomains instanceof Set) {
-    matchedDomains.add(domain);
-  } else if (matchedDomains instanceof Map) {
-    if (!matchedDomains.has(domain)) {
-      matchedDomains.set(domain, new Set());
-    }
-    if (resourceType) {
-      matchedDomains.get(domain).add(resourceType);
-    }
-  } else {
-    console.warn('[warn] Unknown matchedDomains type, skipping domain addition');
-  }
-}
 /**
  * Downloads content using curl with appropriate headers and timeout
  * @param {string} url - The URL to download
@@ -84,55 +59,42 @@ function addDomainToCollection(matchedDomains, addMatchedDomain, domain, resourc
  * @returns {Promise<string>} The downloaded content
  */
 async function downloadWithCurl(url, userAgent = '', timeout = 30) {
-  return new Promise((resolve, reject) => {
-    try {
-      const curlArgs = [
-        '-s', // Silent mode
-        '-L', // Follow redirects
-        '--max-time', timeout.toString(),
-        '--max-redirs', '5',
-        '--fail-with-body', // Return body even on HTTP errors
-        '--max-filesize', '52428800', // 50MB limit
-        '--range', '0-52428799', // Limit download size
-        '--compressed', // Accept compressed responses
-      ];
+  const MAX_STDOUT_BYTES = 52428800; // 50MB, matches --max-filesize below
-      if (userAgent) {
-        curlArgs.push('-H', `User-Agent: ${userAgent}`);
-      }
+  const curlArgs = [
+    '-s',
+    '-L',
+    '--max-time', timeout.toString(),
+    '--max-redirs', '5',
+    '--fail-with-body',
+    '--max-filesize', '52428800',
+    '--range', '0-52428799',
+    '--compressed'
+  ];
+  if (userAgent) curlArgs.push('-H', `User-Agent: ${userAgent}`);
+  curlArgs.push(
+    '-H', 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+    '-H', 'Accept-Language: en-US,en;q=0.5',
+    '-H', 'Accept-Encoding: gzip, deflate',
+    '-H', 'Connection: keep-alive',
+    '-H', 'Upgrade-Insecure-Requests: 1'
+  );
+  curlArgs.push(url);
-      // Add common headers to appear more browser-like
-      curlArgs.push(
-        '-H', 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
-        '-H', 'Accept-Language: en-US,en;q=0.5',
-        '-H', 'Accept-Encoding: gzip, deflate',
-        '-H', 'Connection: keep-alive',
-        '-H', 'Upgrade-Insecure-Requests: 1'
-      );
-      curlArgs.push(url);
-      // Use spawnSync with proper argument separation
-      const result = spawnSync('curl', curlArgs, {
-        encoding: 'utf8',
-        timeout: timeout * 1000,
-        maxBuffer: 10 * 1024 * 1024, // 10MB max buffer
-        killSignal: 'SIGTERM'
-      });
-      if (result.error) {
-        throw result.error;
-      }
-      if (result.status !== 0) {
-        throw new Error(`Curl exited with status ${result.status}: ${result.stderr}`);
-      }
-      resolve(result.stdout);
-    } catch (error) {
-      reject(new Error(`Curl failed for ${url}: ${error.message}`));
-    }
+  // Shared async-spawn helper — same streaming/cap/timeout/kill plumbing
+  // that used to be ~80 lines of inline boilerplate here.
+  const result = await runProcess('curl', curlArgs, {
+    timeout: timeout * 1000,
+    maxStdout: MAX_STDOUT_BYTES
   });
+  if (result.error) throw new Error(`Curl failed for ${url}: ${result.error}`);
+  if (result.truncated) throw new Error(`Curl output exceeded ${MAX_STDOUT_BYTES} bytes for ${url}`);
+  if (result.signal) throw new Error(`Curl killed by signal ${result.signal} for ${url}`);
+  if (result.code !== 0) {
+    throw new Error(`Curl exited with status ${result.code}: ${result.stderr.toString('utf8')}`);
+  }
+  return result.stdout.toString('utf8');
 }
 /**
@@ -166,59 +128,48 @@ async function downloadWithRetry(url, userAgent = '', timeout = 30, retries = 2)
   }
 }
+// Lookup table for the 6 named entities the previous chained-replace
+// handled. Hoisted out of safeDecodeXmlEntities so the object isn't
+// reallocated per call.
+const NAMED_ENTITIES = Object.freeze({
+  '&lt;': '<', '&gt;': '>', '&amp;': '&',
+  '&quot;': '"', '&apos;': "'", '&#39;': "'"
+});
 /**
- * Safely decodes XML entities with timeout protection
+ * Safely decodes XML entities (named + numeric decimal + numeric hex)
+ * in a SINGLE regex pass. The old implementation chained 8 separate
+ * .replace() calls, each allocating a full intermediate string — for
+ * 50MB content that was ~8 × 50MB ≈ 400MB of throwaway allocations per
+ * XML response. Also drops the previous "timeout" check, which only
+ * fired between regex passes (not during them) so it never actually
+ * bounded runtime on pathological input.
  * @param {string} content - Content to decode
  * @returns {string} Decoded content or original if processing fails
  */
 function safeDecodeXmlEntities(content) {
-  const startTime = Date.now();
   try {
-    let decoded = content
-      .replace(/&lt;/g, '<')
-      .replace(/&gt;/g, '>')
-      .replace(/&amp;/g, '&')
-      .replace(/&quot;/g, '"')
-      .replace(/&#39;/g, "'")
-      .replace(/&apos;/g, "'");
-    // Check timeout before expensive regex operations
-    if (Date.now() - startTime > SEARCH_CONFIG.XML_ENTITY_TIMEOUT) {
-      console.warn('[warn] XML entity decoding timeout, using partial result');
-      return decoded;
-    }
-    // Decode numeric entities (decimal)
-    decoded = decoded.replace(/&#(\d+);/g, (match, dec) => {
-      const num = parseInt(dec, 10);
-      // Validate range for safety (valid Unicode range)
-      if (num >= 0 && num <= 0x10FFFF) {
-        return String.fromCharCode(num);
+    return content.replace(
+      /&lt;|&gt;|&amp;|&quot;|&apos;|&#39;|&#\d+;|&#x[0-9a-fA-F]+;/g,
+      (match) => {
+        // Named entity — exact match in the lookup table.
+        const named = NAMED_ENTITIES[match];
+        if (named) return named;
+        // Numeric entity — &#xNN; (hex) or &#NN; (decimal).
+        const isHex = match[2] === 'x' || match[2] === 'X';
+        const numStr = isHex ? match.slice(3, -1) : match.slice(2, -1);
+        const num = parseInt(numStr, isHex ? 16 : 10);
+        // String.fromCodePoint (NOT fromCharCode) — fromCharCode truncates
+        // to 16 bits, so &#128512; (😀, codepoint 0x1F600) would decode to
+        // '' (a single garbage BMP char) instead of the emoji.
+        // fromCodePoint handles the full Unicode range up to 0x10FFFF.
+        if (num >= 0 && num <= 0x10FFFF) return String.fromCodePoint(num);
+        return match; // out-of-range — keep original
       }
-      return match; // Keep original if invalid
-    });
-    // Check timeout again
-    if (Date.now() - startTime > SEARCH_CONFIG.XML_ENTITY_TIMEOUT) {
-      console.warn('[warn] XML entity decoding timeout, using partial result');
-      return decoded;
-    }
-    // Decode numeric entities (hexadecimal)
-    decoded = decoded.replace(/&#x([0-9a-f]+);/gi, (match, hex) => {
-      const num = parseInt(hex, 16);
-      // Validate range for safety (valid Unicode range)
-      if (num >= 0 && num <= 0x10FFFF) {
-        return String.fromCharCode(num);
-      }
-      return match; // Keep original if invalid
-    });
-    return decoded;
+    );
   } catch (xmlErr) {
-    console.warn(`[warn] XML entity decoding failed: ${xmlErr.message}`);
-    return content; // Return original content if decoding fails
+    console.warn(formatLogMessage('warn', `XML entity decoding failed: ${xmlErr.message}`));
+    return content;
   }
 }
@@ -229,15 +180,12 @@ function safeDecodeXmlEntities(content) {
  */
 function safeStripTags(content) {
   try {
-    // Limit content size for tag stripping to prevent excessive memory usage
-    const limitedContent = content.length > SEARCH_CONFIG.MAX_CONTENT_SIZE
-      ? content.substring(0, SEARCH_CONFIG.MAX_CONTENT_SIZE)
-      : content;
-    // Replace tags with spaces to preserve word boundaries
-    return limitedContent.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ');
+    // No content-size cap here — searchContent already truncated to
+    // MAX_CONTENT_SIZE before calling, so the previous cap was a no-op.
+    // Replace tags with spaces to preserve word boundaries.
+    return content.replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ');
   } catch (stripErr) {
-    console.warn(`[warn] Tag stripping failed: ${stripErr.message}`);
+    console.warn(formatLogMessage('warn', `Tag stripping failed: ${stripErr.message}`));
     return content;
   }
 }
@@ -251,134 +199,110 @@ function safeStripTags(content) {
  * @param {Array<string>} searchStringsAnd - Array of strings that must all be present (AND logic)
  * @param {string} contentType - Content type for specialized handling
  * @param {string} url - URL for debugging context (optional)
- * @returns {object} Object with found boolean, matchedString/matchedStrings, allMatches array, and logic type
+ * @returns {{found: boolean, matchedString: string|null, logicType: 'AND'|'OR'|'NONE', error?: string}}
  */
 function searchContent(content, searchStrings, searchStringsAnd = [], contentType = '', url = '') {
-  // Input validation
+  // Input validation. Return shape carries only what callers actually
+  // destructure ({found, matchedString, logicType, error}); the old
+  // matchedStrings/allMatches/contentSize/searchableSize/processedAsXml
+  // fields were computed and returned but never read by any caller.
   if (!content || typeof content !== 'string') {
-    return {
-      found: false,
-      matchedString: null,
-      matchedStrings: [],
-      allMatches: [],
-      logicType: 'NONE',
-      error: 'Invalid or empty content'
-    };
+    return { found: false, matchedString: null, logicType: 'NONE', error: 'Invalid or empty content' };
   }
+  // Validate search strings FIRST — before paying for content truncation,
+  // XML entity decoding, tag stripping, and 3× lowercase. Previously these
+  // ran first, so a config with zero valid search strings still burned
+  // ~150MB of allocations on a 50MB XML response before returning empty.
+  const validSearchStrings = searchStrings.filter(str =>
+    str && typeof str === 'string' && str.length > 0 && str.length <= SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH
+  );
+  const validSearchStringsAnd = searchStringsAnd.filter(str =>
+    str && typeof str === 'string' && str.length > 0 && str.length <= SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH
+  );
+  if (validSearchStrings.length !== searchStrings.length) {
+    console.warn(formatLogMessage('warn', `Filtered ${searchStrings.length - validSearchStrings.length} invalid search strings`));
+  }
+  if (validSearchStringsAnd.length !== searchStringsAnd.length) {
+    console.warn(formatLogMessage('warn', `Filtered ${searchStringsAnd.length - validSearchStringsAnd.length} invalid AND search strings`));
+  }
+  if (validSearchStrings.length === 0 && validSearchStringsAnd.length === 0) {
+    return { found: false, matchedString: null, logicType: 'NONE', error: 'No valid search strings provided' };
+  }
   // Size check and truncation with warning
   const originalLength = content.length;
   if (originalLength > SEARCH_CONFIG.MAX_CONTENT_SIZE) {
     content = content.substring(0, SEARCH_CONFIG.MAX_CONTENT_SIZE);
-    console.warn(`[warn] Content truncated from ${originalLength} to ${SEARCH_CONFIG.MAX_CONTENT_SIZE} chars for ${url || 'unknown URL'}`);
+    console.warn(formatLogMessage('warn', `Content truncated from ${originalLength} to ${SEARCH_CONFIG.MAX_CONTENT_SIZE} chars for ${url || 'unknown URL'}`));
   }
-  let searchableContent = content;
-  const isXmlContent = contentType.toLowerCase().includes('xml') ||
-                      contentType.toLowerCase().includes('html');
+  // For XML/HTML we search across three views — original, entity-decoded,
+  // tag-stripped — so encoded strings ("&amp;") and DOM-text strings
+  // ("body text") and raw-source strings (attribute values) all match.
+  //
+  // The previous implementation joined all three into a single 3× string
+  // then .toLowerCase()'d it. For a 50MB response that allocated a 150MB
+  // intermediate plus a 150MB lowercase copy. Now we lowercase each
+  // version independently and probe with `versionsIncludes()` — same
+  // matching semantics (a string found in ANY version still counts) but
+  // ~half the peak memory.
+  const ct = contentType.toLowerCase();
+  const isXmlContent = ct.includes('xml') || ct.includes('html');
+  let lowerVersions;
   if (isXmlContent) {
     try {
-      // Safely decode XML entities
       const decodedContent = safeDecodeXmlEntities(content);
-      // Safely strip tags to extract text content
       const strippedContent = safeStripTags(decodedContent);
-      // Search in: original + decoded + stripped content
-      // Use newlines as separators to prevent false matches across content types
-      searchableContent = [content, decodedContent, strippedContent].join('\n');
+      lowerVersions = [
+        content.toLowerCase(),
+        decodedContent.toLowerCase(),
+        strippedContent.toLowerCase()
+      ];
     } catch (xmlProcessingErr) {
-      console.warn(`[warn] XML processing failed for ${url || 'unknown URL'}: ${xmlProcessingErr.message}`);
-      // Fall back to original content
-      searchableContent = content;
+      console.warn(formatLogMessage('warn', `XML processing failed for ${url || 'unknown URL'}: ${xmlProcessingErr.message}`));
+      lowerVersions = [content.toLowerCase()];
     }
+  } else {
+    lowerVersions = [content.toLowerCase()];
   }
+  const versionsIncludes = (needleLower) => {
+    for (let i = 0; i < lowerVersions.length; i++) {
+      if (lowerVersions[i].includes(needleLower)) return true;
+    }
+    return false;
+  };
-  // Input validation for search strings
-  const validSearchStrings = searchStrings.filter(str =>
-    str && typeof str === 'string' && str.length > 0 && str.length <= SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH
-  );
-  const validSearchStringsAnd = searchStringsAnd.filter(str =>
-    str && typeof str === 'string' && str.length > 0 && str.length <= SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH
-  );
-  // Warn about filtered search strings
-  if (validSearchStrings.length !== searchStrings.length) {
-    console.warn(`[warn] Filtered ${searchStrings.length - validSearchStrings.length} invalid search strings`);
-  }
-  if (validSearchStringsAnd.length !== searchStringsAnd.length) {
-    console.warn(`[warn] Filtered ${searchStringsAnd.length - validSearchStringsAnd.length} invalid AND search strings`);
-  }
-  // Early return if no valid search strings
-  if (validSearchStrings.length === 0 && validSearchStringsAnd.length === 0) {
-    return {
-      found: false,
-      matchedString: null,
-      matchedStrings: [],
-      allMatches: [],
-      logicType: 'NONE',
-      error: 'No valid search strings provided'
-    };
-  }
-  // Pre-compute lowercase content once for better performance
-  const lowerContent = searchableContent.toLowerCase();
-  // Check AND logic first (more restrictive) - ALL strings must be present
-  if (validSearchStringsAnd && validSearchStringsAnd.length > 0) {
-    const foundAndStrings = [];
+  // Check AND logic first (more restrictive) — ALL strings must be present
+  // in at least one of the searchable versions. Loop exits early on first
+  // NOT-found.
+  if (validSearchStringsAnd.length > 0) {
+    let allFound = true;
     for (const searchStr of validSearchStringsAnd) {
-      const lowerSearchStr = searchStr.toLowerCase();
-      if (lowerContent.includes(lowerSearchStr)) {
-        foundAndStrings.push(searchStr);
-      } else {
-        // Early exit if any AND string is not found
+      if (!versionsIncludes(searchStr.toLowerCase())) {
+        allFound = false;
         break;
       }
     }
-    // AND logic: ALL valid strings must be found
-    if (foundAndStrings.length === validSearchStringsAnd.length) {
-      return {
-        found: true,
-        matchedString: foundAndStrings.join(' AND '),
-        matchedStrings: foundAndStrings,
-        allMatches: foundAndStrings,
-        logicType: 'AND',
-        contentSize: originalLength,
-        searchableSize: searchableContent.length
-      };
+    if (allFound) {
+      return { found: true, matchedString: validSearchStringsAnd.join(' AND '), logicType: 'AND' };
     }
   }
-  // OR logic: ANY string can match
-  const allMatches = [];
-  let firstMatch = null;
+  // OR logic: ANY string can match. Early-exit on first hit since the
+  // caller only reads matchedString (the first match). Previously the
+  // loop ran to completion to fill an `allMatches` array no caller read.
   for (const searchStr of validSearchStrings) {
-    const lowerSearchStr = searchStr.toLowerCase();
-    if (lowerContent.includes(lowerSearchStr)) {
-      allMatches.push(searchStr);
-      if (!firstMatch) {
-        firstMatch = searchStr;
-      }
+    if (versionsIncludes(searchStr.toLowerCase())) {
+      return { found: true, matchedString: searchStr, logicType: 'OR' };
     }
   }
-  return {
-    found: allMatches.length > 0,
-    matchedString: firstMatch,
-    matchedStrings: allMatches,
-    allMatches: allMatches,
-    logicType: validSearchStrings.length > 0 ? 'OR' : 'NONE',
-    contentSize: originalLength,
-    searchableSize: searchableContent.length,
-    processedAsXml: isXmlContent
-  };
+  return { found: false, matchedString: null, logicType: validSearchStrings.length > 0 ? 'OR' : 'NONE' };
 }
 /**
@@ -440,44 +364,52 @@ function createCurlHandler(config) {
     hasSearchString
   } = config;
+  // Hoisted: currentUrl doesn't change for this handler's lifetime, so
+  // parsing its hostname once at handler-creation eliminates the
+  // per-request URL allocation.
+  let currentUrlHostname = '';
+  try { currentUrlHostname = new URL(currentUrl).hostname; } catch (_) {}
   return async function curlHandler(requestUrl) {
-    // Only process URLs that match our regex patterns
+    // Regex check FIRST — cheap filter that skips ~99% of requests.
+    // The URL parse + domain-cache lookup below are the expensive bit,
+    // so they run only for requests that pass this gate (the regex
+    // check already preceded them before this change).
     const matchesRegex = regexes.some(re => re.test(requestUrl));
     if (!matchesRegex) return;
-    // Extract domain and check if already detected (skip expensive operations)
-    const reqDomain = perSiteSubDomains ? (new URL(requestUrl)).hostname : getRootDomain(requestUrl);
+    // Parse requestUrl ONCE and reuse. Was parsed 2-3 times.
+    let requestHostname;
+    try { requestHostname = new URL(requestUrl).hostname; } catch (_) { return; }
+    const reqDomain = perSiteSubDomains ? requestHostname : getRootDomain(requestUrl);
     if (typeof config.isDomainAlreadyDetected === 'function' && config.isDomainAlreadyDetected(reqDomain)) {
       if (forceDebug) {
-        console.log(`[debug][curl] Skipping already detected domain: ${reqDomain}`);
+        console.log(formatLogMessage('debug', `${CURL_TAG} Skipping already detected domain: ${reqDomain}`));
       }
       return;
     }
-    // Check if this is a first-party request (same domain as the URL being scanned)
-    const currentUrlHostname = new URL(currentUrl).hostname;
-    const requestHostname = new URL(requestUrl).hostname;
     const isFirstParty = currentUrlHostname === requestHostname;
     // Apply first-party/third-party filtering
     if (isFirstParty && siteConfig.firstParty === false) {
       if (forceDebug) {
-        console.log(`[debug][curl] Skipping first-party request (firstParty=false): ${requestUrl}`);
+        console.log(formatLogMessage('debug', `${CURL_TAG} Skipping first-party request (firstParty=false): ${requestUrl}`));
       }
       return;
     }
     if (!isFirstParty && siteConfig.thirdParty === false) {
       if (forceDebug) {
-        console.log(`[debug][curl] Skipping third-party request (thirdParty=false): ${requestUrl}`);
+        console.log(formatLogMessage('debug', `${CURL_TAG} Skipping third-party request (thirdParty=false): ${requestUrl}`));
       }
       return;
     }
     try {
       if (forceDebug) {
-        console.log(`[debug][curl] Downloading content from: ${requestUrl}`);
+        console.log(formatLogMessage('debug', `${CURL_TAG} Downloading content from: ${requestUrl}`));
       }
       // If NO searchstring is defined, match immediately (like browser behavior)
@@ -486,7 +418,7 @@ function createCurlHandler(config) {
           return;
         }
-        addDomainToCollection(matchedDomains, addMatchedDomain, reqDomain, resourceType);
+        addMatchedDomain(reqDomain, resourceType);
         const simplifiedUrl = getRootDomain(currentUrl);
         if (siteConfig.verbose === 1) {
@@ -503,7 +435,7 @@ function createCurlHandler(config) {
             fs.appendFileSync(matchedUrlsLogFile,
               `${timestamp} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, curl)${resourceInfo}\n`);
           } catch (logErr) {
-            console.warn(`[warn] Failed to write to matched URLs log: ${logErr.message}`);
+            console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
           }
         }
         return;
@@ -520,7 +452,7 @@ function createCurlHandler(config) {
           return;
         }
-        addDomainToCollection(matchedDomains, addMatchedDomain, reqDomain, resourceType);
+        addMatchedDomain(reqDomain, resourceType);
         const simplifiedUrl = getRootDomain(currentUrl);
         if (siteConfig.verbose === 1) {
@@ -537,20 +469,20 @@ function createCurlHandler(config) {
             fs.appendFileSync(matchedUrlsLogFile,
               `${timestamp} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, curl, searchstring (${logicType}): "${matchedString}")${resourceInfo}\n`);
           } catch (logErr) {
-            console.warn(`[warn] Failed to write to matched URLs log: ${logErr.message}`);
+            console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
           }
         }
       } else if (forceDebug) {
         const partyType = isFirstParty ? 'first-party' : 'third-party';
-        console.log(`[debug][curl] ${requestUrl} (${partyType}) matched regex but no searchstring found`);
+        console.log(formatLogMessage('debug', `${CURL_TAG} ${requestUrl} (${partyType}) matched regex but no searchstring found`));
         if (error) {
-          console.log(`[debug][curl] Search error: ${error}`);
+          console.log(formatLogMessage('debug', `${CURL_TAG} Search error: ${error}`));
         }
       }
     } catch (err) {
       if (forceDebug) {
-        console.log(`[debug][curl] Failed to download content for ${requestUrl}: ${err.message}`);
+        console.log(formatLogMessage('debug', `${CURL_TAG} Failed to download content for ${requestUrl}: ${err.message}`));
       }
     }
   };
@@ -582,56 +514,66 @@ function createResponseHandler(config) {
     resourceType // Will be null for response handler
   } = config;
+  // Hoisted: currentUrl doesn't change for this handler's lifetime.
+  // Root domain (not bare hostname) so first-party matches the definition
+  // used by nwss.js's main request handler AND lib/curl.js — previously
+  // this module used hostname equality, so cdn.example.com and
+  // static.example.com were classified third-party here but first-party
+  // by the main handler. Unified to the registrable-root rule.
+  let currentRootDomain = '';
+  try { currentRootDomain = getRootDomain(currentUrl); } catch (_) {}
   return async function responseHandler(response) {
     const respUrl = response.url();
-    const respDomain = perSiteSubDomains ? (new URL(respUrl)).hostname : getRootDomain(respUrl);
-    // Only process responses that match our regex patterns
-    const fullSubdomain = (new URL(respUrl)).hostname; // Always get full subdomain for cache tracking
-    // Skip if already detected to avoid duplicates
+    // Regex check FIRST — cheapest filter, eliminates ~99% of responses
+    // before paying for URL parses + domain-cache lookup. Previously this
+    // ran AFTER 2× URL parses + isDomainAlreadyDetected; reordering moves
+    // the parse cost off the hot path of every subresource response.
+    const matchesRegex = regexes.some(re => re.test(respUrl));
+    if (!matchesRegex) return;
+    // Parse respUrl ONCE and reuse. Was parsed 2-3 times per response.
+    let respHostname;
+    try { respHostname = new URL(respUrl).hostname; } catch (_) { return; }
+    const fullSubdomain = respHostname; // hostname is always the full subdomain
     if (typeof config.isDomainAlreadyDetected === 'function' && config.isDomainAlreadyDetected(fullSubdomain)) {
       return;
     }
-    const matchesRegex = regexes.some(re => re.test(respUrl));
-    if (!matchesRegex) return;
-    // Extract domain and check if already detected (skip expensive operations)
-    // The main request handler already filtered first-party/third-party requests
-    // This response handler only runs for requests that passed that filter
-    // However, we need to apply the same first-party/third-party logic here for searchstring analysis
-    // because the response handler analyzes content, not just URLs
-    // Apply first-party/third-party filtering for searchstring analysis
-    // Use the exact same logic as the main request handler
+    // respDomain (hostname or root domain, depending on perSiteSubDomains)
+    // is only needed inside the `if (found)` block below, so it is chosen
+    // there. The root domain itself is computed once, just below, for the
+    // first-party check and reused by that block — no second getRootDomain call.
-    const currentUrlHostname = new URL(currentUrl).hostname;
-    const responseHostname = new URL(respUrl).hostname;
-    const isFirstParty = currentUrlHostname === responseHostname;
+    // First-party / third-party gate. Root-domain comparison matches the
+    // main handler and curl.js — old hostname comparison disagreed.
+    const respRootDomain = getRootDomain(respUrl);
+    const isFirstParty = currentRootDomain === respRootDomain;
     if (isFirstParty && siteConfig.firstParty === false) {
       if (forceDebug) {
-        console.log(`[debug] Skipping first-party response for searchstring analysis (firstParty=false): ${respUrl}`);
+        console.log(formatLogMessage('debug', `${SEARCHSTRING_TAG} Skipping first-party response for searchstring analysis (firstParty=false): ${respUrl}`));
       }
       return;
     }
     if (!isFirstParty && siteConfig.thirdParty === false) {
       if (forceDebug) {
-        console.log(`[debug] Skipping third-party response for searchstring analysis (thirdParty=false): ${respUrl}`);
+        console.log(formatLogMessage('debug', `${SEARCHSTRING_TAG} Skipping third-party response for searchstring analysis (thirdParty=false): ${respUrl}`));
       }
       return;
     }
     try {
       // Only capture appropriate content types to avoid binary data
       const contentType = response.headers()['content-type'] || '';
       if (!shouldAnalyzeContentType(contentType)) {
         if (forceDebug) {
-          console.log(`[debug] Skipping content analysis for ${respUrl} (content-type: ${contentType})`);
+          console.log(formatLogMessage('debug', `${SEARCHSTRING_TAG} Skipping content analysis for ${respUrl} (content-type: ${contentType})`));
         }
         return;
       }
       const content = await response.text();
       // Cache the fetched content if callback provided
@@ -640,7 +582,7 @@ function createResponseHandler(config) {
           config.onContentFetched(respUrl, content);
         } catch (cacheErr) {
           if (forceDebug) {
-            console.log(`[debug] Content caching failed: ${cacheErr.message}`);
+            console.log(formatLogMessage('debug', `${SEARCHSTRING_TAG} Content caching failed: ${cacheErr.message}`));
           }
         }
       }
@@ -677,7 +619,7 @@ function createResponseHandler(config) {
           }
         } catch (grepErr) {
           if (forceDebug) {
-            console.log(`[debug] Grep failed for ${respUrl}, falling back to JavaScript: ${grepErr.message}`);
+            console.log(formatLogMessage('debug', `${SEARCHSTRING_TAG} Grep failed for ${respUrl}, falling back to JavaScript: ${grepErr.message}`));
           }
           // Fallback to JavaScript search
           searchResult = searchContent(content, searchStrings, searchStringsAnd, contentType, respUrl);
@@ -690,10 +632,13 @@ function createResponseHandler(config) {
       const { found, matchedString, logicType, error } = searchResult;
       if (found) {
+        // Reuse respRootDomain, which the first-party check above already
+        // computed. This saves a second getRootDomain call per match.
+        const respDomain = perSiteSubDomains ? respHostname : respRootDomain;
         if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
           return;
         }
         // Response handler doesn't have access to specific resource type
         // Use the addMatchedDomain helper which handles fullSubdomain properly
         addMatchedDomain(respDomain, null, fullSubdomain);
@@ -713,138 +658,104 @@ function createResponseHandler(config) {
             fs.appendFileSync(matchedUrlsLogFile,
               `${timestamp} [match][${simplifiedUrl}] ${respUrl} (${partyType}, ${searchMethod}, searchstring (${logicType}): "${matchedString}")\n`);
           } catch (logErr) {
-            console.warn(`[warn] Failed to write to matched URLs log: ${logErr.message}`);
+            console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
           }
         }
       } else if (forceDebug) {
         const partyType = isFirstParty ? 'first-party' : 'third-party';
         const searchMethod = useGrep ? 'grep' : 'js';
-        console.log(`[debug] ${respUrl} (${partyType}, ${searchMethod}) matched regex but no searchstring found`);
+        console.log(formatLogMessage('debug', `${SEARCHSTRING_TAG} ${respUrl} (${partyType}, ${searchMethod}) matched regex but no searchstring found`));
         if (error) {
-          console.log(`[debug] Search error: ${error}`);
+          console.log(formatLogMessage('debug', `${SEARCHSTRING_TAG} Search error: ${error}`));
         }
       }
     } catch (err) {
       if (forceDebug) {
-        console.log(`[debug] Failed to read response content for ${respUrl}: ${err.message}`);
+        console.log(formatLogMessage('debug', `${SEARCHSTRING_TAG} Failed to read response content for ${respUrl}: ${err.message}`));
       }
     }
   };
 }
 /**
- * Validates searchstring configuration
- * @param {any} searchstring - The searchstring value to validate
- * @param {any} searchstringAnd - The searchstring_and value to validate
- * @returns {object} Validation result with isValid boolean and error message
+ * Validates a single string-or-array-of-strings value against the
+ * shared rules: type, non-empty, per-element type/non-empty, length cap.
+ * Used by validateSearchString for both searchstring and searchstring_and.
+ *
+ * @param {string|Array<string>} value
+ * @param {string} fieldName - e.g. 'searchstring' or 'searchstring_and'
+ * @returns {{isValid: boolean, error: string|null}}
  */
-function validateSearchString(searchstring, searchstringAnd) {
-  if (searchstring === undefined || searchstring === null) {
-    return { isValid: true, error: null };
-  }
-  if (typeof searchstring === 'string') {
-    if (searchstring.length === 0) {
-      return { isValid: false, error: 'searchstring cannot be empty string' };
+function validateSearchValue(value, fieldName) {
+  if (typeof value === 'string') {
+    if (value.length === 0) {
+      return { isValid: false, error: `${fieldName} cannot be empty string` };
+    }
+    if (value.length > SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH) {
+      return { isValid: false, error: `${fieldName} too long (max ${SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH} chars)` };
     }
     return { isValid: true, error: null };
   }
-  if (Array.isArray(searchstring)) {
-    if (searchstring.length === 0) {
-      return { isValid: false, error: 'searchstring array cannot be empty' };
+  if (Array.isArray(value)) {
+    if (value.length === 0) {
+      return { isValid: false, error: `${fieldName} array cannot be empty` };
     }
-    for (let i = 0; i < searchstring.length; i++) {
-      if (typeof searchstring[i] !== 'string') {
-        return { isValid: false, error: `searchstring[${i}] must be a string` };
+    for (let i = 0; i < value.length; i++) {
+      if (typeof value[i] !== 'string') {
+        return { isValid: false, error: `${fieldName}[${i}] must be a string` };
+      }
+      if (value[i].length === 0) {
+        return { isValid: false, error: `${fieldName}[${i}] cannot be empty string` };
       }
-      if (searchstring[i].length === 0) {
-        return { isValid: false, error: `searchstring[${i}] cannot be empty string` };
+      if (value[i].length > SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH) {
+        return { isValid: false, error: `${fieldName}[${i}] too long (max ${SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH} chars)` };
       }
     }
     return { isValid: true, error: null };
   }
-  // Validate searchstring_and
-  if (searchstringAnd !== undefined && searchstringAnd !== null) {
-    if (typeof searchstringAnd === 'string') {
-      if (searchstringAnd.length === 0) {
-        return { isValid: false, error: 'searchstring_and cannot be empty string' };
-      }
-    } else if (Array.isArray(searchstringAnd)) {
-      if (searchstringAnd.length === 0) {
-        return { isValid: false, error: 'searchstring_and array cannot be empty' };
-      }
-      for (let i = 0; i < searchstringAnd.length; i++) {
-        if (typeof searchstringAnd[i] !== 'string') {
-          return { isValid: false, error: `searchstring_and[${i}] must be a string` };
-        }
-        if (searchstringAnd[i].length === 0) {
-          return { isValid: false, error: `searchstring_and[${i}] cannot be empty string` };
-        }
-      }
-    } else {
-      return { isValid: false, error: 'searchstring_and must be string or array of strings' };
-    }
+  return { isValid: false, error: `${fieldName} must be string or array of strings` };
+}
+/**
+ * Validates searchstring configuration. The old structure returned
+ * early on valid string/array searchstring, so 60+ lines of validation
+ * below (the both-defined check, length caps, searchstring_and type
+ * check) were unreachable for valid inputs — e.g. passing both
+ * searchstring AND searchstring_and would have passed validation
+ * despite the documented mutual-exclusion rule. Rewritten as a linear
+ * sequence of independent checks via the shared validateSearchValue
+ * helper so every rule actually runs.
+ *
+ * @param {any} searchstring - The searchstring value (OR logic)
+ * @param {any} searchstringAnd - The searchstring_and value (AND logic)
+ * @returns {{isValid: boolean, error: string|null}}
+ */
+function validateSearchString(searchstring, searchstringAnd) {
+  const hasOR = searchstring !== undefined && searchstring !== null;
+  const hasAND = searchstringAnd !== undefined && searchstringAnd !== null;
+  // Both unset is fine — no searchstring filtering will be applied.
+  if (!hasOR && !hasAND) {
+    return { isValid: true, error: null };
   }
-  // Check that both searchstring and searchstring_and aren't defined simultaneously
-  if ((searchstring !== undefined && searchstring !== null) &&
-      (searchstringAnd !== undefined && searchstringAnd !== null)) {
+  // Mutual exclusion: can't combine OR and AND logic in one site config.
+  if (hasOR && hasAND) {
     return { isValid: false, error: 'Cannot use both searchstring (OR) and searchstring_and (AND) simultaneously. Choose one logic type.' };
   }
-  // Additional validation for search string length limits
-  const validateStringLength = (str, fieldName) => {
-    if (str.length > SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH) {
-      return { isValid: false, error: `${fieldName} too long (max ${SEARCH_CONFIG.MAX_SEARCH_STRING_LENGTH} chars)` };
-    }
-    return { isValid: true };
-  };
-  // Validate search string lengths
-  if (typeof searchstring === 'string') {
-    const lengthCheck = validateStringLength(searchstring, 'searchstring');
-    if (!lengthCheck.isValid) return lengthCheck;
-  } else if (Array.isArray(searchstring)) {
-    for (let i = 0; i < searchstring.length; i++) {
-      const lengthCheck = validateStringLength(searchstring[i], `searchstring[${i}]`);
-      if (!lengthCheck.isValid) return lengthCheck;
-    }
+  if (hasOR) {
+    const check = validateSearchValue(searchstring, 'searchstring');
+    if (!check.isValid) return check;
   }
-  // Validate AND search string lengths
-  if (typeof searchstringAnd === 'string') {
-    const lengthCheck = validateStringLength(searchstringAnd, 'searchstring_and');
-    if (!lengthCheck.isValid) return lengthCheck;
-  } else if (Array.isArray(searchstringAnd)) {
-    for (let i = 0; i < searchstringAnd.length; i++) {
-      const lengthCheck = validateStringLength(searchstringAnd[i], `searchstring_and[${i}]`);
-      if (!lengthCheck.isValid) return lengthCheck;
-    }
+  if (hasAND) {
+    const check = validateSearchValue(searchstringAnd, 'searchstring_and');
+    if (!check.isValid) return check;
   }
-  return { isValid: false, error: 'searchstring must be string or array of strings' };
-}
-/**
- * Gets statistics about search string matches
- * @param {Set|Map} matchedDomains - Set or Map of matched domains
- * @param {Array<string>} searchStrings - Array of search strings used
- * @returns {object} Statistics object
- */
-function getSearchStats(matchedDomains, searchStrings) {
-  const totalMatches = matchedDomains instanceof Map ? matchedDomains.size : matchedDomains.size;
-  return {
-    totalMatches,
-    searchStringCount: searchStrings.length,
-    searchStrings: [...searchStrings]
-  };
+  return { isValid: true, error: null };
 }
 module.exports = {
@@ -856,7 +767,5 @@ module.exports = {
   createCurlHandler,
   downloadWithCurl,
   validateSearchString,
-  getSearchStats,
-  addDomainToCollection,
   downloadWithRetry
 };