npm - @fanboynz/network-scanner - Versions diffs - 2.0.66 → 3.0.0 - Mend

@fanboynz/network-scanner 2.0.66 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

package/.github/workflows/npm-publish.yml +134 -10
package/CHANGELOG.md +135 -0
package/CLAUDE.md +18 -7
package/README.md +12 -4
package/lib/adblock-rust.js +23 -18
package/lib/adblock.js +127 -82
package/lib/browserexit.js +210 -200
package/lib/browserhealth.js +84 -60
package/lib/cdp.js +103 -81
package/lib/clear_sitedata.js +61 -159
package/lib/cloudflare.js +579 -409
package/lib/colorize.js +29 -12
package/lib/compare.js +16 -8
package/lib/compress.js +2 -1
package/lib/curl.js +287 -220
package/lib/domain-cache.js +87 -40
package/lib/dry-run.js +137 -194
package/lib/fingerprint.js +20 -18
package/lib/flowproxy.js +391 -188
package/lib/ghost-cursor.js +8 -7
package/lib/grep.js +248 -171
package/lib/ignore_similar.js +70 -124
package/lib/interaction.js +132 -235
package/lib/nettools.js +309 -87
package/lib/openvpn_vpn.js +12 -11
package/lib/output.js +92 -59
package/lib/post-processing.js +216 -162
package/lib/redirect.js +46 -30
package/lib/referrer.js +158 -165
package/lib/searchstring.js +290 -381
package/lib/smart-cache.js +141 -91
package/lib/socks-relay.js +8 -7
package/lib/spawn-async.js +137 -0
package/lib/validate_rules.js +188 -176
package/lib/wireguard_vpn.js +111 -117
package/nwss.js +740 -156
package/package.json +4 -4

package/lib/curl.js CHANGED

@@ -2,22 +2,36 @@
 // Handles HTTP content downloading using curl for searchstring analysis
 const fs = require('fs');
+// spawnSync only kept for validateCurlAvailability (runs once at
+// startup). Production curl downloads go through runProcess (async).
 const { spawnSync } = require('child_process');
-const { colorize, colors, messageColors, tags, formatLogMessage } = require('./colorize');
+const { runProcess } = require('./spawn-async');
+const { messageColors, formatLogMessage } = require('./colorize');
+const { getReferrerForUrl } = require('./referrer');
+const CURL_TAG = messageColors.processing('[curl]');
 // === Constants ===
 const CURL_DEFAULTS = {
   TIMEOUT_SECONDS: 30,
   MAX_REDIRECTS: 5,
-  MAX_SIZE_BYTES: 10 * 1024 * 1024, // 10MB
-  VALIDATION_TIMEOUT: 5000, // 5 seconds
-  SPAWN_TIMEOUT_MULTIPLIER: 1000, // Convert seconds to milliseconds
-  HTTP_SUCCESS_CODE: 200,
+  // 50MB to match lib/searchstring.js's downloadWithCurl cap — the two
+  // modules previously had different defaults (10MB vs 50MB) so the same
+  // URL could succeed or fail depending on which code path fetched it.
+  MAX_SIZE_BYTES: 50 * 1024 * 1024,
+  VALIDATION_TIMEOUT: 5000,
   CURL_SUCCESS_STATUS: 0,
-  METADATA_PIPE_PARTS: 3, // http_code|content_type|size_download
   VERSION_LINE_INDEX: 0
 };
+// Module-level so downloadWithCurl doesn't reallocate this closure on
+// every call. No state captured — pure factory.
+function errResult(msg) {
+  return {
+    content: '', httpCode: 0, contentType: 'unknown', downloadSize: 0,
+    success: false, error: msg
+  };
+}
 /**
  * Downloads content using curl with browser-like headers
  * @param {string} url - The URL to download
@@ -34,90 +48,80 @@ async function downloadWithCurl(url, userAgent = '', options = {}) {
     customHeaders = {}
   } = options;
-  try {
-    const curlArgs = [
-      '-s', // Silent mode
-      '--max-time', timeout.toString(),
-      '--max-redirs', maxRedirects.toString(),
-      '--fail-with-body', // Return body even on HTTP errors
-      '--compressed', // Accept compressed responses
-      '--write-out', '%{http_code}|%{content_type}|%{size_download}', // Output metadata
-    ];
-    if (followRedirects) {
-      curlArgs.push('-L'); // Follow redirects
-    }
+  const curlArgs = [
+    '-s',
+    '--max-time', timeout.toString(),
+    '--max-redirs', maxRedirects.toString(),
+    '--fail-with-body',
+    '--compressed',
+    // Leading '\n' guarantees the metadata sits on its own line even
+    // when content has no trailing newline (older format had no
+    // separator and concatenated metadata with the last content byte).
+    '--write-out', '\n%{http_code}|%{content_type}|%{size_download}'
+  ];
-    // Add user agent if provided
-    if (userAgent) {
-      curlArgs.push('-H', `User-Agent: ${userAgent}`);
-    }
+  if (followRedirects) curlArgs.push('-L');
+  if (userAgent) curlArgs.push('-H', `User-Agent: ${userAgent}`);
-    // Add common browser headers
-    curlArgs.push(
-      '-H', 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
-      '-H', 'Accept-Language: en-US,en;q=0.5',
-      '-H', 'Accept-Encoding: gzip, deflate, br',
-      '-H', 'Connection: keep-alive',
-      '-H', 'Upgrade-Insecure-Requests: 1',
-      '-H', 'Sec-Fetch-Dest: document',
-      '-H', 'Sec-Fetch-Mode: navigate',
-      '-H', 'Sec-Fetch-Site: none',
-      '-H', 'Cache-Control: no-cache'
-    );
-    // Add custom headers
-    Object.entries(customHeaders).forEach(([key, value]) => {
-      curlArgs.push('-H', `${key}: ${value}`);
-    });
+  curlArgs.push(
+    '-H', 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+    '-H', 'Accept-Language: en-US,en;q=0.5',
+    '-H', 'Accept-Encoding: gzip, deflate, br',
+    '-H', 'Connection: keep-alive',
+    '-H', 'Upgrade-Insecure-Requests: 1',
+    '-H', 'Sec-Fetch-Dest: document',
+    '-H', 'Sec-Fetch-Mode: navigate',
+    '-H', 'Sec-Fetch-Site: none',
+    '-H', 'Cache-Control: no-cache'
+  );
-    curlArgs.push(url);
+  Object.entries(customHeaders).forEach(([key, value]) => {
+    curlArgs.push('-H', `${key}: ${value}`);
+  });
+  curlArgs.push(url);
-    // Execute curl
-    const curlResult = spawnSync('curl', curlArgs, {
-      encoding: 'utf8',
-      timeout: timeout * CURL_DEFAULTS.SPAWN_TIMEOUT_MULTIPLIER,
-      maxBuffer: maxSize
-    });
-    if (curlResult.error) {
-      throw curlResult.error;
-    }
-    if (curlResult.status !== CURL_DEFAULTS.CURL_SUCCESS_STATUS) {
-      throw new Error(`Curl exited with status ${curlResult.status}: ${curlResult.stderr}`);
-    }
-    const output = curlResult.stdout;
-    const lines = output.split('\n');
-    const metadata = lines[lines.length - 1]; // Last line contains write-out data
-    const content = lines.slice(0, -1).join('\n'); // Everything except last line
-    // Parse metadata
-    const metadataParts = metadata.split('|');
-    if (metadataParts.length !== CURL_DEFAULTS.METADATA_PIPE_PARTS) {
-      throw new Error(`Invalid metadata format: expected ${CURL_DEFAULTS.METADATA_PIPE_PARTS} parts, got ${metadataParts.length}`);
-    }
-    const [httpCode, contentType, downloadSize] = metadataParts;
-    return {
-      content,
-      httpCode: parseInt(httpCode) || 0,
-      contentType: contentType || 'unknown',
-      downloadSize: parseInt(downloadSize) || content.length,
-      success: true
-    };
-  } catch (error) {
-    return {
-      content: '',
-      httpCode: 0,
-      contentType: 'unknown',
-      downloadSize: 0,
-      success: false,
-      error: error.message
-    };
+  // Shared async-spawn helper handles streaming/cap/timeout/kill plumbing.
+  const result = await runProcess('curl', curlArgs, {
+    timeout: timeout * 1000,
+    maxStdout: maxSize
+  });
+  if (result.error) return errResult(result.error);
+  if (result.truncated) return errResult(`Output exceeded ${maxSize} bytes`);
+  if (result.signal) return errResult(`Killed by signal ${result.signal}`);
+  if (result.code !== CURL_DEFAULTS.CURL_SUCCESS_STATUS) {
+    return errResult(`Curl exited with status ${result.code}: ${result.stderr.toString('utf8')}`);
+  }
+  const output = result.stdout.toString('utf8');
+  // lastIndexOf('\n') is a single O(n) scan from the end vs the old
+  // split('\n') + slice(0,-1) + join('\n') which was three full passes
+  // plus two intermediate array allocations.
+  const sepIdx = output.lastIndexOf('\n');
+  if (sepIdx === -1) return errResult('No metadata separator in curl output');
+  const content = output.slice(0, sepIdx);
+  const metadata = output.slice(sepIdx + 1);
+  // Split on first/last pipe so the middle (content-type) can legitimately
+  // contain pipes — naive split('|') with parts-count check would drop the
+  // whole response with 'Invalid metadata format' for such content-types.
+  const firstPipe = metadata.indexOf('|');
+  const lastPipe = metadata.lastIndexOf('|');
+  if (firstPipe === -1 || firstPipe === lastPipe) {
+    return errResult(`Invalid metadata format: missing pipes in "${metadata}"`);
   }
+  const httpCode = metadata.slice(0, firstPipe);
+  const contentType = metadata.slice(firstPipe + 1, lastPipe);
+  const downloadSize = metadata.slice(lastPipe + 1);
+  return {
+    content,
+    httpCode: parseInt(httpCode, 10) || 0,
+    contentType: contentType || 'unknown',
+    downloadSize: parseInt(downloadSize, 10) || content.length,
+    success: true
+  };
 }
 /**
@@ -134,58 +138,103 @@ function searchContent(content, searchStrings = [], searchStringsAnd = [], hasSe
   }
   const lowerContent = content.toLowerCase();
-  // Handle AND logic searchstring_and (all patterns must be present)
+  // Handle AND logic searchstring_and (all patterns must be present).
+  // Short-circuits on first missing pattern — the old code walked the
+  // entire list to build a full missingPatterns array that's only used
+  // by a debug log. Now we early-exit and report the first miss (the
+  // debug log's missingPatterns.join(', ') still works with one entry).
   if (hasSearchStringAnd && searchStringsAnd.length > 0) {
-    const missingPatterns = [];
-    const foundPatterns = [];
-    for (const pattern of searchStringsAnd) {
-      const lowerPattern = pattern.toLowerCase();
-      if (lowerContent.includes(lowerPattern)) {
-        foundPatterns.push(pattern);
-      } else {
-        missingPatterns.push(pattern);
+    // Pre-lower patterns once — was per-iteration toLowerCase before.
+    // For a 20-pattern AND check the difference is small per call but
+    // the pattern itself never changes between iterations of the loop.
+    const lowered = searchStringsAnd.map(p => p.toLowerCase());
+    for (let i = 0; i < searchStringsAnd.length; i++) {
+      if (!lowerContent.includes(lowered[i])) {
+        return {
+          found: false,
+          matchedPattern: null,
+          matchType: 'AND',
+          foundPatterns: searchStringsAnd.slice(0, i),
+          missingPatterns: [searchStringsAnd[i]]
+        };
       }
     }
-    // All patterns must be found for AND logic
-    if (missingPatterns.length === 0) {
-      return {
-        found: true,
-        matchedPattern: foundPatterns.join(' AND '),
-        matchType: 'AND',
-        foundPatterns,
-        missingPatterns: []
-      };
-    } else {
-      return {
-        found: false,
-        matchedPattern: null,
-        matchType: 'AND',
-        foundPatterns,
-        missingPatterns
-      };
-    }
+    return {
+      found: true,
+      matchedPattern: searchStringsAnd.join(' AND '),
+      matchType: 'AND',
+      foundPatterns: searchStringsAnd,
+      missingPatterns: []
+    };
   }
-  // Handle OR logic searchstring (any pattern can match)
+  // Handle OR logic searchstring (any pattern can match). Same pre-lower
+  // optimization, though OR usually short-circuits early so the savings
+  // are smaller.
   if (searchStrings.length > 0) {
-    for (const pattern of searchStrings) {
-      const lowerPattern = pattern.toLowerCase();
-      if (lowerContent.includes(lowerPattern)) {
-        return {
-          found: true,
-          matchedPattern: pattern,
+    for (let i = 0; i < searchStrings.length; i++) {
+      if (lowerContent.includes(searchStrings[i].toLowerCase())) {
+        return {
+          found: true,
+          matchedPattern: searchStrings[i],
           matchType: 'OR'
         };
       }
     }
   }
   return { found: false, matchedPattern: null, matchType: null };
 }
+/**
+ * Emits a match for a curl-fetched URL to both the verbose console
+ * (when siteConfig.verbose === 1) and the matched-URLs log file
+ * (when dumpUrls is true). Single source of truth for the format —
+ * both no-searchstring and with-searchstring match paths funnel
+ * through here so partyType / resourceInfo / timestamp / format
+ * don't drift between the two branches.
+ *
+ * @param {object} opts
+ * @param {string}  opts.simplifiedUrl
+ * @param {string}  opts.requestUrl
+ * @param {boolean} opts.isFirstParty
+ * @param {string|null} opts.resourceType
+ * @param {string|null} opts.matchInfo - null for "matched regex only"
+ *                                       (no searchstring), a string like
+ *                                       'pattern: "X"' or 'patterns: 2/3'
+ *                                       for searchstring matches
+ * @param {number|undefined} opts.verbose
+ * @param {boolean} opts.dumpUrls
+ * @param {string}  opts.matchedUrlsLogFile
+ */
+function logMatchedRequest({
+  simplifiedUrl, requestUrl, isFirstParty, resourceType,
+  matchInfo, verbose, dumpUrls, matchedUrlsLogFile
+}) {
+  const partyType = isFirstParty ? 'first-party' : 'third-party';
+  const resourceInfo = resourceType ? ` (${resourceType})` : '';
+  if (verbose === 1) {
+    const verboseSuffix = matchInfo ? ` contains ${matchInfo}` : ' matched regex';
+    console.log(formatLogMessage('match',
+      `[${simplifiedUrl}] ${requestUrl} (${partyType}, curl)${verboseSuffix}${resourceInfo}`));
+  }
+  if (dumpUrls && matchedUrlsLogFile) {
+    const timestamp = new Date().toISOString();
+    // matchInfo goes INSIDE the (party, curl, ...) parens to mirror the
+    // pre-refactor file format.
+    const fileExtra = matchInfo ? `, ${matchInfo}` : '';
+    try {
+      fs.appendFileSync(matchedUrlsLogFile,
+        `${timestamp} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, curl${fileExtra})${resourceInfo}\n`);
+    } catch (logErr) {
+      console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
+    }
+  }
+}
 /**
  * Creates a curl-based URL handler for downloading and searching content
  * @param {object} config - Configuration object containing all necessary parameters
@@ -197,7 +246,8 @@ function createCurlHandler(config) {
     searchStringsAnd,
     hasSearchStringAnd,
     regexes,
-    matchedDomains,
+    // matchedDomains intentionally not destructured — only addMatchedDomain
+    // is called; the underlying collection is opaque to this handler.
     addMatchedDomain,
     isDomainAlreadyDetected,
     onContentFetched,
@@ -215,101 +265,128 @@ function createCurlHandler(config) {
     hasSearchString
   } = config;
+  // Hoisted: currentUrl doesn't change for this handler's lifetime, so
+  // parsing its root domain once at handler-creation eliminates the
+  // per-request parse + getRootDomain call.
+  let currentRootDomain = '';
+  try { currentRootDomain = getRootDomain(currentUrl); } catch (_) {}
   return async function curlHandler(requestUrl) {
     try {
-      const respDomain = perSiteSubDomains ? (new URL(requestUrl)).hostname : getRootDomain(requestUrl);
-      const fullSubdomain = (new URL(requestUrl)).hostname; // Always get full subdomain for cache tracking
-      // Skip if already detected to avoid duplicates
-      if (isDomainAlreadyDetected(fullSubdomain)) {
+      // Regex check FIRST — cheap filter that skips ~99% of requests.
+      // Previously this ran AFTER a URL parse + domain-cache lookup,
+      // paying for parses on requests we then immediately drop.
+      const matchesRegex = regexes.some(re => re.test(requestUrl));
+      if (!matchesRegex) {
         if (forceDebug) {
-          console.log(formatLogMessage('debug', `[curl] Skipping already detected subdomain: ${fullSubdomain}`));
+          console.log(formatLogMessage('debug', `${CURL_TAG} URL ${requestUrl} doesn't match any regex patterns`));
         }
         return;
       }
-      // Only process URLs that match our regex patterns
-      const matchesRegex = regexes.some(re => re.test(requestUrl));
-      if (!matchesRegex) {
+      // Parse requestUrl ONCE and reuse. The prior structure parsed it
+      // 4-6 times: two `new URL().hostname` calls, two dead-var
+      // hostname computations that were never read, plus the
+      // getRootDomain calls. Single parse + the cache key (fullSubdomain)
+      // + first-party root-domain comparison all come from this one URL
+      // object now.
+      let requestHostname;
+      try { requestHostname = new URL(requestUrl).hostname; } catch (_) { return; }
+      const fullSubdomain = requestHostname; // always the full subdomain
+      // Compute requestRootDomain ONCE — derive respDomain from it when
+      // perSiteSubDomains is false, and reuse it for the first-party
+      // check. Previously getRootDomain(requestUrl) was called twice in
+      // that path.
+      const requestRootDomain = getRootDomain(requestUrl);
+      const respDomain = perSiteSubDomains ? requestHostname : requestRootDomain;
+      // Skip if already detected to avoid duplicates
+      if (isDomainAlreadyDetected(fullSubdomain)) {
         if (forceDebug) {
-          console.log(formatLogMessage('debug', `[curl] URL ${requestUrl} doesn't match any regex patterns`));
+          console.log(formatLogMessage('debug', `${CURL_TAG} Skipping already detected subdomain: ${fullSubdomain}`));
         }
         return;
       }
-      // Check if this is a first-party request (same domain as the URL being scanned)
-      const currentUrlHostname = new URL(currentUrl).hostname;
-      const requestHostname = new URL(requestUrl).hostname;
-      const currentRootDomain = getRootDomain(currentUrl);
-      const requestRootDomain = getRootDomain(requestUrl);
+      // First-party = same registrable root domain. Same definition the
+      // main request handler uses; matches what searchstring.js's
+      // responseHandler does too (post the cross-module unification).
       const isFirstParty = currentRootDomain === requestRootDomain;
-      // Apply first-party/third-party filtering
-      if (isFirstParty && (siteConfig.firstParty === false || siteConfig.firstParty === 0)) {
+      // Apply first-party/third-party filtering. `=== false` only (no
+      // `|| === 0`) — matches lib/searchstring.js and the main request
+      // handler, which all treat these as boolean flags. Accepting 0 as
+      // "disabled" here but not elsewhere would silently disagree if a
+      // user ever set "firstParty": 0 in JSON config.
+      if (isFirstParty && siteConfig.firstParty === false) {
         if (forceDebug) {
-          console.log(formatLogMessage('debug', `[curl] Skipping first-party request (firstParty disabled): ${requestUrl}`));
+          console.log(formatLogMessage('debug', `${CURL_TAG} Skipping first-party request (firstParty disabled): ${requestUrl}`));
         }
         return;
       }
-      if (!isFirstParty && (siteConfig.thirdParty === false || siteConfig.thirdParty === 0)) {
+      if (!isFirstParty && siteConfig.thirdParty === false) {
         if (forceDebug) {
-          console.log(formatLogMessage('debug', `[curl] Skipping third-party request (thirdParty disabled): ${requestUrl}`));
+          console.log(formatLogMessage('debug', `${CURL_TAG} Skipping third-party request (thirdParty disabled): ${requestUrl}`));
         }
         return;
       }
       if (forceDebug) {
-        console.log(formatLogMessage('debug', `[curl] Processing ${isFirstParty ? 'first-party' : 'third-party'} request: ${requestUrl}`));
+        console.log(formatLogMessage('debug', `${CURL_TAG} Processing ${isFirstParty ? 'first-party' : 'third-party'} request: ${requestUrl}`));
       }
-      // If NO searchstring is defined, match immediately (like browser behavior)
-      if (!hasSearchString || ((!searchStrings || !searchStrings.length) && (!searchStringsAnd || !searchStringsAnd.length))) {
+      // If NO searchstring is defined, match immediately (like browser
+      // behavior). Simplified from the prior convoluted condition
+      // (hasSearchString being true while both arrays are empty is
+      // impossible given parseSearchStrings, so the OR was redundant).
+      if (!hasSearchString && !hasSearchStringAnd) {
         if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
           if (forceDebug) {
-            console.log(formatLogMessage('debug', `[curl] Domain ${respDomain} is in ignore list`));
+            console.log(formatLogMessage('debug', `${CURL_TAG} Domain ${respDomain} is in ignore list`));
           }
           return;
         }
         addMatchedDomain(respDomain, resourceType, fullSubdomain);
-        const simplifiedUrl = getRootDomain(currentUrl);
-        if (siteConfig.verbose === 1) {
-          const partyType = isFirstParty ? 'first-party' : 'third-party';
-          const resourceInfo = resourceType ? ` (${resourceType})` : '';
-          console.log(formatLogMessage('match', `[${simplifiedUrl}] ${requestUrl} (${partyType}, curl) matched regex${resourceInfo}`));
-        }
-        if (dumpUrls && matchedUrlsLogFile) {
-          const timestamp = new Date().toISOString();
-          const partyType = isFirstParty ? 'first-party' : 'third-party';
-          const resourceInfo = resourceType ? ` (${resourceType})` : '';
-          try {
-            fs.appendFileSync(matchedUrlsLogFile,
-              `${timestamp} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, curl)${resourceInfo}\n`);
-          } catch (logErr) {
-            console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
-          }
-        }
+        logMatchedRequest({
+          simplifiedUrl: currentRootDomain,
+          requestUrl,
+          isFirstParty,
+          resourceType,
+          matchInfo: null, // no searchstring — log says "matched regex"
+          verbose: siteConfig.verbose,
+          dumpUrls,
+          matchedUrlsLogFile
+        });
         return;
       }
       // If searchstring IS defined, download and search content
-      if (hasSearchString && ((searchStrings && searchStrings.length > 0) || (searchStringsAnd && searchStringsAnd.length > 0)) && forceDebug) {
-        console.log(formatLogMessage('debug', `[curl] Downloading content for pattern matching: ${requestUrl}`));
+      if ((hasSearchString || hasSearchStringAnd) && forceDebug) {
+        console.log(formatLogMessage('debug', `${CURL_TAG} Downloading content for pattern matching: ${requestUrl}`));
       }
-      // Prepare custom headers from site config
-      const customHeaders = siteConfig.custom_headers || {};
+      // Prepare custom headers from site config. SHALLOW-COPY so the
+      // Referer assignment below doesn't mutate the underlying siteConfig
+      // object — the old `siteConfig.custom_headers || {}` was a reference
+      // (when present), so setting customHeaders['Referer'] persisted the
+      // first URL's random-mode referrer onto siteConfig.custom_headers,
+      // and every subsequent URL inherited that pinned value. Silent
+      // breakage of {mode:'random_search'} variation across a site's URLs.
+      //
+      // Uses getReferrerForUrl so ALL referrer modes work — the old
+      // inline string/array logic dropped object modes silently.
+      const customHeaders = { ...(siteConfig.custom_headers || {}) };
       if (siteConfig.referrer_headers) {
-        const referrerUrl = Array.isArray(siteConfig.referrer_headers)
-          ? siteConfig.referrer_headers[Math.floor(Math.random() * siteConfig.referrer_headers.length)]
-          : siteConfig.referrer_headers;
-        if (typeof referrerUrl === 'string' && referrerUrl.startsWith('http')) {
-          customHeaders['Referer'] = referrerUrl;
-        }
+        const referrerUrl = getReferrerForUrl(
+          requestUrl,
+          siteConfig.referrer_headers,
+          siteConfig.referrer_disable,
+          forceDebug
+        );
+        if (referrerUrl) customHeaders['Referer'] = referrerUrl;
       }
       const downloadResult = await downloadWithCurl(requestUrl, userAgent, {
@@ -320,7 +397,7 @@ function createCurlHandler(config) {
       if (!downloadResult.success) {
         if (forceDebug) {
-          console.log(formatLogMessage('debug', `[curl] Failed to download ${requestUrl}: ${downloadResult.error}`));
+          console.log(formatLogMessage('debug', `${CURL_TAG} Failed to download ${requestUrl}: ${downloadResult.error}`));
         }
         return;
       }
@@ -331,7 +408,7 @@ function createCurlHandler(config) {
           onContentFetched(requestUrl, downloadResult.content);
         } catch (cacheErr) {
           if (forceDebug) {
-            console.log(formatLogMessage('debug', `[curl] Content caching failed: ${cacheErr.message}`));
+            console.log(formatLogMessage('debug', `${CURL_TAG} Content caching failed: ${cacheErr.message}`));
           }
         }
       }
@@ -347,54 +424,41 @@ function createCurlHandler(config) {
       if (searchResult.found) {
         if (!respDomain || matchesIgnoreDomain(respDomain, ignoreDomains)) {
           if (forceDebug) {
-            console.log(formatLogMessage('debug', `[curl] Domain ${respDomain} matches but is in ignore list`));
+            console.log(formatLogMessage('debug', `${CURL_TAG} Domain ${respDomain} matches but is in ignore list`));
           }
           return;
         }
         addMatchedDomain(respDomain, resourceType, fullSubdomain);
-        const simplifiedUrl = getRootDomain(currentUrl);
-        if (siteConfig.verbose === 1) {
-          const partyType = isFirstParty ? 'first-party' : 'third-party';
-          const resourceInfo = resourceType ? ` (${resourceType})` : '';
-          const matchInfo = searchResult.matchType === 'AND'
-            ? `patterns: ${searchResult.foundPatterns.length}/${searchStringsAnd.length}`
-            : `pattern: "${searchResult.matchedPattern}"`;
-          console.log(formatLogMessage('match',
-            `[${simplifiedUrl}] ${requestUrl} (${partyType}, curl) contains ${matchInfo}${resourceInfo}`));
-        }
-        if (dumpUrls && matchedUrlsLogFile) {
-          const timestamp = new Date().toISOString();
-          const partyType = isFirstParty ? 'first-party' : 'third-party';
-          const resourceInfo = resourceType ? ` (${resourceType})` : '';
-          const matchInfo = searchResult.matchType === 'AND'
-            ? `patterns: ${searchResult.foundPatterns.length}/${searchStringsAnd.length}`
-            : `pattern: "${searchResult.matchedPattern}"`;
-          try {
-            fs.appendFileSync(matchedUrlsLogFile,
-              `${timestamp} [match][${simplifiedUrl}] ${requestUrl} (${partyType}, curl, ${matchInfo})${resourceInfo}\n`);
-          } catch (logErr) {
-            console.warn(formatLogMessage('warn', `Failed to write to matched URLs log: ${logErr.message}`));
-          }
-        }
+        const matchInfo = searchResult.matchType === 'AND'
+          ? `patterns: ${searchResult.foundPatterns.length}/${searchStringsAnd.length}`
+          : `pattern: "${searchResult.matchedPattern}"`;
+        logMatchedRequest({
+          simplifiedUrl: currentRootDomain,
+          requestUrl,
+          isFirstParty,
+          resourceType,
+          matchInfo,
+          verbose: siteConfig.verbose,
+          dumpUrls,
+          matchedUrlsLogFile
+        });
       } else {
         if (forceDebug) {
           const partyType = isFirstParty ? 'first-party' : 'third-party';
           if (searchResult.matchType === 'AND' && searchResult.missingPatterns) {
             console.log(formatLogMessage('debug',
-              `[curl] ${requestUrl} (${partyType}) matched regex but missing AND patterns: ${searchResult.missingPatterns.join(', ')}`));
+              `${CURL_TAG} ${requestUrl} (${partyType}) matched regex but missing AND patterns: ${searchResult.missingPatterns.join(', ')}`));
           } else {
             console.log(formatLogMessage('debug',
-              `[curl] ${requestUrl} (${partyType}) matched regex but no search patterns found`));
+              `${CURL_TAG} ${requestUrl} (${partyType}) matched regex but no search patterns found`));
           }
         }
       }
     } catch (err) {
       if (forceDebug) {
-        console.log(formatLogMessage('debug', `[curl] Handler failed for ${requestUrl}: ${err.message}`));
+        console.log(formatLogMessage('debug', `${CURL_TAG} Handler failed for ${requestUrl}: ${err.message}`));
       }
     }
   };
@@ -434,9 +498,12 @@ function validateCurlAvailability() {
   }
 }
+// Public surface used by nwss.js (createCurlHandler + validateCurlAvailability).
+// downloadWithCurl and searchContent are module-internal helpers — no external
+// caller imports them from here. lib/searchstring.js has its own independently-
+// defined functions of the same names, which is why a naive grep showed
+// false-positive 'external uses'.
 module.exports = {
-  downloadWithCurl,
-  searchContent,
   createCurlHandler,
   validateCurlAvailability
 };