npm - @fanboynz/network-scanner - Versions diffs - 3.2.0 → 3.4.0 - Mend

@fanboynz/network-scanner 3.2.0 → 3.4.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (12) [hide / show]

package/nwss.js — CHANGED (viewed)

@@ -9,7 +9,7 @@ const fs = require('fs');
 const os = require('os');
 const psl = require('psl');
 const path = require('path');
-const { createRotatingResolver, createDnsCircuitBreaker, parseDnsServers, isNonExistenceError } = require('./lib/dns');
+const { createRotatingResolver, createDnsCircuitBreaker, parseDnsServers, isNonExistenceError, dohTemplatesForResolvers } = require('./lib/dns');
 const { createGrepHandler, validateGrepAvailability } = require('./lib/grep');
 const { compressMultipleFiles } = require('./lib/compress');
 const { parseSearchStrings, createResponseHandler } = require('./lib/searchstring');
@@ -17,6 +17,7 @@ const { applyAllFingerprintSpoofing, USER_AGENT_COLLECTIONS, CHROME_BUILD, CHROM
 const { formatRules, handleOutput, getFormatDescription } = require('./lib/output');
 // Curl functionality (replace searchstring curl handler)
 const { validateCurlAvailability, createCurlHandler: createCurlModuleHandler } = require('./lib/curl');
+const { runProcess } = require('./lib/spawn-async');
 // Rule validation
 const { validateRulesetFile, validateFullConfig, testDomainValidation, cleanRulesetFile, normalizeSiteConfig } = require('./lib/validate_rules');
 // CF Bypass
@@ -55,6 +56,7 @@ const CSS_BLOCKED_TAG = messageColors.processing('[css_blocked]');
 const EVAL_ON_DOC_TAG = messageColors.processing('[evalOnDoc]');
 const REALTIME_CLEANUP_TAG = messageColors.processing('[realtime_cleanup]');
 const VPN_TAG = messageColors.processing('[vpn]');
+const POPUP_TAG = messageColors.processing('[popup]');
 // Precomputed colored '[SmartCache]' subsystem prefix — paired with the
 // same constant in lib/smart-cache.js so debug lines from both files
 // produce consistently colored output. formatLogMessage only colors the
@@ -64,7 +66,7 @@ const SMART_CACHE_TAG = messageColors.processing('[SmartCache]');
 // log lines (start/completed). Same cyan as the other monitoring tags.
 const CONCURRENCY_TAG = messageColors.processing('[CONCURRENCY]');
 // Enhanced mouse interaction and page simulation
-const { performPageInteraction, createInteractionConfig, computeInteractionCeilingMs, performContentClicks, humanLikeMouseMove } = require('./lib/interaction');
+const { performPageInteraction, createInteractionConfig, computeInteractionCeilingMs, performContentClicks, humanLikeMouseMove, performTargetedClicks } = require('./lib/interaction');
 // Optional ghost-cursor support for advanced Bezier-based mouse movements
 const { createGhostCursor, ghostMove, ghostClick, ghostRandomMove, resolveGhostCursorConfig } = require('./lib/ghost-cursor');
 // Domain detection cache for performance optimization
@@ -240,6 +242,7 @@ if (fs.existsSync(NWSSCONFIG_PATH)) {
         resource_cleanup_interval: ['--cleanup-interval'],
         dns: ['--dns'],
         dns_cache: ['--dns-cache'],
+        doh_disable: ['--doh-disable'],
         cache_requests: ['--cache-requests'],
         dumpurls: ['--dumpurls'],
         remove_tempfiles: ['--remove-tempfiles'],
@@ -376,7 +379,13 @@ if (dnsCacheMode) enableDiskCache();
 // Filters NXDOMAIN / unresolvable hostnames in <100ms before paying the
 // ~5-15s Puppeteer + Cloudflare detection round-trip on each.
 const dnsPrecheckEnabled = !args.includes('--no-dns-precheck');
-const dnsPrecheckTimeoutMs = 2000;
+// 4s (was 2s): under a concurrent scan the c-ares UDP burst against the pinned
+// resolvers can take >2s to answer — a tight timeout false-counted those as
+// resolver errors and tripped the circuit breaker. A clean NXDOMAIN still
+// returns fast (the resolver answers immediately), so the higher ceiling only
+// costs time when the resolver is genuinely slow — exactly when we want to wait
+// rather than false-fail. Paired with the resolver's concurrency cap below.
+const dnsPrecheckTimeoutMs = 4000;
 // --show-dead-domains: collect hostnames that are definitively DEAD (do not
 // exist / unreachable) and print them at the end of the scan so they can be
@@ -387,7 +396,11 @@ const dnsPrecheckTimeoutMs = 2000;
 const showDeadDomains = args.includes('--show-dead-domains');
 const _deadDomains = new Map();
 function recordDeadDomain(urlOrHost, reason) {
-  if (!showDeadDomains || !urlOrHost) return;
+  // Populate unconditionally — the pre-check skip reads _deadDomains to drop
+  // repeat URLs on a host already proven dead this run, which must work whether
+  // or not --show-dead-domains is set. The end-of-scan REPORT is separately
+  // gated on showDeadDomains, so the flag still controls output, not recording.
+  if (!urlOrHost) return;
   let host = urlOrHost;
   try { host = new URL(urlOrHost).hostname; } catch { /* already a bare host */ }
   if (host && !_deadDomains.has(host)) _deadDomains.set(host, reason);
@@ -407,7 +420,7 @@ const DNS_NEGATIVE_CACHE_MAX = 1000;
 // persisting it can't silently drop a live host. Opt-in via --dns-cache: dead
 // hosts are remembered for DNS_NEGATIVE_PERSIST_TTL_MS and reloaded next run;
 // otherwise it's a 5-min in-memory-only cache. The persist TTL is deliberately
-// shorter than the dig/whois positive cache (20h): a domain that doesn't exist
+// shorter than the dig/whois positive cache (dig 20h / whois 36h): a domain that doesn't exist
 // now MAY get registered, and this is a domain-hunting scanner, so the dead
 // ones are re-checked twice a day rather than trusted for ~a day.
 const DNS_NEGATIVE_PERSIST_TTL_MS = 12 * 60 * 60 * 1000; // 12 hours
@@ -437,6 +450,31 @@ const dnsResolver = createRotatingResolver({ servers: dnsServersOverride, forceD
 // system /etc/resolv.conf, which on a flaky setup times out and silently drops
 // dig-gated domains). Only when --dns is explicitly set.
 if (dnsServersOverride.length > 0) setDigResolvers(dnsServersOverride);
+// Pin Chrome's NAVIGATION resolver to the same providers via DoH. Chrome
+// ignores --dns for page loads and reads /etc/resolv.conf directly, so a broken
+// system resolver (e.g. one returning REFUSED) can ERR_NAME_NOT_RESOLVED a
+// domain the pre-check already resolved. Mapping --dns to the matching DoH
+// endpoint makes navigation use the pinned provider instead of resolv.conf.
+// 'automatic' mode (not 'secure') so Chrome still falls back to system DNS if
+// DoH is unreachable rather than failing the whole batch. Empty templates when
+// --dns is absent or maps to no known DoH provider — Chrome keeps system DNS.
+//
+// Applied ONLY to direct connections (see createBrowser): when a proxy or VPN
+// is active, the exit/tunnel does the resolution (remote DNS / pushed DNS), so
+// pinning local DoH would be redundant and could resolve geo-split domains to
+// the wrong region. In those modes Chrome defers to the proxy/VPN as before.
+// --doh-disable (default false) opts out of the Chrome DoH pinning entirely —
+// navigation falls back to system resolv.conf even when --dns maps to a known
+// provider. The pre-check and dig still honor --dns. Use it if DoH adds
+// unwanted latency, is blocked on the network, or you specifically want Chrome
+// to resolve via the system path.
+const dohDisabled = args.includes('--doh-disable');
+const chromeDoh = dnsServersOverride.length > 0
+  ? dohTemplatesForResolvers(dnsServersOverride)
+  : { templates: '', mapped: [], unmapped: [] };
+// anyVpnConfigured and the DoH startup log live inside the main IIFE below:
+// `sites` is destructured from the config later in module load, so referencing
+// it at this point in top-level evaluation would TDZ-throw.
 // Circuit breaker: if resolver errors dominate, suspend the pre-check for a
 // cooldown so a refusal storm doesn't keep hammering a broken resolver (sites
 // still load — a suspended pre-check just proceeds to navigation).
@@ -715,6 +753,9 @@ if (blockAdsIndex !== -1) {
   adblockEnabled = true;
   const engine = adblockEngineName === 'rust' ? adblockRust : adblockJs;
+  // Only ever assigned the os.tmpdir() path below — never a user file — so the
+  // unlink in finally can never touch the caller's own lists.
+  let combinedTmpFile = null;
   try {
     if (engine === adblockRust) {
       // Rust wrapper accepts an array directly — no temp file needed.
@@ -723,15 +764,22 @@ if (blockAdsIndex !== -1) {
       // JS engine takes a single path; concat to a temp file when multiple lists.
       let rulesFile = rulesFiles[0];
       if (rulesFiles.length > 1) {
-        rulesFile = path.join(os.tmpdir(), `nwss-adblock-combined-${Date.now()}.txt`);
+        combinedTmpFile = path.join(os.tmpdir(), `nwss-adblock-combined-${Date.now()}.txt`);
+        rulesFile = combinedTmpFile;
         const combined = rulesFiles.map(f => fs.readFileSync(f, 'utf-8')).join('\n');
         fs.writeFileSync(rulesFile, combined);
       }
+      // parseAdblockRules reads the file synchronously and in full before
+      // returning, so the temp copy is safe to remove immediately afterwards.
       adblockMatcher = engine.parseAdblockRules(rulesFile, { enableLogging: forceDebug });
     }
   } catch (err) {
     console.log(`Error: Failed to load adblock engine '${adblockEngineName}': ${err.message}`);
     process.exit(1);
+  } finally {
+    if (combinedTmpFile) {
+      try { fs.unlinkSync(combinedTmpFile); } catch { /* best effort — OS reaps tmpdir */ }
+    }
   }
   const stats = adblockMatcher.getStats();
   const ruleDesc = stats.total != null
@@ -803,9 +851,13 @@ General Options:
 Validation Options:
   --cache-requests               Cache HTTP requests to avoid re-requesting same URLs within scan
-  --dns <ip[,ip,...]>            Resolver(s) for the DNS pre-check AND nettools' dig (not Chrome nav / whois).
-                                 One pins all queries to it; several rotate per query. Overrides /etc/resolv.conf.
-  --dns-cache                    Persist dig/whois results to disk between runs (20h TTL, 2000-entry cap each),
+  --dns <ip[,ip,...]>            Resolver(s) for the DNS pre-check, nettools' dig, and — when they map to a
+                                 known DoH provider — Chrome's page navigation via DoH on direct connections
+                                 (skipped under proxy/VPN; not whois). Overrides /etc/resolv.conf.
+                                 One pins all queries to it; several rotate per query.
+  --doh-disable                  Opt out of the Chrome-navigation DoH pinning (default: off). Chrome then
+                                 resolves via system resolv.conf; --dns still pins the pre-check and dig.
+  --dns-cache                    Persist dig/whois results to disk between runs (dig 20h / whois 36h TTL, 2000-entry cap each),
                                  plus the DNS pre-check negative cache (NXDOMAIN only, 12h TTL, .dnsnegcache)
   --no-dns-precheck              Disable per-URL DNS resolution check before page navigation.
                                  By default, URLs whose hostname doesn't resolve are skipped
@@ -879,6 +931,9 @@ Redirect Handling Options:
   source: true/false                           Save page source HTML after load
   firstParty: true/false                       Allow first-party matches (default: false)
   thirdParty: true/false                       Allow third-party matches (default: true)
+  redirect_first_party: true/false             Treat redirect-destination domains as first-party (default: true).
+                                              false keeps redirect targets third-party so filterRegex/dig can match
+                                              them (e.g. capturing an ad/cloak redirect's end domain)
   screenshot: true/false/\"force\"                Capture screenshot (true=on failure, \"force\"=always)
   headful: true/false                          Launch browser with GUI for this site
   fingerprint_protection: true/false/"random" Enable fingerprint spoofing: true/false/"random"
@@ -916,6 +971,9 @@ Advanced Options:
   interact_scrolling: true/false              Enable scrolling simulation (default: true)
   interact_clicks: true/false                 Enable element clicking simulation (default: false)
   interact_typing: true/false                 Enable typing simulation (default: false)
+  click_elements: ["sel1","sel2"]             After load, click these CSS selectors in order, main frame + iframes
+                                              (organic nav / play button). Honors realistic_click + cursor_mode "ghost"; missing skipped
+  click_wait: <milliseconds>                  Per-click: max wait for the element to appear + settle/nav after (default: 5000)
   cursor_mode: "ghost"                        Use ghost-cursor Bezier mouse (requires: npm i ghost-cursor)
   ghost_cursor_speed: <number>                Ghost-cursor speed multiplier (default: auto)
   ghost_cursor_hesitate: <milliseconds>       Delay before ghost-cursor clicks (default: 50)
@@ -933,7 +991,7 @@ Advanced Options:
   whois_delay: <milliseconds>                Delay between whois requests for this site (default: global whois_delay)
   dig: ["term1", "term2"]                     Check dig output for ALL specified terms (AND logic)
   dig-or: ["term1", "term2"]                  Check dig output for ANY specified term (OR logic)
-  goto_options: {"waitUntil": "domcontentloaded"} Custom page.goto() options (default: {"waitUntil": "load"})
+  goto_options: {"waitUntil": "domcontentloaded"} Custom page.goto() options (default: {"waitUntil": "domcontentloaded"})
   dig_subdomain: true/false                    Use subdomain for dig lookup instead of root domain (default: false)
   digRecordType: "A"                          DNS record type for dig (default: A)
@@ -1423,6 +1481,7 @@ if (dumpUrls) {
 // Avoids blocking I/O on every intercepted request in debug/dumpurls mode
 const _logBuffers = new Map();  // filePath -> string[]
 const LOG_FLUSH_INTERVAL = 2000; // Flush every 2 seconds
+const LOG_BUFFER_MAX_RETAINED = 10000; // Cap a file's retry backlog (lines) so a permanently unwritable path can't grow memory unboundedly
 let _logFlushTimer = null;
 function bufferedLogWrite(filePath, entry) {
@@ -1435,18 +1494,20 @@ function bufferedLogWrite(filePath, entry) {
 function flushLogBuffers() {
   for (const [filePath, entries] of _logBuffers) {
-    if (entries.length > 0) {
-      try {
-        const data = entries.join('');
-        entries.length = 0; // Clear buffer immediately
-        fs.writeFile(filePath, data, { flag: 'a' }, (err) => {
-          if (err) {
-            console.warn(formatLogMessage('warn', `Failed to flush log buffer to ${filePath}: ${err.message}`));
-          }
-        });
-      } catch (err) {
-        console.warn(formatLogMessage('warn', `Failed to flush log buffer to ${filePath}: ${err.message}`));
-      }
+    if (entries.length === 0) continue;
+    try {
+      // Synchronous append on purpose: the batched 2s flush is small, and a
+      // blocking append cannot overlap the next timer tick (it holds the event
+      // loop for its duration) — eliminating the interleaved concurrent-append
+      // hazard of the old async fs.writeFile({flag:'a'}). Clear ONLY after the
+      // write succeeds, so a transient failure retries next tick instead of
+      // being silently dropped (the old code cleared before the async write
+      // confirmed). Bounded so a permanently unwritable path can't grow memory.
+      fs.appendFileSync(filePath, entries.join(''));
+      entries.length = 0;
+    } catch (err) {
+      console.warn(formatLogMessage('warn', `Failed to flush log buffer to ${filePath}: ${err.message}`));
+      if (entries.length > LOG_BUFFER_MAX_RETAINED) entries.length = 0;
     }
   }
 }
@@ -1490,21 +1551,29 @@ if (forceDebug && globalComments) {
  * @param {string} url - The URL string to parse.
  * @returns {string} The root domain, or the original hostname if parsing fails (e.g., for IP addresses or invalid URLs), or an empty string on error.
  */
-const _rootDomainCache = new Map();
-function getRootDomain(url) {
-  const cached = _rootDomainCache.get(url);
+// psl.parse memoized by hostname. The request handlers parse the root domain
+// of EVERY request, and a page hits the same few hosts repeatedly (CDN,
+// analytics, ad domains) — so a hostname-keyed memo turns almost all of those
+// into Map hits instead of repeated public-suffix-list lookups. Keyed by
+// hostname (not full URL) so distinct paths/queries on one host share one
+// entry: higher hit rate, fewer + shorter keys than a URL-keyed cache.
+// psl.parse is pure and never throws (malformed input → {domain: null}), so
+// the catch is defensive only.
+const _hostRootCache = new Map();
+function rootDomainForHost(hostname) {
+  if (!hostname) return '';
+  const cached = _hostRootCache.get(hostname);
   if (cached !== undefined) return cached;
-  try {
-    const { hostname } = new URL(url);
-    const parsed = psl.parse(hostname);
-    const result = parsed.domain || hostname;
-    if (_rootDomainCache.size > 5000) _rootDomainCache.clear();
-    _rootDomainCache.set(url, result);
-    return result;
-  } catch {
-    _rootDomainCache.set(url, '');
-    return '';
-  }
+  let result;
+  try { const parsed = psl.parse(hostname); result = parsed.domain || hostname; }
+  catch { result = hostname; }
+  if (_hostRootCache.size > 5000) _hostRootCache.clear();
+  _hostRootCache.set(hostname, result);
+  return result;
+}
+function getRootDomain(url) {
+  try { return rootDomainForHost(new URL(url).hostname); }
+  catch { return ''; }
 }
 /**
@@ -1839,7 +1908,33 @@ function setupFrameHandling(page, forceDebug) {
   // Declare userDataDir in outer scope for cleanup access
   let userDataDir = null;
+  // Browser-level decision (the browser launches once per batch, so this can't
+  // be per-site): only disable Chrome's pop-up blocker when at least one site
+  // actually wants popups captured. A real browser blocks non-gesture
+  // window.open(), so non-popup scans keep the blocker on for stealth.
+  // capture_popups scans turn it off so non-gesture popunders (document-level
+  // onclick / timer SDKs) fire and get captured too — gesture-triggered
+  // popups already work via the synthetic-click path regardless of this flag.
+  const wantPopups = Array.isArray(sites) && sites.some(s => s && s.capture_popups === true);
+  if (wantPopups && forceDebug) {
+    console.log(formatLogMessage('debug', `${POPUP_TAG} capture_popups set — launching with --disable-popup-blocking (non-gesture popunders allowed)`));
+  }
+  // DoH gate: any VPN site disables Chrome DoH (the tunnel resolves). Computed
+  // here (not at module top) because `sites` is only initialized by this point.
+  // Read by createBrowser's launch args; the startup log reports the decision.
+  const anyVpnConfigured = Array.isArray(sites) && sites.some(s => s && (s.vpn || s.openvpn));
+  if (dnsServersOverride.length > 0 && !silentMode) {
+    if (dohDisabled) {
+      console.log(formatLogMessage('info', `Chrome DoH disabled via --doh-disable — navigation uses system resolv.conf; --dns still pins the pre-check and dig.`));
+    } else if (chromeDoh.templates) {
+      console.log(formatLogMessage('info', `Chrome navigation will use DoH (automatic) on direct connections: ${chromeDoh.templates}${anyVpnConfigured ? ' — VPN configured, so it defers to VPN resolution' : ' — deferred to proxy resolution on proxied sites'}`));
+    } else {
+      console.warn(formatLogMessage('warn', `--dns servers (${chromeDoh.unmapped.join(', ')}) have no known DoH endpoint — Chrome navigation stays on system resolv.conf; only the pre-check and dig are pinned. Known providers: Google, Cloudflare, Quad9, OpenDNS, AdGuard, CleanBrowsing, DNS.SB, Mullvad.`));
+    }
+  }
   /**
    * Creates a new browser instance with consistent configuration
    * Uses system Chrome and temporary directories to minimize disk usage
@@ -1930,6 +2025,12 @@ function setupFrameHandling(page, forceDebug) {
       // Puppeteer 22.x headless mode optimization
       // Auto-detect best headless mode based on Puppeteer version
       headless: headlessMode,
+      // Bypass TLS cert errors at the browser level (drives CDP
+      // Security.setIgnoreCertificateErrors). Robust on new-headless Chrome,
+      // where the --ignore-certificate-errors *flag* is increasingly ignored.
+      // An ad/tracker scanner must reach self-signed / mismatched-cert ad and
+      // embed domains; we observe traffic, we don't transmit secrets.
+      acceptInsecureCerts: true,
       args: [
         // CRITICAL: Remove automation detection markers
         '--disable-blink-features=AutomationControlled',
@@ -1941,6 +2042,19 @@ function setupFrameHandling(page, forceDebug) {
         '--use-mock-keychain',
         '--disable-client-side-phishing-detection',
         '--enable-features=NetworkService',
+        // DoH for Chrome's navigation resolver when --dns maps to a known
+        // provider — but ONLY on direct connections. A proxied launch carries
+        // a --proxy-server in extraArgs and does its own (remote) DNS; a VPN
+        // tunnels resolution. In both cases local DoH is redundant and could
+        // resolve geo-split domains to the wrong region, so it's skipped and
+        // Chrome defers to the proxy/VPN. 'automatic' keeps a system-DNS
+        // fallback if DoH is unreachable. Flags omitted when not applicable.
+        ...((chromeDoh.templates
+             && !dohDisabled
+             && !anyVpnConfigured
+             && !extraArgs.some(a => typeof a === 'string' && a.startsWith('--proxy-server')))
+          ? ['--dns-over-https-mode=automatic', `--dns-over-https-templates=${chromeDoh.templates}`]
+          : []),
         // Disk space controls - minimal cache for scanning workloads
         `--disk-cache-size=${CACHE_LIMITS.DISK_CACHE_SIZE}`,
         `--media-cache-size=${CACHE_LIMITS.MEDIA_CACHE_SIZE}`,
@@ -2018,6 +2132,10 @@ function setupFrameHandling(page, forceDebug) {
         '--memory-pressure-off',
         '--max_old_space_size=2048',   // V8 heap limit
         '--disable-prompt-on-repost',  // Fixes form popup on page reload
+        // Disable Chrome's pop-up blocker (chrome://settings/content/popups)
+        // ONLY when a site wants popups captured — lets non-gesture popunders
+        // fire. Gated so non-popup scans keep the blocker on for stealth.
+        ...(wantPopups ? ['--disable-popup-blocking'] : []),
         ...(keepBrowserOpen ? [] : ['--disable-background-networking']),
         '--no-sandbox',
         '--disable-setuid-sandbox',
@@ -2420,10 +2538,18 @@ function setupFrameHandling(page, forceDebug) {
       page.setDefaultNavigationTimeout(Math.min(timeout, TIMEOUTS.DEFAULT_NAVIGATION));
       // Aggressive timeouts prevent hanging in Puppeteer 23.x while maintaining speed
-      page.on('console', (msg) => {
-        if (forceDebug && msg.type() === 'error') console.log(formatLogMessage('debug', `Console error: ${msg.text()}`));
-      });
+      // Only attach a console listener under --debug. Registering ANY 'console'
+      // listener makes Puppeteer enable the CDP Runtime domain, which arms
+      // console-based automation/DevTools traps (e.g. disable-devtool logs an
+      // object with a getter and detects the inspector reading it → redirects
+      // away). The body is a no-op without forceDebug, so attaching it
+      // unconditionally armed that trap for zero benefit.
+      if (forceDebug) {
+        page.on('console', (msg) => {
+          if (msg.type() === 'error') console.log(formatLogMessage('debug', `Console error: ${msg.text()}`));
+        });
+      }
       // Add page crash handler
       page.on('error', (err) => {
         if (forceDebug) console.log(formatLogMessage('debug', `Page crashed: ${err.message}`));
@@ -3308,12 +3434,18 @@ function setupFrameHandling(page, forceDebug) {
         // (normalizeSiteConfig now coerces interact: 1 → true with a warning,
         // so by the time we get here both should be booleans — but keep the
         // diagnostic accurate for the truly-missing case.)
+        const hasClickElements = Array.isArray(siteConfig.click_elements) && siteConfig.click_elements.length > 0;
         const interactOn = siteConfig.interact === true;
         const clicksOn = siteConfig.interact_clicks === true;
-        if (!interactOn && !clicksOn) {
-          console.log(formatLogMessage('debug', `[popup] capture_popups is enabled but neither 'interact' nor 'interact_clicks' is — set BOTH to true to fire user-gesture clicks; without them, only popups opened via in-page redirects will capture`));
+        if (hasClickElements && (!interactOn || !clicksOn)) {
+          // click_elements fires its own trusted gesture clicks, so popups it
+          // triggers capture regardless of interact/interact_clicks. Don't warn
+          // "no clicks fire" — surface the random-click coverage gap instead.
+          console.log(formatLogMessage('debug', `[popup] capture_popups: click_elements supplies targeted gesture clicks (popups they trigger WILL capture). interact=${interactOn}, interact_clicks=${clicksOn} — enable both for random content-zone click coverage of overlay popunders too`));
+        } else if (!interactOn && !clicksOn) {
+          console.log(formatLogMessage('debug', `[popup] capture_popups is enabled but neither 'interact' nor 'interact_clicks' is — set BOTH to true to fire user-gesture clicks; without them, only popups opened via in-page redirects (or click_elements) will capture`));
         } else if (!interactOn) {
-          console.log(formatLogMessage('debug', `[popup] capture_popups is enabled but 'interact' is not — set interact: true to enable the interaction loop (interact_clicks is already set); without it, no fake clicks fire`));
+          console.log(formatLogMessage('debug', `[popup] capture_popups is enabled but 'interact' is not — set interact: true to enable the interaction loop (interact_clicks is already set); without it, no random fake clicks fire`));
         } else if (!clicksOn) {
           console.log(formatLogMessage('debug', `[popup] capture_popups is enabled but 'interact_clicks' is not — set interact_clicks: true to enable element-targeted clicks; without it, only random content-zone clicks fire and may miss overlay-based popunders`));
         }
@@ -3362,8 +3494,7 @@ function setupFrameHandling(page, forceDebug) {
             try {
               const parsedUrl = new URL(checkedUrl);
               fullSubdomain = parsedUrl.hostname;
-              const pslResult = psl.parse(fullSubdomain);
-              checkedRootDomain = pslResult.domain || fullSubdomain;
+              checkedRootDomain = rootDomainForHost(fullSubdomain);
             } catch (_) { return; }
             if (!checkedRootDomain) return;
@@ -3638,30 +3769,24 @@ function setupFrameHandling(page, forceDebug) {
         try {
           const parsedUrl = new URL(checkedUrl);
           fullSubdomain = parsedUrl.hostname;
-          const pslResult = psl.parse(fullSubdomain);
-          checkedRootDomain = pslResult.domain || fullSubdomain;
+          checkedRootDomain = rootDomainForHost(fullSubdomain);
         } catch (e) {}
+        // Never BLOCK the top-level document (the scanned page OR a main-frame
+        // redirect target). Aborting it makes the navigation never commit (page
+        // stays at about:blank → navigation timeout), silently breaking any
+        // scanned URL that matches our own filter lists (adblock / blocked /
+        // blockDomainsByUrl) — common on adult/pirate/stream domains. This flag
+        // ONLY guards the abort paths below; the request still flows through the
+        // match logic, so a main-frame redirect destination (e.g. a
+        // filecrypt → ad-domain hop) is still captured via filterRegex/dig/whois.
+        // isNavigationRequest is true for sub-frame docs too, so the mainFrame()
+        // check keeps ad iframes blockable.
+        let isMainFrameDoc = false;
+        try { isMainFrameDoc = request.isNavigationRequest() && request.frame() === page.mainFrame(); } catch (_) {}
         // Check against ALL first-party domains (original + all redirects)
         const isFirstParty = checkedRootDomain && firstPartyDomains.has(checkedRootDomain);
-        // Block infinite iframe loops - safely access frame URL
-        const frameUrl = (() => {
-          try {
-            const frame = request.frame();
-            return frame ? frame.url() : '';
-          } catch (err) {
-            return '';
-          }
-        })();
-        if (frameUrl && frameUrl.includes('creative.dmzjmp.com') &&
-            checkedUrl.includes('go.dmzjmp.com/api/models')) {
-          if (forceDebug) {
-            console.log(formatLogMessage('debug', `Blocking potential infinite iframe loop: ${checkedUrl}`));
-          }
-          request.abort();
-          return;
-        }
         // Enhanced debug logging to show which frame the request came from
         if (forceDebug) {
@@ -3691,7 +3816,7 @@ function setupFrameHandling(page, forceDebug) {
               request.resourceType()
             );
-            if (result.blocked) {
+            if (result.blocked && !isMainFrameDoc) {
               adblockStats.blocked++;
               if (forceDebug) {
                 console.log(formatLogMessage('debug', `${messageColors.blocked('[adblock]')} ${checkedUrl} (${result.reason})`));
@@ -3699,6 +3824,12 @@ function setupFrameHandling(page, forceDebug) {
               request.abort('blockedbyclient');
               return;
             }
+            if (result.blocked && isMainFrameDoc && forceDebug) {
+              // Matched a filter rule but it's the page we're scanning (or a
+              // main-frame redirect target) — allow it (blocking the top-level
+              // document aborts navigation). It still flows through the matcher.
+              console.log(formatLogMessage('debug', `${messageColors.highlight('[adblock]')} top-level document ${checkedUrl} matched (${result.reason}) — allowed (never block the scanned page)`));
+            }
             adblockStats.allowed++;
           } catch (err) { /* Silently continue on adblock errors */ }
         }
@@ -3752,7 +3883,7 @@ function setupFrameHandling(page, forceDebug) {
         // check so domain-based blocks short-circuit without paying the
         // per-URL regex scan. Same abort reason as the static path so
         // request.failure() observers see consistent metadata.
-        if (reqDomain && _dynamicallyBlockedDomains.size > 0 && matchesDynamicBlock(reqDomain)) {
+        if (reqDomain && _dynamicallyBlockedDomains.size > 0 && matchesDynamicBlock(reqDomain) && !isMainFrameDoc) {
           if (forceDebug) {
             console.log(formatLogMessage('debug', `${BLOCK_DOMAINS_BY_URL_TAG} aborting ${reqUrl} (domain ${reqDomain} dynamically blocked)`));
           }
@@ -3767,7 +3898,7 @@ function setupFrameHandling(page, forceDebug) {
             break;
           }
         }
-        if (blockedMatchIndex !== -1) {
+        if (blockedMatchIndex !== -1 && !isMainFrameDoc) {
           // Always track the hit (zero-cost on the un-debug path) so the
           // scan-end summary can show which patterns are doing work vs.
           // which are stale and ready to prune. Keyed by pattern.source --
@@ -4349,15 +4480,114 @@ function setupFrameHandling(page, forceDebug) {
         try {
           navigationResult = await navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOptions, forceDebug, formatLogMessage);
         } catch (navErr) {
-          // Only retry on genuine timeouts, not chrome-error:// redirects
+          // Only handle genuine timeouts here, not chrome-error:// redirects.
+          // pageUrl === 'about:blank' means the navigation never committed
+          // (server never responded) — treat as a real failure, not a partial
+          // page; only a page that actually reached a URL is worth observing.
           let pageUrl = '';
           try { if (!page.isClosed()) pageUrl = page.url(); } catch {}
           const isPopupFailure = navErr.message.includes('chrome-error://') || navErr.message.includes('invalid URL') ||
             pageUrl.startsWith('chrome-error://') || pageUrl === 'about:blank';
           if ((navErr.message.includes('timeout') || navErr.message.includes('Timeout')) && !isPopupFailure) {
-            if (forceDebug) console.log(formatLogMessage('debug', `Navigation timeout, retrying with waitUntil:networkidle2 for ${currentUrl}`));
-            const fallbackOptions = { ...gotoOptions, waitUntil: 'networkidle2', timeout: Math.min(timeout, 10000) };
-            navigationResult = await navigateWithRedirectHandling(page, currentUrl, siteConfig, fallbackOptions, forceDebug, formatLogMessage);
+            // The OLD fallback retried with networkidle2 — STRICTER than the
+            // domcontentloaded default, so it could never rescue a
+            // domcontentloaded timeout (and Puppeteer 25 has no 'commit', i.e.
+            // nothing more lenient). Two-tier recovery instead:
+            //   1. If the site used a wait STRICTER than domcontentloaded, do one
+            //      lenient retry with domcontentloaded (it fires earlier).
+            //   2. Otherwise proceed with the partially-loaded page rather than
+            //      discarding the URL — it exists and requests already fired
+            //      (captured by page.on('request')); the delay/interact phase
+            //      below keeps capturing. Streaming/embed/media pages routinely
+            //      never reach DOM-ready (a connection stays open) but their
+            //      ad/tracker calls fired early.
+            const primaryWait = gotoOptions.waitUntil || defaultWaitUntil;
+            let recovered = false;
+            if (primaryWait !== 'domcontentloaded') {
+              try {
+                if (forceDebug) console.log(formatLogMessage('debug', `Navigation timeout (${primaryWait}), retrying with waitUntil:domcontentloaded for ${currentUrl}`));
+                const fallbackOptions = { ...gotoOptions, waitUntil: 'domcontentloaded', timeout: Math.min(timeout, 15000) };
+                navigationResult = await navigateWithRedirectHandling(page, currentUrl, siteConfig, fallbackOptions, forceDebug, formatLogMessage);
+                recovered = true;
+              } catch (_) { /* fall through to proceed-with-partial */ }
+            }
+            if (!recovered) {
+              let partialUrl = currentUrl;
+              try { if (!page.isClosed()) partialUrl = page.url() || currentUrl; } catch {}
+              if (forceDebug) console.log(formatLogMessage('debug', `Navigation timeout — proceeding with partially-loaded page for ${currentUrl}`));
+              navigationResult = { finalUrl: partialUrl, redirected: false, redirectChain: [currentUrl], originalUrl: currentUrl, redirectDomains: [], httpStatus: null, cfRay: null };
+            }
+          } else if (navErr.message.includes('ERR_TOO_MANY_REDIRECTS')) {
+            // Redirect-cloaking chain exceeded Chrome's ~20-hop per-navigation
+            // ceiling, so goto() rejected. Two recovery paths — they cover
+            // opposite cases run-to-run, so try both:
+            //   1. Browser ride-through (free): a JS/meta hop on a committed
+            //      intermediate page resets Chrome's counter and carries the page
+            //      to the end site on its own. Check if it already happened, else
+            //      wait briefly for it.
+            //   2. curl-resolve (fallback, only if the page parked on
+            //      chrome-error): curl follows the chain (it gets the real chain,
+            //      not headless Chrome's endless loop) to the JS-handoff page;
+            //      navigating there directly is a SHORT hop that reaches the end
+            //      site. Skipped under proxy/VPN — curl runs DIRECT from the host
+            //      and would leak the real IP / resolve from the wrong network.
+            // If neither reaches a real page, keep the chain requests already
+            // captured (grouped under the original URL, never chrome-error).
+            let landedUrl = '';
+            const isRealPage = (u) => !!u && /^https?:\/\//.test(u) && !u.startsWith('chrome-error://') && u !== currentUrl;
+            // 1) Browser ride-through — may have completed during goto(); if not,
+            //    wait for the next navigation(s) to carry it through.
+            try { if (!page.isClosed() && isRealPage(page.url())) landedUrl = page.url(); } catch {}
+            for (let r = 0; r < 3 && !landedUrl; r++) {
+              try {
+                await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 8000 });
+                if (!page.isClosed() && isRealPage(page.url())) landedUrl = page.url();
+              } catch { break; } // no further navigation — stop waiting
+            }
+            if (landedUrl && forceDebug) console.log(formatLogMessage('debug', `Too many redirects — browser rode through to ${landedUrl} for ${currentUrl}`));
+            // 2) curl-resolve fallback — only if still parked (no ride-through).
+            //    Opt-in via the site's `curl` option: if you didn't enable curl
+            //    in the config, the scanner won't shell out to it here either
+            //    (consistent with the content-analysis `curl` gate).
+            if (!landedUrl) {
+              const curlResolveOk = siteConfig.curl === true && !needsProxy(siteConfig) && !anyVpnConfigured && validateCurlAvailability().isAvailable;
+              if (curlResolveOk) {
+                let resolvedUrl = '';
+                try {
+                  const curlUa = USER_AGENT_COLLECTIONS.get((siteConfig.userAgent || 'chrome').toLowerCase())
+                    || 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36';
+                  const cr = await runProcess('curl', ['-sL', '--max-redirs', '50', '--max-time', '20', '-o', '/dev/null', '-A', curlUa, '-w', '%{url_effective}', currentUrl], { timeout: 22000, maxStdout: 4096 });
+                  const u = (cr.stdout || '').trim();
+                  if (cr.code === 0 && /^https?:\/\//.test(u) && u !== currentUrl) resolvedUrl = u;
+                } catch (_) { /* curl failed */ }
+                if (resolvedUrl) {
+                  if (forceDebug) console.log(formatLogMessage('debug', `Too many redirects — curl resolved the chain to ${resolvedUrl}; navigating there directly for ${currentUrl}`));
+                  // Navigate to the resolved endpoint; the streaming/embed end page
+                  // often never reaches DOM-ready, so the goto may throw — either
+                  // way it navigated, so adopt page.url().
+                  try { navigationResult = await navigateWithRedirectHandling(page, resolvedUrl, siteConfig, gotoOptions, forceDebug, formatLogMessage); } catch (_) { /* timed out — use page.url() below */ }
+                  try { if (!page.isClosed() && page.url() && !page.url().startsWith('chrome-error://')) landedUrl = page.url(); } catch {}
+                } else if (forceDebug) {
+                  console.log(formatLogMessage('debug', `Too many redirects — no ride-through and curl could not resolve; keeping chain captures for ${currentUrl}`));
+                }
+              } else if (forceDebug) {
+                const why = siteConfig.curl !== true ? 'curl not enabled (curl:false)'
+                  : (needsProxy(siteConfig) || anyVpnConfigured) ? 'proxy/VPN active'
+                  : 'curl unavailable';
+                console.log(formatLogMessage('debug', `Too many redirects — no ride-through and curl-resolve skipped (${why}); keeping chain captures for ${currentUrl}`));
+              }
+            }
+            // navigateWithRedirectHandling may already have set navigationResult
+            // (clean curl path). Otherwise build a partial from where we landed —
+            // the end site if we rode through / curl'd, else the original URL with
+            // the chain requests already captured.
+            if (!navigationResult) {
+              const fu = landedUrl || currentUrl;
+              navigationResult = { finalUrl: fu, redirected: fu !== currentUrl, redirectChain: [currentUrl, fu], originalUrl: currentUrl, redirectDomains: [], httpStatus: null, cfRay: null };
+            }
           } else {
             throw navErr;
           }
@@ -4403,17 +4633,26 @@ function setupFrameHandling(page, forceDebug) {
           redirectHistory.add(currentUrl);
           redirectHistory.add(finalUrl);
-          // Add redirect destination to first-party domains immediately
-          if (finalDomain) {
-            firstPartyDomains.add(finalDomain);
-          }
-          // Also add any intermediate redirect domains as first-party
-          if (redirectDomains && redirectDomains.length > 0) {
-            redirectDomains.forEach(domain => {
-              const rootDomain = safeGetDomain(`http://${domain}`, false);
-              if (rootDomain) firstPartyDomains.add(rootDomain);
-            });
+          // Add redirect destination (and intermediates) to first-party domains
+          // so the landed site's own resources aren't captured as third-party.
+          // Opt out with redirect_first_party:false — then redirect targets stay
+          // THIRD-PARTY and become eligible for filterRegex/dig under
+          // thirdParty:true (e.g. capturing an ad/cloak redirect's end domain).
+          // The originally-scanned domain (added earlier) stays first-party.
+          const redirectsAreFirstParty = siteConfig.redirect_first_party !== false;
+          if (redirectsAreFirstParty) {
+            if (finalDomain) {
+              firstPartyDomains.add(finalDomain);
+            }
+            // Also add any intermediate redirect domains as first-party
+            if (redirectDomains && redirectDomains.length > 0) {
+              redirectDomains.forEach(domain => {
+                const rootDomain = safeGetDomain(`http://${domain}`, false);
+                if (rootDomain) firstPartyDomains.add(rootDomain);
+              });
+            }
+          } else if (forceDebug) {
+            console.log(formatLogMessage('debug', `redirect_first_party:false — keeping redirect target ${finalDomain} third-party for ${currentUrl}`));
           }
           if (originalDomain !== finalDomain) {
@@ -4630,13 +4869,85 @@ function setupFrameHandling(page, forceDebug) {
           // Capture hard "dead domain" navigation errors for --show-dead-domains
           // (DNS doesn't resolve / host unreachable). Blocks, timeouts and CF
           // challenges are NOT dead — they're excluded by this match.
-          const deadNav = /ERR_NAME_NOT_RESOLVED|ERR_ADDRESS_UNREACHABLE|ERR_DNS/.exec(err.message || '');
-          if (deadNav) recordDeadDomain(currentUrl, deadNav[0]);
+          // Only DEFINITIVE non-existence / unreachable signals — these now drive
+          // the in-scan dead-domain SKIP (not just --show-dead-domains reporting),
+          // so transient DNS errors must NOT match. The bare `ERR_DNS` used to
+          // catch ERR_DNS_TIMED_OUT / ERR_DNS_MALFORMED_RESPONSE / ERR_DNS_SERVER_FAILED
+          // (all transient) — dropped so a slow-DNS blip can't false-skip the
+          // rest of a live host's URLs.
+          const deadNav = /ERR_NAME_NOT_RESOLVED|ERR_ADDRESS_UNREACHABLE/.exec(err.message || '');
+          if (deadNav) {
+            recordDeadDomain(currentUrl, deadNav[0]);
+            // Corroborate-then-persist to the negative cache (.dnsnegcache with
+            // --dns-cache → cross-scan skip; else in-memory). Chrome resolves via
+            // the possibly-flaky SYSTEM resolver, so its ERR_NAME_NOT_RESOLVED may
+            // be a glitch on a LIVE host. Re-confirm via the reliable --dns
+            // resolver and cache ONLY if it ALSO returns a definitive NXDOMAIN.
+            // ERR_ADDRESS_UNREACHABLE is a routing failure (the host resolves),
+            // so the guard below skips it: no re-resolve is attempted and it is
+            // never cached. The re-confirm is fire-and-forget: off the critical
+            // path; saveDiskCache flushes on exit.
+            if (dnsPrecheckEnabled && deadNav[0] === 'ERR_NAME_NOT_RESOLVED') {
+              let navHost = '';
+              try { navHost = new URL(currentUrl).hostname; } catch {}
+              if (navHost && !/^[\d.:]+$|^\[/.test(navHost) && !dnsNegativeCache.has(navHost)) {
+                dnsResolver.resolveHost(navHost, dnsPrecheckTimeoutMs).then(
+                  () => { /* reliable resolver resolves it — system-resolver glitch, do NOT cache */ },
+                  (e) => {
+                    const code = (e && (e.code || e.message)) || '';
+                    if (isNonExistenceError(code)) {
+                      dnsNegativeCacheSet(navHost, code);
+                      recordDeadDomain(navHost, code);
+                      if (forceDebug) console.log(formatLogMessage('debug', `Dead domain confirmed by --dns resolver (${code}) — caching ${navHost} (skips next run with --dns-cache)`));
+                    }
+                  }
+                ).catch(() => {});
+              }
+            }
+          }
           throw err;
         }
       }
       }
+      // Targeted clicks: after load, click configured CSS selectors in order
+      // (e.g. a movie link, then a play button) to reach content via organic
+      // navigation/gesture instead of a direct deep-load (which some sites
+      // JS-redirect away). The request interceptor stays attached, so the
+      // post-click page's requests flow into the same filterRegex/dig matching.
+      // Reuses realistic_click for a genuine trusted gesture. Runs before the
+      // delay/interact phase so those operate on the resulting page.
+      if (Array.isArray(siteConfig.click_elements) && siteConfig.click_elements.length > 0 && page && !page.isClosed()) {
+        // If ghost-cursor is enabled for this site (cursor_mode:"ghost" or
+        // --ghost-cursor), route the targeted clicks through it — Bezier travel
+        // to the element + realistic press — matching the interact phase.
+        // Injected so interaction.js needn't require ghost-cursor.js (circular).
+        // Falls back to performTargetedClicks' humanClick/el.click when ghost is
+        // off or the package isn't installed (resolveGhostCursorConfig → null).
+        let ghostClicker = null;
+        const tcGhostCfg = resolveGhostCursorConfig(siteConfig, globalGhostCursor, forceDebug);
+        if (tcGhostCfg) {
+          const tcCursor = createGhostCursor(page, { forceDebug });
+          if (tcCursor) {
+            ghostClicker = (x, y) => ghostClick(tcCursor, { x, y }, {
+              hesitate: tcGhostCfg.hesitate,
+              page,
+              realistic: siteConfig.realistic_click === true,
+              forceDebug
+            });
+          }
+        }
+        try {
+          await performTargetedClicks(page, siteConfig.click_elements, {
+            realistic: siteConfig.realistic_click === true,
+            waitMs: Math.min(Number(siteConfig.click_wait) || 5000, Math.floor(timeout / 2)),
+            ghostClick: ghostClicker,
+            forceDebug
+          });
+        } catch (clickErr) {
+          if (forceDebug) console.log(formatLogMessage('debug', `${INTERACTION_TAG} click_elements phase failed for ${currentUrl}: ${clickErr.message}`));
+        }
+      }
       const delayMs = siteConfig.delay || TIMEOUTS.DEFAULT_DELAY;
       // Optimized delays for Puppeteer 23.x performance
@@ -4653,6 +4964,13 @@ function setupFrameHandling(page, forceDebug) {
       const actualDelay = siteConfig.delay_uncapped === true
         ? Math.min(delayMs, Math.floor(timeout / 2))
         : Math.min(delayMs, TIMEOUTS.NETWORK_IDLE);
+      // Surface the clamp — otherwise `delay: 48000` silently running as 29000
+      // (timeout/2) looks like the flag was ignored. The per-URL budget already
+      // reserves the full `delay`, so the lever to honor it is a larger timeout.
+      if (forceDebug && actualDelay < delayMs) {
+        const ceiling = siteConfig.delay_uncapped === true ? 'timeout/2; raise timeout to lift' : 'default 2s cap; set delay_uncapped:true to lift';
+        console.log(formatLogMessage('debug', `delay ${delayMs}ms clamped to ${actualDelay}ms (${ceiling}) for ${currentUrl}`));
+      }
       // Build delay promise (networkIdle + delay + optional flowProxy delay)
       const delayPromise = (async () => {
@@ -4925,6 +5243,21 @@ function setupFrameHandling(page, forceDebug) {
       let reloadSuccess = false;
+      // page.reload() can't carry a referer; when referrer_headers is set,
+      // re-navigate to the current URL with it so referer-gated embeds keep
+      // serving across the reload:N loop (the initial goto carries the referer,
+      // but reload() drops it). Nav-only scope — subresources keep their normal
+      // page-origin referer (unlike setExtraHTTPHeaders, which would force the
+      // referer onto every request and can break embeds whose subresources
+      // expect own-origin). A static referrer_headers string is identical each
+      // reload; random/mixed modes pick a fresh value per reload.
+      const reloadReferer = siteConfig.referrer_headers
+        ? getReferrerForUrl(currentUrl, siteConfig.referrer_headers, siteConfig.referrer_disable, forceDebug)
+        : '';
+      const reloadOrReferredGoto = (opts) => reloadReferer
+        ? page.goto(page.url(), { ...opts, referer: reloadReferer })
+        : page.reload(opts);
   // Skip force reload if browser seems unhealthy
   const skipForceReload = i > 2; // After 2 attempts, skip force reload
@@ -4947,7 +5280,7 @@ function setupFrameHandling(page, forceDebug) {
           await raceWithTimer(page.setCacheEnabled(false), 'Cache disable timeout', 8000);
             // Use networkidle2 for force reload to better detect when page is actually loaded
-            await page.reload({ waitUntil: 'networkidle2', timeout: Math.min(timeout, 15000) });
+            await reloadOrReferredGoto({ waitUntil: 'networkidle2', timeout: Math.min(timeout, 15000) });
           // Timeout-protected cache enable
           await raceWithTimer(page.setCacheEnabled(true), 'Cache enable timeout', 8000);
@@ -4986,7 +5319,7 @@ function setupFrameHandling(page, forceDebug) {
         ? { waitUntil: 'domcontentloaded', timeout: 10000 }  // Simpler after failures
         : { waitUntil: 'networkidle2', timeout: 15000 };     // Full wait first time
-      await page.reload(reloadOptions);
+      await reloadOrReferredGoto(reloadOptions);
           if (forceDebug) console.log(formatLogMessage('debug', `Standard reload #${i} completed for ${currentUrl}`));
         } catch (standardReloadErr) {
@@ -5263,7 +5596,7 @@ function setupFrameHandling(page, forceDebug) {
           const safeUrl = currentUrl.replace(/https?:\/\//, '').replace(/[^a-zA-Z0-9]/g, '_').substring(0, 80);
           const filename = `screenshots/${safeUrl}-${timestamp}.png`;
           try {
-            if (!fs.existsSync('screenshots')) fs.mkdirSync('screenshots', { recursive: true });
+            fs.mkdirSync('screenshots', { recursive: true }); // recursive:true is a no-op if it already exists
             await page.screenshot({ path: filename, type: 'png', fullPage: true });
             console.log(formatLogMessage('info', `Screenshot saved: ${filename}`));
           } catch (screenshotErr) {
@@ -5759,6 +6092,19 @@ function setupFrameHandling(page, forceDebug) {
        // actually starting — wrongly skipping live domains. c-ares isn't
        // threadpool-bound so it's immune to that contention.
        if (dnsPrecheckEnabled && taskDomain && !/^[\d.:]+$|^\[/.test(taskDomain)) {
+         // Already proven dead earlier THIS run — either a pre-check NXDOMAIN or
+         // a prior URL's navigation hit ERR_NAME_NOT_RESOLVED / ERR_ADDRESS_UNREACHABLE
+         // (recordDeadDomain populates _deadDomains for both). Skip the repeat
+         // instead of paying another fail-open navigation on a multi-URL dead
+         // host (e.g. dlstreams.top?id=39/54/347). In-scan only (NOT persisted):
+         // Chrome resolves via the system resolver, so a nav-level failure could
+         // be a system-resolver glitch on a live host — a false "dead" must not
+         // carry across runs. Cheap: a Map lookup, no DNS resolve.
+         if (_deadDomains.has(taskDomain)) {
+           dnsPrecheckSkips++;
+           if (forceDebug) console.log(formatLogMessage('debug', `DNS pre-check: ${taskDomain} already dead this run (${_deadDomains.get(taskDomain)}) — skipping`));
+           return { url: task.url, rules: [], success: false, error: `DNS: ${_deadDomains.get(taskDomain)}`, skipped: true };
+         }
          const cached = dnsNegativeCache.get(taskDomain);
          if (cached && Date.now() - cached.timestamp < DNS_NEGATIVE_CACHE_TTL_MS) {
            dnsPrecheckSkips++;
@@ -5833,10 +6179,24 @@ function setupFrameHandling(page, forceDebug) {
      const INTERACTION_OVERHEAD_MS = interactionOnForTask
        ? computeInteractionCeilingMs(createInteractionConfig(task.url, task.config))
        : 0;
+     // click_elements runs ONCE after load (before the delay/interact/reload
+     // phases): N selectors, each a settle/nav wait (click_wait, capped at
+     // timeout/2 — mirror the call site) plus ~2s for scroll + the click action
+     // (ghost Bezier travel is the slowest). Budget it so a heavy click chain
+     // can't trip the per-URL ceiling before the work that follows it. Not
+     // multiplied by reloadCount — the click phase is one-time.
+     const clickEls = Array.isArray(task.config.click_elements)
+       ? task.config.click_elements.filter(s => typeof s === 'string' && s.trim())
+       : [];
+     const clickWaitMs = clickEls.length
+       ? Math.min(Number(task.config.click_wait) || 5000, Math.floor((task.config.timeout || 35000) / 2))
+       : 0;
+     const CLICK_ELEMENTS_OVERHEAD_MS = clickEls.length * (clickWaitMs + 2000);
      const PER_URL_TIMEOUT_MS = Math.max(
        75000,
        (task.config.timeout || 35000)
          + ((task.config.delay || 0) + INTERACTION_OVERHEAD_MS) * (1 + reloadCount)
+         + CLICK_ELEMENTS_OVERHEAD_MS
          + 30000
      );
      // Feed the hang-check restart so it never escalates before this URL's own