npm - @vibecheckai/cli - Versions diffs - 3.1.8 → 3.2.0 - Mend

@vibecheckai/cli 3.1.8 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

package/bin/registry.js +106 -116
package/bin/runners/context/generators/mcp.js +18 -0
package/bin/runners/context/index.js +72 -4
package/bin/runners/context/proof-context.js +293 -1
package/bin/runners/context/security-scanner.js +311 -73
package/bin/runners/lib/analyzers.js +607 -20
package/bin/runners/lib/detectors-v2.js +172 -15
package/bin/runners/lib/entitlements-v2.js +48 -1
package/bin/runners/lib/evidence-pack.js +678 -0
package/bin/runners/lib/html-proof-report.js +913 -0
package/bin/runners/lib/missions/plan.js +231 -41
package/bin/runners/lib/missions/templates.js +125 -0
package/bin/runners/lib/scan-output.js +492 -253
package/bin/runners/lib/ship-output.js +901 -641
package/bin/runners/runCheckpoint.js +44 -3
package/bin/runners/runContext.d.ts +4 -0
package/bin/runners/runDoctor.js +10 -2
package/bin/runners/runFix.js +51 -341
package/bin/runners/runInit.js +11 -0
package/bin/runners/runPolish.d.ts +4 -0
package/bin/runners/runPolish.js +608 -29
package/bin/runners/runProve.js +210 -25
package/bin/runners/runReality.js +846 -101
package/bin/runners/runScan.js +238 -4
package/bin/runners/runShip.js +19 -3
package/bin/runners/runWatch.js +14 -1
package/bin/vibecheck.js +32 -2
package/mcp-server/consolidated-tools.js +408 -42
package/mcp-server/index.js +152 -15
package/mcp-server/proof-tools.js +571 -0
package/mcp-server/tier-auth.js +22 -19
package/mcp-server/tools-v3.js +744 -0
package/mcp-server/truth-firewall-tools.js +190 -4
package/package.json +3 -1
package/bin/runners/runInstall.js +0 -281
package/bin/runners/runLabs.js +0 -341

package/bin/runners/runReality.js CHANGED Viewed

@@ -1,5 +1,5 @@
 /**
- * Reality Mode v2 - Two-Pass Auth Verification + Dead UI Crawler
+ * Reality Mode v2 - Two-Pass Auth Verification + Dead UI Crawler + Fake Data Detection
  *
  * ═══════════════════════════════════════════════════════════════════════════════
  * ENTERPRISE EDITION - World-Class Terminal Experience
@@ -8,7 +8,7 @@
  * TIER ENFORCEMENT:
  * - FREE: Preview mode (5 pages, 20 clicks, no auth boundary)
  * - STARTER: Full budgets + basic auth verification
- * - PRO: Advanced auth boundary (multi-role, 2-pass)
+ * - PRO: Advanced auth boundary (multi-role, 2-pass) + fake data detection
  *
  * Pass A (anon): crawl + click, record which routes look protected
  * Pass B (auth): crawl same routes using storageState, verify protected routes accessible
@@ -17,6 +17,9 @@
  * - Dead UI (clicks that do nothing)
  * - HTTP errors (4xx/5xx)
  * - Auth coverage (protected route reachable anonymously = BLOCK)
+ * - Fake domain detection (localhost, jsonplaceholder, ngrok, mockapi.io)
+ * - Fake response detection (demo IDs, test keys, placeholder data)
+ * - Mock status codes (418, 999, etc.)
  * - Route coverage stats
  */
@@ -152,10 +155,177 @@ ${rgb(255, 80, 40)}   ╚████╔╝ ██║██████╔╝█
 ${rgb(255, 60, 20)}    ╚═══╝  ╚═╝╚═════╝ ╚══════╝ ╚═════╝╚═╝  ╚═╝╚══════╝ ╚═════╝╚═╝  ╚═╝${c.reset}
 ${c.dim}  ┌─────────────────────────────────────────────────────────────────────┐${c.reset}
-${c.dim}  │${c.reset}  ${rgb(255, 150, 100)}🎭${c.reset} ${c.bold}REALITY${c.reset} ${c.dim}•${c.reset} ${rgb(200, 200, 200)}Runtime UI Proof${c.reset} ${c.dim}•${c.reset} ${rgb(150, 150, 150)}Dead UI Detection${c.reset}       ${c.dim}│${c.reset}
+${c.dim}  │${c.reset}  ${rgb(255, 150, 100)}🎭${c.reset} ${c.bold}REALITY${c.reset} ${c.dim}•${c.reset} ${rgb(200, 200, 200)}Dead UI${c.reset} ${c.dim}•${c.reset} ${rgb(150, 150, 150)}Fake Data${c.reset} ${c.dim}•${c.reset} ${rgb(100, 200, 255)}Auth Coverage${c.reset}     ${c.dim}│${c.reset}
 ${c.dim}  └─────────────────────────────────────────────────────────────────────┘${c.reset}
 `;
+// ═══════════════════════════════════════════════════════════════════════════════
+// FAKE DATA DETECTION PATTERNS WITH CONFIDENCE SCORING
+// (from reality-mode/reality-scanner.ts)
+// Each pattern has a confidence level to reduce false positives
+// ═══════════════════════════════════════════════════════════════════════════════
+/**
+ * URL patterns that indicate a mock, demo, or development backend.
+ *
+ * Entry fields, as consumed by classifyNetworkTraffic:
+ * - pattern: RegExp tested against the full request URL (unanchored, so it can
+ *   match anywhere in the URL, including the path or query string)
+ * - name: human-readable label interpolated into the detection's evidence text
+ * - confidence: 0-1 score copied onto the detection
+ * - severity: 'BLOCK' or 'WARN', copied onto the detection
+ * - devContextOk: when true, a match is ignored in a dev context
+ * - stagingContextOk: when true, a match is ignored in a staging context
+ *
+ * Order matters: the classifier stops at the first entry that produces a detection.
+ */
+const FAKE_DOMAIN_PATTERNS = [
+  // CRITICAL: These are almost certainly fake backends (confidence: 0.90+)
+  { pattern: /jsonplaceholder\.typicode\.com/i, name: "JSONPlaceholder mock API", confidence: 0.99, severity: 'BLOCK' },
+  { pattern: /reqres\.in/i, name: "ReqRes mock API", confidence: 0.99, severity: 'BLOCK' },
+  { pattern: /mockapi\.io/i, name: "MockAPI.io", confidence: 0.99, severity: 'BLOCK' },
+  { pattern: /mocky\.io/i, name: "Mocky.io", confidence: 0.99, severity: 'BLOCK' },
+  { pattern: /httpbin\.org/i, name: "HTTPBin testing API", confidence: 0.95, severity: 'BLOCK' },
+  { pattern: /api\.example\.com/i, name: "Example.com API", confidence: 0.95, severity: 'BLOCK' },
+  { pattern: /fake\.api/i, name: "Fake API pattern", confidence: 0.95, severity: 'BLOCK' },
+  { pattern: /demo\.api/i, name: "Demo API pattern", confidence: 0.90, severity: 'BLOCK' },
+  // HIGH: Likely development/testing (confidence: 0.7-0.9)
+  // NOTE: These could be legitimate in dev/CI contexts
+  { pattern: /localhost:\d+/i, name: "Localhost API", confidence: 0.75, severity: 'WARN', devContextOk: true },
+  { pattern: /127\.0\.0\.1:\d+/i, name: "Loopback API", confidence: 0.75, severity: 'WARN', devContextOk: true },
+  { pattern: /\.ngrok\.io/i, name: "Ngrok tunnel", confidence: 0.80, severity: 'WARN', devContextOk: true },
+  { pattern: /\.ngrok-free\.app/i, name: "Ngrok free tunnel", confidence: 0.80, severity: 'WARN', devContextOk: true },
+  // MEDIUM: Could be legitimate staging (confidence: 0.5-0.7)
+  // NOTE: Many organizations have legitimate staging environments
+  { pattern: /staging\.[^/]+\/api/i, name: "Staging API endpoint", confidence: 0.60, severity: 'WARN', stagingContextOk: true },
+  { pattern: /\.local\//i, name: "Local domain", confidence: 0.50, severity: 'WARN', devContextOk: true },
+  { pattern: /\.test\//i, name: "Test domain", confidence: 0.50, severity: 'WARN', devContextOk: true },
+];
+/**
+ * Response-body patterns that indicate demo, test, or placeholder data.
+ *
+ * Entry fields, as consumed by classifyNetworkTraffic:
+ * - pattern: RegExp tested against the raw response body text
+ * - name: human-readable label interpolated into the detection's evidence text
+ * - confidence: 0-1 score copied onto the detection
+ * - severity: 'BLOCK' or 'WARN', copied onto the detection
+ * - docsContextOk: when true, the pattern is not evaluated for docs-like pages
+ *
+ * Unlike the domain list, every matching entry yields its own detection.
+ */
+const FAKE_RESPONSE_PATTERNS = [
+  // CRITICAL: Test API keys exposed (security issue)
+  { pattern: /sk_test_[a-zA-Z0-9]{20,}/i, name: "Test Stripe secret key", confidence: 0.99, severity: 'BLOCK' },
+  { pattern: /pk_test_[a-zA-Z0-9]{20,}/i, name: "Test Stripe public key", confidence: 0.95, severity: 'WARN' },
+  // HIGH: Clearly fake IDs/data
+  { pattern: /inv_demo_[a-zA-Z0-9]+/i, name: "Demo invoice ID", confidence: 0.95, severity: 'BLOCK' },
+  { pattern: /user_demo_[a-zA-Z0-9]+/i, name: "Demo user ID", confidence: 0.95, severity: 'BLOCK' },
+  { pattern: /cus_demo_[a-zA-Z0-9]+/i, name: "Demo customer ID", confidence: 0.95, severity: 'BLOCK' },
+  { pattern: /sub_demo_[a-zA-Z0-9]+/i, name: "Demo subscription ID", confidence: 0.95, severity: 'BLOCK' },
+  { pattern: /"mock":\s*true/i, name: "Mock flag enabled", confidence: 0.95, severity: 'BLOCK' },
+  { pattern: /"isDemo":\s*true/i, name: "Demo mode flag", confidence: 0.95, severity: 'BLOCK' },
+  { pattern: /"status":\s*"simulated"/i, name: "Simulated status", confidence: 0.90, severity: 'BLOCK' },
+  // MEDIUM: Placeholder content (could be legitimate in docs/examples)
+  // NOTE: Need context awareness - these are fine in documentation/help pages
+  { pattern: /lorem\s+ipsum\s+dolor/i, name: "Lorem ipsum placeholder", confidence: 0.70, severity: 'WARN', docsContextOk: true },
+  { pattern: /john\.doe@/i, name: "John Doe placeholder email", confidence: 0.65, severity: 'WARN', docsContextOk: true },
+  { pattern: /jane\.doe@/i, name: "Jane Doe placeholder email", confidence: 0.65, severity: 'WARN', docsContextOk: true },
+  { pattern: /user@example\.com/i, name: "Example.com email", confidence: 0.50, severity: 'WARN', docsContextOk: true },
+  { pattern: /placeholder\.(com|jpg|png)/i, name: "Placeholder domain/image", confidence: 0.60, severity: 'WARN', docsContextOk: true },
+  // LOWER: Could have many false positives
+  // NOTE(review): the confidences in this tier (0.60-0.75) overlap or exceed the
+  // MEDIUM tier above (0.50-0.70); confirm the scores reflect the intended ranking.
+  { pattern: /"id":\s*"demo"/i, name: "Demo ID value", confidence: 0.70, severity: 'WARN' },
+  { pattern: /"id":\s*"test"/i, name: "Test ID value", confidence: 0.60, severity: 'WARN' },
+  { pattern: /"success":\s*true[^}]*"demo"/i, name: "Demo success response", confidence: 0.75, severity: 'WARN' },
+];
+// URLs that are allowed and should skip detection.
+// Each entry is tested against the whole request URL; the first match makes
+// classifyNetworkTraffic return null before any domain, body, or status check.
+// The patterns are unanchored: /readme/i and /changelog/i match anywhere in the
+// URL, and /\/swagger/i and /\/openapi/i match any path segment with that prefix.
+const FAKE_DETECTION_ALLOWLIST = [
+  /\/docs?\//i,          // Documentation pages
+  /\/help\//i,           // Help pages
+  /\/examples?\//i,      // Example pages
+  /\/demo\//i,           // Demo pages (intentional)
+  /\/playground\//i,     // Playground/sandbox
+  /\/api-docs?\//i,      // API documentation
+  /\/swagger/i,          // Swagger docs
+  /\/openapi/i,          // OpenAPI docs
+  /readme/i,             // README content
+  /changelog/i,          // Changelog
+];
+/**
+ * Classify a network request/response for fake data patterns.
+ *
+ * Enhanced with:
+ * - Confidence scoring to reduce false positives
+ * - Context awareness (dev, staging, docs)
+ * - Allowlist for legitimate use cases
+ *
+ * @param {string} url - Request URL; tested against the static-asset filter,
+ *   the allowlist, and the fake-domain patterns.
+ * @param {string} [responseBody] - Response body text. Non-string values and
+ *   bodies shorter than 20 characters skip the body-pattern checks only.
+ * @param {number} [status] - HTTP status code of the response.
+ * @param {{isDev?: boolean, isStaging?: boolean, isDocsPage?: boolean}} [context]
+ *   Optional overrides that force the dev/staging/docs context on.
+ * @returns {Array<Object>|null} Detections shaped as
+ *   { type, severity, evidence, url, confidence, pattern? }, or null if clean.
+ */
+function classifyNetworkTraffic(url, responseBody, status, context = {}) {
+  // Skip static assets (images, fonts, stylesheets, scripts)
+  if (/\.(js|css|png|jpg|jpeg|svg|ico|woff|woff2|ttf|eot|gif|webp|mp4|webm|pdf)(\?|$)/i.test(url)) {
+    return null;
+  }
+  // Check allowlist - skip detection for documentation/example URLs
+  if (FAKE_DETECTION_ALLOWLIST.some((allowPattern) => allowPattern.test(url))) {
+    return null;
+  }
+  const detections = [];
+  const isDev = context.isDev || process.env.NODE_ENV === 'development';
+  const isStaging = context.isStaging || /staging|stg|preprod/i.test(url);
+  // NOTE(review): this test is unanchored, so any URL containing "doc", "help",
+  // "example" or "readme" anywhere (e.g. "/api/doctors") counts as a docs page.
+  const isDocsPage = context.isDocsPage || /docs?|help|example|readme/i.test(url);
+  // Check for fake domain patterns
+  for (const { pattern, name, confidence, severity, devContextOk, stagingContextOk } of FAKE_DOMAIN_PATTERNS) {
+    if (pattern.test(url)) {
+      // Skip if this pattern is OK in current context
+      if (devContextOk && isDev) continue;
+      if (stagingContextOk && isStaging) continue;
+      detections.push({
+        type: 'fake-domain',
+        severity,
+        evidence: `URL matches fake domain pattern: ${name}`,
+        url,
+        confidence,
+        pattern: pattern.source
+      });
+      break; // One domain match is enough
+    }
+  }
+  // Check response body for fake data patterns.
+  // Very short bodies (< 20 chars) are unlikely to carry meaningful data, so only
+  // the body scan is skipped for them. Previously a short body returned early,
+  // which also bypassed the status-code check and the confidence filter below.
+  if (typeof responseBody === 'string' && responseBody.length >= 20) {
+    for (const { pattern, name, confidence, severity, docsContextOk } of FAKE_RESPONSE_PATTERNS) {
+      // Skip patterns that are OK in docs context
+      if (docsContextOk && isDocsPage) continue;
+      if (pattern.test(responseBody)) {
+        detections.push({
+          type: 'fake-response',
+          severity,
+          evidence: `Response contains ${name}`,
+          url,
+          confidence,
+          pattern: pattern.source
+        });
+      }
+    }
+  }
+  // Check for suspicious status codes (with lower confidence)
+  if (status === 418 || status === 999 || status === 0) {
+    detections.push({
+      type: 'mock-status',
+      severity: 'WARN',
+      evidence: `Suspicious HTTP status code: ${status}`,
+      url,
+      confidence: 0.60 // Lower confidence - could be legitimate
+    });
+  }
+  // Filter out low-confidence detections if we have high-confidence ones
+  const highConfidence = detections.filter(d => d.confidence >= 0.80);
+  if (highConfidence.length > 0 && detections.length > highConfidence.length) {
+    // Return only high-confidence detections to reduce noise
+    return highConfidence;
+  }
+  return detections.length > 0 ? detections : null;
+}
 // ═══════════════════════════════════════════════════════════════════════════════
 // ICONS & SYMBOLS
 // ═══════════════════════════════════════════════════════════════════════════════
@@ -420,6 +590,10 @@ function getCategoryIcon(category) {
     'DeadUI': ICONS.deadUI,
     'AuthCoverage': ICONS.shield,
     'HTTPError': ICONS.http,
+    // Fake data detection categories
+    'FakeDomain': '🔗',
+    'FakeResponse': '🎭',
+    'MockStatus': '📡',
   };
   return icons[category] || ICONS.bullet;
 }
@@ -429,6 +603,10 @@ function getCategoryColor(category) {
     'DeadUI': colors.deadUI,
     'AuthCoverage': colors.authCoverage,
     'HTTPError': colors.httpError,
+    // Fake data detection categories - all critical (red/orange)
+    'FakeDomain': rgb(255, 80, 80),    // Red - critical
+    'FakeResponse': rgb(255, 100, 60), // Orange-red
+    'MockStatus': rgb(255, 150, 50),   // Amber
   };
   return categoryColors[category] || colors.accent;
 }
@@ -645,6 +823,17 @@ function printHelp(opts = {}) {
     ${colors.accent}--timeout <ms>${c.reset}           Page timeout ${c.dim}(default: 15000)${c.reset}
     ${colors.accent}--help, -h${c.reset}               Show this help
+  ${c.bold}Visual Artifacts:${c.reset}
+    ${colors.accent}--video, --record-video${c.reset}  Record video of browser sessions
+    ${colors.accent}--trace, --record-trace${c.reset}  Record Playwright trace (viewable in trace.playwright.dev)
+    ${colors.accent}--har, --record-har${c.reset}      Record HAR network traffic
+  ${c.bold}Flakiness Reduction:${c.reset}
+    ${colors.accent}--retries <n>${c.reset}            Retry failed nav/clicks ${c.dim}(default: 2)${c.reset}
+    ${colors.accent}--stable-wait <ms>${c.reset}       Wait after actions ${c.dim}(default: 500ms)${c.reset}
+    ${colors.accent}--stability-runs <n>${c.reset}     Run N times for stability check ${c.dim}(default: 1)${c.reset}
+    ${colors.accent}--flaky-threshold <f>${c.reset}    Min occurrence rate to report ${c.dim}(default: 0.66)${c.reset}
   ${c.bold}Tier Limits:${c.reset}
     ${c.dim}FREE${c.reset}      5 pages, no auth boundary
     ${c.dim}STARTER${c.reset}   Full budgets + basic auth
@@ -784,23 +973,76 @@ async function clickOutcome(page, locator, opts = {}) {
   const beforeUrl = page.url();
   const beforeReq = opts.reqCounter.value;
+  // Enhanced mutation observer that detects more changes including:
+  // - DOM structure changes (childList, subtree)
+  // - Attribute changes (class, style, aria-*, data-*)
+  // - CSS visibility/display changes
   const domPromise = page.evaluate(() => {
     return new Promise((resolve) => {
-      const obs = new MutationObserver(() => { obs.disconnect(); resolve({ changed: true }); });
-      obs.observe(document.documentElement, { childList: true, subtree: true, attributes: true });
-      setTimeout(() => { try { obs.disconnect(); } catch {} resolve({ changed: false }); }, 900);
+      let changeCount = 0;
+      let attributeChanges = [];
+      const obs = new MutationObserver((mutations) => {
+        for (const mutation of mutations) {
+          changeCount++;
+          if (mutation.type === 'attributes') {
+            attributeChanges.push({
+              attr: mutation.attributeName,
+              target: mutation.target.tagName
+            });
+          }
+        }
+      });
+      obs.observe(document.documentElement, {
+        childList: true,
+        subtree: true,
+        attributes: true,
+        attributeFilter: ['class', 'style', 'aria-expanded', 'aria-hidden', 'aria-selected',
+                          'data-state', 'hidden', 'open', 'data-open', 'data-closed']
+      });
+      setTimeout(() => {
+        try { obs.disconnect(); } catch {}
+        resolve({
+          changed: changeCount > 0,
+          changeCount,
+          attributeChanges: attributeChanges.slice(0, 10) // Limit for performance
+        });
+      }, 900);
     });
   });
+  // Also track CSS visibility changes via computed styles
+  const beforeVisibility = await page.evaluate(() => {
+    const modals = document.querySelectorAll('[role="dialog"], .modal, .dropdown, .popover, [data-state]');
+    return Array.from(modals).slice(0, 20).map(el => ({
+      visible: getComputedStyle(el).display !== 'none' && getComputedStyle(el).visibility !== 'hidden',
+      state: el.getAttribute('data-state')
+    }));
+  }).catch(() => []);
   const navPromise = page.waitForNavigation({ timeout: 1200 }).then(() => true).catch(() => false);
   const clickRes = await locator.click({ timeout: 1200 }).then(() => ({ ok: true })).catch((e) => ({ ok: false, error: String(e?.message || e) }));
   const navRes = await navPromise;
   const domRes = await domPromise;
-  await page.waitForTimeout(250);
+  await page.waitForTimeout(300); // Slightly longer wait for CSS transitions
   const afterSig = await pageSignature(page);
   const afterUrl = page.url();
+  // Check CSS visibility changes
+  const afterVisibility = await page.evaluate(() => {
+    const modals = document.querySelectorAll('[role="dialog"], .modal, .dropdown, .popover, [data-state]');
+    return Array.from(modals).slice(0, 20).map(el => ({
+      visible: getComputedStyle(el).display !== 'none' && getComputedStyle(el).visibility !== 'hidden',
+      state: el.getAttribute('data-state')
+    }));
+  }).catch(() => []);
+  // Detect visibility state changes (modal open/close, dropdown toggle, etc.)
+  const visibilityChanged = JSON.stringify(beforeVisibility) !== JSON.stringify(afterVisibility);
   return {
     clickOk: clickRes.ok,
@@ -808,6 +1050,8 @@ async function clickOutcome(page, locator, opts = {}) {
     navHappened: !!navRes,
     urlChanged: normalizeUrl(afterUrl) !== normalizeUrl(beforeUrl),
     domChanged: !!domRes?.changed || afterSig !== beforeSig,
+    visibilityChanged, // NEW: Tracks CSS visibility/state changes
+    changeCount: domRes?.changeCount || 0, // NEW: Number of mutations detected
     reqDelta: Math.max(0, opts.reqCounter.value - beforeReq),
     beforeUrl,
     afterUrl
@@ -822,16 +1066,55 @@ async function collectLinks(page, baseUrl) {
 async function collectInteractives(page) {
   return page.evaluate(() => {
     const nodes = Array.from(document.querySelectorAll("button, a[href], input[type='submit'], [role='button'], [onclick]"));
-    return nodes.slice(0, 80).map((el, idx) => ({
-      idx,
-      tag: el.tagName.toLowerCase(),
-      role: el.getAttribute("role") || "",
-      href: el.tagName === "A" ? el.getAttribute("href") || "" : "",
-      text: (el.getAttribute("aria-label") || el.innerText || "").trim().slice(0, 80),
-      id: el.id || "",
-      disabled: !!(el.disabled || el.getAttribute("aria-disabled") === "true"),
-      key: `${el.tagName}|${el.id}|${idx}`
-    }));
+    return nodes.slice(0, 80).map((el, idx) => {
+      const text = (el.getAttribute("aria-label") || el.innerText || "").trim().slice(0, 80).toLowerCase();
+      const classList = Array.from(el.classList).join(' ').toLowerCase();
+      const id = (el.id || "").toLowerCase();
+      const dataTestId = el.getAttribute("data-testid") || "";
+      // Detect element context for false positive reduction
+      const isInsideModal = !!el.closest('[role="dialog"], [role="alertdialog"], .modal, .dialog, [data-radix-dialog-content]');
+      const isInsideDropdown = !!el.closest('[role="menu"], [role="listbox"], .dropdown, .popover, [data-radix-menu-content]');
+      const isInsideAccordion = !!el.closest('[role="region"], .accordion, [data-state], [data-radix-accordion-content]');
+      const isInsideTooltip = !!el.closest('[role="tooltip"], .tooltip');
+      // Detect button intent for false positive reduction
+      const looksLikeClose = /close|dismiss|cancel|x|×|✕|✖/i.test(text) || /close|dismiss/i.test(classList);
+      const looksLikeToggle = /toggle|expand|collapse|show|hide|menu|hamburger|more/i.test(text) || /toggle|accordion|collaps/i.test(classList);
+      const looksLikeCopy = /copy|clipboard/i.test(text) || /copy/i.test(classList);
+      const looksLikeTheme = /theme|dark|light|mode/i.test(text) || /theme/i.test(classList);
+      const looksLikeTab = el.getAttribute("role") === "tab" || /tab/i.test(classList);
+      const looksLikeSort = /sort|order|filter/i.test(text);
+      // Elements that legitimately may not trigger detectable changes
+      const isLikelyFalsePositive = looksLikeClose || looksLikeToggle || looksLikeCopy ||
+                                     looksLikeTheme || looksLikeTab || looksLikeSort ||
+                                     isInsideModal || isInsideDropdown || isInsideTooltip;
+      return {
+        idx,
+        tag: el.tagName.toLowerCase(),
+        role: el.getAttribute("role") || "",
+        href: el.tagName === "A" ? el.getAttribute("href") || "" : "",
+        text: (el.getAttribute("aria-label") || el.innerText || "").trim().slice(0, 80),
+        id: el.id || "",
+        disabled: !!(el.disabled || el.getAttribute("aria-disabled") === "true"),
+        key: `${el.tagName}|${el.id}|${idx}`,
+        // Context for false positive reduction
+        context: {
+          isInsideModal,
+          isInsideDropdown,
+          isInsideAccordion,
+          isInsideTooltip,
+          looksLikeClose,
+          looksLikeToggle,
+          looksLikeCopy,
+          looksLikeTheme,
+          looksLikeTab,
+          isLikelyFalsePositive
+        }
+      };
+    });
   });
 }
@@ -864,20 +1147,92 @@ async function attemptLogin(page, { auth }) {
   }
 }
-async function runSinglePass({ label, baseUrl, context, shotsDir, danger, maxPages, maxDepth, timeoutMs, root, onProgress }) {
+async function runSinglePass({ label, baseUrl, context, shotsDir, danger, maxPages, maxDepth, timeoutMs, root, onProgress, retries = 2, stableWait = 500 }) {
   const page = await context.newPage();
   page.setDefaultTimeout(timeoutMs);
+  /**
+   * Flaky-resistant navigation: go to `targetUrl`, retrying on failure.
+   *
+   * `retries` (from the enclosing scope) is the total attempt budget. At least
+   * one attempt is always made, so a budget of 0, a negative value, or NaN can
+   * no longer skip navigation and leave the previously loaded page in place.
+   *
+   * @param {string} targetUrl - URL to navigate to.
+   * @param {object} [opts] - Extra page.goto options; they override the
+   *   default `waitUntil: "domcontentloaded"`.
+   * @returns {Promise<object|null>} Whatever page.goto resolved with.
+   * @throws Rethrows the navigation error from the final failed attempt.
+   */
+  async function safeGoto(targetUrl, opts = {}) {
+    const budget = Number(retries);
+    const maxAttempts = Number.isFinite(budget) ? Math.max(1, Math.floor(budget)) : 1;
+    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+      try {
+        const res = await page.goto(targetUrl, { waitUntil: "domcontentloaded", ...opts });
+        // Wait for stability to reduce flakiness
+        if (stableWait > 0) {
+          await page.waitForTimeout(stableWait);
+        }
+        // Best effort: a page that never goes network-idle is still usable
+        await page.waitForLoadState("networkidle", { timeout: 6000 }).catch(() => {});
+        return res;
+      } catch (err) {
+        if (attempt === maxAttempts) throw err;
+        await page.waitForTimeout(500 * attempt); // Linear backoff: 500ms, 1000ms, ...
+      }
+    }
+    return null; // Not reached: the loop either returns or throws
+  }
+  /**
+   * Flaky-resistant click: click `locator`, retrying on failure.
+   *
+   * `retries` (from the enclosing scope) is the total attempt budget. At least
+   * one attempt is always made, so a budget of 0, a negative value, or NaN no
+   * longer reports "Max retries exceeded" without ever clicking.
+   *
+   * @param {object} locator - Playwright locator to click.
+   * @param {object} [opts] - Extra locator.click options; they override the
+   *   default timeout of half the page timeout.
+   * @returns {Promise<{success: boolean, error?: string}>} Never rejects on a
+   *   click failure; the failure is reported through `error` instead.
+   */
+  async function safeClick(locator, opts = {}) {
+    const budget = Number(retries);
+    const maxAttempts = Number.isFinite(budget) ? Math.max(1, Math.floor(budget)) : 1;
+    for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+      try {
+        await locator.click({ timeout: timeoutMs / 2, ...opts });
+        if (stableWait > 0) {
+          await page.waitForTimeout(stableWait);
+        }
+        return { success: true };
+      } catch (err) {
+        // Non-Error rejections have no .message, so fall back to the value itself
+        if (attempt === maxAttempts) return { success: false, error: String(err?.message || err) };
+        await page.waitForTimeout(300 * attempt); // Linear backoff: 300ms, 600ms, ...
+      }
+    }
+    return { success: false, error: 'Max retries exceeded' }; // Not reached; kept as a safe default
+  }
   const reqCounter = { value: 0 };
   const netErrors = [];
   const consoleErrors = [];
   const findings = [];
   const pagesVisited = [];
+  const fakeDataDetections = []; // Track fake data detections
+  const processedUrls = new Set(); // Avoid duplicate detections
   page.on("requestfinished", () => { reqCounter.value += 1; });
   page.on("requestfailed", (req) => { netErrors.push({ url: req.url(), failure: req.failure()?.errorText || "unknown" }); });
   page.on("console", (msg) => { if (msg.type() === "error") consoleErrors.push({ text: msg.text().slice(0, 500) }); });
   page.on("pageerror", (err) => { consoleErrors.push({ text: String(err?.message || err).slice(0, 500) }); });
+  // Intercept responses for fake data detection
+  page.on("response", async (response) => {
+    try {
+      const url = response.url();
+      const status = response.status();
+      // Skip already processed URLs and static assets
+      if (processedUrls.has(url)) return;
+      if (/\.(js|css|png|jpg|svg|ico|woff|woff2|ttf|gif|webp)(\?|$)/i.test(url)) return;
+      // Only check API-like endpoints
+      if (!url.includes('/api/') && !url.includes('/graphql') && !url.includes('/trpc') &&
+          !response.headers()['content-type']?.includes('application/json')) {
+        return;
+      }
+      processedUrls.add(url);
+      let body = '';
+      try {
+        body = await response.text();
+      } catch {
+        // Some responses can't be read
+      }
+      const detections = classifyNetworkTraffic(url, body, status);
+      if (detections && detections.length > 0) {
+        fakeDataDetections.push(...detections);
+      }
+    } catch {
+      // Ignore errors in response processing
+    }
+  });
   const visited = new Set();
   const queue = [{ url: baseUrl, depth: 0 }];
@@ -897,8 +1252,7 @@ async function runSinglePass({ label, baseUrl, context, shotsDir, danger, maxPag
       onProgress({ page: pagesVisited.length + 1, maxPages, url: targetUrl });
     }
-    const res = await page.goto(targetUrl, { waitUntil: "domcontentloaded" }).catch(() => null);
-    await page.waitForLoadState("networkidle", { timeout: 6000 }).catch(() => {});
+    const res = await safeGoto(targetUrl).catch(() => null);
     const status = res ? res.status() : null;
     const loginLike = await isLoginPage(page);
@@ -941,6 +1295,10 @@ async function runSinglePass({ label, baseUrl, context, shotsDir, danger, maxPag
       const out = await clickOutcome(page, locator, { reqCounter });
       if (!out.clickOk) {
+        // Skip click failures for elements that are likely intentionally not clickable
+        // (e.g., visually hidden close buttons, buttons behind overlays)
+        if (el.context?.isLikelyFalsePositive) continue;
         const shot = path.join(shotsDir, `${label}_click_fail_${sha1(el.key)}.png`);
         await page.screenshot({ path: shot }).catch(() => {});
         findings.push({
@@ -955,24 +1313,119 @@ async function runSinglePass({ label, baseUrl, context, shotsDir, danger, maxPag
         continue;
       }
-      if (!out.navHappened && !out.urlChanged && !out.domChanged && out.reqDelta === 0) {
+      // Enhanced Dead UI detection with false positive reduction
+      // An element is considered "dead" if clicking produced NO observable effect:
+      // - No navigation
+      // - No URL change
+      // - No DOM mutations
+      // - No CSS visibility/state changes
+      // - No network requests
+      const noEffect = !out.navHappened && !out.urlChanged && !out.domChanged &&
+                       !out.visibilityChanged && out.reqDelta === 0;
+      if (noEffect) {
+        // Apply false positive reduction based on element context
+        const ctx = el.context || {};
+        // Skip elements that are KNOWN to not produce observable changes
+        // These are legitimate UI patterns that don't need fixes
+        if (ctx.looksLikeClose && ctx.isInsideModal) {
+          // Close button inside a modal - the modal itself may have closed
+          continue;
+        }
+        if (ctx.looksLikeCopy) {
+          // Copy buttons work via clipboard API, no DOM change expected
+          continue;
+        }
+        if (ctx.looksLikeTheme) {
+          // Theme toggles may only change CSS custom properties
+          continue;
+        }
+        // Downgrade severity for likely false positives
+        // Instead of BLOCK, use WARN for elements in contexts that commonly have no-op behavior
+        let severity = "BLOCK";
+        let reason = "Click produced no navigation, no network activity, and no DOM change";
+        if (ctx.isLikelyFalsePositive) {
+          severity = "WARN";
+          reason = `Click produced no observable change (possible false positive: ${
+            ctx.looksLikeToggle ? 'toggle button' :
+            ctx.looksLikeTab ? 'tab element' :
+            ctx.looksLikeSort ? 'sort control' :
+            ctx.isInsideDropdown ? 'inside dropdown' :
+            ctx.isInsideAccordion ? 'inside accordion' :
+            'contextual element'
+          })`;
+        }
+        // Always skip tooltip-related elements as they are purely visual
+        if (ctx.isInsideTooltip) continue;
         const shot = path.join(shotsDir, `${label}_dead_${sha1(el.key)}.png`);
         await page.screenshot({ path: shot }).catch(() => {});
         findings.push({
           id: `R_${label}_DEAD_${sha1(el.key).slice(0, 8)}`,
-          severity: "BLOCK",
+          severity,
           category: "DeadUI",
           title: `[${label}] Dead UI: ${el.text || el.tag}`,
           page: page.url(),
-          reason: "Click produced no navigation, no network activity, and no DOM change",
-          screenshot: path.relative(root, shot).replace(/\\/g, "/")
+          reason,
+          screenshot: path.relative(root, shot).replace(/\\/g, "/"),
+          confidence: ctx.isLikelyFalsePositive ? 0.5 : 0.9, // Add confidence score
+          context: ctx // Include context for debugging
         });
       }
     }
   }
   await page.close();
-  return { label, pagesVisited, findings, consoleErrors: consoleErrors.slice(0, 50), networkErrors: netErrors.slice(0, 50) };
+  // Convert fake data detections to findings with confidence-based filtering
+  const seenFakeUrls = new Set();
+  // Sort by confidence (highest first) to prioritize most reliable detections
+  const sortedDetections = [...fakeDataDetections].sort((a, b) =>
+    (b.confidence || 0.5) - (a.confidence || 0.5)
+  );
+  for (const detection of sortedDetections) {
+    // Dedupe by URL + type + pattern to avoid near-duplicates
+    const key = `${detection.url}:${detection.type}:${detection.pattern || ''}`;
+    if (seenFakeUrls.has(key)) continue;
+    seenFakeUrls.add(key);
+    // Skip very low confidence detections (likely false positives)
+    const confidence = detection.confidence || 0.5;
+    if (confidence < 0.50) continue;
+    // Downgrade severity for medium confidence detections
+    let severity = detection.severity;
+    if (confidence < 0.70 && severity === 'BLOCK') {
+      severity = 'WARN';
+    }
+    findings.push({
+      id: `R_${label}_FAKE_${sha1(key).slice(0, 8)}`,
+      severity,
+      category: detection.type === 'fake-domain' ? 'FakeDomain' :
+                detection.type === 'fake-response' ? 'FakeResponse' : 'MockStatus',
+      title: `[${label}] Fake Data: ${detection.evidence}`,
+      page: detection.url,
+      reason: detection.evidence,
+      confidence, // Include confidence score for transparency
+      pattern: detection.pattern // Include pattern for debugging
+    });
+  }
+  return {
+    label,
+    pagesVisited,
+    findings,
+    consoleErrors: consoleErrors.slice(0, 50),
+    networkErrors: netErrors.slice(0, 50),
+    fakeDataDetections: fakeDataDetections.slice(0, 100)
+  };
 }
 function buildAuthCoverageFindings({ baseUrl, matchers, anonPass, authPass }) {
@@ -1024,6 +1477,95 @@ function coverageFromTruthpack({ truthpack, visitedUrls }) {
   return { total, hit, percent: total ? Math.round((hit / total) * 100) : 0, missed: Array.from(uiPaths).filter(p => !visitedPaths.has(p)).slice(0, 50) };
 }
+// ═══════════════════════════════════════════════════════════════════════════════
+// FLAKINESS & STABILITY VERIFICATION
+// ═══════════════════════════════════════════════════════════════════════════════
/**
 * Aggregate findings from multiple stability runs.
 *
 * Findings are grouped by a key built from category, title (with the
 * `[ANON]` / `[AUTH]` pass label stripped) and page. A finding is kept only
 * when the fraction of runs it appeared in is at least `threshold`.
 *
 * Each run contributes at most once per key. Because the pass label is
 * stripped from the key, the same issue reported by both the anonymous and
 * the authenticated pass of one run would otherwise be counted twice and
 * could clear the threshold after showing up in a single run.
 *
 * With zero runs an empty array is returned; with exactly one run that run's
 * findings are returned as-is (no `stability` metadata is attached).
 *
 * @param {Array<Array<Object>>} runFindings - Array of findings arrays from each run
 * @param {number} threshold - Minimum occurrence rate (0-1) to include a finding
 * @returns {Array<Object>} Deduplicated findings, each carrying a `stability` object
 */
function aggregateStabilityFindings(runFindings, threshold = 0.66) {
  const totalRuns = runFindings.length;
  if (totalRuns === 0) return [];
  if (totalRuns === 1) return runFindings[0] || [];

  // key -> { finding: copy of first occurrence, runs: indices of runs containing it }
  const findingsByKey = new Map();

  runFindings.forEach((findings, runIndex) => {
    // Tolerate a missing run entry the same way the single-run path does.
    for (const finding of findings || []) {
      // Stable key for deduplication across passes and runs
      const key = `${finding.category}|${finding.title?.replace(/\[ANON\]|\[AUTH\]/g, '').trim()}|${finding.page || ''}`;

      let entry = findingsByKey.get(key);
      if (!entry) {
        entry = { finding: { ...finding }, runs: new Set() };
        findingsByKey.set(key, entry);
      }
      // A Set makes repeated sightings within the same run count only once.
      entry.runs.add(runIndex);
    }
  });

  const aggregated = [];
  for (const { finding, runs } of findingsByKey.values()) {
    const appearedInRuns = runs.size;
    const occurrenceRate = appearedInRuns / totalRuns;
    if (occurrenceRate < threshold) continue;

    // Flakiness score: 0 = appeared in every run; it grows as the finding
    // appears in fewer runs.
    const flakinessScore = 1 - occurrenceRate;

    const aggregatedFinding = {
      ...finding,
      stability: {
        occurrenceRate: Math.round(occurrenceRate * 100) / 100,
        appearedInRuns,
        totalRuns,
        flakinessScore: Math.round(flakinessScore * 100) / 100,
        isFlaky: flakinessScore > 0.1, // Missing from more than 10% of runs = flaky
      }
    };

    // Findings missing from at least one run get that noted in their reason.
    if (appearedInRuns < totalRuns) {
      aggregatedFinding.reason = `${aggregatedFinding.reason || ''} (appeared ${appearedInRuns}/${totalRuns} runs)`.trim();
    }

    aggregated.push(aggregatedFinding);
  }

  return aggregated;
}
/**
 * Write the stability-verification summary to the console.
 *
 * Nothing is printed when `totalRuns` is 1 or less. The "Filtered (flaky)"
 * line is only printed when `filteredCount` is greater than zero.
 *
 * @param {number} totalRuns - Number of stability runs that were executed
 * @param {number} stableFindings - Value shown on the "Stable findings" line
 * @param {number} filteredCount - Value shown on the "Filtered (flaky)" line
 */
function printStabilityResults(totalRuns, stableFindings, filteredCount) {
  if (totalRuns <= 1) return;

  // Wrap a label or note in the dim style, resetting afterwards.
  const dim = (text) => `${c.dim}${text}${c.reset}`;

  console.log();
  const heading = `  ${colors.info}${ICONS.target}${c.reset} ${c.bold}Stability Verification${c.reset}`;
  console.log(heading);
  console.log(`     ${dim('Total runs:')}       ${totalRuns}`);
  console.log(`     ${dim('Stable findings:')}  ${stableFindings} ${dim('(appeared in majority of runs)')}`);

  if (filteredCount > 0) {
    const highlighted = `${colors.success}${filteredCount}${c.reset}`;
    console.log(`     ${dim('Filtered (flaky):')} ${highlighted} ${dim('(inconsistent across runs)')}`);
  }
}
 // ═══════════════════════════════════════════════════════════════════════════════
 // MAIN REALITY FUNCTION
 // ═══════════════════════════════════════════════════════════════════════════════
@@ -1055,6 +1597,13 @@ async function runReality(argsOrOpts = {}) {
       verifyAuth: argsOrOpts.includes("--verify-auth"),
       headed: argsOrOpts.includes("--headed"),
       danger: argsOrOpts.includes("--danger"),
+      // Visual artifacts options
+      recordVideo: argsOrOpts.includes("--record-video") || argsOrOpts.includes("--video"),
+      recordTrace: argsOrOpts.includes("--record-trace") || argsOrOpts.includes("--trace"),
+      recordHar: argsOrOpts.includes("--record-har") || argsOrOpts.includes("--har"),
+      // Flakiness reduction options
+      retries: parseInt(getArg(["--retries"]) || "2", 10),
+      stableWait: parseInt(getArg(["--stable-wait"]) || "500", 10),
       maxPages: parseInt(getArg(["--max-pages"]) || "18", 10),
       maxDepth: parseInt(getArg(["--max-depth"]) || "2", 10),
       timeoutMs: parseInt(getArg(["--timeout"]) || "15000", 10),
@@ -1074,7 +1623,16 @@ async function runReality(argsOrOpts = {}) {
     maxPages = 18,
     maxDepth = 2,
     danger = false,
-    timeoutMs = 15000
+    timeoutMs = 15000,
+    // Visual artifacts (videos, traces, HAR)
+    recordVideo = false,
+    recordTrace = false,
+    recordHar = false,
+    // Flakiness reduction
+    retries = 2,
+    stableWait = 500,
+    stabilityRuns = 1,
+    flakyThreshold = 0.66
   } = argsOrOpts;
   if (!url) {
@@ -1124,6 +1682,9 @@ async function runReality(argsOrOpts = {}) {
   console.log(`  ${c.dim}URL:${c.reset}      ${colors.accent}${url}${c.reset}`);
   console.log(`  ${c.dim}Mode:${c.reset}     ${verifyAuth ? `${colors.auth}Two-Pass (Auth)${c.reset}` : `${colors.anon}Single-Pass (Anon)${c.reset}`}`);
   console.log(`  ${c.dim}Budget:${c.reset}   ${maxPages} pages, depth ${maxDepth}`);
+  if (stabilityRuns > 1) {
+    console.log(`  ${c.dim}Stability:${c.reset} ${colors.info}${stabilityRuns} runs${c.reset}, threshold ${Math.round(flakyThreshold * 100)}%`);
+  }
   // Tier warning if applicable
   if (tierInfo.tier === 'free' && (originalMaxPages > maxPages || (originalVerifyAuth && !verifyAuth))) {
@@ -1145,7 +1706,13 @@ async function runReality(argsOrOpts = {}) {
   const baseUrl = normalizeUrl(url);
   const outBase = path.join(root, ".vibecheck", "reality", stamp());
   const shotsDir = path.join(outBase, "screenshots");
+  const videosDir = path.join(outBase, "videos");
+  const tracesDir = path.join(outBase, "traces");
+  const harDir = path.join(outBase, "har");
   ensureDir(shotsDir);
+  if (recordVideo) ensureDir(videosDir);
+  if (recordTrace) ensureDir(tracesDir);
+  if (recordHar) ensureDir(harDir);
   const tp = loadTruthpack(root, truthpack);
   const matchers = getProtectedMatchersFromTruthpack(tp);
@@ -1161,98 +1728,229 @@ async function runReality(argsOrOpts = {}) {
   stopSpinner('Browser launched', true);
   // ═══════════════════════════════════════════════════════════════════════════
-  // PASS A: ANONYMOUS
+  // STABILITY RUNS (multiple passes for flakiness detection)
   // ═══════════════════════════════════════════════════════════════════════════
-  printPassHeader('anon', baseUrl);
-  startSpinner('Crawling anonymously...', colors.anon);
-  const anonContext = await browser.newContext();
-  const anonPass = await runSinglePass({
-    label: "ANON",
-    baseUrl,
-    context: anonContext,
-    shotsDir,
-    danger,
-    maxPages,
-    maxDepth,
-    timeoutMs,
-    root,
-    onProgress: ({ page, maxPages: mp, url: currentUrl }) => {
-      // Could update spinner here if desired
-    }
-  });
-  await anonContext.close();
-  stopSpinner(`Crawled ${anonPass.pagesVisited.length} pages`, true);
-  printPassResult('anon', anonPass);
-  // ═══════════════════════════════════════════════════════════════════════════
-  // PASS B: AUTHENTICATED (optional)
-  // ═══════════════════════════════════════════════════════════════════════════
-  let authPass = null;
-  let authFindings = [];
+  const allRunFindings = [];
+  let lastAnonPass = null;
+  let lastAuthPass = null;
+  let anonVideoPath = null;
+  let authVideoPath = null;
+  let anonTracePath = null;
+  let authTracePath = null;
   let savedStatePath = null;
+  for (let runNum = 1; runNum <= stabilityRuns; runNum++) {
+    const isFirstRun = runNum === 1;
+    const isLastRun = runNum === stabilityRuns;
+    if (stabilityRuns > 1) {
+      console.log();
+      console.log(`  ${colors.info}${BOX.hHorizontal.repeat(3)}${c.reset} ${c.bold}Stability Run ${runNum}/${stabilityRuns}${c.reset}`);
+    }
-  if (verifyAuth) {
-    printPassHeader('auth', baseUrl);
+    // ═══════════════════════════════════════════════════════════════════════════
+    // PASS A: ANONYMOUS
+    // ═══════════════════════════════════════════════════════════════════════════
+    printPassHeader('anon', baseUrl);
-    startSpinner('Setting up authenticated session...', colors.auth);
-    const ctxOpts = storageState ? { storageState } : {};
-    const authContext = await browser.newContext(ctxOpts);
-    const authPage = await authContext.newPage();
-    await authPage.goto(baseUrl, { waitUntil: "domcontentloaded" }).catch(() => {});
-    await authPage.waitForLoadState("networkidle", { timeout: 6000 }).catch(() => {});
-    if (!storageState && auth) {
-      stopSpinner('Attempting login...', true);
-      startSpinner('Logging in...', colors.auth);
-      const loginRes = await attemptLogin(authPage, { auth });
-      if (loginRes.ok) {
-        stopSpinner('Login successful', true);
-        if (saveStorageState) {
-          const dest = path.isAbsolute(saveStorageState) ? saveStorageState : path.join(root, saveStorageState);
-          ensureDir(path.dirname(dest));
-          await authContext.storageState({ path: dest }).catch(() => {});
-          savedStatePath = dest;
-          console.log(`    ${colors.success}${ICONS.check}${c.reset} Session saved: ${c.dim}${path.relative(root, dest)}${c.reset}`);
-        }
-      } else {
-        stopSpinner('Login failed - continuing without auth', false);
-      }
-    } else {
-      stopSpinner('Using existing session', true);
+    startSpinner('Crawling anonymously...', colors.anon);
+    // Build context options for video/HAR recording (only on last run to save resources)
+    const anonContextOpts = {};
+    if (recordVideo && isLastRun) {
+      anonContextOpts.recordVideo = {
+        dir: videosDir,
+        size: { width: 1280, height: 720 }
+      };
+    }
+    if (recordHar && isLastRun) {
+      anonContextOpts.recordHar = {
+        path: path.join(harDir, 'anon-traffic.har'),
+        mode: 'full'
+      };
     }
-    await authPage.close();
-    startSpinner('Crawling with authentication...', colors.auth);
-    authPass = await runSinglePass({
-      label: "AUTH",
+    const anonContext = await browser.newContext(anonContextOpts);
+    // Start trace recording if enabled (only on last run)
+    if (recordTrace && isLastRun) {
+      await anonContext.tracing.start({
+        screenshots: true,
+        snapshots: true,
+        sources: false
+      });
+    }
+    const anonPass = await runSinglePass({
+      label: "ANON",
       baseUrl,
-      context: authContext,
-      shotsDir,
+      context: anonContext,
+      shotsDir: isLastRun ? shotsDir : path.join(outBase, `run${runNum}`, 'screenshots'),
       danger,
       maxPages,
       maxDepth,
       timeoutMs,
-      root
+      root,
+      retries,
+      stableWait,
+      onProgress: ({ page, maxPages: mp, url: currentUrl }) => {
+        // Could update spinner here if desired
+      }
     });
-    await authContext.close();
-    stopSpinner(`Crawled ${authPass.pagesVisited.length} pages`, true);
-    printPassResult('auth', authPass);
+    // Ensure shot dir exists for intermediate runs
+    if (!isLastRun) {
+      ensureDir(path.join(outBase, `run${runNum}`, 'screenshots'));
+    }
+    // Save trace if enabled (only last run)
+    if (recordTrace && isLastRun) {
+      anonTracePath = path.join(tracesDir, 'anon-trace.zip');
+      await anonContext.tracing.stop({ path: anonTracePath });
+    }
+    // Get video path before closing context (only last run)
+    if (recordVideo && isLastRun && anonPass.pagesVisited.length > 0) {
+      const pages = anonContext.pages();
+      if (pages.length > 0) {
+        const video = pages[0].video();
+        if (video) {
+          try {
+            anonVideoPath = await video.path();
+          } catch {}
+        }
+      }
+    }
+    await anonContext.close();
+    stopSpinner(`Crawled ${anonPass.pagesVisited.length} pages`, true);
+    printPassResult('anon', anonPass);
+    lastAnonPass = anonPass;
-    // Build auth coverage findings
-    if (matchers.length) {
-      startSpinner('Analyzing auth coverage...', colors.authCoverage);
-      authFindings = buildAuthCoverageFindings({ baseUrl, matchers, anonPass, authPass });
-      stopSpinner(`Found ${authFindings.length} auth issues`, authFindings.length === 0);
+    // ═══════════════════════════════════════════════════════════════════════════
+    // PASS B: AUTHENTICATED (optional)
+    // ═══════════════════════════════════════════════════════════════════════════
+    let authPass = null;
+    let authFindings = [];
+    if (verifyAuth) {
+      printPassHeader('auth', baseUrl);
+      startSpinner('Setting up authenticated session...', colors.auth);
+      const ctxOpts = storageState ? { storageState } : {};
+      // Add video/HAR recording options (only last run)
+      if (recordVideo && isLastRun) {
+        ctxOpts.recordVideo = {
+          dir: videosDir,
+          size: { width: 1280, height: 720 }
+        };
+      }
+      if (recordHar && isLastRun) {
+        ctxOpts.recordHar = {
+          path: path.join(harDir, 'auth-traffic.har'),
+          mode: 'full'
+        };
+      }
+      const authContext = await browser.newContext(ctxOpts);
+      // Start trace recording if enabled (only last run)
+      if (recordTrace && isLastRun) {
+        await authContext.tracing.start({
+          screenshots: true,
+          snapshots: true,
+          sources: false
+        });
+      }
+      const authPage = await authContext.newPage();
+      await authPage.goto(baseUrl, { waitUntil: "domcontentloaded" }).catch(() => {});
+      await authPage.waitForLoadState("networkidle", { timeout: 6000 }).catch(() => {});
+      if (!storageState && auth && isFirstRun) {
+        stopSpinner('Attempting login...', true);
+        startSpinner('Logging in...', colors.auth);
+        const loginRes = await attemptLogin(authPage, { auth });
+        if (loginRes.ok) {
+          stopSpinner('Login successful', true);
+          if (saveStorageState) {
+            const dest = path.isAbsolute(saveStorageState) ? saveStorageState : path.join(root, saveStorageState);
+            ensureDir(path.dirname(dest));
+            await authContext.storageState({ path: dest }).catch(() => {});
+            savedStatePath = dest;
+            console.log(`    ${colors.success}${ICONS.check}${c.reset} Session saved: ${c.dim}${path.relative(root, dest)}${c.reset}`);
+          }
+        } else {
+          stopSpinner('Login failed - continuing without auth', false);
+        }
+      } else {
+        stopSpinner('Using existing session', true);
+      }
+      await authPage.close();
+      startSpinner('Crawling with authentication...', colors.auth);
+      authPass = await runSinglePass({
+        label: "AUTH",
+        baseUrl,
+        context: authContext,
+        shotsDir: isLastRun ? shotsDir : path.join(outBase, `run${runNum}`, 'screenshots'),
+        danger,
+        maxPages,
+        maxDepth,
+        timeoutMs,
+        root,
+        retries,
+        stableWait
+      });
+      // Save trace if enabled (only last run)
+      if (recordTrace && isLastRun) {
+        authTracePath = path.join(tracesDir, 'auth-trace.zip');
+        await authContext.tracing.stop({ path: authTracePath });
+      }
+      // Get video path before closing context (only last run)
+      if (recordVideo && isLastRun && authPass.pagesVisited.length > 0) {
+        const pages = authContext.pages();
+        if (pages.length > 0) {
+          const video = pages[0].video();
+          if (video) {
+            try {
+              authVideoPath = await video.path();
+            } catch {}
+          }
+        }
+      }
+      await authContext.close();
+      stopSpinner(`Crawled ${authPass.pagesVisited.length} pages`, true);
+      printPassResult('auth', authPass);
+      lastAuthPass = authPass;
+      // Build auth coverage findings
+      if (matchers.length) {
+        startSpinner('Analyzing auth coverage...', colors.authCoverage);
+        authFindings = buildAuthCoverageFindings({ baseUrl, matchers, anonPass, authPass });
+        stopSpinner(`Found ${authFindings.length} auth issues`, authFindings.length === 0);
+      }
     }
+    // Collect findings from this run
+    const runFindings = [...anonPass.findings, ...(authPass?.findings || []), ...authFindings];
+    allRunFindings.push(runFindings);
   }
   await browser.close();
+  // Use last pass results for page/coverage data
+  const anonPass = lastAnonPass;
+  const authPass = lastAuthPass;
   // ═══════════════════════════════════════════════════════════════════════════
   // ANALYSIS & RESULTS
@@ -1261,10 +1959,49 @@ async function runReality(argsOrOpts = {}) {
   const allVisited = [...anonPass.pagesVisited.map(p => p.url), ...(authPass?.pagesVisited || []).map(p => p.url)];
   const coverage = coverageFromTruthpack({ truthpack: tp, visitedUrls: allVisited });
-  const findings = [...anonPass.findings, ...(authPass?.findings || []), ...authFindings];
+  // Aggregate findings from stability runs (filters out flaky findings)
+  let findings;
+  let filteredFlakyCount = 0;
+  if (stabilityRuns > 1) {
+    // Count total unique findings across all runs before filtering
+    const allFindingsFlat = allRunFindings.flat();
+    const uniqueBeforeFilter = new Set(allFindingsFlat.map(f =>
+      `${f.category}|${f.title?.replace(/\[ANON\]|\[AUTH\]/g, '').trim()}|${f.page || ''}`
+    )).size;
+    findings = aggregateStabilityFindings(allRunFindings, flakyThreshold);
+    filteredFlakyCount = uniqueBeforeFilter - findings.length;
+    printStabilityResults(stabilityRuns, findings.length, filteredFlakyCount);
+  } else {
+    // Single run - use findings directly
+    findings = allRunFindings[0] || [];
+  }
   const blocks = findings.filter(f => f.severity === "BLOCK").length;
   const warns = findings.filter(f => f.severity === "WARN").length;
+  // Build artifact manifest
+  const artifacts = {
+    screenshots: shotsDir ? path.relative(root, shotsDir).replace(/\\/g, "/") : null,
+    videos: recordVideo ? {
+      directory: path.relative(root, videosDir).replace(/\\/g, "/"),
+      anon: anonVideoPath ? path.relative(root, anonVideoPath).replace(/\\/g, "/") : null,
+      auth: authVideoPath ? path.relative(root, authVideoPath).replace(/\\/g, "/") : null
+    } : null,
+    traces: recordTrace ? {
+      directory: path.relative(root, tracesDir).replace(/\\/g, "/"),
+      anon: anonTracePath ? path.relative(root, anonTracePath).replace(/\\/g, "/") : null,
+      auth: authTracePath ? path.relative(root, authTracePath).replace(/\\/g, "/") : null
+    } : null,
+    har: recordHar ? {
+      directory: path.relative(root, harDir).replace(/\\/g, "/"),
+      anon: path.join(harDir, 'anon-traffic.har'),
+      auth: path.join(harDir, 'auth-traffic.har')
+    } : null
+  };
   // Build report
   const report = {
     meta: {
@@ -1277,8 +2014,16 @@ async function runReality(argsOrOpts = {}) {
       maxDepth,
       truthpackLoaded: !!tp,
       protectedMatcherCount: matchers.length,
-      savedStorageState: savedStatePath ? path.relative(root, savedStatePath).replace(/\\/g, "/") : null
+      savedStorageState: savedStatePath ? path.relative(root, savedStatePath).replace(/\\/g, "/") : null,
+      recordVideo,
+      recordTrace,
+      recordHar,
+      // Flakiness/stability metadata
+      stabilityRuns,
+      flakyThreshold,
+      filteredFlakyCount: stabilityRuns > 1 ? filteredFlakyCount : 0
     },
+    artifacts,
     coverage,
     passes: { anon: anonPass, auth: authPass },
     findings,