@vibecheckai/cli 3.1.8 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/bin/registry.js +106 -116
  2. package/bin/runners/context/generators/mcp.js +18 -0
  3. package/bin/runners/context/index.js +72 -4
  4. package/bin/runners/context/proof-context.js +293 -1
  5. package/bin/runners/context/security-scanner.js +311 -73
  6. package/bin/runners/lib/analyzers.js +607 -20
  7. package/bin/runners/lib/detectors-v2.js +172 -15
  8. package/bin/runners/lib/entitlements-v2.js +48 -1
  9. package/bin/runners/lib/evidence-pack.js +678 -0
  10. package/bin/runners/lib/html-proof-report.js +913 -0
  11. package/bin/runners/lib/missions/plan.js +231 -41
  12. package/bin/runners/lib/missions/templates.js +125 -0
  13. package/bin/runners/lib/scan-output.js +492 -253
  14. package/bin/runners/lib/ship-output.js +901 -641
  15. package/bin/runners/runCheckpoint.js +44 -3
  16. package/bin/runners/runContext.d.ts +4 -0
  17. package/bin/runners/runDoctor.js +10 -2
  18. package/bin/runners/runFix.js +51 -341
  19. package/bin/runners/runInit.js +11 -0
  20. package/bin/runners/runPolish.d.ts +4 -0
  21. package/bin/runners/runPolish.js +608 -29
  22. package/bin/runners/runProve.js +210 -25
  23. package/bin/runners/runReality.js +846 -101
  24. package/bin/runners/runScan.js +238 -4
  25. package/bin/runners/runShip.js +19 -3
  26. package/bin/runners/runWatch.js +14 -1
  27. package/bin/vibecheck.js +32 -2
  28. package/mcp-server/consolidated-tools.js +408 -42
  29. package/mcp-server/index.js +152 -15
  30. package/mcp-server/proof-tools.js +571 -0
  31. package/mcp-server/tier-auth.js +22 -19
  32. package/mcp-server/tools-v3.js +744 -0
  33. package/mcp-server/truth-firewall-tools.js +190 -4
  34. package/package.json +3 -1
  35. package/bin/runners/runInstall.js +0 -281
  36. package/bin/runners/runLabs.js +0 -341
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Reality Mode v2 - Two-Pass Auth Verification + Dead UI Crawler
2
+ * Reality Mode v2 - Two-Pass Auth Verification + Dead UI Crawler + Fake Data Detection
3
3
  *
4
4
  * ═══════════════════════════════════════════════════════════════════════════════
5
5
  * ENTERPRISE EDITION - World-Class Terminal Experience
@@ -8,7 +8,7 @@
8
8
  * TIER ENFORCEMENT:
9
9
  * - FREE: Preview mode (5 pages, 20 clicks, no auth boundary)
10
10
  * - STARTER: Full budgets + basic auth verification
11
- * - PRO: Advanced auth boundary (multi-role, 2-pass)
11
+ * - PRO: Advanced auth boundary (multi-role, 2-pass) + fake data detection
12
12
  *
13
13
  * Pass A (anon): crawl + click, record which routes look protected
14
14
  * Pass B (auth): crawl same routes using storageState, verify protected routes accessible
@@ -17,6 +17,9 @@
17
17
  * - Dead UI (clicks that do nothing)
18
18
  * - HTTP errors (4xx/5xx)
19
19
  * - Auth coverage (protected route reachable anonymously = BLOCK)
20
+ * - Fake domain detection (localhost, jsonplaceholder, ngrok, mockapi.io)
21
+ * - Fake response detection (demo IDs, test keys, placeholder data)
22
+ * - Mock status codes (418, 999, etc.)
20
23
  * - Route coverage stats
21
24
  */
22
25
 
@@ -152,10 +155,177 @@ ${rgb(255, 80, 40)} ╚████╔╝ ██║██████╔╝█
152
155
  ${rgb(255, 60, 20)} ╚═══╝ ╚═╝╚═════╝ ╚══════╝ ╚═════╝╚═╝ ╚═╝╚══════╝ ╚═════╝╚═╝ ╚═╝${c.reset}
153
156
 
154
157
  ${c.dim} ┌─────────────────────────────────────────────────────────────────────┐${c.reset}
155
- ${c.dim} │${c.reset} ${rgb(255, 150, 100)}🎭${c.reset} ${c.bold}REALITY${c.reset} ${c.dim}•${c.reset} ${rgb(200, 200, 200)}Runtime UI Proof${c.reset} ${c.dim}•${c.reset} ${rgb(150, 150, 150)}Dead UI Detection${c.reset} ${c.dim}│${c.reset}
158
+ ${c.dim} │${c.reset} ${rgb(255, 150, 100)}🎭${c.reset} ${c.bold}REALITY${c.reset} ${c.dim}•${c.reset} ${rgb(200, 200, 200)}Dead UI${c.reset} ${c.dim}•${c.reset} ${rgb(150, 150, 150)}Fake Data${c.reset} ${c.dim}•${c.reset} ${rgb(100, 200, 255)}Auth Coverage${c.reset} ${c.dim}│${c.reset}
156
159
  ${c.dim} └─────────────────────────────────────────────────────────────────────┘${c.reset}
157
160
  `;
158
161
 
162
+ // ═══════════════════════════════════════════════════════════════════════════════
163
+ // FAKE DATA DETECTION PATTERNS (from reality-mode/reality-scanner.ts)
164
+ // ═══════════════════════════════════════════════════════════════════════════════
165
+
166
+ // ═══════════════════════════════════════════════════════════════════════════════
167
+ // FAKE DETECTION PATTERNS WITH CONFIDENCE SCORING
168
+ // Each pattern has a confidence level to reduce false positives
169
+ // ═══════════════════════════════════════════════════════════════════════════════
170
+
171
// URL patterns for backends that indicate mock/dev data. Each entry carries a
// confidence score and a severity; `devContextOk` / `stagingContextOk` mark
// patterns that classifyNetworkTraffic ignores in a dev or staging context.
const FAKE_DOMAIN_PATTERNS = [
  // CRITICAL: These are almost certainly fake backends (confidence: 0.95+)
  { pattern: /jsonplaceholder\.typicode\.com/i, name: "JSONPlaceholder mock API", confidence: 0.99, severity: 'BLOCK' },
  { pattern: /reqres\.in/i, name: "ReqRes mock API", confidence: 0.99, severity: 'BLOCK' },
  { pattern: /mockapi\.io/i, name: "MockAPI.io", confidence: 0.99, severity: 'BLOCK' },
  { pattern: /mocky\.io/i, name: "Mocky.io", confidence: 0.99, severity: 'BLOCK' },
  { pattern: /httpbin\.org/i, name: "HTTPBin testing API", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /api\.example\.com/i, name: "Example.com API", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /fake\.api/i, name: "Fake API pattern", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /demo\.api/i, name: "Demo API pattern", confidence: 0.90, severity: 'BLOCK' },

  // HIGH: Likely development/testing (confidence: 0.7-0.9)
  // NOTE: These could be legitimate in dev/CI contexts
  { pattern: /localhost:\d+/i, name: "Localhost API", confidence: 0.75, severity: 'WARN', devContextOk: true },
  { pattern: /127\.0\.0\.1:\d+/i, name: "Loopback API", confidence: 0.75, severity: 'WARN', devContextOk: true },
  { pattern: /\.ngrok\.io/i, name: "Ngrok tunnel", confidence: 0.80, severity: 'WARN', devContextOk: true },
  { pattern: /\.ngrok-free\.app/i, name: "Ngrok free tunnel", confidence: 0.80, severity: 'WARN', devContextOk: true },

  // MEDIUM: Could be legitimate staging (confidence: 0.5-0.7)
  // NOTE: Many organizations have legitimate staging environments
  { pattern: /staging\.[^/]+\/api/i, name: "Staging API endpoint", confidence: 0.60, severity: 'WARN', stagingContextOk: true },
  { pattern: /\.local\//i, name: "Local domain", confidence: 0.50, severity: 'WARN', devContextOk: true },
  { pattern: /\.test\//i, name: "Test domain", confidence: 0.50, severity: 'WARN', devContextOk: true },
];

// Response-body patterns that indicate placeholder or demo data. Entries with
// `docsContextOk` are ignored when the request URL looks like a docs page.
const FAKE_RESPONSE_PATTERNS = [
  // CRITICAL: Test API keys exposed (security issue)
  { pattern: /sk_test_[a-zA-Z0-9]{20,}/i, name: "Test Stripe secret key", confidence: 0.99, severity: 'BLOCK' },
  { pattern: /pk_test_[a-zA-Z0-9]{20,}/i, name: "Test Stripe public key", confidence: 0.95, severity: 'WARN' },

  // HIGH: Clearly fake IDs/data
  { pattern: /inv_demo_[a-zA-Z0-9]+/i, name: "Demo invoice ID", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /user_demo_[a-zA-Z0-9]+/i, name: "Demo user ID", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /cus_demo_[a-zA-Z0-9]+/i, name: "Demo customer ID", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /sub_demo_[a-zA-Z0-9]+/i, name: "Demo subscription ID", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /"mock":\s*true/i, name: "Mock flag enabled", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /"isDemo":\s*true/i, name: "Demo mode flag", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /"status":\s*"simulated"/i, name: "Simulated status", confidence: 0.90, severity: 'BLOCK' },

  // MEDIUM: Placeholder content (could be legitimate in docs/examples)
  { pattern: /lorem\s+ipsum\s+dolor/i, name: "Lorem ipsum placeholder", confidence: 0.70, severity: 'WARN', docsContextOk: true },
  { pattern: /john\.doe@/i, name: "John Doe placeholder email", confidence: 0.65, severity: 'WARN', docsContextOk: true },
  { pattern: /jane\.doe@/i, name: "Jane Doe placeholder email", confidence: 0.65, severity: 'WARN', docsContextOk: true },
  { pattern: /user@example\.com/i, name: "Example.com email", confidence: 0.50, severity: 'WARN', docsContextOk: true },
  { pattern: /placeholder\.(com|jpg|png)/i, name: "Placeholder domain/image", confidence: 0.60, severity: 'WARN', docsContextOk: true },

  // LOWER: Could have many false positives
  { pattern: /"id":\s*"demo"/i, name: "Demo ID value", confidence: 0.70, severity: 'WARN' },
  { pattern: /"id":\s*"test"/i, name: "Test ID value", confidence: 0.60, severity: 'WARN' },
  { pattern: /"success":\s*true[^}]*"demo"/i, name: "Demo success response", confidence: 0.75, severity: 'WARN' },
];

// URLs matching any of these skip detection entirely (domain, body and status).
const FAKE_DETECTION_ALLOWLIST = [
  /\/docs?\//i,       // Documentation pages
  /\/help\//i,        // Help pages
  /\/examples?\//i,   // Example pages
  /\/demo\//i,        // Demo pages (intentional)
  /\/playground\//i,  // Playground/sandbox
  /\/api-docs?\//i,   // API documentation
  /\/swagger/i,       // Swagger docs
  /\/openapi/i,       // OpenAPI docs
  /readme/i,          // README content
  /changelog/i,       // Changelog
];

/**
 * Classify a network request/response for fake data patterns.
 *
 * Checks, in order: the URL against FAKE_DOMAIN_PATTERNS (first match wins),
 * the response body against FAKE_RESPONSE_PATTERNS (all matches reported), and
 * the HTTP status against a small set of suspicious codes (418, 999, 0).
 * Static-asset URLs and allowlisted URLs are never inspected.
 *
 * @param {string} url - Request URL.
 * @param {string} [responseBody] - Response text; bodies shorter than 20
 *   characters (or non-strings) are not pattern-scanned.
 * @param {number} [status] - HTTP status code.
 * @param {{isDev?: boolean, isStaging?: boolean, isDocsPage?: boolean}} [context]
 *   Context hints. `isDev` also becomes true when NODE_ENV is "development";
 *   `isStaging` / `isDocsPage` are also inferred from keywords in the URL.
 * @returns {Array<Object>|null} Detections (`type`, `severity`, `evidence`,
 *   `url`, `confidence`, and `pattern` for regex-based hits), or null if clean.
 */
function classifyNetworkTraffic(url, responseBody, status, context = {}) {
  // Skip static assets (images, fonts, stylesheets, scripts)
  if (/\.(js|css|png|jpg|jpeg|svg|ico|woff|woff2|ttf|eot|gif|webp|mp4|webm|pdf)(\?|$)/i.test(url)) {
    return null;
  }

  // Skip detection for documentation/example URLs
  if (FAKE_DETECTION_ALLOWLIST.some((allowPattern) => allowPattern.test(url))) {
    return null;
  }

  const detections = [];
  const isDev = context.isDev || process.env.NODE_ENV === 'development';
  const isStaging = context.isStaging || /staging|stg|preprod/i.test(url);
  const isDocsPage = context.isDocsPage || /docs?|help|example|readme/i.test(url);

  // Fake domain patterns: report only the first applicable match
  for (const { pattern, name, confidence, severity, devContextOk, stagingContextOk } of FAKE_DOMAIN_PATTERNS) {
    if (!pattern.test(url)) continue;
    // Pattern is acceptable in the current context: keep looking
    if (devContextOk && isDev) continue;
    if (stagingContextOk && isStaging) continue;

    detections.push({
      type: 'fake-domain',
      severity,
      evidence: `URL matches fake domain pattern: ${name}`,
      url,
      confidence,
      pattern: pattern.source
    });
    break; // One domain match is enough
  }

  // Fake data patterns in the body. Very short bodies are not scanned, but
  // must NOT short-circuit the function: the status-code check and the
  // confidence filter below still have to run for them.
  if (typeof responseBody === 'string' && responseBody.length >= 20) {
    for (const { pattern, name, confidence, severity, docsContextOk } of FAKE_RESPONSE_PATTERNS) {
      if (docsContextOk && isDocsPage) continue;
      if (!pattern.test(responseBody)) continue;

      detections.push({
        type: 'fake-response',
        severity,
        evidence: `Response contains ${name}`,
        url,
        confidence,
        pattern: pattern.source
      });
    }
  }

  // Suspicious status codes (lower confidence - could be legitimate)
  if (status === 418 || status === 999 || status === 0) {
    detections.push({
      type: 'mock-status',
      severity: 'WARN',
      evidence: `Suspicious HTTP status code: ${status}`,
      url,
      confidence: 0.60
    });
  }

  // When high-confidence hits exist alongside weaker ones, return only the
  // high-confidence subset to reduce noise.
  const highConfidence = detections.filter((d) => d.confidence >= 0.80);
  if (highConfidence.length > 0 && detections.length > highConfidence.length) {
    return highConfidence;
  }

  return detections.length > 0 ? detections : null;
}
328
+
159
329
  // ═══════════════════════════════════════════════════════════════════════════════
160
330
  // ICONS & SYMBOLS
161
331
  // ═══════════════════════════════════════════════════════════════════════════════
@@ -420,6 +590,10 @@ function getCategoryIcon(category) {
420
590
  'DeadUI': ICONS.deadUI,
421
591
  'AuthCoverage': ICONS.shield,
422
592
  'HTTPError': ICONS.http,
593
+ // Fake data detection categories
594
+ 'FakeDomain': '🔗',
595
+ 'FakeResponse': '🎭',
596
+ 'MockStatus': '📡',
423
597
  };
424
598
  return icons[category] || ICONS.bullet;
425
599
  }
@@ -429,6 +603,10 @@ function getCategoryColor(category) {
429
603
  'DeadUI': colors.deadUI,
430
604
  'AuthCoverage': colors.authCoverage,
431
605
  'HTTPError': colors.httpError,
606
+ // Fake data detection categories - all critical (red/orange)
607
+ 'FakeDomain': rgb(255, 80, 80), // Red - critical
608
+ 'FakeResponse': rgb(255, 100, 60), // Orange-red
609
+ 'MockStatus': rgb(255, 150, 50), // Amber
432
610
  };
433
611
  return categoryColors[category] || colors.accent;
434
612
  }
@@ -645,6 +823,17 @@ function printHelp(opts = {}) {
645
823
  ${colors.accent}--timeout <ms>${c.reset} Page timeout ${c.dim}(default: 15000)${c.reset}
646
824
  ${colors.accent}--help, -h${c.reset} Show this help
647
825
 
826
+ ${c.bold}Visual Artifacts:${c.reset}
827
+ ${colors.accent}--video, --record-video${c.reset} Record video of browser sessions
828
+ ${colors.accent}--trace, --record-trace${c.reset} Record Playwright trace (viewable in trace.playwright.dev)
829
+ ${colors.accent}--har, --record-har${c.reset} Record HAR network traffic
830
+
831
+ ${c.bold}Flakiness Reduction:${c.reset}
832
+ ${colors.accent}--retries <n>${c.reset} Retry failed nav/clicks ${c.dim}(default: 2)${c.reset}
833
+ ${colors.accent}--stable-wait <ms>${c.reset} Wait after actions ${c.dim}(default: 500ms)${c.reset}
834
+ ${colors.accent}--stability-runs <n>${c.reset} Run N times for stability check ${c.dim}(default: 1)${c.reset}
835
+ ${colors.accent}--flaky-threshold <f>${c.reset} Min occurrence rate to report ${c.dim}(default: 0.66)${c.reset}
836
+
648
837
  ${c.bold}Tier Limits:${c.reset}
649
838
  ${c.dim}FREE${c.reset} 5 pages, no auth boundary
650
839
  ${c.dim}STARTER${c.reset} Full budgets + basic auth
@@ -784,23 +973,76 @@ async function clickOutcome(page, locator, opts = {}) {
784
973
  const beforeUrl = page.url();
785
974
  const beforeReq = opts.reqCounter.value;
786
975
 
976
+ // Enhanced mutation observer that detects more changes including:
977
+ // - DOM structure changes (childList, subtree)
978
+ // - Attribute changes (class, style, aria-*, data-*)
979
+ // - CSS visibility/display changes
787
980
  const domPromise = page.evaluate(() => {
788
981
  return new Promise((resolve) => {
789
- const obs = new MutationObserver(() => { obs.disconnect(); resolve({ changed: true }); });
790
- obs.observe(document.documentElement, { childList: true, subtree: true, attributes: true });
791
- setTimeout(() => { try { obs.disconnect(); } catch {} resolve({ changed: false }); }, 900);
982
+ let changeCount = 0;
983
+ let attributeChanges = [];
984
+
985
+ const obs = new MutationObserver((mutations) => {
986
+ for (const mutation of mutations) {
987
+ changeCount++;
988
+ if (mutation.type === 'attributes') {
989
+ attributeChanges.push({
990
+ attr: mutation.attributeName,
991
+ target: mutation.target.tagName
992
+ });
993
+ }
994
+ }
995
+ });
996
+
997
+ obs.observe(document.documentElement, {
998
+ childList: true,
999
+ subtree: true,
1000
+ attributes: true,
1001
+ attributeFilter: ['class', 'style', 'aria-expanded', 'aria-hidden', 'aria-selected',
1002
+ 'data-state', 'hidden', 'open', 'data-open', 'data-closed']
1003
+ });
1004
+
1005
+ setTimeout(() => {
1006
+ try { obs.disconnect(); } catch {}
1007
+ resolve({
1008
+ changed: changeCount > 0,
1009
+ changeCount,
1010
+ attributeChanges: attributeChanges.slice(0, 10) // Limit for performance
1011
+ });
1012
+ }, 900);
792
1013
  });
793
1014
  });
794
1015
 
1016
+ // Also track CSS visibility changes via computed styles
1017
+ const beforeVisibility = await page.evaluate(() => {
1018
+ const modals = document.querySelectorAll('[role="dialog"], .modal, .dropdown, .popover, [data-state]');
1019
+ return Array.from(modals).slice(0, 20).map(el => ({
1020
+ visible: getComputedStyle(el).display !== 'none' && getComputedStyle(el).visibility !== 'hidden',
1021
+ state: el.getAttribute('data-state')
1022
+ }));
1023
+ }).catch(() => []);
1024
+
795
1025
  const navPromise = page.waitForNavigation({ timeout: 1200 }).then(() => true).catch(() => false);
796
1026
  const clickRes = await locator.click({ timeout: 1200 }).then(() => ({ ok: true })).catch((e) => ({ ok: false, error: String(e?.message || e) }));
797
1027
 
798
1028
  const navRes = await navPromise;
799
1029
  const domRes = await domPromise;
800
- await page.waitForTimeout(250);
1030
+ await page.waitForTimeout(300); // Slightly longer wait for CSS transitions
801
1031
 
802
1032
  const afterSig = await pageSignature(page);
803
1033
  const afterUrl = page.url();
1034
+
1035
+ // Check CSS visibility changes
1036
+ const afterVisibility = await page.evaluate(() => {
1037
+ const modals = document.querySelectorAll('[role="dialog"], .modal, .dropdown, .popover, [data-state]');
1038
+ return Array.from(modals).slice(0, 20).map(el => ({
1039
+ visible: getComputedStyle(el).display !== 'none' && getComputedStyle(el).visibility !== 'hidden',
1040
+ state: el.getAttribute('data-state')
1041
+ }));
1042
+ }).catch(() => []);
1043
+
1044
+ // Detect visibility state changes (modal open/close, dropdown toggle, etc.)
1045
+ const visibilityChanged = JSON.stringify(beforeVisibility) !== JSON.stringify(afterVisibility);
804
1046
 
805
1047
  return {
806
1048
  clickOk: clickRes.ok,
@@ -808,6 +1050,8 @@ async function clickOutcome(page, locator, opts = {}) {
808
1050
  navHappened: !!navRes,
809
1051
  urlChanged: normalizeUrl(afterUrl) !== normalizeUrl(beforeUrl),
810
1052
  domChanged: !!domRes?.changed || afterSig !== beforeSig,
1053
+ visibilityChanged, // NEW: Tracks CSS visibility/state changes
1054
+ changeCount: domRes?.changeCount || 0, // NEW: Number of mutations detected
811
1055
  reqDelta: Math.max(0, opts.reqCounter.value - beforeReq),
812
1056
  beforeUrl,
813
1057
  afterUrl
@@ -822,16 +1066,55 @@ async function collectLinks(page, baseUrl) {
822
1066
  async function collectInteractives(page) {
823
1067
  return page.evaluate(() => {
824
1068
  const nodes = Array.from(document.querySelectorAll("button, a[href], input[type='submit'], [role='button'], [onclick]"));
825
- return nodes.slice(0, 80).map((el, idx) => ({
826
- idx,
827
- tag: el.tagName.toLowerCase(),
828
- role: el.getAttribute("role") || "",
829
- href: el.tagName === "A" ? el.getAttribute("href") || "" : "",
830
- text: (el.getAttribute("aria-label") || el.innerText || "").trim().slice(0, 80),
831
- id: el.id || "",
832
- disabled: !!(el.disabled || el.getAttribute("aria-disabled") === "true"),
833
- key: `${el.tagName}|${el.id}|${idx}`
834
- }));
1069
+ return nodes.slice(0, 80).map((el, idx) => {
1070
+ const text = (el.getAttribute("aria-label") || el.innerText || "").trim().slice(0, 80).toLowerCase();
1071
+ const classList = Array.from(el.classList).join(' ').toLowerCase();
1072
+ const id = (el.id || "").toLowerCase();
1073
+ const dataTestId = el.getAttribute("data-testid") || "";
1074
+
1075
+ // Detect element context for false positive reduction
1076
+ const isInsideModal = !!el.closest('[role="dialog"], [role="alertdialog"], .modal, .dialog, [data-radix-dialog-content]');
1077
+ const isInsideDropdown = !!el.closest('[role="menu"], [role="listbox"], .dropdown, .popover, [data-radix-menu-content]');
1078
+ const isInsideAccordion = !!el.closest('[role="region"], .accordion, [data-state], [data-radix-accordion-content]');
1079
+ const isInsideTooltip = !!el.closest('[role="tooltip"], .tooltip');
1080
+
1081
+ // Detect button intent for false positive reduction
1082
+ const looksLikeClose = /close|dismiss|cancel|x|×|✕|✖/i.test(text) || /close|dismiss/i.test(classList);
1083
+ const looksLikeToggle = /toggle|expand|collapse|show|hide|menu|hamburger|more/i.test(text) || /toggle|accordion|collaps/i.test(classList);
1084
+ const looksLikeCopy = /copy|clipboard/i.test(text) || /copy/i.test(classList);
1085
+ const looksLikeTheme = /theme|dark|light|mode/i.test(text) || /theme/i.test(classList);
1086
+ const looksLikeTab = el.getAttribute("role") === "tab" || /tab/i.test(classList);
1087
+ const looksLikeSort = /sort|order|filter/i.test(text);
1088
+
1089
+ // Elements that legitimately may not trigger detectable changes
1090
+ const isLikelyFalsePositive = looksLikeClose || looksLikeToggle || looksLikeCopy ||
1091
+ looksLikeTheme || looksLikeTab || looksLikeSort ||
1092
+ isInsideModal || isInsideDropdown || isInsideTooltip;
1093
+
1094
+ return {
1095
+ idx,
1096
+ tag: el.tagName.toLowerCase(),
1097
+ role: el.getAttribute("role") || "",
1098
+ href: el.tagName === "A" ? el.getAttribute("href") || "" : "",
1099
+ text: (el.getAttribute("aria-label") || el.innerText || "").trim().slice(0, 80),
1100
+ id: el.id || "",
1101
+ disabled: !!(el.disabled || el.getAttribute("aria-disabled") === "true"),
1102
+ key: `${el.tagName}|${el.id}|${idx}`,
1103
+ // Context for false positive reduction
1104
+ context: {
1105
+ isInsideModal,
1106
+ isInsideDropdown,
1107
+ isInsideAccordion,
1108
+ isInsideTooltip,
1109
+ looksLikeClose,
1110
+ looksLikeToggle,
1111
+ looksLikeCopy,
1112
+ looksLikeTheme,
1113
+ looksLikeTab,
1114
+ isLikelyFalsePositive
1115
+ }
1116
+ };
1117
+ });
835
1118
  });
836
1119
  }
837
1120
 
@@ -864,20 +1147,92 @@ async function attemptLogin(page, { auth }) {
864
1147
  }
865
1148
  }
866
1149
 
867
- async function runSinglePass({ label, baseUrl, context, shotsDir, danger, maxPages, maxDepth, timeoutMs, root, onProgress }) {
1150
+ async function runSinglePass({ label, baseUrl, context, shotsDir, danger, maxPages, maxDepth, timeoutMs, root, onProgress, retries = 2, stableWait = 500 }) {
868
1151
  const page = await context.newPage();
869
1152
  page.setDefaultTimeout(timeoutMs);
1153
+
1154
// Helper for flaky-resistant navigation with retries.
// Uses the enclosing scope's `page`, `retries` and `stableWait`.
// Resolves with whatever `page.goto` returns; rethrows the last navigation
// error once every attempt has failed.
async function safeGoto(targetUrl, opts = {}) {
  // `retries` acts as the total attempt budget. Clamp it to at least one so
  // that 0, negative, fractional or NaN values still navigate instead of
  // silently returning null without ever calling page.goto.
  const maxAttempts = Math.max(1, Math.floor(Number(retries)) || 1);

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      const res = await page.goto(targetUrl, { waitUntil: "domcontentloaded", ...opts });
      // Wait for stability to reduce flakiness
      if (stableWait > 0) {
        await page.waitForTimeout(stableWait);
      }
      // Best effort: a page that never goes network-idle is not a failure
      await page.waitForLoadState("networkidle", { timeout: 6000 }).catch(() => {});
      return res;
    } catch (err) {
      if (attempt === maxAttempts) throw err;
      await page.waitForTimeout(500 * attempt); // Linear backoff: 500ms, 1000ms, ...
    }
  }
  return null;
}
1172
+
1173
// Helper for flaky-resistant clicks with retries.
// Uses the enclosing scope's `page`, `retries`, `stableWait` and `timeoutMs`.
// Never throws: resolves with { success: true } or
// { success: false, error } carrying the last click error message.
async function safeClick(locator, opts = {}) {
  // `retries` acts as the total attempt budget. Clamp it to at least one so
  // that 0, negative, fractional or NaN values still attempt the click
  // instead of reporting "Max retries exceeded" without ever clicking.
  const maxAttempts = Math.max(1, Math.floor(Number(retries)) || 1);

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      await locator.click({ timeout: timeoutMs / 2, ...opts });
      if (stableWait > 0) {
        await page.waitForTimeout(stableWait);
      }
      return { success: true };
    } catch (err) {
      if (attempt === maxAttempts) return { success: false, error: err.message };
      await page.waitForTimeout(300 * attempt); // Linear backoff: 300ms, 600ms, ...
    }
  }
  return { success: false, error: 'Max retries exceeded' };
}
870
1189
 
871
1190
  const reqCounter = { value: 0 };
872
1191
  const netErrors = [];
873
1192
  const consoleErrors = [];
874
1193
  const findings = [];
875
1194
  const pagesVisited = [];
1195
+ const fakeDataDetections = []; // Track fake data detections
1196
+ const processedUrls = new Set(); // Avoid duplicate detections
876
1197
 
877
1198
  page.on("requestfinished", () => { reqCounter.value += 1; });
878
1199
  page.on("requestfailed", (req) => { netErrors.push({ url: req.url(), failure: req.failure()?.errorText || "unknown" }); });
879
1200
  page.on("console", (msg) => { if (msg.type() === "error") consoleErrors.push({ text: msg.text().slice(0, 500) }); });
880
1201
  page.on("pageerror", (err) => { consoleErrors.push({ text: String(err?.message || err).slice(0, 500) }); });
1202
+
1203
+ // Intercept responses for fake data detection
1204
+ page.on("response", async (response) => {
1205
+ try {
1206
+ const url = response.url();
1207
+ const status = response.status();
1208
+
1209
+ // Skip already processed URLs and static assets
1210
+ if (processedUrls.has(url)) return;
1211
+ if (/\.(js|css|png|jpg|svg|ico|woff|woff2|ttf|gif|webp)(\?|$)/i.test(url)) return;
1212
+
1213
+ // Only check API-like endpoints
1214
+ if (!url.includes('/api/') && !url.includes('/graphql') && !url.includes('/trpc') &&
1215
+ !response.headers()['content-type']?.includes('application/json')) {
1216
+ return;
1217
+ }
1218
+
1219
+ processedUrls.add(url);
1220
+
1221
+ let body = '';
1222
+ try {
1223
+ body = await response.text();
1224
+ } catch {
1225
+ // Some responses can't be read
1226
+ }
1227
+
1228
+ const detections = classifyNetworkTraffic(url, body, status);
1229
+ if (detections && detections.length > 0) {
1230
+ fakeDataDetections.push(...detections);
1231
+ }
1232
+ } catch {
1233
+ // Ignore errors in response processing
1234
+ }
1235
+ });
881
1236
 
882
1237
  const visited = new Set();
883
1238
  const queue = [{ url: baseUrl, depth: 0 }];
@@ -897,8 +1252,7 @@ async function runSinglePass({ label, baseUrl, context, shotsDir, danger, maxPag
897
1252
  onProgress({ page: pagesVisited.length + 1, maxPages, url: targetUrl });
898
1253
  }
899
1254
 
900
- const res = await page.goto(targetUrl, { waitUntil: "domcontentloaded" }).catch(() => null);
901
- await page.waitForLoadState("networkidle", { timeout: 6000 }).catch(() => {});
1255
+ const res = await safeGoto(targetUrl).catch(() => null);
902
1256
 
903
1257
  const status = res ? res.status() : null;
904
1258
  const loginLike = await isLoginPage(page);
@@ -941,6 +1295,10 @@ async function runSinglePass({ label, baseUrl, context, shotsDir, danger, maxPag
941
1295
  const out = await clickOutcome(page, locator, { reqCounter });
942
1296
 
943
1297
  if (!out.clickOk) {
1298
+ // Skip click failures for elements that are likely intentionally not clickable
1299
+ // (e.g., visually hidden close buttons, buttons behind overlays)
1300
+ if (el.context?.isLikelyFalsePositive) continue;
1301
+
944
1302
  const shot = path.join(shotsDir, `${label}_click_fail_${sha1(el.key)}.png`);
945
1303
  await page.screenshot({ path: shot }).catch(() => {});
946
1304
  findings.push({
@@ -955,24 +1313,119 @@ async function runSinglePass({ label, baseUrl, context, shotsDir, danger, maxPag
955
1313
  continue;
956
1314
  }
957
1315
 
958
- if (!out.navHappened && !out.urlChanged && !out.domChanged && out.reqDelta === 0) {
1316
+ // Enhanced Dead UI detection with false positive reduction
1317
+ // An element is considered "dead" if clicking produced NO observable effect:
1318
+ // - No navigation
1319
+ // - No URL change
1320
+ // - No DOM mutations
1321
+ // - No CSS visibility/state changes
1322
+ // - No network requests
1323
+ const noEffect = !out.navHappened && !out.urlChanged && !out.domChanged &&
1324
+ !out.visibilityChanged && out.reqDelta === 0;
1325
+
1326
+ if (noEffect) {
1327
+ // Apply false positive reduction based on element context
1328
+ const ctx = el.context || {};
1329
+
1330
+ // Skip elements that are KNOWN to not produce observable changes
1331
+ // These are legitimate UI patterns that don't need fixes
1332
+ if (ctx.looksLikeClose && ctx.isInsideModal) {
1333
+ // Close button inside a modal - the modal itself may have closed
1334
+ continue;
1335
+ }
1336
+ if (ctx.looksLikeCopy) {
1337
+ // Copy buttons work via clipboard API, no DOM change expected
1338
+ continue;
1339
+ }
1340
+ if (ctx.looksLikeTheme) {
1341
+ // Theme toggles may only change CSS custom properties
1342
+ continue;
1343
+ }
1344
+
1345
+ // Downgrade severity for likely false positives
1346
+ // Instead of BLOCK, use WARN for elements in contexts that commonly have no-op behavior
1347
+ let severity = "BLOCK";
1348
+ let reason = "Click produced no navigation, no network activity, and no DOM change";
1349
+
1350
+ if (ctx.isLikelyFalsePositive) {
1351
+ severity = "WARN";
1352
+ reason = `Click produced no observable change (possible false positive: ${
1353
+ ctx.looksLikeToggle ? 'toggle button' :
1354
+ ctx.looksLikeTab ? 'tab element' :
1355
+ ctx.looksLikeSort ? 'sort control' :
1356
+ ctx.isInsideDropdown ? 'inside dropdown' :
1357
+ ctx.isInsideAccordion ? 'inside accordion' :
1358
+ 'contextual element'
1359
+ })`;
1360
+ }
1361
+
1362
+ // Always skip tooltip-related elements as they are purely visual
1363
+ if (ctx.isInsideTooltip) continue;
1364
+
959
1365
  const shot = path.join(shotsDir, `${label}_dead_${sha1(el.key)}.png`);
960
1366
  await page.screenshot({ path: shot }).catch(() => {});
961
1367
  findings.push({
962
1368
  id: `R_${label}_DEAD_${sha1(el.key).slice(0, 8)}`,
963
- severity: "BLOCK",
1369
+ severity,
964
1370
  category: "DeadUI",
965
1371
  title: `[${label}] Dead UI: ${el.text || el.tag}`,
966
1372
  page: page.url(),
967
- reason: "Click produced no navigation, no network activity, and no DOM change",
968
- screenshot: path.relative(root, shot).replace(/\\/g, "/")
1373
+ reason,
1374
+ screenshot: path.relative(root, shot).replace(/\\/g, "/"),
1375
+ confidence: ctx.isLikelyFalsePositive ? 0.5 : 0.9, // Add confidence score
1376
+ context: ctx // Include context for debugging
969
1377
  });
970
1378
  }
971
1379
  }
972
1380
  }
973
1381
 
974
1382
  await page.close();
975
- return { label, pagesVisited, findings, consoleErrors: consoleErrors.slice(0, 50), networkErrors: netErrors.slice(0, 50) };
1383
+
1384
+ // Convert fake data detections to findings with confidence-based filtering
1385
+ const seenFakeUrls = new Set();
1386
+
1387
+ // Sort by confidence (highest first) to prioritize most reliable detections
1388
+ const sortedDetections = [...fakeDataDetections].sort((a, b) =>
1389
+ (b.confidence || 0.5) - (a.confidence || 0.5)
1390
+ );
1391
+
1392
+ for (const detection of sortedDetections) {
1393
+ // Dedupe by URL + type + pattern to avoid near-duplicates
1394
+ const key = `${detection.url}:${detection.type}:${detection.pattern || ''}`;
1395
+ if (seenFakeUrls.has(key)) continue;
1396
+ seenFakeUrls.add(key);
1397
+
1398
+ // Skip very low confidence detections (likely false positives)
1399
+ const confidence = detection.confidence || 0.5;
1400
+ if (confidence < 0.50) continue;
1401
+
1402
+ // Downgrade severity for medium confidence detections
1403
+ let severity = detection.severity;
1404
+ if (confidence < 0.70 && severity === 'BLOCK') {
1405
+ severity = 'WARN';
1406
+ }
1407
+
1408
+ findings.push({
1409
+ id: `R_${label}_FAKE_${sha1(key).slice(0, 8)}`,
1410
+ severity,
1411
+ category: detection.type === 'fake-domain' ? 'FakeDomain' :
1412
+ detection.type === 'fake-response' ? 'FakeResponse' : 'MockStatus',
1413
+ title: `[${label}] Fake Data: ${detection.evidence}`,
1414
+ page: detection.url,
1415
+ reason: detection.evidence,
1416
+ confidence, // Include confidence score for transparency
1417
+ pattern: detection.pattern // Include pattern for debugging
1418
+ });
1419
+ }
1420
+
1421
+ return {
1422
+ label,
1423
+ pagesVisited,
1424
+ findings,
1425
+ consoleErrors: consoleErrors.slice(0, 50),
1426
+ networkErrors: netErrors.slice(0, 50),
1427
+ fakeDataDetections: fakeDataDetections.slice(0, 100)
1428
+ };
976
1429
  }
977
1430
 
978
1431
  function buildAuthCoverageFindings({ baseUrl, matchers, anonPass, authPass }) {
@@ -1024,6 +1477,95 @@ function coverageFromTruthpack({ truthpack, visitedUrls }) {
1024
1477
  return { total, hit, percent: total ? Math.round((hit / total) * 100) : 0, missed: Array.from(uiPaths).filter(p => !visitedPaths.has(p)).slice(0, 50) };
1025
1478
  }
1026
1479
 
1480
+ // ═══════════════════════════════════════════════════════════════════════════════
1481
+ // FLAKINESS & STABILITY VERIFICATION
1482
+ // ═══════════════════════════════════════════════════════════════════════════════
1483
+
1484
/**
 * Aggregate findings from multiple stability runs.
 *
 * Findings are grouped by a stable key (category + title with the
 * `[ANON]`/`[AUTH]` pass label stripped + page). A finding is kept only when
 * the fraction of runs it appeared in is at least `threshold`.
 *
 * A run contributes at most once per key: because the pass label is stripped
 * from the key, the same issue reported by both the anonymous and the
 * authenticated pass of one run must not be counted as two runs.
 *
 * With zero runs an empty array is returned; with exactly one run that run's
 * findings are returned as-is (no deduplication, no `stability` metadata).
 *
 * @param {Array<Array<Object>>} runFindings - Array of findings arrays from each run
 * @param {number} threshold - Minimum occurrence rate (0-1) to include a finding
 * @returns {Array<Object>} Deduplicated findings, each carrying a `stability`
 *   object: `occurrenceRate`, `appearedInRuns`, `totalRuns`, `flakinessScore`
 *   and `isFlaky`
 */
function aggregateStabilityFindings(runFindings, threshold = 0.66) {
  const totalRuns = runFindings.length;
  if (totalRuns === 0) return [];
  if (totalRuns === 1) return runFindings[0] || [];

  // Stable key for deduplication (category + normalized title + page).
  const stableKey = (finding) =>
    `${finding.category}|${finding.title?.replace(/\[ANON\]|\[AUTH\]/g, '').trim()}|${finding.page || ''}`;

  // key -> { finding: first occurrence (shallow copy), runCount: number of runs containing it }
  const findingCounts = new Map();

  for (const findings of runFindings) {
    // Keys already counted for this run, so duplicates within one run
    // (e.g. ANON + AUTH reporting the same issue) count only once.
    const seenThisRun = new Set();

    // Tolerate a missing findings array for a run; it still counts as a run.
    for (const finding of findings || []) {
      const key = stableKey(finding);

      let entry = findingCounts.get(key);
      if (!entry) {
        entry = { finding: { ...finding }, runCount: 0 };
        findingCounts.set(key, entry);
      }

      if (!seenThisRun.has(key)) {
        seenThisRun.add(key);
        entry.runCount++;
      }
    }
  }

  // Keep findings that meet the threshold and attach stability metadata.
  const aggregated = [];

  for (const { finding, runCount } of findingCounts.values()) {
    const occurrenceRate = runCount / totalRuns;
    if (occurrenceRate < threshold) continue;

    // Flakiness score: 0 = appeared in every run, approaching 1 = rarely appeared.
    const flakinessScore = 1 - occurrenceRate;

    const aggregatedFinding = {
      ...finding,
      stability: {
        occurrenceRate: Math.round(occurrenceRate * 100) / 100,
        appearedInRuns: runCount,
        totalRuns,
        flakinessScore: Math.round(flakinessScore * 100) / 100,
        isFlaky: flakinessScore > 0.1, // missing from more than 10% of runs = flaky
      }
    };

    // A finding missing from some runs gets its run ratio appended to the reason.
    if (runCount < totalRuns) {
      aggregatedFinding.reason = `${aggregatedFinding.reason || ''} (appeared ${runCount}/${totalRuns} runs)`.trim();
    }

    aggregated.push(aggregatedFinding);
  }

  return aggregated;
}
1552
+
1553
/**
 * Write the stability verification summary to the console.
 *
 * Nothing is printed when there was at most one run. The "Filtered (flaky)"
 * line is printed only when `filteredCount` is greater than zero.
 *
 * @param {number} totalRuns - Number of stability runs that were executed
 * @param {number} stableFindings - Count of findings kept after aggregation
 * @param {number} filteredCount - Count of findings dropped as inconsistent
 */
function printStabilityResults(totalRuns, stableFindings, filteredCount) {
  if (totalRuns <= 1) {
    return;
  }

  // Wrap a label in the dim style followed by a reset.
  const dim = (text) => `${c.dim}${text}${c.reset}`;

  console.log();
  console.log(` ${colors.info}${ICONS.target}${c.reset} ${c.bold}Stability Verification${c.reset}`);
  console.log(` ${dim('Total runs:')} ${totalRuns}`);
  console.log(` ${dim('Stable findings:')} ${stableFindings} ${dim('(appeared in majority of runs)')}`);

  if (!(filteredCount > 0)) {
    return;
  }

  console.log(` ${dim('Filtered (flaky):')} ${colors.success}${filteredCount}${c.reset} ${dim('(inconsistent across runs)')}`);
}
1568
+
1027
1569
  // ═══════════════════════════════════════════════════════════════════════════════
1028
1570
  // MAIN REALITY FUNCTION
1029
1571
  // ═══════════════════════════════════════════════════════════════════════════════
@@ -1055,6 +1597,13 @@ async function runReality(argsOrOpts = {}) {
1055
1597
  verifyAuth: argsOrOpts.includes("--verify-auth"),
1056
1598
  headed: argsOrOpts.includes("--headed"),
1057
1599
  danger: argsOrOpts.includes("--danger"),
1600
+ // Visual artifacts options
1601
+ recordVideo: argsOrOpts.includes("--record-video") || argsOrOpts.includes("--video"),
1602
+ recordTrace: argsOrOpts.includes("--record-trace") || argsOrOpts.includes("--trace"),
1603
+ recordHar: argsOrOpts.includes("--record-har") || argsOrOpts.includes("--har"),
1604
+ // Flakiness reduction options
1605
+ retries: parseInt(getArg(["--retries"]) || "2", 10),
1606
+ stableWait: parseInt(getArg(["--stable-wait"]) || "500", 10),
1058
1607
  maxPages: parseInt(getArg(["--max-pages"]) || "18", 10),
1059
1608
  maxDepth: parseInt(getArg(["--max-depth"]) || "2", 10),
1060
1609
  timeoutMs: parseInt(getArg(["--timeout"]) || "15000", 10),
@@ -1074,7 +1623,16 @@ async function runReality(argsOrOpts = {}) {
1074
1623
  maxPages = 18,
1075
1624
  maxDepth = 2,
1076
1625
  danger = false,
1077
- timeoutMs = 15000
1626
+ timeoutMs = 15000,
1627
+ // Visual artifacts (videos, traces, HAR)
1628
+ recordVideo = false,
1629
+ recordTrace = false,
1630
+ recordHar = false,
1631
+ // Flakiness reduction
1632
+ retries = 2,
1633
+ stableWait = 500,
1634
+ stabilityRuns = 1,
1635
+ flakyThreshold = 0.66
1078
1636
  } = argsOrOpts;
1079
1637
 
1080
1638
  if (!url) {
@@ -1124,6 +1682,9 @@ async function runReality(argsOrOpts = {}) {
1124
1682
  console.log(` ${c.dim}URL:${c.reset} ${colors.accent}${url}${c.reset}`);
1125
1683
  console.log(` ${c.dim}Mode:${c.reset} ${verifyAuth ? `${colors.auth}Two-Pass (Auth)${c.reset}` : `${colors.anon}Single-Pass (Anon)${c.reset}`}`);
1126
1684
  console.log(` ${c.dim}Budget:${c.reset} ${maxPages} pages, depth ${maxDepth}`);
1685
+ if (stabilityRuns > 1) {
1686
+ console.log(` ${c.dim}Stability:${c.reset} ${colors.info}${stabilityRuns} runs${c.reset}, threshold ${Math.round(flakyThreshold * 100)}%`);
1687
+ }
1127
1688
 
1128
1689
  // Tier warning if applicable
1129
1690
  if (tierInfo.tier === 'free' && (originalMaxPages > maxPages || (originalVerifyAuth && !verifyAuth))) {
@@ -1145,7 +1706,13 @@ async function runReality(argsOrOpts = {}) {
1145
1706
  const baseUrl = normalizeUrl(url);
1146
1707
  const outBase = path.join(root, ".vibecheck", "reality", stamp());
1147
1708
  const shotsDir = path.join(outBase, "screenshots");
1709
+ const videosDir = path.join(outBase, "videos");
1710
+ const tracesDir = path.join(outBase, "traces");
1711
+ const harDir = path.join(outBase, "har");
1148
1712
  ensureDir(shotsDir);
1713
+ if (recordVideo) ensureDir(videosDir);
1714
+ if (recordTrace) ensureDir(tracesDir);
1715
+ if (recordHar) ensureDir(harDir);
1149
1716
 
1150
1717
  const tp = loadTruthpack(root, truthpack);
1151
1718
  const matchers = getProtectedMatchersFromTruthpack(tp);
@@ -1161,98 +1728,229 @@ async function runReality(argsOrOpts = {}) {
1161
1728
  stopSpinner('Browser launched', true);
1162
1729
 
1163
1730
  // ═══════════════════════════════════════════════════════════════════════════
1164
- // PASS A: ANONYMOUS
1731
+ // STABILITY RUNS (multiple passes for flakiness detection)
1165
1732
  // ═══════════════════════════════════════════════════════════════════════════
1166
- printPassHeader('anon', baseUrl);
1167
-
1168
- startSpinner('Crawling anonymously...', colors.anon);
1169
- const anonContext = await browser.newContext();
1170
- const anonPass = await runSinglePass({
1171
- label: "ANON",
1172
- baseUrl,
1173
- context: anonContext,
1174
- shotsDir,
1175
- danger,
1176
- maxPages,
1177
- maxDepth,
1178
- timeoutMs,
1179
- root,
1180
- onProgress: ({ page, maxPages: mp, url: currentUrl }) => {
1181
- // Could update spinner here if desired
1182
- }
1183
- });
1184
- await anonContext.close();
1185
- stopSpinner(`Crawled ${anonPass.pagesVisited.length} pages`, true);
1186
1733
 
1187
- printPassResult('anon', anonPass);
1188
-
1189
- // ═══════════════════════════════════════════════════════════════════════════
1190
- // PASS B: AUTHENTICATED (optional)
1191
- // ═══════════════════════════════════════════════════════════════════════════
1192
- let authPass = null;
1193
- let authFindings = [];
1734
+ const allRunFindings = [];
1735
+ let lastAnonPass = null;
1736
+ let lastAuthPass = null;
1737
+ let anonVideoPath = null;
1738
+ let authVideoPath = null;
1739
+ let anonTracePath = null;
1740
+ let authTracePath = null;
1194
1741
  let savedStatePath = null;
1742
+
1743
+ for (let runNum = 1; runNum <= stabilityRuns; runNum++) {
1744
+ const isFirstRun = runNum === 1;
1745
+ const isLastRun = runNum === stabilityRuns;
1746
+
1747
+ if (stabilityRuns > 1) {
1748
+ console.log();
1749
+ console.log(` ${colors.info}${BOX.hHorizontal.repeat(3)}${c.reset} ${c.bold}Stability Run ${runNum}/${stabilityRuns}${c.reset}`);
1750
+ }
1195
1751
 
1196
- if (verifyAuth) {
1197
- printPassHeader('auth', baseUrl);
1752
+ // ═══════════════════════════════════════════════════════════════════════════
1753
+ // PASS A: ANONYMOUS
1754
+ // ═══════════════════════════════════════════════════════════════════════════
1755
+ printPassHeader('anon', baseUrl);
1198
1756
 
1199
- startSpinner('Setting up authenticated session...', colors.auth);
1200
- const ctxOpts = storageState ? { storageState } : {};
1201
- const authContext = await browser.newContext(ctxOpts);
1202
- const authPage = await authContext.newPage();
1203
- await authPage.goto(baseUrl, { waitUntil: "domcontentloaded" }).catch(() => {});
1204
- await authPage.waitForLoadState("networkidle", { timeout: 6000 }).catch(() => {});
1205
-
1206
- if (!storageState && auth) {
1207
- stopSpinner('Attempting login...', true);
1208
- startSpinner('Logging in...', colors.auth);
1209
-
1210
- const loginRes = await attemptLogin(authPage, { auth });
1211
-
1212
- if (loginRes.ok) {
1213
- stopSpinner('Login successful', true);
1214
- if (saveStorageState) {
1215
- const dest = path.isAbsolute(saveStorageState) ? saveStorageState : path.join(root, saveStorageState);
1216
- ensureDir(path.dirname(dest));
1217
- await authContext.storageState({ path: dest }).catch(() => {});
1218
- savedStatePath = dest;
1219
- console.log(` ${colors.success}${ICONS.check}${c.reset} Session saved: ${c.dim}${path.relative(root, dest)}${c.reset}`);
1220
- }
1221
- } else {
1222
- stopSpinner('Login failed - continuing without auth', false);
1223
- }
1224
- } else {
1225
- stopSpinner('Using existing session', true);
1757
+ startSpinner('Crawling anonymously...', colors.anon);
1758
+
1759
+ // Build context options for video/HAR recording (only on last run to save resources)
1760
+ const anonContextOpts = {};
1761
+ if (recordVideo && isLastRun) {
1762
+ anonContextOpts.recordVideo = {
1763
+ dir: videosDir,
1764
+ size: { width: 1280, height: 720 }
1765
+ };
1766
+ }
1767
+ if (recordHar && isLastRun) {
1768
+ anonContextOpts.recordHar = {
1769
+ path: path.join(harDir, 'anon-traffic.har'),
1770
+ mode: 'full'
1771
+ };
1226
1772
  }
1227
1773
 
1228
- await authPage.close();
1229
-
1230
- startSpinner('Crawling with authentication...', colors.auth);
1231
- authPass = await runSinglePass({
1232
- label: "AUTH",
1774
+ const anonContext = await browser.newContext(anonContextOpts);
1775
+
1776
+ // Start trace recording if enabled (only on last run)
1777
+ if (recordTrace && isLastRun) {
1778
+ await anonContext.tracing.start({
1779
+ screenshots: true,
1780
+ snapshots: true,
1781
+ sources: false
1782
+ });
1783
+ }
1784
+
1785
+ const anonPass = await runSinglePass({
1786
+ label: "ANON",
1233
1787
  baseUrl,
1234
- context: authContext,
1235
- shotsDir,
1788
+ context: anonContext,
1789
+ shotsDir: isLastRun ? shotsDir : path.join(outBase, `run${runNum}`, 'screenshots'),
1236
1790
  danger,
1237
1791
  maxPages,
1238
1792
  maxDepth,
1239
1793
  timeoutMs,
1240
- root
1794
+ root,
1795
+ retries,
1796
+ stableWait,
1797
+ onProgress: ({ page, maxPages: mp, url: currentUrl }) => {
1798
+ // Could update spinner here if desired
1799
+ }
1241
1800
  });
1242
- await authContext.close();
1243
- stopSpinner(`Crawled ${authPass.pagesVisited.length} pages`, true);
1244
1801
 
1245
- printPassResult('auth', authPass);
1802
+ // Ensure shot dir exists for intermediate runs
1803
+ if (!isLastRun) {
1804
+ ensureDir(path.join(outBase, `run${runNum}`, 'screenshots'));
1805
+ }
1806
+
1807
+ // Save trace if enabled (only last run)
1808
+ if (recordTrace && isLastRun) {
1809
+ anonTracePath = path.join(tracesDir, 'anon-trace.zip');
1810
+ await anonContext.tracing.stop({ path: anonTracePath });
1811
+ }
1812
+
1813
+ // Get video path before closing context (only last run)
1814
+ if (recordVideo && isLastRun && anonPass.pagesVisited.length > 0) {
1815
+ const pages = anonContext.pages();
1816
+ if (pages.length > 0) {
1817
+ const video = pages[0].video();
1818
+ if (video) {
1819
+ try {
1820
+ anonVideoPath = await video.path();
1821
+ } catch {}
1822
+ }
1823
+ }
1824
+ }
1825
+
1826
+ await anonContext.close();
1827
+ stopSpinner(`Crawled ${anonPass.pagesVisited.length} pages`, true);
1828
+
1829
+ printPassResult('anon', anonPass);
1830
+ lastAnonPass = anonPass;
1246
1831
 
1247
- // Build auth coverage findings
1248
- if (matchers.length) {
1249
- startSpinner('Analyzing auth coverage...', colors.authCoverage);
1250
- authFindings = buildAuthCoverageFindings({ baseUrl, matchers, anonPass, authPass });
1251
- stopSpinner(`Found ${authFindings.length} auth issues`, authFindings.length === 0);
1832
+ // ═══════════════════════════════════════════════════════════════════════════
1833
+ // PASS B: AUTHENTICATED (optional)
1834
+ // ═══════════════════════════════════════════════════════════════════════════
1835
+ let authPass = null;
1836
+ let authFindings = [];
1837
+
1838
+ if (verifyAuth) {
1839
+ printPassHeader('auth', baseUrl);
1840
+
1841
+ startSpinner('Setting up authenticated session...', colors.auth);
1842
+ const ctxOpts = storageState ? { storageState } : {};
1843
+
1844
+ // Add video/HAR recording options (only last run)
1845
+ if (recordVideo && isLastRun) {
1846
+ ctxOpts.recordVideo = {
1847
+ dir: videosDir,
1848
+ size: { width: 1280, height: 720 }
1849
+ };
1850
+ }
1851
+ if (recordHar && isLastRun) {
1852
+ ctxOpts.recordHar = {
1853
+ path: path.join(harDir, 'auth-traffic.har'),
1854
+ mode: 'full'
1855
+ };
1856
+ }
1857
+
1858
+ const authContext = await browser.newContext(ctxOpts);
1859
+
1860
+ // Start trace recording if enabled (only last run)
1861
+ if (recordTrace && isLastRun) {
1862
+ await authContext.tracing.start({
1863
+ screenshots: true,
1864
+ snapshots: true,
1865
+ sources: false
1866
+ });
1867
+ }
1868
+ const authPage = await authContext.newPage();
1869
+ await authPage.goto(baseUrl, { waitUntil: "domcontentloaded" }).catch(() => {});
1870
+ await authPage.waitForLoadState("networkidle", { timeout: 6000 }).catch(() => {});
1871
+
1872
+ if (!storageState && auth && isFirstRun) {
1873
+ stopSpinner('Attempting login...', true);
1874
+ startSpinner('Logging in...', colors.auth);
1875
+
1876
+ const loginRes = await attemptLogin(authPage, { auth });
1877
+
1878
+ if (loginRes.ok) {
1879
+ stopSpinner('Login successful', true);
1880
+ if (saveStorageState) {
1881
+ const dest = path.isAbsolute(saveStorageState) ? saveStorageState : path.join(root, saveStorageState);
1882
+ ensureDir(path.dirname(dest));
1883
+ await authContext.storageState({ path: dest }).catch(() => {});
1884
+ savedStatePath = dest;
1885
+ console.log(` ${colors.success}${ICONS.check}${c.reset} Session saved: ${c.dim}${path.relative(root, dest)}${c.reset}`);
1886
+ }
1887
+ } else {
1888
+ stopSpinner('Login failed - continuing without auth', false);
1889
+ }
1890
+ } else {
1891
+ stopSpinner('Using existing session', true);
1892
+ }
1893
+
1894
+ await authPage.close();
1895
+
1896
+ startSpinner('Crawling with authentication...', colors.auth);
1897
+ authPass = await runSinglePass({
1898
+ label: "AUTH",
1899
+ baseUrl,
1900
+ context: authContext,
1901
+ shotsDir: isLastRun ? shotsDir : path.join(outBase, `run${runNum}`, 'screenshots'),
1902
+ danger,
1903
+ maxPages,
1904
+ maxDepth,
1905
+ timeoutMs,
1906
+ root,
1907
+ retries,
1908
+ stableWait
1909
+ });
1910
+
1911
+ // Save trace if enabled (only last run)
1912
+ if (recordTrace && isLastRun) {
1913
+ authTracePath = path.join(tracesDir, 'auth-trace.zip');
1914
+ await authContext.tracing.stop({ path: authTracePath });
1915
+ }
1916
+
1917
+ // Get video path before closing context (only last run)
1918
+ if (recordVideo && isLastRun && authPass.pagesVisited.length > 0) {
1919
+ const pages = authContext.pages();
1920
+ if (pages.length > 0) {
1921
+ const video = pages[0].video();
1922
+ if (video) {
1923
+ try {
1924
+ authVideoPath = await video.path();
1925
+ } catch {}
1926
+ }
1927
+ }
1928
+ }
1929
+
1930
+ await authContext.close();
1931
+ stopSpinner(`Crawled ${authPass.pagesVisited.length} pages`, true);
1932
+
1933
+ printPassResult('auth', authPass);
1934
+ lastAuthPass = authPass;
1935
+
1936
+ // Build auth coverage findings
1937
+ if (matchers.length) {
1938
+ startSpinner('Analyzing auth coverage...', colors.authCoverage);
1939
+ authFindings = buildAuthCoverageFindings({ baseUrl, matchers, anonPass, authPass });
1940
+ stopSpinner(`Found ${authFindings.length} auth issues`, authFindings.length === 0);
1941
+ }
1252
1942
  }
1943
+
1944
+ // Collect findings from this run
1945
+ const runFindings = [...anonPass.findings, ...(authPass?.findings || []), ...authFindings];
1946
+ allRunFindings.push(runFindings);
1253
1947
  }
1254
1948
 
1255
1949
  await browser.close();
1950
+
1951
+ // Use last pass results for page/coverage data
1952
+ const anonPass = lastAnonPass;
1953
+ const authPass = lastAuthPass;
1256
1954
 
1257
1955
  // ═══════════════════════════════════════════════════════════════════════════
1258
1956
  // ANALYSIS & RESULTS
@@ -1261,10 +1959,49 @@ async function runReality(argsOrOpts = {}) {
1261
1959
  const allVisited = [...anonPass.pagesVisited.map(p => p.url), ...(authPass?.pagesVisited || []).map(p => p.url)];
1262
1960
  const coverage = coverageFromTruthpack({ truthpack: tp, visitedUrls: allVisited });
1263
1961
 
1264
- const findings = [...anonPass.findings, ...(authPass?.findings || []), ...authFindings];
1962
+ // Aggregate findings from stability runs (filters out flaky findings)
1963
+ let findings;
1964
+ let filteredFlakyCount = 0;
1965
+
1966
+ if (stabilityRuns > 1) {
1967
+ // Count total unique findings across all runs before filtering
1968
+ const allFindingsFlat = allRunFindings.flat();
1969
+ const uniqueBeforeFilter = new Set(allFindingsFlat.map(f =>
1970
+ `${f.category}|${f.title?.replace(/\[ANON\]|\[AUTH\]/g, '').trim()}|${f.page || ''}`
1971
+ )).size;
1972
+
1973
+ findings = aggregateStabilityFindings(allRunFindings, flakyThreshold);
1974
+ filteredFlakyCount = uniqueBeforeFilter - findings.length;
1975
+
1976
+ printStabilityResults(stabilityRuns, findings.length, filteredFlakyCount);
1977
+ } else {
1978
+ // Single run - use findings directly
1979
+ findings = allRunFindings[0] || [];
1980
+ }
1981
+
1265
1982
  const blocks = findings.filter(f => f.severity === "BLOCK").length;
1266
1983
  const warns = findings.filter(f => f.severity === "WARN").length;
1267
1984
 
1985
+ // Build artifact manifest
1986
+ const artifacts = {
1987
+ screenshots: shotsDir ? path.relative(root, shotsDir).replace(/\\/g, "/") : null,
1988
+ videos: recordVideo ? {
1989
+ directory: path.relative(root, videosDir).replace(/\\/g, "/"),
1990
+ anon: anonVideoPath ? path.relative(root, anonVideoPath).replace(/\\/g, "/") : null,
1991
+ auth: authVideoPath ? path.relative(root, authVideoPath).replace(/\\/g, "/") : null
1992
+ } : null,
1993
+ traces: recordTrace ? {
1994
+ directory: path.relative(root, tracesDir).replace(/\\/g, "/"),
1995
+ anon: anonTracePath ? path.relative(root, anonTracePath).replace(/\\/g, "/") : null,
1996
+ auth: authTracePath ? path.relative(root, authTracePath).replace(/\\/g, "/") : null
1997
+ } : null,
1998
+ har: recordHar ? {
1999
+ directory: path.relative(root, harDir).replace(/\\/g, "/"),
2000
+ anon: path.join(harDir, 'anon-traffic.har'),
2001
+ auth: path.join(harDir, 'auth-traffic.har')
2002
+ } : null
2003
+ };
2004
+
1268
2005
  // Build report
1269
2006
  const report = {
1270
2007
  meta: {
@@ -1277,8 +2014,16 @@ async function runReality(argsOrOpts = {}) {
1277
2014
  maxDepth,
1278
2015
  truthpackLoaded: !!tp,
1279
2016
  protectedMatcherCount: matchers.length,
1280
- savedStorageState: savedStatePath ? path.relative(root, savedStatePath).replace(/\\/g, "/") : null
2017
+ savedStorageState: savedStatePath ? path.relative(root, savedStatePath).replace(/\\/g, "/") : null,
2018
+ recordVideo,
2019
+ recordTrace,
2020
+ recordHar,
2021
+ // Flakiness/stability metadata
2022
+ stabilityRuns,
2023
+ flakyThreshold,
2024
+ filteredFlakyCount: stabilityRuns > 1 ? filteredFlakyCount : 0
1281
2025
  },
2026
+ artifacts,
1282
2027
  coverage,
1283
2028
  passes: { anon: anonPass, auth: authPass },
1284
2029
  findings,