@vibecheckai/cli 3.1.6 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/README.md +27 -32
  2. package/bin/registry.js +208 -343
  3. package/bin/runners/context/generators/mcp.js +18 -0
  4. package/bin/runners/context/index.js +72 -4
  5. package/bin/runners/context/proof-context.js +293 -1
  6. package/bin/runners/context/security-scanner.js +311 -73
  7. package/bin/runners/lib/analyzers.js +607 -20
  8. package/bin/runners/lib/detectors-v2.js +172 -15
  9. package/bin/runners/lib/entitlements-v2.js +48 -1
  10. package/bin/runners/lib/evidence-pack.js +678 -0
  11. package/bin/runners/lib/html-proof-report.js +913 -0
  12. package/bin/runners/lib/missions/plan.js +231 -41
  13. package/bin/runners/lib/missions/templates.js +125 -0
  14. package/bin/runners/lib/scan-output.js +492 -253
  15. package/bin/runners/lib/ship-output.js +901 -641
  16. package/bin/runners/runCheckpoint.js +44 -3
  17. package/bin/runners/runContext.d.ts +4 -0
  18. package/bin/runners/runContext.js +2 -3
  19. package/bin/runners/runDoctor.js +11 -4
  20. package/bin/runners/runFix.js +51 -341
  21. package/bin/runners/runInit.js +37 -20
  22. package/bin/runners/runPolish.d.ts +4 -0
  23. package/bin/runners/runPolish.js +608 -29
  24. package/bin/runners/runProve.js +210 -25
  25. package/bin/runners/runReality.js +861 -107
  26. package/bin/runners/runScan.js +238 -4
  27. package/bin/runners/runShip.js +19 -3
  28. package/bin/runners/runWatch.js +25 -5
  29. package/bin/vibecheck.js +35 -47
  30. package/mcp-server/consolidated-tools.js +408 -42
  31. package/mcp-server/index.js +152 -15
  32. package/mcp-server/package.json +1 -1
  33. package/mcp-server/proof-tools.js +571 -0
  34. package/mcp-server/tier-auth.js +22 -19
  35. package/mcp-server/tools-v3.js +744 -0
  36. package/mcp-server/truth-firewall-tools.js +190 -4
  37. package/package.json +3 -1
  38. package/bin/runners/runBadge.js +0 -916
  39. package/bin/runners/runContracts.js +0 -105
  40. package/bin/runners/runCtx.js +0 -680
  41. package/bin/runners/runCtxDiff.js +0 -301
  42. package/bin/runners/runCtxGuard.js +0 -176
  43. package/bin/runners/runCtxSync.js +0 -116
  44. package/bin/runners/runExport.js +0 -93
  45. package/bin/runners/runGraph.js +0 -454
  46. package/bin/runners/runInstall.js +0 -273
  47. package/bin/runners/runLabs.js +0 -341
  48. package/bin/runners/runLaunch.js +0 -181
  49. package/bin/runners/runPR.js +0 -255
  50. package/bin/runners/runPermissions.js +0 -310
  51. package/bin/runners/runPreflight.js +0 -580
  52. package/bin/runners/runReplay.js +0 -499
  53. package/bin/runners/runSecurity.js +0 -92
  54. package/bin/runners/runShare.js +0 -212
  55. package/bin/runners/runStatus.js +0 -102
  56. package/bin/runners/runVerify.js +0 -272
@@ -1,5 +1,5 @@
1
1
  /**
2
- * Reality Mode v2 - Two-Pass Auth Verification + Dead UI Crawler
2
+ * Reality Mode v2 - Two-Pass Auth Verification + Dead UI Crawler + Fake Data Detection
3
3
  *
4
4
  * ═══════════════════════════════════════════════════════════════════════════════
5
5
  * ENTERPRISE EDITION - World-Class Terminal Experience
@@ -8,7 +8,7 @@
8
8
  * TIER ENFORCEMENT:
9
9
  * - FREE: Preview mode (5 pages, 20 clicks, no auth boundary)
10
10
  * - STARTER: Full budgets + basic auth verification
11
- * - PRO: Advanced auth boundary (multi-role, 2-pass)
11
+ * - PRO: Advanced auth boundary (multi-role, 2-pass) + fake data detection
12
12
  *
13
13
  * Pass A (anon): crawl + click, record which routes look protected
14
14
  * Pass B (auth): crawl same routes using storageState, verify protected routes accessible
@@ -17,6 +17,9 @@
17
17
  * - Dead UI (clicks that do nothing)
18
18
  * - HTTP errors (4xx/5xx)
19
19
  * - Auth coverage (protected route reachable anonymously = BLOCK)
20
+ * - Fake domain detection (localhost, jsonplaceholder, ngrok, mockapi.io)
21
+ * - Fake response detection (demo IDs, test keys, placeholder data)
22
+ * - Mock status codes (418, 999, etc.)
20
23
  * - Route coverage stats
21
24
  */
22
25
 
@@ -25,6 +28,7 @@
25
28
  const fs = require("fs");
26
29
  const path = require("path");
27
30
  const crypto = require("crypto");
31
+ const { parseGlobalFlags, shouldShowBanner } = require("./lib/global-flags");
28
32
 
29
33
  // Entitlements enforcement
30
34
  const entitlements = require("./lib/entitlements-v2");
@@ -151,10 +155,177 @@ ${rgb(255, 80, 40)} ╚████╔╝ ██║██████╔╝█
151
155
  ${rgb(255, 60, 20)} ╚═══╝ ╚═╝╚═════╝ ╚══════╝ ╚═════╝╚═╝ ╚═╝╚══════╝ ╚═════╝╚═╝ ╚═╝${c.reset}
152
156
 
153
157
  ${c.dim} ┌─────────────────────────────────────────────────────────────────────┐${c.reset}
154
- ${c.dim} │${c.reset} ${rgb(255, 150, 100)}🎭${c.reset} ${c.bold}REALITY${c.reset} ${c.dim}•${c.reset} ${rgb(200, 200, 200)}Runtime UI Proof${c.reset} ${c.dim}•${c.reset} ${rgb(150, 150, 150)}Dead UI Detection${c.reset} ${c.dim}│${c.reset}
158
+ ${c.dim} │${c.reset} ${rgb(255, 150, 100)}🎭${c.reset} ${c.bold}REALITY${c.reset} ${c.dim}•${c.reset} ${rgb(200, 200, 200)}Dead UI${c.reset} ${c.dim}•${c.reset} ${rgb(150, 150, 150)}Fake Data${c.reset} ${c.dim}•${c.reset} ${rgb(100, 200, 255)}Auth Coverage${c.reset} ${c.dim}│${c.reset}
155
159
  ${c.dim} └─────────────────────────────────────────────────────────────────────┘${c.reset}
156
160
  `;
157
161
 
162
// ═══════════════════════════════════════════════════════════════════════════════
// FAKE DATA DETECTION PATTERNS WITH CONFIDENCE SCORING
// (from reality-mode/reality-scanner.ts)
// Each pattern has a confidence level to reduce false positives
// ═══════════════════════════════════════════════════════════════════════════════

// URL patterns that indicate a mock/dev backend. Entries are evaluated in
// order and only the first match is reported, so keep the most specific and
// most severe entries first.
const FAKE_DOMAIN_PATTERNS = [
  // CRITICAL: These are almost certainly fake backends (confidence: 0.95+)
  { pattern: /jsonplaceholder\.typicode\.com/i, name: "JSONPlaceholder mock API", confidence: 0.99, severity: 'BLOCK' },
  { pattern: /reqres\.in/i, name: "ReqRes mock API", confidence: 0.99, severity: 'BLOCK' },
  { pattern: /mockapi\.io/i, name: "MockAPI.io", confidence: 0.99, severity: 'BLOCK' },
  { pattern: /mocky\.io/i, name: "Mocky.io", confidence: 0.99, severity: 'BLOCK' },
  { pattern: /httpbin\.org/i, name: "HTTPBin testing API", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /api\.example\.com/i, name: "Example.com API", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /fake\.api/i, name: "Fake API pattern", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /demo\.api/i, name: "Demo API pattern", confidence: 0.90, severity: 'BLOCK' },

  // HIGH: Likely development/testing (confidence: 0.7-0.9)
  // NOTE: These could be legitimate in dev/CI contexts
  { pattern: /localhost:\d+/i, name: "Localhost API", confidence: 0.75, severity: 'WARN', devContextOk: true },
  { pattern: /127\.0\.0\.1:\d+/i, name: "Loopback API", confidence: 0.75, severity: 'WARN', devContextOk: true },
  { pattern: /\.ngrok\.io/i, name: "Ngrok tunnel", confidence: 0.80, severity: 'WARN', devContextOk: true },
  { pattern: /\.ngrok-free\.app/i, name: "Ngrok free tunnel", confidence: 0.80, severity: 'WARN', devContextOk: true },

  // MEDIUM: Could be legitimate staging (confidence: 0.5-0.7)
  // NOTE: Many organizations have legitimate staging environments
  { pattern: /staging\.[^/]+\/api/i, name: "Staging API endpoint", confidence: 0.60, severity: 'WARN', stagingContextOk: true },
  { pattern: /\.local\//i, name: "Local domain", confidence: 0.50, severity: 'WARN', devContextOk: true },
  { pattern: /\.test\//i, name: "Test domain", confidence: 0.50, severity: 'WARN', devContextOk: true },
];

// Response-body patterns that indicate placeholder, demo or test data.
const FAKE_RESPONSE_PATTERNS = [
  // CRITICAL: Test API keys exposed (security issue)
  { pattern: /sk_test_[a-zA-Z0-9]{20,}/i, name: "Test Stripe secret key", confidence: 0.99, severity: 'BLOCK' },
  { pattern: /pk_test_[a-zA-Z0-9]{20,}/i, name: "Test Stripe public key", confidence: 0.95, severity: 'WARN' },

  // HIGH: Clearly fake IDs/data
  { pattern: /inv_demo_[a-zA-Z0-9]+/i, name: "Demo invoice ID", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /user_demo_[a-zA-Z0-9]+/i, name: "Demo user ID", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /cus_demo_[a-zA-Z0-9]+/i, name: "Demo customer ID", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /sub_demo_[a-zA-Z0-9]+/i, name: "Demo subscription ID", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /"mock":\s*true/i, name: "Mock flag enabled", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /"isDemo":\s*true/i, name: "Demo mode flag", confidence: 0.95, severity: 'BLOCK' },
  { pattern: /"status":\s*"simulated"/i, name: "Simulated status", confidence: 0.90, severity: 'BLOCK' },

  // MEDIUM: Placeholder content (could be legitimate in docs/examples)
  // NOTE: Need context awareness - these are fine in documentation/help pages
  { pattern: /lorem\s+ipsum\s+dolor/i, name: "Lorem ipsum placeholder", confidence: 0.70, severity: 'WARN', docsContextOk: true },
  { pattern: /john\.doe@/i, name: "John Doe placeholder email", confidence: 0.65, severity: 'WARN', docsContextOk: true },
  { pattern: /jane\.doe@/i, name: "Jane Doe placeholder email", confidence: 0.65, severity: 'WARN', docsContextOk: true },
  { pattern: /user@example\.com/i, name: "Example.com email", confidence: 0.50, severity: 'WARN', docsContextOk: true },
  { pattern: /placeholder\.(com|jpg|png)/i, name: "Placeholder domain/image", confidence: 0.60, severity: 'WARN', docsContextOk: true },

  // LOWER: Could have many false positives
  { pattern: /"id":\s*"demo"/i, name: "Demo ID value", confidence: 0.70, severity: 'WARN' },
  { pattern: /"id":\s*"test"/i, name: "Test ID value", confidence: 0.60, severity: 'WARN' },
  { pattern: /"success":\s*true[^}]*"demo"/i, name: "Demo success response", confidence: 0.75, severity: 'WARN' },
];

// URLs that are allowed and should skip detection
const FAKE_DETECTION_ALLOWLIST = [
  /\/docs?\//i,        // Documentation pages
  /\/help\//i,         // Help pages
  /\/examples?\//i,    // Example pages
  /\/demo\//i,         // Demo pages (intentional)
  /\/playground\//i,   // Playground/sandbox
  /\/api-docs?\//i,    // API documentation
  /\/swagger/i,        // Swagger docs
  /\/openapi/i,        // OpenAPI docs
  /readme/i,           // README content
  /changelog/i,        // Changelog
];

/**
 * Classify a network request/response for fake data patterns.
 *
 * Checks, in order: static-asset and allowlist short-circuits, fake domain
 * patterns (first match only), fake response-body patterns (all matches),
 * and suspicious HTTP status codes (418, 999, 0).
 *
 * When the result mixes detections with confidence >= 0.80 and lower ones,
 * only the high-confidence detections are returned to reduce noise.
 *
 * @param {string} url - Request URL.
 * @param {string} [responseBody] - Response body text; bodies that are not
 *   strings or are shorter than 20 characters are not pattern-scanned.
 * @param {number} [status] - HTTP status code of the response.
 * @param {{isDev?: boolean, isStaging?: boolean, isDocsPage?: boolean}} [context]
 *   Context hints that suppress patterns marked as acceptable in that context.
 * @returns {Array<{type: string, severity: string, evidence: string, url: string,
 *   confidence: number, pattern?: string}>|null} Detections, or null if clean.
 */
function classifyNetworkTraffic(url, responseBody, status, context = {}) {
  // Skip static assets (images, fonts, stylesheets, scripts)
  if (/\.(js|css|png|jpg|jpeg|svg|ico|woff|woff2|ttf|eot|gif|webp|mp4|webm|pdf)(\?|$)/i.test(url)) {
    return null;
  }

  // Check allowlist - skip detection for documentation/example URLs
  if (FAKE_DETECTION_ALLOWLIST.some((allowPattern) => allowPattern.test(url))) {
    return null;
  }

  const detections = [];
  const isDev = context.isDev || process.env.NODE_ENV === 'development';
  // NOTE(review): isStaging is derived from the same URL that the
  // "Staging API endpoint" rule matches on, so that rule is always skipped
  // and can never produce a detection. Left as-is pending a product decision.
  const isStaging = context.isStaging || /staging|stg|preprod/i.test(url);
  const isDocsPage = context.isDocsPage || /docs?|help|example|readme/i.test(url);

  // Check for fake domain patterns
  for (const { pattern, name, confidence, severity, devContextOk, stagingContextOk } of FAKE_DOMAIN_PATTERNS) {
    if (!pattern.test(url)) continue;

    // Skip if this pattern is OK in current context
    if (devContextOk && isDev) continue;
    if (stagingContextOk && isStaging) continue;

    detections.push({
      type: 'fake-domain',
      severity,
      evidence: `URL matches fake domain pattern: ${name}`,
      url,
      confidence,
      pattern: pattern.source,
    });
    break; // One domain match is enough
  }

  // Check response body for fake data patterns. Very short bodies are not
  // meaningful data, so they are not scanned - but they must NOT short-circuit
  // the status-code check below (a 418 with body "I'm a teapot" is still a
  // mock-status signal).
  const hasScannableBody = typeof responseBody === 'string' && responseBody.length >= 20;
  if (hasScannableBody) {
    for (const { pattern, name, confidence, severity, docsContextOk } of FAKE_RESPONSE_PATTERNS) {
      // Skip patterns that are OK in docs context
      if (docsContextOk && isDocsPage) continue;

      if (pattern.test(responseBody)) {
        detections.push({
          type: 'fake-response',
          severity,
          evidence: `Response contains ${name}`,
          url,
          confidence,
          pattern: pattern.source,
        });
      }
    }
  }

  // Check for suspicious status codes (with lower confidence)
  if (status === 418 || status === 999 || status === 0) {
    detections.push({
      type: 'mock-status',
      severity: 'WARN',
      evidence: `Suspicious HTTP status code: ${status}`,
      url,
      confidence: 0.60, // Lower confidence - could be legitimate
    });
  }

  // Filter out low-confidence detections if we have high-confidence ones
  const highConfidence = detections.filter((d) => d.confidence >= 0.80);
  if (highConfidence.length > 0 && detections.length > highConfidence.length) {
    // Return only high-confidence detections to reduce noise
    return highConfidence;
  }

  return detections.length > 0 ? detections : null;
}
328
+
158
329
  // ═══════════════════════════════════════════════════════════════════════════════
159
330
  // ICONS & SYMBOLS
160
331
  // ═══════════════════════════════════════════════════════════════════════════════
@@ -419,6 +590,10 @@ function getCategoryIcon(category) {
419
590
  'DeadUI': ICONS.deadUI,
420
591
  'AuthCoverage': ICONS.shield,
421
592
  'HTTPError': ICONS.http,
593
+ // Fake data detection categories
594
+ 'FakeDomain': '🔗',
595
+ 'FakeResponse': '🎭',
596
+ 'MockStatus': '📡',
422
597
  };
423
598
  return icons[category] || ICONS.bullet;
424
599
  }
@@ -428,6 +603,10 @@ function getCategoryColor(category) {
428
603
  'DeadUI': colors.deadUI,
429
604
  'AuthCoverage': colors.authCoverage,
430
605
  'HTTPError': colors.httpError,
606
+ // Fake data detection categories - all critical (red/orange)
607
+ 'FakeDomain': rgb(255, 80, 80), // Red - critical
608
+ 'FakeResponse': rgb(255, 100, 60), // Orange-red
609
+ 'MockStatus': rgb(255, 150, 50), // Amber
431
610
  };
432
611
  return categoryColors[category] || colors.accent;
433
612
  }
@@ -612,8 +791,10 @@ function printTierWarning(tier, limits, originalMaxPages, appliedMaxPages, verif
612
791
  // HELP DISPLAY
613
792
  // ═══════════════════════════════════════════════════════════════════════════════
614
793
 
615
- function printHelp() {
616
- console.log(BANNER_FULL);
794
+ function printHelp(opts = {}) {
795
+ if (shouldShowBanner(opts)) {
796
+ console.log(BANNER_FULL);
797
+ }
617
798
  console.log(`
618
799
  ${c.bold}Usage:${c.reset} vibecheck reality --url <url> [options]
619
800
 
@@ -642,6 +823,17 @@ function printHelp() {
642
823
  ${colors.accent}--timeout <ms>${c.reset} Page timeout ${c.dim}(default: 15000)${c.reset}
643
824
  ${colors.accent}--help, -h${c.reset} Show this help
644
825
 
826
+ ${c.bold}Visual Artifacts:${c.reset}
827
+ ${colors.accent}--video, --record-video${c.reset} Record video of browser sessions
828
+ ${colors.accent}--trace, --record-trace${c.reset} Record Playwright trace (viewable in trace.playwright.dev)
829
+ ${colors.accent}--har, --record-har${c.reset} Record HAR network traffic
830
+
831
+ ${c.bold}Flakiness Reduction:${c.reset}
832
+ ${colors.accent}--retries <n>${c.reset} Retry failed nav/clicks ${c.dim}(default: 2)${c.reset}
833
+ ${colors.accent}--stable-wait <ms>${c.reset} Wait after actions ${c.dim}(default: 500ms)${c.reset}
834
+ ${colors.accent}--stability-runs <n>${c.reset} Run N times for stability check ${c.dim}(default: 1)${c.reset}
835
+ ${colors.accent}--flaky-threshold <f>${c.reset} Min occurrence rate to report ${c.dim}(default: 0.66)${c.reset}
836
+
645
837
  ${c.bold}Tier Limits:${c.reset}
646
838
  ${c.dim}FREE${c.reset} 5 pages, no auth boundary
647
839
  ${c.dim}STARTER${c.reset} Full budgets + basic auth
@@ -781,23 +973,76 @@ async function clickOutcome(page, locator, opts = {}) {
781
973
  const beforeUrl = page.url();
782
974
  const beforeReq = opts.reqCounter.value;
783
975
 
976
+ // Enhanced mutation observer that detects more changes including:
977
+ // - DOM structure changes (childList, subtree)
978
+ // - Attribute changes (class, style, aria-*, data-*)
979
+ // - CSS visibility/display changes
784
980
  const domPromise = page.evaluate(() => {
785
981
  return new Promise((resolve) => {
786
- const obs = new MutationObserver(() => { obs.disconnect(); resolve({ changed: true }); });
787
- obs.observe(document.documentElement, { childList: true, subtree: true, attributes: true });
788
- setTimeout(() => { try { obs.disconnect(); } catch {} resolve({ changed: false }); }, 900);
982
+ let changeCount = 0;
983
+ let attributeChanges = [];
984
+
985
+ const obs = new MutationObserver((mutations) => {
986
+ for (const mutation of mutations) {
987
+ changeCount++;
988
+ if (mutation.type === 'attributes') {
989
+ attributeChanges.push({
990
+ attr: mutation.attributeName,
991
+ target: mutation.target.tagName
992
+ });
993
+ }
994
+ }
995
+ });
996
+
997
+ obs.observe(document.documentElement, {
998
+ childList: true,
999
+ subtree: true,
1000
+ attributes: true,
1001
+ attributeFilter: ['class', 'style', 'aria-expanded', 'aria-hidden', 'aria-selected',
1002
+ 'data-state', 'hidden', 'open', 'data-open', 'data-closed']
1003
+ });
1004
+
1005
+ setTimeout(() => {
1006
+ try { obs.disconnect(); } catch {}
1007
+ resolve({
1008
+ changed: changeCount > 0,
1009
+ changeCount,
1010
+ attributeChanges: attributeChanges.slice(0, 10) // Limit for performance
1011
+ });
1012
+ }, 900);
789
1013
  });
790
1014
  });
791
1015
 
1016
+ // Also track CSS visibility changes via computed styles
1017
+ const beforeVisibility = await page.evaluate(() => {
1018
+ const modals = document.querySelectorAll('[role="dialog"], .modal, .dropdown, .popover, [data-state]');
1019
+ return Array.from(modals).slice(0, 20).map(el => ({
1020
+ visible: getComputedStyle(el).display !== 'none' && getComputedStyle(el).visibility !== 'hidden',
1021
+ state: el.getAttribute('data-state')
1022
+ }));
1023
+ }).catch(() => []);
1024
+
792
1025
  const navPromise = page.waitForNavigation({ timeout: 1200 }).then(() => true).catch(() => false);
793
1026
  const clickRes = await locator.click({ timeout: 1200 }).then(() => ({ ok: true })).catch((e) => ({ ok: false, error: String(e?.message || e) }));
794
1027
 
795
1028
  const navRes = await navPromise;
796
1029
  const domRes = await domPromise;
797
- await page.waitForTimeout(250);
1030
+ await page.waitForTimeout(300); // Slightly longer wait for CSS transitions
798
1031
 
799
1032
  const afterSig = await pageSignature(page);
800
1033
  const afterUrl = page.url();
1034
+
1035
+ // Check CSS visibility changes
1036
+ const afterVisibility = await page.evaluate(() => {
1037
+ const modals = document.querySelectorAll('[role="dialog"], .modal, .dropdown, .popover, [data-state]');
1038
+ return Array.from(modals).slice(0, 20).map(el => ({
1039
+ visible: getComputedStyle(el).display !== 'none' && getComputedStyle(el).visibility !== 'hidden',
1040
+ state: el.getAttribute('data-state')
1041
+ }));
1042
+ }).catch(() => []);
1043
+
1044
+ // Detect visibility state changes (modal open/close, dropdown toggle, etc.)
1045
+ const visibilityChanged = JSON.stringify(beforeVisibility) !== JSON.stringify(afterVisibility);
801
1046
 
802
1047
  return {
803
1048
  clickOk: clickRes.ok,
@@ -805,6 +1050,8 @@ async function clickOutcome(page, locator, opts = {}) {
805
1050
  navHappened: !!navRes,
806
1051
  urlChanged: normalizeUrl(afterUrl) !== normalizeUrl(beforeUrl),
807
1052
  domChanged: !!domRes?.changed || afterSig !== beforeSig,
1053
+ visibilityChanged, // NEW: Tracks CSS visibility/state changes
1054
+ changeCount: domRes?.changeCount || 0, // NEW: Number of mutations detected
808
1055
  reqDelta: Math.max(0, opts.reqCounter.value - beforeReq),
809
1056
  beforeUrl,
810
1057
  afterUrl
@@ -819,16 +1066,55 @@ async function collectLinks(page, baseUrl) {
819
1066
/**
 * Collect up to 80 interactive elements (buttons, links, submit inputs,
 * role=button, onclick handlers) from the current page, together with
 * heuristics used to reduce Dead UI false positives.
 *
 * The callback is executed through page.evaluate, so it must stay
 * self-contained and cannot reference anything from the enclosing module.
 *
 * @param {object} page - Page object exposing `evaluate(fn)`.
 * @returns {Promise<Array<object>>} One descriptor per element with
 *   idx/tag/role/href/text/id/disabled/key and a `context` object of
 *   boolean heuristics (including `isLikelyFalsePositive`).
 */
async function collectInteractives(page) {
  return page.evaluate(() => {
    const nodes = Array.from(document.querySelectorAll("button, a[href], input[type='submit'], [role='button'], [onclick]"));
    return nodes.slice(0, 80).map((el, idx) => {
      // Original-case label is what gets reported; the lowercase copy is only
      // used for heuristic matching.
      const label = (el.getAttribute("aria-label") || el.innerText || "").trim().slice(0, 80);
      const text = label.toLowerCase();
      const classList = Array.from(el.classList).join(' ').toLowerCase();

      // Detect element context for false positive reduction
      const isInsideModal = !!el.closest('[role="dialog"], [role="alertdialog"], .modal, .dialog, [data-radix-dialog-content]');
      const isInsideDropdown = !!el.closest('[role="menu"], [role="listbox"], .dropdown, .popover, [data-radix-menu-content]');
      const isInsideAccordion = !!el.closest('[role="region"], .accordion, [data-state], [data-radix-accordion-content]');
      const isInsideTooltip = !!el.closest('[role="tooltip"], .tooltip');

      // Detect button intent for false positive reduction.
      // A bare "x" only counts as a close control when it is the whole label;
      // matching it as a substring would flag "Next", "Export", "Explore", etc.
      const looksLikeClose = /close|dismiss|cancel|^x$|×|✕|✖/i.test(text) || /close|dismiss/i.test(classList);
      const looksLikeToggle = /toggle|expand|collapse|show|hide|menu|hamburger|more/i.test(text) || /toggle|accordion|collaps/i.test(classList);
      const looksLikeCopy = /copy|clipboard/i.test(text) || /copy/i.test(classList);
      const looksLikeTheme = /theme|dark|light|mode/i.test(text) || /theme/i.test(classList);
      const looksLikeTab = el.getAttribute("role") === "tab" || /tab/i.test(classList);
      const looksLikeSort = /sort|order|filter/i.test(text);

      // Elements that legitimately may not trigger detectable changes
      const isLikelyFalsePositive = looksLikeClose || looksLikeToggle || looksLikeCopy ||
        looksLikeTheme || looksLikeTab || looksLikeSort ||
        isInsideModal || isInsideDropdown || isInsideTooltip;

      return {
        idx,
        tag: el.tagName.toLowerCase(),
        role: el.getAttribute("role") || "",
        href: el.tagName === "A" ? el.getAttribute("href") || "" : "",
        text: label,
        id: el.id || "",
        disabled: !!(el.disabled || el.getAttribute("aria-disabled") === "true"),
        key: `${el.tagName}|${el.id}|${idx}`,
        // Context for false positive reduction
        context: {
          isInsideModal,
          isInsideDropdown,
          isInsideAccordion,
          isInsideTooltip,
          looksLikeClose,
          looksLikeToggle,
          looksLikeCopy,
          looksLikeTheme,
          looksLikeTab,
          // Exposed so consumers can name "sort control" as the reason.
          looksLikeSort,
          isLikelyFalsePositive,
        },
      };
    });
  });
}
834
1120
 
@@ -861,20 +1147,92 @@ async function attemptLogin(page, { auth }) {
861
1147
  }
862
1148
  }
863
1149
 
864
- async function runSinglePass({ label, baseUrl, context, shotsDir, danger, maxPages, maxDepth, timeoutMs, root, onProgress }) {
1150
+ async function runSinglePass({ label, baseUrl, context, shotsDir, danger, maxPages, maxDepth, timeoutMs, root, onProgress, retries = 2, stableWait = 500 }) {
865
1151
  const page = await context.newPage();
866
1152
  page.setDefaultTimeout(timeoutMs);
1153
+
1154
// Helper for flaky-resistant navigation.
// Navigates to targetUrl, waits `stableWait` ms, then waits (best-effort, up to
// 6s) for network idle. `retries` is the total number of attempts; at least one
// attempt is always made so that `--retries 0` still navigates instead of
// silently returning null. Rethrows the last error once attempts are exhausted.
async function safeGoto(targetUrl, opts = {}) {
  const requested = Number(retries);
  const maxAttempts = Number.isFinite(requested) ? Math.max(1, Math.floor(requested)) : 1;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      const res = await page.goto(targetUrl, { waitUntil: "domcontentloaded", ...opts });
      // Wait for stability to reduce flakiness
      if (stableWait > 0) {
        await page.waitForTimeout(stableWait);
      }
      await page.waitForLoadState("networkidle", { timeout: 6000 }).catch(() => {});
      return res;
    } catch (err) {
      if (attempt === maxAttempts) throw err;
      await page.waitForTimeout(500 * attempt); // Linear backoff: 500ms, 1000ms, ...
    }
  }
  return null;
}
1172
+
1173
// Helper for flaky-resistant clicks.
// Clicks the locator with a timeout of half the page timeout, then waits
// `stableWait` ms. `retries` is the total number of attempts; at least one
// attempt is always made so that `--retries 0` still clicks. Never throws:
// resolves to { success: true } or { success: false, error }.
async function safeClick(locator, opts = {}) {
  const requested = Number(retries);
  const maxAttempts = Number.isFinite(requested) ? Math.max(1, Math.floor(requested)) : 1;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      await locator.click({ timeout: timeoutMs / 2, ...opts });
      if (stableWait > 0) {
        await page.waitForTimeout(stableWait);
      }
      return { success: true };
    } catch (err) {
      if (attempt === maxAttempts) return { success: false, error: err.message };
      await page.waitForTimeout(300 * attempt); // Linear backoff: 300ms, 600ms, ...
    }
  }
  // Defensive fallback; not expected to be reached because maxAttempts >= 1.
  return { success: false, error: 'Max retries exceeded' };
}
867
1189
 
868
1190
  const reqCounter = { value: 0 };
869
1191
  const netErrors = [];
870
1192
  const consoleErrors = [];
871
1193
  const findings = [];
872
1194
  const pagesVisited = [];
1195
+ const fakeDataDetections = []; // Track fake data detections
1196
+ const processedUrls = new Set(); // Avoid duplicate detections
873
1197
 
874
1198
  page.on("requestfinished", () => { reqCounter.value += 1; });
875
1199
  page.on("requestfailed", (req) => { netErrors.push({ url: req.url(), failure: req.failure()?.errorText || "unknown" }); });
876
1200
  page.on("console", (msg) => { if (msg.type() === "error") consoleErrors.push({ text: msg.text().slice(0, 500) }); });
877
1201
  page.on("pageerror", (err) => { consoleErrors.push({ text: String(err?.message || err).slice(0, 500) }); });
1202
+
1203
+ // Intercept responses for fake data detection
1204
+ page.on("response", async (response) => {
1205
+ try {
1206
+ const url = response.url();
1207
+ const status = response.status();
1208
+
1209
+ // Skip already processed URLs and static assets
1210
+ if (processedUrls.has(url)) return;
1211
+ if (/\.(js|css|png|jpg|svg|ico|woff|woff2|ttf|gif|webp)(\?|$)/i.test(url)) return;
1212
+
1213
+ // Only check API-like endpoints
1214
+ if (!url.includes('/api/') && !url.includes('/graphql') && !url.includes('/trpc') &&
1215
+ !response.headers()['content-type']?.includes('application/json')) {
1216
+ return;
1217
+ }
1218
+
1219
+ processedUrls.add(url);
1220
+
1221
+ let body = '';
1222
+ try {
1223
+ body = await response.text();
1224
+ } catch {
1225
+ // Some responses can't be read
1226
+ }
1227
+
1228
+ const detections = classifyNetworkTraffic(url, body, status);
1229
+ if (detections && detections.length > 0) {
1230
+ fakeDataDetections.push(...detections);
1231
+ }
1232
+ } catch {
1233
+ // Ignore errors in response processing
1234
+ }
1235
+ });
878
1236
 
879
1237
  const visited = new Set();
880
1238
  const queue = [{ url: baseUrl, depth: 0 }];
@@ -894,8 +1252,7 @@ async function runSinglePass({ label, baseUrl, context, shotsDir, danger, maxPag
894
1252
  onProgress({ page: pagesVisited.length + 1, maxPages, url: targetUrl });
895
1253
  }
896
1254
 
897
- const res = await page.goto(targetUrl, { waitUntil: "domcontentloaded" }).catch(() => null);
898
- await page.waitForLoadState("networkidle", { timeout: 6000 }).catch(() => {});
1255
+ const res = await safeGoto(targetUrl).catch(() => null);
899
1256
 
900
1257
  const status = res ? res.status() : null;
901
1258
  const loginLike = await isLoginPage(page);
@@ -938,6 +1295,10 @@ async function runSinglePass({ label, baseUrl, context, shotsDir, danger, maxPag
938
1295
  const out = await clickOutcome(page, locator, { reqCounter });
939
1296
 
940
1297
  if (!out.clickOk) {
1298
+ // Skip click failures for elements that are likely intentionally not clickable
1299
+ // (e.g., visually hidden close buttons, buttons behind overlays)
1300
+ if (el.context?.isLikelyFalsePositive) continue;
1301
+
941
1302
  const shot = path.join(shotsDir, `${label}_click_fail_${sha1(el.key)}.png`);
942
1303
  await page.screenshot({ path: shot }).catch(() => {});
943
1304
  findings.push({
@@ -952,24 +1313,119 @@ async function runSinglePass({ label, baseUrl, context, shotsDir, danger, maxPag
952
1313
  continue;
953
1314
  }
954
1315
 
955
- if (!out.navHappened && !out.urlChanged && !out.domChanged && out.reqDelta === 0) {
1316
+ // Enhanced Dead UI detection with false positive reduction
1317
+ // An element is considered "dead" if clicking produced NO observable effect:
1318
+ // - No navigation
1319
+ // - No URL change
1320
+ // - No DOM mutations
1321
+ // - No CSS visibility/state changes
1322
+ // - No network requests
1323
+ const noEffect = !out.navHappened && !out.urlChanged && !out.domChanged &&
1324
+ !out.visibilityChanged && out.reqDelta === 0;
1325
+
1326
+ if (noEffect) {
1327
+ // Apply false positive reduction based on element context
1328
+ const ctx = el.context || {};
1329
+
1330
+ // Skip elements that are KNOWN to not produce observable changes
1331
+ // These are legitimate UI patterns that don't need fixes
1332
+ if (ctx.looksLikeClose && ctx.isInsideModal) {
1333
+ // Close button inside a modal - the modal itself may have closed
1334
+ continue;
1335
+ }
1336
+ if (ctx.looksLikeCopy) {
1337
+ // Copy buttons work via clipboard API, no DOM change expected
1338
+ continue;
1339
+ }
1340
+ if (ctx.looksLikeTheme) {
1341
+ // Theme toggles may only change CSS custom properties
1342
+ continue;
1343
+ }
1344
+
1345
+ // Downgrade severity for likely false positives
1346
+ // Instead of BLOCK, use WARN for elements in contexts that commonly have no-op behavior
1347
+ let severity = "BLOCK";
1348
+ let reason = "Click produced no navigation, no network activity, and no DOM change";
1349
+
1350
+ if (ctx.isLikelyFalsePositive) {
1351
+ severity = "WARN";
1352
+ reason = `Click produced no observable change (possible false positive: ${
1353
+ ctx.looksLikeToggle ? 'toggle button' :
1354
+ ctx.looksLikeTab ? 'tab element' :
1355
+ ctx.looksLikeSort ? 'sort control' :
1356
+ ctx.isInsideDropdown ? 'inside dropdown' :
1357
+ ctx.isInsideAccordion ? 'inside accordion' :
1358
+ 'contextual element'
1359
+ })`;
1360
+ }
1361
+
1362
+ // Always skip tooltip-related elements as they are purely visual
1363
+ if (ctx.isInsideTooltip) continue;
1364
+
956
1365
  const shot = path.join(shotsDir, `${label}_dead_${sha1(el.key)}.png`);
957
1366
  await page.screenshot({ path: shot }).catch(() => {});
958
1367
  findings.push({
959
1368
  id: `R_${label}_DEAD_${sha1(el.key).slice(0, 8)}`,
960
- severity: "BLOCK",
1369
+ severity,
961
1370
  category: "DeadUI",
962
1371
  title: `[${label}] Dead UI: ${el.text || el.tag}`,
963
1372
  page: page.url(),
964
- reason: "Click produced no navigation, no network activity, and no DOM change",
965
- screenshot: path.relative(root, shot).replace(/\\/g, "/")
1373
+ reason,
1374
+ screenshot: path.relative(root, shot).replace(/\\/g, "/"),
1375
+ confidence: ctx.isLikelyFalsePositive ? 0.5 : 0.9, // Add confidence score
1376
+ context: ctx // Include context for debugging
966
1377
  });
967
1378
  }
968
1379
  }
969
1380
  }
970
1381
 
971
1382
  await page.close();
972
- return { label, pagesVisited, findings, consoleErrors: consoleErrors.slice(0, 50), networkErrors: netErrors.slice(0, 50) };
1383
+
1384
+ // Convert fake data detections to findings with confidence-based filtering
1385
+ const seenFakeUrls = new Set();
1386
+
1387
+ // Sort by confidence (highest first) to prioritize most reliable detections
1388
+ const sortedDetections = [...fakeDataDetections].sort((a, b) =>
1389
+ (b.confidence || 0.5) - (a.confidence || 0.5)
1390
+ );
1391
+
1392
+ for (const detection of sortedDetections) {
1393
+ // Dedupe by URL + type + pattern to avoid near-duplicates
1394
+ const key = `${detection.url}:${detection.type}:${detection.pattern || ''}`;
1395
+ if (seenFakeUrls.has(key)) continue;
1396
+ seenFakeUrls.add(key);
1397
+
1398
+ // Skip very low confidence detections (likely false positives)
1399
+ const confidence = detection.confidence || 0.5;
1400
+ if (confidence < 0.50) continue;
1401
+
1402
+ // Downgrade severity for medium confidence detections
1403
+ let severity = detection.severity;
1404
+ if (confidence < 0.70 && severity === 'BLOCK') {
1405
+ severity = 'WARN';
1406
+ }
1407
+
1408
+ findings.push({
1409
+ id: `R_${label}_FAKE_${sha1(key).slice(0, 8)}`,
1410
+ severity,
1411
+ category: detection.type === 'fake-domain' ? 'FakeDomain' :
1412
+ detection.type === 'fake-response' ? 'FakeResponse' : 'MockStatus',
1413
+ title: `[${label}] Fake Data: ${detection.evidence}`,
1414
+ page: detection.url,
1415
+ reason: detection.evidence,
1416
+ confidence, // Include confidence score for transparency
1417
+ pattern: detection.pattern // Include pattern for debugging
1418
+ });
1419
+ }
1420
+
1421
+ return {
1422
+ label,
1423
+ pagesVisited,
1424
+ findings,
1425
+ consoleErrors: consoleErrors.slice(0, 50),
1426
+ networkErrors: netErrors.slice(0, 50),
1427
+ fakeDataDetections: fakeDataDetections.slice(0, 100)
1428
+ };
973
1429
  }
974
1430
 
975
1431
  function buildAuthCoverageFindings({ baseUrl, matchers, anonPass, authPass }) {
@@ -1021,15 +1477,107 @@ function coverageFromTruthpack({ truthpack, visitedUrls }) {
1021
1477
  return { total, hit, percent: total ? Math.round((hit / total) * 100) : 0, missed: Array.from(uiPaths).filter(p => !visitedPaths.has(p)).slice(0, 50) };
1022
1478
  }
1023
1479
 
1480
+ // ═══════════════════════════════════════════════════════════════════════════════
1481
+ // FLAKINESS & STABILITY VERIFICATION
1482
+ // ═══════════════════════════════════════════════════════════════════════════════
1483
+
1484
+ /**
1485
+ * Aggregate findings from multiple stability runs
1486
+ * Only returns findings that appear in at least `threshold` of runs
1487
+ * @param {Array<Array<Object>>} runFindings - Array of findings arrays from each run
1488
+ * @param {number} threshold - Minimum occurrence rate (0-1) to include a finding
1489
+ * @returns {Array<Object>} Deduplicated findings with flakiness scores
1490
+ */
1491
+ function aggregateStabilityFindings(runFindings, threshold = 0.66) {
1492
+ const totalRuns = runFindings.length;
1493
+ if (totalRuns === 0) return [];
1494
+ if (totalRuns === 1) return runFindings[0] || [];
1495
+
1496
+ // Group findings by their unique key (category + normalized title/reason)
1497
+ const findingCounts = new Map();
1498
+
1499
+ for (const findings of runFindings) {
1500
+ for (const finding of findings) {
1501
+ // Create a stable key for deduplication
1502
+ const key = `${finding.category}|${finding.title?.replace(/\[ANON\]|\[AUTH\]/g, '').trim()}|${finding.page || ''}`;
1503
+
1504
+ if (!findingCounts.has(key)) {
1505
+ findingCounts.set(key, {
1506
+ finding: { ...finding },
1507
+ count: 0,
1508
+ occurrences: []
1509
+ });
1510
+ }
1511
+
1512
+ const entry = findingCounts.get(key);
1513
+ entry.count++;
1514
+ entry.occurrences.push(finding);
1515
+ }
1516
+ }
1517
+
1518
+ // Filter to findings that meet the threshold and add flakiness score
1519
+ const aggregated = [];
1520
+
1521
+ for (const [key, data] of findingCounts) {
1522
+ const occurrenceRate = data.count / totalRuns;
1523
+
1524
+ if (occurrenceRate >= threshold) {
1525
+ // Calculate flakiness score (1 = always occurs, 0 = never)
1526
+ const flakinessScore = 1 - occurrenceRate;
1527
+
1528
+ // Merge the finding with flakiness metadata
1529
+ const aggregatedFinding = {
1530
+ ...data.finding,
1531
+ stability: {
1532
+ occurrenceRate: Math.round(occurrenceRate * 100) / 100,
1533
+ appearedInRuns: data.count,
1534
+ totalRuns,
1535
+ flakinessScore: Math.round(flakinessScore * 100) / 100,
1536
+ isFlaky: flakinessScore > 0.1, // More than 10% variance = flaky
1537
+ }
1538
+ };
1539
+
1540
+ // If finding appeared in all runs, it's stable
1541
+ // If it appeared in some runs, mark as potentially flaky
1542
+ if (data.count < totalRuns) {
1543
+ aggregatedFinding.reason = `${aggregatedFinding.reason || ''} (appeared ${data.count}/${totalRuns} runs)`.trim();
1544
+ }
1545
+
1546
+ aggregated.push(aggregatedFinding);
1547
+ }
1548
+ }
1549
+
1550
+ return aggregated;
1551
+ }
1552
+
1553
+ /**
1554
+ * Print stability verification results
1555
+ */
1556
+ function printStabilityResults(totalRuns, stableFindings, filteredCount) {
1557
+ if (totalRuns <= 1) return;
1558
+
1559
+ console.log();
1560
+ console.log(` ${colors.info}${ICONS.target}${c.reset} ${c.bold}Stability Verification${c.reset}`);
1561
+ console.log(` ${c.dim}Total runs:${c.reset} ${totalRuns}`);
1562
+ console.log(` ${c.dim}Stable findings:${c.reset} ${stableFindings} ${c.dim}(appeared in majority of runs)${c.reset}`);
1563
+
1564
+ if (filteredCount > 0) {
1565
+ console.log(` ${c.dim}Filtered (flaky):${c.reset} ${colors.success}${filteredCount}${c.reset} ${c.dim}(inconsistent across runs)${c.reset}`);
1566
+ }
1567
+ }
1568
+
1024
1569
  // ═══════════════════════════════════════════════════════════════════════════════
1025
1570
  // MAIN REALITY FUNCTION
1026
1571
  // ═══════════════════════════════════════════════════════════════════════════════
1027
1572
 
1028
1573
  async function runReality(argsOrOpts = {}) {
1029
1574
  // Handle array args from CLI
1575
+ let globalOpts = { noBanner: false, json: false, quiet: false, ci: false };
1030
1576
  if (Array.isArray(argsOrOpts)) {
1031
- if (argsOrOpts.includes("--help") || argsOrOpts.includes("-h")) {
1032
- printHelp();
1577
+ const { flags } = parseGlobalFlags(argsOrOpts);
1578
+ globalOpts = { ...globalOpts, ...flags };
1579
+ if (globalOpts.help) {
1580
+ printHelp(globalOpts);
1033
1581
  return 0;
1034
1582
  }
1035
1583
  // Parse args to options
@@ -1049,9 +1597,17 @@ async function runReality(argsOrOpts = {}) {
1049
1597
  verifyAuth: argsOrOpts.includes("--verify-auth"),
1050
1598
  headed: argsOrOpts.includes("--headed"),
1051
1599
  danger: argsOrOpts.includes("--danger"),
1600
+ // Visual artifacts options
1601
+ recordVideo: argsOrOpts.includes("--record-video") || argsOrOpts.includes("--video"),
1602
+ recordTrace: argsOrOpts.includes("--record-trace") || argsOrOpts.includes("--trace"),
1603
+ recordHar: argsOrOpts.includes("--record-har") || argsOrOpts.includes("--har"),
1604
+ // Flakiness reduction options
1605
+ retries: parseInt(getArg(["--retries"]) || "2", 10),
1606
+ stableWait: parseInt(getArg(["--stable-wait"]) || "500", 10),
1052
1607
  maxPages: parseInt(getArg(["--max-pages"]) || "18", 10),
1053
1608
  maxDepth: parseInt(getArg(["--max-depth"]) || "2", 10),
1054
1609
  timeoutMs: parseInt(getArg(["--timeout"]) || "15000", 10),
1610
+ ...globalOpts,
1055
1611
  };
1056
1612
  }
1057
1613
 
@@ -1067,11 +1623,20 @@ async function runReality(argsOrOpts = {}) {
1067
1623
  maxPages = 18,
1068
1624
  maxDepth = 2,
1069
1625
  danger = false,
1070
- timeoutMs = 15000
1626
+ timeoutMs = 15000,
1627
+ // Visual artifacts (videos, traces, HAR)
1628
+ recordVideo = false,
1629
+ recordTrace = false,
1630
+ recordHar = false,
1631
+ // Flakiness reduction
1632
+ retries = 2,
1633
+ stableWait = 500,
1634
+ stabilityRuns = 1,
1635
+ flakyThreshold = 0.66
1071
1636
  } = argsOrOpts;
1072
1637
 
1073
1638
  if (!url) {
1074
- printHelp();
1639
+ printHelp(argsOrOpts);
1075
1640
  console.log(`\n ${colors.error}${ICONS.cross}${c.reset} ${c.bold}Error:${c.reset} --url is required\n`);
1076
1641
  return 1;
1077
1642
  }
@@ -1109,12 +1674,17 @@ async function runReality(argsOrOpts = {}) {
1109
1674
  }
1110
1675
 
1111
1676
  // Print banner
1112
- printBanner();
1677
+ if (shouldShowBanner(argsOrOpts)) {
1678
+ printBanner();
1679
+ }
1113
1680
 
1114
1681
  console.log(` ${c.dim}Project:${c.reset} ${c.bold}${projectName}${c.reset}`);
1115
1682
  console.log(` ${c.dim}URL:${c.reset} ${colors.accent}${url}${c.reset}`);
1116
1683
  console.log(` ${c.dim}Mode:${c.reset} ${verifyAuth ? `${colors.auth}Two-Pass (Auth)${c.reset}` : `${colors.anon}Single-Pass (Anon)${c.reset}`}`);
1117
1684
  console.log(` ${c.dim}Budget:${c.reset} ${maxPages} pages, depth ${maxDepth}`);
1685
+ if (stabilityRuns > 1) {
1686
+ console.log(` ${c.dim}Stability:${c.reset} ${colors.info}${stabilityRuns} runs${c.reset}, threshold ${Math.round(flakyThreshold * 100)}%`);
1687
+ }
1118
1688
 
1119
1689
  // Tier warning if applicable
1120
1690
  if (tierInfo.tier === 'free' && (originalMaxPages > maxPages || (originalVerifyAuth && !verifyAuth))) {
@@ -1136,7 +1706,13 @@ async function runReality(argsOrOpts = {}) {
1136
1706
  const baseUrl = normalizeUrl(url);
1137
1707
  const outBase = path.join(root, ".vibecheck", "reality", stamp());
1138
1708
  const shotsDir = path.join(outBase, "screenshots");
1709
+ const videosDir = path.join(outBase, "videos");
1710
+ const tracesDir = path.join(outBase, "traces");
1711
+ const harDir = path.join(outBase, "har");
1139
1712
  ensureDir(shotsDir);
1713
+ if (recordVideo) ensureDir(videosDir);
1714
+ if (recordTrace) ensureDir(tracesDir);
1715
+ if (recordHar) ensureDir(harDir);
1140
1716
 
1141
1717
  const tp = loadTruthpack(root, truthpack);
1142
1718
  const matchers = getProtectedMatchersFromTruthpack(tp);
@@ -1152,98 +1728,229 @@ async function runReality(argsOrOpts = {}) {
1152
1728
  stopSpinner('Browser launched', true);
1153
1729
 
1154
1730
  // ═══════════════════════════════════════════════════════════════════════════
1155
- // PASS A: ANONYMOUS
1731
+ // STABILITY RUNS (multiple passes for flakiness detection)
1156
1732
  // ═══════════════════════════════════════════════════════════════════════════
1157
- printPassHeader('anon', baseUrl);
1158
-
1159
- startSpinner('Crawling anonymously...', colors.anon);
1160
- const anonContext = await browser.newContext();
1161
- const anonPass = await runSinglePass({
1162
- label: "ANON",
1163
- baseUrl,
1164
- context: anonContext,
1165
- shotsDir,
1166
- danger,
1167
- maxPages,
1168
- maxDepth,
1169
- timeoutMs,
1170
- root,
1171
- onProgress: ({ page, maxPages: mp, url: currentUrl }) => {
1172
- // Could update spinner here if desired
1173
- }
1174
- });
1175
- await anonContext.close();
1176
- stopSpinner(`Crawled ${anonPass.pagesVisited.length} pages`, true);
1177
1733
 
1178
- printPassResult('anon', anonPass);
1179
-
1180
- // ═══════════════════════════════════════════════════════════════════════════
1181
- // PASS B: AUTHENTICATED (optional)
1182
- // ═══════════════════════════════════════════════════════════════════════════
1183
- let authPass = null;
1184
- let authFindings = [];
1734
+ const allRunFindings = [];
1735
+ let lastAnonPass = null;
1736
+ let lastAuthPass = null;
1737
+ let anonVideoPath = null;
1738
+ let authVideoPath = null;
1739
+ let anonTracePath = null;
1740
+ let authTracePath = null;
1185
1741
  let savedStatePath = null;
1742
+
1743
+ for (let runNum = 1; runNum <= stabilityRuns; runNum++) {
1744
+ const isFirstRun = runNum === 1;
1745
+ const isLastRun = runNum === stabilityRuns;
1746
+
1747
+ if (stabilityRuns > 1) {
1748
+ console.log();
1749
+ console.log(` ${colors.info}${BOX.hHorizontal.repeat(3)}${c.reset} ${c.bold}Stability Run ${runNum}/${stabilityRuns}${c.reset}`);
1750
+ }
1186
1751
 
1187
- if (verifyAuth) {
1188
- printPassHeader('auth', baseUrl);
1752
+ // ═══════════════════════════════════════════════════════════════════════════
1753
+ // PASS A: ANONYMOUS
1754
+ // ═══════════════════════════════════════════════════════════════════════════
1755
+ printPassHeader('anon', baseUrl);
1189
1756
 
1190
- startSpinner('Setting up authenticated session...', colors.auth);
1191
- const ctxOpts = storageState ? { storageState } : {};
1192
- const authContext = await browser.newContext(ctxOpts);
1193
- const authPage = await authContext.newPage();
1194
- await authPage.goto(baseUrl, { waitUntil: "domcontentloaded" }).catch(() => {});
1195
- await authPage.waitForLoadState("networkidle", { timeout: 6000 }).catch(() => {});
1196
-
1197
- if (!storageState && auth) {
1198
- stopSpinner('Attempting login...', true);
1199
- startSpinner('Logging in...', colors.auth);
1200
-
1201
- const loginRes = await attemptLogin(authPage, { auth });
1202
-
1203
- if (loginRes.ok) {
1204
- stopSpinner('Login successful', true);
1205
- if (saveStorageState) {
1206
- const dest = path.isAbsolute(saveStorageState) ? saveStorageState : path.join(root, saveStorageState);
1207
- ensureDir(path.dirname(dest));
1208
- await authContext.storageState({ path: dest }).catch(() => {});
1209
- savedStatePath = dest;
1210
- console.log(` ${colors.success}${ICONS.check}${c.reset} Session saved: ${c.dim}${path.relative(root, dest)}${c.reset}`);
1211
- }
1212
- } else {
1213
- stopSpinner('Login failed - continuing without auth', false);
1214
- }
1215
- } else {
1216
- stopSpinner('Using existing session', true);
1757
+ startSpinner('Crawling anonymously...', colors.anon);
1758
+
1759
+ // Build context options for video/HAR recording (only on last run to save resources)
1760
+ const anonContextOpts = {};
1761
+ if (recordVideo && isLastRun) {
1762
+ anonContextOpts.recordVideo = {
1763
+ dir: videosDir,
1764
+ size: { width: 1280, height: 720 }
1765
+ };
1766
+ }
1767
+ if (recordHar && isLastRun) {
1768
+ anonContextOpts.recordHar = {
1769
+ path: path.join(harDir, 'anon-traffic.har'),
1770
+ mode: 'full'
1771
+ };
1217
1772
  }
1218
1773
 
1219
- await authPage.close();
1220
-
1221
- startSpinner('Crawling with authentication...', colors.auth);
1222
- authPass = await runSinglePass({
1223
- label: "AUTH",
1774
+ const anonContext = await browser.newContext(anonContextOpts);
1775
+
1776
+ // Start trace recording if enabled (only on last run)
1777
+ if (recordTrace && isLastRun) {
1778
+ await anonContext.tracing.start({
1779
+ screenshots: true,
1780
+ snapshots: true,
1781
+ sources: false
1782
+ });
1783
+ }
1784
+
1785
+ const anonPass = await runSinglePass({
1786
+ label: "ANON",
1224
1787
  baseUrl,
1225
- context: authContext,
1226
- shotsDir,
1788
+ context: anonContext,
1789
+ shotsDir: isLastRun ? shotsDir : path.join(outBase, `run${runNum}`, 'screenshots'),
1227
1790
  danger,
1228
1791
  maxPages,
1229
1792
  maxDepth,
1230
1793
  timeoutMs,
1231
- root
1794
+ root,
1795
+ retries,
1796
+ stableWait,
1797
+ onProgress: ({ page, maxPages: mp, url: currentUrl }) => {
1798
+ // Could update spinner here if desired
1799
+ }
1232
1800
  });
1233
- await authContext.close();
1234
- stopSpinner(`Crawled ${authPass.pagesVisited.length} pages`, true);
1235
1801
 
1236
- printPassResult('auth', authPass);
1802
+ // Ensure shot dir exists for intermediate runs
1803
+ if (!isLastRun) {
1804
+ ensureDir(path.join(outBase, `run${runNum}`, 'screenshots'));
1805
+ }
1806
+
1807
+ // Save trace if enabled (only last run)
1808
+ if (recordTrace && isLastRun) {
1809
+ anonTracePath = path.join(tracesDir, 'anon-trace.zip');
1810
+ await anonContext.tracing.stop({ path: anonTracePath });
1811
+ }
1812
+
1813
+ // Get video path before closing context (only last run)
1814
+ if (recordVideo && isLastRun && anonPass.pagesVisited.length > 0) {
1815
+ const pages = anonContext.pages();
1816
+ if (pages.length > 0) {
1817
+ const video = pages[0].video();
1818
+ if (video) {
1819
+ try {
1820
+ anonVideoPath = await video.path();
1821
+ } catch {}
1822
+ }
1823
+ }
1824
+ }
1825
+
1826
+ await anonContext.close();
1827
+ stopSpinner(`Crawled ${anonPass.pagesVisited.length} pages`, true);
1828
+
1829
+ printPassResult('anon', anonPass);
1830
+ lastAnonPass = anonPass;
1237
1831
 
1238
- // Build auth coverage findings
1239
- if (matchers.length) {
1240
- startSpinner('Analyzing auth coverage...', colors.authCoverage);
1241
- authFindings = buildAuthCoverageFindings({ baseUrl, matchers, anonPass, authPass });
1242
- stopSpinner(`Found ${authFindings.length} auth issues`, authFindings.length === 0);
1832
+ // ═══════════════════════════════════════════════════════════════════════════
1833
+ // PASS B: AUTHENTICATED (optional)
1834
+ // ═══════════════════════════════════════════════════════════════════════════
1835
+ let authPass = null;
1836
+ let authFindings = [];
1837
+
1838
+ if (verifyAuth) {
1839
+ printPassHeader('auth', baseUrl);
1840
+
1841
+ startSpinner('Setting up authenticated session...', colors.auth);
1842
+ const ctxOpts = storageState ? { storageState } : {};
1843
+
1844
+ // Add video/HAR recording options (only last run)
1845
+ if (recordVideo && isLastRun) {
1846
+ ctxOpts.recordVideo = {
1847
+ dir: videosDir,
1848
+ size: { width: 1280, height: 720 }
1849
+ };
1850
+ }
1851
+ if (recordHar && isLastRun) {
1852
+ ctxOpts.recordHar = {
1853
+ path: path.join(harDir, 'auth-traffic.har'),
1854
+ mode: 'full'
1855
+ };
1856
+ }
1857
+
1858
+ const authContext = await browser.newContext(ctxOpts);
1859
+
1860
+ // Start trace recording if enabled (only last run)
1861
+ if (recordTrace && isLastRun) {
1862
+ await authContext.tracing.start({
1863
+ screenshots: true,
1864
+ snapshots: true,
1865
+ sources: false
1866
+ });
1867
+ }
1868
+ const authPage = await authContext.newPage();
1869
+ await authPage.goto(baseUrl, { waitUntil: "domcontentloaded" }).catch(() => {});
1870
+ await authPage.waitForLoadState("networkidle", { timeout: 6000 }).catch(() => {});
1871
+
1872
+ if (!storageState && auth && isFirstRun) {
1873
+ stopSpinner('Attempting login...', true);
1874
+ startSpinner('Logging in...', colors.auth);
1875
+
1876
+ const loginRes = await attemptLogin(authPage, { auth });
1877
+
1878
+ if (loginRes.ok) {
1879
+ stopSpinner('Login successful', true);
1880
+ if (saveStorageState) {
1881
+ const dest = path.isAbsolute(saveStorageState) ? saveStorageState : path.join(root, saveStorageState);
1882
+ ensureDir(path.dirname(dest));
1883
+ await authContext.storageState({ path: dest }).catch(() => {});
1884
+ savedStatePath = dest;
1885
+ console.log(` ${colors.success}${ICONS.check}${c.reset} Session saved: ${c.dim}${path.relative(root, dest)}${c.reset}`);
1886
+ }
1887
+ } else {
1888
+ stopSpinner('Login failed - continuing without auth', false);
1889
+ }
1890
+ } else {
1891
+ stopSpinner('Using existing session', true);
1892
+ }
1893
+
1894
+ await authPage.close();
1895
+
1896
+ startSpinner('Crawling with authentication...', colors.auth);
1897
+ authPass = await runSinglePass({
1898
+ label: "AUTH",
1899
+ baseUrl,
1900
+ context: authContext,
1901
+ shotsDir: isLastRun ? shotsDir : path.join(outBase, `run${runNum}`, 'screenshots'),
1902
+ danger,
1903
+ maxPages,
1904
+ maxDepth,
1905
+ timeoutMs,
1906
+ root,
1907
+ retries,
1908
+ stableWait
1909
+ });
1910
+
1911
+ // Save trace if enabled (only last run)
1912
+ if (recordTrace && isLastRun) {
1913
+ authTracePath = path.join(tracesDir, 'auth-trace.zip');
1914
+ await authContext.tracing.stop({ path: authTracePath });
1915
+ }
1916
+
1917
+ // Get video path before closing context (only last run)
1918
+ if (recordVideo && isLastRun && authPass.pagesVisited.length > 0) {
1919
+ const pages = authContext.pages();
1920
+ if (pages.length > 0) {
1921
+ const video = pages[0].video();
1922
+ if (video) {
1923
+ try {
1924
+ authVideoPath = await video.path();
1925
+ } catch {}
1926
+ }
1927
+ }
1928
+ }
1929
+
1930
+ await authContext.close();
1931
+ stopSpinner(`Crawled ${authPass.pagesVisited.length} pages`, true);
1932
+
1933
+ printPassResult('auth', authPass);
1934
+ lastAuthPass = authPass;
1935
+
1936
+ // Build auth coverage findings
1937
+ if (matchers.length) {
1938
+ startSpinner('Analyzing auth coverage...', colors.authCoverage);
1939
+ authFindings = buildAuthCoverageFindings({ baseUrl, matchers, anonPass, authPass });
1940
+ stopSpinner(`Found ${authFindings.length} auth issues`, authFindings.length === 0);
1941
+ }
1243
1942
  }
1943
+
1944
+ // Collect findings from this run
1945
+ const runFindings = [...anonPass.findings, ...(authPass?.findings || []), ...authFindings];
1946
+ allRunFindings.push(runFindings);
1244
1947
  }
1245
1948
 
1246
1949
  await browser.close();
1950
+
1951
+ // Use last pass results for page/coverage data
1952
+ const anonPass = lastAnonPass;
1953
+ const authPass = lastAuthPass;
1247
1954
 
1248
1955
  // ═══════════════════════════════════════════════════════════════════════════
1249
1956
  // ANALYSIS & RESULTS
@@ -1252,10 +1959,49 @@ async function runReality(argsOrOpts = {}) {
1252
1959
  const allVisited = [...anonPass.pagesVisited.map(p => p.url), ...(authPass?.pagesVisited || []).map(p => p.url)];
1253
1960
  const coverage = coverageFromTruthpack({ truthpack: tp, visitedUrls: allVisited });
1254
1961
 
1255
- const findings = [...anonPass.findings, ...(authPass?.findings || []), ...authFindings];
1962
+ // Aggregate findings from stability runs (filters out flaky findings)
1963
+ let findings;
1964
+ let filteredFlakyCount = 0;
1965
+
1966
+ if (stabilityRuns > 1) {
1967
+ // Count total unique findings across all runs before filtering
1968
+ const allFindingsFlat = allRunFindings.flat();
1969
+ const uniqueBeforeFilter = new Set(allFindingsFlat.map(f =>
1970
+ `${f.category}|${f.title?.replace(/\[ANON\]|\[AUTH\]/g, '').trim()}|${f.page || ''}`
1971
+ )).size;
1972
+
1973
+ findings = aggregateStabilityFindings(allRunFindings, flakyThreshold);
1974
+ filteredFlakyCount = uniqueBeforeFilter - findings.length;
1975
+
1976
+ printStabilityResults(stabilityRuns, findings.length, filteredFlakyCount);
1977
+ } else {
1978
+ // Single run - use findings directly
1979
+ findings = allRunFindings[0] || [];
1980
+ }
1981
+
1256
1982
  const blocks = findings.filter(f => f.severity === "BLOCK").length;
1257
1983
  const warns = findings.filter(f => f.severity === "WARN").length;
1258
1984
 
1985
+ // Build artifact manifest
1986
+ const artifacts = {
1987
+ screenshots: shotsDir ? path.relative(root, shotsDir).replace(/\\/g, "/") : null,
1988
+ videos: recordVideo ? {
1989
+ directory: path.relative(root, videosDir).replace(/\\/g, "/"),
1990
+ anon: anonVideoPath ? path.relative(root, anonVideoPath).replace(/\\/g, "/") : null,
1991
+ auth: authVideoPath ? path.relative(root, authVideoPath).replace(/\\/g, "/") : null
1992
+ } : null,
1993
+ traces: recordTrace ? {
1994
+ directory: path.relative(root, tracesDir).replace(/\\/g, "/"),
1995
+ anon: anonTracePath ? path.relative(root, anonTracePath).replace(/\\/g, "/") : null,
1996
+ auth: authTracePath ? path.relative(root, authTracePath).replace(/\\/g, "/") : null
1997
+ } : null,
1998
+ har: recordHar ? {
1999
+ directory: path.relative(root, harDir).replace(/\\/g, "/"),
2000
+ anon: path.join(harDir, 'anon-traffic.har'),
2001
+ auth: path.join(harDir, 'auth-traffic.har')
2002
+ } : null
2003
+ };
2004
+
1259
2005
  // Build report
1260
2006
  const report = {
1261
2007
  meta: {
@@ -1268,8 +2014,16 @@ async function runReality(argsOrOpts = {}) {
1268
2014
  maxDepth,
1269
2015
  truthpackLoaded: !!tp,
1270
2016
  protectedMatcherCount: matchers.length,
1271
- savedStorageState: savedStatePath ? path.relative(root, savedStatePath).replace(/\\/g, "/") : null
2017
+ savedStorageState: savedStatePath ? path.relative(root, savedStatePath).replace(/\\/g, "/") : null,
2018
+ recordVideo,
2019
+ recordTrace,
2020
+ recordHar,
2021
+ // Flakiness/stability metadata
2022
+ stabilityRuns,
2023
+ flakyThreshold,
2024
+ filteredFlakyCount: stabilityRuns > 1 ? filteredFlakyCount : 0
1272
2025
  },
2026
+ artifacts,
1273
2027
  coverage,
1274
2028
  passes: { anon: anonPass, auth: authPass },
1275
2029
  findings,