@odavl/guardian 0.1.0-rc1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. package/CHANGELOG.md +146 -0
  2. package/README.md +155 -97
  3. package/bin/guardian.js +1544 -55
  4. package/config/README.md +59 -0
  5. package/config/profiles/landing-demo.yaml +16 -0
  6. package/package.json +26 -11
  7. package/policies/landing-demo.json +22 -0
  8. package/src/enterprise/audit-logger.js +166 -0
  9. package/src/enterprise/pdf-exporter.js +267 -0
  10. package/src/enterprise/rbac-gate.js +142 -0
  11. package/src/enterprise/rbac.js +239 -0
  12. package/src/enterprise/site-manager.js +180 -0
  13. package/src/founder/feedback-system.js +156 -0
  14. package/src/founder/founder-tracker.js +213 -0
  15. package/src/founder/usage-signals.js +141 -0
  16. package/src/guardian/alert-ledger.js +121 -0
  17. package/src/guardian/attempt-engine.js +587 -12
  18. package/src/guardian/attempt-registry.js +42 -1
  19. package/src/guardian/attempt-relevance.js +106 -0
  20. package/src/guardian/attempt.js +85 -39
  21. package/src/guardian/attempts-filter.js +63 -0
  22. package/src/guardian/baseline.js +50 -8
  23. package/src/guardian/breakage-intelligence.js +1 -0
  24. package/src/guardian/browser-pool.js +131 -0
  25. package/src/guardian/browser.js +28 -1
  26. package/src/guardian/ci-cli.js +121 -0
  27. package/src/guardian/ci-mode.js +15 -0
  28. package/src/guardian/ci-output.js +38 -0
  29. package/src/guardian/cli-summary.js +167 -67
  30. package/src/guardian/config-loader.js +162 -0
  31. package/src/guardian/data-guardian-detector.js +189 -0
  32. package/src/guardian/detection-layers.js +271 -0
  33. package/src/guardian/drift-detector.js +100 -0
  34. package/src/guardian/enhanced-html-reporter.js +221 -4
  35. package/src/guardian/env-guard.js +127 -0
  36. package/src/guardian/failure-intelligence.js +173 -0
  37. package/src/guardian/first-run-profile.js +89 -0
  38. package/src/guardian/first-run.js +54 -0
  39. package/src/guardian/flag-validator.js +111 -0
  40. package/src/guardian/flow-executor.js +309 -44
  41. package/src/guardian/html-reporter.js +2 -0
  42. package/src/guardian/human-reporter.js +431 -0
  43. package/src/guardian/index.js +22 -19
  44. package/src/guardian/init-command.js +9 -5
  45. package/src/guardian/intent-detector.js +146 -0
  46. package/src/guardian/journey-definitions.js +132 -0
  47. package/src/guardian/journey-scan-cli.js +145 -0
  48. package/src/guardian/journey-scanner.js +583 -0
  49. package/src/guardian/junit-reporter.js +18 -1
  50. package/src/guardian/language-detection.js +99 -0
  51. package/src/guardian/live-cli.js +95 -0
  52. package/src/guardian/live-scheduler-runner.js +137 -0
  53. package/src/guardian/live-scheduler.js +146 -0
  54. package/src/guardian/market-reporter.js +357 -82
  55. package/src/guardian/parallel-executor.js +116 -0
  56. package/src/guardian/pattern-analyzer.js +348 -0
  57. package/src/guardian/policy.js +80 -3
  58. package/src/guardian/prerequisite-checker.js +101 -0
  59. package/src/guardian/preset-loader.js +27 -18
  60. package/src/guardian/profile-loader.js +96 -0
  61. package/src/guardian/reality.js +1612 -115
  62. package/src/guardian/reporter.js +27 -41
  63. package/src/guardian/run-artifacts.js +212 -0
  64. package/src/guardian/run-cleanup.js +207 -0
  65. package/src/guardian/run-latest.js +90 -0
  66. package/src/guardian/run-list.js +211 -0
  67. package/src/guardian/run-summary.js +20 -0
  68. package/src/guardian/scan-presets.js +100 -11
  69. package/src/guardian/selector-fallbacks.js +394 -0
  70. package/src/guardian/semantic-contact-detection.js +255 -0
  71. package/src/guardian/semantic-contact-finder.js +201 -0
  72. package/src/guardian/semantic-targets.js +234 -0
  73. package/src/guardian/site-introspection.js +257 -0
  74. package/src/guardian/smoke.js +258 -0
  75. package/src/guardian/snapshot-schema.js +25 -1
  76. package/src/guardian/snapshot.js +69 -3
  77. package/src/guardian/stability-scorer.js +169 -0
  78. package/src/guardian/success-evaluator.js +214 -0
  79. package/src/guardian/template-command.js +184 -0
  80. package/src/guardian/text-formatters.js +426 -0
  81. package/src/guardian/timeout-profiles.js +57 -0
  82. package/src/guardian/verdict.js +320 -0
  83. package/src/guardian/verdicts.js +74 -0
  84. package/src/guardian/wait-for-outcome.js +120 -0
  85. package/src/guardian/watch-runner.js +181 -0
  86. package/src/payments/stripe-checkout.js +169 -0
  87. package/src/plans/plan-definitions.js +148 -0
  88. package/src/plans/plan-manager.js +211 -0
  89. package/src/plans/usage-tracker.js +210 -0
  90. package/src/recipes/recipe-engine.js +188 -0
  91. package/src/recipes/recipe-failure-analysis.js +159 -0
  92. package/src/recipes/recipe-registry.js +134 -0
  93. package/src/recipes/recipe-runtime.js +507 -0
  94. package/src/recipes/recipe-store.js +410 -0
  95. package/guardian-contract-v1.md +0 -149
  96. /package/{guardian.config.json → config/guardian.config.json} +0 -0
  97. /package/{guardian.policy.json → config/guardian.policy.json} +0 -0
  98. /package/{guardian.profile.docs.yaml → config/profiles/docs.yaml} +0 -0
  99. /package/{guardian.profile.ecommerce.yaml → config/profiles/ecommerce.yaml} +0 -0
  100. /package/{guardian.profile.marketing.yaml → config/profiles/marketing.yaml} +0 -0
  101. /package/{guardian.profile.saas.yaml → config/profiles/saas.yaml} +0 -0
@@ -0,0 +1,258 @@
1
+ const { AttemptEngine } = require('./attempt-engine');
2
+ const { getAttemptDefinition } = require('./attempt-registry');
3
+ const { BrowserPool } = require('./browser-pool');
4
+ const { checkPrerequisites } = require('./prerequisite-checker');
5
+ const { validateParallel, executeParallel } = require('./parallel-executor');
6
+ const { getTimeoutProfile } = require('./timeout-profiles');
7
+ const { isCiMode } = require('./ci-mode');
8
+
9
+ const SMOKE_ATTEMPTS = ['universal_reality', 'login', 'signup', 'contact_form'];
10
+ const DEFAULT_PARALLEL = 2;
11
+ const DEFAULT_BUDGET_MS = 30000;
12
+ const DEFAULT_PREREQ_TIMEOUT = 2000;
13
+ const SMOKE_BROWSER_ARGS = ['--no-sandbox', '--disable-setuid-sandbox', '--proxy-bypass-list=*'];
14
+
15
/**
 * Report whether a value can be parsed by the WHATWG `URL` constructor.
 *
 * @param {string} url - Candidate absolute URL.
 * @returns {boolean} `true` when `new URL(url)` succeeds, `false` when it throws.
 */
function validateUrl(url) {
  let parsed = null;
  try {
    parsed = new URL(url);
  } catch (parseError) {
    // Parsing failures are the expected "invalid" signal, not an error to report.
    parsed = null;
  }
  return parsed !== null;
}
24
+
25
/**
 * Count attempt results per outcome.
 *
 * Only the four outcomes 'SUCCESS', 'FRICTION', 'FAILURE' and 'SKIPPED' are
 * counted; any other outcome value is ignored.
 *
 * @param {Array<{outcome: string}>} results - Attempt results to tally.
 * @returns {{success: number, friction: number, failure: number, skipped: number}}
 */
function summarizeResults(results) {
  const tally = new Map([
    ['SUCCESS', 0],
    ['FRICTION', 0],
    ['FAILURE', 0],
    ['SKIPPED', 0]
  ]);

  for (const entry of results) {
    const outcome = entry.outcome;
    if (tally.has(outcome)) {
      tally.set(outcome, tally.get(outcome) + 1);
    }
  }

  return {
    success: tally.get('SUCCESS'),
    friction: tally.get('FRICTION'),
    failure: tally.get('FAILURE'),
    skipped: tally.get('SKIPPED')
  };
}
32
+
33
/**
 * Summarize how the authentication attempts ('login' and 'signup') fared.
 *
 * @param {Array<{attemptId: string, outcome: string}>} results - Attempt results.
 * @returns {{hasAuthSuccess: boolean, authFailures: number}}
 *   `hasAuthSuccess` is true when at least one auth attempt ended in SUCCESS or
 *   FRICTION; `authFailures` counts auth attempts that ended in FAILURE.
 */
function authPathStatus(results) {
  let hasAuthSuccess = false;
  let authFailures = 0;

  for (const entry of results) {
    const isAuthAttempt = entry.attemptId === 'login' || entry.attemptId === 'signup';
    if (!isAuthAttempt) {
      continue;
    }
    if (entry.outcome === 'SUCCESS' || entry.outcome === 'FRICTION') {
      hasAuthSuccess = true;
    } else if (entry.outcome === 'FAILURE') {
      authFailures += 1;
    }
  }

  return { hasAuthSuccess, authFailures };
}
39
+
40
/**
 * Map a smoke-run summary to a process exit code.
 *
 * @param {{failure: number, friction: number}} summary - Outcome counts.
 * @param {boolean} timedOut - Whether the time budget was exceeded.
 * @param {boolean} authMissing - Whether no auth attempt succeeded.
 * @param {number} [authFailuresToIgnore=0] - Failures to subtract before judging.
 * @returns {number} 2 on timeout, missing auth or remaining failures; 1 when
 *   only friction was observed; 0 otherwise.
 */
function chooseExitCode({ failure, friction }, timedOut, authMissing, authFailuresToIgnore = 0) {
  const remainingFailures = failure - authFailuresToIgnore;
  const hardFailure = timedOut || authMissing || remainingFailures > 0;
  if (hardFailure) {
    return 2;
  }
  return friction > 0 ? 1 : 0;
}
46
+
47
/**
 * Put one environment variable back to a previously captured value.
 *
 * @param {string} name - Environment variable name.
 * @param {string|undefined} value - Captured value; `undefined` means the
 *   variable was not set and must be removed again.
 */
function restoreEnvVar(name, value) {
  if (value !== undefined) {
    process.env[name] = value;
  } else {
    delete process.env[name];
  }
}

/**
 * Run the smoke attempts (`SMOKE_ATTEMPTS`) against `config.baseUrl` and
 * print a one-line summary plus a result line.
 *
 * While the run is in progress `NO_PROXY` / `no_proxy` are extended with
 * `127.0.0.1,localhost`. The original values, the budget timer and the
 * browser pool are always cleaned up in a `finally`, so a failure anywhere in
 * the run no longer leaks a browser or leaves the process environment mutated.
 *
 * @param {object} config
 * @param {string} config.baseUrl - Absolute URL of the site under test.
 * @param {boolean} [config.headful] - Launch the browser with a visible window.
 * @param {number} [config.timeBudgetMs] - Overall time budget; the
 *   `GUARDIAN_SMOKE_BUDGET_MS` environment variable takes precedence.
 * @returns {Promise<{exitCode: number, attemptResults: object[], timedOut: boolean, authAvailable: boolean, elapsed: number}>}
 * @throws {Error} When `baseUrl` is not a parseable URL or the parallelism
 *   value is rejected by `validateParallel`.
 */
async function executeSmoke(config) {
  const baseUrl = config.baseUrl;
  if (!validateUrl(baseUrl)) {
    throw new Error(`Invalid URL: ${baseUrl}`);
  }

  const savedNoProxy = { NO_PROXY: process.env.NO_PROXY, no_proxy: process.env.no_proxy };
  const existingNoProxy = process.env.NO_PROXY || process.env.no_proxy;
  const forcedNoProxy = existingNoProxy
    ? `${existingNoProxy},127.0.0.1,localhost`
    : '127.0.0.1,localhost';
  process.env.NO_PROXY = forcedNoProxy;
  process.env.no_proxy = forcedNoProxy;

  // Only set once launch() has succeeded, so cleanup never touches a pool
  // that failed to start.
  let launchedPool = null;
  let budgetTimer = null;

  try {
    const ciMode = isCiMode();
    const timeoutProfile = getTimeoutProfile('fast');
    const resolvedTimeout = timeoutProfile.default;

    // A non-numeric override would become NaN and make the budget timer fire
    // immediately; fall back to the default budget instead.
    const requestedBudget = Number(
      process.env.GUARDIAN_SMOKE_BUDGET_MS || config.timeBudgetMs || DEFAULT_BUDGET_MS
    );
    const budgetMs = Number.isFinite(requestedBudget) && requestedBudget >= 0
      ? requestedBudget
      : DEFAULT_BUDGET_MS;

    const parallelValidation = validateParallel(DEFAULT_PARALLEL);
    if (!parallelValidation.valid) {
      throw new Error(parallelValidation.error || 'Invalid parallel value');
    }
    const parallel = parallelValidation.parallel || DEFAULT_PARALLEL;

    // Interactive runs get a leading blank line; CI output stays compact.
    console.log(`${ciMode ? '' : '\n'}SMOKE MODE: Fast market sanity check (<30s)`);
    console.log(`Target: ${baseUrl}`);
    console.log(`Attempts: ${SMOKE_ATTEMPTS.join(', ')}`);

    const browserPool = new BrowserPool();
    await browserPool.launch({ headless: !config.headful, timeout: resolvedTimeout, args: SMOKE_BROWSER_ARGS });
    launchedPool = browserPool;

    const startedAt = Date.now();
    let timedOut = false;
    let shouldStop = false;

    budgetTimer = setTimeout(() => {
      timedOut = true;
      shouldStop = true;
    }, budgetMs);

    const attemptRunner = async (attemptId) => {
      // Attempts scheduled after the budget expired are dropped, not run.
      if (timedOut) {
        return null;
      }

      const attemptDef = getAttemptDefinition(attemptId);
      if (!attemptDef) {
        return {
          attemptId,
          attemptName: attemptId,
          outcome: 'FAILURE',
          error: `Attempt ${attemptId} not found`,
          friction: null
        };
      }

      let context = null;
      let result;

      try {
        const ctx = await browserPool.createContext({
          timeout: resolvedTimeout,
          ignoreHTTPSErrors: true
        });
        context = ctx.context;
        const page = ctx.page;

        await page.goto(baseUrl, { waitUntil: 'domcontentloaded', timeout: resolvedTimeout });
        const prereq = await checkPrerequisites(page, attemptId, DEFAULT_PREREQ_TIMEOUT);

        if (!prereq.canProceed) {
          result = {
            attemptId,
            attemptName: attemptDef.name,
            outcome: 'SKIPPED',
            skipReason: prereq.reason,
            friction: null,
            error: null
          };
        } else {
          const engine = new AttemptEngine({
            attemptId,
            timeout: resolvedTimeout,
            frictionThresholds: {
              totalDurationMs: 5000,
              stepDurationMs: 2500,
              retryCount: 0
            },
            maxStepRetries: 1
          });

          const attemptResult = await engine.executeAttempt(page, attemptId, baseUrl, null, attemptDef.validators || []);
          result = {
            attemptId,
            attemptName: attemptDef.name,
            outcome: attemptResult.outcome,
            friction: attemptResult.friction,
            error: attemptResult.error,
            successReason: attemptResult.successReason,
            skipReason: null
          };
        }
      } catch (err) {
        result = {
          attemptId,
          attemptName: attemptDef.name || attemptId,
          outcome: 'FAILURE',
          friction: null,
          // Thrown values are not guaranteed to be Error instances.
          error: err && err.message !== undefined ? err.message : String(err),
          skipReason: null
        };
      } finally {
        if (context) {
          await browserPool.closeContext(context);
        }
      }

      // Enforce fail-fast
      if (result.outcome === 'FAILURE') {
        shouldStop = true;
      }

      // Enforce budget after attempt completes
      if (Date.now() - startedAt >= budgetMs) {
        timedOut = true;
        shouldStop = true;
      }

      return result;
    };

    const parallelResults = await executeParallel(
      SMOKE_ATTEMPTS,
      attemptRunner,
      parallel,
      { shouldStop: () => shouldStop }
    );

    clearTimeout(budgetTimer);

    const attemptResults = [];
    for (const r of parallelResults) {
      if (r) {
        attemptResults.push(r);
      }
    }

    if (process.env.GUARDIAN_SMOKE_DEBUG) {
      console.log('DEBUG attempt results:', JSON.stringify(attemptResults, null, 2));
    }

    const summary = summarizeResults(attemptResults);
    const authStatus = authPathStatus(attemptResults);
    // Auth failures are forgiven when another auth attempt got through.
    const effectiveFailures = Math.max(0, summary.failure - (authStatus.hasAuthSuccess ? authStatus.authFailures : 0));
    const exitCode = chooseExitCode({ ...summary, failure: effectiveFailures }, timedOut, !authStatus.hasAuthSuccess, 0);

    const elapsed = Date.now() - startedAt;

    console.log(`Summary: success=${summary.success}, friction=${summary.friction}, failure=${effectiveFailures}, skipped=${summary.skipped}`);
    if (timedOut) {
      console.log(`Result: FAILURE (time budget exceeded at ${elapsed}ms)`);
    } else if (!authStatus.hasAuthSuccess) {
      console.log('Result: FAILURE (auth path unreachable)');
    } else if (exitCode === 2) {
      console.log('Result: FAILURE');
    } else if (exitCode === 1) {
      console.log('Result: FRICTION');
    } else {
      console.log('Result: PASS');
    }

    return { exitCode, attemptResults, timedOut, authAvailable: authStatus.hasAuthSuccess, elapsed };
  } finally {
    clearTimeout(budgetTimer);
    try {
      if (launchedPool) {
        await launchedPool.close();
      }
    } finally {
      // Restore proxy env vars even if closing the browser pool fails.
      restoreEnvVar('NO_PROXY', savedNoProxy.NO_PROXY);
      restoreEnvVar('no_proxy', savedNoProxy.no_proxy);
    }
  }
}
243
+
244
/**
 * CLI entry point for smoke mode: run `executeSmoke` and terminate the
 * process with its exit code, or with 2 after printing the error message
 * when the run throws.
 *
 * @param {object} config - Passed through unchanged to `executeSmoke`.
 * @returns {Promise<void>} Does not resolve in practice; the process exits.
 */
async function runSmokeCLI(config) {
  let exitCode;
  try {
    const outcome = await executeSmoke(config);
    exitCode = outcome.exitCode;
  } catch (err) {
    console.error(`Error: ${err.message}`);
    exitCode = 2;
  }
  process.exit(exitCode);
}
253
+
254
+ module.exports = {
255
+ executeSmoke,
256
+ runSmokeCLI,
257
+ SMOKE_ATTEMPTS
258
+ };
@@ -45,7 +45,7 @@ const SNAPSHOT_SCHEMA_VERSION = 'v1';
45
45
  * @property {string} attemptId - unique attempt identifier
46
46
  * @property {string} attemptName - human-readable name
47
47
  * @property {string} goal - what the user tried to achieve
48
- * @property {string} outcome - 'SUCCESS', 'FAILURE', or 'FRICTION'
48
+ * @property {string} outcome - 'SUCCESS', 'FAILURE', 'FRICTION', 'NOT_APPLICABLE', 'DISCOVERY_FAILED', 'SKIPPED'
49
49
  * @property {number} totalDurationMs - elapsed time
50
50
  * @property {number} stepCount - how many steps executed
51
51
  * @property {number} failedStepIndex - index of first failed step, or -1 if all succeeded
@@ -53,6 +53,10 @@ const SNAPSHOT_SCHEMA_VERSION = 'v1';
53
53
  * @property {ValidatorResult[]} [validators] - soft failure detectors (Phase 2)
54
54
  * @property {number} [softFailureCount] - count of failed validators
55
55
  * @property {string} [riskCategory] - 'LEAD', 'REVENUE', 'TRUST/UX' (Phase 2)
56
+ * @property {string} [skipReason] - reason if SKIPPED, NOT_APPLICABLE, or DISCOVERY_FAILED
57
+ * @property {string[]} [selectorChainTried] - selectors attempted during discovery
58
+ * @property {Object} [discoverySignals] - element discovery signals and heuristics
59
+ * @property {string} [finalSelection] - which selector/strategy successfully matched element
56
60
  */
57
61
 
58
62
  /**
@@ -133,6 +137,13 @@ const SNAPSHOT_SCHEMA_VERSION = 'v1';
133
137
  * @typedef {Object} MarketRealitySnapshot
134
138
  * @property {string} schemaVersion - always 'v1'
135
139
  * @property {SnapshotMeta} meta
140
+ * @property {Object} [verdict] - unified run-level verdict
141
+ * @property {('READY'|'DO_NOT_LAUNCH'|'FRICTION')} verdict.verdict
142
+ * @property {{ level: ('low'|'medium'|'high'), score: number, reasons: string[] }} verdict.confidence
143
+ * @property {string} verdict.why
144
+ * @property {string[]} verdict.keyFindings
145
+ * @property {{ screenshots?: string[], traces?: string[], reportPaths?: string[], affectedPages?: string[] }} verdict.evidence
146
+ * @property {string[]} verdict.limits
136
147
  * @property {CrawlResult} [crawl]
137
148
  * @property {AttemptResult[]} attempts
138
149
  * @property {Array} flows
@@ -166,6 +177,7 @@ function createEmptySnapshot(baseUrl, runId, toolVersion) {
166
177
  attempts: [],
167
178
  flows: [],
168
179
  signals: [],
180
+ verdict: null,
169
181
  riskSummary: {
170
182
  totalSoftFailures: 0,
171
183
  totalFriction: 0,
@@ -253,6 +265,18 @@ function validateSnapshot(snapshot) {
253
265
  errors.push('Missing baseline section');
254
266
  }
255
267
 
268
+ // Basic verdict validation (if present)
269
+ if (snapshot.verdict) {
270
+ const v = snapshot.verdict;
271
+ const allowed = ['READY', 'DO_NOT_LAUNCH', 'FRICTION'];
272
+ if (!v.verdict || !allowed.includes(v.verdict)) {
273
+ errors.push('Invalid verdict.verdict');
274
+ }
275
+ if (!v.confidence || typeof v.confidence.score !== 'number' || v.confidence.score < 0 || v.confidence.score > 1) {
276
+ errors.push('Invalid verdict.confidence.score');
277
+ }
278
+ }
279
+
256
280
  return {
257
281
  valid: errors.length === 0,
258
282
  errors
@@ -34,10 +34,62 @@ class SnapshotBuilder {
34
34
  };
35
35
  }
36
36
 
37
+ /**
38
+ * Set unified verdict object
39
+ */
40
+ setVerdict(verdict) {
41
+ if (!verdict) return;
42
+ this.snapshot.verdict = {
43
+ verdict: verdict.verdict,
44
+ confidence: verdict.confidence,
45
+ why: verdict.why || '',
46
+ keyFindings: Array.isArray(verdict.keyFindings) ? verdict.keyFindings.slice(0, 7) : [],
47
+ evidence: verdict.evidence || {},
48
+ limits: Array.isArray(verdict.limits) ? verdict.limits.slice(0, 6) : []
49
+ };
50
+ }
51
+
37
52
  /**
38
53
  * Add attempt result to snapshot
39
54
  */
40
55
  addAttempt(attemptResult, artifactDir) {
56
+ // Handle NOT_APPLICABLE and DISCOVERY_FAILED attempts
57
+ if (attemptResult.outcome === 'NOT_APPLICABLE' || attemptResult.outcome === 'DISCOVERY_FAILED') {
58
+ this.snapshot.attempts.push({
59
+ attemptId: attemptResult.attemptId,
60
+ attemptName: attemptResult.attemptName,
61
+ goal: attemptResult.goal,
62
+ outcome: attemptResult.outcome,
63
+ executed: false,
64
+ skipReason: attemptResult.skipReason || (attemptResult.outcome === 'NOT_APPLICABLE' ? 'Feature not present' : 'Element discovery failed'),
65
+ skipReasonCode: attemptResult.skipReasonCode,
66
+ discoverySignals: attemptResult.discoverySignals || {},
67
+ totalDurationMs: attemptResult.totalDurationMs || 0,
68
+ stepCount: attemptResult.stepCount || 0,
69
+ failedStepIndex: -1,
70
+ friction: null
71
+ });
72
+ return; // Don't create signals for non-applicable attempts
73
+ }
74
+
75
+ // Phase 7.4: Handle SKIPPED attempts (don't add as signal)
76
+ if (attemptResult.outcome === 'SKIPPED') {
77
+ this.snapshot.attempts.push({
78
+ attemptId: attemptResult.attemptId,
79
+ attemptName: attemptResult.attemptName,
80
+ goal: attemptResult.goal,
81
+ outcome: 'SKIPPED',
82
+ executed: false,
83
+ skipReason: attemptResult.skipReason || 'Prerequisites not met',
84
+ skipReasonCode: attemptResult.skipReasonCode,
85
+ totalDurationMs: 0,
86
+ stepCount: 0,
87
+ failedStepIndex: -1,
88
+ friction: null
89
+ });
90
+ return; // Don't create signals for skipped attempts
91
+ }
92
+
41
93
  const signal = {
42
94
  id: `attempt_${attemptResult.attemptId}`,
43
95
  severity: attemptResult.outcome === 'FAILURE' ? 'high' : 'medium',
@@ -55,10 +107,17 @@ class SnapshotBuilder {
55
107
  attemptName: attemptResult.attemptName,
56
108
  goal: attemptResult.goal,
57
109
  outcome: attemptResult.outcome,
110
+ executed: true,
111
+ discoverySignals: attemptResult.discoverySignals || {},
58
112
  totalDurationMs: attemptResult.attemptResult?.totalDurationMs || 0,
59
113
  stepCount: (attemptResult.steps || []).length,
60
114
  failedStepIndex: (attemptResult.steps || []).findIndex(s => s.status === 'failed'),
61
- friction: attemptResult.friction || null
115
+ friction: attemptResult.friction || null,
116
+ evidenceSummary: {
117
+ screenshots: (attemptResult.steps || []).reduce((sum, s) => sum + (Array.isArray(s.screenshots) ? s.screenshots.length : 0), 0),
118
+ validators: Array.isArray(attemptResult.validators) ? attemptResult.validators.length : 0,
119
+ tracesCaptured: attemptResult.tracePath ? 1 : 0
120
+ }
62
121
  });
63
122
 
64
123
  // Track artifacts
@@ -66,7 +125,8 @@ class SnapshotBuilder {
66
125
  this.snapshot.evidence.attemptArtifacts[attemptResult.attemptId] = {
67
126
  reportJson: path.join(attemptResult.attemptId, 'attempt-report.json'),
68
127
  reportHtml: path.join(attemptResult.attemptId, 'attempt-report.html'),
69
- screenshotDir: path.join(attemptResult.attemptId, 'attempt-screenshots')
128
+ screenshotDir: path.join(attemptResult.attemptId, 'attempt-screenshots'),
129
+ attemptJson: attemptResult.attemptJsonPath ? path.relative(artifactDir, attemptResult.attemptJsonPath) : undefined
70
130
  };
71
131
  }
72
132
 
@@ -123,7 +183,13 @@ class SnapshotBuilder {
123
183
  stepsTotal: flowResult.stepsTotal || 0,
124
184
  durationMs: flowResult.durationMs || 0,
125
185
  failedStep: flowResult.failedStep || null,
126
- error: flowResult.error || null
186
+ error: flowResult.error || null,
187
+ successEval: flowResult.successEval ? {
188
+ status: flowResult.successEval.status,
189
+ confidence: flowResult.successEval.confidence,
190
+ reasons: (flowResult.successEval.reasons || []).slice(0, 3),
191
+ evidence: flowResult.successEval.evidence || {}
192
+ } : null
127
193
  });
128
194
 
129
195
  if (runDir) {
@@ -0,0 +1,169 @@
1
+ /**
2
+ * Stability Scorer - Real-world reliability metrics
3
+ *
4
+ * Measures how stable a journey run was:
5
+ * - Per-step stability (transient vs deterministic failures)
6
+ * - Overall run stability score (0-100)
7
+ */
8
+
9
/**
 * Classify a step error as transient (worth retrying) or deterministic.
 *
 * Matching is done on case-insensitive substrings of the message. Transient
 * patterns are tested before deterministic ones, and the navigation check
 * runs before the generic timeout check so that navigation timeouts are
 * reported as 'NAVIGATION_TIMEOUT' rather than being absorbed by 'TIMEOUT'.
 *
 * @param {string|Error|null|undefined} errorMessage - Error message from a
 *   step, or an Error-like object whose `message` is used.
 * @returns {{isTransient: boolean, classification: string}} A falsy input
 *   yields `{ isTransient: false, classification: 'UNKNOWN' }`; an
 *   unrecognized message yields `{ isTransient: true, classification: 'UNKNOWN' }`.
 */
function classifyErrorType(errorMessage) {
  if (!errorMessage) return { isTransient: false, classification: 'UNKNOWN' };

  // Steps may carry an Error object instead of a plain string; never throw here.
  const rawText = typeof errorMessage === 'string'
    ? errorMessage
    : (errorMessage.message ?? errorMessage);
  const msg = String(rawText).toLowerCase();
  const has = (needle) => msg.includes(needle);
  const timedOut = has('timeout') || has('timed out');

  // Transient errors (safe to retry)
  if (has('navigation') && (timedOut || has('closed'))) {
    return { isTransient: true, classification: 'NAVIGATION_TIMEOUT' };
  }
  if (timedOut) {
    return { isTransient: true, classification: 'TIMEOUT' };
  }
  if (has('detached') || has('frame')) {
    return { isTransient: true, classification: 'DETACHED_FRAME' };
  }
  if (has('econnrefused') || has('network') || has('socket')) {
    return { isTransient: true, classification: 'NETWORK_ERROR' };
  }
  if (has('connection') && (has('reset') || has('closed'))) {
    return { isTransient: true, classification: 'CONNECTION_ERROR' };
  }

  // Deterministic errors (don't retry)
  if (has('not found') && (has('cta') || has('element'))) {
    return { isTransient: false, classification: 'ELEMENT_NOT_FOUND' };
  }
  if (has('not visible')) {
    return { isTransient: false, classification: 'ELEMENT_NOT_VISIBLE' };
  }
  if (has('cta') && has('found')) {
    return { isTransient: false, classification: 'CTA_NOT_FOUND' };
  }

  // Default: assume transient to be safe
  return { isTransient: true, classification: 'UNKNOWN' };
}
50
+
51
/**
 * Derive stability figures for one executed step.
 *
 * Confidence is 100 for a first-try success, drops by 20 per extra attempt
 * (never below 30) for a success after retries, and is 10 for a failed step.
 *
 * @param {object} step - Executed step result { id, name, success, attemptNumber, error }
 * @returns {{stepId: *, attempts: number, finalStatus: string, stable: boolean, confidence: number, errorType: string, isTransient: boolean}}
 */
function scoreStepStability(step) {
  const attempts = step.attemptNumber || 1;
  const succeeded = Boolean(step.success);
  const { classification, isTransient } = classifyErrorType(step.error);

  let confidence;
  if (!succeeded) {
    // The step never passed, so there is very little confidence in it.
    confidence = 10;
  } else if (attempts > 1) {
    // Passed only after retrying: the earlier failures were transient.
    confidence = Math.max(30, 100 - (attempts - 1) * 20);
  } else {
    confidence = 100;
  }

  return {
    stepId: step.id,
    attempts,
    finalStatus: succeeded ? 'SUCCESS' : 'FAILED',
    stable: succeeded,
    confidence,
    errorType: classification,
    isTransient
  };
}
87
+
88
/**
 * Compute the overall stability score (0-100) of a journey run.
 *
 * Starts from 100, subtracts 10 for every step that needed more than one
 * attempt and 30 for every step that ultimately failed, and floors the result
 * at 0. A run with no executed steps scores 0. Consistency across multiple
 * runs is not evaluated here.
 *
 * @param {object} result - Journey result with executedSteps array
 * @returns {number} - Stability score 0-100
 */
function computeRunStabilityScore(result) {
  const executed = result.executedSteps || [];
  if (executed.length === 0) {
    return 0;
  }

  let penalty = 0;
  for (const step of executed) {
    const { attempts, finalStatus } = scoreStepStability(step);
    if (attempts > 1) {
      penalty += 10;
    }
    if (finalStatus === 'FAILED') {
      penalty += 30;
    }
  }

  return Math.round(Math.max(0, 100 - penalty));
}
124
+
125
/**
 * Assemble the stability report for a journey result: the run score, its
 * textual assessment, aggregate step metrics and the per-step breakdown.
 *
 * @param {object} result - Journey scan result
 * @returns {{runStabilityScore: number, metrics: object, stepStability: object[], assessment: string}}
 */
function buildStabilityReport(result) {
  const executed = result.executedSteps || [];
  const stepStability = executed.map(scoreStepStability);
  const runStabilityScore = computeRunStabilityScore(result);

  const metrics = {
    totalSteps: executed.length,
    succeededSteps: 0,
    failedSteps: 0,
    stepsWithRetries: 0,
    totalAttempts: 0
  };

  for (const entry of stepStability) {
    if (entry.finalStatus === 'SUCCESS') {
      metrics.succeededSteps += 1;
    }
    if (entry.finalStatus === 'FAILED') {
      metrics.failedSteps += 1;
    }
    if (entry.attempts > 1) {
      metrics.stepsWithRetries += 1;
    }
    metrics.totalAttempts += entry.attempts;
  }

  return {
    runStabilityScore,
    metrics,
    stepStability,
    assessment: assessStability(runStabilityScore)
  };
}
150
+
151
/**
 * Translate a stability score into a coarse label.
 *
 * @param {number} score - Stability score 0-100
 * @returns {string} 'excellent' (>= 80), 'good' (>= 60), 'fair' (>= 40),
 *   otherwise 'poor'
 */
function assessStability(score) {
  // Ordered from the highest floor down; the first floor the score reaches wins.
  const bands = [
    [80, 'excellent'],
    [60, 'good'],
    [40, 'fair']
  ];
  const band = bands.find(([floor]) => score >= floor);
  return band ? band[1] : 'poor';
}
162
+
163
+ module.exports = {
164
+ classifyErrorType,
165
+ scoreStepStability,
166
+ computeRunStabilityScore,
167
+ buildStabilityReport,
168
+ assessStability
169
+ };