agent-scenario-loop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +119 -0
  3. package/app/profile-session.ts +812 -0
  4. package/core/config-template.json +41 -0
  5. package/dist/core/agent-summary.d.ts +15 -0
  6. package/dist/core/agent-summary.js +177 -0
  7. package/dist/core/artifact-contract.d.ts +151 -0
  8. package/dist/core/artifact-contract.js +897 -0
  9. package/dist/core/artifact-layout.d.ts +56 -0
  10. package/dist/core/artifact-layout.js +61 -0
  11. package/dist/core/artifact-writer.d.ts +44 -0
  12. package/dist/core/artifact-writer.js +55 -0
  13. package/dist/core/comparison.d.ts +133 -0
  14. package/dist/core/comparison.js +294 -0
  15. package/dist/core/evidence-interpreter.d.ts +28 -0
  16. package/dist/core/evidence-interpreter.js +69 -0
  17. package/dist/core/execution-plan.d.ts +44 -0
  18. package/dist/core/execution-plan.js +95 -0
  19. package/dist/core/planner.d.ts +132 -0
  20. package/dist/core/planner.js +812 -0
  21. package/dist/core/ports.d.ts +198 -0
  22. package/dist/core/ports.js +146 -0
  23. package/dist/core/run-index.d.ts +62 -0
  24. package/dist/core/run-index.js +143 -0
  25. package/dist/core/schema-validator.d.ts +86 -0
  26. package/dist/core/schema-validator.js +407 -0
  27. package/dist/index.d.ts +11 -0
  28. package/dist/index.js +27 -0
  29. package/dist/runner/agent-device-driver.d.ts +126 -0
  30. package/dist/runner/agent-device-driver.js +168 -0
  31. package/dist/runner/agent-device.d.ts +295 -0
  32. package/dist/runner/agent-device.js +1271 -0
  33. package/dist/runner/android-adb-driver.d.ts +175 -0
  34. package/dist/runner/android-adb-driver.js +399 -0
  35. package/dist/runner/android-adb.d.ts +254 -0
  36. package/dist/runner/android-adb.js +1618 -0
  37. package/dist/runner/argent-driver.d.ts +183 -0
  38. package/dist/runner/argent-driver.js +297 -0
  39. package/dist/runner/argent.d.ts +349 -0
  40. package/dist/runner/argent.js +1211 -0
  41. package/dist/runner/check-plan.d.ts +45 -0
  42. package/dist/runner/check-plan.js +210 -0
  43. package/dist/runner/cli.d.ts +20 -0
  44. package/dist/runner/cli.js +23 -0
  45. package/dist/runner/compare-latest.d.ts +99 -0
  46. package/dist/runner/compare-latest.js +233 -0
  47. package/dist/runner/compare.d.ts +58 -0
  48. package/dist/runner/compare.js +157 -0
  49. package/dist/runner/demo-loop.d.ts +45 -0
  50. package/dist/runner/demo-loop.js +170 -0
  51. package/dist/runner/example-android-live.d.ts +137 -0
  52. package/dist/runner/example-android-live.js +454 -0
  53. package/dist/runner/example-ios-live.d.ts +137 -0
  54. package/dist/runner/example-ios-live.js +471 -0
  55. package/dist/runner/host-doctor.d.ts +131 -0
  56. package/dist/runner/host-doctor.js +628 -0
  57. package/dist/runner/init-project.d.ts +88 -0
  58. package/dist/runner/init-project.js +263 -0
  59. package/dist/runner/ios-simctl-driver.d.ts +69 -0
  60. package/dist/runner/ios-simctl-driver.js +97 -0
  61. package/dist/runner/ios-simctl.d.ts +254 -0
  62. package/dist/runner/ios-simctl.js +1415 -0
  63. package/dist/runner/live-android.d.ts +137 -0
  64. package/dist/runner/live-android.js +539 -0
  65. package/dist/runner/live-comparison.d.ts +67 -0
  66. package/dist/runner/live-comparison.js +147 -0
  67. package/dist/runner/live-ios.d.ts +137 -0
  68. package/dist/runner/live-ios.js +460 -0
  69. package/dist/runner/live-proof-summary.d.ts +263 -0
  70. package/dist/runner/live-proof-summary.js +465 -0
  71. package/dist/runner/live-proof.d.ts +467 -0
  72. package/dist/runner/live-proof.js +920 -0
  73. package/dist/runner/local-env.d.ts +64 -0
  74. package/dist/runner/local-env.js +155 -0
  75. package/dist/runner/profile-android.d.ts +82 -0
  76. package/dist/runner/profile-android.js +671 -0
  77. package/dist/runner/profile-ios.d.ts +108 -0
  78. package/dist/runner/profile-ios.js +532 -0
  79. package/dist/runner/profile-mobile.d.ts +254 -0
  80. package/dist/runner/profile-mobile.js +1307 -0
  81. package/dist/runner/validate-project.d.ts +273 -0
  82. package/dist/runner/validate-project.js +1501 -0
  83. package/docs/adapters.md +145 -0
  84. package/docs/api.md +94 -0
  85. package/docs/authoring.md +196 -0
  86. package/docs/concepts.md +136 -0
  87. package/docs/consumer-rehearsal.md +115 -0
  88. package/docs/contracts.md +267 -0
  89. package/docs/live-proofs.md +270 -0
  90. package/docs/principles.md +46 -0
  91. package/examples/event-logs/app-startup-baseline.log +4 -0
  92. package/examples/event-logs/app-startup-current.log +4 -0
  93. package/examples/minimal-app/README.md +70 -0
  94. package/examples/mobile-app/README.md +302 -0
  95. package/examples/mobile-app/app.json +22 -0
  96. package/examples/mobile-app/asl/package-scripts.json +32 -0
  97. package/examples/mobile-app/asl.config.json +37 -0
  98. package/examples/mobile-app/event-logs/android-app-startup.log +4 -0
  99. package/examples/mobile-app/event-logs/android-open-close-cycle.log +12 -0
  100. package/examples/mobile-app/event-logs/android-scroll-settle.log +12 -0
  101. package/examples/mobile-app/event-logs/app-startup.log +4 -0
  102. package/examples/mobile-app/event-logs/open-close-cycle.log +12 -0
  103. package/examples/mobile-app/event-logs/scroll-settle.log +12 -0
  104. package/examples/mobile-app/index.ts +20 -0
  105. package/examples/mobile-app/metro.config.js +20 -0
  106. package/examples/mobile-app/package.json +62 -0
  107. package/examples/mobile-app/patches/expo-modules-jsi@56.0.10.patch +19 -0
  108. package/examples/mobile-app/plugins/with-ios-build-compat.js +271 -0
  109. package/examples/mobile-app/pnpm-lock.yaml +4440 -0
  110. package/examples/mobile-app/runner-manifests/evidence-provider.json +79 -0
  111. package/examples/mobile-app/runner-manifests/primary-runner.json +19 -0
  112. package/examples/mobile-app/scenarios/android/app-startup-video.json +73 -0
  113. package/examples/mobile-app/scenarios/android/app-startup.json +44 -0
  114. package/examples/mobile-app/scenarios/android/open-close-cycle.json +54 -0
  115. package/examples/mobile-app/scenarios/android/scroll-settle.json +49 -0
  116. package/examples/mobile-app/scenarios/ios/app-startup.json +44 -0
  117. package/examples/mobile-app/scenarios/ios/open-close-cycle.json +54 -0
  118. package/examples/mobile-app/scenarios/ios/scroll-settle.json +49 -0
  119. package/examples/mobile-app/scenarios/mobile/app-startup.json +91 -0
  120. package/examples/mobile-app/scenarios/mobile/open-close-cycle.json +160 -0
  121. package/examples/mobile-app/scenarios/mobile/scroll-settle.json +148 -0
  122. package/examples/mobile-app/scripts/asl-capture-accessibility-provider.mjs +112 -0
  123. package/examples/mobile-app/scripts/asl-capture-profiler-provider.mjs +127 -0
  124. package/examples/mobile-app/src/devtools/profile-session.ts +7 -0
  125. package/examples/mobile-app/src/example-screen.tsx +322 -0
  126. package/examples/mobile-app/tsconfig.json +16 -0
  127. package/examples/mobile-app/tsconfig.typecheck.json +13 -0
  128. package/examples/runners/README.md +44 -0
  129. package/examples/runners/adb-android.json +25 -0
  130. package/examples/runners/agent-device-android.json +27 -0
  131. package/examples/runners/agent-device-ios.json +27 -0
  132. package/examples/runners/argent-android.json +32 -0
  133. package/examples/runners/argent-ios.json +32 -0
  134. package/examples/runners/argent-react-profiler-provider.json +15 -0
  135. package/examples/runners/axe-accessibility-provider.json +24 -0
  136. package/examples/runners/manual-log-ingest.json +9 -0
  137. package/examples/runners/rozenite-profiler-provider.json +9 -0
  138. package/examples/runners/script-accessibility-provider.json +24 -0
  139. package/examples/runners/script-memory-provider.json +24 -0
  140. package/examples/runners/script-network-provider.json +24 -0
  141. package/examples/runners/script-profiler-provider.json +30 -0
  142. package/examples/runners/xcodebuildmcp-ios.json +29 -0
  143. package/examples/scenarios/ios/app-startup.json +28 -0
  144. package/examples/scenarios/ios/open-close-cycle.json +35 -0
  145. package/examples/scenarios/mobile/app-startup.json +72 -0
  146. package/examples/scenarios/mobile/media-open-close.json +141 -0
  147. package/examples/scenarios/mobile/open-close-cycle.json +135 -0
  148. package/examples/scenarios/mobile/scroll-settle.json +106 -0
  149. package/package.json +240 -0
  150. package/schemas/budget-verdict.schema.json +115 -0
  151. package/schemas/causal-run.schema.json +279 -0
  152. package/schemas/comparison.schema.json +196 -0
  153. package/schemas/health.schema.json +108 -0
  154. package/schemas/live-proof-set.schema.json +195 -0
  155. package/schemas/live-proof.schema.json +413 -0
  156. package/schemas/manifest.schema.json +204 -0
  157. package/schemas/metrics.schema.json +137 -0
  158. package/schemas/project-validation.schema.json +343 -0
  159. package/schemas/runner-capabilities.schema.json +217 -0
  160. package/schemas/scenario.schema.json +400 -0
  161. package/schemas/verdict.schema.json +88 -0
  162. package/templates/evidence-provider.json +83 -0
  163. package/templates/gitignore-snippet +9 -0
  164. package/templates/integration-readme.md +125 -0
  165. package/templates/mobile-scenario.json +133 -0
  166. package/templates/package-scripts.json +32 -0
  167. package/templates/primary-runner.json +19 -0
  168. package/templates/project.config.json +37 -0
  169. package/templates/scripts/asl-capture-accessibility-provider.mjs +112 -0
  170. package/templates/scripts/asl-capture-profiler-provider.mjs +127 -0
@@ -0,0 +1,465 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.buildLiveProofComparisonCounts = buildLiveProofComparisonCounts;
4
+ exports.buildLiveProofComparisonStatus = buildLiveProofComparisonStatus;
5
+ exports.buildLiveProofMarkdown = buildLiveProofMarkdown;
6
+ exports.buildLiveProofNextAction = buildLiveProofNextAction;
7
+ exports.buildLiveProofSummary = buildLiveProofSummary;
8
+ exports.buildLiveProofStatus = buildLiveProofStatus;
9
+ exports.formatComparisonMetricSummary = formatComparisonMetricSummary;
10
+ exports.formatInteractionProofCaptures = formatInteractionProofCaptures;
11
+ exports.formatInteractionProofWarningDetails = formatInteractionProofWarningDetails;
12
+ exports.formatInteractionProofWarnings = formatInteractionProofWarnings;
13
+ exports.isTrustedLiveRunStatus = isTrustedLiveRunStatus;
14
+ exports.readInteractionProofCaptures = readInteractionProofCaptures;
15
+ exports.readInteractionProofWarnings = readInteractionProofWarnings;
16
+ exports.readProfileRunStatus = readProfileRunStatus;
17
+ exports.writeLiveProofSummary = writeLiveProofSummary;
18
+ const fs = require('node:fs');
19
+ const path = require('node:path');
20
+ const { createArtifactLayout } = require('../core/artifact-layout');
21
+ const { writeJsonArtifact, writeTextArtifact } = require('../core/artifact-writer');
22
+ const { SCHEMAS } = require('../core/schema-validator');
23
/**
 * Loads the health and verdict status recorded for a run directory.
 *
 * Synchronously reads and parses `health.json` and `verdict.json` inside
 * `runDir`. A nullish status field is reported as `'unknown'`; any other value
 * is coerced to a string. Read and parse errors propagate to the caller.
 *
 * @param {string} runDir
 * @returns {{healthStatus: string, verdictStatus: string}}
 */
function readProfileRunStatus(runDir) {
    const readJson = (fileName) => JSON.parse(fs.readFileSync(path.join(runDir, fileName), 'utf8'));
    const healthDocument = readJson('health.json');
    const verdictDocument = readJson('verdict.json');
    const healthStatus = String(healthDocument.healthStatus ?? 'unknown');
    const verdictStatus = String(verdictDocument.verdictStatus ?? 'unknown');
    return { healthStatus, verdictStatus };
}
37
/**
 * Reads the screenshot inventory from a sidecar interaction proof, if any.
 *
 * Looks for `raw/agent-device-metadata.json` first and `raw/argent-metadata.json`
 * second, and uses the first one that exists. Only non-empty string entries of
 * `captures.screenshots` are kept. Returns `null` when no metadata file exists,
 * when it cannot be read or parsed, or when no usable screenshot entry remains.
 *
 * @param {string} runDir
 * @returns {LiveProofInteractionProofCaptures | null}
 */
function readInteractionProofCaptures(runDir) {
    const rawDir = path.join(runDir, 'raw');
    let metadataPath;
    for (const fileName of ['agent-device-metadata.json', 'argent-metadata.json']) {
        const candidate = path.join(rawDir, fileName);
        if (fs.existsSync(candidate)) {
            metadataPath = candidate;
            break;
        }
    }
    if (metadataPath === undefined) {
        return null;
    }
    try {
        const { captures } = JSON.parse(fs.readFileSync(metadataPath, 'utf8'));
        const isCaptureRecord = Boolean(captures) && typeof captures === 'object' && !Array.isArray(captures);
        if (!isCaptureRecord || !Array.isArray(captures.screenshots)) {
            return null;
        }
        const screenshots = captures.screenshots.filter((entry) => typeof entry === 'string' && entry.length > 0);
        if (screenshots.length === 0) {
            return null;
        }
        return { screenshots };
    }
    catch {
        // Best-effort read: an unreadable or malformed sidecar means "no captures".
        return null;
    }
}
65
/**
 * Collects the warning-level checks from a run's `health.json`.
 *
 * Every object entry of `checks` whose `status` is `'warning'` is normalized
 * into `{code, message, name}` with fallback text for non-string fields. A
 * `nextAction` is attached only when the check metadata carries a string
 * `nextActionCode` or `nextAction`. Returns `null` when the file is missing,
 * unreadable, malformed, or contains no warning checks.
 *
 * @param {string} runDir
 * @returns {LiveProofInteractionProofWarnings | null}
 */
function readInteractionProofWarnings(runDir) {
    const healthPath = path.join(runDir, 'health.json');
    if (!fs.existsSync(healthPath)) {
        return null;
    }
    const isRecord = (value) => Boolean(value) && typeof value === 'object' && !Array.isArray(value);
    const stringOr = (value, fallback) => (typeof value === 'string' ? value : fallback);
    try {
        const { checks } = JSON.parse(fs.readFileSync(healthPath, 'utf8'));
        const warnings = [];
        for (const check of Array.isArray(checks) ? checks : []) {
            if (!isRecord(check) || check.status !== 'warning') {
                continue;
            }
            const metadata = isRecord(check.metadata) ? check.metadata : {};
            const warning = {
                code: stringOr(check.code, 'warning'),
                message: stringOr(check.message, 'Interaction proof emitted a warning.'),
                name: stringOr(check.name, 'interaction_warning'),
            };
            const hasActionHint = typeof metadata.nextActionCode === 'string' || typeof metadata.nextAction === 'string';
            if (hasActionHint) {
                warning.nextAction = {
                    code: stringOr(metadata.nextActionCode, 'inspect_interaction_warning'),
                    summary: stringOr(metadata.nextAction, 'Inspect the interaction proof warning.'),
                };
            }
            warnings.push(warning);
        }
        if (warnings.length === 0) {
            return null;
        }
        return { checks: warnings, count: warnings.length };
    }
    catch {
        // Best-effort read: an unreadable or malformed health file means "no warnings".
        return null;
    }
}
108
/**
 * Renders pass/fail counts for one kind of gate as a short phrase.
 *
 * Only the non-zero side is mentioned unless both sides are non-zero, in which
 * case both are shown; zero failures always yields the "passed" phrasing.
 *
 * @param {{failed: number, label: string, passed: number}} options
 * @returns {string}
 */
function formatGateCountSummary({ failed, label, passed, }) {
    const onlyPassed = failed === 0;
    const onlyFailed = !onlyPassed && passed === 0;
    const counts = onlyPassed
        ? `${passed} passed`
        : onlyFailed
            ? `${failed} failed`
            : `${passed} passed and ${failed} failed`;
    return `${counts} ${label}`;
}
123
/**
 * Builds the one-sentence summary for an aggregate live proof.
 *
 * The sentence reports, in order: platform and overall status, passed/failed
 * profile runs, passed/failed interaction proofs (only when any were run),
 * the comparison outcome, and trailing notes for skipped interaction proofs
 * and interaction warnings (each only when non-zero).
 *
 * @param {object} options
 * @param {number} options.comparisonCount - Number of comparison results.
 * @param {string} [options.comparisonStatus] - Batch comparison status; only printed when `comparisonCount > 0`.
 * @param {number} [options.failedInteractionProofCount=0]
 * @param {number} [options.failedProfileCount=0]
 * @param {number} [options.interactionProofCount=0]
 * @param {number} [options.interactionWarningCount=0]
 * @param {string} options.platform
 * @param {number} options.profileCount - Total profile runs, including failed ones.
 * @param {number} [options.skippedInteractionProofCount=0]
 * @param {string} [options.status='passed'] - Any value other than `'passed'` is rendered as "failed".
 * @returns {string}
 */
function buildLiveProofSummary({ comparisonCount, comparisonStatus, failedInteractionProofCount = 0, failedProfileCount = 0, interactionProofCount = 0, interactionWarningCount = 0, platform, profileCount, skippedInteractionProofCount = 0, status = 'passed', }) {
    let sentence = `${platform} live proof ${status === 'passed' ? 'passed' : 'failed'} with `;
    sentence += formatGateCountSummary({
        failed: failedProfileCount,
        label: 'profile run(s)',
        passed: profileCount - failedProfileCount,
    });
    if (interactionProofCount > 0) {
        const interactionCounts = formatGateCountSummary({
            failed: failedInteractionProofCount,
            label: 'interaction proof(s)',
            passed: interactionProofCount - failedInteractionProofCount,
        });
        sentence += ` and ${interactionCounts}`;
    }
    if (comparisonCount > 0) {
        sentence += ` with ${comparisonCount} comparison result(s): ${comparisonStatus}`;
    }
    else {
        sentence += ' without comparison results';
    }
    if (skippedInteractionProofCount > 0) {
        sentence += `; skipped ${skippedInteractionProofCount} interaction proof(s)`;
    }
    if (interactionWarningCount > 0) {
        sentence += `; ${interactionWarningCount} interaction warning(s)`;
    }
    return `${sentence}.`;
}
154
/**
 * Reduces per-scenario comparison results to a single batch status.
 *
 * Precedence, highest first: no comparisons -> `not_compared`; any `worse` ->
 * `regressed`; any `inconclusive` -> `inconclusive`; all `skipped` ->
 * `baseline_missing`; some `skipped` -> `inconclusive`; any `mixed` -> `mixed`;
 * any `better` -> `improved`; otherwise `unchanged`.
 *
 * @param {LiveProofComparisonPointer[]} comparisons
 * @returns {LiveProofComparisonStatus}
 */
function buildLiveProofComparisonStatus(comparisons) {
    if (comparisons.length === 0) {
        return 'not_compared';
    }
    const seen = new Set(comparisons.map((comparison) => comparison.status));
    if (seen.has('worse')) {
        return 'regressed';
    }
    if (seen.has('inconclusive')) {
        return 'inconclusive';
    }
    if (seen.has('skipped')) {
        // A set holding only 'skipped' means every comparison lacked a baseline.
        return seen.size === 1 ? 'baseline_missing' : 'inconclusive';
    }
    if (seen.has('mixed')) {
        return 'mixed';
    }
    return seen.has('better') ? 'improved' : 'unchanged';
}
185
/**
 * Counts per-scenario comparison outcomes for agent-readable aggregate summaries.
 *
 * The result always has exactly the six known buckets (`better`,
 * `inconclusive`, `mixed`, `skipped`, `unchanged`, `worse`). Comparisons whose
 * status is not one of those buckets are ignored, so an unexpected status can
 * neither add a `NaN`-valued key nor touch inherited properties.
 *
 * @param {LiveProofComparisonPointer[]} comparisons
 * @returns {LiveProofComparisonCounts}
 */
function buildLiveProofComparisonCounts(comparisons) {
    const counts = {
        better: 0,
        inconclusive: 0,
        mixed: 0,
        skipped: 0,
        unchanged: 0,
        worse: 0,
    };
    for (const comparison of comparisons) {
        // Own-property check: `counts[unknown] += 1` would otherwise produce NaN.
        if (Object.prototype.hasOwnProperty.call(counts, comparison.status)) {
            counts[comparison.status] += 1;
        }
    }
    return counts;
}
205
/**
 * Picks the next action an agent should take after reading the batch proof.
 *
 * A failed aggregate status always wins and yields `inspect_failed_run`.
 * Otherwise the action follows the comparison status; any status without a
 * dedicated action falls back to `inspect_summary`.
 *
 * @param {LiveProofComparisonStatus} comparisonStatus
 * @param {'failed' | 'passed'} [status='passed'] - Aggregate live-proof status.
 * @returns {LiveProofNextAction}
 */
function buildLiveProofNextAction(comparisonStatus, status = 'passed') {
    const action = (code, summary) => ({ code, summary });
    if (status === 'failed') {
        return action('inspect_failed_run', 'One or more live proof gates failed; inspect failed profile or interaction summaries before making optimization claims.');
    }
    switch (comparisonStatus) {
        case 'regressed':
            return action('inspect_regressions', 'One or more scenario comparisons regressed; inspect comparison summaries before claiming improvement.');
        case 'baseline_missing':
            return action('establish_baseline', 'No trusted prior run was available; keep this proof as a baseline before making before/after claims.');
        case 'inconclusive':
            return action('inspect_inconclusive', 'Some comparisons are inconclusive or incomplete; inspect scenario health and missing baseline details.');
        case 'mixed':
            return action('inspect_mixed', 'Some timing metrics improved while others worsened; inspect comparison details before claiming improvement or regression.');
        default:
            return action('inspect_summary', 'Scenario health passed; inspect the live-proof summary and linked evidence before reporting the result.');
    }
}
247
/**
 * Tells whether a referenced run is healthy enough to be trusted as proof.
 *
 * A run is trusted when its health passed and its verdict either passed or
 * was not evaluated.
 *
 * @param {{healthStatus?: string, verdictStatus?: string}} status
 * @returns {boolean}
 */
function isTrustedLiveRunStatus(status) {
    if (status.healthStatus !== 'passed') {
        return false;
    }
    const verdict = status.verdictStatus;
    return verdict === 'passed' || verdict === 'not_evaluated';
}
256
/**
 * Derives the aggregate live-proof status from the linked evidence pointers.
 *
 * The proof passes only when every gate holds, checked in this order: the
 * preflight run is trusted; every profile has both health and verdict equal
 * to `'passed'` (a `not_evaluated` verdict is not enough for profiles); every
 * interaction proof is trusted; and no interaction proof was skipped.
 *
 * @param {{preflight: {healthStatus?: string, verdictStatus?: string}, profiles: Array<{healthStatus?: string, verdictStatus?: string}>, interactionProofs: Array<{healthStatus?: string, verdictStatus?: string}>, skippedInteractionProofCount?: number}} options
 * @returns {'failed' | 'passed'}
 */
function buildLiveProofStatus({ interactionProofs, preflight, profiles, skippedInteractionProofCount = 0, }) {
    const profilePassed = (profile) => profile.healthStatus === 'passed' && profile.verdictStatus === 'passed';
    const everyGatePassed = isTrustedLiveRunStatus(preflight)
        && profiles.every(profilePassed)
        && interactionProofs.every((proof) => isTrustedLiveRunStatus(proof))
        && !(skippedInteractionProofCount > 0);
    return everyGatePassed ? 'passed' : 'failed';
}
274
/**
 * Renders one notable comparison metric as `name status (delta)`.
 *
 * The delta is printed with its unit appended, or as `n/a` when it is `null`.
 *
 * @param {LiveProofComparisonMetricSummary['notableMetrics'][number]} metric
 * @returns {string}
 */
function formatComparisonMetricHighlight(metric) {
    const { delta, name, status, unit } = metric;
    let deltaText = 'n/a';
    if (delta !== null) {
        deltaText = `${delta}${unit}`;
    }
    return `${name} ${status} (${deltaText})`;
}
284
/**
 * Renders the metric counts and notable metrics of one comparison pointer.
 *
 * Returns an empty string when the comparison carries no `metricSummary`.
 * Otherwise returns a parenthesized fragment with a leading space, listing the
 * better/worse/unchanged/inconclusive counts followed, when present, by the
 * notable metrics.
 *
 * @param {LiveProofComparisonPointer} comparison
 * @returns {string}
 */
function formatComparisonMetricSummary(comparison) {
    const { metricSummary } = comparison;
    if (!metricSummary) {
        return '';
    }
    const { counts, notableMetrics } = metricSummary;
    const segments = [
        `metrics better=${counts.better} worse=${counts.worse} unchanged=${counts.unchanged} inconclusive=${counts.inconclusive}`,
    ];
    if (notableMetrics.length > 0) {
        segments.push(`notable: ${notableMetrics.map(formatComparisonMetricHighlight).join(', ')}`);
    }
    return ` (${segments.join('; ')})`;
}
301
/**
 * Formats sidecar capture inventory for aggregate markdown.
 *
 * Returns ` screenshots=N` (with a leading space) when the proof lists at
 * least one screenshot, otherwise an empty string. A proof without `captures`,
 * or whose `captures` has no `screenshots` list, is treated as having none
 * instead of throwing.
 *
 * @param {LiveProofInteractionProofPointer} proof
 * @returns {string}
 */
function formatInteractionProofCaptures(proof) {
    // Chain through `screenshots` too: `captures` may exist without that list.
    const screenshotCount = proof.captures?.screenshots?.length ?? 0;
    return screenshotCount > 0 ? ` screenshots=${screenshotCount}` : '';
}
311
/**
 * Renders the warning count of an interaction proof for aggregate markdown.
 *
 * Returns ` warnings=N` (with a leading space) when the proof reports a
 * positive warning count, otherwise an empty string.
 *
 * @param {LiveProofInteractionProofPointer} proof
 * @returns {string}
 */
function formatInteractionProofWarnings(proof) {
    const total = proof.warnings?.count ?? 0;
    if (total > 0) {
        return ` warnings=${total}`;
    }
    return '';
}
321
/**
 * Renders one markdown line per warning check of an interaction proof.
 *
 * Each line names the warning, its code and message, and appends the
 * suggested next action when the warning carries one. A proof without
 * warnings yields an empty array.
 *
 * @param {LiveProofInteractionProofPointer} proof
 * @returns {string[]}
 */
function formatInteractionProofWarningDetails(proof) {
    const checks = proof.warnings?.checks ?? [];
    return checks.map(({ code, message, name, nextAction }) => {
        const headline = ` - warning ${name}: ${code} - ${message}`;
        if (!nextAction) {
            return headline;
        }
        return `${headline} Next action: ${nextAction.code} - ${nextAction.summary}`;
    });
}
335
/**
 * Renders the aggregate live-proof artifact as the markdown entrypoint.
 *
 * The document starts with a status header, followed by "Preflight" and
 * "Profiles" sections. "Interaction Proofs", "Skipped Interaction Proofs" and
 * "Comparisons" sections are appended only when the artifact has entries for
 * them. The result ends with a single trailing newline.
 *
 * @param {LiveProofArtifact} artifact
 * @returns {string}
 */
function buildLiveProofMarkdown(artifact) {
    const { comparisonCounts: counts, preflight } = artifact;
    const describeNextAction = (action) => `Next action: ${action.code} - ${action.summary}`;
    const lines = [
        `# ${artifact.platform} live proof`,
        '',
        `Status: ${artifact.status}`,
        `Run: ${artifact.runId}`,
        `Comparison status: ${artifact.comparisonStatus}`,
        `Comparison counts: better=${counts.better} worse=${counts.worse} unchanged=${counts.unchanged} mixed=${counts.mixed} inconclusive=${counts.inconclusive} skipped=${counts.skipped}`,
        describeNextAction(artifact.nextAction),
        `Summary: ${artifact.summary}`,
    ];
    // Every section is a blank line, a heading, a blank line, then its entries.
    const addSection = (heading, entries) => {
        lines.push('', heading, '', ...entries);
    };
    addSection('## Preflight', [`- ${preflight.runId}: ${preflight.summaryPath}`]);
    addSection('## Profiles', artifact.profiles.map((profile) => {
        return `- ${profile.label} (${profile.scenarioId}): health=${profile.healthStatus} verdict=${profile.verdictStatus} - ${profile.summaryPath}`;
    }));
    if (artifact.interactionProofs?.length) {
        addSection('## Interaction Proofs', artifact.interactionProofs.flatMap((proof) => {
            const extras = `${formatInteractionProofCaptures(proof)}${formatInteractionProofWarnings(proof)}`;
            const headline = `- ${proof.label} (${proof.runnerId}/${proof.scenarioId}): health=${proof.healthStatus} verdict=${proof.verdictStatus}${extras} - ${proof.summaryPath}`;
            return [headline, ...formatInteractionProofWarningDetails(proof)];
        }));
    }
    if (artifact.skippedInteractionProofs?.length) {
        addSection('## Skipped Interaction Proofs', artifact.skippedInteractionProofs.map((proof) => {
            return `- ${proof.label} (${proof.runnerId}/${proof.scenarioId}/${proof.runId}): ${proof.reason} ${describeNextAction(proof.nextAction)}`;
        }));
    }
    if (artifact.comparisons.length > 0) {
        addSection('## Comparisons', artifact.comparisons.map((comparison) => {
            const prefix = `- ${comparison.label} (${comparison.scenarioId}): `;
            if (comparison.status === 'skipped') {
                return `${prefix}skipped - ${comparison.reason}`;
            }
            return `${prefix}${comparison.status}${formatComparisonMetricSummary(comparison)} - ${comparison.summaryPath}`;
        }));
    }
    return `${lines.join('\n')}\n`;
}
376
/**
 * Assembles and writes the aggregate live-proof artifacts for one run.
 *
 * Reads the health/verdict status of the preflight run, of every profile run
 * and of every interaction proof (plus its optional captures and warnings),
 * derives the aggregate status, comparison status and counts, next action and
 * summary, then writes the JSON artifact (validated against
 * `SCHEMAS.liveProof`) and its markdown summary under
 * `<outputDir>/_live-proof/<runId>`.
 *
 * @param {WriteLiveProofSummaryOptions} options
 * @returns {Promise<LiveProofSummaryResult>}
 */
async function writeLiveProofSummary({ comparisons, interactionProofs = [], outputDir, platform, preflightDir, preflightRunId, profiles, runId, skippedInteractionProofs = [], }) {
    const summaryPathFor = (runDir) => path.join(runDir, 'agent-summary.md');
    // Profiles need an explicit passing verdict; `not_evaluated` does not count.
    const isFailedProfile = (profile) => profile.healthStatus !== 'passed' || profile.verdictStatus !== 'passed';
    const liveProofDir = path.join(outputDir, '_live-proof', runId);
    const layout = createArtifactLayout({ outputDir: liveProofDir });
    const comparisonStatus = buildLiveProofComparisonStatus(comparisons);
    const comparisonCounts = buildLiveProofComparisonCounts(comparisons);
    const preflightStatus = readProfileRunStatus(preflightDir);
    const profilePointers = profiles.map((profile) => {
        const pointer = { ...readProfileRunStatus(profile.runDir) };
        pointer.label = profile.label;
        pointer.runDir = profile.runDir;
        pointer.runId = profile.runId;
        pointer.scenarioId = profile.scenarioId;
        pointer.summaryPath = summaryPathFor(profile.runDir);
        return pointer;
    });
    const interactionProofPointers = interactionProofs.map((proof) => {
        const captures = readInteractionProofCaptures(proof.runDir);
        const warnings = readInteractionProofWarnings(proof.runDir);
        // Keys are added in the order they appear in the serialized artifact.
        const pointer = { ...readProfileRunStatus(proof.runDir) };
        if (captures) {
            pointer.captures = captures;
        }
        pointer.label = proof.label;
        pointer.runDir = proof.runDir;
        pointer.runId = proof.runId;
        pointer.runnerId = proof.runnerId;
        pointer.scenarioId = proof.scenarioId;
        pointer.summaryPath = summaryPathFor(proof.runDir);
        if (warnings) {
            pointer.warnings = warnings;
        }
        return pointer;
    });
    let interactionWarningCount = 0;
    for (const pointer of interactionProofPointers) {
        interactionWarningCount += pointer.warnings?.count ?? 0;
    }
    const status = buildLiveProofStatus({
        interactionProofs: interactionProofPointers,
        preflight: preflightStatus,
        profiles: profilePointers,
        skippedInteractionProofCount: skippedInteractionProofs.length,
    });
    const artifact = {
        comparisons,
        comparisonCounts,
        comparisonStatus,
        nextAction: buildLiveProofNextAction(comparisonStatus, status),
        outputDir,
        platform,
    };
    if (interactionProofPointers.length > 0) {
        artifact.interactionProofs = interactionProofPointers;
    }
    if (skippedInteractionProofs.length > 0) {
        artifact.skippedInteractionProofs = skippedInteractionProofs;
    }
    artifact.preflight = {
        ...preflightStatus,
        runDir: preflightDir,
        runId: preflightRunId,
        summaryPath: summaryPathFor(preflightDir),
    };
    artifact.profiles = profilePointers;
    artifact.runId = runId;
    artifact.schemaVersion = '1.0.0';
    artifact.status = status;
    artifact.summary = buildLiveProofSummary({
        comparisonCount: comparisons.length,
        comparisonStatus,
        failedInteractionProofCount: interactionProofPointers.filter((pointer) => !isTrustedLiveRunStatus(pointer)).length,
        failedProfileCount: profilePointers.filter(isFailedProfile).length,
        interactionProofCount: interactionProofs.length,
        interactionWarningCount,
        platform,
        profileCount: profiles.length,
        skippedInteractionProofCount: skippedInteractionProofs.length,
        status,
    });
    await writeJsonArtifact({
        filePath: layout.liveProof,
        value: artifact,
        schema: SCHEMAS.liveProof,
        label: 'Live proof artifact',
    });
    await writeTextArtifact({
        filePath: layout.agentSummary,
        content: buildLiveProofMarkdown(artifact),
    });
    return {
        liveProofDir,
        liveProofPath: layout.liveProof,
        summaryPath: layout.agentSummary,
    };
}