agent-scenario-loop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +119 -0
- package/app/profile-session.ts +812 -0
- package/core/config-template.json +41 -0
- package/dist/core/agent-summary.d.ts +15 -0
- package/dist/core/agent-summary.js +177 -0
- package/dist/core/artifact-contract.d.ts +151 -0
- package/dist/core/artifact-contract.js +897 -0
- package/dist/core/artifact-layout.d.ts +56 -0
- package/dist/core/artifact-layout.js +61 -0
- package/dist/core/artifact-writer.d.ts +44 -0
- package/dist/core/artifact-writer.js +55 -0
- package/dist/core/comparison.d.ts +133 -0
- package/dist/core/comparison.js +294 -0
- package/dist/core/evidence-interpreter.d.ts +28 -0
- package/dist/core/evidence-interpreter.js +69 -0
- package/dist/core/execution-plan.d.ts +44 -0
- package/dist/core/execution-plan.js +95 -0
- package/dist/core/planner.d.ts +132 -0
- package/dist/core/planner.js +812 -0
- package/dist/core/ports.d.ts +198 -0
- package/dist/core/ports.js +146 -0
- package/dist/core/run-index.d.ts +62 -0
- package/dist/core/run-index.js +143 -0
- package/dist/core/schema-validator.d.ts +86 -0
- package/dist/core/schema-validator.js +407 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.js +27 -0
- package/dist/runner/agent-device-driver.d.ts +126 -0
- package/dist/runner/agent-device-driver.js +168 -0
- package/dist/runner/agent-device.d.ts +295 -0
- package/dist/runner/agent-device.js +1271 -0
- package/dist/runner/android-adb-driver.d.ts +175 -0
- package/dist/runner/android-adb-driver.js +399 -0
- package/dist/runner/android-adb.d.ts +254 -0
- package/dist/runner/android-adb.js +1618 -0
- package/dist/runner/argent-driver.d.ts +183 -0
- package/dist/runner/argent-driver.js +297 -0
- package/dist/runner/argent.d.ts +349 -0
- package/dist/runner/argent.js +1211 -0
- package/dist/runner/check-plan.d.ts +45 -0
- package/dist/runner/check-plan.js +210 -0
- package/dist/runner/cli.d.ts +20 -0
- package/dist/runner/cli.js +23 -0
- package/dist/runner/compare-latest.d.ts +99 -0
- package/dist/runner/compare-latest.js +233 -0
- package/dist/runner/compare.d.ts +58 -0
- package/dist/runner/compare.js +157 -0
- package/dist/runner/demo-loop.d.ts +45 -0
- package/dist/runner/demo-loop.js +170 -0
- package/dist/runner/example-android-live.d.ts +137 -0
- package/dist/runner/example-android-live.js +454 -0
- package/dist/runner/example-ios-live.d.ts +137 -0
- package/dist/runner/example-ios-live.js +471 -0
- package/dist/runner/host-doctor.d.ts +131 -0
- package/dist/runner/host-doctor.js +628 -0
- package/dist/runner/init-project.d.ts +88 -0
- package/dist/runner/init-project.js +263 -0
- package/dist/runner/ios-simctl-driver.d.ts +69 -0
- package/dist/runner/ios-simctl-driver.js +97 -0
- package/dist/runner/ios-simctl.d.ts +254 -0
- package/dist/runner/ios-simctl.js +1415 -0
- package/dist/runner/live-android.d.ts +137 -0
- package/dist/runner/live-android.js +539 -0
- package/dist/runner/live-comparison.d.ts +67 -0
- package/dist/runner/live-comparison.js +147 -0
- package/dist/runner/live-ios.d.ts +137 -0
- package/dist/runner/live-ios.js +460 -0
- package/dist/runner/live-proof-summary.d.ts +263 -0
- package/dist/runner/live-proof-summary.js +465 -0
- package/dist/runner/live-proof.d.ts +467 -0
- package/dist/runner/live-proof.js +920 -0
- package/dist/runner/local-env.d.ts +64 -0
- package/dist/runner/local-env.js +155 -0
- package/dist/runner/profile-android.d.ts +82 -0
- package/dist/runner/profile-android.js +671 -0
- package/dist/runner/profile-ios.d.ts +108 -0
- package/dist/runner/profile-ios.js +532 -0
- package/dist/runner/profile-mobile.d.ts +254 -0
- package/dist/runner/profile-mobile.js +1307 -0
- package/dist/runner/validate-project.d.ts +273 -0
- package/dist/runner/validate-project.js +1501 -0
- package/docs/adapters.md +145 -0
- package/docs/api.md +94 -0
- package/docs/authoring.md +196 -0
- package/docs/concepts.md +136 -0
- package/docs/consumer-rehearsal.md +115 -0
- package/docs/contracts.md +267 -0
- package/docs/live-proofs.md +270 -0
- package/docs/principles.md +46 -0
- package/examples/event-logs/app-startup-baseline.log +4 -0
- package/examples/event-logs/app-startup-current.log +4 -0
- package/examples/minimal-app/README.md +70 -0
- package/examples/mobile-app/README.md +302 -0
- package/examples/mobile-app/app.json +22 -0
- package/examples/mobile-app/asl/package-scripts.json +32 -0
- package/examples/mobile-app/asl.config.json +37 -0
- package/examples/mobile-app/event-logs/android-app-startup.log +4 -0
- package/examples/mobile-app/event-logs/android-open-close-cycle.log +12 -0
- package/examples/mobile-app/event-logs/android-scroll-settle.log +12 -0
- package/examples/mobile-app/event-logs/app-startup.log +4 -0
- package/examples/mobile-app/event-logs/open-close-cycle.log +12 -0
- package/examples/mobile-app/event-logs/scroll-settle.log +12 -0
- package/examples/mobile-app/index.ts +20 -0
- package/examples/mobile-app/metro.config.js +20 -0
- package/examples/mobile-app/package.json +62 -0
- package/examples/mobile-app/patches/expo-modules-jsi@56.0.10.patch +19 -0
- package/examples/mobile-app/plugins/with-ios-build-compat.js +271 -0
- package/examples/mobile-app/pnpm-lock.yaml +4440 -0
- package/examples/mobile-app/runner-manifests/evidence-provider.json +79 -0
- package/examples/mobile-app/runner-manifests/primary-runner.json +19 -0
- package/examples/mobile-app/scenarios/android/app-startup-video.json +73 -0
- package/examples/mobile-app/scenarios/android/app-startup.json +44 -0
- package/examples/mobile-app/scenarios/android/open-close-cycle.json +54 -0
- package/examples/mobile-app/scenarios/android/scroll-settle.json +49 -0
- package/examples/mobile-app/scenarios/ios/app-startup.json +44 -0
- package/examples/mobile-app/scenarios/ios/open-close-cycle.json +54 -0
- package/examples/mobile-app/scenarios/ios/scroll-settle.json +49 -0
- package/examples/mobile-app/scenarios/mobile/app-startup.json +91 -0
- package/examples/mobile-app/scenarios/mobile/open-close-cycle.json +160 -0
- package/examples/mobile-app/scenarios/mobile/scroll-settle.json +148 -0
- package/examples/mobile-app/scripts/asl-capture-accessibility-provider.mjs +112 -0
- package/examples/mobile-app/scripts/asl-capture-profiler-provider.mjs +127 -0
- package/examples/mobile-app/src/devtools/profile-session.ts +7 -0
- package/examples/mobile-app/src/example-screen.tsx +322 -0
- package/examples/mobile-app/tsconfig.json +16 -0
- package/examples/mobile-app/tsconfig.typecheck.json +13 -0
- package/examples/runners/README.md +44 -0
- package/examples/runners/adb-android.json +25 -0
- package/examples/runners/agent-device-android.json +27 -0
- package/examples/runners/agent-device-ios.json +27 -0
- package/examples/runners/argent-android.json +32 -0
- package/examples/runners/argent-ios.json +32 -0
- package/examples/runners/argent-react-profiler-provider.json +15 -0
- package/examples/runners/axe-accessibility-provider.json +24 -0
- package/examples/runners/manual-log-ingest.json +9 -0
- package/examples/runners/rozenite-profiler-provider.json +9 -0
- package/examples/runners/script-accessibility-provider.json +24 -0
- package/examples/runners/script-memory-provider.json +24 -0
- package/examples/runners/script-network-provider.json +24 -0
- package/examples/runners/script-profiler-provider.json +30 -0
- package/examples/runners/xcodebuildmcp-ios.json +29 -0
- package/examples/scenarios/ios/app-startup.json +28 -0
- package/examples/scenarios/ios/open-close-cycle.json +35 -0
- package/examples/scenarios/mobile/app-startup.json +72 -0
- package/examples/scenarios/mobile/media-open-close.json +141 -0
- package/examples/scenarios/mobile/open-close-cycle.json +135 -0
- package/examples/scenarios/mobile/scroll-settle.json +106 -0
- package/package.json +240 -0
- package/schemas/budget-verdict.schema.json +115 -0
- package/schemas/causal-run.schema.json +279 -0
- package/schemas/comparison.schema.json +196 -0
- package/schemas/health.schema.json +108 -0
- package/schemas/live-proof-set.schema.json +195 -0
- package/schemas/live-proof.schema.json +413 -0
- package/schemas/manifest.schema.json +204 -0
- package/schemas/metrics.schema.json +137 -0
- package/schemas/project-validation.schema.json +343 -0
- package/schemas/runner-capabilities.schema.json +217 -0
- package/schemas/scenario.schema.json +400 -0
- package/schemas/verdict.schema.json +88 -0
- package/templates/evidence-provider.json +83 -0
- package/templates/gitignore-snippet +9 -0
- package/templates/integration-readme.md +125 -0
- package/templates/mobile-scenario.json +133 -0
- package/templates/package-scripts.json +32 -0
- package/templates/primary-runner.json +19 -0
- package/templates/project.config.json +37 -0
- package/templates/scripts/asl-capture-accessibility-provider.mjs +112 -0
- package/templates/scripts/asl-capture-profiler-provider.mjs +127 -0
|
@@ -0,0 +1,465 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.buildLiveProofComparisonCounts = buildLiveProofComparisonCounts;
|
|
4
|
+
exports.buildLiveProofComparisonStatus = buildLiveProofComparisonStatus;
|
|
5
|
+
exports.buildLiveProofMarkdown = buildLiveProofMarkdown;
|
|
6
|
+
exports.buildLiveProofNextAction = buildLiveProofNextAction;
|
|
7
|
+
exports.buildLiveProofSummary = buildLiveProofSummary;
|
|
8
|
+
exports.buildLiveProofStatus = buildLiveProofStatus;
|
|
9
|
+
exports.formatComparisonMetricSummary = formatComparisonMetricSummary;
|
|
10
|
+
exports.formatInteractionProofCaptures = formatInteractionProofCaptures;
|
|
11
|
+
exports.formatInteractionProofWarningDetails = formatInteractionProofWarningDetails;
|
|
12
|
+
exports.formatInteractionProofWarnings = formatInteractionProofWarnings;
|
|
13
|
+
exports.isTrustedLiveRunStatus = isTrustedLiveRunStatus;
|
|
14
|
+
exports.readInteractionProofCaptures = readInteractionProofCaptures;
|
|
15
|
+
exports.readInteractionProofWarnings = readInteractionProofWarnings;
|
|
16
|
+
exports.readProfileRunStatus = readProfileRunStatus;
|
|
17
|
+
exports.writeLiveProofSummary = writeLiveProofSummary;
|
|
18
|
+
const fs = require('node:fs');
|
|
19
|
+
const path = require('node:path');
|
|
20
|
+
const { createArtifactLayout } = require('../core/artifact-layout');
|
|
21
|
+
const { writeJsonArtifact, writeTextArtifact } = require('../core/artifact-writer');
|
|
22
|
+
const { SCHEMAS } = require('../core/schema-validator');
|
|
23
|
+
/**
 * Loads `health.json` and `verdict.json` from a run directory and extracts the
 * two status fields that the aggregate entrypoint reports.
 *
 * Read and parse failures are not caught here; they propagate to the caller.
 *
 * @param {string} runDir Directory that contains `health.json` and `verdict.json`.
 * @returns {{healthStatus: string, verdictStatus: string}} Stringified statuses;
 *   each one is `'unknown'` when the corresponding field is null or undefined.
 */
function readProfileRunStatus(runDir) {
    const readJson = (fileName) => JSON.parse(fs.readFileSync(path.join(runDir, fileName), 'utf8'));
    const healthDocument = readJson('health.json');
    const verdictDocument = readJson('verdict.json');
    const healthStatus = String(healthDocument.healthStatus ?? 'unknown');
    const verdictStatus = String(verdictDocument.verdictStatus ?? 'unknown');
    return { healthStatus, verdictStatus };
}
|
|
37
|
+
/**
 * Looks up the screenshot inventory recorded by a sidecar interaction proof.
 *
 * The first existing file among `raw/agent-device-metadata.json` and
 * `raw/argent-metadata.json` (in that order) is used. Any read or parse failure
 * is treated the same as "no captures".
 *
 * @param {string} runDir
 * @returns {LiveProofInteractionProofCaptures | null} `{ screenshots }` holding the
 *   non-empty string entries, or `null` when there is nothing usable.
 */
function readInteractionProofCaptures(runDir) {
    let metadataPath;
    for (const fileName of ['agent-device-metadata.json', 'argent-metadata.json']) {
        const candidate = path.join(runDir, 'raw', fileName);
        if (fs.existsSync(candidate)) {
            metadataPath = candidate;
            break;
        }
    }
    if (!metadataPath) {
        return null;
    }
    try {
        const parsed = JSON.parse(fs.readFileSync(metadataPath, 'utf8'));
        const rawCaptures = parsed.captures;
        const isPlainObject = Boolean(rawCaptures) && typeof rawCaptures === 'object' && !Array.isArray(rawCaptures);
        if (!isPlainObject || !Array.isArray(rawCaptures.screenshots)) {
            return null;
        }
        const screenshots = rawCaptures.screenshots.filter((entry) => typeof entry === 'string' && entry.length > 0);
        return screenshots.length === 0 ? null : { screenshots };
    }
    catch {
        // Best effort: an unreadable sidecar simply yields no capture inventory.
        return null;
    }
}
|
|
65
|
+
/**
 * Collects the checks with `status === 'warning'` from a run's `health.json`.
 *
 * Each warning is normalized to `{ code, message, name }` with fixed fallbacks
 * for non-string fields. A `nextAction` is attached only when the check's
 * metadata carries a string `nextActionCode` or `nextAction`. A missing file,
 * or any read or parse failure, yields `null`.
 *
 * @param {string} runDir
 * @returns {LiveProofInteractionProofWarnings | null} `{ checks, count }`, or
 *   `null` when no warning checks were found.
 */
function readInteractionProofWarnings(runDir) {
    const healthPath = path.join(runDir, 'health.json');
    if (!fs.existsSync(healthPath)) {
        return null;
    }
    const isRecord = (value) => Boolean(value) && typeof value === 'object' && !Array.isArray(value);
    const stringOr = (value, fallback) => (typeof value === 'string' ? value : fallback);
    try {
        const health = JSON.parse(fs.readFileSync(healthPath, 'utf8'));
        const candidates = Array.isArray(health.checks) ? health.checks : [];
        const warnings = [];
        for (const check of candidates) {
            if (!isRecord(check) || check.status !== 'warning') {
                continue;
            }
            const metadata = isRecord(check.metadata) ? check.metadata : {};
            const warning = {
                code: stringOr(check.code, 'warning'),
                message: stringOr(check.message, 'Interaction proof emitted a warning.'),
                name: stringOr(check.name, 'interaction_warning'),
            };
            const hasNextAction = typeof metadata.nextActionCode === 'string' || typeof metadata.nextAction === 'string';
            if (hasNextAction) {
                warning.nextAction = {
                    code: stringOr(metadata.nextActionCode, 'inspect_interaction_warning'),
                    summary: stringOr(metadata.nextAction, 'Inspect the interaction proof warning.'),
                };
            }
            warnings.push(warning);
        }
        return warnings.length > 0 ? { checks: warnings, count: warnings.length } : null;
    }
    catch {
        // Best effort: an unreadable health artifact is reported as "no warnings".
        return null;
    }
}
|
|
108
|
+
/**
 * Renders passed/failed gate counts as a short phrase, keeping the failed count
 * visible whenever it is non-zero.
 *
 * - no failures: `"<passed> passed <label>"`
 * - only failures: `"<failed> failed <label>"`
 * - both: `"<passed> passed and <failed> failed <label>"`
 *
 * @param {{failed: number, label: string, passed: number}} options
 * @returns {string}
 */
function formatGateCountSummary({ failed, label, passed, }) {
    const segments = [];
    // The passed count is dropped only when there are failures and nothing passed.
    if (failed === 0 || passed !== 0) {
        segments.push(`${passed} passed`);
    }
    if (failed !== 0) {
        segments.push(`${failed} failed`);
    }
    return `${segments.join(' and ')} ${label}`;
}
|
|
123
|
+
/**
 * Composes the one-sentence summary for an aggregate live proof.
 *
 * The sentence always states the platform, the overall status and the profile
 * gate counts. Interaction proof counts, skipped interaction proofs and
 * interaction warnings are appended only when their counts are above zero.
 *
 * @param {object} options
 * @param {number} options.comparisonCount Number of comparison results.
 * @param {string} [options.comparisonStatus] Batch comparison status, printed when `comparisonCount > 0`.
 * @param {number} [options.failedInteractionProofCount=0]
 * @param {number} [options.failedProfileCount=0]
 * @param {number} [options.interactionProofCount=0]
 * @param {number} [options.interactionWarningCount=0]
 * @param {string} options.platform
 * @param {number} options.profileCount Total profile runs, passed and failed.
 * @param {number} [options.skippedInteractionProofCount=0]
 * @param {string} [options.status='passed'] Any value other than `'passed'` is reported as failed.
 * @returns {string}
 */
function buildLiveProofSummary({ comparisonCount, comparisonStatus, failedInteractionProofCount = 0, failedProfileCount = 0, interactionProofCount = 0, interactionWarningCount = 0, platform, profileCount, skippedInteractionProofCount = 0, status = 'passed', }) {
    const outcome = status === 'passed' ? 'passed' : 'failed';
    const profileGates = formatGateCountSummary({
        failed: failedProfileCount,
        label: 'profile run(s)',
        passed: profileCount - failedProfileCount,
    });
    let sentence = `${platform} live proof ${outcome} with ${profileGates}`;
    if (interactionProofCount > 0) {
        const interactionGates = formatGateCountSummary({
            failed: failedInteractionProofCount,
            label: 'interaction proof(s)',
            passed: interactionProofCount - failedInteractionProofCount,
        });
        sentence += ` and ${interactionGates}`;
    }
    if (comparisonCount > 0) {
        sentence += ` with ${comparisonCount} comparison result(s): ${comparisonStatus}`;
    }
    else {
        sentence += ' without comparison results';
    }
    if (skippedInteractionProofCount > 0) {
        sentence += `; skipped ${skippedInteractionProofCount} interaction proof(s)`;
    }
    if (interactionWarningCount > 0) {
        sentence += `; ${interactionWarningCount} interaction warning(s)`;
    }
    return `${sentence}.`;
}
|
|
154
|
+
/**
 * Reduces the per-scenario comparison statuses to a single batch status.
 *
 * Precedence, highest first: any `worse` gives `regressed`; any `inconclusive`
 * gives `inconclusive`; all `skipped` gives `baseline_missing`; some `skipped`
 * gives `inconclusive`; any `mixed` gives `mixed`; any `better` gives
 * `improved`; otherwise `unchanged`. An empty list is `not_compared`.
 *
 * @param {LiveProofComparisonPointer[]} comparisons
 * @returns {LiveProofComparisonStatus}
 */
function buildLiveProofComparisonStatus(comparisons) {
    if (comparisons.length === 0) {
        return 'not_compared';
    }
    const seen = new Set(comparisons.map(({ status }) => status));
    if (seen.has('worse')) {
        return 'regressed';
    }
    if (seen.has('inconclusive')) {
        return 'inconclusive';
    }
    if (seen.has('skipped')) {
        // A lone distinct value means every comparison was skipped.
        return seen.size === 1 ? 'baseline_missing' : 'inconclusive';
    }
    if (seen.has('mixed')) {
        return 'mixed';
    }
    return seen.has('better') ? 'improved' : 'unchanged';
}
|
|
185
|
+
/**
 * Tallies per-scenario comparison outcomes for agent-readable aggregate summaries.
 *
 * Only the six known statuses are counted. A comparison whose status is
 * missing or unrecognized is ignored, so the result always has exactly the six
 * numeric buckets and never gains a `NaN` or stray key.
 *
 * @param {LiveProofComparisonPointer[]} comparisons
 * @returns {LiveProofComparisonCounts}
 */
function buildLiveProofComparisonCounts(comparisons) {
    const counts = {
        better: 0,
        inconclusive: 0,
        mixed: 0,
        skipped: 0,
        unchanged: 0,
        worse: 0,
    };
    for (const comparison of comparisons) {
        // Own-property check: names such as "constructor" resolve through the
        // prototype chain and must not be treated as buckets.
        if (Object.prototype.hasOwnProperty.call(counts, comparison.status)) {
            counts[comparison.status] += 1;
        }
    }
    return counts;
}
|
|
205
|
+
/**
 * Chooses the follow-up an agent should perform after reading the batch proof.
 *
 * A failed overall status takes priority over every comparison outcome.
 * Otherwise the comparison status selects the action, and anything not listed
 * falls back to `inspect_summary`.
 *
 * @param {LiveProofComparisonStatus} comparisonStatus
 * @param {string} [status='passed'] Aggregate live-proof status.
 * @returns {LiveProofNextAction}
 */
function buildLiveProofNextAction(comparisonStatus, status = 'passed') {
    if (status === 'failed') {
        return {
            code: 'inspect_failed_run',
            summary: 'One or more live proof gates failed; inspect failed profile or interaction summaries before making optimization claims.',
        };
    }
    switch (comparisonStatus) {
        case 'regressed':
            return {
                code: 'inspect_regressions',
                summary: 'One or more scenario comparisons regressed; inspect comparison summaries before claiming improvement.',
            };
        case 'baseline_missing':
            return {
                code: 'establish_baseline',
                summary: 'No trusted prior run was available; keep this proof as a baseline before making before/after claims.',
            };
        case 'inconclusive':
            return {
                code: 'inspect_inconclusive',
                summary: 'Some comparisons are inconclusive or incomplete; inspect scenario health and missing baseline details.',
            };
        case 'mixed':
            return {
                code: 'inspect_mixed',
                summary: 'Some timing metrics improved while others worsened; inspect comparison details before claiming improvement or regression.',
            };
        default:
            return {
                code: 'inspect_summary',
                summary: 'Scenario health passed; inspect the live-proof summary and linked evidence before reporting the result.',
            };
    }
}
|
|
247
|
+
/**
 * Decides whether a referenced run can be trusted as proof: its health must be
 * `passed` and its verdict either `passed` or `not_evaluated`.
 *
 * @param {{healthStatus?: string, verdictStatus?: string}} status
 * @returns {boolean}
 */
function isTrustedLiveRunStatus(status) {
    if (status.healthStatus !== 'passed') {
        return false;
    }
    const verdict = status.verdictStatus;
    return verdict === 'passed' || verdict === 'not_evaluated';
}
|
|
256
|
+
/**
 * Computes the aggregate live-proof status from the linked evidence pointers.
 *
 * The proof passes only when all of the following hold:
 * - the preflight run is trusted (verdict may be `not_evaluated`),
 * - every profile has both health and verdict equal to `passed`,
 * - every interaction proof is trusted,
 * - no interaction proof was skipped.
 *
 * @param {{preflight: {healthStatus?: string, verdictStatus?: string}, profiles: Array<{healthStatus?: string, verdictStatus?: string}>, interactionProofs: Array<{healthStatus?: string, verdictStatus?: string}>, skippedInteractionProofCount?: number}} options
 * @returns {'failed' | 'passed'}
 */
function buildLiveProofStatus({ interactionProofs, preflight, profiles, skippedInteractionProofCount = 0, }) {
    // Profiles are stricter than preflight/interaction runs: `not_evaluated` is not accepted.
    const profilePassed = (profile) => profile.healthStatus === 'passed' && profile.verdictStatus === 'passed';
    const everyGatePassed = isTrustedLiveRunStatus(preflight)
        && profiles.every(profilePassed)
        && interactionProofs.every((proof) => isTrustedLiveRunStatus(proof))
        && !(skippedInteractionProofCount > 0);
    return everyGatePassed ? 'passed' : 'failed';
}
|
|
274
|
+
/**
 * Formats one comparison metric highlight for markdown as
 * `"<name> <status> (<delta><unit>)"`.
 *
 * A null or undefined delta renders as `n/a`, and a missing unit renders as an
 * empty suffix, so absent fields never appear as the text `undefined`.
 *
 * @param {LiveProofComparisonMetricSummary['notableMetrics'][number]} metric
 * @returns {string}
 */
function formatComparisonMetricHighlight(metric) {
    // `== null` deliberately matches both null and undefined.
    const delta = metric.delta == null ? 'n/a' : `${metric.delta}${metric.unit ?? ''}`;
    return `${metric.name} ${metric.status} (${delta})`;
}
|
|
284
|
+
/**
 * Renders the parenthesized metric digest appended to a comparison line:
 * the better/worse/unchanged/inconclusive counts, followed by the notable
 * metric highlights when there are any.
 *
 * @param {LiveProofComparisonPointer} comparison
 * @returns {string} Empty string when the comparison has no `metricSummary`;
 *   otherwise text starting with a space and wrapped in parentheses.
 */
function formatComparisonMetricSummary(comparison) {
    const metricSummary = comparison.metricSummary;
    if (!metricSummary) {
        return '';
    }
    const tally = metricSummary.counts;
    const segments = [
        `metrics better=${tally.better} worse=${tally.worse} unchanged=${tally.unchanged} inconclusive=${tally.inconclusive}`,
    ];
    if (metricSummary.notableMetrics.length > 0) {
        segments.push(`notable: ${metricSummary.notableMetrics.map(formatComparisonMetricHighlight).join(', ')}`);
    }
    return ` (${segments.join('; ')})`;
}
|
|
301
|
+
/**
 * Renders the screenshot count of a sidecar proof for the aggregate markdown.
 *
 * @param {LiveProofInteractionProofPointer} proof
 * @returns {string} `" screenshots=<n>"` when at least one screenshot is listed,
 *   otherwise an empty string.
 */
function formatInteractionProofCaptures(proof) {
    const captures = proof.captures;
    const total = captures == null ? 0 : (captures.screenshots.length ?? 0);
    if (total > 0) {
        return ` screenshots=${total}`;
    }
    return '';
}
|
|
311
|
+
/**
 * Renders the warning count of a sidecar proof for the aggregate markdown.
 *
 * @param {LiveProofInteractionProofPointer} proof
 * @returns {string} `" warnings=<n>"` when the recorded count is above zero,
 *   otherwise an empty string.
 */
function formatInteractionProofWarnings(proof) {
    const warnings = proof.warnings;
    const total = warnings == null ? 0 : (warnings.count ?? 0);
    if (total > 0) {
        return ` warnings=${total}`;
    }
    return '';
}
|
|
321
|
+
/**
 * Produces one markdown bullet per sidecar warning check, appending the
 * warning's next action when it has one.
 *
 * @param {LiveProofInteractionProofPointer} proof
 * @returns {string[]} Empty when the proof carries no warning checks.
 */
function formatInteractionProofWarningDetails(proof) {
    const checks = proof.warnings?.checks ?? [];
    return checks.map((warning) => {
        let bullet = ` - warning ${warning.name}: ${warning.code} - ${warning.message}`;
        if (warning.nextAction) {
            bullet += ` Next action: ${warning.nextAction.code} - ${warning.nextAction.summary}`;
        }
        return bullet;
    });
}
|
|
335
|
+
/**
 * Renders the aggregate live proof artifact as the markdown entrypoint.
 *
 * The header, `## Preflight` and `## Profiles` sections are always emitted.
 * `## Interaction Proofs`, `## Skipped Interaction Proofs` and `## Comparisons`
 * appear only when the artifact has entries for them. The output ends with a
 * single trailing newline.
 *
 * @param {LiveProofArtifact} artifact
 * @returns {string}
 */
function buildLiveProofMarkdown(artifact) {
    const tally = artifact.comparisonCounts;
    const out = [];
    out.push(`# ${artifact.platform} live proof`, '', `Status: ${artifact.status}`, `Run: ${artifact.runId}`, `Comparison status: ${artifact.comparisonStatus}`);
    out.push(`Comparison counts: better=${tally.better} worse=${tally.worse} unchanged=${tally.unchanged} mixed=${tally.mixed} inconclusive=${tally.inconclusive} skipped=${tally.skipped}`);
    out.push(`Next action: ${artifact.nextAction.code} - ${artifact.nextAction.summary}`, `Summary: ${artifact.summary}`, '');
    out.push('## Preflight', '', `- ${artifact.preflight.runId}: ${artifact.preflight.summaryPath}`, '');
    out.push('## Profiles', '');
    for (const profile of artifact.profiles) {
        out.push(`- ${profile.label} (${profile.scenarioId}): health=${profile.healthStatus} verdict=${profile.verdictStatus} - ${profile.summaryPath}`);
    }
    const proofs = artifact.interactionProofs;
    if (proofs?.length) {
        out.push('', '## Interaction Proofs', '');
        for (const proof of proofs) {
            // Headline bullet first, then one nested bullet per warning.
            out.push(`- ${proof.label} (${proof.runnerId}/${proof.scenarioId}): health=${proof.healthStatus} verdict=${proof.verdictStatus}${formatInteractionProofCaptures(proof)}${formatInteractionProofWarnings(proof)} - ${proof.summaryPath}`);
            out.push(...formatInteractionProofWarningDetails(proof));
        }
    }
    const skipped = artifact.skippedInteractionProofs;
    if (skipped?.length) {
        out.push('', '## Skipped Interaction Proofs', '');
        for (const proof of skipped) {
            out.push(`- ${proof.label} (${proof.runnerId}/${proof.scenarioId}/${proof.runId}): ${proof.reason} Next action: ${proof.nextAction.code} - ${proof.nextAction.summary}`);
        }
    }
    if (artifact.comparisons.length > 0) {
        out.push('', '## Comparisons', '');
        for (const comparison of artifact.comparisons) {
            if (comparison.status === 'skipped') {
                out.push(`- ${comparison.label} (${comparison.scenarioId}): skipped - ${comparison.reason}`);
            }
            else {
                out.push(`- ${comparison.label} (${comparison.scenarioId}): ${comparison.status}${formatComparisonMetricSummary(comparison)} - ${comparison.summaryPath}`);
            }
        }
    }
    return `${out.join('\n')}\n`;
}
|
|
376
|
+
/**
 * Assembles the aggregate live proof artifact and writes it as JSON (validated
 * against `SCHEMAS.liveProof`) together with its markdown summary.
 *
 * Output goes under `<outputDir>/_live-proof/<runId>`. Status fields for the
 * preflight run, each profile and each interaction proof are read from the
 * respective run directories. Interaction proofs additionally pick up capture
 * and warning information when their sidecar files provide it.
 *
 * @param {WriteLiveProofSummaryOptions} options
 * @returns {Promise<LiveProofSummaryResult>} Paths of the live proof directory,
 *   the JSON artifact and the markdown summary.
 */
async function writeLiveProofSummary({ comparisons, interactionProofs = [], outputDir, platform, preflightDir, preflightRunId, profiles, runId, skippedInteractionProofs = [], }) {
    const summaryPathFor = (runDir) => path.join(runDir, 'agent-summary.md');
    const isFailedProfile = (pointer) => pointer.healthStatus !== 'passed' || pointer.verdictStatus !== 'passed';
    const liveProofDir = path.join(outputDir, '_live-proof', runId);
    const layout = createArtifactLayout({ outputDir: liveProofDir });
    const comparisonStatus = buildLiveProofComparisonStatus(comparisons);
    const comparisonCounts = buildLiveProofComparisonCounts(comparisons);
    const preflightStatus = readProfileRunStatus(preflightDir);
    const profilePointers = profiles.map((profile) => ({
        ...readProfileRunStatus(profile.runDir),
        label: profile.label,
        runDir: profile.runDir,
        runId: profile.runId,
        scenarioId: profile.scenarioId,
        summaryPath: summaryPathFor(profile.runDir),
    }));
    const interactionProofPointers = interactionProofs.map((proof) => {
        const captures = readInteractionProofCaptures(proof.runDir);
        const warnings = readInteractionProofWarnings(proof.runDir);
        // Keys are added in a fixed order so the serialized artifact stays stable.
        const pointer = { ...readProfileRunStatus(proof.runDir) };
        if (captures) {
            pointer.captures = captures;
        }
        pointer.label = proof.label;
        pointer.runDir = proof.runDir;
        pointer.runId = proof.runId;
        pointer.runnerId = proof.runnerId;
        pointer.scenarioId = proof.scenarioId;
        pointer.summaryPath = summaryPathFor(proof.runDir);
        if (warnings) {
            pointer.warnings = warnings;
        }
        return pointer;
    });
    let interactionWarningCount = 0;
    for (const pointer of interactionProofPointers) {
        interactionWarningCount += pointer.warnings?.count ?? 0;
    }
    const status = buildLiveProofStatus({
        interactionProofs: interactionProofPointers,
        preflight: preflightStatus,
        profiles: profilePointers,
        skippedInteractionProofCount: skippedInteractionProofs.length,
    });
    const artifact = {
        comparisons,
        comparisonCounts,
        comparisonStatus,
        nextAction: buildLiveProofNextAction(comparisonStatus, status),
        outputDir,
        platform,
    };
    // Optional sections are present only when they have entries.
    if (interactionProofPointers.length > 0) {
        artifact.interactionProofs = interactionProofPointers;
    }
    if (skippedInteractionProofs.length > 0) {
        artifact.skippedInteractionProofs = skippedInteractionProofs;
    }
    artifact.preflight = {
        ...preflightStatus,
        runDir: preflightDir,
        runId: preflightRunId,
        summaryPath: summaryPathFor(preflightDir),
    };
    artifact.profiles = profilePointers;
    artifact.runId = runId;
    artifact.schemaVersion = '1.0.0';
    artifact.status = status;
    artifact.summary = buildLiveProofSummary({
        comparisonCount: comparisons.length,
        comparisonStatus,
        failedInteractionProofCount: interactionProofPointers.filter((pointer) => !isTrustedLiveRunStatus(pointer)).length,
        failedProfileCount: profilePointers.filter(isFailedProfile).length,
        interactionProofCount: interactionProofs.length,
        interactionWarningCount,
        platform,
        profileCount: profiles.length,
        skippedInteractionProofCount: skippedInteractionProofs.length,
        status,
    });
    await writeJsonArtifact({
        filePath: layout.liveProof,
        value: artifact,
        schema: SCHEMAS.liveProof,
        label: 'Live proof artifact',
    });
    await writeTextArtifact({
        filePath: layout.agentSummary,
        content: buildLiveProofMarkdown(artifact),
    });
    return {
        liveProofDir,
        liveProofPath: layout.liveProof,
        summaryPath: layout.agentSummary,
    };
}
|