agent-scenario-loop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +119 -0
  3. package/app/profile-session.ts +812 -0
  4. package/core/config-template.json +41 -0
  5. package/dist/core/agent-summary.d.ts +15 -0
  6. package/dist/core/agent-summary.js +177 -0
  7. package/dist/core/artifact-contract.d.ts +151 -0
  8. package/dist/core/artifact-contract.js +897 -0
  9. package/dist/core/artifact-layout.d.ts +56 -0
  10. package/dist/core/artifact-layout.js +61 -0
  11. package/dist/core/artifact-writer.d.ts +44 -0
  12. package/dist/core/artifact-writer.js +55 -0
  13. package/dist/core/comparison.d.ts +133 -0
  14. package/dist/core/comparison.js +294 -0
  15. package/dist/core/evidence-interpreter.d.ts +28 -0
  16. package/dist/core/evidence-interpreter.js +69 -0
  17. package/dist/core/execution-plan.d.ts +44 -0
  18. package/dist/core/execution-plan.js +95 -0
  19. package/dist/core/planner.d.ts +132 -0
  20. package/dist/core/planner.js +812 -0
  21. package/dist/core/ports.d.ts +198 -0
  22. package/dist/core/ports.js +146 -0
  23. package/dist/core/run-index.d.ts +62 -0
  24. package/dist/core/run-index.js +143 -0
  25. package/dist/core/schema-validator.d.ts +86 -0
  26. package/dist/core/schema-validator.js +407 -0
  27. package/dist/index.d.ts +11 -0
  28. package/dist/index.js +27 -0
  29. package/dist/runner/agent-device-driver.d.ts +126 -0
  30. package/dist/runner/agent-device-driver.js +168 -0
  31. package/dist/runner/agent-device.d.ts +295 -0
  32. package/dist/runner/agent-device.js +1271 -0
  33. package/dist/runner/android-adb-driver.d.ts +175 -0
  34. package/dist/runner/android-adb-driver.js +399 -0
  35. package/dist/runner/android-adb.d.ts +254 -0
  36. package/dist/runner/android-adb.js +1618 -0
  37. package/dist/runner/argent-driver.d.ts +183 -0
  38. package/dist/runner/argent-driver.js +297 -0
  39. package/dist/runner/argent.d.ts +349 -0
  40. package/dist/runner/argent.js +1211 -0
  41. package/dist/runner/check-plan.d.ts +45 -0
  42. package/dist/runner/check-plan.js +210 -0
  43. package/dist/runner/cli.d.ts +20 -0
  44. package/dist/runner/cli.js +23 -0
  45. package/dist/runner/compare-latest.d.ts +99 -0
  46. package/dist/runner/compare-latest.js +233 -0
  47. package/dist/runner/compare.d.ts +58 -0
  48. package/dist/runner/compare.js +157 -0
  49. package/dist/runner/demo-loop.d.ts +45 -0
  50. package/dist/runner/demo-loop.js +170 -0
  51. package/dist/runner/example-android-live.d.ts +137 -0
  52. package/dist/runner/example-android-live.js +454 -0
  53. package/dist/runner/example-ios-live.d.ts +137 -0
  54. package/dist/runner/example-ios-live.js +471 -0
  55. package/dist/runner/host-doctor.d.ts +131 -0
  56. package/dist/runner/host-doctor.js +628 -0
  57. package/dist/runner/init-project.d.ts +88 -0
  58. package/dist/runner/init-project.js +263 -0
  59. package/dist/runner/ios-simctl-driver.d.ts +69 -0
  60. package/dist/runner/ios-simctl-driver.js +97 -0
  61. package/dist/runner/ios-simctl.d.ts +254 -0
  62. package/dist/runner/ios-simctl.js +1415 -0
  63. package/dist/runner/live-android.d.ts +137 -0
  64. package/dist/runner/live-android.js +539 -0
  65. package/dist/runner/live-comparison.d.ts +67 -0
  66. package/dist/runner/live-comparison.js +147 -0
  67. package/dist/runner/live-ios.d.ts +137 -0
  68. package/dist/runner/live-ios.js +460 -0
  69. package/dist/runner/live-proof-summary.d.ts +263 -0
  70. package/dist/runner/live-proof-summary.js +465 -0
  71. package/dist/runner/live-proof.d.ts +467 -0
  72. package/dist/runner/live-proof.js +920 -0
  73. package/dist/runner/local-env.d.ts +64 -0
  74. package/dist/runner/local-env.js +155 -0
  75. package/dist/runner/profile-android.d.ts +82 -0
  76. package/dist/runner/profile-android.js +671 -0
  77. package/dist/runner/profile-ios.d.ts +108 -0
  78. package/dist/runner/profile-ios.js +532 -0
  79. package/dist/runner/profile-mobile.d.ts +254 -0
  80. package/dist/runner/profile-mobile.js +1307 -0
  81. package/dist/runner/validate-project.d.ts +273 -0
  82. package/dist/runner/validate-project.js +1501 -0
  83. package/docs/adapters.md +145 -0
  84. package/docs/api.md +94 -0
  85. package/docs/authoring.md +196 -0
  86. package/docs/concepts.md +136 -0
  87. package/docs/consumer-rehearsal.md +115 -0
  88. package/docs/contracts.md +267 -0
  89. package/docs/live-proofs.md +270 -0
  90. package/docs/principles.md +46 -0
  91. package/examples/event-logs/app-startup-baseline.log +4 -0
  92. package/examples/event-logs/app-startup-current.log +4 -0
  93. package/examples/minimal-app/README.md +70 -0
  94. package/examples/mobile-app/README.md +302 -0
  95. package/examples/mobile-app/app.json +22 -0
  96. package/examples/mobile-app/asl/package-scripts.json +32 -0
  97. package/examples/mobile-app/asl.config.json +37 -0
  98. package/examples/mobile-app/event-logs/android-app-startup.log +4 -0
  99. package/examples/mobile-app/event-logs/android-open-close-cycle.log +12 -0
  100. package/examples/mobile-app/event-logs/android-scroll-settle.log +12 -0
  101. package/examples/mobile-app/event-logs/app-startup.log +4 -0
  102. package/examples/mobile-app/event-logs/open-close-cycle.log +12 -0
  103. package/examples/mobile-app/event-logs/scroll-settle.log +12 -0
  104. package/examples/mobile-app/index.ts +20 -0
  105. package/examples/mobile-app/metro.config.js +20 -0
  106. package/examples/mobile-app/package.json +62 -0
  107. package/examples/mobile-app/patches/expo-modules-jsi@56.0.10.patch +19 -0
  108. package/examples/mobile-app/plugins/with-ios-build-compat.js +271 -0
  109. package/examples/mobile-app/pnpm-lock.yaml +4440 -0
  110. package/examples/mobile-app/runner-manifests/evidence-provider.json +79 -0
  111. package/examples/mobile-app/runner-manifests/primary-runner.json +19 -0
  112. package/examples/mobile-app/scenarios/android/app-startup-video.json +73 -0
  113. package/examples/mobile-app/scenarios/android/app-startup.json +44 -0
  114. package/examples/mobile-app/scenarios/android/open-close-cycle.json +54 -0
  115. package/examples/mobile-app/scenarios/android/scroll-settle.json +49 -0
  116. package/examples/mobile-app/scenarios/ios/app-startup.json +44 -0
  117. package/examples/mobile-app/scenarios/ios/open-close-cycle.json +54 -0
  118. package/examples/mobile-app/scenarios/ios/scroll-settle.json +49 -0
  119. package/examples/mobile-app/scenarios/mobile/app-startup.json +91 -0
  120. package/examples/mobile-app/scenarios/mobile/open-close-cycle.json +160 -0
  121. package/examples/mobile-app/scenarios/mobile/scroll-settle.json +148 -0
  122. package/examples/mobile-app/scripts/asl-capture-accessibility-provider.mjs +112 -0
  123. package/examples/mobile-app/scripts/asl-capture-profiler-provider.mjs +127 -0
  124. package/examples/mobile-app/src/devtools/profile-session.ts +7 -0
  125. package/examples/mobile-app/src/example-screen.tsx +322 -0
  126. package/examples/mobile-app/tsconfig.json +16 -0
  127. package/examples/mobile-app/tsconfig.typecheck.json +13 -0
  128. package/examples/runners/README.md +44 -0
  129. package/examples/runners/adb-android.json +25 -0
  130. package/examples/runners/agent-device-android.json +27 -0
  131. package/examples/runners/agent-device-ios.json +27 -0
  132. package/examples/runners/argent-android.json +32 -0
  133. package/examples/runners/argent-ios.json +32 -0
  134. package/examples/runners/argent-react-profiler-provider.json +15 -0
  135. package/examples/runners/axe-accessibility-provider.json +24 -0
  136. package/examples/runners/manual-log-ingest.json +9 -0
  137. package/examples/runners/rozenite-profiler-provider.json +9 -0
  138. package/examples/runners/script-accessibility-provider.json +24 -0
  139. package/examples/runners/script-memory-provider.json +24 -0
  140. package/examples/runners/script-network-provider.json +24 -0
  141. package/examples/runners/script-profiler-provider.json +30 -0
  142. package/examples/runners/xcodebuildmcp-ios.json +29 -0
  143. package/examples/scenarios/ios/app-startup.json +28 -0
  144. package/examples/scenarios/ios/open-close-cycle.json +35 -0
  145. package/examples/scenarios/mobile/app-startup.json +72 -0
  146. package/examples/scenarios/mobile/media-open-close.json +141 -0
  147. package/examples/scenarios/mobile/open-close-cycle.json +135 -0
  148. package/examples/scenarios/mobile/scroll-settle.json +106 -0
  149. package/package.json +240 -0
  150. package/schemas/budget-verdict.schema.json +115 -0
  151. package/schemas/causal-run.schema.json +279 -0
  152. package/schemas/comparison.schema.json +196 -0
  153. package/schemas/health.schema.json +108 -0
  154. package/schemas/live-proof-set.schema.json +195 -0
  155. package/schemas/live-proof.schema.json +413 -0
  156. package/schemas/manifest.schema.json +204 -0
  157. package/schemas/metrics.schema.json +137 -0
  158. package/schemas/project-validation.schema.json +343 -0
  159. package/schemas/runner-capabilities.schema.json +217 -0
  160. package/schemas/scenario.schema.json +400 -0
  161. package/schemas/verdict.schema.json +88 -0
  162. package/templates/evidence-provider.json +83 -0
  163. package/templates/gitignore-snippet +9 -0
  164. package/templates/integration-readme.md +125 -0
  165. package/templates/mobile-scenario.json +133 -0
  166. package/templates/package-scripts.json +32 -0
  167. package/templates/primary-runner.json +19 -0
  168. package/templates/project.config.json +37 -0
  169. package/templates/scripts/asl-capture-accessibility-provider.mjs +112 -0
  170. package/templates/scripts/asl-capture-profiler-provider.mjs +127 -0
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env node
2
+ type CliArgs = { // Flags produced by parseArgs for the check-plan CLI.
3
+ providers: string[]; // One entry per `--provider <path>` occurrence, in argv order.
4
+ scenario?: string | boolean; // String when a value followed the flag, `true` when the flag was bare.
5
+ runner?: string | boolean;
6
+ platform?: string | boolean;
7
+ out?: string | boolean;
8
+ 'run-id'?: string | boolean;
9
+ [key: string]: string | boolean | string[] | undefined; // Any other `--key` is stored under its own name.
10
+ };
11
+ type PlanArtifacts = { // Value resolved by buildPlanArtifacts.
12
+ compatibility: Record<string, unknown>; // Result of evaluateRunnerCompatibility.
13
+ health: Record<string, unknown>; // Checked against the health schema before being returned.
14
+ verdict: Record<string, unknown>; // Checked against the verdict schema before being returned.
15
+ agentSummary: string; // Text built by buildAgentSummaryMarkdown from health and verdict.
16
+ };
17
+ /**
18
+ * Writes the check-plan CLI usage text to `output`; the implementation defaults to process.stderr.
19
+ * @param {{write: (message: string) => unknown}} [output] Sink that receives the usage text.
20
+ * @returns {void}
21
+ */
22
+ declare function usage(output?: {
23
+ write: (message: string) => unknown;
24
+ }): void;
25
+ /**
26
+ * Parses `--key value` pairs and bare `--flag` tokens for the plan-check CLI; `--provider` is repeatable.
27
+ * Tokens that do not start with `--` are ignored unless they directly follow a flag as its value.
28
+ * @param {string[]} argv CLI tokens (main passes process.argv.slice(2)).
29
+ * @returns {CliArgs} Parsed flags; a flag followed by a value maps to that string, a bare flag maps to `true`.
30
+ */
31
+ declare function parseArgs(argv: string[]): CliArgs;
32
+ /**
33
+ * Builds pre-execution planner artifacts from validated scenario and runner manifests.
34
+ * Manifests are read from disk; the promise rejects when one cannot be read or parsed (schema failures presumably reject too via assertValidJson).
35
+ * @param {{scenarioPath: string, runnerPath: string, providerPaths?: string[], platform?: string | null, runId?: string}} options
36
+ * @returns {Promise<{compatibility: Record<string, unknown>, health: Record<string, unknown>, verdict: Record<string, unknown>, agentSummary: string}>}
37
+ */
38
+ declare function buildPlanArtifacts({ scenarioPath, runnerPath, providerPaths, platform, runId, }: {
39
+ scenarioPath: string; // Resolved with path.resolve before reading.
40
+ runnerPath: string; // Resolved with path.resolve before reading.
41
+ providerPaths?: string[]; // Defaults to [] in the implementation.
42
+ platform?: string | null; // Defaults to null in the implementation.
43
+ runId?: string; // Defaults to a random 12-character hex id in the implementation.
44
+ }): Promise<PlanArtifacts>;
45
+ export { buildPlanArtifacts, parseArgs, usage, };
@@ -0,0 +1,210 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ exports.buildPlanArtifacts = buildPlanArtifacts;
5
+ exports.parseArgs = parseArgs;
6
+ exports.usage = usage;
7
+ const fs = require('node:fs');
8
+ const path = require('node:path');
9
+ const crypto = require('node:crypto');
10
+ const { buildAgentSummaryMarkdown } = require('../core/agent-summary');
11
+ const { createArtifactLayout } = require('../core/artifact-layout');
12
+ const { writeJsonArtifact, writeTextArtifact } = require('../core/artifact-writer');
13
+ const { buildCompatibilityHealth, buildUnevaluatedVerdict, evaluateRunnerCompatibility, } = require('../core/planner');
14
+ const { SCHEMAS, SchemaValidationError, assertValidJson, } = require('../core/schema-validator');
15
+ const { hasHelpFlag, writeUsage } = require('./cli');
16
/**
 * Emits the check-plan CLI help text.
 *
 * @param {{write: (message: string) => unknown}} [output] Sink for the help text; standard error when omitted.
 * @returns {void}
 */
function usage(output = process.stderr) {
    const helpLines = [
        'Usage: agent-scenario-loop --scenario <path> --runner <path> [--provider <path> ...] [--platform <ios|android>] [--run-id <id>] [--out <dir>]',
        '',
        'Aliases: asl-check-plan',
        'Writes health.json and verdict.json to --out when provided.',
        'Without --out, prints the planned artifacts as JSON.',
    ];
    writeUsage(helpLines, output);
}
30
/**
 * Parses the small flag surface for the plan-check CLI.
 *
 * Recognized shapes:
 * - `--provider <path>` (repeatable) appends `<path>` to `providers`.
 * - `--key <value>` stores the string value under `key`.
 * - `--key` with no value (end of argv, or followed by another `--flag`) stores `true`.
 *
 * Tokens that do not start with `--` and were not consumed as a value are ignored,
 * as is a `--provider` flag that has no value.
 *
 * `providers` is a reserved key: `--providers <path>` is treated as an alias of
 * `--provider <path>` so the generic branch can never replace the array with a
 * string or boolean (which would break callers that iterate `providers`).
 *
 * @param {string[]} argv
 * @returns {{providers: string[], [key: string]: string | boolean | string[]}}
 */
function parseArgs(argv) {
    const args = {
        providers: [],
    };
    for (let index = 0; index < argv.length; index += 1) {
        const token = argv[index];
        if (!token || !token.startsWith('--')) {
            continue;
        }
        const key = token.slice(2);
        const next = argv[index + 1];
        // A following token only counts as this flag's value when it is not itself a flag.
        const hasValue = Boolean(next) && !next.startsWith('--');
        if (key === 'provider' || key === 'providers') {
            if (hasValue) {
                args.providers.push(next);
                index += 1;
            }
            continue;
        }
        if (hasValue) {
            args[key] = next;
            index += 1;
        }
        else {
            args[key] = true;
        }
    }
    return args;
}
68
/**
 * Reads a file as UTF-8 and parses it as JSON.
 *
 * Read failures (for example a missing or unreadable file) and parse failures
 * are reported separately so the message names the real cause. Both messages
 * start with the optional manifest label and include the file path followed by
 * the underlying error message on a new line.
 *
 * @param {string} filePath
 * @param {string} [label] Human-readable manifest name used as the message prefix.
 * @returns {unknown} The parsed JSON value.
 * @throws {Error} When the file cannot be read or its content is not valid JSON.
 */
function readJson(filePath, label) {
    const prefix = label ? `${label} ` : '';
    let raw;
    try {
        raw = fs.readFileSync(filePath, 'utf8');
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        throw new Error(`${prefix}could not be read: ${filePath}\n${message}`);
    }
    try {
        return JSON.parse(raw);
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        throw new Error(`${prefix}could not be parsed as JSON: ${filePath}\n${message}`);
    }
}
85
/**
 * Loads a JSON file and checks it against a public contract schema.
 *
 * @param {string} filePath
 * @param {Record<string, unknown>} schema
 * @param {string} label Name used in read/parse and validation messages.
 * @returns {unknown} Whatever assertValidJson returns for the parsed document.
 */
function readValidatedJson(filePath, schema, label) {
    const parsed = readJson(filePath, label);
    return assertValidJson(parsed, schema, label);
}
96
/**
 * Generates a short random identifier for ad-hoc plan checks.
 *
 * @returns {string} Six random bytes rendered as 12 lowercase hex characters.
 */
function createRunId() {
    const entropy = crypto.randomBytes(6);
    return entropy.toString('hex');
}
104
/**
 * Produces the pre-execution planner artifacts for one scenario/runner pairing.
 *
 * The scenario, runner, and evidence provider manifests are read from disk and
 * validated, runner compatibility is evaluated, and health, verdict, and agent
 * summary are derived from it. Health and verdict are validated against their
 * schemas before the result is returned.
 *
 * @param {{scenarioPath: string, runnerPath: string, providerPaths?: string[], platform?: string | null, runId?: string}} options
 * @returns {Promise<{compatibility: Record<string, unknown>, health: Record<string, unknown>, verdict: Record<string, unknown>, agentSummary: string}>}
 */
async function buildPlanArtifacts({ scenarioPath, runnerPath, providerPaths = [], platform = null, runId = createRunId(), }) {
    // Every manifest path is made absolute before it is read.
    const loadManifest = (manifestPath, schema, label) => {
        return readValidatedJson(path.resolve(manifestPath), schema, label);
    };
    const scenario = loadManifest(scenarioPath, SCHEMAS.scenario, 'Scenario manifest');
    const runner = loadManifest(runnerPath, SCHEMAS.runnerCapabilities, 'Runner capability manifest');
    const evidenceProviders = providerPaths.map((providerPath, position) => {
        // Labels are 1-based so messages match the order given on the command line.
        return loadManifest(providerPath, SCHEMAS.runnerCapabilities, `Evidence provider manifest ${position + 1}`);
    });
    const compatibility = evaluateRunnerCompatibility({ scenario, runner, evidenceProviders, platform });
    const health = buildCompatibilityHealth({ scenario, runId, compatibility });
    const verdict = buildUnevaluatedVerdict({ scenario, runId, health });
    const agentSummary = buildAgentSummaryMarkdown({ health, verdict });
    assertValidJson(health, SCHEMAS.health, 'Health artifact');
    assertValidJson(verdict, SCHEMAS.verdict, 'Verdict artifact');
    return { agentSummary, compatibility, health, verdict };
}
143
/**
 * Entry point of the check-plan CLI.
 *
 * Prints help to stdout for `--help`/`-h`. When `--scenario` or `--runner` is
 * missing, prints usage to stderr and sets exit code 1. Otherwise builds the
 * plan artifacts and either writes them under `--out` (printing the resolved
 * directory) or prints them to stdout as JSON.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const cliTokens = process.argv.slice(2);
    if (hasHelpFlag(cliTokens)) {
        usage(process.stdout);
        return;
    }
    const args = parseArgs(cliTokens);
    const hasRequiredPaths = typeof args.scenario === 'string' && typeof args.runner === 'string';
    if (!hasRequiredPaths) {
        usage();
        process.exitCode = 1;
        return;
    }
    // Only forward runId when given so buildPlanArtifacts can apply its own default.
    const runIdOption = typeof args['run-id'] === 'string' ? { runId: args['run-id'] } : {};
    const artifacts = await buildPlanArtifacts({
        scenarioPath: args.scenario,
        runnerPath: args.runner,
        providerPaths: args.providers,
        platform: typeof args.platform === 'string' ? args.platform : null,
        ...runIdOption,
    });
    const writeToDirectory = typeof args.out === 'string' && args.out.length > 0;
    if (!writeToDirectory) {
        process.stdout.write(`${JSON.stringify(artifacts, null, 2)}\n`);
        return;
    }
    const outputDir = path.resolve(args.out);
    const layout = createArtifactLayout({ outputDir });
    const jsonArtifacts = [
        {
            filePath: layout.health,
            value: artifacts.health,
            schema: SCHEMAS.health,
            label: 'Health artifact',
        },
        {
            filePath: layout.verdict,
            value: artifacts.verdict,
            schema: SCHEMAS.verdict,
            label: 'Verdict artifact',
        },
        {
            filePath: layout.plannerCompatibility,
            value: artifacts.compatibility,
            // No published schema is used for this artifact; any object is accepted.
            schema: {
                type: 'object',
                additionalProperties: true,
            },
            label: 'Planner compatibility artifact',
        },
    ];
    // Written one at a time, in the same order as listed above.
    for (const artifact of jsonArtifacts) {
        await writeJsonArtifact(artifact);
    }
    await writeTextArtifact({
        filePath: layout.agentSummary,
        content: artifacts.agentSummary,
    });
    process.stdout.write(`${outputDir}\n`);
}
200
// Run the CLI only when this file is executed directly, not when it is required.
if (require.main === module) {
    main().catch((error) => {
        // Schema validation errors and ordinary Errors both report their message; anything else is stringified.
        const hasMessage = error instanceof SchemaValidationError || error instanceof Error;
        console.error(hasMessage ? error.message : String(error));
        process.exitCode = 1;
    });
}
@@ -0,0 +1,20 @@
1
+ type UsageOutput = { // Minimal sink accepted by writeUsage; process.stderr and process.stdout are passed by the CLIs.
2
+ write: (message: string) => unknown; // Receives the whole usage text in a single call.
3
+ };
4
+ /**
5
+ * Returns true when argv contains the exact token `--help` or `-h`.
6
+ *
7
+ * @param {string[]} argv CLI tokens to inspect.
8
+ * @returns {boolean}
9
+ */
10
+ declare function hasHelpFlag(argv: string[]): boolean;
11
+ /**
12
+ * Joins `lines` with newlines, appends a trailing newline, and writes the result to `output` in one call.
13
+ *
14
+ * @param {string[]} lines Usage text, one entry per line.
15
+ * @param {UsageOutput} [output] Sink for the text; the implementation defaults to process.stderr.
16
+ * @returns {void}
17
+ */
18
+ declare function writeUsage(lines: string[], output?: UsageOutput): void;
19
+ export { hasHelpFlag, writeUsage, };
20
+ export type { UsageOutput, };
@@ -0,0 +1,23 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.hasHelpFlag = hasHelpFlag;
4
+ exports.writeUsage = writeUsage;
5
/**
 * Tells whether the command line asks for help.
 *
 * @param {string[]} argv
 * @returns {boolean} True when any token is exactly `--help` or `-h`.
 */
function hasHelpFlag(argv) {
    return argv.some((token) => token === '--help' || token === '-h');
}
14
/**
 * Sends usage text to an output sink as one newline-terminated string.
 *
 * @param {string[]} lines Usage text, one entry per line.
 * @param {UsageOutput} [output] Sink for the text; standard error when omitted.
 * @returns {void}
 */
function writeUsage(lines, output = process.stderr) {
    const body = lines.join('\n');
    output.write(`${body}\n`);
}
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env node
2
+ import type { RunIndex, RunIndexEntry } from '../core/run-index';
3
+ type CompareLatestOptions = { // Input of compareLatestTrustedRun.
4
+ comparisonLane?: string; // When omitted, the lane recorded for the current run is used.
5
+ currentDir: string; // Run directory being evaluated; resolved with path.resolve.
6
+ rootDir: string; // Artifact root indexed for candidate runs; resolved with path.resolve.
7
+ scenarioId: string; // Must equal the scenario id recorded by the current run.
8
+ };
9
+ type CompareLatestResult = { // Output of compareLatestTrustedRun.
10
+ baselineDir: string; // runDir of the selected trusted prior run.
11
+ comparison: Record<string, unknown>; // Value returned by compareRunDirectories.
12
+ currentDir: string; // Absolute (path.resolve'd) current run directory.
13
+ };
14
+ type LatestTrustedSelection = { // Provenance recorded for a latest-trusted baseline selection.
15
+ artifactRoot: string; // The rootDir passed to buildLatestTrustedSelection.
16
+ candidatesInspected: number; // index.entries.length.
17
+ scenarioId: string;
18
+ selectedRunDir: string; // baseline.runDir.
19
+ selectedRunId: string; // baseline.runId.
20
+ skippedCurrentRun: boolean; // True when the current run directory appears in index.entries.
21
+ comparisonLane?: string; // Present only when a lane was supplied.
22
+ scenarioHash?: string; // Present only when a scenario hash was supplied.
23
+ trustedCandidates: number; // index.trusted.length.
24
+ trustedComparableCandidates?: number; // Trusted prior runs that also match the lane.
25
+ trustedScenarioContractCandidates?: number; // Lane-matching runs that also match the scenario hash; set only with a hash.
26
+ trustedPriorCandidates: number; // Trusted runs for the scenario, excluding the current run directory.
27
+ };
28
+ /**
29
+ * Writes the compare-latest CLI usage text to `output`; the implementation defaults to process.stderr.
30
+ * @param {{write: (message: string) => unknown}} [output] Sink that receives the usage text.
31
+ * @returns {void}
32
+ */
33
+ declare function usage(output?: {
34
+ write: (message: string) => unknown;
35
+ }): void;
36
+ /**
37
+ * Reads the current run's artifacts and throws unless health passed and the recorded scenario id matches.
38
+ * Throws Error when healthStatus is not 'passed' or when the recorded scenario id differs from `scenarioId`.
39
+ * @param {{currentDir: string, scenarioId: string}} options
40
+ * @returns {{health: Record<string, unknown>, verdict: Record<string, unknown>}} The artifacts that were read.
41
+ */
42
+ declare function assertComparableCurrentRun({ currentDir, scenarioId, }: {
43
+ currentDir: string;
44
+ scenarioId: string;
45
+ }): {
46
+ health: Record<string, unknown>;
47
+ verdict: Record<string, unknown>;
48
+ };
49
+ /**
50
+ * Returns whether a historical run used the same scenario contract as the current run.
51
+ * When `scenarioHash` is undefined or empty every entry is accepted, which keeps old artifacts without a hash comparable.
52
+ *
53
+ * @param {RunIndexEntry} entry Historical run to test.
54
+ * @param {string | undefined} scenarioHash Scenario hash of the current run, if it has one.
55
+ * @returns {boolean} True when no hash was given or entry.scenarioHash strictly equals it.
56
+ */
57
+ declare function isComparableScenarioContract(entry: RunIndexEntry, scenarioHash: string | undefined): boolean;
58
+ /**
59
+ * Returns the first entry of index.trusted that matches the scenario, lane, and scenario hash and is not the current run directory.
60
+ * NOTE(review): "latest" relies on index.trusted being ordered newest-first; confirm in run-index.
61
+ * @param {{index: RunIndex, scenarioId: string, currentDir: string, comparisonLane?: string, scenarioHash?: string}} options
62
+ * @returns {RunIndexEntry | null} The matching entry, or null when none matches.
63
+ */
64
+ declare function findLatestTrustedPriorRun({ comparisonLane, index, scenarioHash, scenarioId, currentDir, }: {
65
+ comparisonLane?: string; // When omitted, only entries without a lane match.
66
+ index: RunIndex;
67
+ scenarioHash?: string; // When omitted, entries match regardless of their hash.
68
+ scenarioId: string;
69
+ currentDir: string; // Compared after path.resolve on both sides.
70
+ }): RunIndexEntry | null;
71
+ /**
72
+ * Builds stable provenance for the latest-trusted baseline selection.
73
+ * Counts are derived from index.entries and index.trusted; nothing is read from disk here.
74
+ * @param {{baseline: RunIndexEntry, comparisonLane?: string, currentDir: string, index: RunIndex, rootDir: string, scenarioHash?: string, scenarioId: string}} options
75
+ * @returns {LatestTrustedSelection}
76
+ */
77
+ declare function buildLatestTrustedSelection({ baseline, comparisonLane, currentDir, index, rootDir, scenarioHash, scenarioId, }: {
78
+ baseline: RunIndexEntry; // Supplies selectedRunDir and selectedRunId.
79
+ comparisonLane?: string;
80
+ currentDir: string; // Excluded from the prior-run counts.
81
+ index: RunIndex;
82
+ rootDir: string; // Copied to artifactRoot as given.
83
+ scenarioHash?: string;
84
+ scenarioId: string;
85
+ }): LatestTrustedSelection;
86
+ /**
87
+ * Builds a comparison against the latest trusted prior run in an artifact root.
88
+ * Throws Error when the current run is not comparable or when no trusted prior run matches.
89
+ * @param {CompareLatestOptions} options
90
+ * @returns {CompareLatestResult}
91
+ */
92
+ declare function compareLatestTrustedRun({ comparisonLane, currentDir, rootDir, scenarioId, }: CompareLatestOptions): CompareLatestResult;
93
+ /**
94
+ * Runs the compare-latest CLI using process.argv.
95
+ * Prints help for --help/-h; sets process.exitCode to 1 when --root, --scenario, or --current is missing.
96
+ * @returns {Promise<void>}
97
+ */
98
+ declare function main(): Promise<void>;
99
+ export { assertComparableCurrentRun, buildLatestTrustedSelection, compareLatestTrustedRun, findLatestTrustedPriorRun, isComparableScenarioContract, main, usage, };
@@ -0,0 +1,233 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ exports.assertComparableCurrentRun = assertComparableCurrentRun;
5
+ exports.buildLatestTrustedSelection = buildLatestTrustedSelection;
6
+ exports.compareLatestTrustedRun = compareLatestTrustedRun;
7
+ exports.findLatestTrustedPriorRun = findLatestTrustedPriorRun;
8
+ exports.isComparableScenarioContract = isComparableScenarioContract;
9
+ exports.main = main;
10
+ exports.usage = usage;
11
+ const path = require('node:path');
12
+ const { buildAgentSummaryMarkdown } = require('../core/agent-summary');
13
+ const { compareRunDirectories, readRunArtifacts } = require('../core/comparison');
14
+ const { buildRunIndex, readRunIndexEntry } = require('../core/run-index');
15
+ const { writeJsonArtifact, writeTextArtifact } = require('../core/artifact-writer');
16
+ const { SCHEMAS } = require('../core/schema-validator');
17
+ const { hasHelpFlag, writeUsage } = require('./cli');
18
+ const { assertNoRegressedComparison, isEnabledFlag, parseArgs, resolveOutput } = require('./compare');
19
/**
 * Emits the compare-latest CLI help text.
 *
 * @param {{write: (message: string) => unknown}} [output] Sink for the help text; standard error when omitted.
 * @returns {void}
 */
function usage(output = process.stderr) {
    const helpLines = [
        'Usage: asl-compare-latest --root <artifact-root> --scenario <id> --current <run-dir> [--comparison-lane <id>] [--out <comparison.json|run-dir>] [--fail-on-regression]',
        '',
        'Finds the latest trusted prior run for the scenario, then compares it with the current run.',
        'A trusted prior run must have passed health and passed verdict artifacts.',
        'The current run must pass scenario health before timing or budget comparison is allowed.',
        'Use --fail-on-regression to exit nonzero after writing evidence when comparisonStatus is worse.',
    ];
    writeUsage(helpLines, output);
}
34
/**
 * Picks the scenario id recorded in a run's artifacts.
 *
 * The health artifact is preferred, then the verdict artifact; when neither
 * records an id the placeholder 'unknown-scenario' is used.
 *
 * @param {{health: Record<string, unknown>, verdict: Record<string, unknown>}} artifacts
 * @returns {string} The chosen id converted with String().
 */
function readScenarioId(artifacts) {
    let recorded = artifacts.health.scenarioId;
    if (recorded === null || recorded === undefined) {
        recorded = artifacts.verdict.scenarioId;
    }
    if (recorded === null || recorded === undefined) {
        recorded = 'unknown-scenario';
    }
    return String(recorded);
}
43
/**
 * Guards the current run before it is compared with historical evidence.
 *
 * @param {{currentDir: string, scenarioId: string}} options
 * @returns {{health: Record<string, unknown>, verdict: Record<string, unknown>}} The artifacts read from `currentDir`.
 * @throws {Error} When health did not pass or the recorded scenario differs from the requested one.
 */
function assertComparableCurrentRun({ currentDir, scenarioId, }) {
    const artifacts = readRunArtifacts(currentDir);
    const healthPassed = artifacts.health.healthStatus === 'passed';
    if (!healthPassed) {
        throw new Error(`Current run health did not pass: ${currentDir}`);
    }
    const currentScenarioId = readScenarioId(artifacts);
    if (currentScenarioId === scenarioId) {
        return artifacts;
    }
    throw new Error(`Current run scenario '${currentScenarioId}' does not match requested scenario '${scenarioId}'.`);
}
60
/**
 * Decides whether a historical run sits in the requested comparison lane.
 *
 * With a lane, the entry's lane must equal it. Without one, only entries whose
 * lane is undefined match, so unlabeled runs are compared only with each other.
 *
 * @param {RunIndexEntry} entry
 * @param {string | undefined} comparisonLane
 * @returns {boolean}
 */
function isComparableLane(entry, comparisonLane) {
    if (comparisonLane) {
        return entry.comparisonLane === comparisonLane;
    }
    return entry.comparisonLane === undefined;
}
71
/**
 * Decides whether a historical run was produced under the same scenario contract.
 *
 * When the current run has no scenario hash every entry is accepted, which
 * keeps the legacy behavior for artifacts written without a hash.
 *
 * @param {RunIndexEntry} entry
 * @param {string | undefined} scenarioHash
 * @returns {boolean}
 */
function isComparableScenarioContract(entry, scenarioHash) {
    if (!scenarioHash) {
        return true;
    }
    return entry.scenarioHash === scenarioHash;
}
82
/**
 * Returns the first trusted run that can serve as a baseline for the current run.
 *
 * Entries of `index.trusted` are scanned in order; an entry qualifies when it
 * belongs to the scenario, matches the lane and scenario contract, and is not
 * the current run directory itself.
 *
 * @param {{index: RunIndex, scenarioId: string, currentDir: string, comparisonLane?: string, scenarioHash?: string}} options
 * @returns {RunIndexEntry | null} The first qualifying entry, or null when there is none.
 */
function findLatestTrustedPriorRun({ comparisonLane, index, scenarioHash, scenarioId, currentDir, }) {
    const resolvedCurrentDir = path.resolve(currentDir);
    const qualifies = (entry) => {
        if (entry.scenarioId !== scenarioId) {
            return false;
        }
        if (!isComparableLane(entry, comparisonLane)) {
            return false;
        }
        if (!isComparableScenarioContract(entry, scenarioHash)) {
            return false;
        }
        // Never pick the run under evaluation as its own baseline.
        return path.resolve(entry.runDir) !== resolvedCurrentDir;
    };
    const match = index.trusted.find(qualifies);
    return match ?? null;
}
95
/**
 * Summarizes how the latest-trusted baseline was chosen.
 *
 * Three nested counts are reported: trusted prior runs for the scenario
 * (excluding the current run directory), those that also match the lane, and
 * those that additionally match the scenario hash. The last count is emitted
 * only when a scenario hash was supplied.
 *
 * @param {{baseline: RunIndexEntry, comparisonLane?: string, currentDir: string, index: RunIndex, rootDir: string, scenarioHash?: string, scenarioId: string}} options
 * @returns {LatestTrustedSelection}
 */
function buildLatestTrustedSelection({ baseline, comparisonLane, currentDir, index, rootDir, scenarioHash, scenarioId, }) {
    const resolvedCurrentDir = path.resolve(currentDir);
    const isCurrentRun = (entry) => path.resolve(entry.runDir) === resolvedCurrentDir;
    let priorCount = 0;
    let laneCount = 0;
    let contractCount = 0;
    for (const entry of index.trusted) {
        if (entry.scenarioId !== scenarioId || isCurrentRun(entry)) {
            continue;
        }
        priorCount += 1;
        if (!isComparableLane(entry, comparisonLane)) {
            continue;
        }
        laneCount += 1;
        if (isComparableScenarioContract(entry, scenarioHash)) {
            contractCount += 1;
        }
    }
    const laneField = comparisonLane ? { comparisonLane } : {};
    const hashField = scenarioHash ? { scenarioHash } : {};
    const contractField = scenarioHash ? { trustedScenarioContractCandidates: contractCount } : {};
    // Key order is kept stable because the object is serialized into the comparison artifact.
    return {
        artifactRoot: rootDir,
        candidatesInspected: index.entries.length,
        scenarioId,
        selectedRunDir: baseline.runDir,
        selectedRunId: baseline.runId,
        skippedCurrentRun: index.entries.some(isCurrentRun),
        ...laneField,
        ...hashField,
        trustedCandidates: index.trusted.length,
        trustedComparableCandidates: laneCount,
        ...contractField,
        trustedPriorCandidates: priorCount,
    };
}
122
/**
 * Compares the current run with the latest trusted prior run found under an artifact root.
 *
 * The lane defaults to the one recorded for the current run, and the current
 * run's scenario hash (when present) restricts which prior runs qualify.
 *
 * @param {CompareLatestOptions} options
 * @returns {CompareLatestResult}
 * @throws {Error} When the current run is not comparable or no trusted prior run qualifies.
 */
function compareLatestTrustedRun({ comparisonLane, currentDir, rootDir, scenarioId, }) {
    const resolvedCurrentDir = path.resolve(currentDir);
    const resolvedRootDir = path.resolve(rootDir);
    assertComparableCurrentRun({ currentDir: resolvedCurrentDir, scenarioId });
    const currentEntry = readRunIndexEntry(resolvedCurrentDir);
    const resolvedComparisonLane = comparisonLane ?? currentEntry.comparisonLane;
    const scenarioHash = currentEntry.scenarioHash;
    // Optional keys are passed only when they carry a value.
    const laneOption = resolvedComparisonLane ? { comparisonLane: resolvedComparisonLane } : {};
    const hashOption = scenarioHash ? { scenarioHash } : {};
    const index = buildRunIndex({ rootDir: resolvedRootDir, scenarioId });
    const baseline = findLatestTrustedPriorRun({
        ...laneOption,
        ...hashOption,
        index,
        scenarioId,
        currentDir: resolvedCurrentDir,
    });
    if (!baseline) {
        let laneSuffix = ' without a comparison lane';
        if (resolvedComparisonLane) {
            laneSuffix = ` in comparison lane '${resolvedComparisonLane}'`;
        }
        const scenarioHashSuffix = scenarioHash ? ` and scenario hash '${scenarioHash}'` : '';
        throw new Error(`No trusted prior run found for scenario '${scenarioId}'${laneSuffix}${scenarioHashSuffix} under ${resolvedRootDir}; inspected ${index.entries.length} candidate run(s), ${index.trusted.length} trusted.`);
    }
    const selection = buildLatestTrustedSelection({
        baseline,
        ...laneOption,
        currentDir: resolvedCurrentDir,
        index,
        rootDir: resolvedRootDir,
        ...hashOption,
        scenarioId,
    });
    const comparison = compareRunDirectories({
        baselineDir: baseline.runDir,
        currentDir: resolvedCurrentDir,
        selection,
        strategy: 'latest_trusted_prior',
    });
    return {
        baselineDir: baseline.runDir,
        comparison,
        currentDir: resolvedCurrentDir,
    };
}
169
/**
 * Entry point of the compare-latest CLI.
 *
 * Prints help to stdout for `--help`/`-h`. When `--root`, `--scenario`, or
 * `--current` is missing, prints usage to stderr and sets exit code 1.
 * Otherwise builds the comparison and either writes it to `--out` (plus an
 * agent summary when resolveOutput provides a summary path) or prints it as
 * JSON. With `--fail-on-regression`, the regression gate runs after the
 * evidence has been written or printed.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const cliTokens = process.argv.slice(2);
    if (hasHelpFlag(cliTokens)) {
        usage(process.stdout);
        return;
    }
    const args = parseArgs(cliTokens);
    const requiredFlags = ['root', 'scenario', 'current'];
    if (requiredFlags.some((flag) => typeof args[flag] !== 'string')) {
        usage();
        process.exitCode = 1;
        return;
    }
    const laneOption = typeof args['comparison-lane'] === 'string'
        ? { comparisonLane: args['comparison-lane'] }
        : {};
    const result = compareLatestTrustedRun({
        rootDir: args.root,
        scenarioId: args.scenario,
        currentDir: args.current,
        ...laneOption,
    });
    const failOnRegression = isEnabledFlag(args['fail-on-regression']);
    const writeToPath = typeof args.out === 'string' && args.out.length > 0;
    if (!writeToPath) {
        process.stdout.write(`${JSON.stringify(result.comparison, null, 2)}\n`);
        if (failOnRegression) {
            assertNoRegressedComparison({ comparison: result.comparison });
        }
        return;
    }
    const { comparisonPath, summaryPath, printedPath } = resolveOutput(args.out);
    await writeJsonArtifact({
        filePath: comparisonPath,
        value: result.comparison,
        schema: SCHEMAS.comparison,
        label: 'Comparison artifact',
    });
    if (summaryPath) {
        const current = readRunArtifacts(result.currentDir);
        const summary = buildAgentSummaryMarkdown({
            health: current.health,
            verdict: current.verdict,
            comparison: result.comparison,
        });
        await writeTextArtifact({
            filePath: summaryPath,
            content: summary,
        });
    }
    process.stdout.write(`${printedPath}\n`);
    // The gate runs last so the evidence exists on disk even when it throws.
    if (failOnRegression) {
        assertNoRegressedComparison({
            comparison: result.comparison,
            evidencePath: printedPath,
        });
    }
}
228
// Run the CLI only when this file is executed directly, not when it is required.
if (require.main === module) {
    main().catch((error) => {
        const message = error instanceof Error ? error.message : String(error);
        console.error(message);
        process.exitCode = 1;
    });
}
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env node
2
+ type CliArgs = { // Flags returned by the comparison CLI's parseArgs.
3
+ baseline?: string | boolean;
4
+ current?: string | boolean;
5
+ 'fail-on-regression'?: string | boolean; // Read through isEnabledFlag by the compare-latest CLI.
6
+ out?: string | boolean; // The compare-latest CLI passes it to resolveOutput when it is a non-empty string.
7
+ [key: string]: string | boolean | undefined; // Other flags are kept under their own names.
8
+ };
9
+ /**
10
+ * Writes the compare CLI usage text to `output`.
11
+ * NOTE(review): implementation not shown here; presumably defaults to process.stderr like the sibling CLIs. Confirm.
12
+ * @returns {void}
13
+ */
14
+ declare function usage(output?: {
15
+ write: (message: string) => unknown;
16
+ }): void;
17
+ /**
18
+ * Parses `--key value` arguments for the comparison CLI.
19
+ * Also used by the compare-latest CLI, which reads root, scenario, current, comparison-lane, out, and fail-on-regression from the result.
20
+ * @param {string[]} argv CLI tokens (the compare-latest CLI passes process.argv.slice(2)).
21
+ * @returns {CliArgs}
22
+ */
23
+ declare function parseArgs(argv: string[]): CliArgs;
24
+ /**
25
+ * Resolves `--out` as either an explicit JSON file or a run directory.
26
+ * NOTE(review): summaryPath is presumably null for the explicit-file form; confirm in compare.js.
27
+ * @param {string} out Value of the `--out` flag.
28
+ * @returns {{comparisonPath: string, summaryPath: string | null, printedPath: string}}
29
+ */
30
+ declare function resolveOutput(out: string): {
31
+ comparisonPath: string; // Where the comparison JSON artifact is written.
32
+ summaryPath: string | null; // Where the agent summary is written; callers skip the summary when null.
33
+ printedPath: string; // Path the CLI prints to stdout after writing.
34
+ };
35
+ /**
36
+ * Returns whether a boolean CLI flag was provided.
37
+ * NOTE(review): which string values count as enabled is decided in compare.js, not visible here.
38
+ * @param {unknown} value Raw flag value from parsed args, e.g. args['fail-on-regression'].
39
+ * @returns {boolean}
40
+ */
41
+ declare function isEnabledFlag(value: unknown): boolean;
42
+ /**
43
+ * Throws when a comparison result should fail a strict regression gate.
44
+ * The compare-latest CLI calls this only with --fail-on-regression, after the evidence is written or printed.
45
+ * @param {{comparison: Record<string, unknown>, evidencePath?: string}} options
46
+ * @returns {void}
47
+ */
48
+ declare function assertNoRegressedComparison({ comparison, evidencePath, }: {
49
+ comparison: Record<string, unknown>;
50
+ evidencePath?: string; // Passed by the compare-latest CLI only when the comparison was written to --out.
51
+ }): void;
52
+ /**
53
+ * Entry point of the compare CLI.
54
+ * NOTE(review): implementation is in compare.js and is not visible here; see usage for the supported flags.
55
+ * @returns {Promise<void>}
56
+ */
57
+ declare function main(): Promise<void>;
58
+ export { assertNoRegressedComparison, isEnabledFlag, main, parseArgs, resolveOutput, usage, };