agent-scenario-loop 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -9
- package/app/profile-session.ts +98 -4
- package/dist/core/agent-summary.d.ts +3 -2
- package/dist/core/agent-summary.js +44 -2
- package/dist/core/artifact-contract.d.ts +22 -4
- package/dist/core/artifact-contract.js +512 -11
- package/dist/core/comparison.d.ts +57 -3
- package/dist/core/comparison.js +113 -1
- package/dist/core/planner.d.ts +32 -1
- package/dist/core/planner.js +144 -0
- package/dist/core/run-index.d.ts +4 -0
- package/dist/core/run-index.js +55 -1
- package/dist/core/schema-validator.d.ts +1 -0
- package/dist/core/schema-validator.js +1 -0
- package/dist/runner/compare-latest.d.ts +8 -4
- package/dist/runner/compare-latest.js +24 -5
- package/dist/runner/example-android-live.d.ts +10 -1
- package/dist/runner/example-android-live.js +55 -0
- package/dist/runner/example-ios-live.d.ts +10 -1
- package/dist/runner/example-ios-live.js +55 -0
- package/dist/runner/ios-simctl.d.ts +5 -0
- package/dist/runner/ios-simctl.js +6 -0
- package/dist/runner/live-comparison.d.ts +2 -2
- package/dist/runner/live-comparison.js +2 -1
- package/dist/runner/live-proof-summary.d.ts +5 -4
- package/dist/runner/live-proof-summary.js +12 -2
- package/dist/runner/live-proof.d.ts +3 -2
- package/dist/runner/live-proof.js +9 -2
- package/dist/runner/profile-android.d.ts +5 -0
- package/dist/runner/profile-android.js +148 -24
- package/dist/runner/profile-ios.d.ts +11 -1
- package/dist/runner/profile-ios.js +128 -9
- package/dist/runner/profile-mobile.d.ts +8 -0
- package/dist/runner/profile-mobile.js +267 -28
- package/docs/adapters.md +4 -0
- package/docs/architecture.md +90 -0
- package/docs/authoring.md +5 -1
- package/docs/concepts.md +3 -24
- package/docs/consumer-rehearsal.md +4 -0
- package/docs/contracts.md +30 -100
- package/docs/external-adapter-protocol.md +219 -0
- package/docs/live-proofs.md +83 -2
- package/docs/principles.md +9 -15
- package/examples/mobile-app/README.md +12 -0
- package/examples/mobile-app/runner-manifests/primary-runner.json +1 -0
- package/examples/runners/README.md +1 -0
- package/examples/runners/adb-android.json +1 -0
- package/examples/runners/agent-device-android.json +1 -0
- package/examples/runners/agent-device-ios.json +1 -0
- package/examples/runners/argent-android.json +1 -0
- package/examples/runners/argent-ios.json +1 -0
- package/examples/runners/xcodebuildmcp-ios.json +1 -0
- package/package.json +2 -1
- package/schemas/causal-run.schema.json +85 -2
- package/schemas/comparison.schema.json +130 -2
- package/schemas/external-adapter-message.schema.json +693 -0
- package/schemas/health.schema.json +72 -0
- package/schemas/live-proof-set.schema.json +1 -1
- package/schemas/live-proof.schema.json +14 -6
- package/schemas/manifest.schema.json +442 -1
- package/schemas/runner-capabilities.schema.json +20 -0
- package/schemas/scenario.schema.json +16 -0
- package/templates/primary-runner.json +1 -0
|
@@ -14,6 +14,7 @@ type CompareLatestResult = {
|
|
|
14
14
|
type LatestTrustedSelection = {
|
|
15
15
|
artifactRoot: string;
|
|
16
16
|
candidatesInspected: number;
|
|
17
|
+
cohortHash?: string;
|
|
17
18
|
scenarioId: string;
|
|
18
19
|
selectedRunDir: string;
|
|
19
20
|
selectedRunId: string;
|
|
@@ -21,6 +22,7 @@ type LatestTrustedSelection = {
|
|
|
21
22
|
comparisonLane?: string;
|
|
22
23
|
scenarioHash?: string;
|
|
23
24
|
trustedCandidates: number;
|
|
25
|
+
trustedCohortCandidates?: number;
|
|
24
26
|
trustedComparableCandidates?: number;
|
|
25
27
|
trustedScenarioContractCandidates?: number;
|
|
26
28
|
trustedPriorCandidates: number;
|
|
@@ -58,10 +60,11 @@ declare function isComparableScenarioContract(entry: RunIndexEntry, scenarioHash
|
|
|
58
60
|
/**
|
|
59
61
|
* Finds the newest trusted run for a scenario while excluding the current run directory.
|
|
60
62
|
*
|
|
61
|
-
* @param {{index: RunIndex, scenarioId: string, currentDir: string, comparisonLane?: string, scenarioHash?: string}} options
|
|
63
|
+
* @param {{index: RunIndex, scenarioId: string, currentDir: string, cohortHash?: string, comparisonLane?: string, scenarioHash?: string}} options
|
|
62
64
|
* @returns {RunIndexEntry | null}
|
|
63
65
|
*/
|
|
64
|
-
declare function findLatestTrustedPriorRun({ comparisonLane, index, scenarioHash, scenarioId, currentDir, }: {
|
|
66
|
+
declare function findLatestTrustedPriorRun({ cohortHash, comparisonLane, index, scenarioHash, scenarioId, currentDir, }: {
|
|
67
|
+
cohortHash?: string;
|
|
65
68
|
comparisonLane?: string;
|
|
66
69
|
index: RunIndex;
|
|
67
70
|
scenarioHash?: string;
|
|
@@ -71,11 +74,12 @@ declare function findLatestTrustedPriorRun({ comparisonLane, index, scenarioHash
|
|
|
71
74
|
/**
|
|
72
75
|
* Builds stable provenance for the latest-trusted baseline selection.
|
|
73
76
|
*
|
|
74
|
-
* @param {{baseline: RunIndexEntry, comparisonLane?: string, currentDir: string, index: RunIndex, rootDir: string, scenarioHash?: string, scenarioId: string}} options
|
|
77
|
+
* @param {{baseline: RunIndexEntry, cohortHash?: string, comparisonLane?: string, currentDir: string, index: RunIndex, rootDir: string, scenarioHash?: string, scenarioId: string}} options
|
|
75
78
|
* @returns {LatestTrustedSelection}
|
|
76
79
|
*/
|
|
77
|
-
declare function buildLatestTrustedSelection({ baseline, comparisonLane, currentDir, index, rootDir, scenarioHash, scenarioId, }: {
|
|
80
|
+
declare function buildLatestTrustedSelection({ baseline, cohortHash, comparisonLane, currentDir, index, rootDir, scenarioHash, scenarioId, }: {
|
|
78
81
|
baseline: RunIndexEntry;
|
|
82
|
+
cohortHash?: string;
|
|
79
83
|
comparisonLane?: string;
|
|
80
84
|
currentDir: string;
|
|
81
85
|
index: RunIndex;
|
|
@@ -79,31 +79,44 @@ function isComparableLane(entry, comparisonLane) {
|
|
|
79
79
|
function isComparableScenarioContract(entry, scenarioHash) {
|
|
80
80
|
return scenarioHash ? entry.scenarioHash === scenarioHash : true;
|
|
81
81
|
}
|
|
82
|
+
/**
|
|
83
|
+
* Returns whether a historical run belongs to the requested provenance cohort.
|
|
84
|
+
* Runs without a current cohort hash keep legacy behavior for old artifacts.
|
|
85
|
+
*
|
|
86
|
+
* @param {RunIndexEntry} entry
|
|
87
|
+
* @param {string | undefined} cohortHash
|
|
88
|
+
* @returns {boolean}
|
|
89
|
+
*/
|
|
90
|
+
function isComparableCohort(entry, cohortHash) {
|
|
91
|
+
return cohortHash ? entry.cohortHash === cohortHash : true;
|
|
92
|
+
}
|
|
82
93
|
/**
|
|
83
94
|
* Finds the newest trusted run for a scenario while excluding the current run directory.
|
|
84
95
|
*
|
|
85
|
-
* @param {{index: RunIndex, scenarioId: string, currentDir: string, comparisonLane?: string, scenarioHash?: string}} options
|
|
96
|
+
* @param {{index: RunIndex, scenarioId: string, currentDir: string, cohortHash?: string, comparisonLane?: string, scenarioHash?: string}} options
|
|
86
97
|
* @returns {RunIndexEntry | null}
|
|
87
98
|
*/
|
|
88
|
-
function findLatestTrustedPriorRun({ comparisonLane, index, scenarioHash, scenarioId, currentDir, }) {
|
|
99
|
+
function findLatestTrustedPriorRun({ cohortHash, comparisonLane, index, scenarioHash, scenarioId, currentDir, }) {
|
|
89
100
|
const resolvedCurrentDir = path.resolve(currentDir);
|
|
90
101
|
return index.trusted.find((entry) => (entry.scenarioId === scenarioId &&
|
|
91
102
|
isComparableLane(entry, comparisonLane) &&
|
|
92
103
|
isComparableScenarioContract(entry, scenarioHash) &&
|
|
104
|
+
isComparableCohort(entry, cohortHash) &&
|
|
93
105
|
path.resolve(entry.runDir) !== resolvedCurrentDir)) ?? null;
|
|
94
106
|
}
|
|
95
107
|
/**
|
|
96
108
|
* Builds stable provenance for the latest-trusted baseline selection.
|
|
97
109
|
*
|
|
98
|
-
* @param {{baseline: RunIndexEntry, comparisonLane?: string, currentDir: string, index: RunIndex, rootDir: string, scenarioHash?: string, scenarioId: string}} options
|
|
110
|
+
* @param {{baseline: RunIndexEntry, cohortHash?: string, comparisonLane?: string, currentDir: string, index: RunIndex, rootDir: string, scenarioHash?: string, scenarioId: string}} options
|
|
99
111
|
* @returns {LatestTrustedSelection}
|
|
100
112
|
*/
|
|
101
|
-
function buildLatestTrustedSelection({ baseline, comparisonLane, currentDir, index, rootDir, scenarioHash, scenarioId, }) {
|
|
113
|
+
function buildLatestTrustedSelection({ baseline, cohortHash, comparisonLane, currentDir, index, rootDir, scenarioHash, scenarioId, }) {
|
|
102
114
|
const resolvedCurrentDir = path.resolve(currentDir);
|
|
103
115
|
const trustedPriorCandidates = index.trusted.filter((entry) => (entry.scenarioId === scenarioId &&
|
|
104
116
|
path.resolve(entry.runDir) !== resolvedCurrentDir));
|
|
105
117
|
const trustedComparableCandidates = trustedPriorCandidates.filter((entry) => (isComparableLane(entry, comparisonLane)));
|
|
106
118
|
const trustedScenarioContractCandidates = trustedComparableCandidates.filter((entry) => (isComparableScenarioContract(entry, scenarioHash)));
|
|
119
|
+
const trustedCohortCandidates = trustedScenarioContractCandidates.filter((entry) => (isComparableCohort(entry, cohortHash)));
|
|
107
120
|
return {
|
|
108
121
|
artifactRoot: rootDir,
|
|
109
122
|
candidatesInspected: index.entries.length,
|
|
@@ -113,9 +126,11 @@ function buildLatestTrustedSelection({ baseline, comparisonLane, currentDir, ind
|
|
|
113
126
|
skippedCurrentRun: index.entries.some((entry) => path.resolve(entry.runDir) === resolvedCurrentDir),
|
|
114
127
|
...(comparisonLane ? { comparisonLane } : {}),
|
|
115
128
|
...(scenarioHash ? { scenarioHash } : {}),
|
|
129
|
+
...(cohortHash ? { cohortHash } : {}),
|
|
116
130
|
trustedCandidates: index.trusted.length,
|
|
117
131
|
trustedComparableCandidates: trustedComparableCandidates.length,
|
|
118
132
|
...(scenarioHash ? { trustedScenarioContractCandidates: trustedScenarioContractCandidates.length } : {}),
|
|
133
|
+
...(cohortHash ? { trustedCohortCandidates: trustedCohortCandidates.length } : {}),
|
|
119
134
|
trustedPriorCandidates: trustedPriorCandidates.length,
|
|
120
135
|
};
|
|
121
136
|
}
|
|
@@ -132,8 +147,10 @@ function compareLatestTrustedRun({ comparisonLane, currentDir, rootDir, scenario
|
|
|
132
147
|
const currentEntry = readRunIndexEntry(resolvedCurrentDir);
|
|
133
148
|
const resolvedComparisonLane = comparisonLane ?? currentEntry.comparisonLane;
|
|
134
149
|
const scenarioHash = currentEntry.scenarioHash;
|
|
150
|
+
const cohortHash = currentEntry.cohortHash;
|
|
135
151
|
const index = buildRunIndex({ rootDir: resolvedRootDir, scenarioId });
|
|
136
152
|
const baseline = findLatestTrustedPriorRun({
|
|
153
|
+
...(cohortHash ? { cohortHash } : {}),
|
|
137
154
|
...(resolvedComparisonLane ? { comparisonLane: resolvedComparisonLane } : {}),
|
|
138
155
|
...(scenarioHash ? { scenarioHash } : {}),
|
|
139
156
|
index,
|
|
@@ -145,7 +162,8 @@ function compareLatestTrustedRun({ comparisonLane, currentDir, rootDir, scenario
|
|
|
145
162
|
? ` in comparison lane '${resolvedComparisonLane}'`
|
|
146
163
|
: ' without a comparison lane';
|
|
147
164
|
const scenarioHashSuffix = scenarioHash ? ` and scenario hash '${scenarioHash}'` : '';
|
|
148
|
-
|
|
165
|
+
const cohortHashSuffix = cohortHash ? ` and cohort hash '${cohortHash}'` : '';
|
|
166
|
+
throw new Error(`No trusted prior run found for scenario '${scenarioId}'${laneSuffix}${scenarioHashSuffix}${cohortHashSuffix} under ${resolvedRootDir}; inspected ${index.entries.length} candidate run(s), ${index.trusted.length} trusted.`);
|
|
149
167
|
}
|
|
150
168
|
return {
|
|
151
169
|
baselineDir: baseline.runDir,
|
|
@@ -154,6 +172,7 @@ function compareLatestTrustedRun({ comparisonLane, currentDir, rootDir, scenario
|
|
|
154
172
|
currentDir: resolvedCurrentDir,
|
|
155
173
|
selection: buildLatestTrustedSelection({
|
|
156
174
|
baseline,
|
|
175
|
+
...(cohortHash ? { cohortHash } : {}),
|
|
157
176
|
...(resolvedComparisonLane ? { comparisonLane: resolvedComparisonLane } : {}),
|
|
158
177
|
currentDir: resolvedCurrentDir,
|
|
159
178
|
index,
|
|
@@ -33,6 +33,7 @@ type AndroidLiveProofResult = {
|
|
|
33
33
|
outputDir: string;
|
|
34
34
|
preflightDir: string;
|
|
35
35
|
profiles: AndroidLiveProfile[];
|
|
36
|
+
seededBaselines: AndroidLiveProfile[];
|
|
36
37
|
skippedInteractionProofs: AndroidSkippedInteractionProof[];
|
|
37
38
|
};
|
|
38
39
|
type RegressionGateOptions = {
|
|
@@ -77,6 +78,14 @@ declare function buildLiveRunId(baseRunId: string, suffix: string | null): strin
|
|
|
77
78
|
* @returns {string}
|
|
78
79
|
*/
|
|
79
80
|
declare function buildInteractionComparisonLane(runnerIds: string[]): string;
|
|
81
|
+
/**
|
|
82
|
+
* Builds a deterministic run id for a seeded baseline profile.
|
|
83
|
+
*
|
|
84
|
+
* @param {string} baseRunId
|
|
85
|
+
* @param {string | null} suffix
|
|
86
|
+
* @returns {string}
|
|
87
|
+
*/
|
|
88
|
+
declare function buildBaselineRunId(baseRunId: string, suffix: string | null): string;
|
|
80
89
|
/**
|
|
81
90
|
* Reports whether profile evidence is healthy enough to trust sidecar proofs and comparisons.
|
|
82
91
|
*
|
|
@@ -133,5 +142,5 @@ declare function formatResult(result: AndroidLiveProofResult): string;
|
|
|
133
142
|
* @returns {Promise<void>}
|
|
134
143
|
*/
|
|
135
144
|
declare function main(): Promise<void>;
|
|
136
|
-
export { assertAggregatePassed, formatResult, assertNoRegressedComparisons, buildLiveRunId, buildSkippedInteractionProofs, buildInteractionComparisonLane, isTrustedProfileRun, main, normalizeRunSuffix, resolveAndroidSerial, runExampleAndroidLiveProof, usage, };
|
|
145
|
+
export { assertAggregatePassed, formatResult, assertNoRegressedComparisons, buildLiveRunId, buildBaselineRunId, buildSkippedInteractionProofs, buildInteractionComparisonLane, isTrustedProfileRun, main, normalizeRunSuffix, resolveAndroidSerial, runExampleAndroidLiveProof, usage, };
|
|
137
146
|
export type { AndroidLiveProofOptions, AndroidInteractionProof, AndroidLiveProofResult, AndroidLiveProfile, };
|
|
@@ -5,6 +5,7 @@ exports.assertAggregatePassed = assertAggregatePassed;
|
|
|
5
5
|
exports.formatResult = formatResult;
|
|
6
6
|
exports.assertNoRegressedComparisons = assertNoRegressedComparisons;
|
|
7
7
|
exports.buildLiveRunId = buildLiveRunId;
|
|
8
|
+
exports.buildBaselineRunId = buildBaselineRunId;
|
|
8
9
|
exports.buildSkippedInteractionProofs = buildSkippedInteractionProofs;
|
|
9
10
|
exports.buildInteractionComparisonLane = buildInteractionComparisonLane;
|
|
10
11
|
exports.isTrustedProfileRun = isTrustedProfileRun;
|
|
@@ -58,6 +59,7 @@ function usage(output = process.stderr) {
|
|
|
58
59
|
'The example app must already be installed and reachable on an online Android emulator or device.',
|
|
59
60
|
`By default, the runner sets the app React Native debug host to ${DEFAULT_REACT_NATIVE_DEBUG_HOST} for the isolated Metro server.`,
|
|
60
61
|
'Use --run-suffix to preserve multiple live proof artifact sets without changing deterministic default run ids.',
|
|
62
|
+
'Use --seed-baseline with --compare-latest to capture a trusted compatible baseline before the measured run.',
|
|
61
63
|
'Use --compare-latest to compare each passed scenario against the latest trusted prior run under the artifact root.',
|
|
62
64
|
'Use --fail-on-regression with --compare-latest to exit nonzero after writing evidence when any comparison regressed.',
|
|
63
65
|
'Use --agent-device-proof to attach the shared startup UI assertion through agent-device; pass --agent-device-session-mode bind when a named session should still receive the configured serial.',
|
|
@@ -155,6 +157,16 @@ function buildInteractionComparisonLane(runnerIds) {
|
|
|
155
157
|
? `example-android-live+${runnerIds.join('+')}`
|
|
156
158
|
: 'example-android-live';
|
|
157
159
|
}
|
|
160
|
+
/**
|
|
161
|
+
* Builds a deterministic run id for a seeded baseline profile.
|
|
162
|
+
*
|
|
163
|
+
* @param {string} baseRunId
|
|
164
|
+
* @param {string | null} suffix
|
|
165
|
+
* @returns {string}
|
|
166
|
+
*/
|
|
167
|
+
function buildBaselineRunId(baseRunId, suffix) {
|
|
168
|
+
return buildLiveRunId(baseRunId, suffix ? `${suffix}-baseline` : 'baseline');
|
|
169
|
+
}
|
|
158
170
|
/**
|
|
159
171
|
* Reports whether profile evidence is healthy enough to trust sidecar proofs and comparisons.
|
|
160
172
|
*
|
|
@@ -280,8 +292,49 @@ async function runExampleAndroidLiveProof(args, options = {}) {
|
|
|
280
292
|
throw new Error(`Android live proof preflight failed; inspect ${preflight.runDir}/agent-summary.md.`);
|
|
281
293
|
}
|
|
282
294
|
const interactionProofs = [];
|
|
295
|
+
const seededBaselines = [];
|
|
283
296
|
const profiles = [];
|
|
284
297
|
const failedProfiles = [];
|
|
298
|
+
if (isEnabledFlag(args['seed-baseline'])) {
|
|
299
|
+
for (const profile of EXAMPLE_PROFILES) {
|
|
300
|
+
const baselineRunId = buildBaselineRunId(profile.runId, runSuffix);
|
|
301
|
+
const result = await runProfileAndroid({
|
|
302
|
+
...(typeof args.adb === 'string' ? { adb: args.adb } : {}),
|
|
303
|
+
'adb-capture': true,
|
|
304
|
+
'clear-logcat': true,
|
|
305
|
+
config: configPath,
|
|
306
|
+
'command-wait-ms': typeof args['command-wait-ms'] === 'string' ? args['command-wait-ms'] : '250',
|
|
307
|
+
launch: true,
|
|
308
|
+
'launch-wait-ms': typeof args['launch-wait-ms'] === 'string' ? args['launch-wait-ms'] : '1500',
|
|
309
|
+
'logcat-lines': typeof args['logcat-lines'] === 'string' ? args['logcat-lines'] : '1000',
|
|
310
|
+
out: outputDir,
|
|
311
|
+
...(packageName ? { package: packageName } : {}),
|
|
312
|
+
'profile-session': true,
|
|
313
|
+
'react-native-debug-host': reactNativeDebugHost,
|
|
314
|
+
'run-id': baselineRunId,
|
|
315
|
+
scenario: path.join(exampleRoot, 'scenarios', 'mobile', profile.scenario),
|
|
316
|
+
serial,
|
|
317
|
+
'wait-ms': typeof args['wait-ms'] === 'string' ? args['wait-ms'] : '1000',
|
|
318
|
+
}, {
|
|
319
|
+
comparisonLane,
|
|
320
|
+
...(options.delay ? { delay: options.delay } : {}),
|
|
321
|
+
...(options.executor ? { executor: options.executor } : {}),
|
|
322
|
+
});
|
|
323
|
+
const baselinePointer = {
|
|
324
|
+
healthStatus: typeof result.health.healthStatus === 'string' ? result.health.healthStatus : 'unknown',
|
|
325
|
+
label: `${profile.label}-baseline`,
|
|
326
|
+
runDir: result.runDir,
|
|
327
|
+
runId: baselineRunId,
|
|
328
|
+
scenario: profile.scenario,
|
|
329
|
+
scenarioId: profile.scenarioId,
|
|
330
|
+
verdictStatus: typeof result.verdict.verdictStatus === 'string' ? result.verdict.verdictStatus : 'unknown',
|
|
331
|
+
};
|
|
332
|
+
seededBaselines.push(baselinePointer);
|
|
333
|
+
if (!isTrustedProfileRun({ health: result.health, verdict: result.verdict })) {
|
|
334
|
+
throw new Error(`Android seeded baseline failed for ${profile.label}. Inspect ${result.runDir}/agent-summary.md.`);
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
}
|
|
285
338
|
for (const profile of EXAMPLE_PROFILES) {
|
|
286
339
|
const profileRunId = buildLiveRunId(profile.runId, runSuffix);
|
|
287
340
|
const result = await runProfileAndroid({
|
|
@@ -395,6 +448,7 @@ async function runExampleAndroidLiveProof(args, options = {}) {
|
|
|
395
448
|
outputDir,
|
|
396
449
|
preflightDir: preflight.runDir,
|
|
397
450
|
profiles,
|
|
451
|
+
seededBaselines,
|
|
398
452
|
skippedInteractionProofs,
|
|
399
453
|
};
|
|
400
454
|
if (isEnabledFlag(args['fail-on-regression'])) {
|
|
@@ -419,6 +473,7 @@ function formatResult(result) {
|
|
|
419
473
|
`Preflight: ${result.preflightDir}/agent-summary.md`,
|
|
420
474
|
...result.profiles.map((profile) => (`${profile.label}: ${profile.runDir}/agent-summary.md`)),
|
|
421
475
|
...result.interactionProofs.map((proof) => (`${proof.label}: ${proof.runDir}/agent-summary.md`)),
|
|
476
|
+
...result.seededBaselines.map((profile) => (`${profile.label}: ${profile.runDir}/agent-summary.md`)),
|
|
422
477
|
...(result.comparisons.length > 0
|
|
423
478
|
? [
|
|
424
479
|
'Comparisons:',
|
|
@@ -33,6 +33,7 @@ type IosLiveProofResult = {
|
|
|
33
33
|
outputDir: string;
|
|
34
34
|
preflightDir: string;
|
|
35
35
|
profiles: IosLiveProfile[];
|
|
36
|
+
seededBaselines: IosLiveProfile[];
|
|
36
37
|
skippedInteractionProofs: IosSkippedInteractionProof[];
|
|
37
38
|
};
|
|
38
39
|
type RegressionGateOptions = {
|
|
@@ -77,6 +78,14 @@ declare function buildLiveRunId(baseRunId: string, suffix: string | null): strin
|
|
|
77
78
|
* @returns {string}
|
|
78
79
|
*/
|
|
79
80
|
declare function buildInteractionComparisonLane(runnerIds: string[]): string;
|
|
81
|
+
/**
|
|
82
|
+
* Builds a deterministic run id for a seeded baseline profile.
|
|
83
|
+
*
|
|
84
|
+
* @param {string} baseRunId
|
|
85
|
+
* @param {string | null} suffix
|
|
86
|
+
* @returns {string}
|
|
87
|
+
*/
|
|
88
|
+
declare function buildBaselineRunId(baseRunId: string, suffix: string | null): string;
|
|
80
89
|
/**
|
|
81
90
|
* Reports whether profile evidence is healthy enough to trust sidecar proofs and comparisons.
|
|
82
91
|
*
|
|
@@ -133,5 +142,5 @@ declare function formatResult(result: IosLiveProofResult): string;
|
|
|
133
142
|
* @returns {Promise<void>}
|
|
134
143
|
*/
|
|
135
144
|
declare function main(): Promise<void>;
|
|
136
|
-
export { assertAggregatePassed, buildLiveRunId, formatResult, assertNoRegressedComparisons, buildSkippedInteractionProofs, buildInteractionComparisonLane, isTrustedProfileRun, main, normalizeRunSuffix, resolveIosDeviceId, runExampleIosLiveProof, usage, };
|
|
145
|
+
export { assertAggregatePassed, buildLiveRunId, formatResult, assertNoRegressedComparisons, buildBaselineRunId, buildSkippedInteractionProofs, buildInteractionComparisonLane, isTrustedProfileRun, main, normalizeRunSuffix, resolveIosDeviceId, runExampleIosLiveProof, usage, };
|
|
137
146
|
export type { IosLiveProofOptions, IosInteractionProof, IosLiveProofResult, IosLiveProfile, };
|
|
@@ -5,6 +5,7 @@ exports.assertAggregatePassed = assertAggregatePassed;
|
|
|
5
5
|
exports.buildLiveRunId = buildLiveRunId;
|
|
6
6
|
exports.formatResult = formatResult;
|
|
7
7
|
exports.assertNoRegressedComparisons = assertNoRegressedComparisons;
|
|
8
|
+
exports.buildBaselineRunId = buildBaselineRunId;
|
|
8
9
|
exports.buildSkippedInteractionProofs = buildSkippedInteractionProofs;
|
|
9
10
|
exports.buildInteractionComparisonLane = buildInteractionComparisonLane;
|
|
10
11
|
exports.isTrustedProfileRun = isTrustedProfileRun;
|
|
@@ -56,6 +57,7 @@ function usage(output = process.stderr) {
|
|
|
56
57
|
'Runs the packaged example iOS live proof: simctl preflight, startup, open-close, and scroll-settle.',
|
|
57
58
|
'The example app must already be installed on a booted iOS simulator and connected to Metro. Set ASL_EXAMPLE_IOS_DEV_CLIENT_URL for Expo dev-client builds that need an explicit Metro URL.',
|
|
58
59
|
'Use --run-suffix to preserve multiple live proof artifact sets without changing deterministic default run ids.',
|
|
60
|
+
'Use --seed-baseline with --compare-latest to capture a trusted compatible baseline before the measured run.',
|
|
59
61
|
'Use --compare-latest to compare each passed scenario against the latest trusted prior run under the artifact root.',
|
|
60
62
|
'Use --fail-on-regression with --compare-latest to exit nonzero after writing evidence when any comparison regressed.',
|
|
61
63
|
'Use --agent-device-proof to attach the shared startup UI assertion through agent-device; pass --agent-device-session-mode bind when a named session should still receive the configured UDID.',
|
|
@@ -166,6 +168,16 @@ function buildInteractionComparisonLane(runnerIds) {
|
|
|
166
168
|
? `example-ios-live+${runnerIds.join('+')}`
|
|
167
169
|
: 'example-ios-live';
|
|
168
170
|
}
|
|
171
|
+
/**
|
|
172
|
+
* Builds a deterministic run id for a seeded baseline profile.
|
|
173
|
+
*
|
|
174
|
+
* @param {string} baseRunId
|
|
175
|
+
* @param {string | null} suffix
|
|
176
|
+
* @returns {string}
|
|
177
|
+
*/
|
|
178
|
+
function buildBaselineRunId(baseRunId, suffix) {
|
|
179
|
+
return buildLiveRunId(baseRunId, suffix ? `${suffix}-baseline` : 'baseline');
|
|
180
|
+
}
|
|
169
181
|
/**
|
|
170
182
|
* Reports whether profile evidence is healthy enough to trust sidecar proofs and comparisons.
|
|
171
183
|
*
|
|
@@ -295,8 +307,49 @@ async function runExampleIosLiveProof(args, options = {}) {
|
|
|
295
307
|
throw new Error(`iOS live proof preflight failed; inspect ${preflight.runDir}/agent-summary.md.`);
|
|
296
308
|
}
|
|
297
309
|
const interactionProofs = [];
|
|
310
|
+
const seededBaselines = [];
|
|
298
311
|
const profiles = [];
|
|
299
312
|
const failedProfiles = [];
|
|
313
|
+
if (isEnabledFlag(args['seed-baseline'])) {
|
|
314
|
+
for (const profile of EXAMPLE_PROFILES) {
|
|
315
|
+
const baselineRunId = buildBaselineRunId(profile.runId, runSuffix);
|
|
316
|
+
const result = await runProfileIos({
|
|
317
|
+
config: configPath,
|
|
318
|
+
device: deviceId,
|
|
319
|
+
...(typeof args['log-last'] === 'string' ? { 'log-last': args['log-last'] } : {}),
|
|
320
|
+
launch: true,
|
|
321
|
+
out: outputDir,
|
|
322
|
+
...(iosDevClientUrl ? { 'ios-dev-client-url': iosDevClientUrl } : {}),
|
|
323
|
+
...(iosDevClientWaitMs ? { 'ios-dev-client-wait-ms': iosDevClientWaitMs } : {}),
|
|
324
|
+
'profile-session': true,
|
|
325
|
+
'profile-session-storage': true,
|
|
326
|
+
'run-id': baselineRunId,
|
|
327
|
+
scenario: path.join(exampleRoot, 'scenarios', 'mobile', profile.scenario),
|
|
328
|
+
'simctl-capture': true,
|
|
329
|
+
'simctl-out': path.join(outputDir, '_ios-simctl-captures', baselineRunId),
|
|
330
|
+
...(typeof args['wait-ms'] === 'string' ? { 'wait-ms': args['wait-ms'] } : {}),
|
|
331
|
+
...(bundleId ? { bundle: bundleId } : {}),
|
|
332
|
+
...(typeof args.xcrun === 'string' ? { xcrun: args.xcrun } : {}),
|
|
333
|
+
}, {
|
|
334
|
+
comparisonLane,
|
|
335
|
+
...(options.delay ? { delay: options.delay } : {}),
|
|
336
|
+
...(options.executor ? { executor: options.executor } : {}),
|
|
337
|
+
});
|
|
338
|
+
const baselinePointer = {
|
|
339
|
+
healthStatus: typeof result.health.healthStatus === 'string' ? result.health.healthStatus : 'unknown',
|
|
340
|
+
label: `${profile.label}-baseline`,
|
|
341
|
+
runDir: result.runDir,
|
|
342
|
+
runId: baselineRunId,
|
|
343
|
+
scenario: profile.scenario,
|
|
344
|
+
scenarioId: profile.scenarioId,
|
|
345
|
+
verdictStatus: typeof result.verdict.verdictStatus === 'string' ? result.verdict.verdictStatus : 'unknown',
|
|
346
|
+
};
|
|
347
|
+
seededBaselines.push(baselinePointer);
|
|
348
|
+
if (!isTrustedProfileRun({ health: result.health, verdict: result.verdict })) {
|
|
349
|
+
throw new Error(`iOS seeded baseline failed for ${profile.label}. Inspect ${result.runDir}/agent-summary.md.`);
|
|
350
|
+
}
|
|
351
|
+
}
|
|
352
|
+
}
|
|
300
353
|
for (const profile of EXAMPLE_PROFILES) {
|
|
301
354
|
const profileRunId = buildLiveRunId(profile.runId, runSuffix);
|
|
302
355
|
const result = await runProfileIos({
|
|
@@ -412,6 +465,7 @@ async function runExampleIosLiveProof(args, options = {}) {
|
|
|
412
465
|
outputDir,
|
|
413
466
|
preflightDir: preflight.runDir,
|
|
414
467
|
profiles,
|
|
468
|
+
seededBaselines,
|
|
415
469
|
skippedInteractionProofs,
|
|
416
470
|
};
|
|
417
471
|
if (isEnabledFlag(args['fail-on-regression'])) {
|
|
@@ -436,6 +490,7 @@ function formatResult(result) {
|
|
|
436
490
|
`Preflight: ${result.preflightDir}/agent-summary.md`,
|
|
437
491
|
...result.profiles.map((profile) => (`${profile.label}: ${profile.runDir}/agent-summary.md`)),
|
|
438
492
|
...result.interactionProofs.map((proof) => (`${proof.label}: ${proof.runDir}/agent-summary.md`)),
|
|
493
|
+
...result.seededBaselines.map((profile) => (`${profile.label}: ${profile.runDir}/agent-summary.md`)),
|
|
439
494
|
...(result.comparisons.length > 0
|
|
440
495
|
? [
|
|
441
496
|
'Comparisons:',
|
|
@@ -43,9 +43,14 @@ type IosSimctlDeepLink = {
|
|
|
43
43
|
};
|
|
44
44
|
type IosProfileSessionStorageCommand = {
|
|
45
45
|
command: string;
|
|
46
|
+
commandId?: string;
|
|
46
47
|
id?: string;
|
|
47
48
|
label?: string;
|
|
49
|
+
queueId?: string;
|
|
50
|
+
sequence?: number;
|
|
48
51
|
timestamp?: number;
|
|
52
|
+
waitForMilestone?: string;
|
|
53
|
+
waitTimeoutMs?: number;
|
|
49
54
|
};
|
|
50
55
|
type IosProfileSessionStorageSeed = {
|
|
51
56
|
commands?: IosProfileSessionStorageCommand[];
|
|
@@ -456,7 +456,13 @@ async function seedProfileSessionStorage({ bundleId, commands = [], dataContaine
|
|
|
456
456
|
scenario,
|
|
457
457
|
runId,
|
|
458
458
|
command: profileCommand.command,
|
|
459
|
+
...(typeof profileCommand.commandId === 'string' ? { commandId: profileCommand.commandId } : {}),
|
|
460
|
+
...(typeof profileCommand.label === 'string' ? { label: profileCommand.label } : {}),
|
|
461
|
+
...(typeof profileCommand.queueId === 'string' ? { queueId: profileCommand.queueId } : {}),
|
|
462
|
+
...(typeof profileCommand.sequence === 'number' ? { sequence: profileCommand.sequence } : {}),
|
|
459
463
|
timestamp: typeof profileCommand.timestamp === 'number' ? profileCommand.timestamp : startedAt + index + 1,
|
|
464
|
+
...(typeof profileCommand.waitForMilestone === 'string' ? { waitForMilestone: profileCommand.waitForMilestone } : {}),
|
|
465
|
+
...(typeof profileCommand.waitTimeoutMs === 'number' ? { waitTimeoutMs: profileCommand.waitTimeoutMs } : {}),
|
|
460
466
|
}));
|
|
461
467
|
manifest[profileStorageKeys.session] = JSON.stringify(session);
|
|
462
468
|
if (queuedCommands.length > 0) {
|
|
@@ -4,7 +4,7 @@ type LiveProfileForComparison = {
|
|
|
4
4
|
runId: string;
|
|
5
5
|
scenarioId: string;
|
|
6
6
|
};
|
|
7
|
-
type ComparisonMetricStatus = 'better' | 'worse' | 'unchanged' | 'inconclusive';
|
|
7
|
+
type ComparisonMetricStatus = 'better' | 'worse' | 'unchanged' | 'inconclusive' | 'low_confidence';
|
|
8
8
|
type ComparisonMetricHighlight = {
|
|
9
9
|
baseline: number | boolean | null;
|
|
10
10
|
current: number | boolean | null;
|
|
@@ -25,7 +25,7 @@ type LiveComparisonResult = {
|
|
|
25
25
|
reason: string | null;
|
|
26
26
|
runId: string;
|
|
27
27
|
scenarioId: string;
|
|
28
|
-
status: 'better' | 'worse' | 'unchanged' | 'mixed' | 'inconclusive' | 'skipped';
|
|
28
|
+
status: 'better' | 'worse' | 'unchanged' | 'mixed' | 'inconclusive' | 'low_confidence' | 'skipped';
|
|
29
29
|
summaryPath: string | null;
|
|
30
30
|
};
|
|
31
31
|
type CompareLiveProfilesOptions = {
|
|
@@ -44,6 +44,7 @@ function buildComparisonMetricSummary(comparison) {
|
|
|
44
44
|
worse: 0,
|
|
45
45
|
unchanged: 0,
|
|
46
46
|
inconclusive: 0,
|
|
47
|
+
low_confidence: 0,
|
|
47
48
|
};
|
|
48
49
|
const notableMetrics = [];
|
|
49
50
|
for (const metric of comparison.metricComparisons) {
|
|
@@ -52,7 +53,7 @@ function buildComparisonMetricSummary(comparison) {
|
|
|
52
53
|
}
|
|
53
54
|
const record = metric;
|
|
54
55
|
const status = record.status;
|
|
55
|
-
if (status !== 'better' && status !== 'worse' && status !== 'unchanged' && status !== 'inconclusive') {
|
|
56
|
+
if (status !== 'better' && status !== 'worse' && status !== 'unchanged' && status !== 'inconclusive' && status !== 'low_confidence') {
|
|
56
57
|
continue;
|
|
57
58
|
}
|
|
58
59
|
counts[status] += 1;
|
|
@@ -53,10 +53,10 @@ type LiveProofComparisonPointer = {
|
|
|
53
53
|
reason: string | null;
|
|
54
54
|
runId: string;
|
|
55
55
|
scenarioId: string;
|
|
56
|
-
status: 'better' | 'worse' | 'unchanged' | 'mixed' | 'inconclusive' | 'skipped';
|
|
56
|
+
status: 'better' | 'worse' | 'unchanged' | 'mixed' | 'inconclusive' | 'low_confidence' | 'skipped';
|
|
57
57
|
summaryPath: string | null;
|
|
58
58
|
};
|
|
59
|
-
type LiveProofComparisonMetricStatus = 'better' | 'worse' | 'unchanged' | 'inconclusive';
|
|
59
|
+
type LiveProofComparisonMetricStatus = 'better' | 'worse' | 'unchanged' | 'inconclusive' | 'low_confidence';
|
|
60
60
|
type LiveProofComparisonMetricSummary = {
|
|
61
61
|
counts: Record<LiveProofComparisonMetricStatus, number>;
|
|
62
62
|
notableMetrics: Array<{
|
|
@@ -94,17 +94,18 @@ type LiveProofArtifact = {
|
|
|
94
94
|
status: 'passed' | 'failed';
|
|
95
95
|
summary: string;
|
|
96
96
|
};
|
|
97
|
-
type LiveProofComparisonStatus = ('baseline_missing' | 'improved' | 'inconclusive' | 'mixed' | 'not_compared' | 'regressed' | 'unchanged');
|
|
97
|
+
type LiveProofComparisonStatus = ('baseline_missing' | 'improved' | 'inconclusive' | 'low_confidence' | 'mixed' | 'not_compared' | 'regressed' | 'unchanged');
|
|
98
98
|
type LiveProofComparisonCounts = {
|
|
99
99
|
better: number;
|
|
100
100
|
inconclusive: number;
|
|
101
|
+
low_confidence: number;
|
|
101
102
|
mixed: number;
|
|
102
103
|
skipped: number;
|
|
103
104
|
unchanged: number;
|
|
104
105
|
worse: number;
|
|
105
106
|
};
|
|
106
107
|
type LiveProofNextAction = {
|
|
107
|
-
code: 'establish_baseline' | 'inspect_failed_run' | 'inspect_inconclusive' | 'inspect_mixed' | 'inspect_regressions' | 'inspect_summary';
|
|
108
|
+
code: 'establish_baseline' | 'inspect_failed_run' | 'inspect_inconclusive' | 'inspect_low_confidence' | 'inspect_mixed' | 'inspect_regressions' | 'inspect_summary';
|
|
108
109
|
summary: string;
|
|
109
110
|
};
|
|
110
111
|
type LiveProofSummaryResult = {
|
|
@@ -168,6 +168,9 @@ function buildLiveProofComparisonStatus(comparisons) {
|
|
|
168
168
|
if (statuses.includes('inconclusive')) {
|
|
169
169
|
return 'inconclusive';
|
|
170
170
|
}
|
|
171
|
+
if (statuses.includes('low_confidence')) {
|
|
172
|
+
return 'low_confidence';
|
|
173
|
+
}
|
|
171
174
|
if (statuses.every((status) => status === 'skipped')) {
|
|
172
175
|
return 'baseline_missing';
|
|
173
176
|
}
|
|
@@ -192,6 +195,7 @@ function buildLiveProofComparisonCounts(comparisons) {
|
|
|
192
195
|
const counts = {
|
|
193
196
|
better: 0,
|
|
194
197
|
inconclusive: 0,
|
|
198
|
+
low_confidence: 0,
|
|
195
199
|
mixed: 0,
|
|
196
200
|
skipped: 0,
|
|
197
201
|
unchanged: 0,
|
|
@@ -233,6 +237,12 @@ function buildLiveProofNextAction(comparisonStatus, status = 'passed') {
|
|
|
233
237
|
summary: 'Some comparisons are inconclusive or incomplete; inspect scenario health and missing baseline details.',
|
|
234
238
|
};
|
|
235
239
|
}
|
|
240
|
+
if (comparisonStatus === 'low_confidence') {
|
|
241
|
+
return {
|
|
242
|
+
code: 'inspect_low_confidence',
|
|
243
|
+
summary: 'Some comparisons show low-confidence timing movement; repeat or multi-sample proof is required before treating it as a regression.',
|
|
244
|
+
};
|
|
245
|
+
}
|
|
236
246
|
if (comparisonStatus === 'mixed') {
|
|
237
247
|
return {
|
|
238
248
|
code: 'inspect_mixed',
|
|
@@ -292,7 +302,7 @@ function formatComparisonMetricSummary(comparison) {
|
|
|
292
302
|
if (!summary) {
|
|
293
303
|
return '';
|
|
294
304
|
}
|
|
295
|
-
const counts = `metrics better=${summary.counts.better} worse=${summary.counts.worse} unchanged=${summary.counts.unchanged} inconclusive=${summary.counts.inconclusive}`;
|
|
305
|
+
const counts = `metrics better=${summary.counts.better} worse=${summary.counts.worse} unchanged=${summary.counts.unchanged} inconclusive=${summary.counts.inconclusive} low_confidence=${summary.counts.low_confidence}`;
|
|
296
306
|
const highlights = summary.notableMetrics.length > 0
|
|
297
307
|
? `; notable: ${summary.notableMetrics.map(formatComparisonMetricHighlight).join(', ')}`
|
|
298
308
|
: '';
|
|
@@ -345,7 +355,7 @@ function buildLiveProofMarkdown(artifact) {
|
|
|
345
355
|
`Status: ${artifact.status}`,
|
|
346
356
|
`Run: ${artifact.runId}`,
|
|
347
357
|
`Comparison status: ${artifact.comparisonStatus}`,
|
|
348
|
-
`Comparison counts: better=${artifact.comparisonCounts.better} worse=${artifact.comparisonCounts.worse} unchanged=${artifact.comparisonCounts.unchanged} mixed=${artifact.comparisonCounts.mixed} inconclusive=${artifact.comparisonCounts.inconclusive} skipped=${artifact.comparisonCounts.skipped}`,
|
|
358
|
+
`Comparison counts: better=${artifact.comparisonCounts.better} worse=${artifact.comparisonCounts.worse} unchanged=${artifact.comparisonCounts.unchanged} mixed=${artifact.comparisonCounts.mixed} inconclusive=${artifact.comparisonCounts.inconclusive} low_confidence=${artifact.comparisonCounts.low_confidence} skipped=${artifact.comparisonCounts.skipped}`,
|
|
349
359
|
`Next action: ${artifact.nextAction.code} - ${artifact.nextAction.summary}`,
|
|
350
360
|
`Summary: ${artifact.summary}`,
|
|
351
361
|
'',
|
|
@@ -17,6 +17,7 @@ type LiveProofArtifact = {
|
|
|
17
17
|
skipped: number;
|
|
18
18
|
unchanged: number;
|
|
19
19
|
worse: number;
|
|
20
|
+
low_confidence: number;
|
|
20
21
|
};
|
|
21
22
|
comparisonStatus: string;
|
|
22
23
|
comparisons: LiveProofComparisonPointer[];
|
|
@@ -82,7 +83,7 @@ type LiveProofArtifact = {
|
|
|
82
83
|
summary: string;
|
|
83
84
|
};
|
|
84
85
|
type LiveProofComparisonCounts = LiveProofArtifact['comparisonCounts'];
|
|
85
|
-
type LiveProofMetricStatus = 'better' | 'worse' | 'unchanged' | 'inconclusive';
|
|
86
|
+
type LiveProofMetricStatus = 'better' | 'worse' | 'unchanged' | 'inconclusive' | 'low_confidence';
|
|
86
87
|
type LiveProofPlatform = LiveProofArtifact['platform'];
|
|
87
88
|
type LiveProofComparisonPointer = {
|
|
88
89
|
baselineDir?: string | null;
|
|
@@ -102,7 +103,7 @@ type LiveProofComparisonPointer = {
|
|
|
102
103
|
status?: string;
|
|
103
104
|
summaryPath?: string | null;
|
|
104
105
|
};
|
|
105
|
-
type LiveProofAggregateStatus = ('baseline_missing' | 'improved' | 'inconclusive' | 'mixed' | 'not_compared' | 'regressed' | 'unchanged');
|
|
106
|
+
type LiveProofAggregateStatus = ('baseline_missing' | 'improved' | 'inconclusive' | 'low_confidence' | 'mixed' | 'not_compared' | 'regressed' | 'unchanged');
|
|
106
107
|
type LiveProofNextActionCode = LiveProofArtifact['nextAction']['code'];
|
|
107
108
|
type LiveProofSetArtifact = {
|
|
108
109
|
failureReasons: string[];
|
|
@@ -153,6 +153,7 @@ function countLiveProofComparisons(comparisons) {
|
|
|
153
153
|
const counts = {
|
|
154
154
|
better: 0,
|
|
155
155
|
inconclusive: 0,
|
|
156
|
+
low_confidence: 0,
|
|
156
157
|
mixed: 0,
|
|
157
158
|
skipped: 0,
|
|
158
159
|
unchanged: 0,
|
|
@@ -183,6 +184,9 @@ function deriveLiveProofComparisonStatus(comparisons) {
|
|
|
183
184
|
if (statuses.includes('inconclusive')) {
|
|
184
185
|
return 'inconclusive';
|
|
185
186
|
}
|
|
187
|
+
if (statuses.includes('low_confidence')) {
|
|
188
|
+
return 'low_confidence';
|
|
189
|
+
}
|
|
186
190
|
if (statuses.every((status) => status === 'skipped')) {
|
|
187
191
|
return 'baseline_missing';
|
|
188
192
|
}
|
|
@@ -217,6 +221,9 @@ function expectedLiveProofNextActionCode(comparisonStatus, status = 'passed') {
|
|
|
217
221
|
if (comparisonStatus === 'inconclusive') {
|
|
218
222
|
return 'inspect_inconclusive';
|
|
219
223
|
}
|
|
224
|
+
if (comparisonStatus === 'low_confidence') {
|
|
225
|
+
return 'inspect_low_confidence';
|
|
226
|
+
}
|
|
220
227
|
if (comparisonStatus === 'mixed') {
|
|
221
228
|
return 'inspect_mixed';
|
|
222
229
|
}
|
|
@@ -439,7 +446,7 @@ function formatComparisonPointerMetrics(comparison) {
|
|
|
439
446
|
const highlightText = highlights.length > 0
|
|
440
447
|
? `; notable: ${highlights.map(formatMetricHighlight).join(', ')}`
|
|
441
448
|
: '';
|
|
442
|
-
return ` (metrics better=${counts.better} worse=${counts.worse} unchanged=${counts.unchanged} inconclusive=${counts.inconclusive}${highlightText})`;
|
|
449
|
+
return ` (metrics better=${counts.better} worse=${counts.worse} unchanged=${counts.unchanged} inconclusive=${counts.inconclusive} low_confidence=${counts.low_confidence}${highlightText})`;
|
|
443
450
|
}
|
|
444
451
|
/**
|
|
445
452
|
* Formats capture counts for one interaction proof pointer.
|
|
@@ -815,7 +822,7 @@ function formatLiveProof(proof) {
|
|
|
815
822
|
`Skipped interaction proofs: ${proof.skippedInteractionProofs?.length ?? 0}`,
|
|
816
823
|
...(proof.skippedInteractionProofs ?? []).map((proofPointer) => (`- ${proofPointer.label} (${proofPointer.runnerId}/${proofPointer.scenarioId}/${proofPointer.runId}): ${proofPointer.reason} next=${proofPointer.nextAction.code}`)),
|
|
817
824
|
`Comparisons: ${proof.comparisons.length}`,
|
|
818
|
-
`Comparison counts: better=${proof.comparisonCounts.better} worse=${proof.comparisonCounts.worse} unchanged=${proof.comparisonCounts.unchanged} mixed=${proof.comparisonCounts.mixed} inconclusive=${proof.comparisonCounts.inconclusive} skipped=${proof.comparisonCounts.skipped}`,
|
|
825
|
+
`Comparison counts: better=${proof.comparisonCounts.better} worse=${proof.comparisonCounts.worse} unchanged=${proof.comparisonCounts.unchanged} mixed=${proof.comparisonCounts.mixed} inconclusive=${proof.comparisonCounts.inconclusive} low_confidence=${proof.comparisonCounts.low_confidence} skipped=${proof.comparisonCounts.skipped}`,
|
|
819
826
|
...proof.comparisons.map((comparison) => (`- ${comparison.label ?? 'comparison'} (${comparison.scenarioId ?? 'unknown-scenario'}/${comparison.runId ?? 'unknown-run'}): ${comparison.status ?? 'unknown'}${formatComparisonPointerMetrics(comparison)}`)),
|
|
820
827
|
`Next action: ${proof.nextAction.code} - ${proof.nextAction.summary}`,
|
|
821
828
|
`Summary: ${proof.summary}`,
|
|
@@ -8,8 +8,13 @@ type AndroidProfileOptions = {
|
|
|
8
8
|
};
|
|
9
9
|
type AndroidAdbProfileCommand = {
|
|
10
10
|
command: string;
|
|
11
|
+
commandId?: string;
|
|
11
12
|
label?: string;
|
|
13
|
+
queueId?: string;
|
|
14
|
+
sequence?: number;
|
|
15
|
+
waitForMilestone?: string;
|
|
12
16
|
waitMs?: number;
|
|
17
|
+
waitTimeoutMs?: number;
|
|
13
18
|
};
|
|
14
19
|
type AndroidAdbDriverStep = import('./android-adb').AndroidAdbDriverStep;
|
|
15
20
|
/**
|