agent-scenario-loop 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -9
- package/app/profile-session.ts +352 -12
- package/dist/core/agent-summary.d.ts +3 -2
- package/dist/core/agent-summary.js +44 -2
- package/dist/core/artifact-contract.d.ts +28 -8
- package/dist/core/artifact-contract.js +676 -26
- package/dist/core/comparison.d.ts +57 -3
- package/dist/core/comparison.js +113 -1
- package/dist/core/planner.d.ts +32 -1
- package/dist/core/planner.js +144 -0
- package/dist/core/run-index.d.ts +4 -0
- package/dist/core/run-index.js +55 -1
- package/dist/core/schema-validator.d.ts +2 -0
- package/dist/core/schema-validator.js +2 -0
- package/dist/runner/android-adb-driver.d.ts +7 -2
- package/dist/runner/android-adb-driver.js +7 -1
- package/dist/runner/android-adb.d.ts +40 -5
- package/dist/runner/android-adb.js +1046 -664
- package/dist/runner/compare-latest.d.ts +8 -4
- package/dist/runner/compare-latest.js +24 -5
- package/dist/runner/example-android-live.d.ts +10 -1
- package/dist/runner/example-android-live.js +55 -0
- package/dist/runner/example-ios-live.d.ts +10 -1
- package/dist/runner/example-ios-live.js +55 -0
- package/dist/runner/ios-simctl.d.ts +6 -0
- package/dist/runner/ios-simctl.js +7 -0
- package/dist/runner/live-comparison.d.ts +2 -2
- package/dist/runner/live-comparison.js +2 -1
- package/dist/runner/live-proof-summary.d.ts +5 -4
- package/dist/runner/live-proof-summary.js +12 -2
- package/dist/runner/live-proof.d.ts +3 -2
- package/dist/runner/live-proof.js +9 -2
- package/dist/runner/profile-android.d.ts +16 -1
- package/dist/runner/profile-android.js +364 -26
- package/dist/runner/profile-ios.d.ts +13 -2
- package/dist/runner/profile-ios.js +341 -19
- package/dist/runner/profile-mobile.d.ts +39 -3
- package/dist/runner/profile-mobile.js +1054 -42
- package/dist/runner/validate-project.js +3 -0
- package/dist/scripts/consumer-rehearsal.d.ts +119 -0
- package/dist/scripts/consumer-rehearsal.js +757 -0
- package/dist/scripts/downstream-local-package-gate.d.ts +2 -0
- package/dist/scripts/downstream-local-package-gate.js +264 -0
- package/dist/scripts/package-smoke.d.ts +96 -0
- package/dist/scripts/package-smoke.js +2282 -0
- package/dist/scripts/release-readiness.d.ts +2 -0
- package/dist/scripts/release-readiness.js +520 -0
- package/docs/adapters.md +7 -1
- package/docs/api.md +2 -2
- package/docs/architecture.md +90 -0
- package/docs/authoring.md +39 -3
- package/docs/concepts.md +3 -24
- package/docs/consumer-rehearsal.md +31 -1
- package/docs/contracts.md +45 -101
- package/docs/external-adapter-protocol.md +219 -0
- package/docs/live-proofs.md +86 -3
- package/docs/principles.md +9 -15
- package/examples/mobile-app/README.md +12 -0
- package/examples/mobile-app/runner-manifests/evidence-provider.json +3 -3
- package/examples/mobile-app/runner-manifests/primary-runner.json +1 -0
- package/examples/mobile-app/scripts/asl-capture-profiler-provider.mjs +25 -0
- package/examples/runners/README.md +4 -3
- package/examples/runners/adb-android.json +1 -0
- package/examples/runners/agent-device-android.json +1 -0
- package/examples/runners/agent-device-ios.json +1 -0
- package/examples/runners/argent-android.json +1 -0
- package/examples/runners/argent-ios.json +1 -0
- package/examples/runners/axe-accessibility-provider.json +2 -2
- package/examples/runners/script-accessibility-provider.json +2 -2
- package/examples/runners/script-memory-provider.json +2 -2
- package/examples/runners/script-network-provider.json +2 -2
- package/examples/runners/script-profiler-provider.json +2 -2
- package/examples/runners/xcodebuildmcp-ios.json +1 -0
- package/package.json +12 -3
- package/schemas/causal-run.schema.json +85 -2
- package/schemas/comparison.schema.json +130 -2
- package/schemas/external-adapter-message.schema.json +693 -0
- package/schemas/health.schema.json +72 -0
- package/schemas/live-proof-set.schema.json +1 -1
- package/schemas/live-proof.schema.json +14 -6
- package/schemas/manifest.schema.json +515 -4
- package/schemas/profiler.schema.json +243 -0
- package/schemas/runner-capabilities.schema.json +28 -2
- package/schemas/scenario.schema.json +34 -2
- package/templates/evidence-provider.json +3 -3
- package/templates/primary-runner.json +1 -0
- package/templates/scripts/asl-capture-profiler-provider.mjs +20 -0
|
@@ -11,7 +11,7 @@ type MetricComparison = {
|
|
|
11
11
|
baseline: number | boolean | null;
|
|
12
12
|
current: number | boolean | null;
|
|
13
13
|
delta: number | null;
|
|
14
|
-
status: 'better' | 'worse' | 'unchanged' | 'inconclusive';
|
|
14
|
+
status: 'better' | 'worse' | 'unchanged' | 'inconclusive' | 'low_confidence';
|
|
15
15
|
notes?: string;
|
|
16
16
|
};
|
|
17
17
|
type ComparisonStatus = MetricComparison['status'] | 'mixed';
|
|
@@ -25,12 +25,18 @@ type ComparisonRunBasis = {
|
|
|
25
25
|
/**
 * Optional bookkeeping describing how the baseline run of a comparison was selected.
 *
 * Every field is optional. NOTE(review): the field meanings below are inferred from
 * the field names only; confirm them against the code that populates this record.
 */
type ComparisonSelectionBasis = {
    // Where the selection looked and which run it settled on.
    artifactRoot?: string;
    selectedRunDir?: string;
    selectedRunId?: string;
    skippedCurrentRun?: boolean;

    // Identity of the scenario / lane / cohort the selection was scoped to.
    scenarioId?: string;
    scenarioHash?: string;
    comparisonLane?: string;
    cohortHash?: string;

    // Candidate counters (presumably one per filtering stage; verify against the selector).
    candidatesInspected?: number;
    trustedCandidates?: number;
    trustedPriorCandidates?: number;
    trustedScenarioContractCandidates?: number;
    trustedCohortCandidates?: number;
    trustedComparableCandidates?: number;
};
|
|
35
41
|
type ComparisonBasis = {
|
|
36
42
|
baseline: ComparisonRunBasis;
|
|
@@ -38,6 +44,42 @@ type ComparisonBasis = {
|
|
|
38
44
|
selection?: ComparisonSelectionBasis;
|
|
39
45
|
strategy: ComparisonBasisStrategy;
|
|
40
46
|
};
|
|
47
|
+
/**
 * Measurement policy block attached to a comparison artifact.
 *
 * Records how the baseline was chosen, how many samples each side contributed,
 * the timing tolerance in force, and how much confidence the comparison carries.
 */
type MeasurementPolicy = {
    /** How the baseline run was selected and which guards applied to that selection. */
    baselineSelection: {
        mode: 'explicit' | 'latestTrustedPrior';
        poisoningProtection: {
            requirePassedHealth: boolean;
            requirePassedVerdict: boolean;
            requireMatchingScenarioId: boolean;
            comparisonLane?: string;
            scenarioHash?: string;
            cohortHash?: string;
        };
    };
    /** Sample accounting, reported with the same shape for the baseline and the current run. */
    samples: Record<'baseline' | 'current', {
        validSamples: number;
        warmupSamples: number;
        outliersExcluded: number;
    }>;
    /** Tolerances used when comparing timing values. */
    tolerance: {
        timing: {
            absoluteMs: number;
            relative: number;
        };
    };
    /** Confidence classification for the comparison; `reason` is optional free text. */
    confidence: {
        level: 'single_run' | 'multi_sample' | 'insufficient' | 'low_confidence';
        minValidSamples: number;
        reason?: string;
    };
};
|
|
41
83
|
type BuildComparisonOptions = {
|
|
42
84
|
baselineHealth: ComparisonRecord;
|
|
43
85
|
baselineVerdict: ComparisonRecord;
|
|
@@ -103,6 +145,18 @@ declare function buildComparisonBasis({ baselineDir, currentDir, baselineHealth,
|
|
|
103
145
|
selection?: ComparisonSelectionBasis;
|
|
104
146
|
strategy: ComparisonBasisStrategy;
|
|
105
147
|
}): ComparisonBasis;
|
|
148
|
+
/**
 * Builds the measurement policy block for a comparison artifact.
 *
 * @param options - The baseline and current verdict records, the comparison basis
 *   (may be `undefined`), and the per-metric comparison results.
 * @returns The measurement policy to embed in the comparison artifact.
 */
declare function buildMeasurementPolicy(options: {
    baselineVerdict: ComparisonRecord;
    comparisonBasis: ComparisonBasis | undefined;
    currentVerdict: ComparisonRecord;
    metricComparisons: MetricComparison[];
}): MeasurementPolicy;
|
|
106
160
|
/**
|
|
107
161
|
* Builds a comparison artifact from two validated run artifact sets.
|
|
108
162
|
*
|
|
@@ -129,5 +183,5 @@ declare function summarizeComparison({ comparisonStatus, missingRequired, metric
|
|
|
129
183
|
metricComparisons: MetricComparison[];
|
|
130
184
|
warnings: string[];
|
|
131
185
|
}): string;
|
|
132
|
-
export { buildComparisonBasis, buildComparisonArtifact, compareBudgetCheck, compareRunDirectories, indexBudgetChecks, readRunArtifacts, resolveComparisonStatus, summarizeComparison, };
|
|
133
|
-
export type { BuildComparisonOptions, ComparisonBasis, ComparisonBasisStrategy, CompareRunDirectoriesOptions, ComparisonBudgetCheck, ComparisonRecord, ComparisonStatus, MetricComparison, };
|
|
186
|
+
export { buildComparisonBasis, buildComparisonArtifact, compareBudgetCheck, compareRunDirectories, buildMeasurementPolicy, indexBudgetChecks, readRunArtifacts, resolveComparisonStatus, summarizeComparison, };
|
|
187
|
+
export type { BuildComparisonOptions, ComparisonBasis, ComparisonBasisStrategy, CompareRunDirectoriesOptions, ComparisonBudgetCheck, ComparisonRecord, ComparisonStatus, MeasurementPolicy, MetricComparison, };
|
package/dist/core/comparison.js
CHANGED
|
@@ -4,6 +4,7 @@ exports.buildComparisonBasis = buildComparisonBasis;
|
|
|
4
4
|
exports.buildComparisonArtifact = buildComparisonArtifact;
|
|
5
5
|
exports.compareBudgetCheck = compareBudgetCheck;
|
|
6
6
|
exports.compareRunDirectories = compareRunDirectories;
|
|
7
|
+
exports.buildMeasurementPolicy = buildMeasurementPolicy;
|
|
7
8
|
exports.indexBudgetChecks = indexBudgetChecks;
|
|
8
9
|
exports.readRunArtifacts = readRunArtifacts;
|
|
9
10
|
exports.resolveComparisonStatus = resolveComparisonStatus;
|
|
@@ -113,6 +114,21 @@ function compareBudgetCheck(baseline, current) {
|
|
|
113
114
|
: {}),
|
|
114
115
|
};
|
|
115
116
|
}
|
|
117
|
+
/**
|
|
118
|
+
* Returns whether a directional timing delta should be reported as low confidence.
|
|
119
|
+
*
|
|
120
|
+
* @param {MetricComparison} metric
|
|
121
|
+
* @param {ComparisonBudgetCheck} baseline
|
|
122
|
+
* @param {ComparisonBudgetCheck} current
|
|
123
|
+
* @returns {boolean}
|
|
124
|
+
*/
|
|
125
|
+
function isLowConfidenceTimingMovement(metric, baseline, current) {
|
|
126
|
+
return (metric.status === 'worse' &&
|
|
127
|
+
baseline.unit === 'ms' &&
|
|
128
|
+
current.unit === 'ms' &&
|
|
129
|
+
baseline.pass === true &&
|
|
130
|
+
current.pass === true);
|
|
131
|
+
}
|
|
116
132
|
/**
|
|
117
133
|
* Collapses metric-level comparison statuses into the run-level comparison status.
|
|
118
134
|
*
|
|
@@ -123,6 +139,7 @@ function compareBudgetCheck(baseline, current) {
|
|
|
123
139
|
function resolveComparisonStatus(metricComparisons, { baselineVerdictStatus, currentVerdictStatus, }) {
|
|
124
140
|
const hasBetterMetric = metricComparisons.some((metric) => metric.status === 'better');
|
|
125
141
|
const hasWorseMetric = metricComparisons.some((metric) => metric.status === 'worse');
|
|
142
|
+
const hasLowConfidenceMetric = metricComparisons.some((metric) => metric.status === 'low_confidence');
|
|
126
143
|
if (hasBetterMetric && hasWorseMetric) {
|
|
127
144
|
return 'mixed';
|
|
128
145
|
}
|
|
@@ -132,6 +149,9 @@ function resolveComparisonStatus(metricComparisons, { baselineVerdictStatus, cur
|
|
|
132
149
|
if (hasBetterMetric) {
|
|
133
150
|
return 'better';
|
|
134
151
|
}
|
|
152
|
+
if (hasLowConfidenceMetric) {
|
|
153
|
+
return 'low_confidence';
|
|
154
|
+
}
|
|
135
155
|
if (metricComparisons.length > 0 && metricComparisons.every((metric) => metric.status === 'unchanged')) {
|
|
136
156
|
return 'unchanged';
|
|
137
157
|
}
|
|
@@ -169,6 +189,79 @@ function buildComparisonBasis({ baselineDir, currentDir, baselineHealth, baselin
|
|
|
169
189
|
...(selection ? { selection } : {}),
|
|
170
190
|
};
|
|
171
191
|
}
|
|
192
|
+
/**
 * Counts valid numeric or boolean budget samples in a verdict artifact.
 *
 * An entry counts when it is a non-null object whose `actual` is a boolean or a
 * finite number. `NaN` and `±Infinity` are not measurements, so they are not
 * counted. Anything that is not an array yields 0.
 *
 * @param {unknown} checks
 * @returns {number}
 */
function countValidBudgetSamples(checks) {
    if (!Array.isArray(checks)) {
        return 0;
    }
    return checks.filter((check) => {
        if (check === null || typeof check !== 'object') {
            return false;
        }
        const { actual } = check;
        // Number.isFinite is false for every non-number, so it also acts as the type check.
        return typeof actual === 'boolean' || Number.isFinite(actual);
    }).length;
}
|
|
207
|
+
/**
 * Assembles the `measurementPolicy` block of a comparison artifact.
 *
 * - Baseline selection mode is `latestTrustedPrior` when the basis strategy is
 *   `latest_trusted_prior`, otherwise `explicit`.
 * - Lane, scenario hash and cohort hash are copied from the basis selection only
 *   when they are strings.
 * - The confidence level is `low_confidence` if any metric carries that status;
 *   otherwise it is derived from the number of metric comparisons (0, 1, or more).
 *   NOTE(review): that number counts compared metrics, not repeated runs; confirm
 *   this is the intended meaning of `multi_sample`.
 *
 * @param {{baselineVerdict: Record<string, unknown>, comparisonBasis?: ComparisonBasis, currentVerdict: Record<string, unknown>, metricComparisons: MetricComparison[]}} options
 * @returns {MeasurementPolicy}
 */
function buildMeasurementPolicy({ baselineVerdict, comparisonBasis, currentVerdict, metricComparisons, }) {
    const usesLatestTrustedPrior = comparisonBasis?.strategy === 'latest_trusted_prior';
    const selection = comparisonBasis?.selection;
    const comparedMetricCount = metricComparisons.length;
    const hasLowConfidenceMovement = metricComparisons.some((metric) => metric.status === 'low_confidence');

    let confidenceLevel;
    if (hasLowConfidenceMovement) {
        confidenceLevel = 'low_confidence';
    }
    else if (comparedMetricCount === 0) {
        confidenceLevel = 'insufficient';
    }
    else if (comparedMetricCount === 1) {
        confidenceLevel = 'single_run';
    }
    else {
        confidenceLevel = 'multi_sample';
    }

    const poisoningProtection = {
        requirePassedHealth: true,
        requirePassedVerdict: usesLatestTrustedPrior,
        requireMatchingScenarioId: true,
    };
    // Optional identifiers are appended in a fixed order so the serialized key order stays stable.
    for (const key of ['comparisonLane', 'scenarioHash', 'cohortHash']) {
        const value = selection?.[key];
        if (typeof value === 'string') {
            poisoningProtection[key] = value;
        }
    }

    // Warm-up and outlier counts are always reported as zero here.
    const describeSamples = (verdict) => ({
        validSamples: countValidBudgetSamples(verdict.budgetChecks),
        warmupSamples: 0,
        outliersExcluded: 0,
    });

    const confidence = {
        level: confidenceLevel,
        minValidSamples: 1,
    };
    if (hasLowConfidenceMovement) {
        confidence.reason = 'Single-run timing movement stayed within passing budgets; repeat or multi-sample proof is required before treating it as a regression.';
    }

    return {
        baselineSelection: {
            mode: usesLatestTrustedPrior ? 'latestTrustedPrior' : 'explicit',
            poisoningProtection,
        },
        samples: {
            baseline: describeSamples(baselineVerdict),
            current: describeSamples(currentVerdict),
        },
        tolerance: {
            timing: {
                absoluteMs: MIN_MS_COMPARISON_TOLERANCE,
                relative: RELATIVE_MS_COMPARISON_TOLERANCE,
            },
        },
        confidence,
    };
}
|
|
172
265
|
/**
|
|
173
266
|
* Builds a comparison artifact from two validated run artifact sets.
|
|
174
267
|
*
|
|
@@ -202,7 +295,17 @@ function buildComparisonArtifact({ baselineHealth, baselineVerdict, comparisonBa
|
|
|
202
295
|
warnings.push(`No baseline budget check matched ${currentCheck.name}.`);
|
|
203
296
|
continue;
|
|
204
297
|
}
|
|
205
|
-
|
|
298
|
+
const metricComparison = compareBudgetCheck(baselineCheck, currentCheck);
|
|
299
|
+
if (comparisonBasis?.strategy === 'latest_trusted_prior' &&
|
|
300
|
+
isLowConfidenceTimingMovement(metricComparison, baselineCheck, currentCheck)) {
|
|
301
|
+
metricComparisons.push({
|
|
302
|
+
...metricComparison,
|
|
303
|
+
status: 'low_confidence',
|
|
304
|
+
notes: 'Single-run timing movement stayed within passing budgets; repeat or multi-sample proof is required before treating it as a regression.',
|
|
305
|
+
});
|
|
306
|
+
continue;
|
|
307
|
+
}
|
|
308
|
+
metricComparisons.push(metricComparison);
|
|
206
309
|
}
|
|
207
310
|
if (metricComparisons.length === 0) {
|
|
208
311
|
warnings.push('No comparable budget checks were available.');
|
|
@@ -228,6 +331,12 @@ function buildComparisonArtifact({ baselineHealth, baselineVerdict, comparisonBa
|
|
|
228
331
|
healthStatus: canCompare ? 'passed' : 'failed',
|
|
229
332
|
verdictStatus: typeof currentVerdict.verdictStatus === 'string' ? currentVerdict.verdictStatus : 'inconclusive',
|
|
230
333
|
...(comparisonBasis ? { comparisonBasis } : {}),
|
|
334
|
+
measurementPolicy: buildMeasurementPolicy({
|
|
335
|
+
baselineVerdict,
|
|
336
|
+
comparisonBasis,
|
|
337
|
+
currentVerdict,
|
|
338
|
+
metricComparisons,
|
|
339
|
+
}),
|
|
231
340
|
...(metricComparisons.length > 0 ? { metricComparisons } : {}),
|
|
232
341
|
evidence: {
|
|
233
342
|
missingRequired,
|
|
@@ -287,6 +396,9 @@ function summarizeComparison({ comparisonStatus, missingRequired, metricComparis
|
|
|
287
396
|
if (comparisonStatus === 'mixed') {
|
|
288
397
|
return 'Current run has mixed metric movement against the explicit baseline.';
|
|
289
398
|
}
|
|
399
|
+
if (comparisonStatus === 'low_confidence') {
|
|
400
|
+
return 'Current run has low-confidence timing movement against the baseline; repeat or multi-sample proof is required before treating it as a regression.';
|
|
401
|
+
}
|
|
290
402
|
if (comparisonStatus === 'unchanged') {
|
|
291
403
|
return 'Current run matched the explicit baseline.';
|
|
292
404
|
}
|
package/dist/core/planner.d.ts
CHANGED
|
@@ -8,10 +8,18 @@ type CompatibilityResult = {
|
|
|
8
8
|
compatible: boolean;
|
|
9
9
|
errors: PlannerIssue[];
|
|
10
10
|
warnings: PlannerIssue[];
|
|
11
|
+
downgradePolicy: {
|
|
12
|
+
mode: string;
|
|
13
|
+
allowedSubstitutions: Array<Record<string, unknown>>;
|
|
14
|
+
substitutions: Array<Record<string, unknown>>;
|
|
15
|
+
unsupported: Array<Record<string, unknown>>;
|
|
16
|
+
warnings: Array<Record<string, unknown>>;
|
|
17
|
+
};
|
|
11
18
|
matched: {
|
|
12
19
|
platforms: string[];
|
|
13
20
|
capabilities: string[];
|
|
14
21
|
driverActions: string[];
|
|
22
|
+
uiContexts: string[];
|
|
15
23
|
artifacts: string[];
|
|
16
24
|
evidenceProviders: string[];
|
|
17
25
|
};
|
|
@@ -22,6 +30,7 @@ type ScenarioStep = ManifestRecord & {
|
|
|
22
30
|
id?: unknown;
|
|
23
31
|
required?: unknown;
|
|
24
32
|
selector?: unknown;
|
|
33
|
+
uiContext?: unknown;
|
|
25
34
|
};
|
|
26
35
|
type ScenarioManifest = ManifestRecord & {
|
|
27
36
|
adapterOptions?: unknown;
|
|
@@ -44,6 +53,7 @@ type RunnerManifest = ManifestRecord & {
|
|
|
44
53
|
platforms?: unknown[];
|
|
45
54
|
capabilities?: unknown[];
|
|
46
55
|
driverActions?: unknown[];
|
|
56
|
+
uiContexts?: unknown[];
|
|
47
57
|
artifactOutputs?: unknown[];
|
|
48
58
|
};
|
|
49
59
|
/**
|
|
@@ -72,6 +82,27 @@ declare function collectProvidedDriverActions({ runner, evidenceProviders, effec
|
|
|
72
82
|
evidenceProviders: RunnerManifest[];
|
|
73
83
|
effectivePlatforms: string[];
|
|
74
84
|
}): string[];
|
|
85
|
+
/**
 * Collects UI/system contexts owned by the primary runner and active providers.
 *
 * @param options - The primary runner manifest, the evidence provider manifests,
 *   and the effective platforms used to decide which providers are active.
 * @returns The collected UI context names.
 */
declare function collectProvidedUiContexts(options: {
    runner: RunnerManifest;
    evidenceProviders: RunnerManifest[];
    effectivePlatforms: string[];
}): string[];
|
|
96
|
+
/**
 * Collects UI/system contexts required by scenario steps.
 *
 * @param scenario - The scenario manifest whose steps are inspected.
 * @returns The UI context names split into `required` and `optional` lists.
 */
declare function collectScenarioUiContexts(scenario: ScenarioManifest): Record<'required' | 'optional', string[]>;
|
|
75
106
|
/**
|
|
76
107
|
* Collects driver operations required by scenario steps.
|
|
77
108
|
*
|
|
@@ -128,5 +159,5 @@ declare function buildUnevaluatedVerdict({ scenario, runId, health, }: {
|
|
|
128
159
|
runId?: string;
|
|
129
160
|
health: ManifestRecord;
|
|
130
161
|
}): ManifestRecord;
|
|
131
|
-
export { buildCompatibilityHealth, buildUnevaluatedVerdict, collectProvidedDriverActions, collectScenarioDriverActions, evaluateRunnerCompatibility, intersection, uniqueSorted, validateScenarioAdapterOptions, };
|
|
162
|
+
export { buildCompatibilityHealth, buildUnevaluatedVerdict, collectProvidedDriverActions, collectProvidedUiContexts, collectScenarioDriverActions, collectScenarioUiContexts, evaluateRunnerCompatibility, intersection, uniqueSorted, validateScenarioAdapterOptions, };
|
|
132
163
|
export type { CompatibilityResult, ManifestRecord, PlannerIssue, RunnerManifest, ScenarioManifest, };
|
package/dist/core/planner.js
CHANGED
|
@@ -3,11 +3,21 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
3
3
|
exports.buildCompatibilityHealth = buildCompatibilityHealth;
|
|
4
4
|
exports.buildUnevaluatedVerdict = buildUnevaluatedVerdict;
|
|
5
5
|
exports.collectProvidedDriverActions = collectProvidedDriverActions;
|
|
6
|
+
exports.collectProvidedUiContexts = collectProvidedUiContexts;
|
|
6
7
|
exports.collectScenarioDriverActions = collectScenarioDriverActions;
|
|
8
|
+
exports.collectScenarioUiContexts = collectScenarioUiContexts;
|
|
7
9
|
exports.evaluateRunnerCompatibility = evaluateRunnerCompatibility;
|
|
8
10
|
exports.intersection = intersection;
|
|
9
11
|
exports.uniqueSorted = uniqueSorted;
|
|
10
12
|
exports.validateScenarioAdapterOptions = validateScenarioAdapterOptions;
|
|
13
|
+
/**
 * Driver actions that operate on the UI. A scenario step using one of these
 * without an explicit `uiContext` is treated as targeting the `app` context.
 */
const UI_DRIVER_ACTIONS = new Set(['tap', 'scroll', 'assertVisible', 'inspectTree', 'screenshot', 'record']);
|
|
11
21
|
/**
|
|
12
22
|
* Returns `value` when it is already an array; otherwise returns an empty array.
|
|
13
23
|
*
|
|
@@ -64,6 +74,67 @@ function createIssue(code, message, metadata = {}) {
|
|
|
64
74
|
...metadata,
|
|
65
75
|
};
|
|
66
76
|
}
|
|
77
|
+
/**
|
|
78
|
+
* Converts a planner issue into a capability policy entry when it affects proof strength.
|
|
79
|
+
*
|
|
80
|
+
* @param {Record<string, unknown>} issue
|
|
81
|
+
* @param {'unsupported' | 'warning'} status
|
|
82
|
+
* @returns {Record<string, unknown> | null}
|
|
83
|
+
*/
|
|
84
|
+
function issueToCapabilityPolicyEntry(issue, status) {
|
|
85
|
+
if (typeof issue.capability === 'string') {
|
|
86
|
+
return {
|
|
87
|
+
kind: 'capability',
|
|
88
|
+
name: issue.capability,
|
|
89
|
+
status,
|
|
90
|
+
code: issue.code,
|
|
91
|
+
};
|
|
92
|
+
}
|
|
93
|
+
if (typeof issue.driverAction === 'string') {
|
|
94
|
+
return {
|
|
95
|
+
kind: 'driverAction',
|
|
96
|
+
name: issue.driverAction,
|
|
97
|
+
status,
|
|
98
|
+
code: issue.code,
|
|
99
|
+
};
|
|
100
|
+
}
|
|
101
|
+
if (typeof issue.uiContext === 'string') {
|
|
102
|
+
return {
|
|
103
|
+
kind: 'uiContext',
|
|
104
|
+
name: issue.uiContext,
|
|
105
|
+
status,
|
|
106
|
+
code: issue.code,
|
|
107
|
+
};
|
|
108
|
+
}
|
|
109
|
+
if (typeof issue.artifact === 'string') {
|
|
110
|
+
return {
|
|
111
|
+
kind: 'artifact',
|
|
112
|
+
name: issue.artifact,
|
|
113
|
+
status,
|
|
114
|
+
code: issue.code,
|
|
115
|
+
};
|
|
116
|
+
}
|
|
117
|
+
return null;
|
|
118
|
+
}
|
|
119
|
+
/**
 * Produces the `no-silent-downgrade` policy artifact for a planner result.
 *
 * Errors become `unsupported` entries and warnings become `warning` entries;
 * issues that do not map to a capability policy entry are left out. Both
 * substitution lists are always empty.
 *
 * @param {{errors: Record<string, unknown>[], warnings: Record<string, unknown>[]}} options
 * @returns {Record<string, unknown>}
 */
function buildDowngradePolicy({ errors, warnings, }) {
    // Converts issues to policy entries, dropping the ones that have no entry.
    const toPolicyEntries = (issues, status) => issues.flatMap((issue) => {
        const entry = issueToCapabilityPolicyEntry(issue, status);
        return entry === null ? [] : [entry];
    });
    const unsupported = toPolicyEntries(errors, 'unsupported');
    const policyWarnings = toPolicyEntries(warnings, 'warning');
    return {
        mode: 'no-silent-downgrade',
        allowedSubstitutions: [],
        substitutions: [],
        unsupported,
        warnings: policyWarnings,
    };
}
|
|
67
138
|
/**
|
|
68
139
|
* Returns `value` when it is a plain object; otherwise returns an empty object.
|
|
69
140
|
*
|
|
@@ -253,6 +324,53 @@ function collectProvidedDriverActions({ runner, evidenceProviders, effectivePlat
|
|
|
253
324
|
...activeProviders.flatMap((provider) => asArray(provider?.driverActions)),
|
|
254
325
|
]);
|
|
255
326
|
}
|
|
327
|
+
/**
 * Gathers the UI/system contexts declared by the primary runner and by every
 * evidence provider that is active for the effective platforms.
 *
 * Runner contexts come first, followed by provider contexts in provider order,
 * before the combined list is passed through `uniqueSorted`.
 *
 * @param {{runner: Record<string, unknown>, evidenceProviders: Record<string, unknown>[], effectivePlatforms: string[]}} options
 * @returns {string[]}
 */
function collectProvidedUiContexts({ runner, evidenceProviders, effectivePlatforms, }) {
    const isActive = (provider) => isProviderActiveForPlatforms(provider, effectivePlatforms);
    const runnerContexts = asArray(runner?.uiContexts);
    const providerContexts = evidenceProviders
        .filter(isActive)
        .flatMap((provider) => asArray(provider?.uiContexts));
    return uniqueSorted([...runnerContexts, ...providerContexts]);
}
|
|
340
|
+
/**
 * Derives the UI/system contexts a scenario's steps depend on.
 *
 * A step contributes its own string `uiContext`; failing that, a step whose
 * `driverAction` is one of `UI_DRIVER_ACTIONS` contributes `app`. Steps that are
 * not objects, or that resolve to no context (including an empty-string
 * `uiContext`), are ignored. Steps with `required === false` feed the optional
 * list; all others feed the required list.
 *
 * @param {Record<string, unknown>} scenario
 * @returns {{required: string[], optional: string[]}}
 */
function collectScenarioUiContexts(scenario) {
    const contexts = { required: [], optional: [] };
    const steps = Array.isArray(scenario.steps) ? scenario.steps : [];
    for (const step of steps) {
        if (!step || typeof step !== 'object') {
            continue;
        }
        let uiContext = null;
        if (typeof step.uiContext === 'string') {
            uiContext = step.uiContext;
        }
        else if (typeof step.driverAction === 'string' && UI_DRIVER_ACTIONS.has(step.driverAction)) {
            // UI driver actions implicitly run against the app under test.
            uiContext = 'app';
        }
        if (!uiContext) {
            continue;
        }
        const bucket = step.required === false ? contexts.optional : contexts.required;
        bucket.push(uiContext);
    }
    return {
        required: uniqueSorted(contexts.required),
        optional: uniqueSorted(contexts.optional),
    };
}
|
|
256
374
|
/**
|
|
257
375
|
* Collects driver operations required by scenario steps.
|
|
258
376
|
*
|
|
@@ -653,10 +771,12 @@ function evaluateRunnerCompatibility({ scenario, runner, evidenceProviders = [],
|
|
|
653
771
|
compatible: false,
|
|
654
772
|
errors,
|
|
655
773
|
warnings,
|
|
774
|
+
downgradePolicy: buildDowngradePolicy({ errors, warnings }),
|
|
656
775
|
matched: {
|
|
657
776
|
platforms: [],
|
|
658
777
|
capabilities: [],
|
|
659
778
|
driverActions: [],
|
|
779
|
+
uiContexts: [],
|
|
660
780
|
artifacts: [],
|
|
661
781
|
evidenceProviders: [],
|
|
662
782
|
},
|
|
@@ -705,6 +825,26 @@ function evaluateRunnerCompatibility({ scenario, runner, evidenceProviders = [],
|
|
|
705
825
|
driverAction,
|
|
706
826
|
}));
|
|
707
827
|
}
|
|
828
|
+
const providedUiContexts = collectProvidedUiContexts({
|
|
829
|
+
runner: primaryRunner,
|
|
830
|
+
evidenceProviders,
|
|
831
|
+
effectivePlatforms,
|
|
832
|
+
});
|
|
833
|
+
const scenarioUiContexts = collectScenarioUiContexts(scenario);
|
|
834
|
+
for (const uiContext of includesAll(providedUiContexts, scenarioUiContexts.required)) {
|
|
835
|
+
errors.push(createIssue('missing_required_ui_context', `No active runner or provider declares required UI context \`${uiContext}\`.`, {
|
|
836
|
+
runnerId: getRunnerId(primaryRunner),
|
|
837
|
+
scenarioId: getScenarioId(scenario),
|
|
838
|
+
uiContext,
|
|
839
|
+
}));
|
|
840
|
+
}
|
|
841
|
+
for (const uiContext of includesAll(providedUiContexts, scenarioUiContexts.optional)) {
|
|
842
|
+
warnings.push(createIssue('missing_optional_ui_context', `No active runner or provider declares optional UI context \`${uiContext}\`.`, {
|
|
843
|
+
runnerId: getRunnerId(primaryRunner),
|
|
844
|
+
scenarioId: getScenarioId(scenario),
|
|
845
|
+
uiContext,
|
|
846
|
+
}));
|
|
847
|
+
}
|
|
708
848
|
const { activeProviders, artifacts } = collectProvidedArtifacts({
|
|
709
849
|
runner: primaryRunner,
|
|
710
850
|
evidenceProviders,
|
|
@@ -728,10 +868,12 @@ function evaluateRunnerCompatibility({ scenario, runner, evidenceProviders = [],
|
|
|
728
868
|
compatible: errors.length === 0,
|
|
729
869
|
errors,
|
|
730
870
|
warnings,
|
|
871
|
+
downgradePolicy: buildDowngradePolicy({ errors, warnings }),
|
|
731
872
|
matched: {
|
|
732
873
|
platforms: effectivePlatforms,
|
|
733
874
|
capabilities: providedCapabilities,
|
|
734
875
|
driverActions: providedDriverActions,
|
|
876
|
+
uiContexts: providedUiContexts,
|
|
735
877
|
artifacts,
|
|
736
878
|
evidenceProviders: activeProviders.map((provider) => getRunnerId(provider)),
|
|
737
879
|
},
|
|
@@ -769,10 +911,12 @@ function buildCompatibilityHealth({ scenario, runId, compatibility, }) {
|
|
|
769
911
|
healthStatus: failedChecks.length > 0 ? 'failed' : 'passed',
|
|
770
912
|
checks,
|
|
771
913
|
...(warningChecks.length > 0 ? { warnings: warningChecks } : {}),
|
|
914
|
+
downgradePolicy: compatibility.downgradePolicy ?? buildDowngradePolicy({ errors, warnings }),
|
|
772
915
|
matched: {
|
|
773
916
|
platforms: uniqueSorted(asArray(compatibility?.matched?.platforms)),
|
|
774
917
|
capabilities: uniqueSorted(asArray(compatibility?.matched?.capabilities)),
|
|
775
918
|
driverActions: uniqueSorted(asArray(compatibility?.matched?.driverActions)),
|
|
919
|
+
uiContexts: uniqueSorted(asArray(compatibility?.matched?.uiContexts)),
|
|
776
920
|
artifacts: uniqueSorted(asArray(compatibility?.matched?.artifacts)),
|
|
777
921
|
evidenceProviders: uniqueSorted(asArray(compatibility?.matched?.evidenceProviders)),
|
|
778
922
|
},
|
package/dist/core/run-index.d.ts
CHANGED
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
type RunIndexEntry = {
|
|
2
2
|
runDir: string;
|
|
3
3
|
scenarioId: string;
|
|
4
|
+
attemptId?: string;
|
|
5
|
+
attemptNumber?: number;
|
|
4
6
|
scenarioHash?: string;
|
|
7
|
+
cohortHash?: string;
|
|
5
8
|
runId: string;
|
|
6
9
|
healthStatus: string;
|
|
7
10
|
trusted: boolean;
|
|
11
|
+
trustReason: string;
|
|
8
12
|
durationMs?: number;
|
|
9
13
|
endedAt?: string;
|
|
10
14
|
flowId?: string;
|
package/dist/core/run-index.js
CHANGED
|
@@ -17,6 +17,51 @@ const { ARTIFACT_FILENAMES, PROFILE_ARTIFACT_FILENAMES } = require('./artifact-l
|
|
|
17
17
|
function readJson(filePath) {
|
|
18
18
|
return JSON.parse(fs.readFileSync(filePath, 'utf8'));
|
|
19
19
|
}
|
|
20
|
+
/**
 * Type guard for non-null, non-array objects.
 *
 * @param {unknown} value
 * @returns {value is Record<string, unknown>}
 */
function isRecord(value) {
    // Rejects null/undefined and other falsy values, then every non-object type.
    if (!value || typeof value !== 'object') {
        return false;
    }
    return !Array.isArray(value);
}
|
|
29
|
+
/**
 * Resolves the stable reason code describing whether a run may seed
 * latest-trusted comparisons.
 *
 * Checks run in a fixed order and the first failing one decides the code:
 * health, verdict, presence of `manifest.attempt`, attempt status/terminal state,
 * attempt number, retry lineage, cleanup status, partial artifacts. A manifest
 * without an attempt record yields `trusted_legacy_without_attempt`; a run that
 * clears every check yields `trusted`.
 *
 * @param {{healthStatus: string, verdictStatus?: string, manifest: Record<string, unknown>}} options
 * @returns {string}
 */
function resolveTrustReason({ healthStatus, manifest, verdictStatus, }) {
    if (healthStatus !== 'passed') {
        return 'health_not_passed';
    }
    if (verdictStatus !== 'passed') {
        return 'verdict_not_passed';
    }
    if (!isRecord(manifest.attempt)) {
        return 'trusted_legacy_without_attempt';
    }
    const { attempt } = manifest;
    const attemptPassed = attempt.status === 'passed' && attempt.terminalState === 'passed';
    if (!attemptPassed) {
        return 'attempt_not_passed';
    }
    // Only a first attempt qualifies; a missing or non-numeric attempt number is not rejected here.
    const isLaterAttempt = typeof attempt.attemptNumber === 'number' && attempt.attemptNumber !== 1;
    if (isLaterAttempt) {
        return 'retry_attempt_not_baseline_trusted';
    }
    const hasRetryLineage = typeof attempt.retryOfAttemptId === 'string' || typeof attempt.retryReason === 'string';
    if (hasRetryLineage) {
        return 'retry_lineage_not_baseline_trusted';
    }
    const cleanupStatus = isRecord(attempt.cleanup) ? attempt.cleanup.status : undefined;
    if (cleanupStatus === 'failed' || cleanupStatus === 'partial') {
        return 'cleanup_not_complete';
    }
    const hasValidPartialArtifacts = isRecord(attempt.partialArtifacts) && attempt.partialArtifacts.valid === true;
    if (hasValidPartialArtifacts) {
        return 'partial_artifacts_not_baseline_trusted';
    }
    return 'trusted';
}
|
|
20
65
|
/**
|
|
21
66
|
* Returns whether a directory contains the minimum run artifact pair.
|
|
22
67
|
*
|
|
@@ -79,13 +124,22 @@ function readRunIndexEntry(runDir) {
|
|
|
79
124
|
: path.basename(runDir);
|
|
80
125
|
const healthStatus = typeof health.healthStatus === 'string' ? health.healthStatus : 'unknown';
|
|
81
126
|
const verdictStatus = typeof verdict.verdictStatus === 'string' ? verdict.verdictStatus : undefined;
|
|
127
|
+
const provenance = isRecord(manifest.provenance) ? manifest.provenance : {};
|
|
128
|
+
const attempt = isRecord(manifest.attempt) ? manifest.attempt : null;
|
|
129
|
+
const trustReason = resolveTrustReason({ healthStatus, manifest, verdictStatus });
|
|
82
130
|
return {
|
|
83
131
|
runDir,
|
|
84
132
|
scenarioId,
|
|
85
133
|
runId,
|
|
134
|
+
...(typeof attempt?.attemptId === 'string' ? { attemptId: attempt.attemptId } : {}),
|
|
135
|
+
...(typeof attempt?.attemptNumber === 'number' ? { attemptNumber: attempt.attemptNumber } : {}),
|
|
86
136
|
...(typeof manifest.scenarioHash === 'string' ? { scenarioHash: manifest.scenarioHash } : {}),
|
|
137
|
+
...(typeof provenance.cohortHash === 'string'
|
|
138
|
+
? { cohortHash: provenance.cohortHash }
|
|
139
|
+
: {}),
|
|
87
140
|
healthStatus,
|
|
88
|
-
trusted:
|
|
141
|
+
trusted: trustReason === 'trusted' || trustReason === 'trusted_legacy_without_attempt',
|
|
142
|
+
trustReason,
|
|
89
143
|
...(typeof manifest.durationMs === 'number' ? { durationMs: manifest.durationMs } : {}),
|
|
90
144
|
...(typeof manifest.endedAt === 'string' ? { endedAt: manifest.endedAt } : {}),
|
|
91
145
|
...(typeof health.flowId === 'string' ? { flowId: health.flowId } : {}),
|
|
@@ -46,11 +46,13 @@ declare const SCHEMAS: {
|
|
|
46
46
|
budgetVerdict: JsonSchema;
|
|
47
47
|
causalRun: JsonSchema;
|
|
48
48
|
comparison: JsonSchema;
|
|
49
|
+
externalAdapterMessage: JsonSchema;
|
|
49
50
|
health: JsonSchema;
|
|
50
51
|
liveProof: JsonSchema;
|
|
51
52
|
liveProofSet: JsonSchema;
|
|
52
53
|
manifest: JsonSchema;
|
|
53
54
|
metrics: JsonSchema;
|
|
55
|
+
profiler: JsonSchema;
|
|
54
56
|
projectValidation: JsonSchema;
|
|
55
57
|
scenario: JsonSchema;
|
|
56
58
|
runnerCapabilities: JsonSchema;
|
|
@@ -49,11 +49,13 @@ const SCHEMAS = {
|
|
|
49
49
|
budgetVerdict: loadSchema('budget-verdict.schema.json'),
|
|
50
50
|
causalRun: loadSchema('causal-run.schema.json'),
|
|
51
51
|
comparison: loadSchema('comparison.schema.json'),
|
|
52
|
+
externalAdapterMessage: loadSchema('external-adapter-message.schema.json'),
|
|
52
53
|
health: loadSchema('health.schema.json'),
|
|
53
54
|
liveProof: loadSchema('live-proof.schema.json'),
|
|
54
55
|
liveProofSet: loadSchema('live-proof-set.schema.json'),
|
|
55
56
|
manifest: loadSchema('manifest.schema.json'),
|
|
56
57
|
metrics: loadSchema('metrics.schema.json'),
|
|
58
|
+
profiler: loadSchema('profiler.schema.json'),
|
|
57
59
|
projectValidation: loadSchema('project-validation.schema.json'),
|
|
58
60
|
scenario: loadSchema('scenario.schema.json'),
|
|
59
61
|
runnerCapabilities: loadSchema('runner-capabilities.schema.json'),
|