agent-scenario-loop 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -9
- package/app/profile-session.ts +98 -4
- package/dist/core/agent-summary.d.ts +3 -2
- package/dist/core/agent-summary.js +44 -2
- package/dist/core/artifact-contract.d.ts +22 -4
- package/dist/core/artifact-contract.js +512 -11
- package/dist/core/comparison.d.ts +57 -3
- package/dist/core/comparison.js +113 -1
- package/dist/core/planner.d.ts +32 -1
- package/dist/core/planner.js +144 -0
- package/dist/core/run-index.d.ts +4 -0
- package/dist/core/run-index.js +55 -1
- package/dist/core/schema-validator.d.ts +1 -0
- package/dist/core/schema-validator.js +1 -0
- package/dist/runner/compare-latest.d.ts +8 -4
- package/dist/runner/compare-latest.js +24 -5
- package/dist/runner/example-android-live.d.ts +10 -1
- package/dist/runner/example-android-live.js +55 -0
- package/dist/runner/example-ios-live.d.ts +10 -1
- package/dist/runner/example-ios-live.js +55 -0
- package/dist/runner/ios-simctl.d.ts +5 -0
- package/dist/runner/ios-simctl.js +6 -0
- package/dist/runner/live-comparison.d.ts +2 -2
- package/dist/runner/live-comparison.js +2 -1
- package/dist/runner/live-proof-summary.d.ts +5 -4
- package/dist/runner/live-proof-summary.js +12 -2
- package/dist/runner/live-proof.d.ts +3 -2
- package/dist/runner/live-proof.js +9 -2
- package/dist/runner/profile-android.d.ts +5 -0
- package/dist/runner/profile-android.js +148 -24
- package/dist/runner/profile-ios.d.ts +11 -1
- package/dist/runner/profile-ios.js +128 -9
- package/dist/runner/profile-mobile.d.ts +8 -0
- package/dist/runner/profile-mobile.js +267 -28
- package/docs/adapters.md +4 -0
- package/docs/architecture.md +90 -0
- package/docs/authoring.md +5 -1
- package/docs/concepts.md +3 -24
- package/docs/consumer-rehearsal.md +4 -0
- package/docs/contracts.md +30 -100
- package/docs/external-adapter-protocol.md +219 -0
- package/docs/live-proofs.md +83 -2
- package/docs/principles.md +9 -15
- package/examples/mobile-app/README.md +12 -0
- package/examples/mobile-app/runner-manifests/primary-runner.json +1 -0
- package/examples/runners/README.md +1 -0
- package/examples/runners/adb-android.json +1 -0
- package/examples/runners/agent-device-android.json +1 -0
- package/examples/runners/agent-device-ios.json +1 -0
- package/examples/runners/argent-android.json +1 -0
- package/examples/runners/argent-ios.json +1 -0
- package/examples/runners/xcodebuildmcp-ios.json +1 -0
- package/package.json +2 -1
- package/schemas/causal-run.schema.json +85 -2
- package/schemas/comparison.schema.json +130 -2
- package/schemas/external-adapter-message.schema.json +693 -0
- package/schemas/health.schema.json +72 -0
- package/schemas/live-proof-set.schema.json +1 -1
- package/schemas/live-proof.schema.json +14 -6
- package/schemas/manifest.schema.json +442 -1
- package/schemas/runner-capabilities.schema.json +20 -0
- package/schemas/scenario.schema.json +16 -0
- package/templates/primary-runner.json +1 -0
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
],
|
|
17
17
|
"driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot", "record", "readLogs"],
|
|
18
18
|
"artifactOutputs": ["logs", "signals", "screenshot", "video", "uiTree", "accessibility"],
|
|
19
|
+
"uiContexts": ["app"],
|
|
19
20
|
"lifecycle": [
|
|
20
21
|
"prepare",
|
|
21
22
|
"launch",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-scenario-loop",
|
|
3
|
-
"version": "0.1.2",
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"private": false,
|
|
5
5
|
"description": "Scenario orchestration and evidence collection for agent-driven software development. Bring your own runner. Keep your scenarios. Keep your evidence.",
|
|
6
6
|
"license": "MIT",
|
|
@@ -200,6 +200,7 @@
|
|
|
200
200
|
"app/profile-session.ts",
|
|
201
201
|
"core/config-template.json",
|
|
202
202
|
"dist",
|
|
203
|
+
"!dist/**/__tests__",
|
|
203
204
|
"!dist/scripts",
|
|
204
205
|
"docs",
|
|
205
206
|
"examples",
|
|
@@ -98,10 +98,26 @@
|
|
|
98
98
|
}
|
|
99
99
|
}
|
|
100
100
|
},
|
|
101
|
+
"provenanceRef": {
|
|
102
|
+
"type": "object",
|
|
103
|
+
"additionalProperties": false,
|
|
104
|
+
"required": ["manifest", "runId"],
|
|
105
|
+
"properties": {
|
|
106
|
+
"manifest": {
|
|
107
|
+
"type": "string"
|
|
108
|
+
},
|
|
109
|
+
"runId": {
|
|
110
|
+
"type": "string"
|
|
111
|
+
},
|
|
112
|
+
"scenarioHash": {
|
|
113
|
+
"type": "string",
|
|
114
|
+
"pattern": "^[a-f0-9]{64}$"
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
},
|
|
101
118
|
"budgets": {
|
|
102
119
|
"type": "object",
|
|
103
120
|
"description": "Named budget thresholds the run should be evaluated against.",
|
|
104
|
-
"minProperties": 1,
|
|
105
121
|
"additionalProperties": {
|
|
106
122
|
"$ref": "#/$defs/budgetThreshold"
|
|
107
123
|
}
|
|
@@ -112,6 +128,41 @@
|
|
|
112
128
|
"$ref": "#/$defs/timelineEvent"
|
|
113
129
|
}
|
|
114
130
|
},
|
|
131
|
+
"iterationSummary": {
|
|
132
|
+
"type": "object",
|
|
133
|
+
"additionalProperties": false,
|
|
134
|
+
"required": ["expected", "completed", "failed", "timeouts", "incomplete", "status"],
|
|
135
|
+
"properties": {
|
|
136
|
+
"expected": {
|
|
137
|
+
"type": "integer",
|
|
138
|
+
"minimum": 1
|
|
139
|
+
},
|
|
140
|
+
"completed": {
|
|
141
|
+
"type": "integer",
|
|
142
|
+
"minimum": 0
|
|
143
|
+
},
|
|
144
|
+
"failed": {
|
|
145
|
+
"type": "integer",
|
|
146
|
+
"minimum": 0
|
|
147
|
+
},
|
|
148
|
+
"timeouts": {
|
|
149
|
+
"type": "integer",
|
|
150
|
+
"minimum": 0
|
|
151
|
+
},
|
|
152
|
+
"incomplete": {
|
|
153
|
+
"type": "array",
|
|
154
|
+
"uniqueItems": true,
|
|
155
|
+
"items": {
|
|
156
|
+
"type": "integer",
|
|
157
|
+
"minimum": 1
|
|
158
|
+
}
|
|
159
|
+
},
|
|
160
|
+
"status": {
|
|
161
|
+
"type": "string",
|
|
162
|
+
"enum": ["complete", "partial", "failed", "timeout"]
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
},
|
|
115
166
|
"artifacts": {
|
|
116
167
|
"type": "object",
|
|
117
168
|
"additionalProperties": false,
|
|
@@ -169,12 +220,31 @@
|
|
|
169
220
|
"items": {
|
|
170
221
|
"type": "object",
|
|
171
222
|
"additionalProperties": false,
|
|
172
|
-
"required": [
|
|
223
|
+
"required": [
|
|
224
|
+
"channel",
|
|
225
|
+
"completenessStatus",
|
|
226
|
+
"corruptionStatus",
|
|
227
|
+
"kind",
|
|
228
|
+
"path",
|
|
229
|
+
"redactionStatus",
|
|
230
|
+
"sha256",
|
|
231
|
+
"sizeBytes",
|
|
232
|
+
"sourceFileName",
|
|
233
|
+
"transformations"
|
|
234
|
+
],
|
|
173
235
|
"properties": {
|
|
174
236
|
"channel": {
|
|
175
237
|
"type": "string",
|
|
176
238
|
"enum": ["capture", "provider", "signal"]
|
|
177
239
|
},
|
|
240
|
+
"completenessStatus": {
|
|
241
|
+
"type": "string",
|
|
242
|
+
"enum": ["complete", "truncated", "unknown"]
|
|
243
|
+
},
|
|
244
|
+
"corruptionStatus": {
|
|
245
|
+
"type": "string",
|
|
246
|
+
"enum": ["valid", "corrupt", "unknown"]
|
|
247
|
+
},
|
|
178
248
|
"kind": {
|
|
179
249
|
"type": "string",
|
|
180
250
|
"enum": ["accessibility", "js", "logs", "memory", "network", "profiler", "screenshot", "uiTree", "video"]
|
|
@@ -183,6 +253,10 @@
|
|
|
183
253
|
"type": "string",
|
|
184
254
|
"minLength": 1
|
|
185
255
|
},
|
|
256
|
+
"redactionStatus": {
|
|
257
|
+
"type": "string",
|
|
258
|
+
"enum": ["not-redacted", "redacted", "unknown"]
|
|
259
|
+
},
|
|
186
260
|
"sha256": {
|
|
187
261
|
"type": "string",
|
|
188
262
|
"pattern": "^[a-f0-9]{64}$"
|
|
@@ -194,6 +268,15 @@
|
|
|
194
268
|
"sourceFileName": {
|
|
195
269
|
"type": "string",
|
|
196
270
|
"minLength": 1
|
|
271
|
+
},
|
|
272
|
+
"transformations": {
|
|
273
|
+
"type": "array",
|
|
274
|
+
"minItems": 1,
|
|
275
|
+
"uniqueItems": true,
|
|
276
|
+
"items": {
|
|
277
|
+
"type": "string",
|
|
278
|
+
"enum": ["copied", "normalized", "redacted", "truncated", "compressed", "transcoded", "unknown"]
|
|
279
|
+
}
|
|
197
280
|
}
|
|
198
281
|
}
|
|
199
282
|
}
|
|
@@ -33,7 +33,7 @@
|
|
|
33
33
|
},
|
|
34
34
|
"comparisonStatus": {
|
|
35
35
|
"type": "string",
|
|
36
|
-
"enum": ["better", "worse", "unchanged", "mixed", "inconclusive"]
|
|
36
|
+
"enum": ["better", "worse", "unchanged", "mixed", "inconclusive", "low_confidence"]
|
|
37
37
|
},
|
|
38
38
|
"healthStatus": {
|
|
39
39
|
"type": "string",
|
|
@@ -46,6 +46,9 @@
|
|
|
46
46
|
"comparisonBasis": {
|
|
47
47
|
"$ref": "#/$defs/comparisonBasis"
|
|
48
48
|
},
|
|
49
|
+
"measurementPolicy": {
|
|
50
|
+
"$ref": "#/$defs/measurementPolicy"
|
|
51
|
+
},
|
|
49
52
|
"metricComparisons": {
|
|
50
53
|
"type": "array",
|
|
51
54
|
"items": {
|
|
@@ -135,6 +138,10 @@
|
|
|
135
138
|
"type": "string",
|
|
136
139
|
"pattern": "^[a-f0-9]{64}$"
|
|
137
140
|
},
|
|
141
|
+
"cohortHash": {
|
|
142
|
+
"type": "string",
|
|
143
|
+
"pattern": "^[a-f0-9]{64}$"
|
|
144
|
+
},
|
|
138
145
|
"selectedRunDir": {
|
|
139
146
|
"type": "string"
|
|
140
147
|
},
|
|
@@ -159,6 +166,10 @@
|
|
|
159
166
|
"trustedScenarioContractCandidates": {
|
|
160
167
|
"type": "integer",
|
|
161
168
|
"minimum": 0
|
|
169
|
+
},
|
|
170
|
+
"trustedCohortCandidates": {
|
|
171
|
+
"type": "integer",
|
|
172
|
+
"minimum": 0
|
|
162
173
|
}
|
|
163
174
|
}
|
|
164
175
|
},
|
|
@@ -185,12 +196,129 @@
|
|
|
185
196
|
},
|
|
186
197
|
"status": {
|
|
187
198
|
"type": "string",
|
|
188
|
-
"enum": ["better", "worse", "unchanged", "inconclusive"]
|
|
199
|
+
"enum": ["better", "worse", "unchanged", "inconclusive", "low_confidence"]
|
|
189
200
|
},
|
|
190
201
|
"notes": {
|
|
191
202
|
"type": "string"
|
|
192
203
|
}
|
|
193
204
|
}
|
|
205
|
+
},
|
|
206
|
+
"measurementPolicy": {
|
|
207
|
+
"type": "object",
|
|
208
|
+
"additionalProperties": false,
|
|
209
|
+
"required": ["baselineSelection", "samples", "tolerance", "confidence"],
|
|
210
|
+
"properties": {
|
|
211
|
+
"baselineSelection": {
|
|
212
|
+
"type": "object",
|
|
213
|
+
"additionalProperties": false,
|
|
214
|
+
"required": ["mode", "poisoningProtection"],
|
|
215
|
+
"properties": {
|
|
216
|
+
"mode": {
|
|
217
|
+
"type": "string",
|
|
218
|
+
"enum": ["explicit", "latestTrustedPrior"]
|
|
219
|
+
},
|
|
220
|
+
"poisoningProtection": {
|
|
221
|
+
"type": "object",
|
|
222
|
+
"additionalProperties": false,
|
|
223
|
+
"required": ["requirePassedHealth", "requirePassedVerdict", "requireMatchingScenarioId"],
|
|
224
|
+
"properties": {
|
|
225
|
+
"requirePassedHealth": {
|
|
226
|
+
"type": "boolean"
|
|
227
|
+
},
|
|
228
|
+
"requirePassedVerdict": {
|
|
229
|
+
"type": "boolean"
|
|
230
|
+
},
|
|
231
|
+
"requireMatchingScenarioId": {
|
|
232
|
+
"type": "boolean"
|
|
233
|
+
},
|
|
234
|
+
"comparisonLane": {
|
|
235
|
+
"type": "string"
|
|
236
|
+
},
|
|
237
|
+
"scenarioHash": {
|
|
238
|
+
"type": "string",
|
|
239
|
+
"pattern": "^[a-f0-9]{64}$"
|
|
240
|
+
},
|
|
241
|
+
"cohortHash": {
|
|
242
|
+
"type": "string",
|
|
243
|
+
"pattern": "^[a-f0-9]{64}$"
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
},
|
|
249
|
+
"samples": {
|
|
250
|
+
"type": "object",
|
|
251
|
+
"additionalProperties": false,
|
|
252
|
+
"required": ["baseline", "current"],
|
|
253
|
+
"properties": {
|
|
254
|
+
"baseline": {
|
|
255
|
+
"$ref": "#/$defs/samplePolicy"
|
|
256
|
+
},
|
|
257
|
+
"current": {
|
|
258
|
+
"$ref": "#/$defs/samplePolicy"
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
},
|
|
262
|
+
"tolerance": {
|
|
263
|
+
"type": "object",
|
|
264
|
+
"additionalProperties": false,
|
|
265
|
+
"required": ["timing"],
|
|
266
|
+
"properties": {
|
|
267
|
+
"timing": {
|
|
268
|
+
"type": "object",
|
|
269
|
+
"additionalProperties": false,
|
|
270
|
+
"required": ["absoluteMs", "relative"],
|
|
271
|
+
"properties": {
|
|
272
|
+
"absoluteMs": {
|
|
273
|
+
"type": "number",
|
|
274
|
+
"minimum": 0
|
|
275
|
+
},
|
|
276
|
+
"relative": {
|
|
277
|
+
"type": "number",
|
|
278
|
+
"minimum": 0
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
},
|
|
284
|
+
"confidence": {
|
|
285
|
+
"type": "object",
|
|
286
|
+
"additionalProperties": false,
|
|
287
|
+
"required": ["level", "minValidSamples"],
|
|
288
|
+
"properties": {
|
|
289
|
+
"level": {
|
|
290
|
+
"type": "string",
|
|
291
|
+
"enum": ["single_run", "multi_sample", "insufficient", "low_confidence"]
|
|
292
|
+
},
|
|
293
|
+
"minValidSamples": {
|
|
294
|
+
"type": "integer",
|
|
295
|
+
"minimum": 1
|
|
296
|
+
},
|
|
297
|
+
"reason": {
|
|
298
|
+
"type": "string"
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
},
|
|
304
|
+
"samplePolicy": {
|
|
305
|
+
"type": "object",
|
|
306
|
+
"additionalProperties": false,
|
|
307
|
+
"required": ["validSamples", "warmupSamples", "outliersExcluded"],
|
|
308
|
+
"properties": {
|
|
309
|
+
"validSamples": {
|
|
310
|
+
"type": "integer",
|
|
311
|
+
"minimum": 0
|
|
312
|
+
},
|
|
313
|
+
"warmupSamples": {
|
|
314
|
+
"type": "integer",
|
|
315
|
+
"minimum": 0
|
|
316
|
+
},
|
|
317
|
+
"outliersExcluded": {
|
|
318
|
+
"type": "integer",
|
|
319
|
+
"minimum": 0
|
|
320
|
+
}
|
|
321
|
+
}
|
|
194
322
|
}
|
|
195
323
|
}
|
|
196
324
|
}
|