agent-scenario-loop 0.1.2 → 0.1.3

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (63)
  1. package/README.md +9 -9
  2. package/app/profile-session.ts +98 -4
  3. package/dist/core/agent-summary.d.ts +3 -2
  4. package/dist/core/agent-summary.js +44 -2
  5. package/dist/core/artifact-contract.d.ts +22 -4
  6. package/dist/core/artifact-contract.js +512 -11
  7. package/dist/core/comparison.d.ts +57 -3
  8. package/dist/core/comparison.js +113 -1
  9. package/dist/core/planner.d.ts +32 -1
  10. package/dist/core/planner.js +144 -0
  11. package/dist/core/run-index.d.ts +4 -0
  12. package/dist/core/run-index.js +55 -1
  13. package/dist/core/schema-validator.d.ts +1 -0
  14. package/dist/core/schema-validator.js +1 -0
  15. package/dist/runner/compare-latest.d.ts +8 -4
  16. package/dist/runner/compare-latest.js +24 -5
  17. package/dist/runner/example-android-live.d.ts +10 -1
  18. package/dist/runner/example-android-live.js +55 -0
  19. package/dist/runner/example-ios-live.d.ts +10 -1
  20. package/dist/runner/example-ios-live.js +55 -0
  21. package/dist/runner/ios-simctl.d.ts +5 -0
  22. package/dist/runner/ios-simctl.js +6 -0
  23. package/dist/runner/live-comparison.d.ts +2 -2
  24. package/dist/runner/live-comparison.js +2 -1
  25. package/dist/runner/live-proof-summary.d.ts +5 -4
  26. package/dist/runner/live-proof-summary.js +12 -2
  27. package/dist/runner/live-proof.d.ts +3 -2
  28. package/dist/runner/live-proof.js +9 -2
  29. package/dist/runner/profile-android.d.ts +5 -0
  30. package/dist/runner/profile-android.js +148 -24
  31. package/dist/runner/profile-ios.d.ts +11 -1
  32. package/dist/runner/profile-ios.js +128 -9
  33. package/dist/runner/profile-mobile.d.ts +8 -0
  34. package/dist/runner/profile-mobile.js +267 -28
  35. package/docs/adapters.md +4 -0
  36. package/docs/architecture.md +90 -0
  37. package/docs/authoring.md +5 -1
  38. package/docs/concepts.md +3 -24
  39. package/docs/consumer-rehearsal.md +4 -0
  40. package/docs/contracts.md +30 -100
  41. package/docs/external-adapter-protocol.md +219 -0
  42. package/docs/live-proofs.md +83 -2
  43. package/docs/principles.md +9 -15
  44. package/examples/mobile-app/README.md +12 -0
  45. package/examples/mobile-app/runner-manifests/primary-runner.json +1 -0
  46. package/examples/runners/README.md +1 -0
  47. package/examples/runners/adb-android.json +1 -0
  48. package/examples/runners/agent-device-android.json +1 -0
  49. package/examples/runners/agent-device-ios.json +1 -0
  50. package/examples/runners/argent-android.json +1 -0
  51. package/examples/runners/argent-ios.json +1 -0
  52. package/examples/runners/xcodebuildmcp-ios.json +1 -0
  53. package/package.json +2 -1
  54. package/schemas/causal-run.schema.json +85 -2
  55. package/schemas/comparison.schema.json +130 -2
  56. package/schemas/external-adapter-message.schema.json +693 -0
  57. package/schemas/health.schema.json +72 -0
  58. package/schemas/live-proof-set.schema.json +1 -1
  59. package/schemas/live-proof.schema.json +14 -6
  60. package/schemas/manifest.schema.json +442 -1
  61. package/schemas/runner-capabilities.schema.json +20 -0
  62. package/schemas/scenario.schema.json +16 -0
  63. package/templates/primary-runner.json +1 -0
@@ -12,6 +12,7 @@
12
12
  ],
13
13
  "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot", "record", "readLogs"],
14
14
  "artifactOutputs": ["logs", "signals", "screenshot", "video", "uiTree"],
15
+ "uiContexts": ["app"],
15
16
  "lifecycle": [
16
17
  "prepare",
17
18
  "launch",
@@ -14,6 +14,7 @@
14
14
  ],
15
15
  "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot", "readLogs"],
16
16
  "artifactOutputs": ["logs", "signals", "screenshot", "uiTree"],
17
+ "uiContexts": ["app"],
17
18
  "lifecycle": [
18
19
  "prepare",
19
20
  "launch",
@@ -14,6 +14,7 @@
14
14
  ],
15
15
  "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot", "readLogs"],
16
16
  "artifactOutputs": ["logs", "signals", "screenshot", "uiTree"],
17
+ "uiContexts": ["app"],
17
18
  "lifecycle": [
18
19
  "prepare",
19
20
  "launch",
@@ -14,6 +14,7 @@
14
14
  ],
15
15
  "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot"],
16
16
  "artifactOutputs": ["logs", "signals", "screenshot", "uiTree"],
17
+ "uiContexts": ["app"],
17
18
  "lifecycle": [
18
19
  "prepare",
19
20
  "launch",
@@ -14,6 +14,7 @@
14
14
  ],
15
15
  "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot"],
16
16
  "artifactOutputs": ["logs", "signals", "screenshot", "uiTree"],
17
+ "uiContexts": ["app"],
17
18
  "lifecycle": [
18
19
  "prepare",
19
20
  "launch",
@@ -16,6 +16,7 @@
16
16
  ],
17
17
  "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot", "record", "readLogs"],
18
18
  "artifactOutputs": ["logs", "signals", "screenshot", "video", "uiTree", "accessibility"],
19
+ "uiContexts": ["app"],
19
20
  "lifecycle": [
20
21
  "prepare",
21
22
  "launch",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-scenario-loop",
3
- "version": "0.1.2",
3
+ "version": "0.1.3",
4
4
  "private": false,
5
5
  "description": "Scenario orchestration and evidence collection for agent-driven software development. Bring your own runner. Keep your scenarios. Keep your evidence.",
6
6
  "license": "MIT",
@@ -200,6 +200,7 @@
200
200
  "app/profile-session.ts",
201
201
  "core/config-template.json",
202
202
  "dist",
203
+ "!dist/**/__tests__",
203
204
  "!dist/scripts",
204
205
  "docs",
205
206
  "examples",
package/schemas/causal-run.schema.json CHANGED
@@ -98,10 +98,26 @@
98
98
  }
99
99
  }
100
100
  },
101
+ "provenanceRef": {
102
+ "type": "object",
103
+ "additionalProperties": false,
104
+ "required": ["manifest", "runId"],
105
+ "properties": {
106
+ "manifest": {
107
+ "type": "string"
108
+ },
109
+ "runId": {
110
+ "type": "string"
111
+ },
112
+ "scenarioHash": {
113
+ "type": "string",
114
+ "pattern": "^[a-f0-9]{64}$"
115
+ }
116
+ }
117
+ },
101
118
  "budgets": {
102
119
  "type": "object",
103
120
  "description": "Named budget thresholds the run should be evaluated against.",
104
- "minProperties": 1,
105
121
  "additionalProperties": {
106
122
  "$ref": "#/$defs/budgetThreshold"
107
123
  }
@@ -112,6 +128,41 @@
112
128
  "$ref": "#/$defs/timelineEvent"
113
129
  }
114
130
  },
131
+ "iterationSummary": {
132
+ "type": "object",
133
+ "additionalProperties": false,
134
+ "required": ["expected", "completed", "failed", "timeouts", "incomplete", "status"],
135
+ "properties": {
136
+ "expected": {
137
+ "type": "integer",
138
+ "minimum": 1
139
+ },
140
+ "completed": {
141
+ "type": "integer",
142
+ "minimum": 0
143
+ },
144
+ "failed": {
145
+ "type": "integer",
146
+ "minimum": 0
147
+ },
148
+ "timeouts": {
149
+ "type": "integer",
150
+ "minimum": 0
151
+ },
152
+ "incomplete": {
153
+ "type": "array",
154
+ "uniqueItems": true,
155
+ "items": {
156
+ "type": "integer",
157
+ "minimum": 1
158
+ }
159
+ },
160
+ "status": {
161
+ "type": "string",
162
+ "enum": ["complete", "partial", "failed", "timeout"]
163
+ }
164
+ }
165
+ },
115
166
  "artifacts": {
116
167
  "type": "object",
117
168
  "additionalProperties": false,
@@ -169,12 +220,31 @@
169
220
  "items": {
170
221
  "type": "object",
171
222
  "additionalProperties": false,
172
- "required": ["channel", "kind", "path", "sha256", "sizeBytes", "sourceFileName"],
223
+ "required": [
224
+ "channel",
225
+ "completenessStatus",
226
+ "corruptionStatus",
227
+ "kind",
228
+ "path",
229
+ "redactionStatus",
230
+ "sha256",
231
+ "sizeBytes",
232
+ "sourceFileName",
233
+ "transformations"
234
+ ],
173
235
  "properties": {
174
236
  "channel": {
175
237
  "type": "string",
176
238
  "enum": ["capture", "provider", "signal"]
177
239
  },
240
+ "completenessStatus": {
241
+ "type": "string",
242
+ "enum": ["complete", "truncated", "unknown"]
243
+ },
244
+ "corruptionStatus": {
245
+ "type": "string",
246
+ "enum": ["valid", "corrupt", "unknown"]
247
+ },
178
248
  "kind": {
179
249
  "type": "string",
180
250
  "enum": ["accessibility", "js", "logs", "memory", "network", "profiler", "screenshot", "uiTree", "video"]
@@ -183,6 +253,10 @@
183
253
  "type": "string",
184
254
  "minLength": 1
185
255
  },
256
+ "redactionStatus": {
257
+ "type": "string",
258
+ "enum": ["not-redacted", "redacted", "unknown"]
259
+ },
186
260
  "sha256": {
187
261
  "type": "string",
188
262
  "pattern": "^[a-f0-9]{64}$"
@@ -194,6 +268,15 @@
194
268
  "sourceFileName": {
195
269
  "type": "string",
196
270
  "minLength": 1
271
+ },
272
+ "transformations": {
273
+ "type": "array",
274
+ "minItems": 1,
275
+ "uniqueItems": true,
276
+ "items": {
277
+ "type": "string",
278
+ "enum": ["copied", "normalized", "redacted", "truncated", "compressed", "transcoded", "unknown"]
279
+ }
197
280
  }
198
281
  }
199
282
  }
package/schemas/comparison.schema.json CHANGED
@@ -33,7 +33,7 @@
33
33
  },
34
34
  "comparisonStatus": {
35
35
  "type": "string",
36
- "enum": ["better", "worse", "unchanged", "mixed", "inconclusive"]
36
+ "enum": ["better", "worse", "unchanged", "mixed", "inconclusive", "low_confidence"]
37
37
  },
38
38
  "healthStatus": {
39
39
  "type": "string",
@@ -46,6 +46,9 @@
46
46
  "comparisonBasis": {
47
47
  "$ref": "#/$defs/comparisonBasis"
48
48
  },
49
+ "measurementPolicy": {
50
+ "$ref": "#/$defs/measurementPolicy"
51
+ },
49
52
  "metricComparisons": {
50
53
  "type": "array",
51
54
  "items": {
@@ -135,6 +138,10 @@
135
138
  "type": "string",
136
139
  "pattern": "^[a-f0-9]{64}$"
137
140
  },
141
+ "cohortHash": {
142
+ "type": "string",
143
+ "pattern": "^[a-f0-9]{64}$"
144
+ },
138
145
  "selectedRunDir": {
139
146
  "type": "string"
140
147
  },
@@ -159,6 +166,10 @@
159
166
  "trustedScenarioContractCandidates": {
160
167
  "type": "integer",
161
168
  "minimum": 0
169
+ },
170
+ "trustedCohortCandidates": {
171
+ "type": "integer",
172
+ "minimum": 0
162
173
  }
163
174
  }
164
175
  },
@@ -185,12 +196,129 @@
185
196
  },
186
197
  "status": {
187
198
  "type": "string",
188
- "enum": ["better", "worse", "unchanged", "inconclusive"]
199
+ "enum": ["better", "worse", "unchanged", "inconclusive", "low_confidence"]
189
200
  },
190
201
  "notes": {
191
202
  "type": "string"
192
203
  }
193
204
  }
205
+ },
206
+ "measurementPolicy": {
207
+ "type": "object",
208
+ "additionalProperties": false,
209
+ "required": ["baselineSelection", "samples", "tolerance", "confidence"],
210
+ "properties": {
211
+ "baselineSelection": {
212
+ "type": "object",
213
+ "additionalProperties": false,
214
+ "required": ["mode", "poisoningProtection"],
215
+ "properties": {
216
+ "mode": {
217
+ "type": "string",
218
+ "enum": ["explicit", "latestTrustedPrior"]
219
+ },
220
+ "poisoningProtection": {
221
+ "type": "object",
222
+ "additionalProperties": false,
223
+ "required": ["requirePassedHealth", "requirePassedVerdict", "requireMatchingScenarioId"],
224
+ "properties": {
225
+ "requirePassedHealth": {
226
+ "type": "boolean"
227
+ },
228
+ "requirePassedVerdict": {
229
+ "type": "boolean"
230
+ },
231
+ "requireMatchingScenarioId": {
232
+ "type": "boolean"
233
+ },
234
+ "comparisonLane": {
235
+ "type": "string"
236
+ },
237
+ "scenarioHash": {
238
+ "type": "string",
239
+ "pattern": "^[a-f0-9]{64}$"
240
+ },
241
+ "cohortHash": {
242
+ "type": "string",
243
+ "pattern": "^[a-f0-9]{64}$"
244
+ }
245
+ }
246
+ }
247
+ }
248
+ },
249
+ "samples": {
250
+ "type": "object",
251
+ "additionalProperties": false,
252
+ "required": ["baseline", "current"],
253
+ "properties": {
254
+ "baseline": {
255
+ "$ref": "#/$defs/samplePolicy"
256
+ },
257
+ "current": {
258
+ "$ref": "#/$defs/samplePolicy"
259
+ }
260
+ }
261
+ },
262
+ "tolerance": {
263
+ "type": "object",
264
+ "additionalProperties": false,
265
+ "required": ["timing"],
266
+ "properties": {
267
+ "timing": {
268
+ "type": "object",
269
+ "additionalProperties": false,
270
+ "required": ["absoluteMs", "relative"],
271
+ "properties": {
272
+ "absoluteMs": {
273
+ "type": "number",
274
+ "minimum": 0
275
+ },
276
+ "relative": {
277
+ "type": "number",
278
+ "minimum": 0
279
+ }
280
+ }
281
+ }
282
+ }
283
+ },
284
+ "confidence": {
285
+ "type": "object",
286
+ "additionalProperties": false,
287
+ "required": ["level", "minValidSamples"],
288
+ "properties": {
289
+ "level": {
290
+ "type": "string",
291
+ "enum": ["single_run", "multi_sample", "insufficient", "low_confidence"]
292
+ },
293
+ "minValidSamples": {
294
+ "type": "integer",
295
+ "minimum": 1
296
+ },
297
+ "reason": {
298
+ "type": "string"
299
+ }
300
+ }
301
+ }
302
+ }
303
+ },
304
+ "samplePolicy": {
305
+ "type": "object",
306
+ "additionalProperties": false,
307
+ "required": ["validSamples", "warmupSamples", "outliersExcluded"],
308
+ "properties": {
309
+ "validSamples": {
310
+ "type": "integer",
311
+ "minimum": 0
312
+ },
313
+ "warmupSamples": {
314
+ "type": "integer",
315
+ "minimum": 0
316
+ },
317
+ "outliersExcluded": {
318
+ "type": "integer",
319
+ "minimum": 0
320
+ }
321
+ }
194
322
  }
195
323
  }
196
324
  }