agent-scenario-loop 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/README.md +9 -9
  2. package/app/profile-session.ts +352 -12
  3. package/dist/core/agent-summary.d.ts +3 -2
  4. package/dist/core/agent-summary.js +44 -2
  5. package/dist/core/artifact-contract.d.ts +28 -8
  6. package/dist/core/artifact-contract.js +676 -26
  7. package/dist/core/comparison.d.ts +57 -3
  8. package/dist/core/comparison.js +113 -1
  9. package/dist/core/planner.d.ts +32 -1
  10. package/dist/core/planner.js +144 -0
  11. package/dist/core/run-index.d.ts +4 -0
  12. package/dist/core/run-index.js +55 -1
  13. package/dist/core/schema-validator.d.ts +2 -0
  14. package/dist/core/schema-validator.js +2 -0
  15. package/dist/runner/android-adb-driver.d.ts +7 -2
  16. package/dist/runner/android-adb-driver.js +7 -1
  17. package/dist/runner/android-adb.d.ts +40 -5
  18. package/dist/runner/android-adb.js +1046 -664
  19. package/dist/runner/compare-latest.d.ts +8 -4
  20. package/dist/runner/compare-latest.js +24 -5
  21. package/dist/runner/example-android-live.d.ts +10 -1
  22. package/dist/runner/example-android-live.js +55 -0
  23. package/dist/runner/example-ios-live.d.ts +10 -1
  24. package/dist/runner/example-ios-live.js +55 -0
  25. package/dist/runner/ios-simctl.d.ts +6 -0
  26. package/dist/runner/ios-simctl.js +7 -0
  27. package/dist/runner/live-comparison.d.ts +2 -2
  28. package/dist/runner/live-comparison.js +2 -1
  29. package/dist/runner/live-proof-summary.d.ts +5 -4
  30. package/dist/runner/live-proof-summary.js +12 -2
  31. package/dist/runner/live-proof.d.ts +3 -2
  32. package/dist/runner/live-proof.js +9 -2
  33. package/dist/runner/profile-android.d.ts +16 -1
  34. package/dist/runner/profile-android.js +364 -26
  35. package/dist/runner/profile-ios.d.ts +13 -2
  36. package/dist/runner/profile-ios.js +341 -19
  37. package/dist/runner/profile-mobile.d.ts +39 -3
  38. package/dist/runner/profile-mobile.js +1054 -42
  39. package/dist/runner/validate-project.js +3 -0
  40. package/dist/scripts/consumer-rehearsal.d.ts +119 -0
  41. package/dist/scripts/consumer-rehearsal.js +757 -0
  42. package/dist/scripts/downstream-local-package-gate.d.ts +2 -0
  43. package/dist/scripts/downstream-local-package-gate.js +264 -0
  44. package/dist/scripts/package-smoke.d.ts +96 -0
  45. package/dist/scripts/package-smoke.js +2282 -0
  46. package/dist/scripts/release-readiness.d.ts +2 -0
  47. package/dist/scripts/release-readiness.js +520 -0
  48. package/docs/adapters.md +7 -1
  49. package/docs/api.md +2 -2
  50. package/docs/architecture.md +90 -0
  51. package/docs/authoring.md +39 -3
  52. package/docs/concepts.md +3 -24
  53. package/docs/consumer-rehearsal.md +31 -1
  54. package/docs/contracts.md +45 -101
  55. package/docs/external-adapter-protocol.md +219 -0
  56. package/docs/live-proofs.md +86 -3
  57. package/docs/principles.md +9 -15
  58. package/examples/mobile-app/README.md +12 -0
  59. package/examples/mobile-app/runner-manifests/evidence-provider.json +3 -3
  60. package/examples/mobile-app/runner-manifests/primary-runner.json +1 -0
  61. package/examples/mobile-app/scripts/asl-capture-profiler-provider.mjs +25 -0
  62. package/examples/runners/README.md +4 -3
  63. package/examples/runners/adb-android.json +1 -0
  64. package/examples/runners/agent-device-android.json +1 -0
  65. package/examples/runners/agent-device-ios.json +1 -0
  66. package/examples/runners/argent-android.json +1 -0
  67. package/examples/runners/argent-ios.json +1 -0
  68. package/examples/runners/axe-accessibility-provider.json +2 -2
  69. package/examples/runners/script-accessibility-provider.json +2 -2
  70. package/examples/runners/script-memory-provider.json +2 -2
  71. package/examples/runners/script-network-provider.json +2 -2
  72. package/examples/runners/script-profiler-provider.json +2 -2
  73. package/examples/runners/xcodebuildmcp-ios.json +1 -0
  74. package/package.json +12 -3
  75. package/schemas/causal-run.schema.json +85 -2
  76. package/schemas/comparison.schema.json +130 -2
  77. package/schemas/external-adapter-message.schema.json +693 -0
  78. package/schemas/health.schema.json +72 -0
  79. package/schemas/live-proof-set.schema.json +1 -1
  80. package/schemas/live-proof.schema.json +14 -6
  81. package/schemas/manifest.schema.json +515 -4
  82. package/schemas/profiler.schema.json +243 -0
  83. package/schemas/runner-capabilities.schema.json +28 -2
  84. package/schemas/scenario.schema.json +34 -2
  85. package/templates/evidence-provider.json +3 -3
  86. package/templates/primary-runner.json +1 -0
  87. package/templates/scripts/asl-capture-profiler-provider.mjs +20 -0
@@ -14,6 +14,27 @@ pnpm demo:loop -- --out artifacts/demo-loop
14
14
 
15
15
  The command runs preflight, profiles baseline/current event logs, writes run artifacts, compares the current run against the latest trusted prior run, and refreshes the current run's `agent-summary.md`.
16
16
 
17
+ It writes:
18
+
19
+ - `preflight/app-startup/health.json`
20
+ - `preflight/app-startup/verdict.json`
21
+ - `preflight/app-startup/agent-summary.md`
22
+ - `profile-runs/app-startup/demo-baseline/*`
23
+ - `profile-runs/app-startup/demo-current/*`
24
+ - `profile-runs/app-startup/demo-current/comparison.json`
25
+
26
+ This is not a replacement for live device proof. It is a stable contract check that keeps the evidence loop reproducible through trusted prior-run selection while iOS or Android runtime setup is unavailable.
27
+
28
+ ## Plan Check
29
+
30
+ Use `check-plan` to validate a scenario, runner manifest, and optional evidence-provider manifests before execution:
31
+
32
+ ```bash
33
+ pnpm check-plan -- --scenario examples/scenarios/mobile/app-startup.json --runner examples/runners/xcodebuildmcp-ios.json --platform ios --out artifacts/plan/app-startup
34
+ ```
35
+
36
+ This validates the input manifests, writes schema-checked `health.json` and `verdict.json`, writes `agent-summary.md`, and includes the raw planner match in `planner-compatibility.json`.
37
+
17
38
  ## Host/Device Access
18
39
 
19
40
  Keep deterministic validation and live device proof as separate execution lanes.
@@ -69,6 +90,48 @@ ASL_ARGENT_BIN=pnpm \
69
90
 
70
91
  The doctor composes the existing adb, simctl, agent-device, and Argent checks into one ASL artifact set. A failed doctor is environment evidence, not product evidence: fix the host access or command shape before starting scenario execution.
71
92
 
93
+ ## Platform Preflight and Profile Capture
94
+
95
+ Use `android:preflight` to verify adb and connected-device readiness before adding live Android scenario execution:
96
+
97
+ ```bash
98
+ pnpm android:preflight -- --package com.example.app --out artifacts/android-adb-preflight
99
+ ```
100
+
101
+ The command writes `health.json`, `verdict.json`, `agent-summary.md`, `raw/adb-version.txt`, `raw/adb-devices.txt`, and `raw/android-metadata.json`. If the adb check, the connected-online-device check, or an optional package check fails, health fails and the verdict remains `inconclusive`.
102
+
103
+ Add `--capture-logcat --logcat-lines <count>` to write `raw/adb-logcat.txt` in the same artifact folder. Add `--react-native-debug-host <host:port>` with `--package <name>` for React Native development builds that need adb reverse plus the app's `debug_http_host` preference set before launch; the runner writes `raw/adb-react-native-reverse.txt` and `raw/adb-react-native-debug-host.txt`. Add `--clear-logcat --launch --wait-ms <ms>` with `--package <name>` to clear logs, launch the package, wait for a bounded capture window, and then collect logcat evidence. If the requested capture-window setup or the logcat capture fails, scenario health fails because timing and event evidence would be incomplete.
104
+
105
+ Use captured logcat evidence directly with Android profiling:
106
+
107
+ ```bash
108
+ pnpm profile:android -- --config core/config-template.json --scenario examples/mobile-app/scenarios/android/app-startup.json --adb-artifacts artifacts/android-adb-preflight --run-id android-run-1
109
+ ```
110
+
111
+ Or let Android profiling own the adb capture window before it writes profile artifacts:
112
+
113
+ ```bash
114
+ pnpm profile:android -- --config core/config-template.json --scenario examples/mobile-app/scenarios/android/app-startup.json --adb-capture --react-native-debug-host localhost:8097 --clear-logcat --launch --run-id android-run-1
115
+ ```
116
+
117
+ Use `profile:ios --simctl-capture` when the example app or a consuming app is already installed on a booted simulator:
118
+
119
+ ```bash
120
+ pnpm profile:ios -- --config core/config-template.json --scenario examples/mobile-app/scenarios/ios/app-startup.json --simctl-capture --profile-session --profile-session-storage --launch --run-id ios-run-1
121
+ ```
122
+
123
+ The command writes a separate simctl capture folder under the selected output root, seeds the app-owned profile session into native AsyncStorage before launch, then collects stored app profile events after the capture window. Command scenarios seed the scenario command queue through the same storage contract before launch. Command envelopes preserve `commandId`, `sequence`, `queueId`, command pacing `waitMs`, and, for normalized execution-plan commands followed by a milestone wait, `waitForMilestone` plus `waitTimeoutMs`. Deep-link command transport uses the same envelope in query parameters. When `raw/ios-profile-events.log` exists, the iOS profile runner ingests that stored truth-event log; otherwise it falls back to `raw/ios-simctl-log.txt`.
124
+
125
+ Profile manifests only list sidecar paths that were copied into the profile run or deliberately referenced as external sidecar evidence. If a simctl or adb capture folder is the real evidence source, `manifest.artifacts.diagnostics` records the diagnostic status plus `sidecarRoot`/`evidenceDependency` instead of inventing profile-root files such as `raw/device.log`, `captures/run.mp4`, or `captures/ui-tree.json`. Rehydrated runs may record `evidenceDependency.root: "sidecar"` with paths relative to `sidecarRoot`, so agents do not have to reason from long `../../` paths alone.
126
+
127
+ When a scenario requests a screenshot, pass supported simulator screenshot options through the iOS capture command with `--screenshot-type`, `--screenshot-display`, or `--screenshot-mask`; ASL records the chosen options in capture metadata and the resulting path in `manifest.artifacts.captures.screenshots`.
128
+
129
+ For profile-session capture on Android or iOS, omitting `--wait-ms` lets ASL derive the final evidence window from scenario execution waits and cycle count. On iOS, command-backed profile sessions use the expanded command queue, including setup commands, repeated cycle body commands, command pacing `waitMs`, milestone-gate `waitTimeoutMs`, and a conservative buffer. Explicit `--wait-ms` remains authoritative when a consuming app has a known startup or logging delay that the scenario cannot express.
130
+
131
+ Scenario command targets live in `adapterOptions.iosSimctl.commands`, while the app handles them through `registerProfileCommandTargetHandler`. The iOS proof does not depend on unified logs carrying JavaScript console output; it depends on app-owned stored profile events.
132
+
133
+ Attach independently produced provider evidence with `--signal <js|memory|network>:<path>` or `--capture <screenshot|video|uiTree>:<path>` so profile commands copy those files into stable run folders and inventory them in `manifest.artifacts.evidenceAttachments`.
134
+
72
135
  ## Generic Mobile Proof
73
136
 
74
137
  Use the generic live runners in a consuming app after `asl-init` has created `asl.config.json`, `scenarios/mobile/<id>.json`, and the `asl:*` package-script snippets:
@@ -134,9 +197,17 @@ pnpm example:android:live -- --run-suffix before-change
134
197
  pnpm example:android:live -- --run-suffix after-change
135
198
  ```
136
199
 
200
+ After dependency, native-build, or scenario-contract changes, use `--seed-baseline` to capture a trusted same-cohort baseline immediately before the measured run. The seeded profiles use `*-baseline` run ids, must pass health and verdict, and stay in the same comparison lane:
201
+
202
+ ```bash
203
+ pnpm example:android:live -- --run-suffix release-check --seed-baseline
204
+ ```
205
+
206
+ When latest-trusted comparison sees slower single-run timing but both baseline and current remain inside their budgets, ASL reports `low_confidence` instead of `regressed`. Treat that as a repeat-or-sample signal, not proof of product regression.
207
+
137
208
  Read [Example Mobile App: Android Capture](../examples/mobile-app/README.md#android-capture) for Metro routing, adb permissions, individual scenario commands, selector behavior, and optional video capture.
138
209
 
139
- Expo dev-client Android shells may need an explicit Metro deep link after the native app launches. Put that local URL in ignored env state, for example `ASL_EXAMPLE_ANDROID_DEV_CLIENT_URL=asl-example://expo-development-client/?url=http%3A%2F%2F10.0.2.2%3A8097`, so Android profile capture opens the correct app session before profile-session deep links. When bundle load time is variable, also set `ASL_EXAMPLE_ANDROID_DEV_CLIENT_READY_PATTERN='Running "main"'` so the runner waits for bounded logcat readiness evidence before sending scenario links.
210
+ Expo dev-client Android shells may need an explicit Metro deep link after the native app launches. Put that local URL in ignored env state, for example `ASL_EXAMPLE_ANDROID_DEV_CLIENT_URL=asl-example://expo-development-client/?url=http%3A%2F%2F10.0.2.2%3A8097`, so Android profile capture opens the correct app session before profile-session control. With storage-backed profile sessions, ASL waits for `Running "main"` by default before writing AsyncStorage. Set `ASL_EXAMPLE_ANDROID_DEV_CLIENT_READY_PATTERN` only when the app has a more precise readiness marker. If readiness fails, the runner reports an unhealthy startup gate and does not deliver stored commands or profile-session deep links.
140
211
 
141
212
  Apps using the ASL profile-session AsyncStorage bridge can opt into storage delivery with `--android-profile-session-storage`. The Android runner resolves the session `startedAt` from the selected device clock before writing AsyncStorage, so milestone timing stays device-relative instead of host-clock-relative. Override the default storage keys with `ASL_ANDROID_PROFILE_SESSION_STORAGE_KEY` and `ASL_ANDROID_PROFILE_COMMAND_STORAGE_KEY` only when adopting an existing app-owned bridge.
142
213
 
@@ -163,6 +234,14 @@ The root example live scripts pass `--compare-latest --fail-on-regression` by de
163
234
  pnpm example:ios:live -- --run-suffix after-change
164
235
  ```
165
236
 
237
+ Use `--seed-baseline` for fresh release checks where no compatible trusted iOS baseline exists yet:
238
+
239
+ ```bash
240
+ pnpm example:ios:live -- --run-suffix release-check --seed-baseline
241
+ ```
242
+
243
+ The same `low_confidence` comparison policy applies to iOS seeded baselines, where simulator and dev-client startup timing can vary between adjacent runs while still satisfying product budgets.
244
+
166
245
  Expo dev-client iOS shells may need an explicit Metro deep link after the native app launches. Put that local URL in ignored env state, for example `ASL_EXAMPLE_IOS_DEV_CLIENT_URL=asl-example://expo-development-client/?url=http%3A%2F%2Flocalhost%3A8097`, so iOS profile capture opens the correct app session before collecting evidence.
167
246
 
168
247
  The default iOS live proof transport seeds profile-session control into simulator app storage. Use `--ios-profile-session-transport deeplink` when the app should receive profile-session start and command control through app URLs instead.
@@ -247,6 +326,10 @@ pnpm compare:latest \
247
326
 
248
327
  Scenario health must pass before timing or budget evidence can support an improvement or regression claim.
249
328
 
329
+ The comparison gate is intentionally strict. If either run failed scenario health, or if the scenario ids do not match, the comparison is `inconclusive`. Numeric budget checks are compared only after that health gate passes. `comparison.json` includes `comparisonBasis` with the baseline/current run ids and run directories, giving agents artifact-local provenance instead of forcing them to infer it from folder names. It also includes `measurementPolicy`, which records the baseline selection mode, poisoning protections, valid sample counts, timing tolerance, and confidence level used for the comparison.
330
+
331
+ The latest-trusted command excludes the exact current run directory from baseline selection. Baseline trust requires passed health and passed verdict. For attempt-aware artifacts, baseline trust also requires a clean first passed attempt, no retry lineage, no failed or partial cleanup, and no valid partial-artifact diagnostic fragments. Current runs must pass scenario health before the command will compare timing or budget evidence. If the current manifest declares `comparisonLane`, baseline selection is scoped to trusted prior runs with the same lane; if the current manifest has no lane, selection stays within unlabeled trusted prior runs. Profile manifests also include `scenarioHash`, a stable fingerprint of the normalized scenario contract. When the current run has that hash, latest-trusted selection only compares against trusted prior runs with the same hash; legacy runs without the hash remain comparable only to legacy current runs. This keeps proof modes such as plain live proof and live proof plus agent-device sidecar from comparing against each other, and it keeps migrated scenario definitions from poisoning before/after verdicts. Latest-trusted artifacts set `comparisonBasis.strategy` to `latest_trusted_prior`, record selection counts for inspected, trusted, trusted-prior, lane-comparable, and scenario-contract-comparable candidates, and mirror the active lane, scenario hash, and cohort hash inside `measurementPolicy.baselineSelection.poisoningProtection` when those filters are active.
332
+
250
333
  ## Release Gate
251
334
 
252
335
  Before publishing, run:
@@ -263,8 +346,8 @@ Package smoke and consumer rehearsal keep child commands bounded so package-mana
263
346
  ASL_PACKAGE_GATE_TIMEOUT_MS=300000 pnpm release:check
264
347
  ```
265
348
 
266
- Read next:
349
+ ## Side References
267
350
 
268
- - [Contracts](contracts.md) for artifact layout and supported runner surface
269
351
  - [Consumer App Rehearsal](consumer-rehearsal.md) for adoption inside an existing app
270
352
  - [examples/mobile-app](../examples/mobile-app/README.md) for detailed dogfood app commands
353
+ - [Public API](api.md) for package imports and programmable runner composition
@@ -1,28 +1,24 @@
1
1
  # Principles
2
2
 
3
- `agent-scenario-loop` is a scenario orchestration and evidence collection layer for agent-driven software development.
3
+ `agent-scenario-loop` has one durable claim: scenarios, contracts, and evidence must outlive the current runner.
4
4
 
5
- Read this after [Concepts](concepts.md) if you want the project doctrine in a compact form.
6
-
7
- The durable value is not any one runner. The durable value is a stable scenario and evidence contract that survives runner changes.
8
-
9
- It is not another agent runner. It is the layer that coordinates runners, preserves evidence, and keeps scenarios useful as tooling changes.
10
-
11
- Scenarios are long-lived project assets. They describe important application behaviors, not the temporary mechanics of the current runner.
5
+ Read this after [Concepts](concepts.md). Concepts explains the model; this page is the compressed doctrine.
12
6
 
13
7
  ## Four planes
14
8
 
9
+ ASL separates mobile proof into four planes. Mixing them is the usual source of flaky claims.
10
+
15
11
  1. Control plane
16
- Use semantic app commands, deep links, and deterministic hooks before falling back to raw UI replay.
12
+ Use semantic app commands, deep links, and deterministic hooks to start and steer the scenario. Raw UI replay is a realism check, not the preferred control architecture.
17
13
 
18
14
  2. Truth plane
19
- Use explicit profile events, stored signals, route state, and committed artifacts as the source of truth.
15
+ Use app-owned truth events, stored signals, route state, and committed artifacts as the source of what happened.
20
16
 
21
17
  3. Evidence plane
22
- Preserve logs, screenshots, videos, profiler exports, memory captures, network captures, UI trees, metrics, and verdicts in one stable artifact layout.
18
+ Preserve logs, screenshots, videos, profiler exports, memory captures, network captures, UI trees, metrics, verdicts, comparisons, and summaries in one stable artifact layout.
23
19
 
24
20
  4. Realism plane
25
- Use taps, swipes, and full UI interaction for realism checks and last-mile validation, not as the primary control architecture.
21
+ Use taps, swipes, alerts, full UI interaction, and external device tools to prove the app still behaves under real interaction pressure.
26
22
 
27
23
  ## Invariants
28
24
 
@@ -41,6 +37,4 @@ Use taps, swipes, and full UI interaction for realism checks and last-mile valid
41
37
 
42
38
  ## Read next
43
39
 
44
- - [Contracts](contracts.md) for the current artifact and package surface
45
- - [Runner docs](../runner/README.md) for the host execution boundary
46
- - [README](../README.md) for the project entrypoint
40
+ - [Architecture](architecture.md) for the TypeScript-first, language-neutral contract boundary
@@ -233,6 +233,12 @@ pnpm example:android:live -- --run-suffix before-change
233
233
  pnpm example:android:live -- --run-suffix after-change
234
234
  ```
235
235
 
236
+ When a release, native build, or scenario edit changes the compatible cohort, seed a fresh trusted baseline in the same command:
237
+
238
+ ```bash
239
+ pnpm example:android:live -- --run-suffix release-check --seed-baseline
240
+ ```
241
+
236
242
  The individual live commands remain useful while debugging one scenario:
237
243
 
238
244
  ```bash
@@ -289,6 +295,12 @@ The root example live scripts pass `--compare-latest --fail-on-regression` by de
289
295
  pnpm example:ios:live -- --run-suffix after-change
290
296
  ```
291
297
 
298
+ When there is no compatible trusted iOS baseline for the current release cohort, seed one before the measured run:
299
+
300
+ ```bash
301
+ pnpm example:ios:live -- --run-suffix release-check --seed-baseline
302
+ ```
303
+
292
304
  If global `xcode-select` points at a beta Xcode whose simulator services are not ready, set `ASL_EXAMPLE_XCODE_DEVELOPER_DIR` before the Node runner starts:
293
305
 
294
306
  ```bash
@@ -6,11 +6,11 @@
6
6
  "capabilities": ["accessibility", "memory", "network", "profiler"],
7
7
  "driverActions": ["collectPerfSignals"],
8
8
  "artifactOutputs": ["accessibility", "memory", "network", "profiler", "signals"],
9
- "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
9
+ "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
10
10
  "providerCommands": [
11
11
  {
12
12
  "id": "capture-accessibility",
13
- "phase": "capture",
13
+ "phase": "afterCapture",
14
14
  "command": "node",
15
15
  "cwd": "..",
16
16
  "args": [
@@ -34,7 +34,7 @@
34
34
  },
35
35
  {
36
36
  "id": "capture-profiler",
37
- "phase": "capture",
37
+ "phase": "afterCapture",
38
38
  "command": "node",
39
39
  "cwd": "..",
40
40
  "args": [
@@ -6,6 +6,7 @@
6
6
  "capabilities": ["launch", "sessionControl", "command", "logCapture", "artifactWrite"],
7
7
  "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot", "record", "readLogs"],
8
8
  "artifactOutputs": ["logs", "signals", "screenshot", "video", "uiTree"],
9
+ "uiContexts": ["app"],
9
10
  "lifecycle": [
10
11
  "prepare",
11
12
  "launch",
@@ -80,6 +80,31 @@ function writeProviderEvidence({
80
80
 
81
81
  writeJsonArtifact(outPath, {
82
82
  ...shared,
83
+ captureMode: 'afterCapture',
84
+ completenessStatus: 'complete',
85
+ comparability: {
86
+ status: 'diagnostic-only',
87
+ reason: 'Deterministic example evidence is collected after the profile window and is not used for timing budgets.',
88
+ },
89
+ dataClasses: ['cpu-samples'],
90
+ lifecycle: {
91
+ phase: 'afterCapture',
92
+ perturbsTiming: false,
93
+ },
94
+ metrics: {
95
+ commitCount: 0,
96
+ droppedFrameCount: 0,
97
+ jsLongTaskCount: 0,
98
+ },
99
+ profileKind: 'diagnostic-summary',
100
+ targetBinding: {
101
+ status: 'verified',
102
+ source: 'example-provider-fixture',
103
+ },
104
+ tool: {
105
+ name: 'agent-scenario-loop deterministic profiler',
106
+ version: '1.0.0',
107
+ },
83
108
  samples: [],
84
109
  summary: 'Deterministic example profiler evidence for package and consumer rehearsal.',
85
110
  });
@@ -32,13 +32,14 @@ They do not mean the package bundles every named tool. A fixture describes what
32
32
 
33
33
  - Keep `capabilities` about lifecycle or evidence ownership.
34
34
  - Keep `driverActions` about concrete operations the adapter can perform.
35
+ - Keep `uiContexts` about the surface the adapter can own; do not use `app` proof for system dialogs, share sheets, external browsers, WebViews, pickers, notifications, or another app unless the manifest explicitly declares that context.
35
36
  - Do not add a capability or driver action until a runner or provider can produce the corresponding evidence.
36
- - Keep `providerCommands` on evidence-provider manifests; primary runners should own lifecycle orchestration, not provider command wrappers.
37
+ - Keep `providerCommands` on evidence-provider manifests; primary runners should own lifecycle orchestration, not provider command wrappers. Prefer `phase: "afterCapture"` for diagnostics that inspect an already captured adb/simctl sidecar.
37
38
  - When a tool writes files independently, attach them through `--signal`, `--capture`, or a `providerCommands` manifest so the run keeps stable artifact paths.
38
39
  - Treat these manifests as starting contracts; consuming apps can narrow them to match the exact adapter they install.
39
40
 
40
41
  ## Tool Surface Notes
41
42
 
42
- The bundled `agent-device` driver adapter and `asl-agent-device` capture runner map the declared portable subset: app open/close, alert inspection, `tap`, `scroll`, `assertVisible`, `inspectTree`, `screenshot`, and `readLogs`. Planner compatibility validates the agent-device target metadata that must be known before runtime: `tap` needs a selector, `adapterOptions.agentDevice.ref`, or `adapterOptions.agentDevice.x/y`; `assertVisible` needs a portable selector; selector matching must be exact until the adapter maps richer match modes. The agent-device CLI may expose more commands than the fixture declares, including recording, performance, network, trace, batch, and React DevTools operations. Keep those out of the primary runner manifest until an adapter maps them into stable Agent Scenario Loop artifacts. For example, performance or React DevTools output should usually start as an evidence provider or signal attachment, while `record` should only be declared once video capture is wired into `captures.video`.
43
+ The bundled `agent-device` driver adapter and `asl-agent-device` capture runner map the declared portable subset: app open/close, alert inspection, `tap`, `scroll`, `assertVisible`, `inspectTree`, `screenshot`, and `readLogs`. Planner compatibility validates the agent-device target metadata that must be known before runtime: `tap` needs a selector, `adapterOptions.agentDevice.ref`, or `adapterOptions.agentDevice.x/y`; `assertVisible` needs a portable selector; selector matching must be exact until the adapter maps richer match modes. The agent-device CLI may expose more commands than the fixture declares, including recording, performance, network, trace, batch, and React DevTools operations. Keep those out of the primary runner manifest until an adapter maps them into stable Agent Scenario Loop artifacts. For example, Android snapshots, network dumps, and performance evidence can be attached through a provider once the project proves those commands on its devices; React DevTools, traces, and recording should stay in explicit heavy lanes until their outputs are stable ASL artifacts.
43
44
 
44
- The Argent fixtures are external-tool contracts, not bundled package dependencies. `@swmansion/argent` exposes a local MCP/CLI surface for iOS Simulator and Android Emulator control, so Agent Scenario Loop should keep two lanes distinct when an app adopts it: a primary interaction adapter for launch, gestures, screenshot requests, and UI descriptions, and a provider lane for profiler output such as React commit or CPU summaries. Android can pair fast adb interaction with an Argent profiler provider so profiling startup cost does not slow every tap or scroll. iOS adapters should treat native-devtools disconnects, restart-required statuses, required screenshot failures, and root-only UI descriptions as failed scenario health, because timing budgets are not trustworthy when required UI evidence is unverifiable. Optional screenshot failures should stay visible as warnings. When Argent can prove launch and accessibility but its iOS screenshot backend is unavailable, ASL may attach simctl as a screenshot fallback provider while keeping the Argent warning in health. Argent output files should enter ASL through `raw/`, `captures/`, `signals/js`, or provider-command attachments with stable manifest inventory; do not create Argent-specific top-level artifact folders.
45
+ The Argent fixtures are external-tool contracts, not bundled package dependencies. `@swmansion/argent` exposes a local MCP/CLI surface for iOS Simulator and Android Emulator control, so Agent Scenario Loop should keep two lanes distinct when an app adopts it: a primary interaction adapter for launch, gestures, screenshot requests, and UI descriptions, and a provider lane for profiler output such as React commit or CPU summaries. Android can pair fast adb interaction with an Argent profiler provider so profiling startup cost does not slow every tap or scroll. iOS can use Argent `describe` as AXRuntime accessibility evidence when that command is reliable for the selected simulator and bundle; treat native UIKit hierarchy restart requirements as a separate unsupported or heavy diagnostic until the project can capture them consistently. iOS adapters should treat native-devtools disconnects, restart-required statuses, required screenshot failures, and root-only UI descriptions as failed scenario health, because timing budgets are not trustworthy when required UI evidence is unverifiable. Optional screenshot failures should stay visible as warnings. When Argent can prove launch and accessibility but its iOS screenshot backend is unavailable, ASL may attach simctl as a screenshot fallback provider while keeping the Argent warning in health. Argent output files should enter ASL through `raw/`, `captures/`, `signals/js`, or provider-command attachments with stable manifest inventory; do not create Argent-specific top-level artifact folders. React profiler CPU summaries are lifecycle evidence when they require a prior start/stop session. Provider output should preserve target-binding proof, raw profile attachments, derived summaries, and diagnostic-only/comparable status instead of treating those summaries as passive snapshots.
@@ -12,6 +12,7 @@
12
12
  ],
13
13
  "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot", "record", "readLogs"],
14
14
  "artifactOutputs": ["logs", "signals", "screenshot", "video", "uiTree"],
15
+ "uiContexts": ["app"],
15
16
  "lifecycle": [
16
17
  "prepare",
17
18
  "launch",
@@ -14,6 +14,7 @@
14
14
  ],
15
15
  "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot", "readLogs"],
16
16
  "artifactOutputs": ["logs", "signals", "screenshot", "uiTree"],
17
+ "uiContexts": ["app"],
17
18
  "lifecycle": [
18
19
  "prepare",
19
20
  "launch",
@@ -14,6 +14,7 @@
14
14
  ],
15
15
  "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot", "readLogs"],
16
16
  "artifactOutputs": ["logs", "signals", "screenshot", "uiTree"],
17
+ "uiContexts": ["app"],
17
18
  "lifecycle": [
18
19
  "prepare",
19
20
  "launch",
@@ -14,6 +14,7 @@
14
14
  ],
15
15
  "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot"],
16
16
  "artifactOutputs": ["logs", "signals", "screenshot", "uiTree"],
17
+ "uiContexts": ["app"],
17
18
  "lifecycle": [
18
19
  "prepare",
19
20
  "launch",
@@ -14,6 +14,7 @@
14
14
  ],
15
15
  "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot"],
16
16
  "artifactOutputs": ["logs", "signals", "screenshot", "uiTree"],
17
+ "uiContexts": ["app"],
17
18
  "lifecycle": [
18
19
  "prepare",
19
20
  "launch",
@@ -5,11 +5,11 @@
5
5
  "platforms": ["ios", "android"],
6
6
  "capabilities": ["accessibility"],
7
7
  "artifactOutputs": ["accessibility"],
8
- "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
8
+ "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
9
9
  "providerCommands": [
10
10
  {
11
11
  "id": "capture-accessibility",
12
- "phase": "capture",
12
+ "phase": "afterCapture",
13
13
  "command": "axe",
14
14
  "args": ["--output", "{providerDir}/accessibility.json"],
15
15
  "outputs": [
@@ -5,11 +5,11 @@
5
5
  "platforms": ["ios", "android"],
6
6
  "capabilities": ["accessibility"],
7
7
  "artifactOutputs": ["accessibility"],
8
- "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
8
+ "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
9
9
  "providerCommands": [
10
10
  {
11
11
  "id": "capture-accessibility",
12
- "phase": "capture",
12
+ "phase": "afterCapture",
13
13
  "command": "capture-accessibility",
14
14
  "args": ["--platform", "{platform}", "--out", "{providerDir}/accessibility.json"],
15
15
  "outputs": [
@@ -5,11 +5,11 @@
5
5
  "platforms": ["ios", "android"],
6
6
  "capabilities": ["memory"],
7
7
  "artifactOutputs": ["memory", "signals"],
8
- "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
8
+ "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
9
9
  "providerCommands": [
10
10
  {
11
11
  "id": "capture-memory",
12
- "phase": "capture",
12
+ "phase": "afterCapture",
13
13
  "command": "capture-memory",
14
14
  "args": ["--platform", "{platform}", "--run-id", "{runId}", "--out", "{providerDir}/memory.json"],
15
15
  "outputs": [
@@ -5,11 +5,11 @@
5
5
  "platforms": ["ios", "android"],
6
6
  "capabilities": ["network"],
7
7
  "artifactOutputs": ["network", "signals"],
8
- "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
8
+ "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
9
9
  "providerCommands": [
10
10
  {
11
11
  "id": "capture-network",
12
- "phase": "capture",
12
+ "phase": "afterCapture",
13
13
  "command": "capture-network",
14
14
  "args": ["--platform", "{platform}", "--scenario", "{scenarioId}", "--out", "{providerDir}/network.har"],
15
15
  "outputs": [
@@ -6,11 +6,11 @@
6
6
  "capabilities": ["profiler"],
7
7
  "driverActions": ["collectPerfSignals"],
8
8
  "artifactOutputs": ["profiler", "signals"],
9
- "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
9
+ "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
10
10
  "providerCommands": [
11
11
  {
12
12
  "id": "capture-profiler",
13
- "phase": "capture",
13
+ "phase": "afterCapture",
14
14
  "command": "capture-profiler",
15
15
  "args": ["--platform", "{platform}", "--run-id", "{runId}", "--out", "{providerDir}/profiler.json"],
16
16
  "outputs": [
@@ -16,6 +16,7 @@
16
16
  ],
17
17
  "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot", "record", "readLogs"],
18
18
  "artifactOutputs": ["logs", "signals", "screenshot", "video", "uiTree", "accessibility"],
19
+ "uiContexts": ["app"],
19
20
  "lifecycle": [
20
21
  "prepare",
21
22
  "launch",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-scenario-loop",
3
- "version": "0.1.2",
3
+ "version": "0.1.4",
4
4
  "private": false,
5
5
  "description": "Scenario orchestration and evidence collection for agent-driven software development. Bring your own runner. Keep your scenarios. Keep your evidence.",
6
6
  "license": "MIT",
@@ -59,6 +59,12 @@
59
59
  "require": "./dist/index.js",
60
60
  "default": "./dist/index.js"
61
61
  },
62
+ "./app/profile-session": {
63
+ "types": "./app/profile-session.ts",
64
+ "import": "./app/profile-session.ts",
65
+ "require": "./app/profile-session.ts",
66
+ "default": "./app/profile-session.ts"
67
+ },
62
68
  "./runner/android-adb": {
63
69
  "types": "./dist/runner/android-adb.d.ts",
64
70
  "import": "./dist/runner/android-adb.js",
@@ -200,7 +206,7 @@
200
206
  "app/profile-session.ts",
201
207
  "core/config-template.json",
202
208
  "dist",
203
- "!dist/scripts",
209
+ "!dist/**/__tests__",
204
210
  "docs",
205
211
  "examples",
206
212
  "!examples/mobile-app/.expo",
@@ -223,6 +229,7 @@
223
229
  "clean": "node -e \"require('node:fs').rmSync('dist',{recursive:true,force:true})\"",
224
230
  "prepack": "npm run clean && npm run build",
225
231
  "consumer:rehearse": "pnpm build && node dist/scripts/consumer-rehearsal.js",
232
+ "downstream:local-package": "pnpm build && node dist/scripts/downstream-local-package-gate.js",
226
233
  "package:smoke": "pnpm build && node dist/scripts/package-smoke.js",
227
234
  "prepublishOnly": "pnpm release:check",
228
235
  "release:check": "pnpm test && pnpm release:readiness && pnpm package:smoke && pnpm consumer:rehearse",
@@ -279,8 +286,10 @@
279
286
  "profile:ios:open-close": "pnpm build && node dist/runner/profile-ios.js --config core/config-template.json --scenario examples/scenarios/ios/open-close-cycle.json",
280
287
  "validate-project": "pnpm build && node dist/runner/validate-project.js"
281
288
  },
289
+ "dependencies": {
290
+ "@types/node": "^25.9.3"
291
+ },
282
292
  "devDependencies": {
283
- "@types/node": "^25.9.3",
284
293
  "typescript": "^6.0.3"
285
294
  }
286
295
  }
@@ -98,10 +98,26 @@
98
98
  }
99
99
  }
100
100
  },
101
+ "provenanceRef": {
102
+ "type": "object",
103
+ "additionalProperties": false,
104
+ "required": ["manifest", "runId"],
105
+ "properties": {
106
+ "manifest": {
107
+ "type": "string"
108
+ },
109
+ "runId": {
110
+ "type": "string"
111
+ },
112
+ "scenarioHash": {
113
+ "type": "string",
114
+ "pattern": "^[a-f0-9]{64}$"
115
+ }
116
+ }
117
+ },
101
118
  "budgets": {
102
119
  "type": "object",
103
120
  "description": "Named budget thresholds the run should be evaluated against.",
104
- "minProperties": 1,
105
121
  "additionalProperties": {
106
122
  "$ref": "#/$defs/budgetThreshold"
107
123
  }
@@ -112,6 +128,41 @@
112
128
  "$ref": "#/$defs/timelineEvent"
113
129
  }
114
130
  },
131
+ "iterationSummary": {
132
+ "type": "object",
133
+ "additionalProperties": false,
134
+ "required": ["expected", "completed", "failed", "timeouts", "incomplete", "status"],
135
+ "properties": {
136
+ "expected": {
137
+ "type": "integer",
138
+ "minimum": 1
139
+ },
140
+ "completed": {
141
+ "type": "integer",
142
+ "minimum": 0
143
+ },
144
+ "failed": {
145
+ "type": "integer",
146
+ "minimum": 0
147
+ },
148
+ "timeouts": {
149
+ "type": "integer",
150
+ "minimum": 0
151
+ },
152
+ "incomplete": {
153
+ "type": "array",
154
+ "uniqueItems": true,
155
+ "items": {
156
+ "type": "integer",
157
+ "minimum": 1
158
+ }
159
+ },
160
+ "status": {
161
+ "type": "string",
162
+ "enum": ["complete", "partial", "failed", "timeout"]
163
+ }
164
+ }
165
+ },
115
166
  "artifacts": {
116
167
  "type": "object",
117
168
  "additionalProperties": false,
@@ -169,12 +220,31 @@
169
220
  "items": {
170
221
  "type": "object",
171
222
  "additionalProperties": false,
172
- "required": ["channel", "kind", "path", "sha256", "sizeBytes", "sourceFileName"],
223
+ "required": [
224
+ "channel",
225
+ "completenessStatus",
226
+ "corruptionStatus",
227
+ "kind",
228
+ "path",
229
+ "redactionStatus",
230
+ "sha256",
231
+ "sizeBytes",
232
+ "sourceFileName",
233
+ "transformations"
234
+ ],
173
235
  "properties": {
174
236
  "channel": {
175
237
  "type": "string",
176
238
  "enum": ["capture", "provider", "signal"]
177
239
  },
240
+ "completenessStatus": {
241
+ "type": "string",
242
+ "enum": ["complete", "truncated", "unknown"]
243
+ },
244
+ "corruptionStatus": {
245
+ "type": "string",
246
+ "enum": ["valid", "corrupt", "unknown"]
247
+ },
178
248
  "kind": {
179
249
  "type": "string",
180
250
  "enum": ["accessibility", "js", "logs", "memory", "network", "profiler", "screenshot", "uiTree", "video"]
@@ -183,6 +253,10 @@
183
253
  "type": "string",
184
254
  "minLength": 1
185
255
  },
256
+ "redactionStatus": {
257
+ "type": "string",
258
+ "enum": ["not-redacted", "redacted", "unknown"]
259
+ },
186
260
  "sha256": {
187
261
  "type": "string",
188
262
  "pattern": "^[a-f0-9]{64}$"
@@ -194,6 +268,15 @@
194
268
  "sourceFileName": {
195
269
  "type": "string",
196
270
  "minLength": 1
271
+ },
272
+ "transformations": {
273
+ "type": "array",
274
+ "minItems": 1,
275
+ "uniqueItems": true,
276
+ "items": {
277
+ "type": "string",
278
+ "enum": ["copied", "normalized", "redacted", "truncated", "compressed", "transcoded", "unknown"]
279
+ }
197
280
  }
198
281
  }
199
282
  }