agent-scenario-loop 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/app/profile-session.ts +263 -17
- package/dist/core/artifact-contract.d.ts +6 -4
- package/dist/core/artifact-contract.js +164 -15
- package/dist/core/artifact-layout.d.ts +2 -0
- package/dist/core/artifact-layout.js +2 -0
- package/dist/core/planner.js +4 -3
- package/dist/core/schema-validator.d.ts +1 -0
- package/dist/core/schema-validator.js +1 -0
- package/dist/runner/android-adb-driver.d.ts +7 -2
- package/dist/runner/android-adb-driver.js +7 -1
- package/dist/runner/android-adb.d.ts +40 -5
- package/dist/runner/android-adb.js +1046 -664
- package/dist/runner/ios-simctl.d.ts +1 -0
- package/dist/runner/ios-simctl.js +1 -0
- package/dist/runner/profile-android.d.ts +11 -1
- package/dist/runner/profile-android.js +266 -25
- package/dist/runner/profile-ios.d.ts +3 -2
- package/dist/runner/profile-ios.js +252 -22
- package/dist/runner/profile-mobile.d.ts +63 -4
- package/dist/runner/profile-mobile.js +1002 -20
- package/dist/runner/validate-project.js +3 -0
- package/dist/scripts/consumer-rehearsal.d.ts +127 -0
- package/dist/scripts/consumer-rehearsal.js +774 -0
- package/dist/scripts/downstream-local-package-gate.d.ts +2 -0
- package/dist/scripts/downstream-local-package-gate.js +264 -0
- package/dist/scripts/package-smoke.d.ts +104 -0
- package/dist/scripts/package-smoke.js +2304 -0
- package/dist/scripts/release-check.d.ts +47 -0
- package/dist/scripts/release-check.js +117 -0
- package/dist/scripts/release-readiness.d.ts +2 -0
- package/dist/scripts/release-readiness.js +539 -0
- package/docs/adapters.md +3 -1
- package/docs/api.md +2 -2
- package/docs/authoring.md +34 -2
- package/docs/consumer-rehearsal.md +33 -1
- package/docs/contracts.md +16 -2
- package/docs/live-proofs.md +12 -4
- package/examples/mobile-app/runner-manifests/evidence-provider.json +3 -3
- package/examples/mobile-app/scripts/asl-capture-profiler-provider.mjs +25 -0
- package/examples/runners/README.md +3 -3
- package/examples/runners/axe-accessibility-provider.json +2 -2
- package/examples/runners/script-accessibility-provider.json +2 -2
- package/examples/runners/script-memory-provider.json +2 -2
- package/examples/runners/script-network-provider.json +2 -2
- package/examples/runners/script-profiler-provider.json +2 -2
- package/package.json +12 -4
- package/schemas/manifest.schema.json +73 -3
- package/schemas/profiler.schema.json +243 -0
- package/schemas/runner-capabilities.schema.json +8 -2
- package/schemas/scenario.schema.json +18 -2
- package/templates/evidence-provider.json +3 -3
- package/templates/scripts/asl-capture-profiler-provider.mjs +20 -0
|
@@ -16,6 +16,38 @@ Package gates run child package-manager and CLI commands with a bounded timeout.
|
|
|
16
16
|
ASL_PACKAGE_GATE_TIMEOUT_MS=300000 pnpm consumer:rehearse
|
|
17
17
|
```
|
|
18
18
|
|
|
19
|
+
When a parent release gate has already packed the current package, pass that tarball through `ASL_PACKAGE_TARBALL` to reuse it instead of packing again:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
ASL_PACKAGE_TARBALL=/path/to/agent-scenario-loop-0.1.4.tgz pnpm consumer:rehearse
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Downstream Local-Package Gate
|
|
26
|
+
|
|
27
|
+
Before publishing a release candidate, validate the packed local package inside at least one real downstream app when that app has already adopted durable ASL scenarios. This catches package, runner, schema, and helper regressions before npm distribution.
|
|
28
|
+
|
|
29
|
+
From this repository, run the opt-in downstream gate with an explicit app root and explicit command arrays:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pnpm downstream:local-package -- \
|
|
33
|
+
--app-root /path/to/adopter-app \
|
|
34
|
+
--expected-branch chore/agent-scenario-loop-adoption \
|
|
35
|
+
--command-json '["pnpm","run","asl:validate"]'
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
The gate packs the current checkout, installs the tarball into the downstream app with `pnpm add`, verifies `node_modules/agent-scenario-loop/package.json` matches the local candidate version, runs the supplied commands, and restores `package.json` plus `pnpm-lock.yaml` unless `--keep-install` is passed. Generated downstream proof artifacts remain the consumer app's local ignored state.
|
|
39
|
+
|
|
40
|
+
For live probes, pass direct package CLI commands as additional JSON arrays so the target scenario and artifact root are explicit:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pnpm downstream:local-package -- \
|
|
44
|
+
--app-root /path/to/adopter-app \
|
|
45
|
+
--command-json '["pnpm","run","asl:validate"]' \
|
|
46
|
+
--command-json '["node_modules/.bin/asl-profile-android","--config","asl.config.json","--scenario","scenarios/mobile/first-journey.json","--adb-capture","--profile-session","--android-profile-session-storage","--launch","--out","artifacts/asl/android","--run-id","first-journey-android-local-candidate"]'
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Keep adopter-specific app ids, storage keys, dev-client URLs, simulator UDIDs, auth state, accounts, and scenarios in ignored local environment state or in the consuming app. ASL owns the package candidate and evidence contract; the downstream app owns product truth.
|
|
50
|
+
|
|
19
51
|
## 1. Initialize The Scaffold
|
|
20
52
|
|
|
21
53
|
From the consuming app root:
|
|
@@ -81,7 +113,7 @@ asl-check-plan --scenario scenarios/mobile/first-journey.json --runner runner-ma
|
|
|
81
113
|
asl-profile-ios --config asl.config.json --scenario scenarios/mobile/first-journey.json --simctl-capture --profile-session --profile-session-storage --launch --out artifacts/asl/ios --run-id first-journey-ios-live --comparison-lane first-journey-ios-live
|
|
82
114
|
```
|
|
83
115
|
|
|
84
|
-
For Expo dev-client builds, set `ASL_ANDROID_DEV_CLIENT_URL` or `ASL_IOS_DEV_CLIENT_URL` to the app's dev-client URL in ignored local env state.
|
|
116
|
+
For Expo dev-client builds, set `ASL_ANDROID_DEV_CLIENT_URL` or `ASL_IOS_DEV_CLIENT_URL` to the app's dev-client URL in ignored local env state. Prefer the LAN URL advertised by Metro for physical-device validation. Use `127.0.0.1` only when the selected simulator/emulator resolves that address back to the host Metro process. Android opens the dev-client URL before profile-session control. When Android storage transport is enabled, ASL waits for `Running "main"` by default before writing profile-session storage; override `ASL_ANDROID_DEV_CLIENT_READY_PATTERN` only when the app has a better readiness marker. If startup readiness fails, ASL reports an unhealthy run and skips command delivery instead of writing into a stale native shell. iOS opens the dev-client URL before reading stored profile-session evidence.
|
|
85
117
|
|
|
86
118
|
When Android deep-link delivery is unreliable in a dev-client shell, use `--android-profile-session-storage` so `asl-profile-android` seeds the app-owned AsyncStorage session through `run-as` before collecting evidence. The runner reads the selected device clock for the session start timestamp, which keeps app-emitted milestone durations meaningful. Keep custom storage key overrides local to the consuming app.
|
|
87
119
|
|
package/docs/contracts.md
CHANGED
|
@@ -66,7 +66,7 @@ Portable scenario manifests describe the durable app behavior before choosing a
|
|
|
66
66
|
- `truthEvents`: app-owned milestone events keyed by stable milestone id
|
|
67
67
|
- `milestones`: inspectable milestone list with event names, phases, timeouts, and descriptions
|
|
68
68
|
- `expectedEvents`: event names the runner or log ingest should expect to observe
|
|
69
|
-
- `cycles`: repeat count, warmup count,
|
|
69
|
+
- `cycles`: repeat count, warmup count, failure policy, and optional setup/body step ids for repeated journeys
|
|
70
70
|
- `budgets`: product thresholds evaluated only after scenario health passes
|
|
71
71
|
- `steps`: runner-facing launch, command, wait, gesture, and capture actions
|
|
72
72
|
- `selector`: optional app target on a step, such as a test id, accessibility id, label, text, resource id, or xpath
|
|
@@ -75,6 +75,8 @@ Portable scenario manifests describe the durable app behavior before choosing a
|
|
|
75
75
|
|
|
76
76
|
The scenario contract is intentionally runner-neutral. Runners can map steps to adb, XcodeBuildMCP, agent-device, accessibility tools, profilers, or custom scripts while preserving the same journey, milestones, budgets, and expected events.
|
|
77
77
|
|
|
78
|
+
For repeated mobile command scenarios, `cycles.setupStepIds` names leading setup commands that run once before measured cycle work, while `cycles.bodyStepIds` names the first repeated body commands when inference would be ambiguous. Built-in profile-session runners also infer a setup prefix conservatively: leading readiness commands or leading commands before the first measured milestone command run once, and the remaining command body repeats for `cycles.iterations`. Wait gates remain strict; ASL does not synthesize missing app-owned truth events.
|
|
79
|
+
|
|
78
80
|
Runner capabilities describe ownership, such as launch, session control, command execution, log capture, artifact writing, or profiler support. Driver actions describe the concrete operations an adapter can perform inside a run. UI contexts describe which surface the runner or provider can own: `app`, `systemDialog`, `notificationShade`, `externalBrowser`, `webView`, `shareSheet`, `picker`, or `otherApp`. UI and capture driver actions default to `app` when a step omits `uiContext`; a scenario must opt into system or external contexts explicitly. A runner may be able to own a scenario lifecycle without supporting every driver action or UI context; the planner fails when a required step declares a `driverAction` or `uiContext` that the selected runner or an active provider does not declare.
|
|
79
81
|
|
|
80
82
|
Planner compatibility artifacts and planner-derived `health.json` include a `downgradePolicy` block with `mode: "no-silent-downgrade"`. Required capability, driver-action, UI-context, or artifact gaps are recorded as `unsupported`; optional gaps are recorded as warnings. `allowedSubstitutions` and `substitutions` are explicit arrays, so future semantic downgrades must be visible in artifacts instead of being inferred from a passed plan.
|
|
@@ -87,10 +89,14 @@ When Android adb `tap` or `scroll` steps provide a portable selector instead of
|
|
|
87
89
|
|
|
88
90
|
I/O from iOS simctl capture routes through the simctl driver adapter. `readLogs` preserves bounded simulator logs under `raw/ios-simctl-log.txt`. A scenario step with `driverAction: "screenshot"` or `artifact: "screenshot"` requests a screenshot capture, defaulting to `captures/ios-screenshot.png`. The profile manifest records the resulting capture path in `artifacts.captures.screenshots`, and capture metadata records any supported simulator screenshot options the runner used.
|
|
89
91
|
|
|
92
|
+
Manifest artifact paths are evidence claims. Optional diagnostics such as `captures.video`, `captures.uiTree`, `raw.deviceLog`, JS/memory/network signals, accessibility exports, and profiler files appear as paths only when the file was produced or intentionally referenced as a sidecar dependency. Every profile manifest also includes `artifacts.diagnostics`, an inventory of common diagnostic surfaces with `kind`, `status`, `required`, optional `path`, and a `reason`/`nextAction` when evidence was unavailable or not requested.
|
|
93
|
+
|
|
90
94
|
Planner compatibility also validates the adapter metadata that built-in runners require. Android adb `tap` steps need either `adapterOptions.androidAdb.x/y` or a portable selector; Android adb `scroll` steps need either `startX/startY/endX/endY` or a portable selector; iOS simctl command metadata needs non-empty command strings and positive integer waits/repeat counts. Argent `tap` steps need `adapterOptions.argent.x/y`, Argent `scroll` steps need `adapterOptions.argent.startX/startY/endX/endY`, and Argent `assertVisible` steps need a portable selector. These failures become `invalid_adapter_options` health checks before runtime execution starts.
|
|
91
95
|
|
|
92
96
|
Adapter-target fixtures such as `agent-device-android`, `agent-device-ios`, `argent-ios`, `argent-android`, `argent-react-profiler-provider`, and `axe-accessibility-provider` describe where external tools can plug into the same contract. They are schema-checked and planner-tested capability manifests. The bundled `agent-device` capture runner implements the portable interaction subset for iOS and Android; broader agent-device surfaces such as React DevTools, traces, network, and performance still need explicit adapters or provider attachments before they become part of the stable artifact contract. The bundled Argent runner implements launch, coordinate-backed gestures, screenshot requests, and description-backed visibility proof for portable selector match modes while keeping React profiler output in a separate Android evidence-provider lane. Argent command-surface checks prove the configured tools exist; runtime health still owns whether the selected device backend produced screenshot evidence. Required screenshot failures fail health, and optional screenshot failures are preserved as warnings. Active evidence providers can satisfy required evidence artifacts and provider-owned driver actions such as `collectPerfSignals`; providers outside the selected platform do not contribute to the match. When those tools write files independently, attached provider evidence lands in the stable manifest and artifact layout. The `script-accessibility-provider`, `script-profiler-provider`, `script-memory-provider`, and `script-network-provider` examples show provider-command wrappers for project-local tools without making those tools package dependencies.
|
|
93
97
|
|
|
98
|
+
Profiler evidence is a first-class artifact kind, but ASL does not pretend every profiler tool has the same native format. JSON profiler outputs should satisfy [schemas/profiler.schema.json](../schemas/profiler.schema.json), including provider, platform, run, scenario, tool/completeness metadata, and at least one useful content surface such as samples, metrics, events, traces, a profile object, summary, or referenced attachments. Lifecycle-backed profilers should also declare whether evidence came from passive report ingestion, an explicit session, inline capture, `afterCapture`, `postRun`, or rehydration; whether the target device/app binding was verified; whether capture perturbed timing; and whether the output is comparable or diagnostic-only. Native traces, CPU profiles, flamegraphs, React DevTools exports, and recordings can still be attached as profiler evidence through provider outputs, but agents should treat them as preserved evidence until a provider also emits structured metrics that ASL can compare or summarize.
|
|
99
|
+
|
|
94
100
|
## Public artifact layout
|
|
95
101
|
|
|
96
102
|
Every run should produce a stable artifact folder.
|
|
@@ -126,7 +132,7 @@ Profile runner artifacts:
|
|
|
126
132
|
|
|
127
133
|
Profile `agent-summary.md` files include an `attempt` section when the run has a manifest attempt block, including terminal state, cleanup state, partial-artifact validity, and retry lineage. Latest-trusted baseline selection treats attempt-aware runs as baseline-trusted only when health and verdict passed, the attempt is a clean first attempt, cleanup did not fail or remain partial, and partial artifacts are not marked valid diagnostic fragments. Older artifacts without `manifest.attempt` remain legacy-trusted when health and verdict passed, but new attempt-aware runs cannot hide retry laundering behind a green final verdict.
|
|
128
134
|
|
|
129
|
-
Profile runners assert only environment facts they own. Every completed profile manifest records ASL-controlled artifact completeness and cleanup postconditions. Live adb/simctl capture paths also assert runner-controlled foreground state, explicit lifecycle preconditions, and foreground postconditions. Use `--lifecycle-phase <phase>` when a runner can prove a non-cold precondition such as `warm-launch` or `resume`; log-ingest and preexisting artifact ingestion keep those fields `unknown/not-asserted`.
|
|
135
|
+
Profile runners assert only environment facts they own. Every completed profile manifest records ASL-controlled artifact completeness and cleanup postconditions. Live adb/simctl capture paths also assert runner-controlled foreground state, explicit lifecycle preconditions, and foreground postconditions. Use `--lifecycle-phase <phase>` when a runner can prove a non-cold precondition such as `warm-launch` or `resume`; log-ingest and preexisting artifact ingestion keep those fields `unknown/not-asserted`. Lifecycle assertions are not product milestones: a runner proving `lifecyclePhase: "resume"` does not synthesize `app_resumed` or any other app truth event. Resume readiness must still be emitted by the consuming app when a scenario waits for it.
|
|
130
136
|
|
|
131
137
|
Aggregate live proof commands write `live-proof.json` and `agent-summary.md` under `_live-proof/<run-id>`. The live-proof artifact points to preflight evidence, every scenario run, optional interaction proofs from tools such as agent-device or Argent, optional skipped interaction proof declarations, and optional latest-trusted comparison outputs, giving agents one stable entrypoint after a proof run. Preflight, profile, and interaction pointers include health and verdict status from the linked run artifacts, so agents can see what passed before opening deeper evidence. Interaction proof pointers also include sidecar screenshot capture inventory when the sidecar produced screenshots, plus `warnings` when optional sidecar checks failed without invalidating the required proof. If profile health or verdict fails, requested sidecars are not executed; they are recorded in `skippedInteractionProofs` with a reason and next action so agent feedback stays explicit without mixing runner evidence into an untrusted timing run. The aggregate artifact records `status`, `comparisonStatus`, `comparisonCounts`, optional per-comparison `metricSummary` counts/highlights, and a `nextAction` hint so agents can distinguish failed proof gates, regressions, mixed metric movement, missing baselines, inconclusive comparisons, partial sidecar evidence, and clean summaries without scraping prose.
|
|
132
138
|
|
|
@@ -150,6 +156,14 @@ The current profile runner writes health, verdict, agent summary, metrics, causa
|
|
|
150
156
|
|
|
151
157
|
Budgets are supported but optional for adoption.
|
|
152
158
|
|
|
159
|
+
Milestone budget interval semantics are explicit:
|
|
160
|
+
|
|
161
|
+
- `toMilestone` without `fromMilestone` measures elapsed time from the run or session clock origin to the matching milestone occurrence.
|
|
162
|
+
- `fromMilestone` plus `toMilestone` measures the interval between the two app-owned truth events for each iteration.
|
|
163
|
+
- Repeated transition, gesture, open, close, scroll, or handoff budgets should use both milestones when the intended number is transition duration rather than cumulative elapsed time.
|
|
164
|
+
|
|
165
|
+
This distinction is visible in `metrics.json`: elapsed milestone-only runs populate `durationsMs` with milestone timestamps, while interval runs populate `durationsMs` with `to - from` values. Timing remains untrusted unless `health.json` passes.
|
|
166
|
+
|
|
153
167
|
`buildRunIndex()` can scan an artifact root after runs complete. It indexes folders that contain both `health.json` and `verdict.json`, marks a run trusted only when health and verdict both passed, and lets agents find the latest trusted prior run for a scenario without relying on terminal history.
|
|
154
168
|
|
|
155
169
|
## Supported Runner Surface
|
package/docs/live-proofs.md
CHANGED
|
@@ -35,6 +35,8 @@ pnpm check-plan -- --scenario examples/scenarios/mobile/app-startup.json --runne
|
|
|
35
35
|
|
|
36
36
|
This validates the input manifests, writes schema-checked `health.json` and `verdict.json`, writes `agent-summary.md`, and includes the raw planner match in `planner-compatibility.json`.
|
|
37
37
|
|
|
38
|
+
Live profile wrappers also run this compatibility check before adb, simctl, agent-device, or provider capture starts. A compatible run writes `planner-compatibility.json` as the first profile artifact, then continues into the platform capture. An incompatible run writes failed `health.json`, inconclusive `verdict.json`, `agent-summary.md`, and the planner artifact in the profile run folder, then exits before touching the device runtime. This keeps missing required diagnostics, unsupported platforms, and impossible runner/provider plans out of the long capture loop.
|
|
39
|
+
|
|
38
40
|
## Host/Device Access
|
|
39
41
|
|
|
40
42
|
Keep deterministic validation and live device proof as separate execution lanes.
|
|
@@ -120,11 +122,13 @@ Use `profile:ios --simctl-capture` when the example app or a consuming app is al
|
|
|
120
122
|
pnpm profile:ios -- --config core/config-template.json --scenario examples/mobile-app/scenarios/ios/app-startup.json --simctl-capture --profile-session --profile-session-storage --launch --run-id ios-run-1
|
|
121
123
|
```
|
|
122
124
|
|
|
123
|
-
The command writes a separate simctl capture folder under the selected output root, seeds the app-owned profile session into native AsyncStorage before launch, then collects stored app profile events after the capture window. Command scenarios seed the scenario command queue through the same storage contract before launch. Command envelopes preserve `commandId`, `sequence`, `queueId`, and, for normalized execution-plan commands followed by a milestone wait, `waitForMilestone` plus `waitTimeoutMs`. Deep-link command transport uses the same envelope in query parameters. When `raw/ios-profile-events.log` exists, the iOS profile runner ingests that stored truth-event log; otherwise it falls back to `raw/ios-simctl-log.txt`.
|
|
125
|
+
The command writes a separate simctl capture folder under the selected output root, seeds the app-owned profile session into native AsyncStorage before launch, then collects stored app profile events after the capture window. Command scenarios seed the scenario command queue through the same storage contract before launch. Command envelopes preserve `commandId`, `sequence`, `queueId`, command pacing `waitMs`, and, for normalized execution-plan commands followed by a milestone wait, `waitForMilestone` plus `waitTimeoutMs`. Deep-link command transport uses the same envelope in query parameters. When `raw/ios-profile-events.log` exists, the iOS profile runner ingests that stored truth-event log; otherwise it falls back to `raw/ios-simctl-log.txt`.
|
|
126
|
+
|
|
127
|
+
Profile manifests only list sidecar paths that were copied into the profile run or deliberately referenced as external sidecar evidence. If a simctl or adb capture folder is the real evidence source, `manifest.artifacts.diagnostics` records the diagnostic status plus `sidecarRoot`/`evidenceDependency` instead of inventing profile-root files such as `raw/device.log`, `captures/run.mp4`, or `captures/ui-tree.json`. Rehydrated runs may record `evidenceDependency.root: "sidecar"` with paths relative to `sidecarRoot`, so agents do not have to reason from long `../../` paths alone.
|
|
124
128
|
|
|
125
129
|
When a scenario requests a screenshot, pass supported simulator screenshot options through the iOS capture command with `--screenshot-type`, `--screenshot-display`, or `--screenshot-mask`; ASL records the chosen options in capture metadata and the resulting path in `manifest.artifacts.captures.screenshots`.
|
|
126
130
|
|
|
127
|
-
For profile-session capture on Android or iOS, omitting `--wait-ms` lets ASL derive the final evidence window from scenario execution waits and cycle count. Explicit `--wait-ms` remains authoritative when a consuming app has a known startup or logging delay that the scenario cannot express.
|
|
131
|
+
For profile-session capture on Android or iOS, omitting `--wait-ms` lets ASL derive the final evidence window from scenario execution waits and cycle count. On iOS, command-backed profile sessions use the expanded command queue, including setup commands, repeated cycle body commands, command pacing `waitMs`, milestone-gate `waitTimeoutMs`, and a conservative buffer. Explicit `--wait-ms` remains authoritative when a consuming app has a known startup or logging delay that the scenario cannot express.
|
|
128
132
|
|
|
129
133
|
Scenario command targets live in `adapterOptions.iosSimctl.commands`, while the app handles them through `registerProfileCommandTargetHandler`. The iOS proof does not depend on unified logs carrying JavaScript console output; it depends on app-owned stored profile events.
|
|
130
134
|
|
|
@@ -205,7 +209,7 @@ When latest-trusted comparison sees slower single-run timing but both baseline a
|
|
|
205
209
|
|
|
206
210
|
Read [Example Mobile App: Android Capture](../examples/mobile-app/README.md#android-capture) for Metro routing, adb permissions, individual scenario commands, selector behavior, and optional video capture.
|
|
207
211
|
|
|
208
|
-
Expo dev-client Android shells may need an explicit Metro deep link after the native app launches. Put that local URL in ignored env state, for example `ASL_EXAMPLE_ANDROID_DEV_CLIENT_URL=asl-example://expo-development-client/?url=http%3A%2F%2F10.0.2.2%3A8097`, so Android profile capture opens the correct app session before profile-session
|
|
212
|
+
Expo dev-client Android shells may need an explicit Metro deep link after the native app launches. Put that local URL in ignored env state, for example `ASL_EXAMPLE_ANDROID_DEV_CLIENT_URL=asl-example://expo-development-client/?url=http%3A%2F%2F10.0.2.2%3A8097`, so Android profile capture opens the correct app session before profile-session control. With storage-backed profile sessions, ASL waits for `Running "main"` by default before writing AsyncStorage. Set `ASL_EXAMPLE_ANDROID_DEV_CLIENT_READY_PATTERN` only when the app has a more precise readiness marker. If readiness fails, the runner reports an unhealthy startup gate and does not deliver stored commands or profile-session deep links.
|
|
209
213
|
|
|
210
214
|
Apps using the ASL profile-session AsyncStorage bridge can opt into storage delivery with `--android-profile-session-storage`. The Android runner resolves the session `startedAt` from the selected device clock before writing AsyncStorage, so milestone timing stays device-relative instead of host-clock-relative. Override the default storage keys with `ASL_ANDROID_PROFILE_SESSION_STORAGE_KEY` and `ASL_ANDROID_PROFILE_COMMAND_STORAGE_KEY` only when adopting an existing app-owned bridge.
|
|
211
215
|
|
|
@@ -336,7 +340,7 @@ Before publishing, run:
|
|
|
336
340
|
pnpm release:check
|
|
337
341
|
```
|
|
338
342
|
|
|
339
|
-
That gate runs tests
|
|
343
|
+
That gate builds the release scripts, runs tests and readiness checks, packs the package once, then reuses that tarball for package smoke, installed-binary checks, fake-device example proofs, schema/example/template/doc packaging checks, and the packed-package consumer rehearsal. Reusing one tarball keeps the release path closer to npm publish behavior and avoids repeated clean/build/pack cycles.
|
|
340
344
|
|
|
341
345
|
Package smoke and consumer rehearsal keep child commands bounded so package-manager stalls fail with the temporary rehearsal directory preserved. Set `ASL_PACKAGE_GATE_TIMEOUT_MS` to raise the per-command timeout when a local registry, proxy, or cold package cache is slow:
|
|
342
346
|
|
|
@@ -344,6 +348,10 @@ Package smoke and consumer rehearsal keep child commands bounded so package-mana
|
|
|
344
348
|
ASL_PACKAGE_GATE_TIMEOUT_MS=300000 pnpm release:check
|
|
345
349
|
```
|
|
346
350
|
|
|
351
|
+
## Run Plan First
|
|
352
|
+
|
|
353
|
+
Profile runs write `run-plan.json` before provider commands, evidence ingest, and final health classification. Inspect it first when a live loop stalls or fails early: it records the scenario id, scenario hash, input mode (`fixture-event-log`, `adb-sidecar`, `simctl-sidecar`, or live capture), expected iterations, command transport, provider manifests, requested diagnostics, and evidence source paths. The profile CLIs also print a compact run-plan heartbeat to stderr while keeping stdout reserved for the run directory.
|
|
354
|
+
|
|
347
355
|
## Side References
|
|
348
356
|
|
|
349
357
|
- [Consumer App Rehearsal](consumer-rehearsal.md) for adoption inside an existing app
|
|
@@ -6,11 +6,11 @@
|
|
|
6
6
|
"capabilities": ["accessibility", "memory", "network", "profiler"],
|
|
7
7
|
"driverActions": ["collectPerfSignals"],
|
|
8
8
|
"artifactOutputs": ["accessibility", "memory", "network", "profiler", "signals"],
|
|
9
|
-
"lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
|
|
9
|
+
"lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
|
|
10
10
|
"providerCommands": [
|
|
11
11
|
{
|
|
12
12
|
"id": "capture-accessibility",
|
|
13
|
-
"phase": "
|
|
13
|
+
"phase": "afterCapture",
|
|
14
14
|
"command": "node",
|
|
15
15
|
"cwd": "..",
|
|
16
16
|
"args": [
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
},
|
|
35
35
|
{
|
|
36
36
|
"id": "capture-profiler",
|
|
37
|
-
"phase": "
|
|
37
|
+
"phase": "afterCapture",
|
|
38
38
|
"command": "node",
|
|
39
39
|
"cwd": "..",
|
|
40
40
|
"args": [
|
|
@@ -80,6 +80,31 @@ function writeProviderEvidence({
|
|
|
80
80
|
|
|
81
81
|
writeJsonArtifact(outPath, {
|
|
82
82
|
...shared,
|
|
83
|
+
captureMode: 'afterCapture',
|
|
84
|
+
completenessStatus: 'complete',
|
|
85
|
+
comparability: {
|
|
86
|
+
status: 'diagnostic-only',
|
|
87
|
+
reason: 'Deterministic example evidence is collected after the profile window and is not used for timing budgets.',
|
|
88
|
+
},
|
|
89
|
+
dataClasses: ['cpu-samples'],
|
|
90
|
+
lifecycle: {
|
|
91
|
+
phase: 'afterCapture',
|
|
92
|
+
perturbsTiming: false,
|
|
93
|
+
},
|
|
94
|
+
metrics: {
|
|
95
|
+
commitCount: 0,
|
|
96
|
+
droppedFrameCount: 0,
|
|
97
|
+
jsLongTaskCount: 0,
|
|
98
|
+
},
|
|
99
|
+
profileKind: 'diagnostic-summary',
|
|
100
|
+
targetBinding: {
|
|
101
|
+
status: 'verified',
|
|
102
|
+
source: 'example-provider-fixture',
|
|
103
|
+
},
|
|
104
|
+
tool: {
|
|
105
|
+
name: 'agent-scenario-loop deterministic profiler',
|
|
106
|
+
version: '1.0.0',
|
|
107
|
+
},
|
|
83
108
|
samples: [],
|
|
84
109
|
summary: 'Deterministic example profiler evidence for package and consumer rehearsal.',
|
|
85
110
|
});
|
|
@@ -34,12 +34,12 @@ They do not mean the package bundles every named tool. A fixture describes what
|
|
|
34
34
|
- Keep `driverActions` about concrete operations the adapter can perform.
|
|
35
35
|
- Keep `uiContexts` about the surface the adapter can own; do not use `app` proof for system dialogs, share sheets, external browsers, WebViews, pickers, notifications, or another app unless the manifest explicitly declares that context.
|
|
36
36
|
- Do not add a capability or driver action until a runner or provider can produce the corresponding evidence.
|
|
37
|
-
- Keep `providerCommands` on evidence-provider manifests; primary runners should own lifecycle orchestration, not provider command wrappers.
|
|
37
|
+
- Keep `providerCommands` on evidence-provider manifests; primary runners should own lifecycle orchestration, not provider command wrappers. Prefer `phase: "afterCapture"` for diagnostics that inspect an already captured adb/simctl sidecar.
|
|
38
38
|
- When a tool writes files independently, attach them through `--signal`, `--capture`, or a `providerCommands` manifest so the run keeps stable artifact paths.
|
|
39
39
|
- Treat these manifests as starting contracts; consuming apps can narrow them to match the exact adapter they install.
|
|
40
40
|
|
|
41
41
|
## Tool Surface Notes
|
|
42
42
|
|
|
43
|
-
The bundled `agent-device` driver adapter and `asl-agent-device` capture runner map the declared portable subset: app open/close, alert inspection, `tap`, `scroll`, `assertVisible`, `inspectTree`, `screenshot`, and `readLogs`. Planner compatibility validates the agent-device target metadata that must be known before runtime: `tap` needs a selector, `adapterOptions.agentDevice.ref`, or `adapterOptions.agentDevice.x/y`; `assertVisible` needs a portable selector; selector matching must be exact until the adapter maps richer match modes. The agent-device CLI may expose more commands than the fixture declares, including recording, performance, network, trace, batch, and React DevTools operations. Keep those out of the primary runner manifest until an adapter maps them into stable Agent Scenario Loop artifacts. For example,
|
|
43
|
+
The bundled `agent-device` driver adapter and `asl-agent-device` capture runner map the declared portable subset: app open/close, alert inspection, `tap`, `scroll`, `assertVisible`, `inspectTree`, `screenshot`, and `readLogs`. Planner compatibility validates the agent-device target metadata that must be known before runtime: `tap` needs a selector, `adapterOptions.agentDevice.ref`, or `adapterOptions.agentDevice.x/y`; `assertVisible` needs a portable selector; selector matching must be exact until the adapter maps richer match modes. The agent-device CLI may expose more commands than the fixture declares, including recording, performance, network, trace, batch, and React DevTools operations. Keep those out of the primary runner manifest until an adapter maps them into stable Agent Scenario Loop artifacts. For example, Android snapshots, network dumps, and performance evidence can be attached through a provider once the project proves those commands on its devices; React DevTools, traces, and recording should stay in explicit heavy lanes until their outputs are stable ASL artifacts.
|
|
44
44
|
|
|
45
|
-
The Argent fixtures are external-tool contracts, not bundled package dependencies. `@swmansion/argent` exposes a local MCP/CLI surface for iOS Simulator and Android Emulator control, so Agent Scenario Loop should keep two lanes distinct when an app adopts it: a primary interaction adapter for launch, gestures, screenshot requests, and UI descriptions, and a provider lane for profiler output such as React commit or CPU summaries. Android can pair fast adb interaction with an Argent profiler provider so profiling startup cost does not slow every tap or scroll. iOS adapters should treat native-devtools disconnects, restart-required statuses, required screenshot failures, and root-only UI descriptions as failed scenario health, because timing budgets are not trustworthy when required UI evidence is unverifiable. Optional screenshot failures should stay visible as warnings. When Argent can prove launch and accessibility but its iOS screenshot backend is unavailable, ASL may attach simctl as a screenshot fallback provider while keeping the Argent warning in health. Argent output files should enter ASL through `raw/`, `captures/`, `signals/js`, or provider-command attachments with stable manifest inventory; do not create Argent-specific top-level artifact folders.
|
|
45
|
+
The Argent fixtures are external-tool contracts, not bundled package dependencies. `@swmansion/argent` exposes a local MCP/CLI surface for iOS Simulator and Android Emulator control, so Agent Scenario Loop should keep two lanes distinct when an app adopts it: a primary interaction adapter for launch, gestures, screenshot requests, and UI descriptions, and a provider lane for profiler output such as React commit or CPU summaries. Android can pair fast adb interaction with an Argent profiler provider so profiling startup cost does not slow every tap or scroll. iOS can use Argent `describe` as AXRuntime accessibility evidence when that command is reliable for the selected simulator and bundle; treat native UIKit hierarchy restart requirements as a separate unsupported or heavy diagnostic until the project can capture them consistently. iOS adapters should treat native-devtools disconnects, restart-required statuses, required screenshot failures, and root-only UI descriptions as failed scenario health, because timing budgets are not trustworthy when required UI evidence is unverifiable. Optional screenshot failures should stay visible as warnings. When Argent can prove launch and accessibility but its iOS screenshot backend is unavailable, ASL may attach simctl as a screenshot fallback provider while keeping the Argent warning in health. Argent output files should enter ASL through `raw/`, `captures/`, `signals/js`, or provider-command attachments with stable manifest inventory; do not create Argent-specific top-level artifact folders. React profiler CPU summaries are lifecycle evidence when they require a prior start/stop session. Provider output should preserve target-binding proof, raw profile attachments, derived summaries, and diagnostic-only/comparable status instead of treating those summaries as passive snapshots.
|
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
"platforms": ["ios", "android"],
|
|
6
6
|
"capabilities": ["accessibility"],
|
|
7
7
|
"artifactOutputs": ["accessibility"],
|
|
8
|
-
"lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
|
|
8
|
+
"lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
|
|
9
9
|
"providerCommands": [
|
|
10
10
|
{
|
|
11
11
|
"id": "capture-accessibility",
|
|
12
|
-
"phase": "
|
|
12
|
+
"phase": "afterCapture",
|
|
13
13
|
"command": "axe",
|
|
14
14
|
"args": ["--output", "{providerDir}/accessibility.json"],
|
|
15
15
|
"outputs": [
|
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
"platforms": ["ios", "android"],
|
|
6
6
|
"capabilities": ["accessibility"],
|
|
7
7
|
"artifactOutputs": ["accessibility"],
|
|
8
|
-
"lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
|
|
8
|
+
"lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
|
|
9
9
|
"providerCommands": [
|
|
10
10
|
{
|
|
11
11
|
"id": "capture-accessibility",
|
|
12
|
-
"phase": "
|
|
12
|
+
"phase": "afterCapture",
|
|
13
13
|
"command": "capture-accessibility",
|
|
14
14
|
"args": ["--platform", "{platform}", "--out", "{providerDir}/accessibility.json"],
|
|
15
15
|
"outputs": [
|
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
"platforms": ["ios", "android"],
|
|
6
6
|
"capabilities": ["memory"],
|
|
7
7
|
"artifactOutputs": ["memory", "signals"],
|
|
8
|
-
"lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
|
|
8
|
+
"lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
|
|
9
9
|
"providerCommands": [
|
|
10
10
|
{
|
|
11
11
|
"id": "capture-memory",
|
|
12
|
-
"phase": "
|
|
12
|
+
"phase": "afterCapture",
|
|
13
13
|
"command": "capture-memory",
|
|
14
14
|
"args": ["--platform", "{platform}", "--run-id", "{runId}", "--out", "{providerDir}/memory.json"],
|
|
15
15
|
"outputs": [
|
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
"platforms": ["ios", "android"],
|
|
6
6
|
"capabilities": ["network"],
|
|
7
7
|
"artifactOutputs": ["network", "signals"],
|
|
8
|
-
"lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
|
|
8
|
+
"lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
|
|
9
9
|
"providerCommands": [
|
|
10
10
|
{
|
|
11
11
|
"id": "capture-network",
|
|
12
|
-
"phase": "
|
|
12
|
+
"phase": "afterCapture",
|
|
13
13
|
"command": "capture-network",
|
|
14
14
|
"args": ["--platform", "{platform}", "--scenario", "{scenarioId}", "--out", "{providerDir}/network.har"],
|
|
15
15
|
"outputs": [
|
|
@@ -6,11 +6,11 @@
|
|
|
6
6
|
"capabilities": ["profiler"],
|
|
7
7
|
"driverActions": ["collectPerfSignals"],
|
|
8
8
|
"artifactOutputs": ["profiler", "signals"],
|
|
9
|
-
"lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
|
|
9
|
+
"lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
|
|
10
10
|
"providerCommands": [
|
|
11
11
|
{
|
|
12
12
|
"id": "capture-profiler",
|
|
13
|
-
"phase": "
|
|
13
|
+
"phase": "afterCapture",
|
|
14
14
|
"command": "capture-profiler",
|
|
15
15
|
"args": ["--platform", "{platform}", "--run-id", "{runId}", "--out", "{providerDir}/profiler.json"],
|
|
16
16
|
"outputs": [
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-scenario-loop",
|
|
3
|
-
"version": "0.1.3",
|
|
3
|
+
"version": "0.1.5",
|
|
4
4
|
"private": false,
|
|
5
5
|
"description": "Scenario orchestration and evidence collection for agent-driven software development. Bring your own runner. Keep your scenarios. Keep your evidence.",
|
|
6
6
|
"license": "MIT",
|
|
@@ -59,6 +59,12 @@
|
|
|
59
59
|
"require": "./dist/index.js",
|
|
60
60
|
"default": "./dist/index.js"
|
|
61
61
|
},
|
|
62
|
+
"./app/profile-session": {
|
|
63
|
+
"types": "./app/profile-session.ts",
|
|
64
|
+
"import": "./app/profile-session.ts",
|
|
65
|
+
"require": "./app/profile-session.ts",
|
|
66
|
+
"default": "./app/profile-session.ts"
|
|
67
|
+
},
|
|
62
68
|
"./runner/android-adb": {
|
|
63
69
|
"types": "./dist/runner/android-adb.d.ts",
|
|
64
70
|
"import": "./dist/runner/android-adb.js",
|
|
@@ -201,7 +207,6 @@
|
|
|
201
207
|
"core/config-template.json",
|
|
202
208
|
"dist",
|
|
203
209
|
"!dist/**/__tests__",
|
|
204
|
-
"!dist/scripts",
|
|
205
210
|
"docs",
|
|
206
211
|
"examples",
|
|
207
212
|
"!examples/mobile-app/.expo",
|
|
@@ -224,9 +229,10 @@
|
|
|
224
229
|
"clean": "node -e \"require('node:fs').rmSync('dist',{recursive:true,force:true})\"",
|
|
225
230
|
"prepack": "npm run clean && npm run build",
|
|
226
231
|
"consumer:rehearse": "pnpm build && node dist/scripts/consumer-rehearsal.js",
|
|
232
|
+
"downstream:local-package": "pnpm build && node dist/scripts/downstream-local-package-gate.js",
|
|
227
233
|
"package:smoke": "pnpm build && node dist/scripts/package-smoke.js",
|
|
228
234
|
"prepublishOnly": "pnpm release:check",
|
|
229
|
-
"release:check": "pnpm
|
|
235
|
+
"release:check": "pnpm build && node dist/scripts/release-check.js",
|
|
230
236
|
"release:readiness": "pnpm build && node dist/scripts/release-readiness.js",
|
|
231
237
|
"typecheck": "tsc -p tsconfig.json --noEmit",
|
|
232
238
|
"test": "pnpm clean && pnpm typecheck && pnpm exec tsc -p tsconfig.json && node --test dist/core/__tests__/*.test.js dist/runner/__tests__/*.test.js",
|
|
@@ -280,8 +286,10 @@
|
|
|
280
286
|
"profile:ios:open-close": "pnpm build && node dist/runner/profile-ios.js --config core/config-template.json --scenario examples/scenarios/ios/open-close-cycle.json",
|
|
281
287
|
"validate-project": "pnpm build && node dist/runner/validate-project.js"
|
|
282
288
|
},
|
|
289
|
+
"dependencies": {
|
|
290
|
+
"@types/node": "^25.9.3"
|
|
291
|
+
},
|
|
283
292
|
"devDependencies": {
|
|
284
|
-
"@types/node": "^25.9.3",
|
|
285
293
|
"typescript": "^6.0.3"
|
|
286
294
|
}
|
|
287
295
|
}
|
|
@@ -363,7 +363,7 @@
|
|
|
363
363
|
"artifacts": {
|
|
364
364
|
"type": "object",
|
|
365
365
|
"additionalProperties": false,
|
|
366
|
-
"required": ["causalRun", "budgetVerdict", "manifest", "metrics", "summary", "scenario", "raw", "captures", "signals"],
|
|
366
|
+
"required": ["causalRun", "budgetVerdict", "manifest", "metrics", "summary", "scenario", "raw", "captures", "signals", "diagnostics"],
|
|
367
367
|
"properties": {
|
|
368
368
|
"causalRun": {
|
|
369
369
|
"type": "string"
|
|
@@ -386,20 +386,21 @@
|
|
|
386
386
|
"raw": {
|
|
387
387
|
"type": "object",
|
|
388
388
|
"additionalProperties": false,
|
|
389
|
-
"required": ["interactionLog", "deviceLog"],
|
|
390
389
|
"properties": {
|
|
391
390
|
"interactionLog": {
|
|
392
391
|
"type": "string"
|
|
393
392
|
},
|
|
394
393
|
"deviceLog": {
|
|
395
394
|
"type": "string"
|
|
395
|
+
},
|
|
396
|
+
"profileSessionEntries": {
|
|
397
|
+
"type": "string"
|
|
396
398
|
}
|
|
397
399
|
}
|
|
398
400
|
},
|
|
399
401
|
"captures": {
|
|
400
402
|
"type": "object",
|
|
401
403
|
"additionalProperties": false,
|
|
402
|
-
"required": ["video", "uiTree"],
|
|
403
404
|
"properties": {
|
|
404
405
|
"video": {
|
|
405
406
|
"type": "string"
|
|
@@ -415,6 +416,75 @@
|
|
|
415
416
|
}
|
|
416
417
|
}
|
|
417
418
|
},
|
|
419
|
+
"diagnostics": {
|
|
420
|
+
"type": "array",
|
|
421
|
+
"minItems": 1,
|
|
422
|
+
"items": {
|
|
423
|
+
"type": "object",
|
|
424
|
+
"additionalProperties": false,
|
|
425
|
+
"required": ["kind", "status", "required"],
|
|
426
|
+
"properties": {
|
|
427
|
+
"kind": {
|
|
428
|
+
"type": "string",
|
|
429
|
+
"enum": ["accessibility", "js", "logs", "memory", "network", "profiler", "screenshot", "uiTree", "video"]
|
|
430
|
+
},
|
|
431
|
+
"status": {
|
|
432
|
+
"type": "string",
|
|
433
|
+
"enum": ["captured", "not_requested", "not_supported", "unavailable", "failed", "skipped", "missing"]
|
|
434
|
+
},
|
|
435
|
+
"required": {
|
|
436
|
+
"type": "boolean"
|
|
437
|
+
},
|
|
438
|
+
"name": {
|
|
439
|
+
"type": "string",
|
|
440
|
+
"minLength": 1
|
|
441
|
+
},
|
|
442
|
+
"provider": {
|
|
443
|
+
"type": "string",
|
|
444
|
+
"minLength": 1
|
|
445
|
+
},
|
|
446
|
+
"runnerId": {
|
|
447
|
+
"type": "string",
|
|
448
|
+
"minLength": 1
|
|
449
|
+
},
|
|
450
|
+
"path": {
|
|
451
|
+
"type": "string",
|
|
452
|
+
"minLength": 1
|
|
453
|
+
},
|
|
454
|
+
"reason": {
|
|
455
|
+
"type": "string",
|
|
456
|
+
"minLength": 1
|
|
457
|
+
},
|
|
458
|
+
"nextAction": {
|
|
459
|
+
"type": "string",
|
|
460
|
+
"minLength": 1
|
|
461
|
+
},
|
|
462
|
+
"sidecarRoot": {
|
|
463
|
+
"type": "string",
|
|
464
|
+
"minLength": 1
|
|
465
|
+
},
|
|
466
|
+
"evidenceDependency": {
|
|
467
|
+
"type": "object",
|
|
468
|
+
"additionalProperties": false,
|
|
469
|
+
"required": ["kind", "path"],
|
|
470
|
+
"properties": {
|
|
471
|
+
"kind": {
|
|
472
|
+
"type": "string",
|
|
473
|
+
"minLength": 1
|
|
474
|
+
},
|
|
475
|
+
"root": {
|
|
476
|
+
"type": "string",
|
|
477
|
+
"enum": ["run", "sidecar"]
|
|
478
|
+
},
|
|
479
|
+
"path": {
|
|
480
|
+
"type": "string",
|
|
481
|
+
"minLength": 1
|
|
482
|
+
}
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
}
|
|
486
|
+
}
|
|
487
|
+
},
|
|
418
488
|
"signals": {
|
|
419
489
|
"type": "object",
|
|
420
490
|
"additionalProperties": false,
|