npm - agent-scenario-loop - Versions diffs - 0.1.2 → 0.1.4 - Mend

agent-scenario-loop 0.1.2 → 0.1.4

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (87)

package/README.md +9 -9
package/app/profile-session.ts +352 -12
package/dist/core/agent-summary.d.ts +3 -2
package/dist/core/agent-summary.js +44 -2
package/dist/core/artifact-contract.d.ts +28 -8
package/dist/core/artifact-contract.js +676 -26
package/dist/core/comparison.d.ts +57 -3
package/dist/core/comparison.js +113 -1
package/dist/core/planner.d.ts +32 -1
package/dist/core/planner.js +144 -0
package/dist/core/run-index.d.ts +4 -0
package/dist/core/run-index.js +55 -1
package/dist/core/schema-validator.d.ts +2 -0
package/dist/core/schema-validator.js +2 -0
package/dist/runner/android-adb-driver.d.ts +7 -2
package/dist/runner/android-adb-driver.js +7 -1
package/dist/runner/android-adb.d.ts +40 -5
package/dist/runner/android-adb.js +1046 -664
package/dist/runner/compare-latest.d.ts +8 -4
package/dist/runner/compare-latest.js +24 -5
package/dist/runner/example-android-live.d.ts +10 -1
package/dist/runner/example-android-live.js +55 -0
package/dist/runner/example-ios-live.d.ts +10 -1
package/dist/runner/example-ios-live.js +55 -0
package/dist/runner/ios-simctl.d.ts +6 -0
package/dist/runner/ios-simctl.js +7 -0
package/dist/runner/live-comparison.d.ts +2 -2
package/dist/runner/live-comparison.js +2 -1
package/dist/runner/live-proof-summary.d.ts +5 -4
package/dist/runner/live-proof-summary.js +12 -2
package/dist/runner/live-proof.d.ts +3 -2
package/dist/runner/live-proof.js +9 -2
package/dist/runner/profile-android.d.ts +16 -1
package/dist/runner/profile-android.js +364 -26
package/dist/runner/profile-ios.d.ts +13 -2
package/dist/runner/profile-ios.js +341 -19
package/dist/runner/profile-mobile.d.ts +39 -3
package/dist/runner/profile-mobile.js +1054 -42
package/dist/runner/validate-project.js +3 -0
package/dist/scripts/consumer-rehearsal.d.ts +119 -0
package/dist/scripts/consumer-rehearsal.js +757 -0
package/dist/scripts/downstream-local-package-gate.d.ts +2 -0
package/dist/scripts/downstream-local-package-gate.js +264 -0
package/dist/scripts/package-smoke.d.ts +96 -0
package/dist/scripts/package-smoke.js +2282 -0
package/dist/scripts/release-readiness.d.ts +2 -0
package/dist/scripts/release-readiness.js +520 -0
package/docs/adapters.md +7 -1
package/docs/api.md +2 -2
package/docs/architecture.md +90 -0
package/docs/authoring.md +39 -3
package/docs/concepts.md +3 -24
package/docs/consumer-rehearsal.md +31 -1
package/docs/contracts.md +45 -101
package/docs/external-adapter-protocol.md +219 -0
package/docs/live-proofs.md +86 -3
package/docs/principles.md +9 -15
package/examples/mobile-app/README.md +12 -0
package/examples/mobile-app/runner-manifests/evidence-provider.json +3 -3
package/examples/mobile-app/runner-manifests/primary-runner.json +1 -0
package/examples/mobile-app/scripts/asl-capture-profiler-provider.mjs +25 -0
package/examples/runners/README.md +4 -3
package/examples/runners/adb-android.json +1 -0
package/examples/runners/agent-device-android.json +1 -0
package/examples/runners/agent-device-ios.json +1 -0
package/examples/runners/argent-android.json +1 -0
package/examples/runners/argent-ios.json +1 -0
package/examples/runners/axe-accessibility-provider.json +2 -2
package/examples/runners/script-accessibility-provider.json +2 -2
package/examples/runners/script-memory-provider.json +2 -2
package/examples/runners/script-network-provider.json +2 -2
package/examples/runners/script-profiler-provider.json +2 -2
package/examples/runners/xcodebuildmcp-ios.json +1 -0
package/package.json +12 -3
package/schemas/causal-run.schema.json +85 -2
package/schemas/comparison.schema.json +130 -2
package/schemas/external-adapter-message.schema.json +693 -0
package/schemas/health.schema.json +72 -0
package/schemas/live-proof-set.schema.json +1 -1
package/schemas/live-proof.schema.json +14 -6
package/schemas/manifest.schema.json +515 -4
package/schemas/profiler.schema.json +243 -0
package/schemas/runner-capabilities.schema.json +28 -2
package/schemas/scenario.schema.json +34 -2
package/templates/evidence-provider.json +3 -3
package/templates/primary-runner.json +1 -0
package/templates/scripts/asl-capture-profiler-provider.mjs +20 -0

package/docs/live-proofs.md CHANGED Viewed

@@ -14,6 +14,27 @@ pnpm demo:loop -- --out artifacts/demo-loop
 The command runs preflight, profiles baseline/current event logs, writes run artifacts, compares the current run against the latest trusted prior run, and refreshes the current run's `agent-summary.md`.
+It writes:
+- `preflight/app-startup/health.json`
+- `preflight/app-startup/verdict.json`
+- `preflight/app-startup/agent-summary.md`
+- `profile-runs/app-startup/demo-baseline/*`
+- `profile-runs/app-startup/demo-current/*`
+- `profile-runs/app-startup/demo-current/comparison.json`
+This is not a replacement for live device proof. It is a stable contract check that keeps the evidence loop reproducible through trusted prior-run selection while iOS or Android runtime setup is unavailable.
+## Plan Check
+Use `check-plan` to validate a scenario, runner manifest, and optional evidence-provider manifests before execution:
+```bash
+pnpm check-plan -- --scenario examples/scenarios/mobile/app-startup.json --runner examples/runners/xcodebuildmcp-ios.json --platform ios --out artifacts/plan/app-startup
+```
+This validates the input manifests, writes schema-checked `health.json` and `verdict.json`, writes `agent-summary.md`, and includes the raw planner match in `planner-compatibility.json`.
 ## Host/Device Access
 Keep deterministic validation and live device proof as separate execution lanes.
@@ -69,6 +90,48 @@ ASL_ARGENT_BIN=pnpm \
 The doctor composes the existing adb, simctl, agent-device, and Argent checks into one ASL artifact set. A failed doctor is environment evidence, not product evidence: fix the host access or command shape before starting scenario execution.
+## Platform Preflight and Profile Capture
+Use `android:preflight` to verify adb and connected-device readiness before adding live Android scenario execution:
+```bash
+pnpm android:preflight -- --package com.example.app --out artifacts/android-adb-preflight
+```
+The command writes `health.json`, `verdict.json`, `agent-summary.md`, `raw/adb-version.txt`, `raw/adb-devices.txt`, and `raw/android-metadata.json`. If adb, a connected online device, or an optional package check fails, health fails and the verdict remains `inconclusive`.
+Add `--capture-logcat --logcat-lines <count>` to write `raw/adb-logcat.txt` in the same artifact folder. Add `--react-native-debug-host <host:port>` with `--package <name>` for React Native development builds that need adb reverse plus the app `debug_http_host` preference before launch; the runner writes `raw/adb-react-native-reverse.txt` and `raw/adb-react-native-debug-host.txt`. Add `--clear-logcat --launch --wait-ms <ms>` with `--package <name>` to clear logs, launch the package, wait for a bounded capture window, and then collect logcat evidence. If requested capture-window setup or logcat capture fails, scenario health fails because timing and event evidence would be incomplete.
+Use captured logcat evidence directly with Android profiling:
+```bash
+pnpm profile:android -- --config core/config-template.json --scenario examples/mobile-app/scenarios/android/app-startup.json --adb-artifacts artifacts/android-adb-preflight --run-id android-run-1
+```
+Or let Android profiling own the adb capture window before it writes profile artifacts:
+```bash
+pnpm profile:android -- --config core/config-template.json --scenario examples/mobile-app/scenarios/android/app-startup.json --adb-capture --react-native-debug-host localhost:8097 --clear-logcat --launch --run-id android-run-1
+```
+Use `profile:ios --simctl-capture` when the example app or a consuming app is already installed on a booted simulator:
+```bash
+pnpm profile:ios -- --config core/config-template.json --scenario examples/mobile-app/scenarios/ios/app-startup.json --simctl-capture --profile-session --profile-session-storage --launch --run-id ios-run-1
+```
+The command writes a separate simctl capture folder under the selected output root, seeds the app-owned profile session into native AsyncStorage before launch, then collects stored app profile events after the capture window. Command scenarios seed the scenario command queue through the same storage contract before launch. Command envelopes preserve `commandId`, `sequence`, `queueId`, command pacing `waitMs`, and, for normalized execution-plan commands followed by a milestone wait, `waitForMilestone` plus `waitTimeoutMs`. Deep-link command transport uses the same envelope in query parameters. When `raw/ios-profile-events.log` exists, the iOS profile runner ingests that stored truth-event log; otherwise it falls back to `raw/ios-simctl-log.txt`.
+Profile manifests only list sidecar paths that were copied into the profile run or deliberately referenced as external sidecar evidence. If a simctl or adb capture folder is the real evidence source, `manifest.artifacts.diagnostics` records the diagnostic status plus `sidecarRoot`/`evidenceDependency` instead of inventing profile-root files such as `raw/device.log`, `captures/run.mp4`, or `captures/ui-tree.json`. Rehydrated runs may record `evidenceDependency.root: "sidecar"` with paths relative to `sidecarRoot`, so agents do not have to reason from long `../../` paths alone.
+When a scenario requests a screenshot, pass supported simulator screenshot options through the iOS capture command with `--screenshot-type`, `--screenshot-display`, or `--screenshot-mask`; ASL records the chosen options in capture metadata and the resulting path in `manifest.artifacts.captures.screenshots`.
+For profile-session capture on Android or iOS, omitting `--wait-ms` lets ASL derive the final evidence window from scenario execution waits and cycle count. On iOS, command-backed profile sessions use the expanded command queue, including setup commands, repeated cycle body commands, command pacing `waitMs`, milestone-gate `waitTimeoutMs`, and a conservative buffer. Explicit `--wait-ms` remains authoritative when a consuming app has a known startup or logging delay that the scenario cannot express.
+Scenario command targets live in `adapterOptions.iosSimctl.commands`, while the app handles them through `registerProfileCommandTargetHandler`. The iOS proof does not depend on unified logs carrying JavaScript console output; it depends on app-owned stored profile events.
+Attach independently produced provider evidence with `--signal <js|memory|network>:<path>` or `--capture <screenshot|video|uiTree>:<path>` so profile commands copy those files into stable run folders and inventory them in `manifest.artifacts.evidenceAttachments`.
 ## Generic Mobile Proof
 Use the generic live runners in a consuming app after `asl-init` has created `asl.config.json`, `scenarios/mobile/<id>.json`, and the `asl:*` package-script snippets:
@@ -134,9 +197,17 @@ pnpm example:android:live -- --run-suffix before-change
 pnpm example:android:live -- --run-suffix after-change
 ```
+After dependency, native-build, or scenario-contract changes, use `--seed-baseline` to capture a trusted same-cohort baseline immediately before the measured run. The seeded profiles use `*-baseline` run ids, must pass health and verdict, and stay in the same comparison lane:
+```bash
+pnpm example:android:live -- --run-suffix release-check --seed-baseline
+```
+When latest-trusted comparison sees slower single-run timing but both baseline and current remain inside their budgets, ASL reports `low_confidence` instead of `regressed`. Treat that as a repeat-or-sample signal, not proof of product regression.
 Read [Example Mobile App: Android Capture](../examples/mobile-app/README.md#android-capture) for Metro routing, adb permissions, individual scenario commands, selector behavior, and optional video capture.
-Expo dev-client Android shells may need an explicit Metro deep link after the native app launches. Put that local URL in ignored env state, for example `ASL_EXAMPLE_ANDROID_DEV_CLIENT_URL=asl-example://expo-development-client/?url=http%3A%2F%2F10.0.2.2%3A8097`, so Android profile capture opens the correct app session before profile-session deep links. When bundle load time is variable, also set `ASL_EXAMPLE_ANDROID_DEV_CLIENT_READY_PATTERN='Running "main"'` so the runner waits for bounded logcat readiness evidence before sending scenario links.
+Expo dev-client Android shells may need an explicit Metro deep link after the native app launches. Put that local URL in ignored env state, for example `ASL_EXAMPLE_ANDROID_DEV_CLIENT_URL=asl-example://expo-development-client/?url=http%3A%2F%2F10.0.2.2%3A8097`, so Android profile capture opens the correct app session before profile-session control. With storage-backed profile sessions, ASL waits for `Running "main"` by default before writing AsyncStorage. Set `ASL_EXAMPLE_ANDROID_DEV_CLIENT_READY_PATTERN` only when the app has a more precise readiness marker. If readiness fails, the runner reports an unhealthy startup gate and does not deliver stored commands or profile-session deep links.
 Apps using the ASL profile-session AsyncStorage bridge can opt into storage delivery with `--android-profile-session-storage`. The Android runner resolves the session `startedAt` from the selected device clock before writing AsyncStorage, so milestone timing stays device-relative instead of host-clock-relative. Override the default storage keys with `ASL_ANDROID_PROFILE_SESSION_STORAGE_KEY` and `ASL_ANDROID_PROFILE_COMMAND_STORAGE_KEY` only when adopting an existing app-owned bridge.
@@ -163,6 +234,14 @@ The root example live scripts pass `--compare-latest --fail-on-regression` by de
 pnpm example:ios:live -- --run-suffix after-change
 ```
+Use `--seed-baseline` for fresh release checks where no compatible trusted iOS baseline exists yet:
+```bash
+pnpm example:ios:live -- --run-suffix release-check --seed-baseline
+```
+The same `low_confidence` comparison policy applies to iOS seeded baselines, where simulator and dev-client startup timing can vary between adjacent runs while still satisfying product budgets.
 Expo dev-client iOS shells may need an explicit Metro deep link after the native app launches. Put that local URL in ignored env state, for example `ASL_EXAMPLE_IOS_DEV_CLIENT_URL=asl-example://expo-development-client/?url=http%3A%2F%2Flocalhost%3A8097`, so iOS profile capture opens the correct app session before collecting evidence.
 The default iOS live proof transport seeds profile-session control into simulator app storage. Use `--ios-profile-session-transport deeplink` when the app should receive profile-session start and command control through app URLs instead.
@@ -247,6 +326,10 @@ pnpm compare:latest \
 Scenario health must pass before timing or budget evidence can support an improvement or regression claim.
+The comparison gate is intentionally strict. If either run failed scenario health, or if the scenario ids do not match, the comparison is `inconclusive`. Numeric budget checks are compared only after that health gate passes. `comparison.json` includes `comparisonBasis` with the baseline/current run ids and run directories, giving agents artifact-local provenance instead of forcing them to infer it from folder names. It also includes `measurementPolicy`, which records the baseline selection mode, poisoning protections, valid sample counts, timing tolerance, and confidence level used for the comparison.
+The latest-trusted command excludes the exact current run directory from baseline selection. Baseline trust requires passed health and passed verdict. For attempt-aware artifacts, baseline trust also requires a clean first passed attempt, no retry lineage, no failed or partial cleanup, and no valid partial-artifact diagnostic fragments. Current runs must pass scenario health before the command will compare timing or budget evidence. If the current manifest declares `comparisonLane`, baseline selection is scoped to trusted prior runs with the same lane; if the current manifest has no lane, selection stays within unlabeled trusted prior runs. Profile manifests also include `scenarioHash`, a stable fingerprint of the normalized scenario contract. When the current run has that hash, latest-trusted selection only compares against trusted prior runs with the same hash; legacy runs without the hash remain comparable only to legacy current runs. This keeps proof modes such as plain live proof and live proof plus agent-device sidecar from comparing against each other, and it keeps migrated scenario definitions from poisoning before/after verdicts. Latest-trusted artifacts set `comparisonBasis.strategy` to `latest_trusted_prior`, record selection counts for inspected, trusted, trusted-prior, lane-comparable, and scenario-contract-comparable candidates, and mirror the active lane, scenario hash, and cohort hash inside `measurementPolicy.baselineSelection.poisoningProtection` when those filters are active.
 ## Release Gate
 Before publishing, run:
@@ -263,8 +346,8 @@ Package smoke and consumer rehearsal keep child commands bounded so package-mana
 ASL_PACKAGE_GATE_TIMEOUT_MS=300000 pnpm release:check
 ```
-Read next:
+## Side References
-- [Contracts](contracts.md) for artifact layout and supported runner surface
 - [Consumer App Rehearsal](consumer-rehearsal.md) for adoption inside an existing app
 - [examples/mobile-app](../examples/mobile-app/README.md) for detailed dogfood app commands
+- [Public API](api.md) for package imports and programmable runner composition

package/docs/principles.md CHANGED Viewed

@@ -1,28 +1,24 @@
 # Principles
-`agent-scenario-loop` is a scenario orchestration and evidence collection layer for agent-driven software development.
+`agent-scenario-loop` has one durable claim: scenarios, contracts, and evidence must outlive the current runner.
-Read this after [Concepts](concepts.md) if you want the project doctrine in a compact form.
-The durable value is not any one runner. The durable value is a stable scenario and evidence contract that survives runner changes.
-It is not another agent runner. It is the layer that coordinates runners, preserves evidence, and keeps scenarios useful as tooling changes.
-Scenarios are long-lived project assets. They describe important application behaviors, not the temporary mechanics of the current runner.
+Read this after [Concepts](concepts.md). Concepts explains the model; this page is the compressed doctrine.
 ## Four planes
+ASL separates mobile proof into four planes. Mixing them is the usual source of flaky claims.
 1. Control plane
-Use semantic app commands, deep links, and deterministic hooks before falling back to raw UI replay.
+Use semantic app commands, deep links, and deterministic hooks to start and steer the scenario. Raw UI replay is a realism check, not the preferred control architecture.
 2. Truth plane
-Use explicit profile events, stored signals, route state, and committed artifacts as the source of truth.
+Use app-owned truth events, stored signals, route state, and committed artifacts as the source of what happened.
 3. Evidence plane
-Preserve logs, screenshots, videos, profiler exports, memory captures, network captures, UI trees, metrics, and verdicts in one stable artifact layout.
+Preserve logs, screenshots, videos, profiler exports, memory captures, network captures, UI trees, metrics, verdicts, comparisons, and summaries in one stable artifact layout.
 4. Realism plane
-Use taps, swipes, and full UI interaction for realism checks and last-mile validation, not as the primary control architecture.
+Use taps, swipes, alerts, full UI interaction, and external device tools to prove the app still behaves under real interaction pressure.
 ## Invariants
@@ -41,6 +37,4 @@ Use taps, swipes, and full UI interaction for realism checks and last-mile valid
 ## Read next
-- [Contracts](contracts.md) for the current artifact and package surface
-- [Runner docs](../runner/README.md) for the host execution boundary
-- [README](../README.md) for the project entrypoint
+- [Architecture](architecture.md) for the TypeScript-first, language-neutral contract boundary

package/examples/mobile-app/README.md CHANGED Viewed

@@ -233,6 +233,12 @@ pnpm example:android:live -- --run-suffix before-change
 pnpm example:android:live -- --run-suffix after-change
 ```
+When a release, native build, or scenario edit changes the compatible cohort, seed a fresh trusted baseline in the same command:
+```bash
+pnpm example:android:live -- --run-suffix release-check --seed-baseline
+```
 The individual live commands remain useful while debugging one scenario:
 ```bash
@@ -289,6 +295,12 @@ The root example live scripts pass `--compare-latest --fail-on-regression` by de
 pnpm example:ios:live -- --run-suffix after-change
 ```
+When there is no compatible trusted iOS baseline for the current release cohort, seed one before the measured run:
+```bash
+pnpm example:ios:live -- --run-suffix release-check --seed-baseline
+```
 If global `xcode-select` points at a beta Xcode whose simulator services are not ready, set `ASL_EXAMPLE_XCODE_DEVELOPER_DIR` before the Node runner starts:
 ```bash

package/examples/mobile-app/runner-manifests/evidence-provider.json CHANGED Viewed

@@ -6,11 +6,11 @@
   "capabilities": ["accessibility", "memory", "network", "profiler"],
   "driverActions": ["collectPerfSignals"],
   "artifactOutputs": ["accessibility", "memory", "network", "profiler", "signals"],
-  "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
+  "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
   "providerCommands": [
     {
       "id": "capture-accessibility",
-      "phase": "capture",
+      "phase": "afterCapture",
       "command": "node",
       "cwd": "..",
       "args": [
@@ -34,7 +34,7 @@
     },
     {
       "id": "capture-profiler",
-      "phase": "capture",
+      "phase": "afterCapture",
       "command": "node",
       "cwd": "..",
       "args": [

package/examples/mobile-app/runner-manifests/primary-runner.json CHANGED Viewed

@@ -6,6 +6,7 @@
   "capabilities": ["launch", "sessionControl", "command", "logCapture", "artifactWrite"],
   "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot", "record", "readLogs"],
   "artifactOutputs": ["logs", "signals", "screenshot", "video", "uiTree"],
+  "uiContexts": ["app"],
   "lifecycle": [
     "prepare",
     "launch",

package/examples/mobile-app/scripts/asl-capture-profiler-provider.mjs CHANGED Viewed

@@ -80,6 +80,31 @@ function writeProviderEvidence({
   writeJsonArtifact(outPath, {
     ...shared,
+    captureMode: 'afterCapture',
+    completenessStatus: 'complete',
+    comparability: {
+      status: 'diagnostic-only',
+      reason: 'Deterministic example evidence is collected after the profile window and is not used for timing budgets.',
+    },
+    dataClasses: ['cpu-samples'],
+    lifecycle: {
+      phase: 'afterCapture',
+      perturbsTiming: false,
+    },
+    metrics: {
+      commitCount: 0,
+      droppedFrameCount: 0,
+      jsLongTaskCount: 0,
+    },
+    profileKind: 'diagnostic-summary',
+    targetBinding: {
+      status: 'verified',
+      source: 'example-provider-fixture',
+    },
+    tool: {
+      name: 'agent-scenario-loop deterministic profiler',
+      version: '1.0.0',
+    },
     samples: [],
     summary: 'Deterministic example profiler evidence for package and consumer rehearsal.',
   });

package/examples/runners/README.md CHANGED Viewed

@@ -32,13 +32,14 @@ They do not mean the package bundles every named tool. A fixture describes what
 - Keep `capabilities` about lifecycle or evidence ownership.
 - Keep `driverActions` about concrete operations the adapter can perform.
+- Keep `uiContexts` about the surface the adapter can own; do not use `app` proof for system dialogs, share sheets, external browsers, WebViews, pickers, notifications, or another app unless the manifest explicitly declares that context.
 - Do not add a capability or driver action until a runner or provider can produce the corresponding evidence.
-- Keep `providerCommands` on evidence-provider manifests; primary runners should own lifecycle orchestration, not provider command wrappers.
+- Keep `providerCommands` on evidence-provider manifests; primary runners should own lifecycle orchestration, not provider command wrappers. Prefer `phase: "afterCapture"` for diagnostics that inspect an already captured adb/simctl sidecar.
 - When a tool writes files independently, attach them through `--signal`, `--capture`, or a `providerCommands` manifest so the run keeps stable artifact paths.
 - Treat these manifests as starting contracts; consuming apps can narrow them to match the exact adapter they install.
 ## Tool Surface Notes
-The bundled `agent-device` driver adapter and `asl-agent-device` capture runner map the declared portable subset: app open/close, alert inspection, `tap`, `scroll`, `assertVisible`, `inspectTree`, `screenshot`, and `readLogs`. Planner compatibility validates the agent-device target metadata that must be known before runtime: `tap` needs a selector, `adapterOptions.agentDevice.ref`, or `adapterOptions.agentDevice.x/y`; `assertVisible` needs a portable selector; selector matching must be exact until the adapter maps richer match modes. The agent-device CLI may expose more commands than the fixture declares, including recording, performance, network, trace, batch, and React DevTools operations. Keep those out of the primary runner manifest until an adapter maps them into stable Agent Scenario Loop artifacts. For example, performance or React DevTools output should usually start as an evidence provider or signal attachment, while `record` should only be declared once video capture is wired into `captures.video`.
+The bundled `agent-device` driver adapter and `asl-agent-device` capture runner map the declared portable subset: app open/close, alert inspection, `tap`, `scroll`, `assertVisible`, `inspectTree`, `screenshot`, and `readLogs`. Planner compatibility validates the agent-device target metadata that must be known before runtime: `tap` needs a selector, `adapterOptions.agentDevice.ref`, or `adapterOptions.agentDevice.x/y`; `assertVisible` needs a portable selector; selector matching must be exact until the adapter maps richer match modes. The agent-device CLI may expose more commands than the fixture declares, including recording, performance, network, trace, batch, and React DevTools operations. Keep those out of the primary runner manifest until an adapter maps them into stable Agent Scenario Loop artifacts. For example, Android snapshots, network dumps, and performance evidence can be attached through a provider once the project proves those commands on its devices; React DevTools, traces, and recording should stay in explicit heavy lanes until their outputs are stable ASL artifacts.
-The Argent fixtures are external-tool contracts, not bundled package dependencies. `@swmansion/argent` exposes a local MCP/CLI surface for iOS Simulator and Android Emulator control, so Agent Scenario Loop should keep two lanes distinct when an app adopts it: a primary interaction adapter for launch, gestures, screenshot requests, and UI descriptions, and a provider lane for profiler output such as React commit or CPU summaries. Android can pair fast adb interaction with an Argent profiler provider so profiling startup cost does not slow every tap or scroll. iOS adapters should treat native-devtools disconnects, restart-required statuses, required screenshot failures, and root-only UI descriptions as failed scenario health, because timing budgets are not trustworthy when required UI evidence is unverifiable. Optional screenshot failures should stay visible as warnings. When Argent can prove launch and accessibility but its iOS screenshot backend is unavailable, ASL may attach simctl as a screenshot fallback provider while keeping the Argent warning in health. Argent output files should enter ASL through `raw/`, `captures/`, `signals/js`, or provider-command attachments with stable manifest inventory; do not create Argent-specific top-level artifact folders.
+The Argent fixtures are external-tool contracts, not bundled package dependencies. `@swmansion/argent` exposes a local MCP/CLI surface for iOS Simulator and Android Emulator control, so Agent Scenario Loop should keep two lanes distinct when an app adopts it: a primary interaction adapter for launch, gestures, screenshot requests, and UI descriptions, and a provider lane for profiler output such as React commit or CPU summaries. Android can pair fast adb interaction with an Argent profiler provider so profiling startup cost does not slow every tap or scroll. iOS can use Argent `describe` as AXRuntime accessibility evidence when that command is reliable for the selected simulator and bundle; treat native UIKit hierarchy restart requirements as a separate unsupported or heavy diagnostic until the project can capture them consistently. iOS adapters should treat native-devtools disconnects, restart-required statuses, required screenshot failures, and root-only UI descriptions as failed scenario health, because timing budgets are not trustworthy when required UI evidence is unverifiable. Optional screenshot failures should stay visible as warnings. When Argent can prove launch and accessibility but its iOS screenshot backend is unavailable, ASL may attach simctl as a screenshot fallback provider while keeping the Argent warning in health. Argent output files should enter ASL through `raw/`, `captures/`, `signals/js`, or provider-command attachments with stable manifest inventory; do not create Argent-specific top-level artifact folders. React profiler CPU summaries are lifecycle evidence when they require a prior start/stop session. Provider output should preserve target-binding proof, raw profile attachments, derived summaries, and diagnostic-only/comparable status instead of treating those summaries as passive snapshots.

package/examples/runners/adb-android.json CHANGED Viewed

@@ -12,6 +12,7 @@
   ],
   "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot", "record", "readLogs"],
   "artifactOutputs": ["logs", "signals", "screenshot", "video", "uiTree"],
+  "uiContexts": ["app"],
   "lifecycle": [
     "prepare",
     "launch",

package/examples/runners/agent-device-android.json CHANGED Viewed

@@ -14,6 +14,7 @@
   ],
   "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot", "readLogs"],
   "artifactOutputs": ["logs", "signals", "screenshot", "uiTree"],
+  "uiContexts": ["app"],
   "lifecycle": [
     "prepare",
     "launch",

package/examples/runners/agent-device-ios.json CHANGED Viewed

@@ -14,6 +14,7 @@
   ],
   "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot", "readLogs"],
   "artifactOutputs": ["logs", "signals", "screenshot", "uiTree"],
+  "uiContexts": ["app"],
   "lifecycle": [
     "prepare",
     "launch",

package/examples/runners/argent-android.json CHANGED Viewed

@@ -14,6 +14,7 @@
   ],
   "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot"],
   "artifactOutputs": ["logs", "signals", "screenshot", "uiTree"],
+  "uiContexts": ["app"],
   "lifecycle": [
     "prepare",
     "launch",

package/examples/runners/argent-ios.json CHANGED Viewed

@@ -14,6 +14,7 @@
   ],
   "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot"],
   "artifactOutputs": ["logs", "signals", "screenshot", "uiTree"],
+  "uiContexts": ["app"],
   "lifecycle": [
     "prepare",
     "launch",

package/examples/runners/axe-accessibility-provider.json CHANGED Viewed

@@ -5,11 +5,11 @@
   "platforms": ["ios", "android"],
   "capabilities": ["accessibility"],
   "artifactOutputs": ["accessibility"],
-  "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
+  "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
   "providerCommands": [
     {
       "id": "capture-accessibility",
-      "phase": "capture",
+      "phase": "afterCapture",
       "command": "axe",
       "args": ["--output", "{providerDir}/accessibility.json"],
       "outputs": [

package/examples/runners/script-accessibility-provider.json CHANGED Viewed

@@ -5,11 +5,11 @@
   "platforms": ["ios", "android"],
   "capabilities": ["accessibility"],
   "artifactOutputs": ["accessibility"],
-  "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
+  "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
   "providerCommands": [
     {
       "id": "capture-accessibility",
-      "phase": "capture",
+      "phase": "afterCapture",
       "command": "capture-accessibility",
       "args": ["--platform", "{platform}", "--out", "{providerDir}/accessibility.json"],
       "outputs": [

package/examples/runners/script-memory-provider.json CHANGED Viewed

@@ -5,11 +5,11 @@
   "platforms": ["ios", "android"],
   "capabilities": ["memory"],
   "artifactOutputs": ["memory", "signals"],
-  "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
+  "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
   "providerCommands": [
     {
       "id": "capture-memory",
-      "phase": "capture",
+      "phase": "afterCapture",
       "command": "capture-memory",
       "args": ["--platform", "{platform}", "--run-id", "{runId}", "--out", "{providerDir}/memory.json"],
       "outputs": [

package/examples/runners/script-network-provider.json CHANGED Viewed

@@ -5,11 +5,11 @@
   "platforms": ["ios", "android"],
   "capabilities": ["network"],
   "artifactOutputs": ["network", "signals"],
-  "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
+  "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
   "providerCommands": [
     {
       "id": "capture-network",
-      "phase": "capture",
+      "phase": "afterCapture",
       "command": "capture-network",
       "args": ["--platform", "{platform}", "--scenario", "{scenarioId}", "--out", "{providerDir}/network.har"],
       "outputs": [

package/examples/runners/script-profiler-provider.json CHANGED Viewed

@@ -6,11 +6,11 @@
   "capabilities": ["profiler"],
   "driverActions": ["collectPerfSignals"],
   "artifactOutputs": ["profiler", "signals"],
-  "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "finalize"],
+  "lifecycle": ["prepare", "startWindow", "capture", "stopWindow", "afterCapture", "finalize"],
   "providerCommands": [
     {
       "id": "capture-profiler",
-      "phase": "capture",
+      "phase": "afterCapture",
       "command": "capture-profiler",
       "args": ["--platform", "{platform}", "--run-id", "{runId}", "--out", "{providerDir}/profiler.json"],
       "outputs": [

package/examples/runners/xcodebuildmcp-ios.json CHANGED Viewed

@@ -16,6 +16,7 @@
   ],
   "driverActions": ["tap", "scroll", "assertVisible", "inspectTree", "screenshot", "record", "readLogs"],
   "artifactOutputs": ["logs", "signals", "screenshot", "video", "uiTree", "accessibility"],
+  "uiContexts": ["app"],
   "lifecycle": [
     "prepare",
     "launch",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agent-scenario-loop",
-  "version": "0.1.2",
+  "version": "0.1.4",
   "private": false,
   "description": "Scenario orchestration and evidence collection for agent-driven software development. Bring your own runner. Keep your scenarios. Keep your evidence.",
   "license": "MIT",
@@ -59,6 +59,12 @@
       "require": "./dist/index.js",
       "default": "./dist/index.js"
     },
+    "./app/profile-session": {
+      "types": "./app/profile-session.ts",
+      "import": "./app/profile-session.ts",
+      "require": "./app/profile-session.ts",
+      "default": "./app/profile-session.ts"
+    },
     "./runner/android-adb": {
       "types": "./dist/runner/android-adb.d.ts",
       "import": "./dist/runner/android-adb.js",
@@ -200,7 +206,7 @@
     "app/profile-session.ts",
     "core/config-template.json",
     "dist",
-    "!dist/scripts",
+    "!dist/**/__tests__",
     "docs",
     "examples",
     "!examples/mobile-app/.expo",
@@ -223,6 +229,7 @@
     "clean": "node -e \"require('node:fs').rmSync('dist',{recursive:true,force:true})\"",
     "prepack": "npm run clean && npm run build",
     "consumer:rehearse": "pnpm build && node dist/scripts/consumer-rehearsal.js",
+    "downstream:local-package": "pnpm build && node dist/scripts/downstream-local-package-gate.js",
     "package:smoke": "pnpm build && node dist/scripts/package-smoke.js",
     "prepublishOnly": "pnpm release:check",
     "release:check": "pnpm test && pnpm release:readiness && pnpm package:smoke && pnpm consumer:rehearse",
@@ -279,8 +286,10 @@
     "profile:ios:open-close": "pnpm build && node dist/runner/profile-ios.js --config core/config-template.json --scenario examples/scenarios/ios/open-close-cycle.json",
     "validate-project": "pnpm build && node dist/runner/validate-project.js"
   },
+  "dependencies": {
+    "@types/node": "^25.9.3"
+  },
   "devDependencies": {
-    "@types/node": "^25.9.3",
     "typescript": "^6.0.3"
   }
 }

package/schemas/causal-run.schema.json CHANGED Viewed

@@ -98,10 +98,26 @@
         }
       }
     },
+    "provenanceRef": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["manifest", "runId"],
+      "properties": {
+        "manifest": {
+          "type": "string"
+        },
+        "runId": {
+          "type": "string"
+        },
+        "scenarioHash": {
+          "type": "string",
+          "pattern": "^[a-f0-9]{64}$"
+        }
+      }
+    },
     "budgets": {
       "type": "object",
       "description": "Named budget thresholds the run should be evaluated against.",
-      "minProperties": 1,
       "additionalProperties": {
         "$ref": "#/$defs/budgetThreshold"
       }
@@ -112,6 +128,41 @@
         "$ref": "#/$defs/timelineEvent"
       }
     },
+    "iterationSummary": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["expected", "completed", "failed", "timeouts", "incomplete", "status"],
+      "properties": {
+        "expected": {
+          "type": "integer",
+          "minimum": 1
+        },
+        "completed": {
+          "type": "integer",
+          "minimum": 0
+        },
+        "failed": {
+          "type": "integer",
+          "minimum": 0
+        },
+        "timeouts": {
+          "type": "integer",
+          "minimum": 0
+        },
+        "incomplete": {
+          "type": "array",
+          "uniqueItems": true,
+          "items": {
+            "type": "integer",
+            "minimum": 1
+          }
+        },
+        "status": {
+          "type": "string",
+          "enum": ["complete", "partial", "failed", "timeout"]
+        }
+      }
+    },
     "artifacts": {
       "type": "object",
       "additionalProperties": false,
@@ -169,12 +220,31 @@
           "items": {
             "type": "object",
             "additionalProperties": false,
-            "required": ["channel", "kind", "path", "sha256", "sizeBytes", "sourceFileName"],
+            "required": [
+              "channel",
+              "completenessStatus",
+              "corruptionStatus",
+              "kind",
+              "path",
+              "redactionStatus",
+              "sha256",
+              "sizeBytes",
+              "sourceFileName",
+              "transformations"
+            ],
             "properties": {
               "channel": {
                 "type": "string",
                 "enum": ["capture", "provider", "signal"]
               },
+              "completenessStatus": {
+                "type": "string",
+                "enum": ["complete", "truncated", "unknown"]
+              },
+              "corruptionStatus": {
+                "type": "string",
+                "enum": ["valid", "corrupt", "unknown"]
+              },
               "kind": {
                 "type": "string",
                 "enum": ["accessibility", "js", "logs", "memory", "network", "profiler", "screenshot", "uiTree", "video"]
@@ -183,6 +253,10 @@
                 "type": "string",
                 "minLength": 1
               },
+              "redactionStatus": {
+                "type": "string",
+                "enum": ["not-redacted", "redacted", "unknown"]
+              },
               "sha256": {
                 "type": "string",
                 "pattern": "^[a-f0-9]{64}$"
@@ -194,6 +268,15 @@
               "sourceFileName": {
                 "type": "string",
                 "minLength": 1
+              },
+              "transformations": {
+                "type": "array",
+                "minItems": 1,
+                "uniqueItems": true,
+                "items": {
+                  "type": "string",
+                  "enum": ["copied", "normalized", "redacted", "truncated", "compressed", "transcoded", "unknown"]
+                }
               }
             }
           }