@openwop/openwop-conformance 1.6.1 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. package/CHANGELOG.md +10 -0
  2. package/README.md +2 -2
  3. package/api/asyncapi.yaml +57 -0
  4. package/api/openapi.yaml +250 -0
  5. package/coverage.md +14 -0
  6. package/fixtures/conformance-run-duration-breach.json +33 -0
  7. package/fixtures.md +19 -0
  8. package/package.json +1 -1
  9. package/schemas/README.md +10 -0
  10. package/schemas/agent-inventory-response.schema.json +90 -0
  11. package/schemas/ai-envelope.schema.json +28 -0
  12. package/schemas/artifact-type-pack-manifest.schema.json +160 -0
  13. package/schemas/capabilities.schema.json +171 -4
  14. package/schemas/chat-card-pack-manifest.schema.json +158 -0
  15. package/schemas/envelopes/media.audio.schema.json +38 -0
  16. package/schemas/envelopes/media.file.schema.json +37 -0
  17. package/schemas/envelopes/media.image.schema.json +33 -0
  18. package/schemas/heartbeat-evaluated.schema.json +14 -0
  19. package/schemas/heartbeat-state-changed.schema.json +14 -0
  20. package/schemas/node-pack-manifest.schema.json +16 -1
  21. package/schemas/run-event-payloads.schema.json +96 -5
  22. package/schemas/run-event.schema.json +4 -0
  23. package/schemas/workflow-definition.schema.json +5 -0
  24. package/schemas/workspace-file-create.schema.json +20 -0
  25. package/schemas/workspace-file.schema.json +39 -0
  26. package/src/lib/agentLoop.ts +44 -0
  27. package/src/lib/agentRuntime.ts +45 -0
  28. package/src/lib/artifactTypes.ts +96 -0
  29. package/src/lib/cardPacks.ts +52 -0
  30. package/src/lib/discovery-capabilities.ts +50 -0
  31. package/src/lib/distillation.ts +38 -0
  32. package/src/lib/feedback.ts +3 -3
  33. package/src/lib/heartbeat.ts +31 -0
  34. package/src/lib/memoryAttribution.ts +48 -0
  35. package/src/lib/subRunAttestation.ts +35 -0
  36. package/src/lib/toolHooks.ts +33 -0
  37. package/src/scenarios/agent-loop-iteration-monotonic.test.ts +33 -0
  38. package/src/scenarios/agent-loop-stateful-resume.test.ts +28 -0
  39. package/src/scenarios/agent-loop-version5-shape.test.ts +41 -0
  40. package/src/scenarios/agent-loop-workspace-snapshot.test.ts +33 -0
  41. package/src/scenarios/agent-manifest-runtime.test.ts +85 -0
  42. package/src/scenarios/ai-envelope-shape.test.ts +14 -18
  43. package/src/scenarios/aiEnvelope.capBreached.test.ts +2 -1
  44. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +2 -1
  45. package/src/scenarios/aiEnvelope.universalKinds.test.ts +2 -1
  46. package/src/scenarios/approval-gate-flow.test.ts +4 -6
  47. package/src/scenarios/artifact-schema-compile-bounded.test.ts +126 -0
  48. package/src/scenarios/artifact-type-pack-install.test.ts +78 -0
  49. package/src/scenarios/artifact-type-pack-manifest-validation.test.ts +140 -0
  50. package/src/scenarios/artifact-type-store-without-render.test.ts +54 -0
  51. package/src/scenarios/audit-log-integrity.test.ts +3 -2
  52. package/src/scenarios/auth-api-key-rotation.test.ts +2 -1
  53. package/src/scenarios/auth-mtls.test.ts +2 -1
  54. package/src/scenarios/auth-oauth2-client-credentials.test.ts +2 -1
  55. package/src/scenarios/auth-oidc-user-bearer.test.ts +2 -1
  56. package/src/scenarios/auth-saml-profile.test.ts +2 -1
  57. package/src/scenarios/auth-scim-profile.test.ts +2 -1
  58. package/src/scenarios/authorization-fail-closed.test.ts +2 -1
  59. package/src/scenarios/authorization-roles-shape.test.ts +2 -1
  60. package/src/scenarios/byok-auth-modes.test.ts +141 -0
  61. package/src/scenarios/chat-card-pack-execution.test.ts +56 -0
  62. package/src/scenarios/chat-card-pack-manifest-validation.test.ts +128 -0
  63. package/src/scenarios/commitment-fired.test.ts +83 -0
  64. package/src/scenarios/credential-payload-redaction.test.ts +2 -1
  65. package/src/scenarios/credentials-capability-shape.test.ts +2 -1
  66. package/src/scenarios/cross-engine-append-ordering.test.ts +2 -1
  67. package/src/scenarios/cross-host-ancestry-endpoint.test.ts +3 -2
  68. package/src/scenarios/cross-host-causation-shape.test.ts +3 -2
  69. package/src/scenarios/deadletter-capability-shape.test.ts +2 -1
  70. package/src/scenarios/deadletter-retry-exhaustion.test.ts +2 -1
  71. package/src/scenarios/distillation-index-roundtrip.test.ts +35 -0
  72. package/src/scenarios/distillation-secret-carryforward.test.ts +35 -0
  73. package/src/scenarios/distillation-shape.test.ts +41 -0
  74. package/src/scenarios/distillation-stable-archive.test.ts +37 -0
  75. package/src/scenarios/distillation-token-budget.test.ts +45 -0
  76. package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +4 -3
  77. package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +5 -4
  78. package/src/scenarios/envelope-reasoning-shape.test.ts +3 -2
  79. package/src/scenarios/envelope-refusal-shape.test.ts +3 -2
  80. package/src/scenarios/envelope-rendering-hint.test.ts +95 -0
  81. package/src/scenarios/envelope-retry-attempted.test.ts +2 -1
  82. package/src/scenarios/envelope-tier-one-subset-static.test.ts +3 -2
  83. package/src/scenarios/exec-not-protocol-tier.test.ts +137 -0
  84. package/src/scenarios/experimental-tier-shape.test.ts +5 -4
  85. package/src/scenarios/fs-path-traversal.test.ts +2 -1
  86. package/src/scenarios/heartbeat-capability-shape.test.ts +35 -0
  87. package/src/scenarios/heartbeat-fires-once-per-tick.test.ts +28 -0
  88. package/src/scenarios/heartbeat-idempotent-no-spam.test.ts +43 -0
  89. package/src/scenarios/heartbeat-runtime-bound.test.ts +30 -0
  90. package/src/scenarios/http-client-ssrf.test.ts +10 -13
  91. package/src/scenarios/mcp-toolcall-redaction.test.ts +3 -2
  92. package/src/scenarios/media-url-inline-cap.test.ts +167 -0
  93. package/src/scenarios/memory-attribution-emits-on-write.test.ts +54 -0
  94. package/src/scenarios/memory-attribution-no-content.test.ts +45 -0
  95. package/src/scenarios/memory-attribution-replay-stable.test.ts +60 -0
  96. package/src/scenarios/memory-attribution-shape.test.ts +28 -0
  97. package/src/scenarios/memory-attribution-tenant-scoped.test.ts +44 -0
  98. package/src/scenarios/memory-compaction-event-emitted.test.ts +2 -1
  99. package/src/scenarios/memory-compaction-provenance-tag.test.ts +2 -1
  100. package/src/scenarios/memory-compaction-sr1-carry-forward.test.ts +2 -1
  101. package/src/scenarios/memory-consolidation-idempotent.test.ts +77 -0
  102. package/src/scenarios/memory-consolidation-shape.test.ts +90 -0
  103. package/src/scenarios/model-capability-substituted.test.ts +2 -1
  104. package/src/scenarios/multi-agent-confidence-escalation.test.ts +5 -4
  105. package/src/scenarios/multi-agent-handoff-state-machine.test.ts +6 -5
  106. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +4 -3
  107. package/src/scenarios/multi-region-idempotency.test.ts +10 -10
  108. package/src/scenarios/oauth-capability-shape.test.ts +2 -1
  109. package/src/scenarios/oauth-connector-redaction.test.ts +2 -1
  110. package/src/scenarios/pause-resume.test.ts +3 -3
  111. package/src/scenarios/production-backpressure.test.ts +2 -2
  112. package/src/scenarios/production-retention-expiry.test.ts +2 -2
  113. package/src/scenarios/prompt-all-four-kinds-events.test.ts +2 -1
  114. package/src/scenarios/prompt-composed-secret-redaction.test.ts +2 -1
  115. package/src/scenarios/prompt-composed-trust-marker.test.ts +2 -1
  116. package/src/scenarios/prompt-end-to-end-events.test.ts +2 -1
  117. package/src/scenarios/prompt-list-and-fetch.test.ts +2 -1
  118. package/src/scenarios/prompt-mutable-lifecycle.test.ts +2 -1
  119. package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +2 -1
  120. package/src/scenarios/prompt-pack-install.test.ts +2 -1
  121. package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +2 -1
  122. package/src/scenarios/prompt-render-deterministic.test.ts +2 -1
  123. package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +2 -1
  124. package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +2 -1
  125. package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +2 -1
  126. package/src/scenarios/prompt-template-shape.test.ts +2 -1
  127. package/src/scenarios/provider-usage.test.ts +2 -1
  128. package/src/scenarios/replay-divergence-at-refusal.test.ts +4 -3
  129. package/src/scenarios/replay-fork-arbitrary.test.ts +3 -1
  130. package/src/scenarios/replay-llm-cache-key-portable.test.ts +2 -1
  131. package/src/scenarios/replayDeterminism.test.ts +3 -1
  132. package/src/scenarios/run-execution-bounds-shape.test.ts +133 -0
  133. package/src/scenarios/sandbox-memory-cap.test.ts +2 -1
  134. package/src/scenarios/sandbox-mvp-behavior.test.ts +2 -1
  135. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +2 -1
  136. package/src/scenarios/sandbox-timeout-cap.test.ts +2 -1
  137. package/src/scenarios/scheduling-capability-shape.test.ts +2 -1
  138. package/src/scenarios/scheduling-cron-fires-once.test.ts +2 -1
  139. package/src/scenarios/secret-leakage-otel-attribute.test.ts +7 -6
  140. package/src/scenarios/spec-corpus-validity.test.ts +1 -1
  141. package/src/scenarios/subrun-approval-fail-closed.test.ts +33 -0
  142. package/src/scenarios/subrun-approval-gate.test.ts +35 -0
  143. package/src/scenarios/subrun-attestation-shape.test.ts +30 -0
  144. package/src/scenarios/subrun-checksum-stable.test.ts +43 -0
  145. package/src/scenarios/tool-hooks-authorization-fail-closed.test.ts +39 -0
  146. package/src/scenarios/tool-hooks-content-free.test.ts +40 -0
  147. package/src/scenarios/tool-hooks-rate-limit.test.ts +32 -0
  148. package/src/scenarios/tool-hooks-secret-redaction.test.ts +34 -0
  149. package/src/scenarios/tool-hooks-shape.test.ts +34 -0
  150. package/src/scenarios/wasm-pack-abi-version-rejection.test.ts +3 -10
  151. package/src/scenarios/wasm-pack-invoke-completed.test.ts +2 -2
  152. package/src/scenarios/wasm-pack-invoke-suspended.test.ts +2 -2
  153. package/src/scenarios/wasm-pack-load.test.ts +2 -2
  154. package/src/scenarios/wasm-pack-memory-cap.test.ts +3 -6
  155. package/src/scenarios/wasm-pack-replay-determinism.test.ts +2 -2
  156. package/src/scenarios/workflow-primary-output-annotation.test.ts +142 -0
  157. package/src/scenarios/workspace-behavior.test.ts +134 -0
  158. package/src/scenarios/workspace-capability-shape.test.ts +73 -0
  159. package/src/scenarios/workspace-cross-tenant-isolation.test.ts +84 -0
package/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # `@openwop/openwop-conformance` Changelog
2
2
 
3
+ ## [1.8.0] — 2026-05-26
4
+
5
+ Minor bump — ships the conformance scenarios for the **spec-gap Draft cohort (RFCs 0067 / 0068 / 0069)**. All additive: every behavioral scenario is capability-gated and soft-skips against a host that doesn't advertise the surface (or doesn't wire the seam), and the always-on scenarios are server-free schema/corpus assertions. Existing v1.0-only hosts pass unchanged. (1.7.0 was the lockstep version bumped in the v1.1.4 release without its own scenario delta; this entry resumes the scenario-delta narrative.)
6
+
7
+ ### Added — RFC 0067 / 0068 / 0069 scenarios
8
+
9
+ - **RFC 0067** (provider-catalog conventions) — `byok-auth-modes.test.ts` (always-on schema-shape of `aiProviders.authModes` + the four-mode enum; discovery-gated §B auth-mode-contract cross-field checks — every `authModes` key in `supported`, every `apiKey` provider in `byok`, every `["none"]` provider absent from `byok`, `oauth-*` providers aligned to `capabilities.oauth.providers[]`).
10
+ - **RFC 0068** (memory consolidation + standing commitments) — `memory-consolidation-shape.test.ts` (always-on; the `agents.memoryConsolidation`/`agents.commitments` capability blocks + the `agent.memory.consolidated`/`commitment.fired` payload $defs, incl. the negative that `commitment.fired` without `memoryRef` is rejected), `memory-consolidation-idempotent.test.ts` (gated on `agents.memoryConsolidation.supported`; §D `outputCount ≤ inputCount` + the no-op-second-pass idempotence MUST + SR-1 carry-forward, via the `/v1/host/sample/memory/consolidate` seam), `commitment-fired.test.ts` (gated on `agents.commitments.supported`; content-free `commitment.fired` with `memoryRef` provenance + fire-once + the no-intention-text assertion, via the `/v1/host/sample/commitment/fire` seam).
11
+ - **RFC 0069** (exec-class tool host-extension safety contract) — `exec-not-protocol-tier.test.ts` (always-on, server-free structural assertion that no `core.*`/`openwop.*` identifier, no `capabilities.schema.json` property, and no RunEventType denotes arbitrary command execution; positive control allows `vendor.*`/`x-host-*-exec`). Backs the new protocol-tier SECURITY invariant `exec-must-not-be-protocol-tier`.
12
+
3
13
  ## [1.6.1] — 2026-05-25
4
14
 
5
15
  Patch — fixes a stale allowlist in `redaction.test.ts` that contradicted the same release's `capabilities.schema.json`. Reported by MyndHyve against the 1.6.0 cohort run.
package/README.md CHANGED
@@ -93,7 +93,7 @@ Exit code is non-zero on any failed assertion.
93
93
 
94
94
  ## What's Covered
95
95
 
96
- The current suite has 238 scenario files under `src/scenarios/`. 2026-05-25 (RFC 0025 §C point 1 — test-catalog isolation invariant; pairs with the 25 publish-error scenarios in `pack-registry-publish.test.ts`) added `pack-registry-isolation.test.ts` — capability-gated on `capabilities.packs.testMode.{supported, isolated}: true`; PUTs a disposable pack into `/v1/packs-test/{name}` and asserts the same `(name, version)` does NOT appear via `GET /v1/packs/{name}` — anchors the test-catalog isolation MUST in RFC 0025 §C. 2026-05-25 (RFC 0028 Tier-2 post-promotion T2 — read-side sister scenario for workspace-membership enforcement) added `prompt-read-workspace-membership-enforced.test.ts` — gates on `capabilities.prompts.supported: true` (broader than `mutableLibrary` so read-only hosts that expose `?workspaceId=` are also probed); drives `GET /v1/prompts?workspaceId=<random-non-member>` and interprets the response: 4xx PASS (canonical envelope check on 403); 200 with empty `templates[]` PASS (correct null result for a nonexistent workspace); 200 with non-empty `templates[]` FAIL (cross-tenant leak); 200 without `templates[]` field SKIP (host doesn't expose workspace-scoped reads). Verifies SECURITY invariant `prompt-read-workspace-membership-enforced`. Same-day T1 strengthened `prompt-mutation-workspace-membership-enforced.test.ts` to pin `error === "workspace_membership_required"` when the host's refusal status is 403 (other refusal codes unconstrained). 2026-05-25 (RFC 0028 Tier-2 follow-up — workspace-membership enforcement on mutating prompt endpoints, filed in response to a self-disclosed adopter vulnerability) added `prompt-mutation-workspace-membership-enforced.test.ts` — capability-gated on `capabilities.prompts.mutableLibrary: true`; drives `POST /v1/prompts` with a cryptographically-random non-member `workspaceId` and asserts the host refuses (NOT a 2xx; any 4xx/5xx is acceptable — silent success is the failure mode). 
Verifies SECURITY invariant `prompt-mutation-workspace-membership-enforced`. 2026-05-22 (RFC 0034 §B follow-up — secret-leakage harness against the OTel + debug-bundle seams) added `secret-leakage-otel-attribute.test.ts` — gates on `capabilities.secrets.supported` + `capabilities.observability.testSeams.{otelScrape,debugBundleExport}` AND the `OPENWOP_CANARY_SECRET_VALUE` env (host operator + conformance runner agree on the canary). Drives the existing `openwop-smoke-byok-roundtrip` fixture end-to-end; scrapes both seams after run completion; hard-fails if the canary plaintext appears in any OTel span attribute or debug-bundle field. Verifies SECURITY invariants `secret-leakage-otel-attribute` + `secret-leakage-debug-bundle-otel`. 2026-05-22 (RFC 0041 Phase 4 — replay determinism under nondeterministic models) added three scenarios: `replay-divergence-at-refusal.test.ts` (advertisement-shape probe on `replayDeterminism.refusalDivergenceEmission` + 2 `it.todo` for the dual-direction refusal-divergence case), `replay-observable-sequence-determinism.test.ts` (capability-gated; behavioral assertion soft-skipped until a `conformance-phase4-nondet-tool` fixture ships), `replay-llm-cache-key-portable.test.ts` (intra-host reproducibility + non-recipe-field invariance + Phase 4 advertisement alignment — reuses the existing `POST /v1/host/sample/test/llm-cache-key` seam from the sibling `replay-llm-cache-key.test.ts`). 2026-05-20 (RFC 0027 §A templateKinds-coverage follow-up — paired with `prompt-end-to-end-events.test.ts`) added `prompt-all-four-kinds-events.test.ts` exercising all four `PromptKind` values (`system`, `user`, `schema-hint`, `few-shot`) end-to-end through the reference workflow-engine sample's `local.sample.demo.mock-ai` dispatch path; capability-gated via `behaviorGate('prompts-supported', ...)`. 
Closes the credibility gap where the host advertised `templateKinds: ["system", "user", "few-shot", "schema-hint"]` but only the system+user pair was actually wired into dispatch. 2026-05-20 (RFCs 0030–0033 — envelope LLM-contract-hardening track) added 15 scenarios across four `Active` RFCs: `envelope-reasoning-shape.test.ts` (RFC 0030, always-on; asserts the OPTIONAL `reasoning` property on the three universal-kind schemas + the `schema.response` deliberate omission), `envelope-reasoning-secret-redaction.test.ts` (RFC 0030, capability-gated on `capabilities.envelopes.reasoning.supported` + `secrets.supported`; 5 `it.todo()` placeholders for SECURITY invariant `envelope-reasoning-secret-redaction`), `envelope-tier-one-subset-static.test.ts` (RFC 0030, always-on for load-bearing rules — no `oneOf` / `allOf` / `not` / `prefixItems` / `propertyNames` anywhere; gated on `tierOneSubsetCompliance: "strict"` for OpenAI-strict-only constraints), `envelope-variant-discriminator-static.test.ts` (RFC 0031, always-on; asserts no `oneOf` + every `anyOf` branch declares a single-string-enum discriminator in `required` on every `schemas/envelopes/*.schema.json`), `model-capability-substituted.test.ts` (RFC 0031, advertisement-shape probe on `capabilities.modelCapabilities.advertised[]` identifier pattern + 5 `it.todo()` placeholders for SECURITY invariant `model-capability-substituted-no-credential-disclosure`), `model-capability-insufficient.test.ts` (RFC 0031, 6 `it.todo()` placeholders for refusal + no-recursive-fallback), `node-module-required-capabilities-shape.test.ts` (RFC 0031 SHOULD-tier authoring-convention; 4 `it.todo()` placeholders), and the six envelope-reliability events from RFC 0032 (`envelope-retry-attempted` carrying the shared advertisement-shape probe enforcing both MUST-tier events in `events[]` per RFC 0032 §C, plus `envelope-retry-exhausted`, `envelope-refusal-shape`, `envelope-truncated`, `envelope-nl-to-format-engaged`, `envelope-recovery-applied` — 
collectively 39 `it.todo()` placeholders covering retry/refusal/truncation/recovery + SECURITY invariants `envelope-refusal-no-prompt-leak` and `envelope-recovery-no-content-leak`), plus RFC 0033's two scenarios (`envelope-completion-distinguishes-truncation.test.ts` + `envelope-truncation-cap-exhaustion.test.ts` — 12 `it.todo()` placeholders covering the truncation-vs-schema-violation retry-routing distinction + the DoS-bound assertion). Reference workflow-engine sample advertises `capabilities.envelopes.reasoning: { supported: true, promptDirective: "off" }` + `tierOneSubsetCompliance: "warn"` honestly (schemas accept the field; host doesn't yet inject the directive); the other three RFCs' capability blocks defer to reference-host emission code per the staged RFC 0027 §G precedent. 2026-05-20 (RFC 0028 §B Phase B — prompt-pack boot-time install) added `prompt-pack-install.test.ts` (capability-gated on `capabilities.prompts.endpointsSupported: true`; asserts a host that ran the boot-time pack loader surfaces ≥ 1 pack-source template under `GET /v1/prompts?source=pack` carrying the canonical `meta.source: "pack"` + `meta.packName` + `meta.packVersion` stamps; positively identifies the in-tree `vendor.openwop.prompt-sample` reference pack's `writer-system` template when present). Pairs with the new `host/promptPackLoader.ts` boot-time entry on the reference workflow-engine sample, which scans `examples/packs/*` plus `OPENWOP_PROMPT_PACKS_DIR` and calls `installPackTemplates()` for each `kind: "prompt"` pack found. 
2026-05-20 (RFC 0029 Phase C — prompt resolution chain wire shape) added three more scenarios: `prompt-resolution-chain-node-wins.test.ts` (capability-gated on `capabilities.prompts.supported: true`; asserts layer-1 node-config supersedes lower layers per `spec/v1/prompts.md` §"Resolution chain (normative)"), `prompt-resolution-chain-agent-intrinsic.test.ts` (additionally gated on `capabilities.prompts.agentBindings: true`; asserts agent intrinsic `systemPromptRef` wins over `promptOverrides` AND lower layers when the node has no layer-1 ref), `prompt-resolution-chain-fallback-cascade.test.ts` (asserts layer 3 workflow-defaults wins over layer 4 host-defaults; layer 4 host-defaults wins when 1-3 yield null; resolved is null when all four yield null but chain[] still lists every attempted layer). The scenarios drive the host's `POST /v1/host/sample/prompt/resolve` test seam (reference-host implementation deferred to follow-up slice per RFC 0021 staging precedent). 2026-05-20 (RFC 0027 Phase A — prompt templates wire shape) added three scenarios: `prompt-template-shape.test.ts` (always-on; Ajv compileability + positive/negative round-trip for PromptTemplate + PromptRef + PromptKind), `prompt-composed-secret-redaction.test.ts` (capability-gated on `capabilities.prompts.supported: true` + `observability: "full"`; asserts `[REDACTED:<secretId>]` markers in `prompt.composed` payloads for `source: "secret"` variable bindings per SECURITY/threat-model-secret-leakage.md §SR-1), `prompt-composed-trust-marker.test.ts` (same capability gates; asserts `<UNTRUSTED>...</UNTRUSTED>` wrapping + `contentTrust: "untrusted"` propagation per RFC 0020 §D). Paired with new `fixtures/prompt-templates/` sub-directory + per-fixture schema-validity describe block + future SECURITY invariants `prompt-composed-secret-redaction` and `prompt-composed-trust-marker` (lands alongside reference-host emission per RFC 0021 staging precedent). 
2026-05-18 (RFC 0022 `Draft` — runtime variable mapping) added four `it.todo()` placeholder scenarios covering the new mapping surfaces on `core.dispatch` (§A — `dispatch-input-mapping.test.ts`, `dispatch-output-mapping.test.ts`, `dispatch-cross-worker-handoff.test.ts`) and `core.subWorkflow` (§B — `subworkflow-input-mapping.test.ts`). Gated on `capabilities.agents.dispatchMapping` (dispatch trio) and `capabilities.subWorkflow.inputMapping` (subWorkflow). Promote to live assertions when RFC 0022 reaches `Active` + a reference host advertises the matching flags. 2026-05-17 (RFC 0003 §D handoff-schema enforcement, HV-1) added `agentPackHandoffSchemaValidation.test.ts` — verifies the host validates dispatch payloads against `handoff.taskSchemaRef` AND return payloads against `handoff.returnSchemaRef` per RFC 0003 §D. Paired with the new `agent-pack-handoff-schema-enforcement` row in `SECURITY/invariants.yaml`. 2026-05-17 (AI Envelope gap-closure, DRAFT v1.x — `spec/v1/ai-envelope.md`) added 7 advertisement-shape scenarios with `it.todo()` behavioral placeholders gated on `capabilities.envelopeContracts.advertised: true`: `aiEnvelope.universalKinds.test.ts`, `aiEnvelope.schemaDrift.test.ts`, `aiEnvelope.correlationReplay.test.ts`, `aiEnvelope.contractRefusal.test.ts`, `aiEnvelope.trustBoundaryPropagation.test.ts`, `aiEnvelope.redaction.test.ts`, `aiEnvelope.capBreached.test.ts`. Paired with the new `envelope-redaction-sr-1-carry-forward` row in `SECURITY/invariants.yaml`. 2026-05-17 (post-publish hardening, deep audit of `core.openwop.agents`) added `agents-run-tool-allowlist.test.ts` — server-free scenario locking in the `core.openwop.agents@1.0.1` safety-fix that closes `OPENWOP-AUDIT-2026-003` (function-typed `tool.handler` properties rejected at `validateTools()` with `INVALID_TOOL_DECLARATION`; tool-driven runs require `ctx.agentRuntime`; tool-less safe fallback preserved). Paired with the new `agents-run-no-raw-handler` row in `SECURITY/invariants.yaml`. 
Same-day post-publish hardening added `idempotency-key-determinism.test.ts` — server-free scenario locking in the `core.openwop.http@1.1.2` determinism safety-fix (default `composite` mode produces deterministic keys in `(runId, nodeId, payload)`; removed `uuid` mode rejects with `CONFIG_INVALID`; cross-impl vector test lets third-party reimplementations verify wire agreement). Paired with the new `idempotency-key-deterministic` row in `SECURITY/invariants.yaml`. 2026-05-17 (Phase 3 of RFC 0013) added three server-free scenarios exercising the reference workflow-chain expansion library (`conformance/src/lib/workflow-chain-expansion.ts`): `workflow-chain-expansion.test.ts` (parameter substitution + node id collision avoidance + edge rewriting + capability propagation + runtime-invariance contract), `workflow-chain-unresolvable-typeid.test.ts` (rejection with `chain_unresolvable_typeid` when a chain references an unknown typeId), and `workflow-chain-pack-signature-verification.test.ts` (Ed25519 verification recipe reuse from `node-packs.md §Signing`). Earlier that day (Phase 1) added `workflow-chain-pack-manifest-validation.test.ts` — server-free schema-validation scenario covering the new `workflow-chain-pack-manifest.schema.json` (positive sample + two negatives: kind/contents mismatch and invalid `chainId`). Closes RFC 0013 (`Workflow-chain packs`, `Draft`) Phases 1 + 3 alongside the new `spec/v1/workflow-chain-packs.md`, the `Capabilities.workflowChainPacks` block, and the registry build-index/conformance-check `kind` routing from Phase 2. Earlier that day, the suite added 27 `it.todo()` placeholder scenarios paired with RFCs 0014-0020 (host capability surfaces — fs, kvStorage, tableStorage, queueBus, sql/vector/search, blob/cache, mcp.serverMount). These promote to live assertions when each RFC reaches `Active` + the matching capability block lands in `schemas/capabilities.schema.json` + a reference host advertises the capability. 
Earlier additions include 18 Multi-Agent Shift scenarios (Phases 1-5) added 2026-05-10, the `registry-public.test.ts` public-registry healthcheck added 2026-05-11 (opt-in via `OPENWOP_TEST_PUBLIC_REGISTRY=true`), the `replay-llm-cache-key.test.ts` placeholder added 2026-05-11 (three `it.todo()` cases for the cross-host LLM cache-key recipe per `replay.md` §"LLM cache-key recipe"), the two `production-*.test.ts` scenarios added 2026-05-11 for the `openwop-production` profile per RFC 0009 (`production-backpressure.test.ts`, `production-retention-expiry.test.ts`), the four `auth-*.test.ts` scenarios added 2026-05-11/12 for the production-auth profiles per RFC 0010 (`auth-api-key-rotation.test.ts`, `auth-oauth2-client-credentials.test.ts`, `auth-oidc-user-bearer.test.ts`, `auth-mtls.test.ts` (opt-in via `OPENWOP_TEST_MTLS=1`)), `replay-retention-expiry.test.ts` added 2026-05-12 (capability shape + 410/422 envelope per `replay.md` §"Retention and garbage collection"), `bulk-cancel.test.ts` added 2026-05-12 (Phase B close-out of R1 — `POST /v1/runs:bulk-cancel`), the two Phase H launch-blocker advertisement-contract scenarios added 2026-05-12 (`mcp-toolcall-redaction.test.ts` for the MCP-1 invariant per `host-capabilities.md §host.mcp` + `threat-model-prompt-injection.md §UNTRUSTED`, and `http-client-ssrf.test.ts` for the SSRF + body-size cap advertisement contract on `capabilities.httpClient`), the `wasm-pack-abi-version-rejection.test.ts` Track 7 scenario added 2026-05-12 for the ABI-mismatch positive path via the `vendor.openwop.misbehaving-abi` pack per RFC 0008 §H, the `otel-trace-propagation-subworkflow.test.ts` Track 11 close-out added 2026-05-13 (parent + child run spans share the inbound traceparent's traceId across the `core.subWorkflow` dispatch boundary), and the three RFC 0012 (Memory Compaction Profile, `Active`) scenarios added 2026-05-13/14: `memory-compaction-sr1-carry-forward.test.ts` (load-bearing SR-1 §D), `memory-compaction-event-emitted.test.ts` 
(canonical §B payload shape), and `memory-compaction-provenance-tag.test.ts` (soft assertion on §C `compacted-from:<id>` convention). All three gate on `capabilities.memory.compaction.supported` + the host's test seam at `/v1/test/memory/{seed,compact}` (Postgres reference host enables both via `OPENWOP_MEMORY_COMPACTION=true OPENWOP_TEST_TRIGGER_COMPACTION=true`). 2026-05-15 (gap-closure CF-3) added `interrupt-token-matrix.test.ts` (malformed / unknown / replay / cross-run-id paths on `GET|POST /v1/interrupts/{token}`). The maintained scenario-to-spec map lives in [`coverage.md`](./coverage.md); this README keeps the operator quickstart and the historical scenario notes below.
96
+ The current suite has 284 scenario files under `src/scenarios/`. 2026-05-26 (RFC 0070 — agent-manifest runtime) added `agent-manifest-runtime.test.ts`; 2026-05-26 (RFC 0071 — artifact-type + chat card packs) added six: `artifact-type-pack-manifest-validation.test.ts` + `artifact-schema-compile-bounded.test.ts` (server-free) + `artifact-type-pack-install.test.ts` + `artifact-type-store-without-render.test.ts` + `chat-card-pack-manifest-validation.test.ts` (server-free) + `chat-card-pack-execution.test.ts` (capability-gated, host-pending). 2026-05-26 (RFCs 0067 / 0068 / 0069 — spec-gap Draft cohort) added five scenarios: `byok-auth-modes.test.ts` (RFC 0067; always-on schema-shape of `aiProviders.authModes` + a discovery-gated §B auth-mode-contract cross-field check), `memory-consolidation-shape.test.ts` (RFC 0068; always-on shape of `agents.memoryConsolidation`/`agents.commitments` + the `agent.memory.consolidated`/`commitment.fired` payload $defs), `memory-consolidation-idempotent.test.ts` + `commitment-fired.test.ts` (RFC 0068; capability-gated behavioral, soft-skip on the documented `/v1/host/sample/memory/consolidate` + `/commitment/fire` seams), and `exec-not-protocol-tier.test.ts` (RFC 0069; always-on server-free structural assertion that the protocol corpus defines no `core.*`/`openwop.*` exec-class primitive — backs the `exec-must-not-be-protocol-tier` SECURITY invariant). 
2026-05-25 (RFC 0061 — stateful agent-loop lifecycle, executionModel.version 5) added four `agent-loop-*.test.ts` scenarios: `-version5-shape` (always-on; validates `executionModel.statefulResume`/`transcriptWindow` + the 1–5 version ceiling) plus `-iteration-monotonic` (gated on `version >= 5`; `runOrchestrator.decided.iteration` increments 1,2,3… exactly once per turn), `-workspace-snapshot` (gated additionally on `host.workspace.supported`; a turn-i workspace write is invisible to turn i, visible to turn i+1), and `-stateful-resume` (gated on `statefulResume`; a mid-loop suspend resumes at the same iteration without resetting the counter) — the three behavioral scenarios drive the documented agent-loop seam (`POST /v1/host/sample/agentloop/run`) and soft-skip until a host wires it. 2026-05-25 (RFC 0059 — host.workspace M2, reference-host enforcement) added two `workspace-*.test.ts` scenarios: `-behavior` (capability-gated CRUD round-trip / `If-Match` 409 `workspace_conflict` / `workspace_too_large` / §D run-start snapshot, all via the real `/v1/host/workspace/files` §C endpoints) and `-cross-tenant-isolation` (WCT-1 — drives the documented `POST /v1/host/sample/workspace/op` seam to assert a file owned by one `{tenant, workspace}` is unreadable, on both `get` and `list`, under a different owner; backs the new `workspace-cross-tenant-isolation` SECURITY invariant). The in-memory reference host now advertises `capabilities.workspace.supported` and honors §C/§D/§E end-to-end. 
2026-05-25 (RFC 0062 — memory.distillation "dreams") added five `distillation-*.test.ts` scenarios: `-shape` (always-on; validates the `capabilities.memory.distillation` block + the additive `distillation` sub-object on `memory.compacted`) plus `-token-budget` (within budget `tokensUsed ≤ tokenBudget`; an un-meetable budget → `token_budget_exceeded` with no partial archive), `-stable-archive` (same sources + budget ⇒ byte-stable archive checksum), `-index-roundtrip` (gated additionally on `indexEmitted`; the `MEMORY-INDEX.json` workspace file is retrievable + `workspace.updated` fired), and `-secret-carryforward` (SR-1: a redacted source secret never appears in the archive) — the four behavioral scenarios drive the documented memory-distillation seam (`POST /v1/host/sample/memory/distill`) and soft-skip until a host wires it. 2026-05-25 (RFC 0063 — core.subWorkflow.outputAttestation) added four `subrun-*.test.ts` scenarios: `-attestation-shape` (always-on; validates the `capabilities.agents.subRunAttestation` flag) plus `-checksum-stable` (the child output checksum is the byte-stable, key-order-invariant RFC 8785 JCS + SHA-256 digest), `-approval-gate` (`requireApproval` → `accept` merges, `reject` does not), and `-approval-fail-closed` (no `accept`/`edit-accept` → no merge; backs the deferred `subrun-merge-approval-fail-closed` invariant) — the three behavioral scenarios drive the documented sub-run attestation seam (`POST /v1/host/sample/subrun/attest`) and soft-skip until a host wires it. 
2026-05-25 (RFC 0064 — host.toolHooks) added five `tool-hooks-*.test.ts` scenarios: `-shape` (always-on; validates the `capabilities.toolHooks` block + the optional content-free fields on `agentToolCalled` / `agentToolReturned`) plus `-content-free` (gated on `prePostEvents`), `-authorization-fail-closed` (gated on `perToolAuthorization`), `-rate-limit` (gated on `perToolRateLimit`), and `-secret-redaction` (gated on `prePostEvents` + the SR-1 `argsHash` redaction rule) — the four behavioral scenarios drive the documented tool-hooks invoke seam (`POST /v1/host/sample/toolhooks/invoke`) and soft-skip until a host wires it. 2026-05-25 (RFC 0060 — host.heartbeat) added four `heartbeat-*.test.ts` scenarios: `-capability-shape` (always-on; validates the `capabilities.heartbeat` block) plus `-fires-once-per-tick`, `-idempotent-no-spam`, and `-runtime-bound` (gated on `capabilities.heartbeat.supported` + the host heartbeat tick seam; soft-skip until a host wires it). 2026-05-25 (RFC 0057 — memory write-attribution) added five `memory-attribution-*.test.ts` scenarios: `-shape` (always-on advertisement check on `capabilities.memory.attribution`), plus `-no-content`, `-tenant-scoped`, `-emits-on-write`, and `-replay-stable` (gated on `capabilities.memory.attribution.emitsWriteEvents`) verifying the content-free `memory.written` RunEvent, its two SECURITY invariants (`memory-attribution-no-content` + `memory-attribution-tenant-scoped`), and the §D replay rule that a `replay`-mode fork MUST NOT regenerate `memoryId`. 2026-05-25 (RFC 0025 §C point 1 — test-catalog isolation invariant; pairs with the 25 publish-error scenarios in `pack-registry-publish.test.ts`) added `pack-registry-isolation.test.ts` — capability-gated on `capabilities.packs.testMode.{supported, isolated}: true`; PUTs a disposable pack into `/v1/packs-test/{name}` and asserts the same `(name, version)` does NOT appear via `GET /v1/packs/{name}` — anchors the test-catalog isolation MUST in RFC 0025 §C. 
2026-05-25 (RFC 0028 Tier-2 post-promotion T2 — read-side sister scenario for workspace-membership enforcement) added `prompt-read-workspace-membership-enforced.test.ts` — gates on `capabilities.prompts.supported: true` (broader than `mutableLibrary` so read-only hosts that expose `?workspaceId=` are also probed); drives `GET /v1/prompts?workspaceId=<random-non-member>` and interprets the response: 4xx PASS (canonical envelope check on 403); 200 with empty `templates[]` PASS (correct null result for a nonexistent workspace); 200 with non-empty `templates[]` FAIL (cross-tenant leak); 200 without `templates[]` field SKIP (host doesn't expose workspace-scoped reads). Verifies SECURITY invariant `prompt-read-workspace-membership-enforced`. Same-day T1 strengthened `prompt-mutation-workspace-membership-enforced.test.ts` to pin `error === "workspace_membership_required"` when the host's refusal status is 403 (other refusal codes unconstrained). 2026-05-25 (RFC 0028 Tier-2 follow-up — workspace-membership enforcement on mutating prompt endpoints, filed in response to a self-disclosed adopter vulnerability) added `prompt-mutation-workspace-membership-enforced.test.ts` — capability-gated on `capabilities.prompts.mutableLibrary: true`; drives `POST /v1/prompts` with a cryptographically-random non-member `workspaceId` and asserts the host refuses (NOT a 2xx; any 4xx/5xx is acceptable — silent success is the failure mode). Verifies SECURITY invariant `prompt-mutation-workspace-membership-enforced`. 2026-05-22 (RFC 0034 §B follow-up — secret-leakage harness against the OTel + debug-bundle seams) added `secret-leakage-otel-attribute.test.ts` — gates on `capabilities.secrets.supported` + `capabilities.observability.testSeams.{otelScrape,debugBundleExport}` AND the `OPENWOP_CANARY_SECRET_VALUE` env (host operator + conformance runner agree on the canary). 
Drives the existing `openwop-smoke-byok-roundtrip` fixture end-to-end; scrapes both seams after run completion; hard-fails if the canary plaintext appears in any OTel span attribute or debug-bundle field. Verifies SECURITY invariants `secret-leakage-otel-attribute` + `secret-leakage-debug-bundle-otel`. 2026-05-22 (RFC 0041 Phase 4 — replay determinism under nondeterministic models) added three scenarios: `replay-divergence-at-refusal.test.ts` (advertisement-shape probe on `replayDeterminism.refusalDivergenceEmission` + 2 `it.todo` for the dual-direction refusal-divergence case), `replay-observable-sequence-determinism.test.ts` (capability-gated; behavioral assertion soft-skipped until a `conformance-phase4-nondet-tool` fixture ships), `replay-llm-cache-key-portable.test.ts` (intra-host reproducibility + non-recipe-field invariance + Phase 4 advertisement alignment — reuses the existing `POST /v1/host/sample/test/llm-cache-key` seam from the sibling `replay-llm-cache-key.test.ts`). 2026-05-20 (RFC 0027 §A templateKinds-coverage follow-up — paired with `prompt-end-to-end-events.test.ts`) added `prompt-all-four-kinds-events.test.ts` exercising all four `PromptKind` values (`system`, `user`, `schema-hint`, `few-shot`) end-to-end through the reference workflow-engine sample's `local.sample.demo.mock-ai` dispatch path; capability-gated via `behaviorGate('prompts-supported', ...)`. Closes the credibility gap where the host advertised `templateKinds: ["system", "user", "few-shot", "schema-hint"]` but only the system+user pair was actually wired into dispatch. 
2026-05-20 (RFCs 0030–0033 — envelope LLM-contract-hardening track) added 15 scenarios across four `Active` RFCs: `envelope-reasoning-shape.test.ts` (RFC 0030, always-on; asserts the OPTIONAL `reasoning` property on the three universal-kind schemas + the `schema.response` deliberate omission), `envelope-reasoning-secret-redaction.test.ts` (RFC 0030, capability-gated on `capabilities.envelopes.reasoning.supported` + `secrets.supported`; 5 `it.todo()` placeholders for SECURITY invariant `envelope-reasoning-secret-redaction`), `envelope-tier-one-subset-static.test.ts` (RFC 0030, always-on for load-bearing rules — no `oneOf` / `allOf` / `not` / `prefixItems` / `propertyNames` anywhere; gated on `tierOneSubsetCompliance: "strict"` for OpenAI-strict-only constraints), `envelope-variant-discriminator-static.test.ts` (RFC 0031, always-on; asserts no `oneOf` + every `anyOf` branch declares a single-string-enum discriminator in `required` on every `schemas/envelopes/*.schema.json`), `model-capability-substituted.test.ts` (RFC 0031, advertisement-shape probe on `capabilities.modelCapabilities.advertised[]` identifier pattern + 5 `it.todo()` placeholders for SECURITY invariant `model-capability-substituted-no-credential-disclosure`), `model-capability-insufficient.test.ts` (RFC 0031, 6 `it.todo()` placeholders for refusal + no-recursive-fallback), `node-module-required-capabilities-shape.test.ts` (RFC 0031 SHOULD-tier authoring-convention; 4 `it.todo()` placeholders), and the six envelope-reliability events from RFC 0032 (`envelope-retry-attempted` carrying the shared advertisement-shape probe enforcing both MUST-tier events in `events[]` per RFC 0032 §C, plus `envelope-retry-exhausted`, `envelope-refusal-shape`, `envelope-truncated`, `envelope-nl-to-format-engaged`, `envelope-recovery-applied` — collectively 39 `it.todo()` placeholders covering retry/refusal/truncation/recovery + SECURITY invariants `envelope-refusal-no-prompt-leak` and `envelope-recovery-no-content-leak`), 
plus RFC 0033's two scenarios (`envelope-completion-distinguishes-truncation.test.ts` + `envelope-truncation-cap-exhaustion.test.ts` — 12 `it.todo()` placeholders covering the truncation-vs-schema-violation retry-routing distinction + the DoS-bound assertion). Reference workflow-engine sample advertises `capabilities.envelopes.reasoning: { supported: true, promptDirective: "off" }` + `tierOneSubsetCompliance: "warn"` honestly (schemas accept the field; host doesn't yet inject the directive); the other three RFCs' capability blocks defer to reference-host emission code per the staged RFC 0027 §G precedent. 2026-05-20 (RFC 0028 §B Phase B — prompt-pack boot-time install) added `prompt-pack-install.test.ts` (capability-gated on `capabilities.prompts.endpointsSupported: true`; asserts a host that ran the boot-time pack loader surfaces ≥ 1 pack-source template under `GET /v1/prompts?source=pack` carrying the canonical `meta.source: "pack"` + `meta.packName` + `meta.packVersion` stamps; positively identifies the in-tree `vendor.openwop.prompt-sample` reference pack's `writer-system` template when present). Pairs with the new `host/promptPackLoader.ts` boot-time entry on the reference workflow-engine sample, which scans `examples/packs/*` plus `OPENWOP_PROMPT_PACKS_DIR` and calls `installPackTemplates()` for each `kind: "prompt"` pack found. 
2026-05-20 (RFC 0029 Phase C — prompt resolution chain wire shape) added three more scenarios: `prompt-resolution-chain-node-wins.test.ts` (capability-gated on `capabilities.prompts.supported: true`; asserts layer-1 node-config supersedes lower layers per `spec/v1/prompts.md` §"Resolution chain (normative)"), `prompt-resolution-chain-agent-intrinsic.test.ts` (additionally gated on `capabilities.prompts.agentBindings: true`; asserts agent intrinsic `systemPromptRef` wins over `promptOverrides` AND lower layers when the node has no layer-1 ref), `prompt-resolution-chain-fallback-cascade.test.ts` (asserts layer 3 workflow-defaults wins over layer 4 host-defaults; layer 4 host-defaults wins when 1-3 yield null; resolved is null when all four yield null but chain[] still lists every attempted layer). The scenarios drive the host's `POST /v1/host/sample/prompt/resolve` test seam (reference-host implementation deferred to follow-up slice per RFC 0021 staging precedent). 2026-05-20 (RFC 0027 Phase A — prompt templates wire shape) added three scenarios: `prompt-template-shape.test.ts` (always-on; Ajv compileability + positive/negative round-trip for PromptTemplate + PromptRef + PromptKind), `prompt-composed-secret-redaction.test.ts` (capability-gated on `capabilities.prompts.supported: true` + `observability: "full"`; asserts `[REDACTED:<secretId>]` markers in `prompt.composed` payloads for `source: "secret"` variable bindings per SECURITY/threat-model-secret-leakage.md §SR-1), `prompt-composed-trust-marker.test.ts` (same capability gates; asserts `<UNTRUSTED>...</UNTRUSTED>` wrapping + `contentTrust: "untrusted"` propagation per RFC 0020 §D). Paired with new `fixtures/prompt-templates/` sub-directory + per-fixture schema-validity describe block + future SECURITY invariants `prompt-composed-secret-redaction` and `prompt-composed-trust-marker` (lands alongside reference-host emission per RFC 0021 staging precedent). 
2026-05-18 (RFC 0022 `Draft` — runtime variable mapping) added four `it.todo()` placeholder scenarios covering the new mapping surfaces on `core.dispatch` (§A — `dispatch-input-mapping.test.ts`, `dispatch-output-mapping.test.ts`, `dispatch-cross-worker-handoff.test.ts`) and `core.subWorkflow` (§B — `subworkflow-input-mapping.test.ts`). Gated on `capabilities.agents.dispatchMapping` (dispatch trio) and `capabilities.subWorkflow.inputMapping` (subWorkflow). Promote to live assertions when RFC 0022 reaches `Active` + a reference host advertises the matching flags. 2026-05-17 (RFC 0003 §D handoff-schema enforcement, HV-1) added `agentPackHandoffSchemaValidation.test.ts` — verifies the host validates dispatch payloads against `handoff.taskSchemaRef` AND return payloads against `handoff.returnSchemaRef` per RFC 0003 §D. Paired with the new `agent-pack-handoff-schema-enforcement` row in `SECURITY/invariants.yaml`. 2026-05-17 (AI Envelope gap-closure, DRAFT v1.x — `spec/v1/ai-envelope.md`) added 7 advertisement-shape scenarios with `it.todo()` behavioral placeholders gated on `capabilities.envelopeContracts.advertised: true`: `aiEnvelope.universalKinds.test.ts`, `aiEnvelope.schemaDrift.test.ts`, `aiEnvelope.correlationReplay.test.ts`, `aiEnvelope.contractRefusal.test.ts`, `aiEnvelope.trustBoundaryPropagation.test.ts`, `aiEnvelope.redaction.test.ts`, `aiEnvelope.capBreached.test.ts`. Paired with the new `envelope-redaction-sr-1-carry-forward` row in `SECURITY/invariants.yaml`. 2026-05-17 (post-publish hardening, deep audit of `core.openwop.agents`) added `agents-run-tool-allowlist.test.ts` — server-free scenario locking in the `core.openwop.agents@1.0.1` safety-fix that closes `OPENWOP-AUDIT-2026-003` (function-typed `tool.handler` properties rejected at `validateTools()` with `INVALID_TOOL_DECLARATION`; tool-driven runs require `ctx.agentRuntime`; tool-less safe fallback preserved). Paired with the new `agents-run-no-raw-handler` row in `SECURITY/invariants.yaml`. 
Same-day post-publish hardening added `idempotency-key-determinism.test.ts` — server-free scenario locking in the `core.openwop.http@1.1.2` determinism safety-fix (default `composite` mode produces deterministic keys in `(runId, nodeId, payload)`; removed `uuid` mode rejects with `CONFIG_INVALID`; cross-impl vector test lets third-party reimplementations verify wire agreement). Paired with the new `idempotency-key-deterministic` row in `SECURITY/invariants.yaml`. 2026-05-17 (Phase 3 of RFC 0013) added three server-free scenarios exercising the reference workflow-chain expansion library (`conformance/src/lib/workflow-chain-expansion.ts`): `workflow-chain-expansion.test.ts` (parameter substitution + node id collision avoidance + edge rewriting + capability propagation + runtime-invariance contract), `workflow-chain-unresolvable-typeid.test.ts` (rejection with `chain_unresolvable_typeid` when a chain references an unknown typeId), and `workflow-chain-pack-signature-verification.test.ts` (Ed25519 verification recipe reuse from `node-packs.md §Signing`). Earlier that day (Phase 1) added `workflow-chain-pack-manifest-validation.test.ts` — server-free schema-validation scenario covering the new `workflow-chain-pack-manifest.schema.json` (positive sample + two negatives: kind/contents mismatch and invalid `chainId`). Closes RFC 0013 (`Workflow-chain packs`, `Draft`) Phases 1 + 3 alongside the new `spec/v1/workflow-chain-packs.md`, the `Capabilities.workflowChainPacks` block, and the registry build-index/conformance-check `kind` routing from Phase 2. Earlier that day, the suite added 27 `it.todo()` placeholder scenarios paired with RFCs 0014-0020 (host capability surfaces — fs, kvStorage, tableStorage, queueBus, sql/vector/search, blob/cache, mcp.serverMount). These promote to live assertions when each RFC reaches `Active` + the matching capability block lands in `schemas/capabilities.schema.json` + a reference host advertises the capability. 
Earlier additions include 18 Multi-Agent Shift scenarios (Phases 1-5) added 2026-05-10, the `registry-public.test.ts` public-registry healthcheck added 2026-05-11 (opt-in via `OPENWOP_TEST_PUBLIC_REGISTRY=true`), the `replay-llm-cache-key.test.ts` placeholder added 2026-05-11 (three `it.todo()` cases for the cross-host LLM cache-key recipe per `replay.md` §"LLM cache-key recipe"), the two `production-*.test.ts` scenarios added 2026-05-11 for the `openwop-production` profile per RFC 0009 (`production-backpressure.test.ts`, `production-retention-expiry.test.ts`), the four `auth-*.test.ts` scenarios added 2026-05-11/12 for the production-auth profiles per RFC 0010 (`auth-api-key-rotation.test.ts`, `auth-oauth2-client-credentials.test.ts`, `auth-oidc-user-bearer.test.ts`, `auth-mtls.test.ts` (opt-in via `OPENWOP_TEST_MTLS=1`)), `replay-retention-expiry.test.ts` added 2026-05-12 (capability shape + 410/422 envelope per `replay.md` §"Retention and garbage collection"), `bulk-cancel.test.ts` added 2026-05-12 (Phase B close-out of R1 — `POST /v1/runs:bulk-cancel`), the two Phase H launch-blocker advertisement-contract scenarios added 2026-05-12 (`mcp-toolcall-redaction.test.ts` for the MCP-1 invariant per `host-capabilities.md §host.mcp` + `threat-model-prompt-injection.md §UNTRUSTED`, and `http-client-ssrf.test.ts` for the SSRF + body-size cap advertisement contract on `capabilities.httpClient`), the `wasm-pack-abi-version-rejection.test.ts` Track 7 scenario added 2026-05-12 for the ABI-mismatch positive path via the `vendor.openwop.misbehaving-abi` pack per RFC 0008 §H, the `otel-trace-propagation-subworkflow.test.ts` Track 11 close-out added 2026-05-13 (parent + child run spans share the inbound traceparent's traceId across the `core.subWorkflow` dispatch boundary), and the three RFC 0012 (Memory Compaction Profile, `Active`) scenarios added 2026-05-13/14: `memory-compaction-sr1-carry-forward.test.ts` (load-bearing SR-1 §D), `memory-compaction-event-emitted.test.ts` 
(canonical §B payload shape), and `memory-compaction-provenance-tag.test.ts` (soft assertion on §C `compacted-from:<id>` convention). All three gate on `capabilities.memory.compaction.supported` + the host's test seam at `/v1/test/memory/{seed,compact}` (Postgres reference host enables both via `OPENWOP_MEMORY_COMPACTION=true OPENWOP_TEST_TRIGGER_COMPACTION=true`). 2026-05-15 (gap-closure CF-3) added `interrupt-token-matrix.test.ts` (malformed / unknown / replay / cross-run-id paths on `GET|POST /v1/interrupts/{token}`). The maintained scenario-to-spec map lives in [`coverage.md`](./coverage.md); this README keeps the operator quickstart and the historical scenario notes below.
97
97
 
98
98
  High-level coverage includes:
99
99
 
@@ -172,7 +172,7 @@ Server-required (added in 1.7.0):
172
172
  |---|---|---|
173
173
  | **Redaction** | [`capabilities.md`](../spec/v1/capabilities.md) §"Secrets" + NFR-7 + §"aiProviders" | Vendor-neutral assertions that the server doesn't leak secret material. Three scenario groups: (a) discovery shape contract — `secrets` + `aiProviders` advertisements are well-formed regardless of `secrets.supported`; when `supported === true`, scopes MUST be non-empty + `resolution === 'host-managed'`; `byok ⊆ supported`. (b) bearer-token redaction — invalid Bearer canary in `Authorization` header is not echoed in the 401 response body. (c) credentialRef echo control — gated on `secrets.supported === true`; canary planted in `configurable.ai.credentialRef` MUST NOT appear in any RunEvent payload (poll-based capture; transport-agnostic). Uses runtime-built canary fixtures (`lib/canaries.ts`) that defeat static secret scanners. 6 scenarios. |
174
174
 
175
- Current source tree: 238 scenario files. Use [`coverage.md`](./coverage.md) for current grade/gap tracking.
175
+ Current source tree: 284 scenario files. Use [`coverage.md`](./coverage.md) for current grade/gap tracking.
176
176
 
177
177
  ## Remaining Gaps
178
178
 
package/api/asyncapi.yaml CHANGED
@@ -54,6 +54,23 @@ servers:
54
54
  # ─────────────────────────────────────────────────────────────────────────────
55
55
  channels:
56
56
 
57
+ heartbeatEvents:
58
+ address: /heartbeats/{heartbeatId}/events
59
+ title: Heartbeat evaluation events (RFC 0060)
60
+ summary: Per-tick heartbeat evaluation + state-change notifications.
61
+ description: |
62
+ RFC 0060 `host.heartbeat`. Heartbeat-scoped (NOT a run-event
63
+ stream): a host advertising `capabilities.heartbeat.supported: true`
64
+ emits `heartbeat.evaluated` every tick and `heartbeat.stateChanged`
65
+ only on a predicate-state transition. Both are observability-only;
66
+ consumers MAY ignore them.
67
+ parameters:
68
+ heartbeatId:
69
+ description: Host-assigned heartbeat identifier.
70
+ messages:
71
+ heartbeatEvaluated: { $ref: '#/components/messages/HeartbeatEvaluated' }
72
+ heartbeatStateChanged: { $ref: '#/components/messages/HeartbeatStateChanged' }
73
+
57
74
  runEventsUpdates:
58
75
  address: /runs/{runId}/events
59
76
  title: SSE — updates mode (default)
@@ -80,6 +97,7 @@ channels:
80
97
  runPaused: { $ref: '#/components/messages/RunPaused' }
81
98
  runResumed: { $ref: '#/components/messages/RunResumed' }
82
99
  runAnnotated: { $ref: '#/components/messages/RunAnnotated' }
100
+ workspaceUpdated: { $ref: '#/components/messages/WorkspaceUpdated' }
83
101
  nodeCompleted: { $ref: '#/components/messages/NodeCompleted' }
84
102
  nodeFailed: { $ref: '#/components/messages/NodeFailed' }
85
103
  nodeSkipped: { $ref: '#/components/messages/NodeSkipped' }
@@ -156,6 +174,7 @@ channels:
156
174
  messages:
157
175
  anyRunEvent: { $ref: '#/components/messages/AnyRunEvent' }
158
176
  runAnnotated: { $ref: '#/components/messages/RunAnnotated' }
177
+ workspaceUpdated: { $ref: '#/components/messages/WorkspaceUpdated' }
159
178
 
160
179
  # ─────────────────────────────────────────────────────────────────────────────
161
180
  # OPERATIONS — consumer-side (receive)
@@ -320,6 +339,32 @@ components:
320
339
  payload:
321
340
  $ref: '#/components/schemas/Annotation'
322
341
 
342
+ WorkspaceUpdated:
343
+ name: workspace.updated
344
+ title: Workspace file updated (RFC 0059)
345
+ summary: A workspace file was created, replaced, or deleted via the host.workspace store. Content-free — carries the file path + resulting version only (the body is served by the read-side, SR-1-redacted). A replayable run-event-log entry (re-read from the log on replay, never regenerated); gated on capabilities.workspace.supported.
346
+ contentType: application/json
347
+ payload:
348
+ $ref: '#/components/schemas/WorkspaceUpdatedPayload'
349
+
350
+ # RFC 0060. Heartbeat-scoped observability events — NOT RunEventDocs,
351
+ # NOT replayable run-event-log entries. Emitted on the heartbeat channel.
352
+ HeartbeatEvaluated:
353
+ name: heartbeat.evaluated
354
+ title: Heartbeat evaluated (RFC 0060)
355
+ summary: A heartbeat predicate was evaluated this tick (status + changed flag). Heartbeat-scoped observability.
356
+ contentType: application/json
357
+ payload:
358
+ $ref: '#/components/schemas/HeartbeatEvaluated'
359
+
360
+ HeartbeatStateChanged:
361
+ name: heartbeat.stateChanged
362
+ title: Heartbeat state changed (RFC 0060)
363
+ summary: A heartbeat predicate's state transitioned; emitted ONLY on change (never on an unchanged tick). Heartbeat-scoped.
364
+ contentType: application/json
365
+ payload:
366
+ $ref: '#/components/schemas/HeartbeatStateChanged'
367
+
323
368
  # ── Node-lifecycle ───────────────────────────────────────────────────
324
369
  NodeCompleted:
325
370
  name: node.completed
@@ -470,6 +515,18 @@ components:
470
515
  Annotation:
471
516
  $ref: '../schemas/annotation.schema.json'
472
517
 
518
+ # RFC 0059. The workspace.updated event payload — content-free
519
+ # {path, version}. Definition lives at
520
+ # run-event-payloads.schema.json#$defs.workspaceUpdated so the SSE
521
+ # consumer + run-event log share one shape contract.
522
+ WorkspaceUpdatedPayload:
523
+ $ref: '../schemas/run-event-payloads.schema.json#/$defs/workspaceUpdated'
524
+ # RFC 0060 heartbeat events (heartbeat-scoped; see host-capabilities.md §host.heartbeat).
525
+ HeartbeatEvaluated:
526
+ $ref: '../schemas/heartbeat-evaluated.schema.json'
527
+ HeartbeatStateChanged:
528
+ $ref: '../schemas/heartbeat-state-changed.schema.json'
529
+
473
530
  StateSnapshotPayload:
474
531
  # S1 closure (2026-04-27): reuse the canonical RunSnapshot
475
532
  # projection shape verbatim. Same type returned by
package/api/openapi.yaml CHANGED
@@ -51,6 +51,8 @@ tags:
51
51
  description: Workflow definition manifest.
52
52
  - name: runs
53
53
  description: Run lifecycle — create, read, stream, cancel, fork.
54
+ - name: agents
55
+ description: Manifest-agent inventory (RFC 0072 §A). Read-only; gated on capabilities.agents.manifestRuntime. Dispatch rides the run surface (WorkflowNode.agent + POST /v1/runs).
54
56
  - name: hitl
55
57
  description: Human-in-the-loop interrupts and approvals.
56
58
  - name: artifacts
@@ -61,6 +63,8 @@ tags:
61
63
  description: Audit-log integrity verification (gated on the `openwop-audit-log-integrity` profile).
62
64
  - name: prompts
63
65
  description: Prompt-template library — list, fetch, render, mutate (RFC 0028; gated on `capabilities.prompts.*`).
66
+ - name: host
67
+ description: Host-capability resources — e.g. the RFC 0059 agent workspace file store (gated on `capabilities.workspace.*`).
64
68
  - name: packs-test
65
69
  description: |
66
70
  RFC 0025 (`Draft`). Test-mode mirror of the production `/v1/packs/*` publish/get/delete/sig surface against
@@ -427,6 +431,146 @@ paths:
427
431
  schema:
428
432
  $ref: '../schemas/error-envelope.schema.json'
429
433
 
434
+ # ── Agent workspace files (RFC 0059) ─────────────────────────────────
435
+ # Gated on `capabilities.workspace.supported: true`. A versioned,
436
+ # tenant·workspace-scoped (RFC 0048) ground-truth file store with atomic,
437
+ # optimistically-concurrent (`If-Match`) writes. A successful PUT/DELETE
438
+ # emits a content-free `workspace.updated` event. Hosts without the
439
+ # advertised capability return `501 capability_not_provided`.
440
+ /v1/host/workspace/files:
441
+ get:
442
+ tags: [host]
443
+ summary: List workspace file metadata for the caller's tenant·workspace (RFC 0059).
444
+ description: |
445
+ Returns file metadata (no bodies) for the caller's `{tenant,
446
+ workspace}` per RFC 0059 §C. Optional `?prefix=` filters the flat
447
+ `path` namespace to entries starting with the given prefix.
448
+ operationId: listWorkspaceFiles
449
+ parameters:
450
+ - $ref: '#/components/parameters/WorkspacePrefix'
451
+ responses:
452
+ '200':
453
+ description: Workspace file metadata (tenant·workspace-scoped; bodies omitted).
454
+ content:
455
+ application/json:
456
+ schema:
457
+ type: object
458
+ required: [files]
459
+ properties:
460
+ files:
461
+ type: array
462
+ items:
463
+ $ref: '../schemas/workspace-file.schema.json'
464
+ additionalProperties: false
465
+ '401': { $ref: '#/components/responses/Unauthenticated' }
466
+ '403': { $ref: '#/components/responses/Forbidden' }
467
+ '501':
468
+ description: 'Host does not advertise capabilities.workspace.supported (RFC 0059).'
469
+ content:
470
+ application/json:
471
+ schema:
472
+ $ref: '../schemas/error-envelope.schema.json'
473
+
474
+ /v1/host/workspace/files/{path}:
475
+ get:
476
+ tags: [host]
477
+ summary: Read one workspace file (RFC 0059).
478
+ description: |
479
+ Returns the `WorkspaceFile` at `path` for the caller's `{tenant,
480
+ workspace}`. When `capabilities.workspace.versioned: true`, an
481
+ optional `?version=N` returns the historical snapshot at version N.
482
+ operationId: getWorkspaceFile
483
+ parameters:
484
+ - $ref: '#/components/parameters/WorkspacePath'
485
+ - $ref: '#/components/parameters/WorkspaceVersion'
486
+ responses:
487
+ '200':
488
+ description: The workspace file (current version, or `?version=N` when versioned).
489
+ content:
490
+ application/json:
491
+ schema:
492
+ $ref: '../schemas/workspace-file.schema.json'
493
+ '401': { $ref: '#/components/responses/Unauthenticated' }
494
+ '403': { $ref: '#/components/responses/Forbidden' }
495
+ '404': { $ref: '#/components/responses/NotFound' }
496
+ '501':
497
+ description: 'Host does not advertise capabilities.workspace.supported (RFC 0059).'
498
+ content:
499
+ application/json:
500
+ schema:
501
+ $ref: '../schemas/error-envelope.schema.json'
502
+ put:
503
+ tags: [host]
504
+ summary: Atomic create/replace of a workspace file (RFC 0059).
505
+ description: |
506
+ Atomically creates or replaces the file at `path` per RFC 0059 §C.
507
+ MUST honor `If-Match: <etag>` — a stale token returns `409
508
+ workspace_conflict` (`details.currentVersion` carries the live
509
+ version). On success the host bumps `version`, recomputes `etag`,
510
+ and emits a `workspace.updated` event. A `content` exceeding
511
+ `capabilities.workspace.maxFileBytes` returns `413 workspace_too_large`.
512
+ operationId: putWorkspaceFile
513
+ parameters:
514
+ - $ref: '#/components/parameters/WorkspacePath'
515
+ - $ref: '#/components/parameters/IfMatch'
516
+ - $ref: '#/components/parameters/IdempotencyKey'
517
+ requestBody:
518
+ required: true
519
+ content:
520
+ application/json:
521
+ schema:
522
+ $ref: '../schemas/workspace-file-create.schema.json'
523
+ responses:
524
+ '200':
525
+ description: File created or replaced. Returns the persisted WorkspaceFile.
526
+ content:
527
+ application/json:
528
+ schema:
529
+ $ref: '../schemas/workspace-file.schema.json'
530
+ '400': { $ref: '#/components/responses/ValidationError' }
531
+ '401': { $ref: '#/components/responses/Unauthenticated' }
532
+ '403': { $ref: '#/components/responses/Forbidden' }
533
+ '409':
534
+ description: 'Stale `If-Match` — the file changed since the supplied etag (`workspace_conflict`).'
535
+ content:
536
+ application/json:
537
+ schema:
538
+ $ref: '../schemas/error-envelope.schema.json'
539
+ '413':
540
+ description: 'Content exceeds `capabilities.workspace.maxFileBytes` (`workspace_too_large`).'
541
+ content:
542
+ application/json:
543
+ schema:
544
+ $ref: '../schemas/error-envelope.schema.json'
545
+ '501':
546
+ description: 'Host does not advertise capabilities.workspace.supported (RFC 0059).'
547
+ content:
548
+ application/json:
549
+ schema:
550
+ $ref: '../schemas/error-envelope.schema.json'
551
+ delete:
552
+ tags: [host]
553
+ summary: Delete a workspace file (RFC 0059).
554
+ description: |
555
+ Removes the file at `path` (and, when `capabilities.workspace.versioned: true`, writes a
556
+ tombstone). Emits a `workspace.updated` event on success.
557
+ operationId: deleteWorkspaceFile
558
+ parameters:
559
+ - $ref: '#/components/parameters/WorkspacePath'
560
+ - $ref: '#/components/parameters/IdempotencyKey'
561
+ responses:
562
+ '204':
563
+ description: File deleted.
564
+ '401': { $ref: '#/components/responses/Unauthenticated' }
565
+ '403': { $ref: '#/components/responses/Forbidden' }
566
+ '404': { $ref: '#/components/responses/NotFound' }
567
+ '501':
568
+ description: 'Host does not advertise capabilities.workspace.supported (RFC 0059).'
569
+ content:
570
+ application/json:
571
+ schema:
572
+ $ref: '../schemas/error-envelope.schema.json'
573
+
430
574
  /v1/runs:bulk-cancel:
431
575
  post:
432
576
  tags: [runs]
@@ -575,6 +719,75 @@ paths:
575
719
  application/json:
576
720
  schema: { $ref: '#/components/schemas/Error' }
577
721
 
722
+ /v1/agents:
723
+ get:
724
+ tags: [agents]
725
+ summary: |
726
+ RFC 0072 §A — list the manifest agents this host has installed into its
727
+ AgentRegistry (RFC 0070). Capability-gated on
728
+ `capabilities.agents.manifestRuntime.supported: true`; hosts that don't
729
+ advertise it return 404. Read-only projection — never carries the
730
+ system-prompt body, resolved handoff schemas, or credential material (SR-1).
731
+ Dispatch is not a bespoke endpoint: a manifest agent is invoked as a run
732
+ whose node pins it via `WorkflowNode.agent` + `POST /v1/runs` (RFC 0072 §B).
733
+ RFC 0074 — the result is scoped to the authenticated principal's owner
734
+ triple (RFC 0048). When `capabilities.agents.manifestRuntime.installScope`
735
+ is `'tenant'`, only the agents available to the caller's tenant·workspace
736
+ are returned (an agent another workspace installed is absent, never
737
+ disclosed); when `'host'` (default) the inventory is host-global as in
738
+ RFC 0072. A `'tenant'`-scoped host MUST reject unauthenticated/unscoped
739
+ requests per its standard auth contract rather than fall back to a global list.
740
+ operationId: listAgents
741
+ responses:
742
+ '200':
743
+ description: Installed manifest agents (agentId-sorted).
744
+ content:
745
+ application/json:
746
+ schema:
747
+ $ref: '../schemas/agent-inventory-response.schema.json'
748
+ '401': { $ref: '#/components/responses/Unauthenticated' }
749
+ '403': { $ref: '#/components/responses/Forbidden' }
750
+ '404':
751
+ description: |
752
+ Host does not advertise `capabilities.agents.manifestRuntime` and
753
+ treats the endpoint as absent.
754
+ content:
755
+ application/json:
756
+ schema: { $ref: '#/components/schemas/Error' }
757
+
758
+ /v1/agents/{agentId}:
759
+ get:
760
+ tags: [agents]
761
+ summary: |
762
+ RFC 0072 §A — return one installed manifest agent's inventory entry, or
763
+ 404 when no such agent is installed (or the host doesn't advertise
764
+ `capabilities.agents.manifestRuntime`). RFC 0074 — resolved within the
765
+ authenticated principal's owner triple (RFC 0048): on an
766
+ `installScope: 'tenant'` host an agent the caller's workspace has not
767
+ approved 404s identically to "not installed", so the surface never
768
+ discloses another tenant's inventory.
769
+ operationId: getAgent
770
+ parameters:
771
+ - in: path
772
+ name: agentId
773
+ required: true
774
+ schema: { type: string }
775
+ description: The manifest agentId.
776
+ responses:
777
+ '200':
778
+ description: The agent's inventory entry.
779
+ content:
780
+ application/json:
781
+ schema:
782
+ $ref: '../schemas/agent-inventory-response.schema.json#/$defs/AgentInventoryEntry'
783
+ '401': { $ref: '#/components/responses/Unauthenticated' }
784
+ '403': { $ref: '#/components/responses/Forbidden' }
785
+ '404':
786
+ description: No such agent, or the host doesn't advertise the capability.
787
+ content:
788
+ application/json:
789
+ schema: { $ref: '#/components/schemas/Error' }
790
+
578
791
  /v1/runs/{runId}:diff:
579
792
  get:
580
793
  tags: [runs]
@@ -1513,6 +1726,43 @@ components:
1513
1726
  Duplicate requests return the cached response with header
1514
1727
  `openwop-Idempotent-Replay: true`.
1515
1728
 
1729
+ WorkspacePath:
1730
+ in: path
1731
+ name: path
1732
+ required: true
1733
+ schema:
1734
+ type: string
1735
+ pattern: '^[A-Za-z0-9][A-Za-z0-9._/-]{0,255}$'
1736
+ description: |
1737
+ RFC 0059 workspace-relative file path. Flat namespace with
1738
+ `/` allowed in names; no `..`, no leading `/`. Matches
1739
+ `workspace-file.schema.json#path`. The `pattern` alone does not rule out `..`.
1740
+
1741
+ WorkspacePrefix:
1742
+ in: query
1743
+ name: prefix
1744
+ required: false
1745
+ schema: { type: string, maxLength: 256 }
1746
+ description: RFC 0059. Optional prefix filter over the flat `path` namespace for `listWorkspaceFiles`.
1747
+
1748
+ WorkspaceVersion:
1749
+ in: query
1750
+ name: version
1751
+ required: false
1752
+ schema: { type: integer, minimum: 1 }
1753
+ description: |
1754
+ RFC 0059. When `capabilities.workspace.versioned: true`, request the
1755
+ historical snapshot at this version. Absent = latest.
1756
+
1757
+ IfMatch:
1758
+ in: header
1759
+ name: If-Match
1760
+ required: false
1761
+ schema: { type: string, maxLength: 255 }
1762
+ description: |
1763
+ RFC 0059 optimistic-concurrency token — the file's current `etag`.
1764
+ A `PUT` carrying a stale `If-Match` returns `409 workspace_conflict`.
1765
+
1516
1766
  PackName:
1517
1767
  in: path
1518
1768
  name: name
package/coverage.md CHANGED
@@ -38,6 +38,7 @@
38
38
  | AI Envelope (FINAL v1.1 — `spec/v1/ai-envelope.md`, RFC 0021) | `aiEnvelope.universalKinds.test.ts`, `aiEnvelope.schemaDrift.test.ts`, `aiEnvelope.correlationReplay.test.ts`, `aiEnvelope.contractRefusal.test.ts`, `aiEnvelope.trustBoundaryPropagation.test.ts`, `aiEnvelope.redaction.test.ts`, `aiEnvelope.capBreached.test.ts`, `ai-envelope-shape.test.ts` | A− (shape + ~84 live behavioral assertions across all 8 files via the envelope-accept seam) | DRAFT v1.x gap-closure landed 2026-05-17; RFC 0021 promotion to FINAL v1.1 landed 2026-05-18. Closes the long-standing gap where 8 v1 surfaces already referenced AI Envelopes (`Capabilities.supportedEnvelopes` + `schemaVersions` + the three per-turn limits; `host.aiEnvelope.generate`; `envelopeType` on workflow-chain packs; `profiles.md` derivation; `host-extensions.md` namespacing; `positioning.md`; reference host discovery) but the envelope's own wire shape, universal kinds, schema discipline, and Envelope Contract gate were never specified. The 7 `aiEnvelope.*` advertisement-shape probes (gated on `capabilities.envelopeContracts.advertised: true`) cover the host's CAPABILITY claim. All 8 files now also run behavioral assertions through the workflow-engine sample's env-gated `POST /v1/host/sample/envelope/accept` seam (drained 2026-05-19): schema-drift refusal under strict mode; correlationId-replay short-circuit + persisted `priorCorrelations` store survives process restart; BYOK redaction-carry-forward returning `redactedPayload` + `redactionCount`; contract-refusal mappings via the capability-toggle seam; trust-boundary propagation from MCP/A2A; cap-breached counters; universal-kind always-allowed; `ai-envelope-shape` end-to-end accept-pipeline. Path to `Accepted` (host-side): live downstream-projection coverage on a published host (OTel scrape + debug-bundle export currently soft-skip on hosts that don't expose those seams). |
39
39
  | Envelope `reasoning` field + Tier-1 subset (RFC 0030 — `spec/v1/ai-envelope.md` §"Reasoning field (normative)", `spec/v1/structured-output-subset.md`) | `envelope-reasoning-shape.test.ts`, `envelope-reasoning-secret-redaction.test.ts`, `envelope-tier-one-subset-static.test.ts` | A (shape + load-bearing Tier-1 static checks always-on; 8 live behavioral assertions in `envelope-reasoning-secret-redaction` via the envelope-accept seam, including the OTel + debug-bundle downstream projections) | RFC 0030 promoted Draft → Active 2026-05-20. `envelope-reasoning-shape` (always-on) asserts the OPTIONAL `reasoning` property posture on the three universal-kind schemas + the `schema.response` deliberate omission + with/without backward-compat round-trips + `capabilities.envelopes.reasoning.{supported,promptDirective}` + `tierOneSubsetCompliance` advertisement shape. `envelope-tier-one-subset-static` enforces the load-bearing Tier-1 subset (no `oneOf` / `allOf` / `not` / `prefixItems` / `propertyNames` anywhere — Gemini silently drops these) on every universal-kind schema as always-on; the OpenAI-strict-only constraints (`minLength` / `maxLength` / `minItems` etc.) are checked only under host-advertised `tierOneSubsetCompliance: "strict"` to honor the universal-kind schemas' pre-RFC-0030 open-bag design. 
`envelope-reasoning-secret-redaction` (capability-gated on `reasoning.supported` + `secrets.supported`) carries 8 live behavioral assertions for SECURITY invariant `envelope-reasoning-secret-redaction`: BYOK canary substitution with the canonical `[REDACTED:<secretId>]` marker on `reasoning`; recursive walk across `reasoning` + sibling fields; passthrough when no canary matches; canary detection inside `clarification.request.reasoning`; downstream OTel-span scrape + debug-bundle export both confirm no canary plaintext leaks (soft-skip on hosts that don't expose those seams); non-routing-on-reasoning invariant (acceptor's routing decision MUST NOT depend on `reasoning` contents). Reference host advertises `capabilities.envelopes.reasoning: { supported: true, promptDirective: "off" }` + `tierOneSubsetCompliance: "warn"`. Path to `Accepted`: reference host injects the system-prompt directive instructing the model to populate `reasoning` (promotes `promptDirective` → `"advisory"`). |
40
40
  | Envelope variant discrimination + model capabilities (RFC 0031 — `spec/v1/ai-envelope.md` §"Variant payload discrimination (normative)", `spec/v1/host-capabilities.md` §"Model-capability declarations", `spec/v1/node-packs.md` §"Model-capability declarations on NodeModules") | `envelope-variant-discriminator-static.test.ts`, `model-capability-substituted.test.ts`, `model-capability-insufficient.test.ts`, `node-module-required-capabilities-shape.test.ts` | B+ (discriminator-static + advertisement-shape always-on; 14 live behavioral assertions across substitution + insufficient + authoring-convention, capability-gated) | RFC 0031 promoted Draft → Active 2026-05-20. `envelope-variant-discriminator-static` (always-on) walks every `schemas/envelopes/*.schema.json` asserting no `oneOf` at any nesting depth (Gemini silently drops `oneOf`, producing looser-than-declared schemas — a silent correctness bug) AND every `anyOf` branch declares a single-string-`enum` discriminator in `required` per RFC 0031 §A. `model-capability-substituted` (capability-gated on `capabilities.modelCapabilities.supported` + `substitutionSupported`) carries advertisement-shape check on the `advertised: string[]` pattern (each identifier matches the spec-reserved set OR `^x-host-<host>-<key>$` per RFC 0031 §C) + 4 live behavioral assertions covering substitution emission + SECURITY invariant `model-capability-substituted-no-credential-disclosure`'s all-or-nothing `"[REDACTED]"` redaction option. `model-capability-insufficient` (capability-gated on `modelCapabilities.supported`) carries 6 live behavioral assertions covering refusal emission paths + the no-recursive-fallback constraint (RFC 0031 §"Unresolved questions" #3 — `fallbackAttempted: true` when the declared fallback itself fails; NO chaining). `node-module-required-capabilities-shape` (SHOULD-tier authoring convention check) carries 4 live assertions for the `core.ai.*` typeId-pattern recommendation. 
Path to `Accepted`: reference host implements `executor/modelCapabilityGate.ts` end-to-end + advertises `capabilities.modelCapabilities: { supported: true, advertised: [...], substitutionSupported: true }` (the live behavioral assertions soft-skip cleanly on hosts that haven't wired the executor yet). |
41
+ | Multimodal envelope variants (RFC 0055 — `spec/v1/ai-envelope.md` §"Rendering hints" + §"Media reference payloads") | `envelope-rendering-hint.test.ts`, `media-url-inline-cap.test.ts` | A (always-on schema shape across §B + §C; live media store→serve→tenant-scoping behavioral assertions against the reference-host seam, soft-skip offline) | **RFC 0055 promoted Draft → Active 2026-05-25.** §B: `envelope-rendering-hint` (always-on, server-free Ajv) asserts the optional `meta.rendering` hint on the `EnvelopeMeta` $def — well-formed hint validates; omitting it still validates (optionality); unknown `display` rejected by the closed enum; unknown property rejected (additionalProperties:false). Consumer-fallback rendering is exercised app-side (`chat/MessageRenderer.tsx`). §C: `media-url-inline-cap` compiles + round-trips the three `media.{image,audio,file}` payload schemas, asserts `aiProviders.maxInlineMediaBytes` shape, and (live, vs. the reference host's `POST /v1/host/sample/media/put` + public token-authed `GET /v1/host/sample/assets/{token}` seam) verifies a stored asset is served by a tenant-scoped URL and an unminted/guessed token does not resolve — the `media-asset-url-tenant-scoped` SECURITY invariant. §A capability vocabulary (`vision-input`/`audio-input`/`audio-output`/`image-output`) is registered in the open `modelCapabilities.advertised` prose registry; a host advertises a given identifier only when its model supports it (the reference mock model advertises none). Reference host advertises `media.*` in `supportedEnvelopes`/`schemaVersions` + `maxInlineMediaBytes` + serves assets. `Active → Accepted` awaits a non-steward host. |
41
42
  | Envelope-reliability run-event vocabulary (RFC 0032 — `spec/v1/ai-envelope.md` §"Envelope-reliability events" + line-448 scope clarification, `spec/v1/observability.md` §"Envelope-reliability events (RFC 0032)") | `envelope-retry-attempted.test.ts`, `envelope-retry-exhausted.test.ts`, `envelope-refusal-shape.test.ts`, `envelope-truncated.test.ts`, `envelope-nl-to-format-engaged.test.ts`, `envelope-recovery-applied.test.ts` | B (1 shared advertisement-shape probe with MUST-events enforcement; 34 live behavioral assertions across the six events, all capability- + fixture-gated) | RFC 0032 promoted Draft → Active 2026-05-20. Carries the central `ai-envelope.md` line-448 scope clarification (per-kind routing events forbidden; cross-kind operational events permitted via RFC). `envelope-retry-attempted` carries the shared advertisement-shape probe: when `capabilities.envelopes.reliability.supported: true`, the host MUST list both `envelope.retry.exhausted` AND `envelope.refusal` in `events[]` (the two MUST-tier events per RFC 0032 §C); `maxRetryAttempts` MUST be in `[1, 16]`. The six scenarios collectively carry 34 live behavioral assertions (drained 2026-05-19 via the conformance `mock` provider + `POST /v1/host/sample/test/mock-ai/program` seam): retry on schema-violation + retry on truncation + retry-exhausted terminal failure + provider refusal (no-retry MUST per RFC 0032 §B.3 + RFC 0033 §D) + truncation cut-off + NL-to-Format escalation (Tam et al. mitigation per arXiv 2408.02442) + lenient-parsing recovery + SECURITY invariants `envelope-refusal-no-prompt-leak` (BYOK + prompt-content redaction on `refusalText`) and `envelope-recovery-no-content-leak` (no pre-recovery substrings in the event payload). 
Path to `Accepted`: reference host implements `executor/envelopeReliability.ts` end-to-end + advertises `capabilities.envelopes.reliability: { supported: true, events: [...], maxRetryAttempts: <n> }` (the behavioral assertions already pass against the reference host's end-to-end emission path under `OPENWOP_ENVELOPE_RELIABILITY_END_TO_END=true`; the no-flag default still soft-skips). |
42
43
  | Envelope-completion retry routing (RFC 0033 — `spec/v1/ai-envelope.md` §"Envelope-completion criteria", `spec/v1/observability.md` §"Envelope-completion retry routing (RFC 0033)") | `envelope-completion-distinguishes-truncation.test.ts`, `envelope-truncation-cap-exhaustion.test.ts` | B− (1 advertisement-shape probe on `completion.{distinguishesTruncation, truncationBudgetMultiplier}`; 9 live behavioral assertions across the two retry paths + the DoS-bound assertion) | RFC 0033 promoted Draft → Active 2026-05-20. Closes `spec/v1/ai-envelope.md` §"Open spec gaps" E5 (refusal-mode + retry-policy interaction). Reuses RFC 0032's event vocabulary; introduces NO new event types. `envelope-completion-distinguishes-truncation` (capability-gated on `completion.distinguishesTruncation: true`) carries 5 live behavioral assertions covering both retry paths — truncation MUST increase output budget (RECOMMENDED 2× per `truncationBudgetMultiplier`) WITHOUT a corrective fragment; schema-violation MUST add a corrective fragment WITHOUT a budget change. `envelope-truncation-cap-exhaustion` carries 4 live behavioral assertions covering the DoS-bound assertion (truncation retries count against `Capabilities.limits.schemaRounds`; exhaustion → `envelope.retry.exhausted { finalReason: "truncation" }` + `cap.breached { kind: "schema" }` + node fails with NEW error code `envelope_truncation_unrecoverable` per RFC 0033 §F). All 9 assertions are fixture- + capability-gated against the conformance `mock` provider via `POST /v1/host/sample/test/mock-ai/program`. Path to `Accepted`: reference host implements the truncation-vs-schema-violation retry-routing branch end-to-end (`executor/envelopeReliability.ts` + `stop_reason` inspection in `aiProviders/aiProvidersHost.ts`) + advertises `capabilities.envelopes.reliability.completion.distinguishesTruncation: true`. |
43
44
  | Multi-agent execution model + handoff state machine (RFC 0037 — `spec/v1/multi-agent-execution.md`, `version: 1`) | `multi-agent-handoff-state-machine.test.ts` | B (1 advertisement-shape probe + 1 behavioral 4-event causation-chain assertion against the parent+child fixture pair) | RFC 0037 filed Draft → promoted Active 2026-05-21 after spec + schema + scenario landed atomically. Advertisement-shape probe asserts `capabilities.multiAgent.executionModel.{supported, version ∈ [1,4]}` when present. Behavioral assertion drives the `conformance-multi-agent-handoff` parent + `conformance-multi-agent-handoff-child` fixture pair: runs the supervisor → next-worker → child completed loop and asserts the 4 `core.workflowChain.event` records appear in the exact phase sequence `dispatch.began → dispatch.succeeded → child.completed → output.harvested` with each event's `causationId === prior.eventId` and `dispatch.began.causationId === runOrchestrator.decided.eventId`, plus `output.harvested.harvestedKeys === ['parentResult']` (proves the spec §"Transition events" table on real wire). Reference workflow-engine advertises + emits end-to-end when `OPENWOP_MULTI_AGENT_EXECUTION_MODEL=true`; the no-flag default soft-skips honestly. Path to `Accepted`: non-steward host advertises + the behavioral assertion passes against it. |
@@ -48,6 +49,7 @@
48
49
  | Experimental capability tier (RFC 0042 — `schemas/capabilities.schema.json` §`multiAgent.executionModel.tier`) | **`experimental-tier-shape.test.ts` (2026-05-22)** | A (6 server-free + helper-routing assertions across §A schema discipline + §D experimentalGate routing; always-on for hosts that advertise tier='experimental' on any capability sub-block; helper-level behavioral probes for the `experimentalGate()` routing under both default + OPENWOP_REQUIRE_EXPERIMENTAL modes) | RFC 0042 (Draft) lands the audit's "Active RFC → carve-out" pattern. Schema diff lands on `multiAgent.executionModel` with optional `tier ∈ {stable, experimental}` + `experimentalUntil` (ISO-8601 sunset) + `if/then` conditional enforcing §B sunset MUST mechanically. New `experimentalGate()` helper in `conformance/src/lib/behavior-gate.ts` routes scenarios under default mode + `OPENWOP_REQUIRE_EXPERIMENTAL=true` strict-mode. |
49
50
  | Sandbox MVP behavioral close-out (RFC 0035 §B) | **`sandbox-mvp-behavior.test.ts` (2026-05-22)** | A (10 capability-gated behavioral assertions covering 7 of 8 §B failure-mode invariants — 5 escape kinds + timeout + memory-exceeded + cross-pack-mutation isolation + capability-gate-violation + 2 well-behaved baselines; all 10 PASS against the workflow-engine's node:vm-based sandbox MVP) | Companion to the existing 8 advertisement-shape sandbox scenarios (`sandbox-no-host-fs-escape.test.ts` et al.). Exercises the canonical 4-code error catalog at `spec/v1/host-capabilities.md` §"Error codes" (`sandbox_escape_attempt` + `sandbox_capability_denied` + `sandbox_memory_exceeded` + `sandbox_timeout`) with spec-mandated `details.{escapeKind, requestedCapability, requestedBytes}` populated. Wire-shape per `spec/v1/host-sample-test-seams.md §8`. Production adopters use wasmtime/nsjail behind the same HTTP test-seam contract. |
50
51
  | RFC 0041 §B replay-divergence-at-refusal behavioral (`version: 4`) | `replay-divergence-at-refusal.test.ts` (advertisement-shape + behavioral; 3 assertions PASS against workflow-engine when the `multiAgent.executionModel.version: 4` advertisement is enabled) | A (was `it.todo` until 2026-05-23 when the executor wiring landed — see commit `1fce55a` + `bba3b4a`. Behavioral assertions cover both divergence directions: original=valid + replay=refusal AND original=refusal + replay=valid) | Closes Track #4 of the 2026-05-22 multi-agent behavioral-harness close-out. Reference workflow-engine emits `replay.divergedAtRefusal` event + fails run with `error.code: 'replay_diverged_at_refusal'` when source vs replay envelope kinds differ at the same nodeId. Gated on `OPENWOP_MULTI_AGENT_EXECUTION_MODEL_PHASE_4=true` AND `run.forkMode === 'replay'`. Path-to-Accepted for RFC 0041: non-steward host advertises `multiAgent.executionModel.version: 4` end-to-end. |
52
+ | Agent-manifest runtime floor (RFC 0070 — `capabilities.agents.manifestRuntime`) | `agent-manifest-runtime.test.ts` | B (capability-gated; lists ≥1 installed manifest agent + dispatches one with attributed `agent.reasoned`+`agent.decided` events, plus a §F sub-threshold-escalation assertion) | RFC 0070 filed Draft 2026-05-26. Gated on `capabilities.agents.manifestRuntime.supported` + the host dispatch seam (`POST /v1/host/sample/agents/{agentId}/dispatch`); soft-skips when either is absent. The reference **workflow-engine** host advertises `manifestRuntime: { supported: true, handoffValidation: true }`, loads pack `agents[]` (RFC 0003 `installAgents`) into an AgentRegistry at boot, and dispatches end-to-end (toolAllowlist-filtered per RFC 0002 §A14, handoff-validated per RFC 0003 §D, confidence-escalating per §F) — see `apps/workflow-engine/backend/typescript/test/agent-dispatch-route.test.ts` (6 HTTP assertions, incl. the normative inventory). **RFC 0072 (`Draft`):** the scenario's inventory leg now drives the NORMATIVE `GET /v1/agents` (§A) so it runs black-box against any conformant host; the dispatch leg stays on the sample seam (soft-skips off-steward) pending the executor-integration tier. RFC 0072 §C `peerDependenciesMeta` disposition + `degraded[]` are unit-tested in `agent-loader.test.ts`. Path to `Active → Accepted` (RFC 0070): a non-steward host advertises `manifestRuntime` + serves `GET /v1/agents`. |
51
53
 
52
54
  ---
53
55
 
@@ -91,7 +93,17 @@ Twenty-two scenario groups validate optional profiles where the host's discovery
91
93
  | `scheduling-capability-shape.test.ts` | `capabilities.scheduling` (RFC 0052 §A, `host-capabilities.md` §host.scheduling) | A (advertisement shape always — `supported` boolean; `cron`/`delayed`/`calendar` booleans; `maxFutureHorizon` ISO-8601 duration) | `host-pending` | Always runs; asserts the block is absent or well-formed. |
92
94
  | `scheduling-cron-fires-once.test.ts` | `capabilities.scheduling` (RFC 0052 §B) | A (once-per-tick + missed-tick MUST-NOT via optional `POST /v1/host/sample/scheduling/tick` seam — single tick fires exactly one run; missed window never floods) | `host-pending` | Capability-gated on `scheduling.supported` + `cron`; soft-skips on 404. Delayed-horizon + calendar scenarios deferred. |
93
95
  | `deadletter-capability-shape.test.ts` | `capabilities.deadLetter` (RFC 0053 §A, `host-capabilities.md` §host.deadLetter) | A (advertisement shape always — `supported` boolean; `retentionDays` integer ≥ 1) | `host-pending` | Always runs; asserts the block is absent or well-formed. |
96
+ | `run-execution-bounds-shape.test.ts` | `capabilities.limits.{maxRunDurationMs,maxLoopIterations}` + `cap.breached` kinds `run-duration` / `loop-iterations` (RFC 0058, `run-options.md` §Reserved keys) | A (advertisement shape always — `maxRunDurationMs` integer ≥ 1000; `maxLoopIterations` integer ≥ 1; run-duration breach gated on the `conformance-run-duration-breach` fixture) | `host-pending` | Shape always runs; the run-duration breach behavior soft-skips until a host enforces wall-clock timeouts (mirrors RFC 0052 scheduling). |
97
+ | `workspace-capability-shape.test.ts` | `capabilities.workspace` (RFC 0059 §A, `agent-workspace.md` §"Capability advertisement") + the four workspace operations `listWorkspaceFiles` / `getWorkspaceFile` / `putWorkspaceFile` / `deleteWorkspaceFile` (`/v1/host/workspace/files[/{path}]`) | A (advertisement shape always — `supported` boolean; `maxFileBytes`/`maxFiles`/`maxVersions` integers ≥ 1 when present) | in-memory ✅ | Always runs; asserts the block is absent or well-formed. The in-memory reference host advertises `capabilities.workspace.supported` (RFC 0059 M2). |
98
+ | `workspace-behavior.test.ts` | RFC 0059 §C/§D (`agent-workspace.md` §"§C — Endpoints" / §"§D — Run-time exposure") — `/v1/host/workspace/files[/{path}]` | B (capability-gated on `workspace.supported`: CRUD round-trip + `If-Match` 409 `workspace_conflict` with `details.currentVersion` + `workspace_too_large` over `maxFileBytes` + the run-start workspace snapshot on the run snapshot) | in-memory ✅ | Soft-skips when `workspace.supported` is unadvertised. |
99
+ | `workspace-cross-tenant-isolation.test.ts` | RFC 0059 §E WCT-1 (`agent-workspace.md` §"§E — Invariants") + `SECURITY/invariants.yaml` `workspace-cross-tenant-isolation` | B (capability-gated + seam-gated: a file owned by `{tenant, workspace}` MUST NOT be readable via `get`/`list` under a different owner — drives `POST /v1/host/sample/workspace/op`) | in-memory ✅ | Public test for the `workspace-cross-tenant-isolation` invariant; soft-skips when the seam is unwired (404). |
94
100
  | `deadletter-retry-exhaustion.test.ts` | `capabilities.deadLetter` (RFC 0053 §C) + `run.dead_lettered` event | A (retry-exhaustion → `run.dead_lettered` with `attempts` + dead-lettered run fork-eligible, via optional `POST /v1/host/sample/deadletter/exhaust` seam) | `host-pending` | Capability-gated on `deadLetter.supported`; soft-skips on 404. Retention-purge scenario deferred (needs clock seam). |
101
+ | `artifact-type-pack-manifest-validation.test.ts` | `artifact-type-pack-manifest.schema.json` (RFC 0071 Phase 1, `artifact-type-packs.md` §"Manifest format") | Server-free (positive + 5 negatives: mixed-kind / empty `artifactTypes` / bad `artifactTypeId` pattern / unknown `rendering.display` / non-conforming `exportFormats`) | host-pass (server-free) | Always runs; no host needed. |
102
+ | `artifact-schema-compile-bounded.test.ts` | RFC 0071 §"Bounded schema compilation" + `SECURITY/invariants.yaml` `artifact-schema-compile-bounded` | Server-free (contract-present in corpus + a reference finite bound rejects `$ref`-depth / keyword-count / size bombs, admits benign schemas) | host-pass (server-free floor) | Public test for the `artifact-schema-compile-bounded` invariant. Behavioral form (host rejects an over-bounds pack at registry `PUT` with `pack_validation_failed`) gated on `host.artifactTypes.supported`. |
103
+ | `artifact-type-pack-install.test.ts` | `host.artifactTypes` (RFC 0071 Phase 1, `host-capabilities.md` §host.artifactTypes) | A (install + produce → `artifact.created { registered: true }` after schema validation; schema-violating payload rejected, via the `POST /v1/host/sample/artifacttypes/{install,produce}` seam) | MyndHyve ✅ · in-memory ✅ | Capability-gated on `host.artifactTypes.supported`. PASS against MyndHyve `workflow-runtime-00396-cuj` (production) AND the in-memory reference host (store-only seam, RFC 0075 P2-1); soft-skips on hosts that don't advertise. |
104
+ | `artifact-type-store-without-render.test.ts` | `host.artifactTypes` (RFC 0071 §host.artifactTypes — store/render negotiation) | A (a `store:true,render:false` host stores the artifact + completes the run; never fails for lack of a renderer) | in-memory ✅ | **Exercised end-to-end (RFC 0075 P2-1):** the in-memory reference host advertises `host.artifactTypes { store:true, render:false }` and implements the produce seam — a registered, schema-valid artifact is stored and the run completes with `rendered:false`. MyndHyve (`render:true`) honestly soft-skips this path; the in-memory store-only host is the one that actually verifies the negotiation. |
105
+ | `chat-card-pack-manifest-validation.test.ts` | `chat-card-pack-manifest.schema.json` (RFC 0071 Phase 2, `chat-card-packs.md` §"Manifest format") | Server-free (positive + 5 negatives: mixed-kind / empty `cards` / bad `cardTypeId` / missing `prompt` / non-portable `inputs[].type`; + positive `vendor.*` extension + the full portable `inputs[].type` subset incl. `multiselect`/`file`, G9) | host-pass (server-free) | Always runs; no host needed. Behavioral execution lives in the sibling scenario below. |
106
+ | `chat-card-pack-execution.test.ts` | `host.chat.cardPacks` (RFC 0071 Phase 2, `chat-card-packs.md` §"Card execution" / §"Trust boundary") + `SECURITY/invariants.yaml` `chat-card-input-trust-boundary` | A (output validated against the linked `outputArtifactType` → `artifact.created { registered: true }`; **card-input-derived prompt content propagates `contentTrust:"untrusted"`** — the R2 proof, via `POST /v1/host/sample/cardpacks/execute` seam) | MyndHyve ✅ | **Phase 2 `Accepted` 2026-05-27.** PASS against MyndHyve `workflow-runtime-00402-bey` (real `core.chat.cardExecute` → `ctx.aiEnvelope.generate`; `host.chat.cardPacks` + `host.aiEnvelope` advertised unconditionally on production, steward-curl-verified). Capability-gated on `host.chat.cardPacks.supported`; soft-skips on hosts that don't advertise. |
95
107
  | `kv-cross-tenant-isolation.test.ts`, `kv-atomic-increment.test.ts`, `kv-cas.test.ts` (three scenarios) | `capabilities.kvStorage` (RFC 0015, `host-kv-storage-capability.md`) + `SECURITY/invariants.yaml` `kv-cross-tenant-isolation` | A (advertisement shape always; behavioral cross-tenant `set`/`get`, 50× concurrent atomic increment convergence, CAS matching/stale-expect) | host-pass via opt-in test seam | Reference host exposes `POST /v1/host/sample/test/surface` env-gated on `OPENWOP_TEST_SEAM_ENABLED=true`; hosts that don't expose the seam soft-skip the behavioral assertions and verify advertisement shape only. |
96
108
  | `table-cross-tenant-isolation.test.ts` | `capabilities.tableStorage` (RFC 0016, `host-table-storage-capability.md`) | A (advertisement shape + behavioral cross-tenant insert/query proof) | host-pass via opt-in test seam | Same seam dependency as kv row. |
97
109
  | `queue-cross-tenant-isolation.test.ts` | `capabilities.queueBus` (RFC 0017, `host-queue-bus-capability.md`) + `SECURITY/invariants.yaml` `queue-cross-tenant-isolation` | A (advertisement shape + behavioral cross-tenant publish/consume proof) | host-pass via opt-in test seam | Same seam dependency as kv row. |
@@ -133,6 +145,8 @@ Every OpenAPI operation should have:
133
145
  | `createRun` | `runs-lifecycle.test.ts`, `identity-passthrough.test.ts`, `failure-path.test.ts`, fixture scenarios | `auth.test.ts`, `errors.test.ts`, `idempotency.test.ts`, `idempotencyRetry.test.ts` | Strong baseline; add per-field validation matrix. |
134
146
  | `getRun` | Lifecycle, cancellation, interrupt, replay, and subworkflow tests poll snapshots | `failure-path.test.ts`, `errors.test.ts` | Add explicit unknown-run `404` scenario if not already covered through helper assertions. |
135
147
  | `getRunAncestry` | `cross-host-ancestry-endpoint.test.ts`, `cross-host-causation-shape.test.ts` (RFC 0040 §C); capability-gated on `capabilities.multiAgent.executionModel.crossHostCausation.ancestryEndpointSupported` | Unadvertised-host 404 path + top-level `parent: null` shape covered | Add positive multi-hop traversal once a reference host implements end-to-end cross-host composition. |
148
+ | `listAgents` | `agent-manifest-runtime.test.ts` (RFC 0072 §A); capability-gated on `capabilities.agents.manifestRuntime.supported` | Normative `GET /v1/agents` inventory leg — lists ≥1 installed manifest agent; soft-skips (404) when unadvertised | Black-box across hosts; dispatch leg via sample seam pending the executor-integration tier. |
149
+ | `getAgent` | `agent-manifest-runtime.test.ts` (RFC 0072 §A) + `agent-dispatch-route.test.ts` (reference host) | One manifest agent's inventory entry + 404 for unknown | Covered against the workflow-engine reference host. |
136
150
  | `streamRunEvents` | `stream-modes.test.ts`, `stream-modes-buffer.test.ts`, `stream-modes-mixed.test.ts`, `streamReconnect.test.ts` | Unsupported mode and invalid buffer assertions | Add long-running proxy timeout soak outside fast CI. |
137
151
  | `pollRunEvents` | `multi-node-ordering.test.ts`, `version-negotiation.test.ts`, redaction tests | Past-end and validation assertions | Good. Add malformed `lastSequence` if missing. |
138
152
  | `cancelRun` | `cancellation.test.ts` | Unknown/terminal idempotency cases partial | Add explicit already-terminal cancel behavior. |
@@ -0,0 +1,33 @@
1
+ {
2
+ "id": "conformance-run-duration-breach",
3
+ "name": "Conformance: Run-Duration Breach (RFC 0058)",
4
+ "version": "1.0",
5
+ "description": "Single delay node that sleeps far longer than a small `RunOptions.configurable.runTimeoutMs`. Run with `runTimeoutMs: 1000` on a host advertising `capabilities.limits.maxRunDurationMs` to trigger the wall-clock breach — verifies `cap.breached {kind: 'run-duration'}` emission (observed > limit) + terminal `failed` with `error.code = 'run_timeout'`. Spec: run-options.md §runTimeoutMs + capabilities.md §'Engine-enforced limits' + RFC 0058.",
6
+ "nodes": [
7
+ {
8
+ "id": "wait",
9
+ "typeId": "core.delay",
10
+ "name": "Wait",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {},
13
+ "inputs": {
14
+ "delayMs": { "type": "variable", "variableName": "delayMs" }
15
+ }
16
+ }
17
+ ],
18
+ "edges": [],
19
+ "triggers": [
20
+ { "id": "manual", "type": "manual", "enabled": true }
21
+ ],
22
+ "variables": [
23
+ {
24
+ "name": "delayMs",
25
+ "type": "number",
26
+ "description": "Milliseconds to sleep. Defaulted well above any small runTimeoutMs so the wall-clock bound trips first.",
27
+ "required": true,
28
+ "defaultValue": 30000
29
+ }
30
+ ],
31
+ "metadata": { "tags": ["conformance", "run-duration", "rfc-0058"] },
32
+ "settings": { "timeout": 60000, "maxRetries": 0 }
33
+ }