@openwop/openwop-conformance 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +2 -2
  3. package/api/asyncapi.yaml +25 -4
  4. package/api/openapi.yaml +371 -0
  5. package/coverage.md +31 -4
  6. package/fixtures/conformance-phase4-nondet-tool.json +53 -0
  7. package/fixtures/conformance-phase4-replay-divergence.json +40 -0
  8. package/fixtures.md +5 -3
  9. package/package.json +1 -1
  10. package/schemas/README.md +4 -0
  11. package/schemas/annotation-create.schema.json +37 -0
  12. package/schemas/annotation.schema.json +56 -0
  13. package/schemas/capabilities.schema.json +191 -3
  14. package/schemas/credential-reference.schema.json +21 -0
  15. package/schemas/node-pack-manifest.schema.json +112 -1
  16. package/schemas/run-diff-response.schema.json +64 -0
  17. package/schemas/run-event-payloads.schema.json +104 -2
  18. package/schemas/run-event.schema.json +8 -1
  19. package/schemas/run-snapshot.schema.json +11 -0
  20. package/src/lib/behavior-gate.ts +51 -0
  21. package/src/lib/driver.ts +13 -1
  22. package/src/lib/feedback.ts +31 -0
  23. package/src/lib/saml-idp.ts +179 -0
  24. package/src/scenarios/approval-gate-events.test.ts +61 -0
  25. package/src/scenarios/approval-gate-flow.test.ts +68 -0
  26. package/src/scenarios/auth-saml-profile.test.ts +119 -0
  27. package/src/scenarios/auth-scim-profile.test.ts +65 -0
  28. package/src/scenarios/authorization-fail-closed.test.ts +80 -0
  29. package/src/scenarios/authorization-roles-shape.test.ts +83 -0
  30. package/src/scenarios/connector-manifest-validity.test.ts +142 -0
  31. package/src/scenarios/credential-payload-redaction.test.ts +93 -0
  32. package/src/scenarios/credentials-capability-shape.test.ts +90 -0
  33. package/src/scenarios/cross-engine-append-behavior.test.ts +204 -0
  34. package/src/scenarios/cross-host-traceparent-propagation.test.ts +13 -6
  35. package/src/scenarios/cross-workspace-isolation.test.ts +72 -0
  36. package/src/scenarios/deadletter-capability-shape.test.ts +59 -0
  37. package/src/scenarios/deadletter-retry-exhaustion.test.ts +62 -0
  38. package/src/scenarios/experimental-tier-shape.test.ts +192 -0
  39. package/src/scenarios/feedback-capability-shape.test.ts +35 -0
  40. package/src/scenarios/feedback-correction-redaction.test.ts +35 -0
  41. package/src/scenarios/feedback-cross-tenant-isolation.test.ts +37 -0
  42. package/src/scenarios/feedback-fork-not-copied.test.ts +40 -0
  43. package/src/scenarios/feedback-on-terminal-run.test.ts +32 -0
  44. package/src/scenarios/feedback-record-and-list.test.ts +32 -0
  45. package/src/scenarios/feedback-unsupported-501.test.ts +32 -0
  46. package/src/scenarios/identity-owner-shape.test.ts +64 -0
  47. package/src/scenarios/multi-agent-confidence-escalation.test.ts +13 -12
  48. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +87 -12
  49. package/src/scenarios/multi-region-idempotency-behavior.test.ts +203 -0
  50. package/src/scenarios/oauth-capability-shape.test.ts +97 -0
  51. package/src/scenarios/oauth-connector-redaction.test.ts +91 -0
  52. package/src/scenarios/pack-registry-isolation.test.ts +108 -0
  53. package/src/scenarios/pack-registry-publish.test.ts +1 -1
  54. package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +126 -0
  55. package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +183 -0
  56. package/src/scenarios/redaction.test.ts +4 -1
  57. package/src/scenarios/replay-divergence-at-refusal.test.ts +187 -7
  58. package/src/scenarios/replay-observable-sequence-determinism.test.ts +20 -6
  59. package/src/scenarios/run-diff.test.ts +143 -0
  60. package/src/scenarios/sandbox-capability-gate-respected.test.ts +7 -1
  61. package/src/scenarios/sandbox-memory-cap.test.ts +7 -5
  62. package/src/scenarios/sandbox-mvp-behavior.test.ts +280 -0
  63. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +7 -1
  64. package/src/scenarios/sandbox-no-host-env-leak.test.ts +5 -1
  65. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +9 -1
  66. package/src/scenarios/sandbox-no-host-process-escape.test.ts +5 -1
  67. package/src/scenarios/sandbox-no-network-escape.test.ts +5 -1
  68. package/src/scenarios/sandbox-timeout-cap.test.ts +7 -5
  69. package/src/scenarios/scheduling-capability-shape.test.ts +81 -0
  70. package/src/scenarios/scheduling-cron-fires-once.test.ts +66 -0
  71. package/src/scenarios/secret-leakage-otel-attribute.test.ts +241 -0
  72. package/src/scenarios/spec-corpus-validity.test.ts +6 -3
package/CHANGELOG.md CHANGED
@@ -1,5 +1,32 @@
1
1
  # `@openwop/openwop-conformance` Changelog
2
2
 
3
+ ## [1.6.1] — 2026-05-25
4
+
5
+ Patch — fixes a stale allowlist in `redaction.test.ts` that contradicted the same release's `capabilities.schema.json`. Reported by MyndHyve against the 1.6.0 cohort run.
6
+
7
+ ### Fixed
8
+
9
+ - **`redaction.test.ts:103`** — the `secrets.scopes` member check hardcoded `['tenant', 'user', 'run']`, omitting `'workspace'`. The canonical `secrets.scopes` enum in `capabilities.schema.json` is `["tenant", "user", "run", "workspace"]` (`workspace` is the RFC 0046/0048 sub-tenant scope, additive). A host honestly advertising `secrets.scopes: ['workspace', …]` (e.g. MyndHyve `workflow-runtime`) wrongly failed the scenario. The allowlist now tracks the schema enum. No wire-shape change; the schema and RFC 0046 §A were already canonical — only the test was stale.
10
+
11
+ ## [1.6.0] — 2026-05-25
12
+
13
+ Minor bump per `PUBLISHING.md` §"Versioning alignment" — ships the conformance scenarios for the **MyndHyve protocol-extension cohort (RFCs 0045–0054)** so adopting hosts can pin the released suite, run it against their deployment, and report pass for `Draft → Active → Accepted` graduation (per `RFCS/0001-rfc-process.md` §"Promotion to Accepted"). All additive — every new scenario is capability-gated and soft-skips against a host that doesn't advertise the surface, so existing v1.0-only hosts pass unchanged.
14
+
15
+ ### Added — RFC 0045–0054 cohort scenarios
16
+
17
+ - **RFC 0045** (connector pack manifest) — `connector-manifest-validity.test.ts` (server-free: §A schema validity of the `connector` block + both ConnectorAuth variants; §B action/trigger typeId-resolution).
18
+ - **RFC 0046** (`host.credentials`) — `credentials-capability-shape.test.ts` (always) + `credential-payload-redaction.test.ts` (gated; SECURITY invariant `credential-payload-redaction` via the `credentials/echo` seam).
19
+ - **RFC 0047** (`host.oauth`) — `oauth-capability-shape.test.ts` + `oauth-connector-redaction.test.ts` (gated; token redaction via the `oauth/connector-echo` seam).
20
+ - **RFC 0048** (identity triple) — `identity-owner-shape.test.ts` (server-free) + `cross-workspace-isolation.test.ts` (gated; fail-closed `run_forbidden` via the `identity/*` seams).
21
+ - **RFC 0049** (RBAC) — `authorization-roles-shape.test.ts` (always) + `authorization-fail-closed.test.ts` (gated; SECURITY invariant `authorization-fail-closed` via the `authorization/decide` seam).
22
+ - **RFC 0050** (SAML / SCIM) — `auth-saml-profile.test.ts` + `auth-scim-profile.test.ts` (advertisement shape always; behavior opt-in via `OPENWOP_TEST_SAML_IDP_URL` / `OPENWOP_TEST_SCIM_URL` + the `auth/saml/validate` + `auth/scim/provision` seams). Now ships a **bundled synthetic SAML IdP harness** (`conformance/src/lib/saml-idp.ts`, `node:crypto` RSA-SHA256, no deps) that mints the 1-positive + 6-negative assertion variants and whose `verify()` implements the RFC 0050 §A MUST list — `auth-saml-profile.test.ts` runs the full negative reference suite **server-free**; a host's real ACS validates the same assertions over the seam.
23
+ - **RFC 0051** (approval gate) — `approval-gate-events.test.ts` (server-free) + `approval-gate-flow.test.ts` (gated; unauthorized-denied + override-audited via the `governance/approval-gate` seam).
24
+ - **RFC 0052** (scheduling) — `scheduling-capability-shape.test.ts` (always) + `scheduling-cron-fires-once.test.ts` (gated; once-per-tick + missed-tick via the `scheduling/tick` seam).
25
+ - **RFC 0053** (dead-letter) — `deadletter-capability-shape.test.ts` (always) + `deadletter-retry-exhaustion.test.ts` (gated; `run.dead_lettered` + fork-eligibility via the `deadletter/exhaust` seam).
26
+ - **RFC 0054** (run diff) — `run-diff.test.ts` (landed with the run-diff endpoint).
27
+
28
+ Two new SECURITY invariants gated in this cohort: `credential-payload-redaction` (0046, also covers 0047 tokens) + `authorization-fail-closed` (0049). New `/v1/host/sample/*` seams are catalogued in `spec/v1/host-sample-test-seams.md` §"Open seams". Suite scenario-file count → 238.
29
+
3
30
  ## [1.5.0] — 2026-05-22
4
31
 
5
32
  Minor bump per `PUBLISHING.md` §"Versioning alignment" — unblocks MyndHyve's RFC 0044 + RFC 0039 Half A co-graduation by shipping the relaxed + RFC-0044-routing assertion logic in `multi-agent-confidence-escalation.test.ts`. No new scenario files; no new fixtures. Behavioral honesty pass on 8 sandbox scenarios + schema additions for the new RFC 0044 capability advertisement.
package/README.md CHANGED
@@ -93,7 +93,7 @@ Exit code is non-zero on any failed assertion.
93
93
 
94
94
  ## What's Covered
95
95
 
96
- The current suite has 205 scenario files under `src/scenarios/`. 2026-05-22 (RFC 0041 Phase 4 — replay determinism under nondeterministic models) added three scenarios: `replay-divergence-at-refusal.test.ts` (advertisement-shape probe on `replayDeterminism.refusalDivergenceEmission` + 2 `it.todo` for the dual-direction refusal-divergence case), `replay-observable-sequence-determinism.test.ts` (capability-gated; behavioral assertion soft-skipped until a `conformance-phase4-nondet-tool` fixture ships), `replay-llm-cache-key-portable.test.ts` (intra-host reproducibility + non-recipe-field invariance + Phase 4 advertisement alignment — reuses the existing `POST /v1/host/sample/test/llm-cache-key` seam from the sibling `replay-llm-cache-key.test.ts`). 2026-05-20 (RFC 0027 §A templateKinds-coverage follow-up — paired with `prompt-end-to-end-events.test.ts`) added `prompt-all-four-kinds-events.test.ts` exercising all four `PromptKind` values (`system`, `user`, `schema-hint`, `few-shot`) end-to-end through the reference workflow-engine sample's `local.sample.demo.mock-ai` dispatch path; capability-gated via `behaviorGate('prompts-supported', ...)`. Closes the credibility gap where the host advertised `templateKinds: ["system", "user", "few-shot", "schema-hint"]` but only the system+user pair was actually wired into dispatch. 
2026-05-20 (RFCs 0030–0033 — envelope LLM-contract-hardening track) added 15 scenarios across four `Active` RFCs: `envelope-reasoning-shape.test.ts` (RFC 0030, always-on; asserts the OPTIONAL `reasoning` property on the three universal-kind schemas + the `schema.response` deliberate omission), `envelope-reasoning-secret-redaction.test.ts` (RFC 0030, capability-gated on `capabilities.envelopes.reasoning.supported` + `secrets.supported`; 5 `it.todo()` placeholders for SECURITY invariant `envelope-reasoning-secret-redaction`), `envelope-tier-one-subset-static.test.ts` (RFC 0030, always-on for load-bearing rules — no `oneOf` / `allOf` / `not` / `prefixItems` / `propertyNames` anywhere; gated on `tierOneSubsetCompliance: "strict"` for OpenAI-strict-only constraints), `envelope-variant-discriminator-static.test.ts` (RFC 0031, always-on; asserts no `oneOf` + every `anyOf` branch declares a single-string-enum discriminator in `required` on every `schemas/envelopes/*.schema.json`), `model-capability-substituted.test.ts` (RFC 0031, advertisement-shape probe on `capabilities.modelCapabilities.advertised[]` identifier pattern + 5 `it.todo()` placeholders for SECURITY invariant `model-capability-substituted-no-credential-disclosure`), `model-capability-insufficient.test.ts` (RFC 0031, 6 `it.todo()` placeholders for refusal + no-recursive-fallback), `node-module-required-capabilities-shape.test.ts` (RFC 0031 SHOULD-tier authoring-convention; 4 `it.todo()` placeholders), and the six envelope-reliability events from RFC 0032 (`envelope-retry-attempted` carrying the shared advertisement-shape probe enforcing both MUST-tier events in `events[]` per RFC 0032 §C, plus `envelope-retry-exhausted`, `envelope-refusal-shape`, `envelope-truncated`, `envelope-nl-to-format-engaged`, `envelope-recovery-applied` — collectively 39 `it.todo()` placeholders covering retry/refusal/truncation/recovery + SECURITY invariants `envelope-refusal-no-prompt-leak` and `envelope-recovery-no-content-leak`), 
plus RFC 0033's two scenarios (`envelope-completion-distinguishes-truncation.test.ts` + `envelope-truncation-cap-exhaustion.test.ts` — 12 `it.todo()` placeholders covering the truncation-vs-schema-violation retry-routing distinction + the DoS-bound assertion). Reference workflow-engine sample advertises `capabilities.envelopes.reasoning: { supported: true, promptDirective: "off" }` + `tierOneSubsetCompliance: "warn"` honestly (schemas accept the field; host doesn't yet inject the directive); the other three RFCs' capability blocks defer to reference-host emission code per the staged RFC 0027 §G precedent. 2026-05-20 (RFC 0028 §B Phase B — prompt-pack boot-time install) added `prompt-pack-install.test.ts` (capability-gated on `capabilities.prompts.endpointsSupported: true`; asserts a host that ran the boot-time pack loader surfaces ≥ 1 pack-source template under `GET /v1/prompts?source=pack` carrying the canonical `meta.source: "pack"` + `meta.packName` + `meta.packVersion` stamps; positively identifies the in-tree `vendor.openwop.prompt-sample` reference pack's `writer-system` template when present). Pairs with the new `host/promptPackLoader.ts` boot-time entry on the reference workflow-engine sample, which scans `examples/packs/*` plus `OPENWOP_PROMPT_PACKS_DIR` and calls `installPackTemplates()` for each `kind: "prompt"` pack found. 
2026-05-20 (RFC 0029 Phase C — prompt resolution chain wire shape) added three more scenarios: `prompt-resolution-chain-node-wins.test.ts` (capability-gated on `capabilities.prompts.supported: true`; asserts layer-1 node-config supersedes lower layers per `spec/v1/prompts.md` §"Resolution chain (normative)"), `prompt-resolution-chain-agent-intrinsic.test.ts` (additionally gated on `capabilities.prompts.agentBindings: true`; asserts agent intrinsic `systemPromptRef` wins over `promptOverrides` AND lower layers when the node has no layer-1 ref), `prompt-resolution-chain-fallback-cascade.test.ts` (asserts layer 3 workflow-defaults wins over layer 4 host-defaults; layer 4 host-defaults wins when 1-3 yield null; resolved is null when all four yield null but chain[] still lists every attempted layer). The scenarios drive the host's `POST /v1/host/sample/prompt/resolve` test seam (reference-host implementation deferred to follow-up slice per RFC 0021 staging precedent). 2026-05-20 (RFC 0027 Phase A — prompt templates wire shape) added three scenarios: `prompt-template-shape.test.ts` (always-on; Ajv compileability + positive/negative round-trip for PromptTemplate + PromptRef + PromptKind), `prompt-composed-secret-redaction.test.ts` (capability-gated on `capabilities.prompts.supported: true` + `observability: "full"`; asserts `[REDACTED:<secretId>]` markers in `prompt.composed` payloads for `source: "secret"` variable bindings per SECURITY/threat-model-secret-leakage.md §SR-1), `prompt-composed-trust-marker.test.ts` (same capability gates; asserts `<UNTRUSTED>...</UNTRUSTED>` wrapping + `contentTrust: "untrusted"` propagation per RFC 0020 §D). Paired with new `fixtures/prompt-templates/` sub-directory + per-fixture schema-validity describe block + future SECURITY invariants `prompt-composed-secret-redaction` and `prompt-composed-trust-marker` (lands alongside reference-host emission per RFC 0021 staging precedent). 
2026-05-18 (RFC 0022 `Draft` — runtime variable mapping) added four `it.todo()` placeholder scenarios covering the new mapping surfaces on `core.dispatch` (§A — `dispatch-input-mapping.test.ts`, `dispatch-output-mapping.test.ts`, `dispatch-cross-worker-handoff.test.ts`) and `core.subWorkflow` (§B — `subworkflow-input-mapping.test.ts`). Gated on `capabilities.agents.dispatchMapping` (dispatch trio) and `capabilities.subWorkflow.inputMapping` (subWorkflow). Promote to live assertions when RFC 0022 reaches `Active` + a reference host advertises the matching flags. 2026-05-17 (RFC 0003 §D handoff-schema enforcement, HV-1) added `agentPackHandoffSchemaValidation.test.ts` — verifies the host validates dispatch payloads against `handoff.taskSchemaRef` AND return payloads against `handoff.returnSchemaRef` per RFC 0003 §D. Paired with the new `agent-pack-handoff-schema-enforcement` row in `SECURITY/invariants.yaml`. 2026-05-17 (AI Envelope gap-closure, DRAFT v1.x — `spec/v1/ai-envelope.md`) added 7 advertisement-shape scenarios with `it.todo()` behavioral placeholders gated on `capabilities.envelopeContracts.advertised: true`: `aiEnvelope.universalKinds.test.ts`, `aiEnvelope.schemaDrift.test.ts`, `aiEnvelope.correlationReplay.test.ts`, `aiEnvelope.contractRefusal.test.ts`, `aiEnvelope.trustBoundaryPropagation.test.ts`, `aiEnvelope.redaction.test.ts`, `aiEnvelope.capBreached.test.ts`. Paired with the new `envelope-redaction-sr-1-carry-forward` row in `SECURITY/invariants.yaml`. 2026-05-17 (post-publish hardening, deep audit of `core.openwop.agents`) added `agents-run-tool-allowlist.test.ts` — server-free scenario locking in the `core.openwop.agents@1.0.1` safety-fix that closes `OPENWOP-AUDIT-2026-003` (function-typed `tool.handler` properties rejected at `validateTools()` with `INVALID_TOOL_DECLARATION`; tool-driven runs require `ctx.agentRuntime`; tool-less safe fallback preserved). Paired with the new `agents-run-no-raw-handler` row in `SECURITY/invariants.yaml`. 
Same-day post-publish hardening added `idempotency-key-determinism.test.ts` — server-free scenario locking in the `core.openwop.http@1.1.2` determinism safety-fix (default `composite` mode produces deterministic keys in `(runId, nodeId, payload)`; removed `uuid` mode rejects with `CONFIG_INVALID`; cross-impl vector test lets third-party reimplementations verify wire agreement). Paired with the new `idempotency-key-deterministic` row in `SECURITY/invariants.yaml`. 2026-05-17 (Phase 3 of RFC 0013) added three server-free scenarios exercising the reference workflow-chain expansion library (`conformance/src/lib/workflow-chain-expansion.ts`): `workflow-chain-expansion.test.ts` (parameter substitution + node id collision avoidance + edge rewriting + capability propagation + runtime-invariance contract), `workflow-chain-unresolvable-typeid.test.ts` (rejection with `chain_unresolvable_typeid` when a chain references an unknown typeId), and `workflow-chain-pack-signature-verification.test.ts` (Ed25519 verification recipe reuse from `node-packs.md §Signing`). Earlier that day (Phase 1) added `workflow-chain-pack-manifest-validation.test.ts` — server-free schema-validation scenario covering the new `workflow-chain-pack-manifest.schema.json` (positive sample + two negatives: kind/contents mismatch and invalid `chainId`). Closes RFC 0013 (`Workflow-chain packs`, `Draft`) Phases 1 + 3 alongside the new `spec/v1/workflow-chain-packs.md`, the `Capabilities.workflowChainPacks` block, and the registry build-index/conformance-check `kind` routing from Phase 2. Earlier that day, the suite added 27 `it.todo()` placeholder scenarios paired with RFCs 0014-0020 (host capability surfaces — fs, kvStorage, tableStorage, queueBus, sql/vector/search, blob/cache, mcp.serverMount). These promote to live assertions when each RFC reaches `Active` + the matching capability block lands in `schemas/capabilities.schema.json` + a reference host advertises the capability. 
Earlier additions include 18 Multi-Agent Shift scenarios (Phases 1-5) added 2026-05-10, the `registry-public.test.ts` public-registry healthcheck added 2026-05-11 (opt-in via `OPENWOP_TEST_PUBLIC_REGISTRY=true`), the `replay-llm-cache-key.test.ts` placeholder added 2026-05-11 (three `it.todo()` cases for the cross-host LLM cache-key recipe per `replay.md` §"LLM cache-key recipe"), the two `production-*.test.ts` scenarios added 2026-05-11 for the `openwop-production` profile per RFC 0009 (`production-backpressure.test.ts`, `production-retention-expiry.test.ts`), the four `auth-*.test.ts` scenarios added 2026-05-11/12 for the production-auth profiles per RFC 0010 (`auth-api-key-rotation.test.ts`, `auth-oauth2-client-credentials.test.ts`, `auth-oidc-user-bearer.test.ts`, `auth-mtls.test.ts` (opt-in via `OPENWOP_TEST_MTLS=1`)), `replay-retention-expiry.test.ts` added 2026-05-12 (capability shape + 410/422 envelope per `replay.md` §"Retention and garbage collection"), `bulk-cancel.test.ts` added 2026-05-12 (Phase B close-out of R1 — `POST /v1/runs:bulk-cancel`), the two Phase H launch-blocker advertisement-contract scenarios added 2026-05-12 (`mcp-toolcall-redaction.test.ts` for the MCP-1 invariant per `host-capabilities.md §host.mcp` + `threat-model-prompt-injection.md §UNTRUSTED`, and `http-client-ssrf.test.ts` for the SSRF + body-size cap advertisement contract on `capabilities.httpClient`), the `wasm-pack-abi-version-rejection.test.ts` Track 7 scenario added 2026-05-12 for the ABI-mismatch positive path via the `vendor.openwop.misbehaving-abi` pack per RFC 0008 §H, the `otel-trace-propagation-subworkflow.test.ts` Track 11 close-out added 2026-05-13 (parent + child run spans share the inbound traceparent's traceId across the `core.subWorkflow` dispatch boundary), and the three RFC 0012 (Memory Compaction Profile, `Active`) scenarios added 2026-05-13/14: `memory-compaction-sr1-carry-forward.test.ts` (load-bearing SR-1 §D), `memory-compaction-event-emitted.test.ts` 
(canonical §B payload shape), and `memory-compaction-provenance-tag.test.ts` (soft assertion on §C `compacted-from:<id>` convention). All three gate on `capabilities.memory.compaction.supported` + the host's test seam at `/v1/test/memory/{seed,compact}` (Postgres reference host enables both via `OPENWOP_MEMORY_COMPACTION=true OPENWOP_TEST_TRIGGER_COMPACTION=true`). 2026-05-15 (gap-closure CF-3) added `interrupt-token-matrix.test.ts` (malformed / unknown / replay / cross-run-id paths on `GET|POST /v1/interrupts/{token}`). The maintained scenario-to-spec map lives in [`coverage.md`](./coverage.md); this README keeps the operator quickstart and the historical scenario notes below.
96
+ The current suite has 238 scenario files under `src/scenarios/`. 2026-05-25 (RFC 0025 §C point 1 — test-catalog isolation invariant; pairs with the 25 publish-error scenarios in `pack-registry-publish.test.ts`) added `pack-registry-isolation.test.ts` — capability-gated on `capabilities.packs.testMode.{supported, isolated}: true`; PUTs a disposable pack into `/v1/packs-test/{name}` and asserts the same `(name, version)` does NOT appear via `GET /v1/packs/{name}` — anchors the test-catalog isolation MUST in RFC 0025 §C. 2026-05-25 (RFC 0028 Tier-2 post-promotion T2 — read-side sister scenario for workspace-membership enforcement) added `prompt-read-workspace-membership-enforced.test.ts` — gates on `capabilities.prompts.supported: true` (broader than `mutableLibrary` so read-only hosts that expose `?workspaceId=` are also probed); drives `GET /v1/prompts?workspaceId=<random-non-member>` and interprets the response: 4xx PASS (canonical envelope check on 403); 200 with empty `templates[]` PASS (correct null result for a nonexistent workspace); 200 with non-empty `templates[]` FAIL (cross-tenant leak); 200 without `templates[]` field SKIP (host doesn't expose workspace-scoped reads). Verifies SECURITY invariant `prompt-read-workspace-membership-enforced`. Same-day T1 strengthened `prompt-mutation-workspace-membership-enforced.test.ts` to pin `error === "workspace_membership_required"` when the host's refusal status is 403 (other refusal codes unconstrained). 2026-05-25 (RFC 0028 Tier-2 follow-up — workspace-membership enforcement on mutating prompt endpoints, filed in response to a self-disclosed adopter vulnerability) added `prompt-mutation-workspace-membership-enforced.test.ts` — capability-gated on `capabilities.prompts.mutableLibrary: true`; drives `POST /v1/prompts` with a cryptographically-random non-member `workspaceId` and asserts the host refuses (NOT a 2xx; any 4xx/5xx is acceptable — silent success is the failure mode). 
Verifies SECURITY invariant `prompt-mutation-workspace-membership-enforced`. 2026-05-22 (RFC 0034 §B follow-up — secret-leakage harness against the OTel + debug-bundle seams) added `secret-leakage-otel-attribute.test.ts` — gates on `capabilities.secrets.supported` + `capabilities.observability.testSeams.{otelScrape,debugBundleExport}` AND the `OPENWOP_CANARY_SECRET_VALUE` env (host operator + conformance runner agree on the canary). Drives the existing `openwop-smoke-byok-roundtrip` fixture end-to-end; scrapes both seams after run completion; hard-fails if the canary plaintext appears in any OTel span attribute or debug-bundle field. Verifies SECURITY invariants `secret-leakage-otel-attribute` + `secret-leakage-debug-bundle-otel`. 2026-05-22 (RFC 0041 Phase 4 — replay determinism under nondeterministic models) added three scenarios: `replay-divergence-at-refusal.test.ts` (advertisement-shape probe on `replayDeterminism.refusalDivergenceEmission` + 2 `it.todo` for the dual-direction refusal-divergence case), `replay-observable-sequence-determinism.test.ts` (capability-gated; behavioral assertion soft-skipped until a `conformance-phase4-nondet-tool` fixture ships), `replay-llm-cache-key-portable.test.ts` (intra-host reproducibility + non-recipe-field invariance + Phase 4 advertisement alignment — reuses the existing `POST /v1/host/sample/test/llm-cache-key` seam from the sibling `replay-llm-cache-key.test.ts`). 2026-05-20 (RFC 0027 §A templateKinds-coverage follow-up — paired with `prompt-end-to-end-events.test.ts`) added `prompt-all-four-kinds-events.test.ts` exercising all four `PromptKind` values (`system`, `user`, `schema-hint`, `few-shot`) end-to-end through the reference workflow-engine sample's `local.sample.demo.mock-ai` dispatch path; capability-gated via `behaviorGate('prompts-supported', ...)`. 
Closes the credibility gap where the host advertised `templateKinds: ["system", "user", "few-shot", "schema-hint"]` but only the system+user pair was actually wired into dispatch. 2026-05-20 (RFCs 0030–0033 — envelope LLM-contract-hardening track) added 15 scenarios across four `Active` RFCs: `envelope-reasoning-shape.test.ts` (RFC 0030, always-on; asserts the OPTIONAL `reasoning` property on the three universal-kind schemas + the `schema.response` deliberate omission), `envelope-reasoning-secret-redaction.test.ts` (RFC 0030, capability-gated on `capabilities.envelopes.reasoning.supported` + `secrets.supported`; 5 `it.todo()` placeholders for SECURITY invariant `envelope-reasoning-secret-redaction`), `envelope-tier-one-subset-static.test.ts` (RFC 0030, always-on for load-bearing rules — no `oneOf` / `allOf` / `not` / `prefixItems` / `propertyNames` anywhere; gated on `tierOneSubsetCompliance: "strict"` for OpenAI-strict-only constraints), `envelope-variant-discriminator-static.test.ts` (RFC 0031, always-on; asserts no `oneOf` + every `anyOf` branch declares a single-string-enum discriminator in `required` on every `schemas/envelopes/*.schema.json`), `model-capability-substituted.test.ts` (RFC 0031, advertisement-shape probe on `capabilities.modelCapabilities.advertised[]` identifier pattern + 5 `it.todo()` placeholders for SECURITY invariant `model-capability-substituted-no-credential-disclosure`), `model-capability-insufficient.test.ts` (RFC 0031, 6 `it.todo()` placeholders for refusal + no-recursive-fallback), `node-module-required-capabilities-shape.test.ts` (RFC 0031 SHOULD-tier authoring-convention; 4 `it.todo()` placeholders), and the six envelope-reliability events from RFC 0032 (`envelope-retry-attempted` carrying the shared advertisement-shape probe enforcing both MUST-tier events in `events[]` per RFC 0032 §C, plus `envelope-retry-exhausted`, `envelope-refusal-shape`, `envelope-truncated`, `envelope-nl-to-format-engaged`, `envelope-recovery-applied` — 
collectively 39 `it.todo()` placeholders covering retry/refusal/truncation/recovery + SECURITY invariants `envelope-refusal-no-prompt-leak` and `envelope-recovery-no-content-leak`), plus RFC 0033's two scenarios (`envelope-completion-distinguishes-truncation.test.ts` + `envelope-truncation-cap-exhaustion.test.ts` — 12 `it.todo()` placeholders covering the truncation-vs-schema-violation retry-routing distinction + the DoS-bound assertion). Reference workflow-engine sample advertises `capabilities.envelopes.reasoning: { supported: true, promptDirective: "off" }` + `tierOneSubsetCompliance: "warn"` honestly (schemas accept the field; host doesn't yet inject the directive); the other three RFCs' capability blocks defer to reference-host emission code per the staged RFC 0027 §G precedent. 2026-05-20 (RFC 0028 §B Phase B — prompt-pack boot-time install) added `prompt-pack-install.test.ts` (capability-gated on `capabilities.prompts.endpointsSupported: true`; asserts a host that ran the boot-time pack loader surfaces ≥ 1 pack-source template under `GET /v1/prompts?source=pack` carrying the canonical `meta.source: "pack"` + `meta.packName` + `meta.packVersion` stamps; positively identifies the in-tree `vendor.openwop.prompt-sample` reference pack's `writer-system` template when present). Pairs with the new `host/promptPackLoader.ts` boot-time entry on the reference workflow-engine sample, which scans `examples/packs/*` plus `OPENWOP_PROMPT_PACKS_DIR` and calls `installPackTemplates()` for each `kind: "prompt"` pack found. 
2026-05-20 (RFC 0029 Phase C — prompt resolution chain wire shape) added three more scenarios: `prompt-resolution-chain-node-wins.test.ts` (capability-gated on `capabilities.prompts.supported: true`; asserts layer-1 node-config supersedes lower layers per `spec/v1/prompts.md` §"Resolution chain (normative)"), `prompt-resolution-chain-agent-intrinsic.test.ts` (additionally gated on `capabilities.prompts.agentBindings: true`; asserts agent intrinsic `systemPromptRef` wins over `promptOverrides` AND lower layers when the node has no layer-1 ref), `prompt-resolution-chain-fallback-cascade.test.ts` (asserts layer 3 workflow-defaults wins over layer 4 host-defaults; layer 4 host-defaults wins when 1-3 yield null; resolved is null when all four yield null but chain[] still lists every attempted layer). The scenarios drive the host's `POST /v1/host/sample/prompt/resolve` test seam (reference-host implementation deferred to follow-up slice per RFC 0021 staging precedent). 2026-05-20 (RFC 0027 Phase A — prompt templates wire shape) added three scenarios: `prompt-template-shape.test.ts` (always-on; Ajv compileability + positive/negative round-trip for PromptTemplate + PromptRef + PromptKind), `prompt-composed-secret-redaction.test.ts` (capability-gated on `capabilities.prompts.supported: true` + `observability: "full"`; asserts `[REDACTED:<secretId>]` markers in `prompt.composed` payloads for `source: "secret"` variable bindings per SECURITY/threat-model-secret-leakage.md §SR-1), `prompt-composed-trust-marker.test.ts` (same capability gates; asserts `<UNTRUSTED>...</UNTRUSTED>` wrapping + `contentTrust: "untrusted"` propagation per RFC 0020 §D). Paired with new `fixtures/prompt-templates/` sub-directory + per-fixture schema-validity describe block + future SECURITY invariants `prompt-composed-secret-redaction` and `prompt-composed-trust-marker` (lands alongside reference-host emission per RFC 0021 staging precedent). 
2026-05-18 (RFC 0022 `Draft` — runtime variable mapping) added four `it.todo()` placeholder scenarios covering the new mapping surfaces on `core.dispatch` (§A — `dispatch-input-mapping.test.ts`, `dispatch-output-mapping.test.ts`, `dispatch-cross-worker-handoff.test.ts`) and `core.subWorkflow` (§B — `subworkflow-input-mapping.test.ts`). Gated on `capabilities.agents.dispatchMapping` (dispatch trio) and `capabilities.subWorkflow.inputMapping` (subWorkflow). Promote to live assertions when RFC 0022 reaches `Active` + a reference host advertises the matching flags. 2026-05-17 (RFC 0003 §D handoff-schema enforcement, HV-1) added `agentPackHandoffSchemaValidation.test.ts` — verifies the host validates dispatch payloads against `handoff.taskSchemaRef` AND return payloads against `handoff.returnSchemaRef` per RFC 0003 §D. Paired with the new `agent-pack-handoff-schema-enforcement` row in `SECURITY/invariants.yaml`. 2026-05-17 (AI Envelope gap-closure, DRAFT v1.x — `spec/v1/ai-envelope.md`) added 7 advertisement-shape scenarios with `it.todo()` behavioral placeholders gated on `capabilities.envelopeContracts.advertised: true`: `aiEnvelope.universalKinds.test.ts`, `aiEnvelope.schemaDrift.test.ts`, `aiEnvelope.correlationReplay.test.ts`, `aiEnvelope.contractRefusal.test.ts`, `aiEnvelope.trustBoundaryPropagation.test.ts`, `aiEnvelope.redaction.test.ts`, `aiEnvelope.capBreached.test.ts`. Paired with the new `envelope-redaction-sr-1-carry-forward` row in `SECURITY/invariants.yaml`. 2026-05-17 (post-publish hardening, deep audit of `core.openwop.agents`) added `agents-run-tool-allowlist.test.ts` — server-free scenario locking in the `core.openwop.agents@1.0.1` safety-fix that closes `OPENWOP-AUDIT-2026-003` (function-typed `tool.handler` properties rejected at `validateTools()` with `INVALID_TOOL_DECLARATION`; tool-driven runs require `ctx.agentRuntime`; tool-less safe fallback preserved). Paired with the new `agents-run-no-raw-handler` row in `SECURITY/invariants.yaml`. 
Same-day post-publish hardening added `idempotency-key-determinism.test.ts` — server-free scenario locking in the `core.openwop.http@1.1.2` determinism safety-fix (default `composite` mode produces deterministic keys in `(runId, nodeId, payload)`; removed `uuid` mode rejects with `CONFIG_INVALID`; cross-impl vector test lets third-party reimplementations verify wire agreement). Paired with the new `idempotency-key-deterministic` row in `SECURITY/invariants.yaml`. 2026-05-17 (Phase 3 of RFC 0013) added three server-free scenarios exercising the reference workflow-chain expansion library (`conformance/src/lib/workflow-chain-expansion.ts`): `workflow-chain-expansion.test.ts` (parameter substitution + node id collision avoidance + edge rewriting + capability propagation + runtime-invariance contract), `workflow-chain-unresolvable-typeid.test.ts` (rejection with `chain_unresolvable_typeid` when a chain references an unknown typeId), and `workflow-chain-pack-signature-verification.test.ts` (Ed25519 verification recipe reuse from `node-packs.md §Signing`). Earlier that day (Phase 1) added `workflow-chain-pack-manifest-validation.test.ts` — server-free schema-validation scenario covering the new `workflow-chain-pack-manifest.schema.json` (positive sample + two negatives: kind/contents mismatch and invalid `chainId`). Closes RFC 0013 (`Workflow-chain packs`, `Draft`) Phases 1 + 3 alongside the new `spec/v1/workflow-chain-packs.md`, the `Capabilities.workflowChainPacks` block, and the registry build-index/conformance-check `kind` routing from Phase 2. Earlier that day, the suite added 27 `it.todo()` placeholder scenarios paired with RFCs 0014-0020 (host capability surfaces — fs, kvStorage, tableStorage, queueBus, sql/vector/search, blob/cache, mcp.serverMount). These promote to live assertions when each RFC reaches `Active` + the matching capability block lands in `schemas/capabilities.schema.json` + a reference host advertises the capability. 
Earlier additions include 18 Multi-Agent Shift scenarios (Phases 1-5) added 2026-05-10, the `registry-public.test.ts` public-registry healthcheck added 2026-05-11 (opt-in via `OPENWOP_TEST_PUBLIC_REGISTRY=true`), the `replay-llm-cache-key.test.ts` placeholder added 2026-05-11 (three `it.todo()` cases for the cross-host LLM cache-key recipe per `replay.md` §"LLM cache-key recipe"), the two `production-*.test.ts` scenarios added 2026-05-11 for the `openwop-production` profile per RFC 0009 (`production-backpressure.test.ts`, `production-retention-expiry.test.ts`), the four `auth-*.test.ts` scenarios added 2026-05-11/12 for the production-auth profiles per RFC 0010 (`auth-api-key-rotation.test.ts`, `auth-oauth2-client-credentials.test.ts`, `auth-oidc-user-bearer.test.ts`, `auth-mtls.test.ts` (opt-in via `OPENWOP_TEST_MTLS=1`)), `replay-retention-expiry.test.ts` added 2026-05-12 (capability shape + 410/422 envelope per `replay.md` §"Retention and garbage collection"), `bulk-cancel.test.ts` added 2026-05-12 (Phase B close-out of R1 — `POST /v1/runs:bulk-cancel`), the two Phase H launch-blocker advertisement-contract scenarios added 2026-05-12 (`mcp-toolcall-redaction.test.ts` for the MCP-1 invariant per `host-capabilities.md §host.mcp` + `threat-model-prompt-injection.md §UNTRUSTED`, and `http-client-ssrf.test.ts` for the SSRF + body-size cap advertisement contract on `capabilities.httpClient`), the `wasm-pack-abi-version-rejection.test.ts` Track 7 scenario added 2026-05-12 for the ABI-mismatch positive path via the `vendor.openwop.misbehaving-abi` pack per RFC 0008 §H, the `otel-trace-propagation-subworkflow.test.ts` Track 11 close-out added 2026-05-13 (parent + child run spans share the inbound traceparent's traceId across the `core.subWorkflow` dispatch boundary), and the three RFC 0012 (Memory Compaction Profile, `Active`) scenarios added 2026-05-13/14: `memory-compaction-sr1-carry-forward.test.ts` (load-bearing SR-1 §D), `memory-compaction-event-emitted.test.ts` 
(canonical §B payload shape), and `memory-compaction-provenance-tag.test.ts` (soft assertion on §C `compacted-from:<id>` convention). All three gate on `capabilities.memory.compaction.supported` + the host's test seam at `/v1/test/memory/{seed,compact}` (Postgres reference host enables both via `OPENWOP_MEMORY_COMPACTION=true OPENWOP_TEST_TRIGGER_COMPACTION=true`). 2026-05-15 (gap-closure CF-3) added `interrupt-token-matrix.test.ts` (malformed / unknown / replay / cross-run-id paths on `GET|POST /v1/interrupts/{token}`). The maintained scenario-to-spec map lives in [`coverage.md`](./coverage.md); this README keeps the operator quickstart and the historical scenario notes below.
97
97
 
98
98
  High-level coverage includes:
99
99
 
@@ -172,7 +172,7 @@ Server-required (added in 1.7.0):
172
172
  |---|---|---|
173
173
  | **Redaction** | [`capabilities.md`](../spec/v1/capabilities.md) §"Secrets" + NFR-7 + §"aiProviders" | Vendor-neutral assertions that the server doesn't leak secret material. Three scenario groups: (a) discovery shape contract — `secrets` + `aiProviders` advertisements are well-formed regardless of `secrets.supported`; when `supported === true`, scopes MUST be non-empty + `resolution === 'host-managed'`; `byok ⊆ supported`. (b) bearer-token redaction — invalid Bearer canary in `Authorization` header is not echoed in the 401 response body. (c) credentialRef echo control — gated on `secrets.supported === true`; canary planted in `configurable.ai.credentialRef` MUST NOT appear in any RunEvent payload (poll-based capture; transport-agnostic). Uses runtime-built canary fixtures (`lib/canaries.ts`) that defeat static secret scanners. 6 scenarios. |
174
174
 
175
- Current source tree: 205 scenario files. Use [`coverage.md`](./coverage.md) for current grade/gap tracking.
175
+ Current source tree: 238 scenario files. Use [`coverage.md`](./coverage.md) for current grade/gap tracking.
176
176
 
177
177
  ## Remaining Gaps
178
178
 
package/api/asyncapi.yaml CHANGED
@@ -79,6 +79,7 @@ channels:
79
79
  runCancelled: { $ref: '#/components/messages/RunCancelled' }
80
80
  runPaused: { $ref: '#/components/messages/RunPaused' }
81
81
  runResumed: { $ref: '#/components/messages/RunResumed' }
82
+ runAnnotated: { $ref: '#/components/messages/RunAnnotated' }
82
83
  nodeCompleted: { $ref: '#/components/messages/NodeCompleted' }
83
84
  nodeFailed: { $ref: '#/components/messages/NodeFailed' }
84
85
  nodeSkipped: { $ref: '#/components/messages/NodeSkipped' }
@@ -153,7 +154,8 @@ channels:
153
154
  runId:
154
155
  $ref: '#/components/parameters/RunId'
155
156
  messages:
156
- anyRunEvent: { $ref: '#/components/messages/AnyRunEvent' }
157
+ anyRunEvent: { $ref: '#/components/messages/AnyRunEvent' }
158
+ runAnnotated: { $ref: '#/components/messages/RunAnnotated' }
157
159
 
158
160
  # ─────────────────────────────────────────────────────────────────────────────
159
161
  # OPERATIONS — consumer-side (receive)
@@ -310,6 +312,14 @@ components:
310
312
  payload:
311
313
  $ref: '#/components/schemas/RunEventDoc'
312
314
 
315
+ RunAnnotated:
316
+ name: run.annotated
317
+ title: Run annotated (RFC 0056)
318
+ summary: A non-blocking quality annotation was recorded for the run. Live notification ONLY — NOT a replayable run-event-log entry; its payload is an Annotation (not a RunEventDoc), so it is excluded from fork/replay (RFC 0056 §B/§D).
319
+ contentType: application/json
320
+ payload:
321
+ $ref: '#/components/schemas/Annotation'
322
+
313
323
  # ── Node-lifecycle ───────────────────────────────────────────────────
314
324
  NodeCompleted:
315
325
  name: node.completed
@@ -433,9 +443,14 @@ components:
433
443
  summary: |
434
444
  Type-erased handler for `debug` mode — discriminate on the
435
445
  `type` field per the `RunEventType` enum in the run-event
436
- JSON Schema. Includes events filtered out of `updates`:
437
- `log.appended`, `variable.changed`, `version.pinned`,
438
- `lease.*`, `node.retried`, `replay.diverged`, etc.
446
+ JSON Schema (the authoritative, exhaustive event list; the
447
+ named messages above are a curated `updates`-tier subset).
448
+ Includes events filtered out of `updates`: `log.appended`,
449
+ `variable.changed`, `version.pinned`, `lease.*`, `node.retried`,
450
+ `replay.diverged`, `connector.authorized`,
451
+ `connector.auth_expired` (RFC 0047), `authorization.decided`
452
+ (RFC 0049), `approval.granted` / `approval.rejected` /
453
+ `approval.overridden` (RFC 0051), etc.
439
454
  contentType: application/json
440
455
  payload:
441
456
  $ref: '#/components/schemas/RunEventDoc'
@@ -449,6 +464,12 @@ components:
449
464
  RunEventDoc:
450
465
  $ref: '../schemas/run-event.schema.json'
451
466
 
467
+ # RFC 0056. The run.annotated notification carries an Annotation —
468
+ # NOT a RunEventDoc — because annotations are a side-resource, not
469
+ # replayable run-event-log entries (RFC 0056 §B/§D).
470
+ Annotation:
471
+ $ref: '../schemas/annotation.schema.json'
472
+
452
473
  StateSnapshotPayload:
453
474
  # S1 closure (2026-04-27): reuse the canonical RunSnapshot
454
475
  # projection shape verbatim. Same type returned by
package/api/openapi.yaml CHANGED
@@ -61,6 +61,13 @@ tags:
61
61
  description: Audit-log integrity verification (gated on the `openwop-audit-log-integrity` profile).
62
62
  - name: prompts
63
63
  description: Prompt-template library — list, fetch, render, mutate (RFC 0028; gated on `capabilities.prompts.*`).
64
+ - name: packs-test
65
+ description: |
66
+ RFC 0025 (`Draft`). Test-mode mirror of the production `/v1/packs/*` publish/get/delete/sig surface against
67
+ an isolated catalog. Gated on `capabilities.packs.testMode.supported: true` plus the reference impl's
68
+ `OPENWOP_PACKS_TEST_NAMESPACE_ENABLED=true` env-gate. Lets the conformance suite exercise the documented
69
+ 19-code publish error catalog without `packs:publish` scope on the real registry. Hosts that haven't
70
+ mounted this surface MUST return `404 Not Found` for every path under `/v1/packs-test/`.
64
71
 
65
72
  # ─────────────────────────────────────────────────────────────────────────────
66
73
  # PATHS
@@ -354,6 +361,72 @@ paths:
354
361
  '403': { $ref: '#/components/responses/Forbidden' }
355
362
  '404': { $ref: '#/components/responses/NotFound' }
356
363
 
364
+ # ── Run feedback / annotations (RFC 0056) ────────────────────────────
365
+ # Gated on `capabilities.feedback.supported: true`. Annotations are a
366
+ # per-run side-resource (NOT replayable run-event-log entries); recording
367
+ # one also emits a live `run.annotated` SSE notification. Hosts without
368
+ # the advertised capability return `501 capability_not_provided`.
369
+ /v1/runs/{runId}/annotations:
370
+ post:
371
+ tags: [runs]
372
+ summary: Record a non-blocking quality annotation on a run (RFC 0056).
373
+ operationId: createAnnotation
374
+ parameters:
375
+ - $ref: '#/components/parameters/RunId'
376
+ - $ref: '#/components/parameters/IdempotencyKey'
377
+ requestBody:
378
+ required: true
379
+ content:
380
+ application/json:
381
+ schema:
382
+ $ref: '../schemas/annotation-create.schema.json'
383
+ responses:
384
+ '201':
385
+ description: Annotation recorded. Returns the persisted annotation.
386
+ content:
387
+ application/json:
388
+ schema:
389
+ $ref: '../schemas/annotation.schema.json'
390
+ '400': { $ref: '#/components/responses/ValidationError' }
391
+ '401': { $ref: '#/components/responses/Unauthenticated' }
392
+ '403': { $ref: '#/components/responses/Forbidden' }
393
+ '404': { $ref: '#/components/responses/NotFound' }
394
+ '501':
395
+ description: 'Host does not advertise capabilities.feedback.supported (RFC 0056).'
396
+ content:
397
+ application/json:
398
+ schema:
399
+ $ref: '../schemas/error-envelope.schema.json'
400
+ get:
401
+ tags: [runs]
402
+ summary: List the annotations recorded on a run (RFC 0056).
403
+ operationId: listAnnotations
404
+ parameters:
405
+ - $ref: '#/components/parameters/RunId'
406
+ responses:
407
+ '200':
408
+ description: Annotations for the run (tenant-scoped).
409
+ content:
410
+ application/json:
411
+ schema:
412
+ type: object
413
+ required: [annotations]
414
+ properties:
415
+ annotations:
416
+ type: array
417
+ items:
418
+ $ref: '../schemas/annotation.schema.json'
419
+ additionalProperties: false
420
+ '401': { $ref: '#/components/responses/Unauthenticated' }
421
+ '403': { $ref: '#/components/responses/Forbidden' }
422
+ '404': { $ref: '#/components/responses/NotFound' }
423
+ '501':
424
+ description: 'Host does not advertise capabilities.feedback.supported (RFC 0056).'
425
+ content:
426
+ application/json:
427
+ schema:
428
+ $ref: '../schemas/error-envelope.schema.json'
429
+
357
430
  /v1/runs:bulk-cancel:
358
431
  post:
359
432
  tags: [runs]
@@ -502,6 +575,54 @@ paths:
502
575
  application/json:
503
576
  schema: { $ref: '#/components/schemas/Error' }
504
577
 
578
+ /v1/runs/{runId}:diff:
579
+ get:
580
+ tags: [runs]
581
+ summary: |
582
+ RFC 0054 — return a deterministic, replay-aware structured diff of
583
+ two runs (typically a run and its RFC 0011 fork): `divergedAtSeq` +
584
+ ordered `eventDiffs[]` + `stateDiff`. The diff is a pure function of
585
+ the two event logs (see `replay.md` determinism contract). Requires
586
+ `runs:read` on BOTH runs. Hosts that don't implement it return 404.
587
+ operationId: diffRun
588
+ parameters:
589
+ - $ref: '#/components/parameters/RunId'
590
+ - name: against
591
+ in: query
592
+ required: true
593
+ description: The other run id to diff `{runId}` against (the `b` run).
594
+ schema: { type: string }
595
+ responses:
596
+ '200':
597
+ description: |
598
+ Structured diff of the two runs. `divergedAtSeq: null` + empty
599
+ `eventDiffs` when the logs are identical.
600
+ content:
601
+ application/json:
602
+ schema:
603
+ $ref: '../schemas/run-diff-response.schema.json'
604
+ '400':
605
+ description: Missing or malformed `against` query parameter.
606
+ content:
607
+ application/json:
608
+ schema: { $ref: '#/components/schemas/Error' }
609
+ '401': { $ref: '#/components/responses/Unauthenticated' }
610
+ '403':
611
+ description: |
612
+ Caller lacks `runs:read` on `{runId}` and/or on `against`
613
+ (`forbidden`); composes with RFC 0048 cross-workspace
614
+ isolation.
615
+ content:
616
+ application/json:
617
+ schema: { $ref: '#/components/schemas/Error' }
618
+ '404':
619
+ description: |
620
+ Either run doesn't exist, OR the host doesn't implement the diff
621
+ endpoint and treats the path as absent.
622
+ content:
623
+ application/json:
624
+ schema: { $ref: '#/components/schemas/Error' }
625
+
505
626
  /v1/runs/{runId}:pause:
506
627
  post:
507
628
  tags: [runs]
@@ -1146,6 +1267,209 @@ paths:
1146
1267
  schema:
1147
1268
  $ref: '../schemas/error-envelope.schema.json'
1148
1269
 
1270
+ # ── Test-mode pack registry namespace (RFC 0025) ─────────────────────────
1271
+ # Mirrors the production /v1/packs/* PUT/GET/DELETE/.sig surface against an
1272
+ # isolated catalog. Conformance scenarios under
1273
+ # `conformance/src/scenarios/pack-registry-publish.test.ts` exercise the
1274
+ # 19-code publish error catalog through this namespace. Hosts that don't
1275
+ # advertise `capabilities.packs.testMode.supported: true` MUST return
1276
+ # 404 for every path below.
1277
+
1278
+ /v1/packs-test/{name}/-/{version}.tgz:
1279
+ parameters:
1280
+ - $ref: '#/components/parameters/PackName'
1281
+ - $ref: '#/components/parameters/PackVersion'
1282
+ put:
1283
+ tags: [packs-test]
1284
+ summary: Publish a pack tarball to the isolated test catalog.
1285
+ description: |
1286
+ Mirror of `PUT /v1/packs/{name}/-/{version}.tgz` per
1287
+ `spec/v1/node-packs.md` §"PUT /v1/packs/{name}/-/{version}.tgz".
1288
+ Request shape, response shape, status codes, and the 19-code
1289
+ publish error catalog (`invalid_pack_scope`, `invalid_pack_name`,
1290
+ `invalid_version`, `invalid_body`, nine `tarball_*` codes,
1291
+ `invalid_manifest`, `manifest_mismatch` (or the granular
1292
+ `manifest_name_mismatch` / `manifest_version_mismatch` pair),
1293
+ `pack_integrity_failure`, `unsupported_runtime`, `forbidden`,
1294
+ `conflict`/`version_conflict`) MUST be served verbatim. The
1295
+ test catalog MUST be isolated per RFC 0025 §C — a pack PUT'd
1296
+ here MUST NOT appear in `GET /v1/packs/{name}` listings.
1297
+ operationId: putTestPackTarball
1298
+ parameters:
1299
+ - in: header
1300
+ name: X-Pack-Signing-Method
1301
+ required: false
1302
+ schema: { type: string, enum: [sigstore, manual, none] }
1303
+ - in: header
1304
+ name: X-Pack-Sha256
1305
+ required: false
1306
+ schema:
1307
+ type: string
1308
+ pattern: '^sha256-[A-Za-z0-9+/=]+$'
1309
+ description: Caller-asserted SHA-256 (server verifies; mismatch surfaces `pack_integrity_failure`).
1310
+ requestBody:
1311
+ required: true
1312
+ description: Gzipped tarball bytes (`application/tar+gzip`, `application/gzip`, `application/x-gzip`, or `application/octet-stream`).
1313
+ content:
1314
+ application/gzip:
1315
+ schema: { type: string, format: binary }
1316
+ application/x-gzip:
1317
+ schema: { type: string, format: binary }
1318
+ application/tar+gzip:
1319
+ schema: { type: string, format: binary }
1320
+ application/octet-stream:
1321
+ schema: { type: string, format: binary }
1322
+ responses:
1323
+ '200':
1324
+ description: Idempotent re-publish — identical sha256 content already published; existing record returned.
1325
+ content:
1326
+ application/json:
1327
+ schema: { $ref: '#/components/schemas/TestPackPublishRecord' }
1328
+ '201':
1329
+ description: New version published to the test catalog.
1330
+ content:
1331
+ application/json:
1332
+ schema: { $ref: '#/components/schemas/TestPackPublishRecord' }
1333
+ '400':
1334
+ description: |
1335
+ One of the 17 spec-documented 400-class publish error codes:
1336
+ URL/scope (`invalid_pack_scope`, `invalid_pack_name`, `invalid_version`),
1337
+ body shape (`invalid_body`),
1338
+ tarball extraction (`tarball_gunzip_failed`, `tarball_too_large`,
1339
+ `tarball_manifest_missing`, `tarball_manifest_too_large`,
1340
+ `tarball_manifest_not_json`, `tarball_entry_missing`,
1341
+ `tarball_entry_too_large`, `tarball_path_traversal`,
1342
+ `tarball_tar_parse_failed`),
1343
+ manifest contents (`invalid_manifest`, `manifest_mismatch` or
1344
+ the granular `manifest_name_mismatch` / `manifest_version_mismatch` pair,
1345
+ `pack_integrity_failure`, `unsupported_runtime`).
1346
+ content:
1347
+ application/json:
1348
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1349
+ '401': { $ref: '#/components/responses/Unauthenticated' }
1350
+ '403':
1351
+ description: '`forbidden` — caller lacks `packs:publish` scope or the namespace claim.'
1352
+ content:
1353
+ application/json:
1354
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1355
+ '404':
1356
+ description: 'Host does not advertise `capabilities.packs.testMode.supported: true`, or the test-mode env-gate is unset.'
1357
+ content:
1358
+ application/json:
1359
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1360
+ '409':
1361
+ description: '`conflict` (or `version_conflict`) — `(name, version)` already published with different content.'
1362
+ content:
1363
+ application/json:
1364
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1365
+ get:
1366
+ tags: [packs-test]
1367
+ summary: Fetch a published test-catalog tarball.
1368
+ description: 'Mirror of `GET /v1/packs/{name}/-/{version}.tgz`. Returns the gzipped tarball bytes with `Content-Type: application/tar+gzip` and an `ETag: "sha256-..."` matching the manifest''s `tarballSha256`.'
1369
+ operationId: getTestPackTarball
1370
+ responses:
1371
+ '200':
1372
+ description: Tarball bytes.
1373
+ content:
1374
+ application/tar+gzip:
1375
+ schema: { type: string, format: binary }
1376
+ '400':
1377
+ description: '`invalid_pack_name` or `invalid_version` — URL params malformed.'
1378
+ content:
1379
+ application/json:
1380
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1381
+ '401': { $ref: '#/components/responses/Unauthenticated' }
1382
+ '403':
1383
+ description: '`forbidden` — caller lacks `packs:read` scope.'
1384
+ content:
1385
+ application/json:
1386
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1387
+ '404':
1388
+ description: 'Pack version not found in the test catalog (or host does not advertise `capabilities.packs.testMode.supported: true`).'
1389
+ content:
1390
+ application/json:
1391
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1392
+ /v1/packs-test/{name}/-/{version}:
1393
+ parameters:
1394
+ - $ref: '#/components/parameters/PackName'
1395
+ - $ref: '#/components/parameters/PackVersion'
1396
+ delete:
1397
+ tags: [packs-test]
1398
+ summary: Unpublish a test-catalog version (mirrors unpublish-window semantics).
1399
+ description: |
1400
+ Mirror of `DELETE /v1/packs/{name}/-/{version}` per
1401
+ `spec/v1/node-packs.md`. Returns `400 unpublish_window_expired`
1402
+ for versions older than the registry's unpublish window
1403
+ (default 72h). Test-mode implementations MAY shorten the
1404
+ window for tractable conformance fixtures but MUST surface
1405
+ the same error code.
1406
+ operationId: deleteTestPackVersion
1407
+ responses:
1408
+ '204':
1409
+ description: Version successfully unpublished from the test catalog.
1410
+ '400':
1411
+ description: '`unpublish_window_expired`, `invalid_pack_name`, or `invalid_version`.'
1412
+ content:
1413
+ application/json:
1414
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1415
+ '401': { $ref: '#/components/responses/Unauthenticated' }
1416
+ '403':
1417
+ description: '`forbidden` — caller lacks `packs:publish` scope.'
1418
+ content:
1419
+ application/json:
1420
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1421
+ '404':
1422
+ description: Version doesn't exist in the test catalog, or host does not advertise the test-mode capability.
1423
+ content:
1424
+ application/json:
1425
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1426
+
1427
+ /v1/packs-test/{name}/-/{version}.sig:
1428
+ parameters:
1429
+ - $ref: '#/components/parameters/PackName'
1430
+ - $ref: '#/components/parameters/PackVersion'
1431
+ get:
1432
+ tags: [packs-test]
1433
+ summary: Fetch the detached Ed25519 signature for a test-catalog pack.
1434
+ description: |
1435
+ Mirror of `GET /v1/packs/{name}/-/{version}.sig`. Returns the
1436
+ signature blob over `pack.json` for this version. MAY 302-redirect
1437
+ to a storage-backend signed URL.
1438
+ operationId: getTestPackSignature
1439
+ responses:
1440
+ '200':
1441
+ description: Signature blob.
1442
+ content:
1443
+ application/octet-stream:
1444
+ schema: { type: string, format: binary }
1445
+ '302':
1446
+ description: Redirect to a storage-backend signed URL (clients SHOULD follow).
1447
+ headers:
1448
+ Location:
1449
+ schema: { type: string, format: uri }
1450
+ '400':
1451
+ description: '`invalid_pack_name` or `invalid_version` — URL params malformed.'
1452
+ content:
1453
+ application/json:
1454
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1455
+ '401': { $ref: '#/components/responses/Unauthenticated' }
1456
+ '403':
1457
+ description: '`forbidden` — caller lacks `packs:read` scope.'
1458
+ content:
1459
+ application/json:
1460
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1461
+ '404':
1462
+ description: |
1463
+ `signature_not_available` — version is missing, yanked,
1464
+ unsigned at publish time, OR the registry's storage backend
1465
+ is unwired. The four cases are intentionally
1466
+ indistinguishable per spec/v1/node-packs.md §"GET /v1/packs/{name}/-/{version}.sig".
1467
+ Also returned when the host does not advertise
1468
+ `capabilities.packs.testMode.supported: true`.
1469
+ content:
1470
+ application/json:
1471
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1472
+
1149
1473
  # ─────────────────────────────────────────────────────────────────────────────
1150
1474
  # COMPONENTS
1151
1475
  # ─────────────────────────────────────────────────────────────────────────────
@@ -1189,6 +1513,25 @@ components:
1189
1513
  Duplicate requests return the cached response with header
1190
1514
  `openwop-Idempotent-Replay: true`.
1191
1515
 
1516
+ PackName:
1517
+ in: path
1518
+ name: name
1519
+ required: true
1520
+ schema:
1521
+ type: string
1522
+ minLength: 3
1523
+ maxLength: 214
1524
+ description: Reverse-DNS pack name per `spec/v1/node-packs.md` §Naming (e.g. `vendor.acme.salesforce-tools`).
1525
+
1526
+ PackVersion:
1527
+ in: path
1528
+ name: version
1529
+ required: true
1530
+ schema:
1531
+ type: string
1532
+ pattern: '^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-[\w.-]+)?(?:\+[\w.-]+)?$'
1533
+ description: SemVer 2.0.0 version of the pack.
1534
+
1192
1535
  responses:
1193
1536
  Unauthenticated:
1194
1537
  description: Missing or invalid credential.
@@ -1304,3 +1647,31 @@ components:
1304
1647
  supported:
1305
1648
  type: array
1306
1649
  items: { type: string, enum: [values, updates, messages, debug] }
1650
+
1651
+ TestPackPublishRecord:
1652
+ description: |
1653
+ Response body for a successful publish against the test-mode
1654
+ registry namespace (RFC 0025). Mirror of the production publish
1655
+ record returned by `PUT /v1/packs/{name}/-/{version}.tgz`.
1656
+ type: object
1657
+ required: [name, version, tarballSha256, publishedAt]
1658
+ properties:
1659
+ name:
1660
+ type: string
1661
+ description: Reverse-DNS pack name as PUT'd.
1662
+ version:
1663
+ type: string
1664
+ description: SemVer 2.0.0 version as PUT'd.
1665
+ tarballSha256:
1666
+ type: string
1667
+ pattern: '^sha256-[A-Za-z0-9+/=]+$'
1668
+ description: Server-computed SHA-256 over the uploaded tarball bytes.
1669
+ publishedAt:
1670
+ type: string
1671
+ format: date-time
1672
+ signed:
1673
+ type: boolean
1674
+ description: Whether a sibling `.sig` signature blob was persisted.
1675
+ signingMethod:
1676
+ type: string
1677
+ enum: [sigstore, manual, none]