npm - @openwop/openwop-conformance - Versions diffs - 1.4.0 → 1.6.0 - Mend

@openwop/openwop-conformance 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61)

package/CHANGELOG.md +60 -0
package/README.md +2 -2
package/api/asyncapi.yaml +8 -3
package/api/openapi.yaml +305 -0
package/coverage.md +35 -10
package/fixtures/conformance-phase4-nondet-tool.json +53 -0
package/fixtures/conformance-phase4-replay-divergence.json +40 -0
package/fixtures.md +5 -3
package/package.json +1 -1
package/schemas/README.md +2 -0
package/schemas/capabilities.schema.json +176 -3
package/schemas/credential-reference.schema.json +21 -0
package/schemas/node-pack-manifest.schema.json +112 -1
package/schemas/run-diff-response.schema.json +64 -0
package/schemas/run-event-payloads.schema.json +104 -2
package/schemas/run-event.schema.json +8 -1
package/schemas/run-snapshot.schema.json +11 -0
package/src/lib/behavior-gate.ts +51 -0
package/src/lib/driver.ts +13 -1
package/src/lib/saml-idp.ts +179 -0
package/src/scenarios/approval-gate-events.test.ts +61 -0
package/src/scenarios/approval-gate-flow.test.ts +68 -0
package/src/scenarios/auth-saml-profile.test.ts +119 -0
package/src/scenarios/auth-scim-profile.test.ts +65 -0
package/src/scenarios/authorization-fail-closed.test.ts +80 -0
package/src/scenarios/authorization-roles-shape.test.ts +83 -0
package/src/scenarios/connector-manifest-validity.test.ts +142 -0
package/src/scenarios/credential-payload-redaction.test.ts +93 -0
package/src/scenarios/credentials-capability-shape.test.ts +90 -0
package/src/scenarios/cross-engine-append-behavior.test.ts +204 -0
package/src/scenarios/cross-host-traceparent-propagation.test.ts +13 -6
package/src/scenarios/cross-workspace-isolation.test.ts +72 -0
package/src/scenarios/deadletter-capability-shape.test.ts +59 -0
package/src/scenarios/deadletter-retry-exhaustion.test.ts +62 -0
package/src/scenarios/experimental-tier-shape.test.ts +192 -0
package/src/scenarios/identity-owner-shape.test.ts +64 -0
package/src/scenarios/multi-agent-confidence-escalation.test.ts +59 -21
package/src/scenarios/multi-agent-memory-lifecycle.test.ts +87 -12
package/src/scenarios/multi-region-idempotency-behavior.test.ts +203 -0
package/src/scenarios/oauth-capability-shape.test.ts +97 -0
package/src/scenarios/oauth-connector-redaction.test.ts +91 -0
package/src/scenarios/pack-registry-isolation.test.ts +108 -0
package/src/scenarios/pack-registry-publish.test.ts +1 -1
package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +126 -0
package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +183 -0
package/src/scenarios/replay-divergence-at-refusal.test.ts +187 -7
package/src/scenarios/replay-observable-sequence-determinism.test.ts +20 -6
package/src/scenarios/run-diff.test.ts +143 -0
package/src/scenarios/sandbox-capability-gate-respected.test.ts +15 -13
package/src/scenarios/sandbox-memory-cap.test.ts +7 -8
package/src/scenarios/sandbox-mvp-behavior.test.ts +280 -0
package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +14 -13
package/src/scenarios/sandbox-no-host-env-leak.test.ts +14 -21
package/src/scenarios/sandbox-no-host-fs-escape.test.ts +20 -15
package/src/scenarios/sandbox-no-host-process-escape.test.ts +18 -13
package/src/scenarios/sandbox-no-network-escape.test.ts +14 -31
package/src/scenarios/sandbox-timeout-cap.test.ts +7 -8
package/src/scenarios/scheduling-capability-shape.test.ts +81 -0
package/src/scenarios/scheduling-cron-fires-once.test.ts +66 -0
package/src/scenarios/secret-leakage-otel-attribute.test.ts +241 -0
package/src/scenarios/spec-corpus-validity.test.ts +2 -2

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,65 @@
 # `@openwop/openwop-conformance` Changelog
+## [1.6.0] — 2026-05-25
+Minor bump per `PUBLISHING.md` §"Versioning alignment" — ships the conformance scenarios for the **MyndHyve protocol-extension cohort (RFCs 0045–0054)** so adopting hosts can pin the released suite, run it against their deployment, and report pass for `Draft → Active → Accepted` graduation (per `RFCS/0001-rfc-process.md` §"Promotion to Accepted"). All additive — every new scenario is capability-gated and soft-skips against a host that doesn't advertise the surface, so existing v1.0-only hosts pass unchanged.
+### Added — RFC 0045–0054 cohort scenarios
+- **RFC 0045** (connector pack manifest) — `connector-manifest-validity.test.ts` (server-free: §A schema validity of the `connector` block + both ConnectorAuth variants; §B action/trigger typeId-resolution).
+- **RFC 0046** (`host.credentials`) — `credentials-capability-shape.test.ts` (always) + `credential-payload-redaction.test.ts` (gated; SECURITY invariant `credential-payload-redaction` via the `credentials/echo` seam).
+- **RFC 0047** (`host.oauth`) — `oauth-capability-shape.test.ts` + `oauth-connector-redaction.test.ts` (gated; token redaction via the `oauth/connector-echo` seam).
+- **RFC 0048** (identity triple) — `identity-owner-shape.test.ts` (server-free) + `cross-workspace-isolation.test.ts` (gated; fail-closed `run_forbidden` via the `identity/*` seams).
+- **RFC 0049** (RBAC) — `authorization-roles-shape.test.ts` (always) + `authorization-fail-closed.test.ts` (gated; SECURITY invariant `authorization-fail-closed` via the `authorization/decide` seam).
+- **RFC 0050** (SAML / SCIM) — `auth-saml-profile.test.ts` + `auth-scim-profile.test.ts` (advertisement shape always; behavior opt-in via `OPENWOP_TEST_SAML_IDP_URL` / `OPENWOP_TEST_SCIM_URL` + the `auth/saml/validate` + `auth/scim/provision` seams). Now ships a **bundled synthetic SAML IdP harness** (`conformance/src/lib/saml-idp.ts`, `node:crypto` RSA-SHA256, no deps) that mints the 1-positive + 6-negative assertion variants and whose `verify()` implements the RFC 0050 §A MUST list — `auth-saml-profile.test.ts` runs the full negative reference suite **server-free**; a host's real ACS validates the same assertions over the seam.
+- **RFC 0051** (approval gate) — `approval-gate-events.test.ts` (server-free) + `approval-gate-flow.test.ts` (gated; unauthorized-denied + override-audited via the `governance/approval-gate` seam).
+- **RFC 0052** (scheduling) — `scheduling-capability-shape.test.ts` (always) + `scheduling-cron-fires-once.test.ts` (gated; once-per-tick + missed-tick via the `scheduling/tick` seam).
+- **RFC 0053** (dead-letter) — `deadletter-capability-shape.test.ts` (always) + `deadletter-retry-exhaustion.test.ts` (gated; `run.dead_lettered` + fork-eligibility via the `deadletter/exhaust` seam).
+- **RFC 0054** (run diff) — `run-diff.test.ts` (landed with the run-diff endpoint).
+Two new SECURITY invariants gated in this cohort: `credential-payload-redaction` (0046, also covers 0047 tokens) + `authorization-fail-closed` (0049). New `/v1/host/sample/*` seams are catalogued in `spec/v1/host-sample-test-seams.md` §"Open seams". Suite scenario-file count → 231.
+## [1.5.0] — 2026-05-22
+Minor bump per `PUBLISHING.md` §"Versioning alignment" — unblocks MyndHyve's RFC 0044 + RFC 0039 Half A co-graduation by shipping the relaxed + RFC-0044-routing assertion logic in `multi-agent-confidence-escalation.test.ts`. No new scenario files; no new fixtures. Behavioral honesty pass on 8 sandbox scenarios + schema additions for the new RFC 0044 capability advertisement.
+### Changed — RFC 0044 interrupt-kind routing in `multi-agent-confidence-escalation.test.ts`
+Previously the scenario asserted `expect(terminal.status).toBe('waiting-clarification')` — strict equality on the clarify-kind escalation path, which rejected even RFC 0039 §A's own escalate-approval path (→ `waiting-approval`). v1.5.0 ships the relaxed + RFC-0044-routing logic landed in upstream commits `f03d01d` (relaxation to accept both canonical statuses) + `641d088` (RFC 0044 vendor-kind routing):
+- **Canonical kind advertised** (`clarification` / `approval`) → strict `expect(terminal.status).toBe('waiting-clarification' | 'waiting-approval')`.
+- **Vendor kind advertised** (`x-host-<host>-<kind>` per `host-extensions.md` §"Canonical prefixes") → `expect(terminal.status.startsWith('waiting-')).toBe(true)`; the host's `interrupt.md` mapping determines the suffix.
+- **No advertisement** → fall back to the canonical either-status check (preserves the `f03d01d` relaxation).
+This unblocks MyndHyve's `confidenceEscalationInterruptKind: 'x-host-myndhyve-low-confidence'` advertisement (their entrenched `interrupt.kind: 'low-confidence'` → `waiting-approval` mapping) without forcing a cross-cutting rename of `LOW_CONFIDENCE_SUSPEND_REASON` + `mockAgent.node` + `escalationThreshold.ts` + downstream UI consumers. See RFC 0044 §B (`RFCS/0044-confidence-escalation-interrupt-kind-advertisement.md`) for the normative contract.
+### Changed — Sandbox scenarios converted vacuous `expect(true).toBe(true)` to `it.todo` (honesty pass)
+The 8 `sandbox-*.test.ts` scenarios in v1.4.0 carried `expect(true).toBe(true)` tautology assertions for their behavioral legs. v1.5.0 converts them to `it.todo()` per upstream commit `5864a2f`:
+- `sandbox-no-host-process-escape.test.ts`
+- `sandbox-no-network-escape.test.ts`
+- `sandbox-no-host-fs-escape.test.ts`
+- `sandbox-no-host-env-leak.test.ts`
+- `sandbox-timeout-cap.test.ts`
+- `sandbox-memory-cap.test.ts`
+- `sandbox-no-cross-pack-mutation.test.ts`
+- `sandbox-capability-gate-respected.test.ts`
+Test reporters now surface 8 todos instead of 8 vacuous passes. The advertisement-shape probes (in `sandbox-no-host-fs-escape`, `sandbox-memory-cap`, `sandbox-timeout-cap`) still run real discovery-doc assertions when capabilities are advertised. Behavioral assertions light up when a sandbox-executing reference host wires the seam.
+### Changed — `schemas/capabilities.schema.json` (vendored): adds `multiAgent.executionModel.confidenceEscalationInterruptKind`
+Per RFC 0044 §A. The optional field accepts the canonical literal `"clarification"` / `"approval"` OR a vendor extension matching `^x-host-[a-z][a-z0-9-]*-[a-z][a-z0-9-]*$` per `host-extensions.md` §"Canonical prefixes". Required for the routing logic above; absent advertisement falls back to the canonical-status check.
+### No new scenario files
+Scenario file count unchanged at 205 (the v1.4.0 baseline). All changes are behavior modifications to existing files.
+### Known limits — unchanged from v1.4.0
+The 6 `it.todo` behavioral assertions across RFC 0034 OTel-seam-gated, RFC 0040 traceparent-propagation, RFC 0041 refusal-divergence + observable-sequence scenarios remain. The 8 sandbox `it.todo` assertions are new in v1.5.0 (replacing the v1.4.0 vacuous-pass shapes).
 ## [1.4.0] — 2026-05-22
 Minor bump per `PUBLISHING.md` §"Versioning alignment" — bundles 45 new conformance scenarios + 23 new fixtures landing since the 1.3.0 publish (2026-05-19). Unblocks non-steward host adoption of RFCs 0027 + 0028 + 0029 + 0030 + 0031 + 0032 + 0033 + 0034 + 0035 + 0036 + 0037 + 0039 + 0040 + 0041 against a single suite version.

package/README.md CHANGED Viewed

@@ -93,7 +93,7 @@ Exit code is non-zero on any failed assertion.
 ## What's Covered
-The current suite has 205 scenario files under `src/scenarios/`. 2026-05-22 (RFC 0041 Phase 4 — replay determinism under nondeterministic models) added three scenarios: `replay-divergence-at-refusal.test.ts` (advertisement-shape probe on `replayDeterminism.refusalDivergenceEmission` + 2 `it.todo` for the dual-direction refusal-divergence case), `replay-observable-sequence-determinism.test.ts` (capability-gated; behavioral assertion soft-skipped until a `conformance-phase4-nondet-tool` fixture ships), `replay-llm-cache-key-portable.test.ts` (intra-host reproducibility + non-recipe-field invariance + Phase 4 advertisement alignment — reuses the existing `POST /v1/host/sample/test/llm-cache-key` seam from the sibling `replay-llm-cache-key.test.ts`). 2026-05-20 (RFC 0027 §A templateKinds-coverage follow-up — paired with `prompt-end-to-end-events.test.ts`) added `prompt-all-four-kinds-events.test.ts` exercising all four `PromptKind` values (`system`, `user`, `schema-hint`, `few-shot`) end-to-end through the reference workflow-engine sample's `local.sample.demo.mock-ai` dispatch path; capability-gated via `behaviorGate('prompts-supported', ...)`. Closes the credibility gap where the host advertised `templateKinds: ["system", "user", "few-shot", "schema-hint"]` but only the system+user pair was actually wired into dispatch. 
2026-05-20 (RFCs 0030–0033 — envelope LLM-contract-hardening track) added 15 scenarios across four `Active` RFCs: `envelope-reasoning-shape.test.ts` (RFC 0030, always-on; asserts the OPTIONAL `reasoning` property on the three universal-kind schemas + the `schema.response` deliberate omission), `envelope-reasoning-secret-redaction.test.ts` (RFC 0030, capability-gated on `capabilities.envelopes.reasoning.supported` + `secrets.supported`; 5 `it.todo()` placeholders for SECURITY invariant `envelope-reasoning-secret-redaction`), `envelope-tier-one-subset-static.test.ts` (RFC 0030, always-on for load-bearing rules — no `oneOf` / `allOf` / `not` / `prefixItems` / `propertyNames` anywhere; gated on `tierOneSubsetCompliance: "strict"` for OpenAI-strict-only constraints), `envelope-variant-discriminator-static.test.ts` (RFC 0031, always-on; asserts no `oneOf` + every `anyOf` branch declares a single-string-enum discriminator in `required` on every `schemas/envelopes/*.schema.json`), `model-capability-substituted.test.ts` (RFC 0031, advertisement-shape probe on `capabilities.modelCapabilities.advertised[]` identifier pattern + 5 `it.todo()` placeholders for SECURITY invariant `model-capability-substituted-no-credential-disclosure`), `model-capability-insufficient.test.ts` (RFC 0031, 6 `it.todo()` placeholders for refusal + no-recursive-fallback), `node-module-required-capabilities-shape.test.ts` (RFC 0031 SHOULD-tier authoring-convention; 4 `it.todo()` placeholders), and the six envelope-reliability events from RFC 0032 (`envelope-retry-attempted` carrying the shared advertisement-shape probe enforcing both MUST-tier events in `events[]` per RFC 0032 §C, plus `envelope-retry-exhausted`, `envelope-refusal-shape`, `envelope-truncated`, `envelope-nl-to-format-engaged`, `envelope-recovery-applied` — collectively 39 `it.todo()` placeholders covering retry/refusal/truncation/recovery + SECURITY invariants `envelope-refusal-no-prompt-leak` and `envelope-recovery-no-content-leak`), 
plus RFC 0033's two scenarios (`envelope-completion-distinguishes-truncation.test.ts` + `envelope-truncation-cap-exhaustion.test.ts` — 12 `it.todo()` placeholders covering the truncation-vs-schema-violation retry-routing distinction + the DoS-bound assertion). Reference workflow-engine sample advertises `capabilities.envelopes.reasoning: { supported: true, promptDirective: "off" }` + `tierOneSubsetCompliance: "warn"` honestly (schemas accept the field; host doesn't yet inject the directive); the other three RFCs' capability blocks defer to reference-host emission code per the staged RFC 0027 §G precedent. 2026-05-20 (RFC 0028 §B Phase B — prompt-pack boot-time install) added `prompt-pack-install.test.ts` (capability-gated on `capabilities.prompts.endpointsSupported: true`; asserts a host that ran the boot-time pack loader surfaces ≥ 1 pack-source template under `GET /v1/prompts?source=pack` carrying the canonical `meta.source: "pack"` + `meta.packName` + `meta.packVersion` stamps; positively identifies the in-tree `vendor.openwop.prompt-sample` reference pack's `writer-system` template when present). Pairs with the new `host/promptPackLoader.ts` boot-time entry on the reference workflow-engine sample, which scans `examples/packs/*` plus `OPENWOP_PROMPT_PACKS_DIR` and calls `installPackTemplates()` for each `kind: "prompt"` pack found. 
2026-05-20 (RFC 0029 Phase C — prompt resolution chain wire shape) added three more scenarios: `prompt-resolution-chain-node-wins.test.ts` (capability-gated on `capabilities.prompts.supported: true`; asserts layer-1 node-config supersedes lower layers per `spec/v1/prompts.md` §"Resolution chain (normative)"), `prompt-resolution-chain-agent-intrinsic.test.ts` (additionally gated on `capabilities.prompts.agentBindings: true`; asserts agent intrinsic `systemPromptRef` wins over `promptOverrides` AND lower layers when the node has no layer-1 ref), `prompt-resolution-chain-fallback-cascade.test.ts` (asserts layer 3 workflow-defaults wins over layer 4 host-defaults; layer 4 host-defaults wins when 1-3 yield null; resolved is null when all four yield null but chain[] still lists every attempted layer). The scenarios drive the host's `POST /v1/host/sample/prompt/resolve` test seam (reference-host implementation deferred to follow-up slice per RFC 0021 staging precedent). 2026-05-20 (RFC 0027 Phase A — prompt templates wire shape) added three scenarios: `prompt-template-shape.test.ts` (always-on; Ajv compileability + positive/negative round-trip for PromptTemplate + PromptRef + PromptKind), `prompt-composed-secret-redaction.test.ts` (capability-gated on `capabilities.prompts.supported: true` + `observability: "full"`; asserts `[REDACTED:<secretId>]` markers in `prompt.composed` payloads for `source: "secret"` variable bindings per SECURITY/threat-model-secret-leakage.md §SR-1), `prompt-composed-trust-marker.test.ts` (same capability gates; asserts `<UNTRUSTED>...</UNTRUSTED>` wrapping + `contentTrust: "untrusted"` propagation per RFC 0020 §D). Paired with new `fixtures/prompt-templates/` sub-directory + per-fixture schema-validity describe block + future SECURITY invariants `prompt-composed-secret-redaction` and `prompt-composed-trust-marker` (lands alongside reference-host emission per RFC 0021 staging precedent). 
2026-05-18 (RFC 0022 `Draft` — runtime variable mapping) added four `it.todo()` placeholder scenarios covering the new mapping surfaces on `core.dispatch` (§A — `dispatch-input-mapping.test.ts`, `dispatch-output-mapping.test.ts`, `dispatch-cross-worker-handoff.test.ts`) and `core.subWorkflow` (§B — `subworkflow-input-mapping.test.ts`). Gated on `capabilities.agents.dispatchMapping` (dispatch trio) and `capabilities.subWorkflow.inputMapping` (subWorkflow). Promote to live assertions when RFC 0022 reaches `Active` + a reference host advertises the matching flags. 2026-05-17 (RFC 0003 §D handoff-schema enforcement, HV-1) added `agentPackHandoffSchemaValidation.test.ts` — verifies the host validates dispatch payloads against `handoff.taskSchemaRef` AND return payloads against `handoff.returnSchemaRef` per RFC 0003 §D. Paired with the new `agent-pack-handoff-schema-enforcement` row in `SECURITY/invariants.yaml`. 2026-05-17 (AI Envelope gap-closure, DRAFT v1.x — `spec/v1/ai-envelope.md`) added 7 advertisement-shape scenarios with `it.todo()` behavioral placeholders gated on `capabilities.envelopeContracts.advertised: true`: `aiEnvelope.universalKinds.test.ts`, `aiEnvelope.schemaDrift.test.ts`, `aiEnvelope.correlationReplay.test.ts`, `aiEnvelope.contractRefusal.test.ts`, `aiEnvelope.trustBoundaryPropagation.test.ts`, `aiEnvelope.redaction.test.ts`, `aiEnvelope.capBreached.test.ts`. Paired with the new `envelope-redaction-sr-1-carry-forward` row in `SECURITY/invariants.yaml`. 2026-05-17 (post-publish hardening, deep audit of `core.openwop.agents`) added `agents-run-tool-allowlist.test.ts` — server-free scenario locking in the `core.openwop.agents@1.0.1` safety-fix that closes `OPENWOP-AUDIT-2026-003` (function-typed `tool.handler` properties rejected at `validateTools()` with `INVALID_TOOL_DECLARATION`; tool-driven runs require `ctx.agentRuntime`; tool-less safe fallback preserved). Paired with the new `agents-run-no-raw-handler` row in `SECURITY/invariants.yaml`. 
Same-day post-publish hardening added `idempotency-key-determinism.test.ts` — server-free scenario locking in the `core.openwop.http@1.1.2` determinism safety-fix (default `composite` mode produces deterministic keys in `(runId, nodeId, payload)`; removed `uuid` mode rejects with `CONFIG_INVALID`; cross-impl vector test lets third-party reimplementations verify wire agreement). Paired with the new `idempotency-key-deterministic` row in `SECURITY/invariants.yaml`. 2026-05-17 (Phase 3 of RFC 0013) added three server-free scenarios exercising the reference workflow-chain expansion library (`conformance/src/lib/workflow-chain-expansion.ts`): `workflow-chain-expansion.test.ts` (parameter substitution + node id collision avoidance + edge rewriting + capability propagation + runtime-invariance contract), `workflow-chain-unresolvable-typeid.test.ts` (rejection with `chain_unresolvable_typeid` when a chain references an unknown typeId), and `workflow-chain-pack-signature-verification.test.ts` (Ed25519 verification recipe reuse from `node-packs.md §Signing`). Earlier that day (Phase 1) added `workflow-chain-pack-manifest-validation.test.ts` — server-free schema-validation scenario covering the new `workflow-chain-pack-manifest.schema.json` (positive sample + two negatives: kind/contents mismatch and invalid `chainId`). Closes RFC 0013 (`Workflow-chain packs`, `Draft`) Phases 1 + 3 alongside the new `spec/v1/workflow-chain-packs.md`, the `Capabilities.workflowChainPacks` block, and the registry build-index/conformance-check `kind` routing from Phase 2. Earlier that day, the suite added 27 `it.todo()` placeholder scenarios paired with RFCs 0014-0020 (host capability surfaces — fs, kvStorage, tableStorage, queueBus, sql/vector/search, blob/cache, mcp.serverMount). These promote to live assertions when each RFC reaches `Active` + the matching capability block lands in `schemas/capabilities.schema.json` + a reference host advertises the capability. 
Earlier additions include 18 Multi-Agent Shift scenarios (Phases 1-5) added 2026-05-10, the `registry-public.test.ts` public-registry healthcheck added 2026-05-11 (opt-in via `OPENWOP_TEST_PUBLIC_REGISTRY=true`), the `replay-llm-cache-key.test.ts` placeholder added 2026-05-11 (three `it.todo()` cases for the cross-host LLM cache-key recipe per `replay.md` §"LLM cache-key recipe"), the two `production-*.test.ts` scenarios added 2026-05-11 for the `openwop-production` profile per RFC 0009 (`production-backpressure.test.ts`, `production-retention-expiry.test.ts`), the four `auth-*.test.ts` scenarios added 2026-05-11/12 for the production-auth profiles per RFC 0010 (`auth-api-key-rotation.test.ts`, `auth-oauth2-client-credentials.test.ts`, `auth-oidc-user-bearer.test.ts`, `auth-mtls.test.ts` (opt-in via `OPENWOP_TEST_MTLS=1`)), `replay-retention-expiry.test.ts` added 2026-05-12 (capability shape + 410/422 envelope per `replay.md` §"Retention and garbage collection"), `bulk-cancel.test.ts` added 2026-05-12 (Phase B close-out of R1 — `POST /v1/runs:bulk-cancel`), the two Phase H launch-blocker advertisement-contract scenarios added 2026-05-12 (`mcp-toolcall-redaction.test.ts` for the MCP-1 invariant per `host-capabilities.md §host.mcp` + `threat-model-prompt-injection.md §UNTRUSTED`, and `http-client-ssrf.test.ts` for the SSRF + body-size cap advertisement contract on `capabilities.httpClient`), the `wasm-pack-abi-version-rejection.test.ts` Track 7 scenario added 2026-05-12 for the ABI-mismatch positive path via the `vendor.openwop.misbehaving-abi` pack per RFC 0008 §H, the `otel-trace-propagation-subworkflow.test.ts` Track 11 close-out added 2026-05-13 (parent + child run spans share the inbound traceparent's traceId across the `core.subWorkflow` dispatch boundary), and the three RFC 0012 (Memory Compaction Profile, `Active`) scenarios added 2026-05-13/14: `memory-compaction-sr1-carry-forward.test.ts` (load-bearing SR-1 §D), `memory-compaction-event-emitted.test.ts` 
(canonical §B payload shape), and `memory-compaction-provenance-tag.test.ts` (soft assertion on §C `compacted-from:<id>` convention). All three gate on `capabilities.memory.compaction.supported` + the host's test seam at `/v1/test/memory/{seed,compact}` (Postgres reference host enables both via `OPENWOP_MEMORY_COMPACTION=true OPENWOP_TEST_TRIGGER_COMPACTION=true`). 2026-05-15 (gap-closure CF-3) added `interrupt-token-matrix.test.ts` (malformed / unknown / replay / cross-run-id paths on `GET|POST /v1/interrupts/{token}`). The maintained scenario-to-spec map lives in [`coverage.md`](./coverage.md); this README keeps the operator quickstart and the historical scenario notes below.
+The current suite has 231 scenario files under `src/scenarios/`. 2026-05-25 (RFC 0025 §C point 1 — test-catalog isolation invariant; pairs with the 25 publish-error scenarios in `pack-registry-publish.test.ts`) added `pack-registry-isolation.test.ts` — capability-gated on `capabilities.packs.testMode.{supported, isolated}: true`; PUTs a disposable pack into `/v1/packs-test/{name}` and asserts the same `(name, version)` does NOT appear via `GET /v1/packs/{name}` — anchors the test-catalog isolation MUST in RFC 0025 §C. 2026-05-25 (RFC 0028 Tier-2 post-promotion T2 — read-side sister scenario for workspace-membership enforcement) added `prompt-read-workspace-membership-enforced.test.ts` — gates on `capabilities.prompts.supported: true` (broader than `mutableLibrary` so read-only hosts that expose `?workspaceId=` are also probed); drives `GET /v1/prompts?workspaceId=<random-non-member>` and interprets the response: 4xx PASS (canonical envelope check on 403); 200 with empty `templates[]` PASS (correct null result for a nonexistent workspace); 200 with non-empty `templates[]` FAIL (cross-tenant leak); 200 without `templates[]` field SKIP (host doesn't expose workspace-scoped reads). Verifies SECURITY invariant `prompt-read-workspace-membership-enforced`. Same-day T1 strengthened `prompt-mutation-workspace-membership-enforced.test.ts` to pin `error === "workspace_membership_required"` when the host's refusal status is 403 (other refusal codes unconstrained). 2026-05-25 (RFC 0028 Tier-2 follow-up — workspace-membership enforcement on mutating prompt endpoints, filed in response to a self-disclosed adopter vulnerability) added `prompt-mutation-workspace-membership-enforced.test.ts` — capability-gated on `capabilities.prompts.mutableLibrary: true`; drives `POST /v1/prompts` with a cryptographically-random non-member `workspaceId` and asserts the host refuses (NOT a 2xx; any 4xx/5xx is acceptable — silent success is the failure mode). 
Verifies SECURITY invariant `prompt-mutation-workspace-membership-enforced`. 2026-05-22 (RFC 0034 §B follow-up — secret-leakage harness against the OTel + debug-bundle seams) added `secret-leakage-otel-attribute.test.ts` — gates on `capabilities.secrets.supported` + `capabilities.observability.testSeams.{otelScrape,debugBundleExport}` AND the `OPENWOP_CANARY_SECRET_VALUE` env (host operator + conformance runner agree on the canary). Drives the existing `openwop-smoke-byok-roundtrip` fixture end-to-end; scrapes both seams after run completion; hard-fails if the canary plaintext appears in any OTel span attribute or debug-bundle field. Verifies SECURITY invariants `secret-leakage-otel-attribute` + `secret-leakage-debug-bundle-otel`. 2026-05-22 (RFC 0041 Phase 4 — replay determinism under nondeterministic models) added three scenarios: `replay-divergence-at-refusal.test.ts` (advertisement-shape probe on `replayDeterminism.refusalDivergenceEmission` + 2 `it.todo` for the dual-direction refusal-divergence case), `replay-observable-sequence-determinism.test.ts` (capability-gated; behavioral assertion soft-skipped until a `conformance-phase4-nondet-tool` fixture ships), `replay-llm-cache-key-portable.test.ts` (intra-host reproducibility + non-recipe-field invariance + Phase 4 advertisement alignment — reuses the existing `POST /v1/host/sample/test/llm-cache-key` seam from the sibling `replay-llm-cache-key.test.ts`). 2026-05-20 (RFC 0027 §A templateKinds-coverage follow-up — paired with `prompt-end-to-end-events.test.ts`) added `prompt-all-four-kinds-events.test.ts` exercising all four `PromptKind` values (`system`, `user`, `schema-hint`, `few-shot`) end-to-end through the reference workflow-engine sample's `local.sample.demo.mock-ai` dispatch path; capability-gated via `behaviorGate('prompts-supported', ...)`. 
Closes the credibility gap where the host advertised `templateKinds: ["system", "user", "few-shot", "schema-hint"]` but only the system+user pair was actually wired into dispatch. 2026-05-20 (RFCs 0030–0033 — envelope LLM-contract-hardening track) added 15 scenarios across four `Active` RFCs: `envelope-reasoning-shape.test.ts` (RFC 0030, always-on; asserts the OPTIONAL `reasoning` property on the three universal-kind schemas + the `schema.response` deliberate omission), `envelope-reasoning-secret-redaction.test.ts` (RFC 0030, capability-gated on `capabilities.envelopes.reasoning.supported` + `secrets.supported`; 5 `it.todo()` placeholders for SECURITY invariant `envelope-reasoning-secret-redaction`), `envelope-tier-one-subset-static.test.ts` (RFC 0030, always-on for load-bearing rules — no `oneOf` / `allOf` / `not` / `prefixItems` / `propertyNames` anywhere; gated on `tierOneSubsetCompliance: "strict"` for OpenAI-strict-only constraints), `envelope-variant-discriminator-static.test.ts` (RFC 0031, always-on; asserts no `oneOf` + every `anyOf` branch declares a single-string-enum discriminator in `required` on every `schemas/envelopes/*.schema.json`), `model-capability-substituted.test.ts` (RFC 0031, advertisement-shape probe on `capabilities.modelCapabilities.advertised[]` identifier pattern + 5 `it.todo()` placeholders for SECURITY invariant `model-capability-substituted-no-credential-disclosure`), `model-capability-insufficient.test.ts` (RFC 0031, 6 `it.todo()` placeholders for refusal + no-recursive-fallback), `node-module-required-capabilities-shape.test.ts` (RFC 0031 SHOULD-tier authoring-convention; 4 `it.todo()` placeholders), and the six envelope-reliability events from RFC 0032 (`envelope-retry-attempted` carrying the shared advertisement-shape probe enforcing both MUST-tier events in `events[]` per RFC 0032 §C, plus `envelope-retry-exhausted`, `envelope-refusal-shape`, `envelope-truncated`, `envelope-nl-to-format-engaged`, `envelope-recovery-applied` — 
collectively 39 `it.todo()` placeholders covering retry/refusal/truncation/recovery + SECURITY invariants `envelope-refusal-no-prompt-leak` and `envelope-recovery-no-content-leak`), plus RFC 0033's two scenarios (`envelope-completion-distinguishes-truncation.test.ts` + `envelope-truncation-cap-exhaustion.test.ts` — 12 `it.todo()` placeholders covering the truncation-vs-schema-violation retry-routing distinction + the DoS-bound assertion). Reference workflow-engine sample advertises `capabilities.envelopes.reasoning: { supported: true, promptDirective: "off" }` + `tierOneSubsetCompliance: "warn"` honestly (schemas accept the field; host doesn't yet inject the directive); the other three RFCs' capability blocks defer to reference-host emission code per the staged RFC 0027 §G precedent. 2026-05-20 (RFC 0028 §B Phase B — prompt-pack boot-time install) added `prompt-pack-install.test.ts` (capability-gated on `capabilities.prompts.endpointsSupported: true`; asserts a host that ran the boot-time pack loader surfaces ≥ 1 pack-source template under `GET /v1/prompts?source=pack` carrying the canonical `meta.source: "pack"` + `meta.packName` + `meta.packVersion` stamps; positively identifies the in-tree `vendor.openwop.prompt-sample` reference pack's `writer-system` template when present). Pairs with the new `host/promptPackLoader.ts` boot-time entry on the reference workflow-engine sample, which scans `examples/packs/*` plus `OPENWOP_PROMPT_PACKS_DIR` and calls `installPackTemplates()` for each `kind: "prompt"` pack found. 
2026-05-20 (RFC 0029 Phase C — prompt resolution chain wire shape) added three more scenarios: `prompt-resolution-chain-node-wins.test.ts` (capability-gated on `capabilities.prompts.supported: true`; asserts layer-1 node-config supersedes lower layers per `spec/v1/prompts.md` §"Resolution chain (normative)"), `prompt-resolution-chain-agent-intrinsic.test.ts` (additionally gated on `capabilities.prompts.agentBindings: true`; asserts agent intrinsic `systemPromptRef` wins over `promptOverrides` AND lower layers when the node has no layer-1 ref), `prompt-resolution-chain-fallback-cascade.test.ts` (asserts layer 3 workflow-defaults wins over layer 4 host-defaults; layer 4 host-defaults wins when 1-3 yield null; resolved is null when all four yield null but chain[] still lists every attempted layer). The scenarios drive the host's `POST /v1/host/sample/prompt/resolve` test seam (reference-host implementation deferred to follow-up slice per RFC 0021 staging precedent). 2026-05-20 (RFC 0027 Phase A — prompt templates wire shape) added three scenarios: `prompt-template-shape.test.ts` (always-on; Ajv compileability + positive/negative round-trip for PromptTemplate + PromptRef + PromptKind), `prompt-composed-secret-redaction.test.ts` (capability-gated on `capabilities.prompts.supported: true` + `observability: "full"`; asserts `[REDACTED:<secretId>]` markers in `prompt.composed` payloads for `source: "secret"` variable bindings per SECURITY/threat-model-secret-leakage.md §SR-1), `prompt-composed-trust-marker.test.ts` (same capability gates; asserts `<UNTRUSTED>...</UNTRUSTED>` wrapping + `contentTrust: "untrusted"` propagation per RFC 0020 §D). Paired with new `fixtures/prompt-templates/` sub-directory + per-fixture schema-validity describe block + future SECURITY invariants `prompt-composed-secret-redaction` and `prompt-composed-trust-marker` (lands alongside reference-host emission per RFC 0021 staging precedent). 
2026-05-18 (RFC 0022 `Draft` — runtime variable mapping) added four `it.todo()` placeholder scenarios covering the new mapping surfaces on `core.dispatch` (§A — `dispatch-input-mapping.test.ts`, `dispatch-output-mapping.test.ts`, `dispatch-cross-worker-handoff.test.ts`) and `core.subWorkflow` (§B — `subworkflow-input-mapping.test.ts`). Gated on `capabilities.agents.dispatchMapping` (dispatch trio) and `capabilities.subWorkflow.inputMapping` (subWorkflow). Promote to live assertions when RFC 0022 reaches `Active` + a reference host advertises the matching flags. 2026-05-17 (RFC 0003 §D handoff-schema enforcement, HV-1) added `agentPackHandoffSchemaValidation.test.ts` — verifies the host validates dispatch payloads against `handoff.taskSchemaRef` AND return payloads against `handoff.returnSchemaRef` per RFC 0003 §D. Paired with the new `agent-pack-handoff-schema-enforcement` row in `SECURITY/invariants.yaml`. 2026-05-17 (AI Envelope gap-closure, DRAFT v1.x — `spec/v1/ai-envelope.md`) added 7 advertisement-shape scenarios with `it.todo()` behavioral placeholders gated on `capabilities.envelopeContracts.advertised: true`: `aiEnvelope.universalKinds.test.ts`, `aiEnvelope.schemaDrift.test.ts`, `aiEnvelope.correlationReplay.test.ts`, `aiEnvelope.contractRefusal.test.ts`, `aiEnvelope.trustBoundaryPropagation.test.ts`, `aiEnvelope.redaction.test.ts`, `aiEnvelope.capBreached.test.ts`. Paired with the new `envelope-redaction-sr-1-carry-forward` row in `SECURITY/invariants.yaml`. 2026-05-17 (post-publish hardening, deep audit of `core.openwop.agents`) added `agents-run-tool-allowlist.test.ts` — server-free scenario locking in the `core.openwop.agents@1.0.1` safety-fix that closes `OPENWOP-AUDIT-2026-003` (function-typed `tool.handler` properties rejected at `validateTools()` with `INVALID_TOOL_DECLARATION`; tool-driven runs require `ctx.agentRuntime`; tool-less safe fallback preserved). Paired with the new `agents-run-no-raw-handler` row in `SECURITY/invariants.yaml`. 
Same-day post-publish hardening added `idempotency-key-determinism.test.ts` — server-free scenario locking in the `core.openwop.http@1.1.2` determinism safety-fix (default `composite` mode produces deterministic keys in `(runId, nodeId, payload)`; removed `uuid` mode rejects with `CONFIG_INVALID`; cross-impl vector test lets third-party reimplementations verify wire agreement). Paired with the new `idempotency-key-deterministic` row in `SECURITY/invariants.yaml`. 2026-05-17 (Phase 3 of RFC 0013) added three server-free scenarios exercising the reference workflow-chain expansion library (`conformance/src/lib/workflow-chain-expansion.ts`): `workflow-chain-expansion.test.ts` (parameter substitution + node id collision avoidance + edge rewriting + capability propagation + runtime-invariance contract), `workflow-chain-unresolvable-typeid.test.ts` (rejection with `chain_unresolvable_typeid` when a chain references an unknown typeId), and `workflow-chain-pack-signature-verification.test.ts` (Ed25519 verification recipe reuse from `node-packs.md §Signing`). Earlier that day (Phase 1) added `workflow-chain-pack-manifest-validation.test.ts` — server-free schema-validation scenario covering the new `workflow-chain-pack-manifest.schema.json` (positive sample + two negatives: kind/contents mismatch and invalid `chainId`). Closes RFC 0013 (`Workflow-chain packs`, `Draft`) Phases 1 + 3 alongside the new `spec/v1/workflow-chain-packs.md`, the `Capabilities.workflowChainPacks` block, and the registry build-index/conformance-check `kind` routing from Phase 2. Earlier that day, the suite added 27 `it.todo()` placeholder scenarios paired with RFCs 0014-0020 (host capability surfaces — fs, kvStorage, tableStorage, queueBus, sql/vector/search, blob/cache, mcp.serverMount). These promote to live assertions when each RFC reaches `Active` + the matching capability block lands in `schemas/capabilities.schema.json` + a reference host advertises the capability. 
Earlier additions include 18 Multi-Agent Shift scenarios (Phases 1-5) added 2026-05-10, the `registry-public.test.ts` public-registry healthcheck added 2026-05-11 (opt-in via `OPENWOP_TEST_PUBLIC_REGISTRY=true`), the `replay-llm-cache-key.test.ts` placeholder added 2026-05-11 (three `it.todo()` cases for the cross-host LLM cache-key recipe per `replay.md` §"LLM cache-key recipe"), the two `production-*.test.ts` scenarios added 2026-05-11 for the `openwop-production` profile per RFC 0009 (`production-backpressure.test.ts`, `production-retention-expiry.test.ts`), the four `auth-*.test.ts` scenarios added 2026-05-11/12 for the production-auth profiles per RFC 0010 (`auth-api-key-rotation.test.ts`, `auth-oauth2-client-credentials.test.ts`, `auth-oidc-user-bearer.test.ts`, `auth-mtls.test.ts` (opt-in via `OPENWOP_TEST_MTLS=1`)), `replay-retention-expiry.test.ts` added 2026-05-12 (capability shape + 410/422 envelope per `replay.md` §"Retention and garbage collection"), `bulk-cancel.test.ts` added 2026-05-12 (Phase B close-out of R1 — `POST /v1/runs:bulk-cancel`), the two Phase H launch-blocker advertisement-contract scenarios added 2026-05-12 (`mcp-toolcall-redaction.test.ts` for the MCP-1 invariant per `host-capabilities.md §host.mcp` + `threat-model-prompt-injection.md §UNTRUSTED`, and `http-client-ssrf.test.ts` for the SSRF + body-size cap advertisement contract on `capabilities.httpClient`), the `wasm-pack-abi-version-rejection.test.ts` Track 7 scenario added 2026-05-12 for the ABI-mismatch positive path via the `vendor.openwop.misbehaving-abi` pack per RFC 0008 §H, the `otel-trace-propagation-subworkflow.test.ts` Track 11 close-out added 2026-05-13 (parent + child run spans share the inbound traceparent's traceId across the `core.subWorkflow` dispatch boundary), and the three RFC 0012 (Memory Compaction Profile, `Active`) scenarios added 2026-05-13/14: `memory-compaction-sr1-carry-forward.test.ts` (load-bearing SR-1 §D), `memory-compaction-event-emitted.test.ts` 
(canonical §B payload shape), and `memory-compaction-provenance-tag.test.ts` (soft assertion on §C `compacted-from:<id>` convention). All three gate on `capabilities.memory.compaction.supported` + the host's test seam at `/v1/test/memory/{seed,compact}` (Postgres reference host enables both via `OPENWOP_MEMORY_COMPACTION=true OPENWOP_TEST_TRIGGER_COMPACTION=true`). 2026-05-15 (gap-closure CF-3) added `interrupt-token-matrix.test.ts` (malformed / unknown / replay / cross-run-id paths on `GET|POST /v1/interrupts/{token}`). The maintained scenario-to-spec map lives in [`coverage.md`](./coverage.md); this README keeps the operator quickstart and the historical scenario notes below.
 High-level coverage includes:
@@ -172,7 +172,7 @@ Server-required (added in 1.7.0):
 |---|---|---|
 | **Redaction** | [`capabilities.md`](../spec/v1/capabilities.md) §"Secrets" + NFR-7 + §"aiProviders" | Vendor-neutral assertions that the server doesn't leak secret material. Three scenario groups: (a) discovery shape contract — `secrets` + `aiProviders` advertisements are well-formed regardless of `secrets.supported`; when `supported === true`, scopes MUST be non-empty + `resolution === 'host-managed'`; `byok ⊆ supported`. (b) bearer-token redaction — invalid Bearer canary in `Authorization` header is not echoed in the 401 response body. (c) credentialRef echo control — gated on `secrets.supported === true`; canary planted in `configurable.ai.credentialRef` MUST NOT appear in any RunEvent payload (poll-based capture; transport-agnostic). Uses runtime-built canary fixtures (`lib/canaries.ts`) that defeat static secret scanners. 6 scenarios. |
-Current source tree: 205 scenario files. Use [`coverage.md`](./coverage.md) for current grade/gap tracking.
+Current source tree: 231 scenario files. Use [`coverage.md`](./coverage.md) for current grade/gap tracking.
 ## Remaining Gaps

package/api/asyncapi.yaml CHANGED Viewed

@@ -433,9 +433,14 @@ components:
       summary: |
         Type-erased handler for `debug` mode — discriminate on the
         `type` field per the `RunEventType` enum in the run-event
-        JSON Schema. Includes events filtered out of `updates`:
-        `log.appended`, `variable.changed`, `version.pinned`,
-        `lease.*`, `node.retried`, `replay.diverged`, etc.
+        JSON Schema (the authoritative, exhaustive event list; the
+        named messages above are a curated `updates`-tier subset).
+        Includes events filtered out of `updates`: `log.appended`,
+        `variable.changed`, `version.pinned`, `lease.*`, `node.retried`,
+        `replay.diverged`, `connector.authorized`,
+        `connector.auth_expired` (RFC 0047), `authorization.decided`
+        (RFC 0049), `approval.granted` / `approval.rejected` /
+        `approval.overridden` (RFC 0051), etc.
       contentType: application/json
       payload:
         $ref: '#/components/schemas/RunEventDoc'

package/api/openapi.yaml CHANGED Viewed

@@ -61,6 +61,13 @@ tags:
     description: Audit-log integrity verification (gated on the `openwop-audit-log-integrity` profile).
   - name: prompts
     description: Prompt-template library — list, fetch, render, mutate (RFC 0028; gated on `capabilities.prompts.*`).
+  - name: packs-test
+    description: |
+      RFC 0025 (`Draft`). Test-mode mirror of the production `/v1/packs/*` publish/get/delete/sig surface against
+      an isolated catalog. Gated on `capabilities.packs.testMode.supported: true` plus the reference impl's
+      `OPENWOP_PACKS_TEST_NAMESPACE_ENABLED=true` env-gate. Lets the conformance suite exercise the documented
+      19-code publish error catalog without `packs:publish` scope on the real registry. Hosts that haven't
+      mounted this surface MUST return `404 Not Found` for every path under `/v1/packs-test/`.
 # ─────────────────────────────────────────────────────────────────────────────
 # PATHS
@@ -502,6 +509,54 @@ paths:
             application/json:
               schema: { $ref: '#/components/schemas/Error' }
+  /v1/runs/{runId}:diff:
+    get:
+      tags: [runs]
+      summary: |
+        RFC 0054 — return a deterministic, replay-aware structured diff of
+        two runs (typically a run and its RFC 0011 fork): `divergedAtSeq` +
+        ordered `eventDiffs[]` + `stateDiff`. The diff is a pure function of
+        the two event logs (see `replay.md` determinism contract). Requires
+        `runs:read` on BOTH runs. Hosts that don't implement it return 404.
+      operationId: diffRun
+      parameters:
+        - $ref: '#/components/parameters/RunId'
+        - name: against
+          in: query
+          required: true
+          description: The other run id to diff `{runId}` against (the `b` run).
+          schema: { type: string }
+      responses:
+        '200':
+          description: |
+            Structured diff of the two runs. `divergedAtSeq: null` + empty
+            `eventDiffs` when the logs are identical.
+          content:
+            application/json:
+              schema:
+                $ref: '../schemas/run-diff-response.schema.json'
+        '400':
+          description: Missing or malformed `against` query parameter.
+          content:
+            application/json:
+              schema: { $ref: '#/components/schemas/Error' }
+        '401': { $ref: '#/components/responses/Unauthenticated' }
+        '403':
+          description: |
+            Caller lacks `runs:read` on `{runId}` and/or on `against`
+            (`forbidden`); composes with RFC 0048 cross-workspace
+            isolation.
+          content:
+            application/json:
+              schema: { $ref: '#/components/schemas/Error' }
+        '404':
+          description: |
+            Either run doesn't exist, OR the host doesn't implement the diff
+            endpoint and treats the path as absent.
+          content:
+            application/json:
+              schema: { $ref: '#/components/schemas/Error' }
   /v1/runs/{runId}:pause:
     post:
       tags: [runs]
@@ -1146,6 +1201,209 @@ paths:
               schema:
                 $ref: '../schemas/error-envelope.schema.json'
+  # ── Test-mode pack registry namespace (RFC 0025) ─────────────────────────
+  # Mirrors the production /v1/packs/* PUT/GET/DELETE/.sig surface against an
+  # isolated catalog. Conformance scenarios under
+  # `conformance/src/scenarios/pack-registry-publish.test.ts` exercise the
+  # 19-code publish error catalog through this namespace. Hosts that don't
+  # advertise `capabilities.packs.testMode.supported: true` MUST return
+  # 404 for every path below.
+  /v1/packs-test/{name}/-/{version}.tgz:
+    parameters:
+      - $ref: '#/components/parameters/PackName'
+      - $ref: '#/components/parameters/PackVersion'
+    put:
+      tags: [packs-test]
+      summary: Publish a pack tarball to the isolated test catalog.
+      description: |
+        Mirror of `PUT /v1/packs/{name}/-/{version}.tgz` per
+        `spec/v1/node-packs.md` §"PUT /v1/packs/{name}/-/{version}.tgz".
+        Request shape, response shape, status codes, and the 19-code
+        publish error catalog (`invalid_pack_scope`, `invalid_pack_name`,
+        `invalid_version`, `invalid_body`, nine `tarball_*` codes,
+        `invalid_manifest`, `manifest_mismatch` (or the granular
+        `manifest_name_mismatch` / `manifest_version_mismatch` pair),
+        `pack_integrity_failure`, `unsupported_runtime`, `forbidden`,
+        `conflict`/`version_conflict`) MUST be served verbatim. The
+        test catalog MUST be isolated per RFC 0025 §C — a pack PUT'd
+        here MUST NOT appear in `GET /v1/packs/{name}` listings.
+      operationId: putTestPackTarball
+      parameters:
+        - in: header
+          name: X-Pack-Signing-Method
+          required: false
+          schema: { type: string, enum: [sigstore, manual, none] }
+        - in: header
+          name: X-Pack-Sha256
+          required: false
+          schema:
+            type: string
+            pattern: '^sha256-[A-Za-z0-9+/=]+$'
+          description: Caller-asserted SHA-256 (server verifies; mismatch surfaces `pack_integrity_failure`).
+      requestBody:
+        required: true
+        description: Gzipped tarball bytes (`application/tar+gzip`, `application/gzip`, `application/x-gzip`, or `application/octet-stream`).
+        content:
+          application/gzip:
+            schema: { type: string, format: binary }
+          application/x-gzip:
+            schema: { type: string, format: binary }
+          application/tar+gzip:
+            schema: { type: string, format: binary }
+          application/octet-stream:
+            schema: { type: string, format: binary }
+      responses:
+        '200':
+          description: Idempotent re-publish — identical sha256 content already published; existing record returned.
+          content:
+            application/json:
+              schema: { $ref: '#/components/schemas/TestPackPublishRecord' }
+        '201':
+          description: New version published to the test catalog.
+          content:
+            application/json:
+              schema: { $ref: '#/components/schemas/TestPackPublishRecord' }
+        '400':
+          description: |
+            One of the 17 spec-documented 400-class publish error codes:
+            URL/scope (`invalid_pack_scope`, `invalid_pack_name`, `invalid_version`),
+            body shape (`invalid_body`),
+            tarball extraction (`tarball_gunzip_failed`, `tarball_too_large`,
+            `tarball_manifest_missing`, `tarball_manifest_too_large`,
+            `tarball_manifest_not_json`, `tarball_entry_missing`,
+            `tarball_entry_too_large`, `tarball_path_traversal`,
+            `tarball_tar_parse_failed`),
+            manifest contents (`invalid_manifest`, `manifest_mismatch` or
+            the granular `manifest_name_mismatch` / `manifest_version_mismatch` pair,
+            `pack_integrity_failure`, `unsupported_runtime`).
+          content:
+            application/json:
+              schema: { $ref: '../schemas/error-envelope.schema.json' }
+        '401': { $ref: '#/components/responses/Unauthenticated' }
+        '403':
+          description: '`forbidden` — caller lacks `packs:publish` scope or the namespace claim.'
+          content:
+            application/json:
+              schema: { $ref: '../schemas/error-envelope.schema.json' }
+        '404':
+          description: 'Host does not advertise `capabilities.packs.testMode.supported: true`, or the test-mode env-gate is unset.'
+          content:
+            application/json:
+              schema: { $ref: '../schemas/error-envelope.schema.json' }
+        '409':
+          description: '`conflict` (or `version_conflict`) — `(name, version)` already published with different content.'
+          content:
+            application/json:
+              schema: { $ref: '../schemas/error-envelope.schema.json' }
+    get:
+      tags: [packs-test]
+      summary: Fetch a published test-catalog tarball.
+      description: 'Mirror of `GET /v1/packs/{name}/-/{version}.tgz`. Returns the gzipped tarball bytes with `Content-Type: application/tar+gzip` and an `ETag: "sha256-..."` matching the manifest''s `tarballSha256`.'
+      operationId: getTestPackTarball
+      responses:
+        '200':
+          description: Tarball bytes.
+          content:
+            application/tar+gzip:
+              schema: { type: string, format: binary }
+        '400':
+          description: '`invalid_pack_name` or `invalid_version` — URL params malformed.'
+          content:
+            application/json:
+              schema: { $ref: '../schemas/error-envelope.schema.json' }
+        '401': { $ref: '#/components/responses/Unauthenticated' }
+        '403':
+          description: '`forbidden` — caller lacks `packs:read` scope.'
+          content:
+            application/json:
+              schema: { $ref: '../schemas/error-envelope.schema.json' }
+        '404':
+          description: 'Pack version not found in the test catalog (or host does not advertise `capabilities.packs.testMode.supported: true`).'
+          content:
+            application/json:
+              schema: { $ref: '../schemas/error-envelope.schema.json' }
+  /v1/packs-test/{name}/-/{version}:
+    parameters:
+      - $ref: '#/components/parameters/PackName'
+      - $ref: '#/components/parameters/PackVersion'
+    delete:
+      tags: [packs-test]
+      summary: Unpublish a test-catalog version (mirrors unpublish-window semantics).
+      description: |
+        Mirror of `DELETE /v1/packs/{name}/-/{version}` per
+        `spec/v1/node-packs.md`. Returns `400 unpublish_window_expired`
+        for versions older than the registry's unpublish window
+        (default 72h). Test-mode implementations MAY shorten the
+        window for tractable conformance fixtures but MUST surface
+        the same error code.
+      operationId: deleteTestPackVersion
+      responses:
+        '204':
+          description: Version successfully unpublished from the test catalog.
+        '400':
+          description: '`unpublish_window_expired`, `invalid_pack_name`, or `invalid_version`.'
+          content:
+            application/json:
+              schema: { $ref: '../schemas/error-envelope.schema.json' }
+        '401': { $ref: '#/components/responses/Unauthenticated' }
+        '403':
+          description: '`forbidden` — caller lacks `packs:publish` scope.'
+          content:
+            application/json:
+              schema: { $ref: '../schemas/error-envelope.schema.json' }
+        '404':
+          description: Version doesn't exist in the test catalog, or host does not advertise the test-mode capability.
+          content:
+            application/json:
+              schema: { $ref: '../schemas/error-envelope.schema.json' }
+  /v1/packs-test/{name}/-/{version}.sig:
+    parameters:
+      - $ref: '#/components/parameters/PackName'
+      - $ref: '#/components/parameters/PackVersion'
+    get:
+      tags: [packs-test]
+      summary: Fetch the detached Ed25519 signature for a test-catalog pack.
+      description: |
+        Mirror of `GET /v1/packs/{name}/-/{version}.sig`. Returns the
+        signature blob over `pack.json` for this version. MAY 302-redirect
+        to a storage-backend signed URL.
+      operationId: getTestPackSignature
+      responses:
+        '200':
+          description: Signature blob.
+          content:
+            application/octet-stream:
+              schema: { type: string, format: binary }
+        '302':
+          description: Redirect to a storage-backend signed URL (clients SHOULD follow).
+          headers:
+            Location:
+              schema: { type: string, format: uri }
+        '400':
+          description: '`invalid_pack_name` or `invalid_version` — URL params malformed.'
+          content:
+            application/json:
+              schema: { $ref: '../schemas/error-envelope.schema.json' }
+        '401': { $ref: '#/components/responses/Unauthenticated' }
+        '403':
+          description: '`forbidden` — caller lacks `packs:read` scope.'
+          content:
+            application/json:
+              schema: { $ref: '../schemas/error-envelope.schema.json' }
+        '404':
+          description: |
+            `signature_not_available` — version is missing, yanked,
+            unsigned at publish time, OR the registry's storage backend
+            is unwired. The four cases are intentionally
+            indistinguishable per spec/v1/node-packs.md §"GET /v1/packs/{name}/-/{version}.sig".
+            Also returned when the host does not advertise
+            `capabilities.packs.testMode.supported: true`.
+          content:
+            application/json:
+              schema: { $ref: '../schemas/error-envelope.schema.json' }
 # ─────────────────────────────────────────────────────────────────────────────
 # COMPONENTS
 # ─────────────────────────────────────────────────────────────────────────────
@@ -1189,6 +1447,25 @@ components:
         Duplicate requests return the cached response with header
         `openwop-Idempotent-Replay: true`.
+    PackName:
+      in: path
+      name: name
+      required: true
+      schema:
+        type: string
+        minLength: 3
+        maxLength: 214
+      description: Reverse-DNS pack name per `spec/v1/node-packs.md` §Naming (e.g. `vendor.acme.salesforce-tools`).
+    PackVersion:
+      in: path
+      name: version
+      required: true
+      schema:
+        type: string
+        pattern: '^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-[\w.-]+)?(?:\+[\w.-]+)?$'
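+      description: SemVer 2.0.0 version of the pack (the pattern is a permissive approximation; its \w class also admits "_", which strict SemVer identifiers do not allow).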
   responses:
     Unauthenticated:
       description: Missing or invalid credential.
@@ -1304,3 +1581,31 @@ components:
                 supported:
                   type: array
                   items: { type: string, enum: [values, updates, messages, debug] }
+    TestPackPublishRecord:
+      description: |
+        Response body for a successful publish against the test-mode
+        registry namespace (RFC 0025). Mirror of the production publish
+        record returned by `PUT /v1/packs/{name}/-/{version}.tgz`.
+      type: object
+      required: [name, version, tarballSha256, publishedAt]
+      properties:
+        name:
+          type: string
+          description: Reverse-DNS pack name as PUT'd.
+        version:
+          type: string
+          description: SemVer 2.0.0 version as PUT'd.
+        tarballSha256:
+          type: string
+          pattern: '^sha256-[A-Za-z0-9+/=]+$'
+          description: Server-computed SHA-256 over the uploaded tarball bytes.
+        publishedAt:
+          type: string
+          format: date-time
+        signed:
+          type: boolean
+          description: Whether a sibling `.sig` signature blob was persisted.
+        signingMethod:
+          type: string
+          enum: [sigstore, manual, none]