@openwop/openwop-conformance 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/CHANGELOG.md +60 -0
  2. package/README.md +2 -2
  3. package/api/asyncapi.yaml +8 -3
  4. package/api/openapi.yaml +305 -0
  5. package/coverage.md +35 -10
  6. package/fixtures/conformance-phase4-nondet-tool.json +53 -0
  7. package/fixtures/conformance-phase4-replay-divergence.json +40 -0
  8. package/fixtures.md +5 -3
  9. package/package.json +1 -1
  10. package/schemas/README.md +2 -0
  11. package/schemas/capabilities.schema.json +176 -3
  12. package/schemas/credential-reference.schema.json +21 -0
  13. package/schemas/node-pack-manifest.schema.json +112 -1
  14. package/schemas/run-diff-response.schema.json +64 -0
  15. package/schemas/run-event-payloads.schema.json +104 -2
  16. package/schemas/run-event.schema.json +8 -1
  17. package/schemas/run-snapshot.schema.json +11 -0
  18. package/src/lib/behavior-gate.ts +51 -0
  19. package/src/lib/driver.ts +13 -1
  20. package/src/lib/saml-idp.ts +179 -0
  21. package/src/scenarios/approval-gate-events.test.ts +61 -0
  22. package/src/scenarios/approval-gate-flow.test.ts +68 -0
  23. package/src/scenarios/auth-saml-profile.test.ts +119 -0
  24. package/src/scenarios/auth-scim-profile.test.ts +65 -0
  25. package/src/scenarios/authorization-fail-closed.test.ts +80 -0
  26. package/src/scenarios/authorization-roles-shape.test.ts +83 -0
  27. package/src/scenarios/connector-manifest-validity.test.ts +142 -0
  28. package/src/scenarios/credential-payload-redaction.test.ts +93 -0
  29. package/src/scenarios/credentials-capability-shape.test.ts +90 -0
  30. package/src/scenarios/cross-engine-append-behavior.test.ts +204 -0
  31. package/src/scenarios/cross-host-traceparent-propagation.test.ts +13 -6
  32. package/src/scenarios/cross-workspace-isolation.test.ts +72 -0
  33. package/src/scenarios/deadletter-capability-shape.test.ts +59 -0
  34. package/src/scenarios/deadletter-retry-exhaustion.test.ts +62 -0
  35. package/src/scenarios/experimental-tier-shape.test.ts +192 -0
  36. package/src/scenarios/identity-owner-shape.test.ts +64 -0
  37. package/src/scenarios/multi-agent-confidence-escalation.test.ts +59 -21
  38. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +87 -12
  39. package/src/scenarios/multi-region-idempotency-behavior.test.ts +203 -0
  40. package/src/scenarios/oauth-capability-shape.test.ts +97 -0
  41. package/src/scenarios/oauth-connector-redaction.test.ts +91 -0
  42. package/src/scenarios/pack-registry-isolation.test.ts +108 -0
  43. package/src/scenarios/pack-registry-publish.test.ts +1 -1
  44. package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +126 -0
  45. package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +183 -0
  46. package/src/scenarios/replay-divergence-at-refusal.test.ts +187 -7
  47. package/src/scenarios/replay-observable-sequence-determinism.test.ts +20 -6
  48. package/src/scenarios/run-diff.test.ts +143 -0
  49. package/src/scenarios/sandbox-capability-gate-respected.test.ts +15 -13
  50. package/src/scenarios/sandbox-memory-cap.test.ts +7 -8
  51. package/src/scenarios/sandbox-mvp-behavior.test.ts +280 -0
  52. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +14 -13
  53. package/src/scenarios/sandbox-no-host-env-leak.test.ts +14 -21
  54. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +20 -15
  55. package/src/scenarios/sandbox-no-host-process-escape.test.ts +18 -13
  56. package/src/scenarios/sandbox-no-network-escape.test.ts +14 -31
  57. package/src/scenarios/sandbox-timeout-cap.test.ts +7 -8
  58. package/src/scenarios/scheduling-capability-shape.test.ts +81 -0
  59. package/src/scenarios/scheduling-cron-fires-once.test.ts +66 -0
  60. package/src/scenarios/secret-leakage-otel-attribute.test.ts +241 -0
  61. package/src/scenarios/spec-corpus-validity.test.ts +2 -2
package/CHANGELOG.md CHANGED
@@ -1,5 +1,65 @@
1
1
  # `@openwop/openwop-conformance` Changelog
2
2
 
3
+ ## [1.6.0] — 2026-05-25
4
+
5
+ Minor bump per `PUBLISHING.md` §"Versioning alignment" — ships the conformance scenarios for the **MyndHyve protocol-extension cohort (RFCs 0045–0054)** so adopting hosts can pin the released suite, run it against their deployment, and report pass for `Draft → Active → Accepted` graduation (per `RFCS/0001-rfc-process.md` §"Promotion to Accepted"). All additive — every new scenario is capability-gated and soft-skips against a host that doesn't advertise the surface, so existing v1.0-only hosts pass unchanged.
6
+
7
+ ### Added — RFC 0045–0054 cohort scenarios
8
+
9
+ - **RFC 0045** (connector pack manifest) — `connector-manifest-validity.test.ts` (server-free: §A schema validity of the `connector` block + both ConnectorAuth variants; §B action/trigger typeId-resolution).
10
+ - **RFC 0046** (`host.credentials`) — `credentials-capability-shape.test.ts` (always) + `credential-payload-redaction.test.ts` (gated; SECURITY invariant `credential-payload-redaction` via the `credentials/echo` seam).
11
+ - **RFC 0047** (`host.oauth`) — `oauth-capability-shape.test.ts` + `oauth-connector-redaction.test.ts` (gated; token redaction via the `oauth/connector-echo` seam).
12
+ - **RFC 0048** (identity triple) — `identity-owner-shape.test.ts` (server-free) + `cross-workspace-isolation.test.ts` (gated; fail-closed `run_forbidden` via the `identity/*` seams).
13
+ - **RFC 0049** (RBAC) — `authorization-roles-shape.test.ts` (always) + `authorization-fail-closed.test.ts` (gated; SECURITY invariant `authorization-fail-closed` via the `authorization/decide` seam).
14
+ - **RFC 0050** (SAML / SCIM) — `auth-saml-profile.test.ts` + `auth-scim-profile.test.ts` (advertisement shape always; behavior opt-in via `OPENWOP_TEST_SAML_IDP_URL` / `OPENWOP_TEST_SCIM_URL` + the `auth/saml/validate` + `auth/scim/provision` seams). Now ships a **bundled synthetic SAML IdP harness** (`conformance/src/lib/saml-idp.ts`, `node:crypto` RSA-SHA256, no deps) that mints the 1-positive + 6-negative assertion variants and whose `verify()` implements the RFC 0050 §A MUST list — `auth-saml-profile.test.ts` runs the full negative reference suite **server-free**; a host's real ACS validates the same assertions over the seam.
15
+ - **RFC 0051** (approval gate) — `approval-gate-events.test.ts` (server-free) + `approval-gate-flow.test.ts` (gated; unauthorized-denied + override-audited via the `governance/approval-gate` seam).
16
+ - **RFC 0052** (scheduling) — `scheduling-capability-shape.test.ts` (always) + `scheduling-cron-fires-once.test.ts` (gated; once-per-tick + missed-tick via the `scheduling/tick` seam).
17
+ - **RFC 0053** (dead-letter) — `deadletter-capability-shape.test.ts` (always) + `deadletter-retry-exhaustion.test.ts` (gated; `run.dead_lettered` + fork-eligibility via the `deadletter/exhaust` seam).
18
+ - **RFC 0054** (run diff) — `run-diff.test.ts` (landed with the run-diff endpoint).
19
+
20
+ Two new SECURITY invariants gated in this cohort: `credential-payload-redaction` (0046, also covers 0047 tokens) + `authorization-fail-closed` (0049). New `/v1/host/sample/*` seams are catalogued in `spec/v1/host-sample-test-seams.md` §"Open seams". Suite scenario-file count → 231.
21
+
22
+ ## [1.5.0] — 2026-05-22
23
+
24
+ Minor bump per `PUBLISHING.md` §"Versioning alignment" — unblocks MyndHyve's RFC 0044 + RFC 0039 Half A co-graduation by shipping the relaxed + RFC-0044-routing assertion logic in `multi-agent-confidence-escalation.test.ts`. No new scenario files; no new fixtures. Behavioral honesty pass on 8 sandbox scenarios + schema additions for the new RFC 0044 capability advertisement.
25
+
26
+ ### Changed — RFC 0044 interrupt-kind routing in `multi-agent-confidence-escalation.test.ts`
27
+
28
+ Previously the scenario asserted `expect(terminal.status).toBe('waiting-clarification')` — strict equality on the clarify-kind escalation path, which rejected even RFC 0039 §A's own escalate-approval path (→ `waiting-approval`). v1.5.0 ships the relaxed + RFC-0044-routing logic landed in upstream commits `f03d01d` (relaxation to accept both canonical statuses) + `641d088` (RFC 0044 vendor-kind routing):
29
+
30
+ - **Canonical kind advertised** (`clarification` / `approval`) → strict `expect(terminal.status).toBe(...)` against the status matching the advertised kind (`'waiting-clarification'` for `clarification`, `'waiting-approval'` for `approval`).
31
+ - **Vendor kind advertised** (`x-host-<host>-<kind>` per `host-extensions.md` §"Canonical prefixes") → `expect(terminal.status.startsWith('waiting-')).toBe(true)`; the host's `interrupt.md` mapping determines the suffix.
32
+ - **No advertisement** → fall back to the canonical either-status check (preserves the `f03d01d` relaxation).
33
+
34
+ This unblocks MyndHyve's `confidenceEscalationInterruptKind: 'x-host-myndhyve-low-confidence'` advertisement (their entrenched `interrupt.kind: 'low-confidence'` → `waiting-approval` mapping) without forcing a cross-cutting rename of `LOW_CONFIDENCE_SUSPEND_REASON` + `mockAgent.node` + `escalationThreshold.ts` + downstream UI consumers. See RFC 0044 §B (`RFCS/0044-confidence-escalation-interrupt-kind-advertisement.md`) for the normative contract.
35
+
36
+ ### Changed — Sandbox scenarios converted vacuous `expect(true).toBe(true)` to `it.todo` (honesty pass)
37
+
38
+ The 8 `sandbox-*.test.ts` scenarios in v1.4.0 carried `expect(true).toBe(true)` tautology assertions for their behavioral legs. v1.5.0 converts them to `it.todo()` per upstream commit `5864a2f`:
39
+
40
+ - `sandbox-no-host-process-escape.test.ts`
41
+ - `sandbox-no-network-escape.test.ts`
42
+ - `sandbox-no-host-fs-escape.test.ts`
43
+ - `sandbox-no-host-env-leak.test.ts`
44
+ - `sandbox-timeout-cap.test.ts`
45
+ - `sandbox-memory-cap.test.ts`
46
+ - `sandbox-no-cross-pack-mutation.test.ts`
47
+ - `sandbox-capability-gate-respected.test.ts`
48
+
49
+ Test reporters now surface 8 todos instead of 8 vacuous passes. The advertisement-shape probes (in `sandbox-no-host-fs-escape`, `sandbox-memory-cap`, `sandbox-timeout-cap`) still run real discovery-doc assertions when capabilities are advertised. Behavioral assertions light up when a sandbox-executing reference host wires the seam.
50
+
51
+ ### Changed — `schemas/capabilities.schema.json` (vendored): adds `multiAgent.executionModel.confidenceEscalationInterruptKind`
52
+
53
+ Per RFC 0044 §A. The optional field accepts the canonical literal `"clarification"` / `"approval"` OR a vendor extension matching `^x-host-[a-z][a-z0-9-]*-[a-z][a-z0-9-]*$` per `host-extensions.md` §"Canonical prefixes". Required for the routing logic above; absent advertisement falls back to the canonical-status check.
54
+
55
+ ### No new scenario files
56
+
57
+ Scenario file count unchanged at 205 (the v1.4.0 baseline). All changes are behavior modifications to existing files.
58
+
59
+ ### Known limits — unchanged from v1.4.0
60
+
61
+ The 6 `it.todo` behavioral assertions across RFC 0034 OTel-seam-gated, RFC 0040 traceparent-propagation, RFC 0041 refusal-divergence + observable-sequence scenarios remain. The 8 sandbox `it.todo` assertions are new in v1.5.0 (replacing the v1.4.0 vacuous-pass shapes).
62
+
3
63
  ## [1.4.0] — 2026-05-22
4
64
 
5
65
  Minor bump per `PUBLISHING.md` §"Versioning alignment" — bundles 45 new conformance scenarios + 23 new fixtures landing since the 1.3.0 publish (2026-05-19). Unblocks non-steward host adoption of RFCs 0027 + 0028 + 0029 + 0030 + 0031 + 0032 + 0033 + 0034 + 0035 + 0036 + 0037 + 0039 + 0040 + 0041 against a single suite version.
package/README.md CHANGED
@@ -93,7 +93,7 @@ Exit code is non-zero on any failed assertion.
93
93
 
94
94
  ## What's Covered
95
95
 
96
- The current suite has 205 scenario files under `src/scenarios/`. 2026-05-22 (RFC 0041 Phase 4 — replay determinism under nondeterministic models) added three scenarios: `replay-divergence-at-refusal.test.ts` (advertisement-shape probe on `replayDeterminism.refusalDivergenceEmission` + 2 `it.todo` for the dual-direction refusal-divergence case), `replay-observable-sequence-determinism.test.ts` (capability-gated; behavioral assertion soft-skipped until a `conformance-phase4-nondet-tool` fixture ships), `replay-llm-cache-key-portable.test.ts` (intra-host reproducibility + non-recipe-field invariance + Phase 4 advertisement alignment — reuses the existing `POST /v1/host/sample/test/llm-cache-key` seam from the sibling `replay-llm-cache-key.test.ts`). 2026-05-20 (RFC 0027 §A templateKinds-coverage follow-up — paired with `prompt-end-to-end-events.test.ts`) added `prompt-all-four-kinds-events.test.ts` exercising all four `PromptKind` values (`system`, `user`, `schema-hint`, `few-shot`) end-to-end through the reference workflow-engine sample's `local.sample.demo.mock-ai` dispatch path; capability-gated via `behaviorGate('prompts-supported', ...)`. Closes the credibility gap where the host advertised `templateKinds: ["system", "user", "few-shot", "schema-hint"]` but only the system+user pair was actually wired into dispatch. 
2026-05-20 (RFCs 0030–0033 — envelope LLM-contract-hardening track) added 15 scenarios across four `Active` RFCs: `envelope-reasoning-shape.test.ts` (RFC 0030, always-on; asserts the OPTIONAL `reasoning` property on the three universal-kind schemas + the `schema.response` deliberate omission), `envelope-reasoning-secret-redaction.test.ts` (RFC 0030, capability-gated on `capabilities.envelopes.reasoning.supported` + `secrets.supported`; 5 `it.todo()` placeholders for SECURITY invariant `envelope-reasoning-secret-redaction`), `envelope-tier-one-subset-static.test.ts` (RFC 0030, always-on for load-bearing rules — no `oneOf` / `allOf` / `not` / `prefixItems` / `propertyNames` anywhere; gated on `tierOneSubsetCompliance: "strict"` for OpenAI-strict-only constraints), `envelope-variant-discriminator-static.test.ts` (RFC 0031, always-on; asserts no `oneOf` + every `anyOf` branch declares a single-string-enum discriminator in `required` on every `schemas/envelopes/*.schema.json`), `model-capability-substituted.test.ts` (RFC 0031, advertisement-shape probe on `capabilities.modelCapabilities.advertised[]` identifier pattern + 5 `it.todo()` placeholders for SECURITY invariant `model-capability-substituted-no-credential-disclosure`), `model-capability-insufficient.test.ts` (RFC 0031, 6 `it.todo()` placeholders for refusal + no-recursive-fallback), `node-module-required-capabilities-shape.test.ts` (RFC 0031 SHOULD-tier authoring-convention; 4 `it.todo()` placeholders), and the six envelope-reliability events from RFC 0032 (`envelope-retry-attempted` carrying the shared advertisement-shape probe enforcing both MUST-tier events in `events[]` per RFC 0032 §C, plus `envelope-retry-exhausted`, `envelope-refusal-shape`, `envelope-truncated`, `envelope-nl-to-format-engaged`, `envelope-recovery-applied` — collectively 39 `it.todo()` placeholders covering retry/refusal/truncation/recovery + SECURITY invariants `envelope-refusal-no-prompt-leak` and `envelope-recovery-no-content-leak`), 
plus RFC 0033's two scenarios (`envelope-completion-distinguishes-truncation.test.ts` + `envelope-truncation-cap-exhaustion.test.ts` — 12 `it.todo()` placeholders covering the truncation-vs-schema-violation retry-routing distinction + the DoS-bound assertion). Reference workflow-engine sample advertises `capabilities.envelopes.reasoning: { supported: true, promptDirective: "off" }` + `tierOneSubsetCompliance: "warn"` honestly (schemas accept the field; host doesn't yet inject the directive); the other three RFCs' capability blocks defer to reference-host emission code per the staged RFC 0027 §G precedent. 2026-05-20 (RFC 0028 §B Phase B — prompt-pack boot-time install) added `prompt-pack-install.test.ts` (capability-gated on `capabilities.prompts.endpointsSupported: true`; asserts a host that ran the boot-time pack loader surfaces ≥ 1 pack-source template under `GET /v1/prompts?source=pack` carrying the canonical `meta.source: "pack"` + `meta.packName` + `meta.packVersion` stamps; positively identifies the in-tree `vendor.openwop.prompt-sample` reference pack's `writer-system` template when present). Pairs with the new `host/promptPackLoader.ts` boot-time entry on the reference workflow-engine sample, which scans `examples/packs/*` plus `OPENWOP_PROMPT_PACKS_DIR` and calls `installPackTemplates()` for each `kind: "prompt"` pack found. 
2026-05-20 (RFC 0029 Phase C — prompt resolution chain wire shape) added three more scenarios: `prompt-resolution-chain-node-wins.test.ts` (capability-gated on `capabilities.prompts.supported: true`; asserts layer-1 node-config supersedes lower layers per `spec/v1/prompts.md` §"Resolution chain (normative)"), `prompt-resolution-chain-agent-intrinsic.test.ts` (additionally gated on `capabilities.prompts.agentBindings: true`; asserts agent intrinsic `systemPromptRef` wins over `promptOverrides` AND lower layers when the node has no layer-1 ref), `prompt-resolution-chain-fallback-cascade.test.ts` (asserts layer 3 workflow-defaults wins over layer 4 host-defaults; layer 4 host-defaults wins when 1-3 yield null; resolved is null when all four yield null but chain[] still lists every attempted layer). The scenarios drive the host's `POST /v1/host/sample/prompt/resolve` test seam (reference-host implementation deferred to follow-up slice per RFC 0021 staging precedent). 2026-05-20 (RFC 0027 Phase A — prompt templates wire shape) added three scenarios: `prompt-template-shape.test.ts` (always-on; Ajv compileability + positive/negative round-trip for PromptTemplate + PromptRef + PromptKind), `prompt-composed-secret-redaction.test.ts` (capability-gated on `capabilities.prompts.supported: true` + `observability: "full"`; asserts `[REDACTED:<secretId>]` markers in `prompt.composed` payloads for `source: "secret"` variable bindings per SECURITY/threat-model-secret-leakage.md §SR-1), `prompt-composed-trust-marker.test.ts` (same capability gates; asserts `<UNTRUSTED>...</UNTRUSTED>` wrapping + `contentTrust: "untrusted"` propagation per RFC 0020 §D). Paired with new `fixtures/prompt-templates/` sub-directory + per-fixture schema-validity describe block + future SECURITY invariants `prompt-composed-secret-redaction` and `prompt-composed-trust-marker` (lands alongside reference-host emission per RFC 0021 staging precedent). 
2026-05-18 (RFC 0022 `Draft` — runtime variable mapping) added four `it.todo()` placeholder scenarios covering the new mapping surfaces on `core.dispatch` (§A — `dispatch-input-mapping.test.ts`, `dispatch-output-mapping.test.ts`, `dispatch-cross-worker-handoff.test.ts`) and `core.subWorkflow` (§B — `subworkflow-input-mapping.test.ts`). Gated on `capabilities.agents.dispatchMapping` (dispatch trio) and `capabilities.subWorkflow.inputMapping` (subWorkflow). Promote to live assertions when RFC 0022 reaches `Active` + a reference host advertises the matching flags. 2026-05-17 (RFC 0003 §D handoff-schema enforcement, HV-1) added `agentPackHandoffSchemaValidation.test.ts` — verifies the host validates dispatch payloads against `handoff.taskSchemaRef` AND return payloads against `handoff.returnSchemaRef` per RFC 0003 §D. Paired with the new `agent-pack-handoff-schema-enforcement` row in `SECURITY/invariants.yaml`. 2026-05-17 (AI Envelope gap-closure, DRAFT v1.x — `spec/v1/ai-envelope.md`) added 7 advertisement-shape scenarios with `it.todo()` behavioral placeholders gated on `capabilities.envelopeContracts.advertised: true`: `aiEnvelope.universalKinds.test.ts`, `aiEnvelope.schemaDrift.test.ts`, `aiEnvelope.correlationReplay.test.ts`, `aiEnvelope.contractRefusal.test.ts`, `aiEnvelope.trustBoundaryPropagation.test.ts`, `aiEnvelope.redaction.test.ts`, `aiEnvelope.capBreached.test.ts`. Paired with the new `envelope-redaction-sr-1-carry-forward` row in `SECURITY/invariants.yaml`. 2026-05-17 (post-publish hardening, deep audit of `core.openwop.agents`) added `agents-run-tool-allowlist.test.ts` — server-free scenario locking in the `core.openwop.agents@1.0.1` safety-fix that closes `OPENWOP-AUDIT-2026-003` (function-typed `tool.handler` properties rejected at `validateTools()` with `INVALID_TOOL_DECLARATION`; tool-driven runs require `ctx.agentRuntime`; tool-less safe fallback preserved). Paired with the new `agents-run-no-raw-handler` row in `SECURITY/invariants.yaml`. 
Same-day post-publish hardening added `idempotency-key-determinism.test.ts` — server-free scenario locking in the `core.openwop.http@1.1.2` determinism safety-fix (default `composite` mode produces deterministic keys in `(runId, nodeId, payload)`; removed `uuid` mode rejects with `CONFIG_INVALID`; cross-impl vector test lets third-party reimplementations verify wire agreement). Paired with the new `idempotency-key-deterministic` row in `SECURITY/invariants.yaml`. 2026-05-17 (Phase 3 of RFC 0013) added three server-free scenarios exercising the reference workflow-chain expansion library (`conformance/src/lib/workflow-chain-expansion.ts`): `workflow-chain-expansion.test.ts` (parameter substitution + node id collision avoidance + edge rewriting + capability propagation + runtime-invariance contract), `workflow-chain-unresolvable-typeid.test.ts` (rejection with `chain_unresolvable_typeid` when a chain references an unknown typeId), and `workflow-chain-pack-signature-verification.test.ts` (Ed25519 verification recipe reuse from `node-packs.md §Signing`). Earlier that day (Phase 1) added `workflow-chain-pack-manifest-validation.test.ts` — server-free schema-validation scenario covering the new `workflow-chain-pack-manifest.schema.json` (positive sample + two negatives: kind/contents mismatch and invalid `chainId`). Closes RFC 0013 (`Workflow-chain packs`, `Draft`) Phases 1 + 3 alongside the new `spec/v1/workflow-chain-packs.md`, the `Capabilities.workflowChainPacks` block, and the registry build-index/conformance-check `kind` routing from Phase 2. Earlier that day, the suite added 27 `it.todo()` placeholder scenarios paired with RFCs 0014-0020 (host capability surfaces — fs, kvStorage, tableStorage, queueBus, sql/vector/search, blob/cache, mcp.serverMount). These promote to live assertions when each RFC reaches `Active` + the matching capability block lands in `schemas/capabilities.schema.json` + a reference host advertises the capability. 
Earlier additions include 18 Multi-Agent Shift scenarios (Phases 1-5) added 2026-05-10, the `registry-public.test.ts` public-registry healthcheck added 2026-05-11 (opt-in via `OPENWOP_TEST_PUBLIC_REGISTRY=true`), the `replay-llm-cache-key.test.ts` placeholder added 2026-05-11 (three `it.todo()` cases for the cross-host LLM cache-key recipe per `replay.md` §"LLM cache-key recipe"), the two `production-*.test.ts` scenarios added 2026-05-11 for the `openwop-production` profile per RFC 0009 (`production-backpressure.test.ts`, `production-retention-expiry.test.ts`), the four `auth-*.test.ts` scenarios added 2026-05-11/12 for the production-auth profiles per RFC 0010 (`auth-api-key-rotation.test.ts`, `auth-oauth2-client-credentials.test.ts`, `auth-oidc-user-bearer.test.ts`, `auth-mtls.test.ts` (opt-in via `OPENWOP_TEST_MTLS=1`)), `replay-retention-expiry.test.ts` added 2026-05-12 (capability shape + 410/422 envelope per `replay.md` §"Retention and garbage collection"), `bulk-cancel.test.ts` added 2026-05-12 (Phase B close-out of R1 — `POST /v1/runs:bulk-cancel`), the two Phase H launch-blocker advertisement-contract scenarios added 2026-05-12 (`mcp-toolcall-redaction.test.ts` for the MCP-1 invariant per `host-capabilities.md §host.mcp` + `threat-model-prompt-injection.md §UNTRUSTED`, and `http-client-ssrf.test.ts` for the SSRF + body-size cap advertisement contract on `capabilities.httpClient`), the `wasm-pack-abi-version-rejection.test.ts` Track 7 scenario added 2026-05-12 for the ABI-mismatch positive path via the `vendor.openwop.misbehaving-abi` pack per RFC 0008 §H, the `otel-trace-propagation-subworkflow.test.ts` Track 11 close-out added 2026-05-13 (parent + child run spans share the inbound traceparent's traceId across the `core.subWorkflow` dispatch boundary), and the three RFC 0012 (Memory Compaction Profile, `Active`) scenarios added 2026-05-13/14: `memory-compaction-sr1-carry-forward.test.ts` (load-bearing SR-1 §D), `memory-compaction-event-emitted.test.ts` 
(canonical §B payload shape), and `memory-compaction-provenance-tag.test.ts` (soft assertion on §C `compacted-from:<id>` convention). All three gate on `capabilities.memory.compaction.supported` + the host's test seam at `/v1/test/memory/{seed,compact}` (Postgres reference host enables both via `OPENWOP_MEMORY_COMPACTION=true OPENWOP_TEST_TRIGGER_COMPACTION=true`). 2026-05-15 (gap-closure CF-3) added `interrupt-token-matrix.test.ts` (malformed / unknown / replay / cross-run-id paths on `GET|POST /v1/interrupts/{token}`). The maintained scenario-to-spec map lives in [`coverage.md`](./coverage.md); this README keeps the operator quickstart and the historical scenario notes below.
96
+ The current suite has 231 scenario files under `src/scenarios/`. 2026-05-25 (RFC 0025 §C point 1 — test-catalog isolation invariant; pairs with the 25 publish-error scenarios in `pack-registry-publish.test.ts`) added `pack-registry-isolation.test.ts` — capability-gated on `capabilities.packs.testMode.{supported, isolated}: true`; PUTs a disposable pack into `/v1/packs-test/{name}` and asserts the same `(name, version)` does NOT appear via `GET /v1/packs/{name}` — anchors the test-catalog isolation MUST in RFC 0025 §C. 2026-05-25 (RFC 0028 Tier-2 post-promotion T2 — read-side sister scenario for workspace-membership enforcement) added `prompt-read-workspace-membership-enforced.test.ts` — gates on `capabilities.prompts.supported: true` (broader than `mutableLibrary` so read-only hosts that expose `?workspaceId=` are also probed); drives `GET /v1/prompts?workspaceId=<random-non-member>` and interprets the response: 4xx PASS (canonical envelope check on 403); 200 with empty `templates[]` PASS (correct null result for a nonexistent workspace); 200 with non-empty `templates[]` FAIL (cross-tenant leak); 200 without `templates[]` field SKIP (host doesn't expose workspace-scoped reads). Verifies SECURITY invariant `prompt-read-workspace-membership-enforced`. Same-day T1 strengthened `prompt-mutation-workspace-membership-enforced.test.ts` to pin `error === "workspace_membership_required"` when the host's refusal status is 403 (other refusal codes unconstrained). 2026-05-25 (RFC 0028 Tier-2 follow-up — workspace-membership enforcement on mutating prompt endpoints, filed in response to a self-disclosed adopter vulnerability) added `prompt-mutation-workspace-membership-enforced.test.ts` — capability-gated on `capabilities.prompts.mutableLibrary: true`; drives `POST /v1/prompts` with a cryptographically-random non-member `workspaceId` and asserts the host refuses (NOT a 2xx; any 4xx/5xx is acceptable — silent success is the failure mode). 
Verifies SECURITY invariant `prompt-mutation-workspace-membership-enforced`. 2026-05-22 (RFC 0034 §B follow-up — secret-leakage harness against the OTel + debug-bundle seams) added `secret-leakage-otel-attribute.test.ts` — gates on `capabilities.secrets.supported` + `capabilities.observability.testSeams.{otelScrape,debugBundleExport}` AND the `OPENWOP_CANARY_SECRET_VALUE` env (host operator + conformance runner agree on the canary). Drives the existing `openwop-smoke-byok-roundtrip` fixture end-to-end; scrapes both seams after run completion; hard-fails if the canary plaintext appears in any OTel span attribute or debug-bundle field. Verifies SECURITY invariants `secret-leakage-otel-attribute` + `secret-leakage-debug-bundle-otel`. 2026-05-22 (RFC 0041 Phase 4 — replay determinism under nondeterministic models) added three scenarios: `replay-divergence-at-refusal.test.ts` (advertisement-shape probe on `replayDeterminism.refusalDivergenceEmission` + 2 `it.todo` for the dual-direction refusal-divergence case), `replay-observable-sequence-determinism.test.ts` (capability-gated; behavioral assertion soft-skipped until a `conformance-phase4-nondet-tool` fixture ships), `replay-llm-cache-key-portable.test.ts` (intra-host reproducibility + non-recipe-field invariance + Phase 4 advertisement alignment — reuses the existing `POST /v1/host/sample/test/llm-cache-key` seam from the sibling `replay-llm-cache-key.test.ts`). 2026-05-20 (RFC 0027 §A templateKinds-coverage follow-up — paired with `prompt-end-to-end-events.test.ts`) added `prompt-all-four-kinds-events.test.ts` exercising all four `PromptKind` values (`system`, `user`, `schema-hint`, `few-shot`) end-to-end through the reference workflow-engine sample's `local.sample.demo.mock-ai` dispatch path; capability-gated via `behaviorGate('prompts-supported', ...)`. 
Closes the credibility gap where the host advertised `templateKinds: ["system", "user", "few-shot", "schema-hint"]` but only the system+user pair was actually wired into dispatch. 2026-05-20 (RFCs 0030–0033 — envelope LLM-contract-hardening track) added 15 scenarios across four `Active` RFCs: `envelope-reasoning-shape.test.ts` (RFC 0030, always-on; asserts the OPTIONAL `reasoning` property on the three universal-kind schemas + the `schema.response` deliberate omission), `envelope-reasoning-secret-redaction.test.ts` (RFC 0030, capability-gated on `capabilities.envelopes.reasoning.supported` + `secrets.supported`; 5 `it.todo()` placeholders for SECURITY invariant `envelope-reasoning-secret-redaction`), `envelope-tier-one-subset-static.test.ts` (RFC 0030, always-on for load-bearing rules — no `oneOf` / `allOf` / `not` / `prefixItems` / `propertyNames` anywhere; gated on `tierOneSubsetCompliance: "strict"` for OpenAI-strict-only constraints), `envelope-variant-discriminator-static.test.ts` (RFC 0031, always-on; asserts no `oneOf` + every `anyOf` branch declares a single-string-enum discriminator in `required` on every `schemas/envelopes/*.schema.json`), `model-capability-substituted.test.ts` (RFC 0031, advertisement-shape probe on `capabilities.modelCapabilities.advertised[]` identifier pattern + 5 `it.todo()` placeholders for SECURITY invariant `model-capability-substituted-no-credential-disclosure`), `model-capability-insufficient.test.ts` (RFC 0031, 6 `it.todo()` placeholders for refusal + no-recursive-fallback), `node-module-required-capabilities-shape.test.ts` (RFC 0031 SHOULD-tier authoring-convention; 4 `it.todo()` placeholders), and the six envelope-reliability events from RFC 0032 (`envelope-retry-attempted` carrying the shared advertisement-shape probe enforcing both MUST-tier events in `events[]` per RFC 0032 §C, plus `envelope-retry-exhausted`, `envelope-refusal-shape`, `envelope-truncated`, `envelope-nl-to-format-engaged`, `envelope-recovery-applied` — 
collectively 39 `it.todo()` placeholders covering retry/refusal/truncation/recovery + SECURITY invariants `envelope-refusal-no-prompt-leak` and `envelope-recovery-no-content-leak`), plus RFC 0033's two scenarios (`envelope-completion-distinguishes-truncation.test.ts` + `envelope-truncation-cap-exhaustion.test.ts` — 12 `it.todo()` placeholders covering the truncation-vs-schema-violation retry-routing distinction + the DoS-bound assertion). Reference workflow-engine sample advertises `capabilities.envelopes.reasoning: { supported: true, promptDirective: "off" }` + `tierOneSubsetCompliance: "warn"` honestly (schemas accept the field; host doesn't yet inject the directive); the other three RFCs' capability blocks defer to reference-host emission code per the staged RFC 0027 §G precedent. 2026-05-20 (RFC 0028 §B Phase B — prompt-pack boot-time install) added `prompt-pack-install.test.ts` (capability-gated on `capabilities.prompts.endpointsSupported: true`; asserts a host that ran the boot-time pack loader surfaces ≥ 1 pack-source template under `GET /v1/prompts?source=pack` carrying the canonical `meta.source: "pack"` + `meta.packName` + `meta.packVersion` stamps; positively identifies the in-tree `vendor.openwop.prompt-sample` reference pack's `writer-system` template when present). Pairs with the new `host/promptPackLoader.ts` boot-time entry on the reference workflow-engine sample, which scans `examples/packs/*` plus `OPENWOP_PROMPT_PACKS_DIR` and calls `installPackTemplates()` for each `kind: "prompt"` pack found. 
2026-05-20 (RFC 0029 Phase C — prompt resolution chain wire shape) added three more scenarios: `prompt-resolution-chain-node-wins.test.ts` (capability-gated on `capabilities.prompts.supported: true`; asserts layer-1 node-config supersedes lower layers per `spec/v1/prompts.md` §"Resolution chain (normative)"), `prompt-resolution-chain-agent-intrinsic.test.ts` (additionally gated on `capabilities.prompts.agentBindings: true`; asserts agent intrinsic `systemPromptRef` wins over `promptOverrides` AND lower layers when the node has no layer-1 ref), `prompt-resolution-chain-fallback-cascade.test.ts` (asserts layer 3 workflow-defaults wins over layer 4 host-defaults; layer 4 host-defaults wins when 1-3 yield null; resolved is null when all four yield null but chain[] still lists every attempted layer). The scenarios drive the host's `POST /v1/host/sample/prompt/resolve` test seam (reference-host implementation deferred to follow-up slice per RFC 0021 staging precedent). 2026-05-20 (RFC 0027 Phase A — prompt templates wire shape) added three scenarios: `prompt-template-shape.test.ts` (always-on; Ajv compileability + positive/negative round-trip for PromptTemplate + PromptRef + PromptKind), `prompt-composed-secret-redaction.test.ts` (capability-gated on `capabilities.prompts.supported: true` + `observability: "full"`; asserts `[REDACTED:<secretId>]` markers in `prompt.composed` payloads for `source: "secret"` variable bindings per SECURITY/threat-model-secret-leakage.md §SR-1), `prompt-composed-trust-marker.test.ts` (same capability gates; asserts `<UNTRUSTED>...</UNTRUSTED>` wrapping + `contentTrust: "untrusted"` propagation per RFC 0020 §D). Paired with new `fixtures/prompt-templates/` sub-directory + per-fixture schema-validity describe block + future SECURITY invariants `prompt-composed-secret-redaction` and `prompt-composed-trust-marker` (lands alongside reference-host emission per RFC 0021 staging precedent). 
2026-05-18 (RFC 0022 `Draft` — runtime variable mapping) added four `it.todo()` placeholder scenarios covering the new mapping surfaces on `core.dispatch` (§A — `dispatch-input-mapping.test.ts`, `dispatch-output-mapping.test.ts`, `dispatch-cross-worker-handoff.test.ts`) and `core.subWorkflow` (§B — `subworkflow-input-mapping.test.ts`). Gated on `capabilities.agents.dispatchMapping` (dispatch trio) and `capabilities.subWorkflow.inputMapping` (subWorkflow). Promote to live assertions when RFC 0022 reaches `Active` + a reference host advertises the matching flags. 2026-05-17 (RFC 0003 §D handoff-schema enforcement, HV-1) added `agentPackHandoffSchemaValidation.test.ts` — verifies the host validates dispatch payloads against `handoff.taskSchemaRef` AND return payloads against `handoff.returnSchemaRef` per RFC 0003 §D. Paired with the new `agent-pack-handoff-schema-enforcement` row in `SECURITY/invariants.yaml`. 2026-05-17 (AI Envelope gap-closure, DRAFT v1.x — `spec/v1/ai-envelope.md`) added 7 advertisement-shape scenarios with `it.todo()` behavioral placeholders gated on `capabilities.envelopeContracts.advertised: true`: `aiEnvelope.universalKinds.test.ts`, `aiEnvelope.schemaDrift.test.ts`, `aiEnvelope.correlationReplay.test.ts`, `aiEnvelope.contractRefusal.test.ts`, `aiEnvelope.trustBoundaryPropagation.test.ts`, `aiEnvelope.redaction.test.ts`, `aiEnvelope.capBreached.test.ts`. Paired with the new `envelope-redaction-sr-1-carry-forward` row in `SECURITY/invariants.yaml`. 2026-05-17 (post-publish hardening, deep audit of `core.openwop.agents`) added `agents-run-tool-allowlist.test.ts` — server-free scenario locking in the `core.openwop.agents@1.0.1` safety-fix that closes `OPENWOP-AUDIT-2026-003` (function-typed `tool.handler` properties rejected at `validateTools()` with `INVALID_TOOL_DECLARATION`; tool-driven runs require `ctx.agentRuntime`; tool-less safe fallback preserved). Paired with the new `agents-run-no-raw-handler` row in `SECURITY/invariants.yaml`. 
Same-day post-publish hardening added `idempotency-key-determinism.test.ts` — server-free scenario locking in the `core.openwop.http@1.1.2` determinism safety-fix (default `composite` mode produces deterministic keys in `(runId, nodeId, payload)`; removed `uuid` mode rejects with `CONFIG_INVALID`; cross-impl vector test lets third-party reimplementations verify wire agreement). Paired with the new `idempotency-key-deterministic` row in `SECURITY/invariants.yaml`. 2026-05-17 (Phase 3 of RFC 0013) added three server-free scenarios exercising the reference workflow-chain expansion library (`conformance/src/lib/workflow-chain-expansion.ts`): `workflow-chain-expansion.test.ts` (parameter substitution + node id collision avoidance + edge rewriting + capability propagation + runtime-invariance contract), `workflow-chain-unresolvable-typeid.test.ts` (rejection with `chain_unresolvable_typeid` when a chain references an unknown typeId), and `workflow-chain-pack-signature-verification.test.ts` (Ed25519 verification recipe reuse from `node-packs.md §Signing`). Earlier that day (Phase 1) added `workflow-chain-pack-manifest-validation.test.ts` — server-free schema-validation scenario covering the new `workflow-chain-pack-manifest.schema.json` (positive sample + two negatives: kind/contents mismatch and invalid `chainId`). Closes RFC 0013 (`Workflow-chain packs`, `Draft`) Phases 1 + 3 alongside the new `spec/v1/workflow-chain-packs.md`, the `Capabilities.workflowChainPacks` block, and the registry build-index/conformance-check `kind` routing from Phase 2. Earlier that day, the suite added 27 `it.todo()` placeholder scenarios paired with RFCs 0014-0020 (host capability surfaces — fs, kvStorage, tableStorage, queueBus, sql/vector/search, blob/cache, mcp.serverMount). These promote to live assertions when each RFC reaches `Active` + the matching capability block lands in `schemas/capabilities.schema.json` + a reference host advertises the capability. 
Earlier additions include 18 Multi-Agent Shift scenarios (Phases 1-5) added 2026-05-10, the `registry-public.test.ts` public-registry healthcheck added 2026-05-11 (opt-in via `OPENWOP_TEST_PUBLIC_REGISTRY=true`), the `replay-llm-cache-key.test.ts` placeholder added 2026-05-11 (three `it.todo()` cases for the cross-host LLM cache-key recipe per `replay.md` §"LLM cache-key recipe"), the two `production-*.test.ts` scenarios added 2026-05-11 for the `openwop-production` profile per RFC 0009 (`production-backpressure.test.ts`, `production-retention-expiry.test.ts`), the four `auth-*.test.ts` scenarios added 2026-05-11/12 for the production-auth profiles per RFC 0010 (`auth-api-key-rotation.test.ts`, `auth-oauth2-client-credentials.test.ts`, `auth-oidc-user-bearer.test.ts`, `auth-mtls.test.ts` (opt-in via `OPENWOP_TEST_MTLS=1`)), `replay-retention-expiry.test.ts` added 2026-05-12 (capability shape + 410/422 envelope per `replay.md` §"Retention and garbage collection"), `bulk-cancel.test.ts` added 2026-05-12 (Phase B close-out of R1 — `POST /v1/runs:bulk-cancel`), the two Phase H launch-blocker advertisement-contract scenarios added 2026-05-12 (`mcp-toolcall-redaction.test.ts` for the MCP-1 invariant per `host-capabilities.md §host.mcp` + `threat-model-prompt-injection.md §UNTRUSTED`, and `http-client-ssrf.test.ts` for the SSRF + body-size cap advertisement contract on `capabilities.httpClient`), the `wasm-pack-abi-version-rejection.test.ts` Track 7 scenario added 2026-05-12 for the ABI-mismatch positive path via the `vendor.openwop.misbehaving-abi` pack per RFC 0008 §H, the `otel-trace-propagation-subworkflow.test.ts` Track 11 close-out added 2026-05-13 (parent + child run spans share the inbound traceparent's traceId across the `core.subWorkflow` dispatch boundary), and the three RFC 0012 (Memory Compaction Profile, `Active`) scenarios added 2026-05-13/14: `memory-compaction-sr1-carry-forward.test.ts` (load-bearing SR-1 §D), `memory-compaction-event-emitted.test.ts` 
(canonical §B payload shape), and `memory-compaction-provenance-tag.test.ts` (soft assertion on §C `compacted-from:<id>` convention). All three gate on `capabilities.memory.compaction.supported` + the host's test seam at `/v1/test/memory/{seed,compact}` (Postgres reference host enables both via `OPENWOP_MEMORY_COMPACTION=true OPENWOP_TEST_TRIGGER_COMPACTION=true`). 2026-05-15 (gap-closure CF-3) added `interrupt-token-matrix.test.ts` (malformed / unknown / replay / cross-run-id paths on `GET|POST /v1/interrupts/{token}`). The maintained scenario-to-spec map lives in [`coverage.md`](./coverage.md); this README keeps the operator quickstart and the historical scenario notes below.
97
97
 
98
98
  High-level coverage includes:
99
99
 
@@ -172,7 +172,7 @@ Server-required (added in 1.7.0):
172
172
  |---|---|---|
173
173
  | **Redaction** | [`capabilities.md`](../spec/v1/capabilities.md) §"Secrets" + NFR-7 + §"aiProviders" | Vendor-neutral assertions that the server doesn't leak secret material. Three scenario groups: (a) discovery shape contract — `secrets` + `aiProviders` advertisements are well-formed regardless of `secrets.supported`; when `supported === true`, scopes MUST be non-empty + `resolution === 'host-managed'`; `byok ⊆ supported`. (b) bearer-token redaction — invalid Bearer canary in `Authorization` header is not echoed in the 401 response body. (c) credentialRef echo control — gated on `secrets.supported === true`; canary planted in `configurable.ai.credentialRef` MUST NOT appear in any RunEvent payload (poll-based capture; transport-agnostic). Uses runtime-built canary fixtures (`lib/canaries.ts`) that defeat static secret scanners. 6 scenarios. |
174
174
 
175
- Current source tree: 205 scenario files. Use [`coverage.md`](./coverage.md) for current grade/gap tracking.
175
+ Current source tree: 231 scenario files. Use [`coverage.md`](./coverage.md) for current grade/gap tracking.
176
176
 
177
177
  ## Remaining Gaps
178
178
 
package/api/asyncapi.yaml CHANGED
@@ -433,9 +433,14 @@ components:
433
433
  summary: |
434
434
  Type-erased handler for `debug` mode — discriminate on the
435
435
  `type` field per the `RunEventType` enum in the run-event
436
- JSON Schema. Includes events filtered out of `updates`:
437
- `log.appended`, `variable.changed`, `version.pinned`,
438
- `lease.*`, `node.retried`, `replay.diverged`, etc.
436
+ JSON Schema (the authoritative, exhaustive event list; the
437
+ named messages above are a curated `updates`-tier subset).
438
+ Includes events filtered out of `updates`: `log.appended`,
439
+ `variable.changed`, `version.pinned`, `lease.*`, `node.retried`,
440
+ `replay.diverged`, `connector.authorized`,
441
+ `connector.auth_expired` (RFC 0047), `authorization.decided`
442
+ (RFC 0049), `approval.granted` / `approval.rejected` /
443
+ `approval.overridden` (RFC 0051), etc.
439
444
  contentType: application/json
440
445
  payload:
441
446
  $ref: '#/components/schemas/RunEventDoc'
package/api/openapi.yaml CHANGED
@@ -61,6 +61,13 @@ tags:
61
61
  description: Audit-log integrity verification (gated on the `openwop-audit-log-integrity` profile).
62
62
  - name: prompts
63
63
  description: Prompt-template library — list, fetch, render, mutate (RFC 0028; gated on `capabilities.prompts.*`).
64
+ - name: packs-test
65
+ description: |
66
+ RFC 0025 (`Draft`). Test-mode mirror of the production `/v1/packs/*` publish/get/delete/sig surface against
67
+ an isolated catalog. Gated on `capabilities.packs.testMode.supported: true` plus the reference impl's
68
+ `OPENWOP_PACKS_TEST_NAMESPACE_ENABLED=true` env-gate. Lets the conformance suite exercise the documented
69
+ 19-code publish error catalog without `packs:publish` scope on the real registry. Hosts that haven't
70
+ mounted this surface MUST return `404 Not Found` for every path under `/v1/packs-test/`.
64
71
 
65
72
  # ─────────────────────────────────────────────────────────────────────────────
66
73
  # PATHS
@@ -502,6 +509,54 @@ paths:
502
509
  application/json:
503
510
  schema: { $ref: '#/components/schemas/Error' }
504
511
 
512
+ /v1/runs/{runId}:diff:
513
+ get:
514
+ tags: [runs]
515
+ summary: |
516
+ RFC 0054 — return a deterministic, replay-aware structured diff of
517
+ two runs (typically a run and its RFC 0011 fork): `divergedAtSeq` +
518
+ ordered `eventDiffs[]` + `stateDiff`. The diff is a pure function of
519
+ the two event logs (see `replay.md` determinism contract). Requires
520
+ `runs:read` on BOTH runs. Hosts that don't implement it return 404.
521
+ operationId: diffRun
522
+ parameters:
523
+ - $ref: '#/components/parameters/RunId'
524
+ - name: against
525
+ in: query
526
+ required: true
527
+ description: The other run id to diff `{runId}` against (the `b` run).
528
+ schema: { type: string }
529
+ responses:
530
+ '200':
531
+ description: |
532
+ Structured diff of the two runs. `divergedAtSeq: null` + empty
533
+ `eventDiffs` when the logs are identical.
534
+ content:
535
+ application/json:
536
+ schema:
537
+ $ref: '../schemas/run-diff-response.schema.json'
538
+ '400':
539
+ description: Missing or malformed `against` query parameter.
540
+ content:
541
+ application/json:
542
+ schema: { $ref: '#/components/schemas/Error' }
543
+ '401': { $ref: '#/components/responses/Unauthenticated' }
544
+ '403':
545
+ description: |
546
+ Caller lacks `runs:read` on `{runId}` and/or on `against`
547
+ (`forbidden`); composes with RFC 0048 cross-workspace
548
+ isolation.
549
+ content:
550
+ application/json:
551
+ schema: { $ref: '#/components/schemas/Error' }
552
+ '404':
553
+ description: |
554
+ Either run doesn't exist, OR the host doesn't implement the diff
555
+ endpoint and treats the path as absent.
556
+ content:
557
+ application/json:
558
+ schema: { $ref: '#/components/schemas/Error' }
559
+
505
560
  /v1/runs/{runId}:pause:
506
561
  post:
507
562
  tags: [runs]
@@ -1146,6 +1201,209 @@ paths:
1146
1201
  schema:
1147
1202
  $ref: '../schemas/error-envelope.schema.json'
1148
1203
 
1204
+ # ── Test-mode pack registry namespace (RFC 0025) ─────────────────────────
1205
+ # Mirrors the production /v1/packs/* PUT/GET/DELETE/.sig surface against an
1206
+ # isolated catalog. Conformance scenarios under
1207
+ # `conformance/src/scenarios/pack-registry-publish.test.ts` exercise the
1208
+ # 19-code publish error catalog through this namespace. Hosts that don't
1209
+ # advertise `capabilities.packs.testMode.supported: true` MUST return
1210
+ # 404 for every path below.
1211
+
1212
+ /v1/packs-test/{name}/-/{version}.tgz:
1213
+ parameters:
1214
+ - $ref: '#/components/parameters/PackName'
1215
+ - $ref: '#/components/parameters/PackVersion'
1216
+ put:
1217
+ tags: [packs-test]
1218
+ summary: Publish a pack tarball to the isolated test catalog.
1219
+ description: |
1220
+ Mirror of `PUT /v1/packs/{name}/-/{version}.tgz` per
1221
+ `spec/v1/node-packs.md` §"PUT /v1/packs/{name}/-/{version}.tgz".
1222
+ Request shape, response shape, status codes, and the 19-code
1223
+ publish error catalog (`invalid_pack_scope`, `invalid_pack_name`,
1224
+ `invalid_version`, `invalid_body`, nine `tarball_*` codes,
1225
+ `invalid_manifest`, `manifest_mismatch` (or the granular
1226
+ `manifest_name_mismatch` / `manifest_version_mismatch` pair),
1227
+ `pack_integrity_failure`, `unsupported_runtime`, `forbidden`,
1228
+ `conflict`/`version_conflict`) MUST be served verbatim. The
1229
+ test catalog MUST be isolated per RFC 0025 §C — a pack PUT'd
1230
+ here MUST NOT appear in `GET /v1/packs/{name}` listings.
1231
+ operationId: putTestPackTarball
1232
+ parameters:
1233
+ - in: header
1234
+ name: X-Pack-Signing-Method
1235
+ required: false
1236
+ schema: { type: string, enum: [sigstore, manual, none] }
1237
+ - in: header
1238
+ name: X-Pack-Sha256
1239
+ required: false
1240
+ schema:
1241
+ type: string
1242
+ pattern: '^sha256-[A-Za-z0-9+/=]+$'
1243
+ description: Caller-asserted SHA-256 (server verifies; mismatch surfaces `pack_integrity_failure`).
1244
+ requestBody:
1245
+ required: true
1246
+ description: Gzipped tarball bytes (`application/tar+gzip`, `application/gzip`, `application/x-gzip`, or `application/octet-stream`).
1247
+ content:
1248
+ application/gzip:
1249
+ schema: { type: string, format: binary }
1250
+ application/x-gzip:
1251
+ schema: { type: string, format: binary }
1252
+ application/tar+gzip:
1253
+ schema: { type: string, format: binary }
1254
+ application/octet-stream:
1255
+ schema: { type: string, format: binary }
1256
+ responses:
1257
+ '200':
1258
+ description: Idempotent re-publish — identical sha256 content already published; existing record returned.
1259
+ content:
1260
+ application/json:
1261
+ schema: { $ref: '#/components/schemas/TestPackPublishRecord' }
1262
+ '201':
1263
+ description: New version published to the test catalog.
1264
+ content:
1265
+ application/json:
1266
+ schema: { $ref: '#/components/schemas/TestPackPublishRecord' }
1267
+ '400':
1268
+ description: |
1269
+ One of the 17 spec-documented 400-class publish error codes:
1270
+ URL/scope (`invalid_pack_scope`, `invalid_pack_name`, `invalid_version`),
1271
+ body shape (`invalid_body`),
1272
+ tarball extraction (`tarball_gunzip_failed`, `tarball_too_large`,
1273
+ `tarball_manifest_missing`, `tarball_manifest_too_large`,
1274
+ `tarball_manifest_not_json`, `tarball_entry_missing`,
1275
+ `tarball_entry_too_large`, `tarball_path_traversal`,
1276
+ `tarball_tar_parse_failed`),
1277
+ manifest contents (`invalid_manifest`, `manifest_mismatch` or
1278
+ the granular `manifest_name_mismatch` / `manifest_version_mismatch` pair,
1279
+ `pack_integrity_failure`, `unsupported_runtime`).
1280
+ content:
1281
+ application/json:
1282
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1283
+ '401': { $ref: '#/components/responses/Unauthenticated' }
1284
+ '403':
1285
+ description: '`forbidden` — caller lacks `packs:publish` scope or the namespace claim.'
1286
+ content:
1287
+ application/json:
1288
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1289
+ '404':
1290
+ description: 'Host does not advertise `capabilities.packs.testMode.supported: true`, or the test-mode env-gate is unset.'
1291
+ content:
1292
+ application/json:
1293
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1294
+ '409':
1295
+ description: '`conflict` (or `version_conflict`) — `(name, version)` already published with different content.'
1296
+ content:
1297
+ application/json:
1298
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1299
+ get:
1300
+ tags: [packs-test]
1301
+ summary: Fetch a published test-catalog tarball.
1302
+ description: 'Mirror of `GET /v1/packs/{name}/-/{version}.tgz`. Returns the gzipped tarball bytes with `Content-Type: application/tar+gzip` and an `ETag: "sha256-..."` matching the manifest''s `tarballSha256`.'
1303
+ operationId: getTestPackTarball
1304
+ responses:
1305
+ '200':
1306
+ description: Tarball bytes.
1307
+ content:
1308
+ application/tar+gzip:
1309
+ schema: { type: string, format: binary }
1310
+ '400':
1311
+ description: '`invalid_pack_name` or `invalid_version` — URL params malformed.'
1312
+ content:
1313
+ application/json:
1314
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1315
+ '401': { $ref: '#/components/responses/Unauthenticated' }
1316
+ '403':
1317
+ description: '`forbidden` — caller lacks `packs:read` scope.'
1318
+ content:
1319
+ application/json:
1320
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1321
+ '404':
1322
+ description: 'Pack version not found in the test catalog (or host does not advertise `capabilities.packs.testMode.supported: true`).'
1323
+ content:
1324
+ application/json:
1325
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1326
+ /v1/packs-test/{name}/-/{version}:
1327
+ parameters:
1328
+ - $ref: '#/components/parameters/PackName'
1329
+ - $ref: '#/components/parameters/PackVersion'
1330
+ delete:
1331
+ tags: [packs-test]
1332
+ summary: Unpublish a test-catalog version (mirrors unpublish-window semantics).
1333
+ description: |
1334
+ Mirror of `DELETE /v1/packs/{name}/-/{version}` per
1335
+ `spec/v1/node-packs.md`. Returns `400 unpublish_window_expired`
1336
+ for versions older than the registry's unpublish window
1337
+ (default 72h). Test-mode implementations MAY shorten the
1338
+ window for tractable conformance fixtures but MUST surface
1339
+ the same error code.
1340
+ operationId: deleteTestPackVersion
1341
+ responses:
1342
+ '204':
1343
+ description: Version successfully unpublished from the test catalog.
1344
+ '400':
1345
+ description: '`unpublish_window_expired`, `invalid_pack_name`, or `invalid_version`.'
1346
+ content:
1347
+ application/json:
1348
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1349
+ '401': { $ref: '#/components/responses/Unauthenticated' }
1350
+ '403':
1351
+ description: '`forbidden` — caller lacks `packs:publish` scope.'
1352
+ content:
1353
+ application/json:
1354
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1355
+ '404':
1356
+ description: Version doesn't exist in the test catalog, or host does not advertise the test-mode capability.
1357
+ content:
1358
+ application/json:
1359
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1360
+
1361
+ /v1/packs-test/{name}/-/{version}.sig:
1362
+ parameters:
1363
+ - $ref: '#/components/parameters/PackName'
1364
+ - $ref: '#/components/parameters/PackVersion'
1365
+ get:
1366
+ tags: [packs-test]
1367
+ summary: Fetch the detached Ed25519 signature for a test-catalog pack.
1368
+ description: |
1369
+ Mirror of `GET /v1/packs/{name}/-/{version}.sig`. Returns the
1370
+ signature blob over `pack.json` for this version. MAY 302-redirect
1371
+ to a storage-backend signed URL.
1372
+ operationId: getTestPackSignature
1373
+ responses:
1374
+ '200':
1375
+ description: Signature blob.
1376
+ content:
1377
+ application/octet-stream:
1378
+ schema: { type: string, format: binary }
1379
+ '302':
1380
+ description: Redirect to a storage-backend signed URL (clients SHOULD follow).
1381
+ headers:
1382
+ Location:
1383
+ schema: { type: string, format: uri }
1384
+ '400':
1385
+ description: '`invalid_pack_name` or `invalid_version` — URL params malformed.'
1386
+ content:
1387
+ application/json:
1388
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1389
+ '401': { $ref: '#/components/responses/Unauthenticated' }
1390
+ '403':
1391
+ description: '`forbidden` — caller lacks `packs:read` scope.'
1392
+ content:
1393
+ application/json:
1394
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1395
+ '404':
1396
+ description: |
1397
+ `signature_not_available` — version is missing, yanked,
1398
+ unsigned at publish time, OR the registry's storage backend
1399
+ is unwired. The four cases are intentionally
1400
+ indistinguishable per spec/v1/node-packs.md §"GET /v1/packs/{name}/-/{version}.sig".
1401
+ Also returned when the host does not advertise
1402
+ `capabilities.packs.testMode.supported: true`.
1403
+ content:
1404
+ application/json:
1405
+ schema: { $ref: '../schemas/error-envelope.schema.json' }
1406
+
1149
1407
  # ─────────────────────────────────────────────────────────────────────────────
1150
1408
  # COMPONENTS
1151
1409
  # ─────────────────────────────────────────────────────────────────────────────
@@ -1189,6 +1447,25 @@ components:
1189
1447
  Duplicate requests return the cached response with header
1190
1448
  `openwop-Idempotent-Replay: true`.
1191
1449
 
1450
+ PackName:
1451
+ in: path
1452
+ name: name
1453
+ required: true
1454
+ schema:
1455
+ type: string
1456
+ minLength: 3
1457
+ maxLength: 214
1458
+ description: Reverse-DNS pack name per `spec/v1/node-packs.md` §Naming (e.g. `vendor.acme.salesforce-tools`).
1459
+
1460
+ PackVersion:
1461
+ in: path
1462
+ name: version
1463
+ required: true
1464
+ schema:
1465
+ type: string
1466
+ pattern: '^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-[\w.-]+)?(?:\+[\w.-]+)?$'
1467
+ description: SemVer 2.0.0 version of the pack.
1468
+
1192
1469
  responses:
1193
1470
  Unauthenticated:
1194
1471
  description: Missing or invalid credential.
@@ -1304,3 +1581,31 @@ components:
1304
1581
  supported:
1305
1582
  type: array
1306
1583
  items: { type: string, enum: [values, updates, messages, debug] }
1584
+
1585
+ TestPackPublishRecord:
1586
+ description: |
1587
+ Response body for a successful publish against the test-mode
1588
+ registry namespace (RFC 0025). Mirror of the production publish
1589
+ record returned by `PUT /v1/packs/{name}/-/{version}.tgz`.
1590
+ type: object
1591
+ required: [name, version, tarballSha256, publishedAt]
1592
+ properties:
1593
+ name:
1594
+ type: string
1595
+ description: Reverse-DNS pack name as PUT'd.
1596
+ version:
1597
+ type: string
1598
+ description: SemVer 2.0.0 version as PUT'd.
1599
+ tarballSha256:
1600
+ type: string
1601
+ pattern: '^sha256-[A-Za-z0-9+/=]+$'
1602
+ description: Server-computed SHA-256 over the uploaded tarball bytes.
1603
+ publishedAt:
1604
+ type: string
1605
+ format: date-time
1606
+ signed:
1607
+ type: boolean
1608
+ description: Whether a sibling `.sig` signature blob was persisted.
1609
+ signingMethod:
1610
+ type: string
1611
+ enum: [sigstore, manual, none]