@openwop/openwop-conformance 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/CHANGELOG.md +60 -0
  2. package/README.md +2 -2
  3. package/api/asyncapi.yaml +8 -3
  4. package/api/openapi.yaml +305 -0
  5. package/coverage.md +35 -10
  6. package/fixtures/conformance-phase4-nondet-tool.json +53 -0
  7. package/fixtures/conformance-phase4-replay-divergence.json +40 -0
  8. package/fixtures.md +5 -3
  9. package/package.json +1 -1
  10. package/schemas/README.md +2 -0
  11. package/schemas/capabilities.schema.json +176 -3
  12. package/schemas/credential-reference.schema.json +21 -0
  13. package/schemas/node-pack-manifest.schema.json +112 -1
  14. package/schemas/run-diff-response.schema.json +64 -0
  15. package/schemas/run-event-payloads.schema.json +104 -2
  16. package/schemas/run-event.schema.json +8 -1
  17. package/schemas/run-snapshot.schema.json +11 -0
  18. package/src/lib/behavior-gate.ts +51 -0
  19. package/src/lib/driver.ts +13 -1
  20. package/src/lib/saml-idp.ts +179 -0
  21. package/src/scenarios/approval-gate-events.test.ts +61 -0
  22. package/src/scenarios/approval-gate-flow.test.ts +68 -0
  23. package/src/scenarios/auth-saml-profile.test.ts +119 -0
  24. package/src/scenarios/auth-scim-profile.test.ts +65 -0
  25. package/src/scenarios/authorization-fail-closed.test.ts +80 -0
  26. package/src/scenarios/authorization-roles-shape.test.ts +83 -0
  27. package/src/scenarios/connector-manifest-validity.test.ts +142 -0
  28. package/src/scenarios/credential-payload-redaction.test.ts +93 -0
  29. package/src/scenarios/credentials-capability-shape.test.ts +90 -0
  30. package/src/scenarios/cross-engine-append-behavior.test.ts +204 -0
  31. package/src/scenarios/cross-host-traceparent-propagation.test.ts +13 -6
  32. package/src/scenarios/cross-workspace-isolation.test.ts +72 -0
  33. package/src/scenarios/deadletter-capability-shape.test.ts +59 -0
  34. package/src/scenarios/deadletter-retry-exhaustion.test.ts +62 -0
  35. package/src/scenarios/experimental-tier-shape.test.ts +192 -0
  36. package/src/scenarios/identity-owner-shape.test.ts +64 -0
  37. package/src/scenarios/multi-agent-confidence-escalation.test.ts +59 -21
  38. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +87 -12
  39. package/src/scenarios/multi-region-idempotency-behavior.test.ts +203 -0
  40. package/src/scenarios/oauth-capability-shape.test.ts +97 -0
  41. package/src/scenarios/oauth-connector-redaction.test.ts +91 -0
  42. package/src/scenarios/pack-registry-isolation.test.ts +108 -0
  43. package/src/scenarios/pack-registry-publish.test.ts +1 -1
  44. package/src/scenarios/prompt-mutation-workspace-membership-enforced.test.ts +126 -0
  45. package/src/scenarios/prompt-read-workspace-membership-enforced.test.ts +183 -0
  46. package/src/scenarios/replay-divergence-at-refusal.test.ts +187 -7
  47. package/src/scenarios/replay-observable-sequence-determinism.test.ts +20 -6
  48. package/src/scenarios/run-diff.test.ts +143 -0
  49. package/src/scenarios/sandbox-capability-gate-respected.test.ts +15 -13
  50. package/src/scenarios/sandbox-memory-cap.test.ts +7 -8
  51. package/src/scenarios/sandbox-mvp-behavior.test.ts +280 -0
  52. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +14 -13
  53. package/src/scenarios/sandbox-no-host-env-leak.test.ts +14 -21
  54. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +20 -15
  55. package/src/scenarios/sandbox-no-host-process-escape.test.ts +18 -13
  56. package/src/scenarios/sandbox-no-network-escape.test.ts +14 -31
  57. package/src/scenarios/sandbox-timeout-cap.test.ts +7 -8
  58. package/src/scenarios/scheduling-capability-shape.test.ts +81 -0
  59. package/src/scenarios/scheduling-cron-fires-once.test.ts +66 -0
  60. package/src/scenarios/secret-leakage-otel-attribute.test.ts +241 -0
  61. package/src/scenarios/spec-corpus-validity.test.ts +2 -2
package/coverage.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # OpenWOP Conformance Coverage Map
2
2
 
3
- > **Status: Living document. Updated 2026-05-11.** This map connects the current scenario files to the protocol surfaces they protect and records the remaining gaps from the protocol deep dive. Scenario names are source-of-truth file names under `conformance/src/scenarios/`.
3
+ > **Status: Living document. Updated 2026-05-25.** This map connects the current scenario files to the protocol surfaces they protect and records the remaining gaps from the protocol deep dive. Scenario names are source-of-truth file names under `conformance/src/scenarios/`.
4
4
 
5
5
  > **Shape grade vs behavior grade.** Some optional-profile scenarios validate **capability shape** (the host's discovery advertisement is well-formed) without yet exercising **behavior** (the host actually implements the profile end-to-end). The "Current grade" column reflects shape; see §"Capability-gated scenarios: shape vs behavior" below for the dual-grade view and the `OPENWOP_REQUIRE_BEHAVIOR=true` strict-mode runner flag.
6
6
 
@@ -40,10 +40,14 @@
40
40
  | Envelope variant discrimination + model capabilities (RFC 0031 — `spec/v1/ai-envelope.md` §"Variant payload discrimination (normative)", `spec/v1/host-capabilities.md` §"Model-capability declarations", `spec/v1/node-packs.md` §"Model-capability declarations on NodeModules") | `envelope-variant-discriminator-static.test.ts`, `model-capability-substituted.test.ts`, `model-capability-insufficient.test.ts`, `node-module-required-capabilities-shape.test.ts` | B+ (discriminator-static + advertisement-shape always-on; 14 live behavioral assertions across substitution + insufficient + authoring-convention, capability-gated) | RFC 0031 promoted Draft → Active 2026-05-20. `envelope-variant-discriminator-static` (always-on) walks every `schemas/envelopes/*.schema.json` asserting no `oneOf` at any nesting depth (Gemini silently drops `oneOf`, producing looser-than-declared schemas — a silent correctness bug) AND every `anyOf` branch declares a single-string-`enum` discriminator in `required` per RFC 0031 §A. `model-capability-substituted` (capability-gated on `capabilities.modelCapabilities.supported` + `substitutionSupported`) carries advertisement-shape check on the `advertised: string[]` pattern (each identifier matches the spec-reserved set OR `^x-host-<host>-<key>$` per RFC 0031 §C) + 4 live behavioral assertions covering substitution emission + SECURITY invariant `model-capability-substituted-no-credential-disclosure`'s all-or-nothing `"[REDACTED]"` redaction option. `model-capability-insufficient` (capability-gated on `modelCapabilities.supported`) carries 6 live behavioral assertions covering refusal emission paths + the no-recursive-fallback constraint (RFC 0031 §"Unresolved questions" #3 — `fallbackAttempted: true` when the declared fallback itself fails; NO chaining). `node-module-required-capabilities-shape` (SHOULD-tier authoring convention check) carries 4 live assertions for the `core.ai.*` typeId-pattern recommendation. Path to `Accepted`: reference host implements `executor/modelCapabilityGate.ts` end-to-end + advertises `capabilities.modelCapabilities: { supported: true, advertised: [...], substitutionSupported: true }` (the live behavioral assertions soft-skip cleanly on hosts that haven't wired the executor yet). |
41
41
  | Envelope-reliability run-event vocabulary (RFC 0032 — `spec/v1/ai-envelope.md` §"Envelope-reliability events" + line-448 scope clarification, `spec/v1/observability.md` §"Envelope-reliability events (RFC 0032)") | `envelope-retry-attempted.test.ts`, `envelope-retry-exhausted.test.ts`, `envelope-refusal-shape.test.ts`, `envelope-truncated.test.ts`, `envelope-nl-to-format-engaged.test.ts`, `envelope-recovery-applied.test.ts` | B (1 shared advertisement-shape probe with MUST-events enforcement; 34 live behavioral assertions across the six events, all capability- + fixture-gated) | RFC 0032 promoted Draft → Active 2026-05-20. Carries the central `ai-envelope.md` line-448 scope clarification (per-kind routing events forbidden; cross-kind operational events permitted via RFC). `envelope-retry-attempted` carries the shared advertisement-shape probe: when `capabilities.envelopes.reliability.supported: true`, the host MUST list both `envelope.retry.exhausted` AND `envelope.refusal` in `events[]` (the two MUST-tier events per RFC 0032 §C); `maxRetryAttempts` MUST be in `[1, 16]`. The six scenarios collectively carry 34 live behavioral assertions (drained 2026-05-19 via the conformance `mock` provider + `POST /v1/host/sample/test/mock-ai/program` seam): retry on schema-violation + retry on truncation + retry-exhausted terminal failure + provider refusal (no-retry MUST per RFC 0032 §B.3 + RFC 0033 §D) + truncation cut-off + NL-to-Format escalation (Tam et al. mitigation per arXiv 2408.02442) + lenient-parsing recovery + SECURITY invariants `envelope-refusal-no-prompt-leak` (BYOK + prompt-content redaction on `refusalText`) and `envelope-recovery-no-content-leak` (no pre-recovery substrings in the event payload). Path to `Accepted`: reference host implements `executor/envelopeReliability.ts` end-to-end + advertises `capabilities.envelopes.reliability: { supported: true, events: [...], maxRetryAttempts: <n> }` (the behavioral assertions already pass against the reference host's end-to-end emission path under `OPENWOP_ENVELOPE_RELIABILITY_END_TO_END=true`; the no-flag default still soft-skips). |
42
42
  | Envelope-completion retry routing (RFC 0033 — `spec/v1/ai-envelope.md` §"Envelope-completion criteria", `spec/v1/observability.md` §"Envelope-completion retry routing (RFC 0033)") | `envelope-completion-distinguishes-truncation.test.ts`, `envelope-truncation-cap-exhaustion.test.ts` | B− (1 advertisement-shape probe on `completion.{distinguishesTruncation, truncationBudgetMultiplier}`; 9 live behavioral assertions across the two retry paths + the DoS-bound assertion) | RFC 0033 promoted Draft → Active 2026-05-20. Closes `spec/v1/ai-envelope.md` §"Open spec gaps" E5 (refusal-mode + retry-policy interaction). Reuses RFC 0032's event vocabulary; introduces NO new event types. `envelope-completion-distinguishes-truncation` (capability-gated on `completion.distinguishesTruncation: true`) carries 5 live behavioral assertions covering both retry paths — truncation MUST increase output budget (RECOMMENDED 2× per `truncationBudgetMultiplier`) WITHOUT a corrective fragment; schema-violation MUST add a corrective fragment WITHOUT a budget change. `envelope-truncation-cap-exhaustion` carries 4 live behavioral assertions covering the DoS-bound assertion (truncation retries count against `Capabilities.limits.schemaRounds`; exhaustion → `envelope.retry.exhausted { finalReason: "truncation" }` + `cap.breached { kind: "schema" }` + node fails with NEW error code `envelope_truncation_unrecoverable` per RFC 0033 §F). All 9 assertions are fixture- + capability-gated against the conformance `mock` provider via `POST /v1/host/sample/test/mock-ai/program`. Path to `Accepted`: reference host implements the truncation-vs-schema-violation retry-routing branch end-to-end (`executor/envelopeReliability.ts` + `stop_reason` inspection in `aiProviders/aiProvidersHost.ts`) + advertises `capabilities.envelopes.reliability.completion.distinguishesTruncation: true`. |
43
- | Multi-agent execution model + handoff state machine (RFC 0037 Phase 1 — `spec/v1/multi-agent-execution.md`) | `multi-agent-handoff-state-machine.test.ts` | B (1 advertisement-shape probe + 1 behavioral 4-event causation-chain assertion against the parent+child fixture pair) | RFC 0037 Phase 1 filed Draft → promoted Active 2026-05-21 after spec + schema + scenario landed atomically. Advertisement-shape probe asserts `capabilities.multiAgent.executionModel.{supported, version ∈ [1,4]}` when present. Behavioral assertion drives the `conformance-multi-agent-handoff` parent + `conformance-multi-agent-handoff-child` fixture pair: runs the supervisor → next-worker → child completed loop and asserts the 4 `core.workflowChain.event` records appear in the exact phase sequence `dispatch.began → dispatch.succeeded → child.completed → output.harvested` with each event's `causationId === prior.eventId` and `dispatch.began.causationId === runOrchestrator.decided.eventId`, plus `output.harvested.harvestedKeys === ['parentResult']` (proves the spec §"Transition events" table on real wire). Reference workflow-engine advertises + emits end-to-end when `OPENWOP_MULTI_AGENT_EXECUTION_MODEL=true`; the no-flag default soft-skips honestly. Path to `Accepted`: non-steward host advertises + the behavioral assertion passes against it. |
44
- | Multi-agent Phase 2 confidence-floor escalation (RFC 0039 — `spec/v1/multi-agent-execution.md` §"Confidence escalation") | `multi-agent-confidence-escalation.test.ts` | B (1 advertisement-shape probe on `confidenceEscalationFloor` + 1 behavioral assertion against the low-confidence fixture) | RFC 0039 Phase 2 filed Draft → promoted Active 2026-05-22 after the confidence-floor half landed end-to-end. Advertisement-shape probe asserts `capabilities.multiAgent.executionModel.confidenceEscalationFloor` (when present) is a number in `[0.5, 1.0]`; values below the spec floor are non-conformant. Behavioral assertion drives the `conformance-multi-agent-confidence-escalation` fixture (supervisor `mockDispatchPlan` carries one decision with `confidence: 0.3`) and asserts: parent reaches `waiting-clarification` (NOT `completed` because no dispatch fired); exactly ONE `core.workflowChain.confidence-escalated` event with `payload.confidence === 0.3`, `payload.floor ∈ [0.5, 1.0]`, `payload.escalationKind ∈ {clarify, escalate}`; causationId chains back to the `runOrchestrator.decided` event; ZERO `core.workflowChain.event` records (the load-bearing distinction from Phase 1 — confidence floor MUST fire BEFORE any dispatch.began). Reference workflow-engine advertises `version: 2` + `confidenceEscalationFloor: 0.5` when both `OPENWOP_MULTI_AGENT_EXECUTION_MODEL=true` AND `OPENWOP_MULTI_AGENT_EXECUTION_MODEL_PHASE_2=true` are set; floor tunable via `OPENWOP_MULTI_AGENT_CONFIDENCE_FLOOR`. Path to `Accepted`: non-steward host advertises `version: 2` + the behavioral assertion passes against it. Memory-lifecycle half of RFC 0039 (MAE-2/3) remains explicit follow-up: `crossChildMemoryConcurrency` capability field is schema-landed but the host's MemoryAdapter doesn't yet implement either contract. |
43
+ | Multi-agent execution model + handoff state machine (RFC 0037 — `spec/v1/multi-agent-execution.md`, `version: 1`) | `multi-agent-handoff-state-machine.test.ts` | B (1 advertisement-shape probe + 1 behavioral 4-event causation-chain assertion against the parent+child fixture pair) | RFC 0037 filed Draft → promoted Active 2026-05-21 after spec + schema + scenario landed atomically. Advertisement-shape probe asserts `capabilities.multiAgent.executionModel.{supported, version ∈ [1,4]}` when present. Behavioral assertion drives the `conformance-multi-agent-handoff` parent + `conformance-multi-agent-handoff-child` fixture pair: runs the supervisor → next-worker → child completed loop and asserts the 4 `core.workflowChain.event` records appear in the exact phase sequence `dispatch.began → dispatch.succeeded → child.completed → output.harvested` with each event's `causationId === prior.eventId` and `dispatch.began.causationId === runOrchestrator.decided.eventId`, plus `output.harvested.harvestedKeys === ['parentResult']` (proves the spec §"Transition events" table on real wire). Reference workflow-engine advertises + emits end-to-end when `OPENWOP_MULTI_AGENT_EXECUTION_MODEL=true`; the no-flag default soft-skips honestly. Path to `Accepted`: non-steward host advertises + the behavioral assertion passes against it. |
44
+ | Multi-agent confidence-floor escalation (RFC 0039 — `spec/v1/multi-agent-execution.md` §"Confidence escalation", `version: 2`) | `multi-agent-confidence-escalation.test.ts` | B (1 advertisement-shape probe on `confidenceEscalationFloor` + 1 behavioral assertion against the low-confidence fixture) | RFC 0039 filed Draft → promoted Active 2026-05-22 after the confidence-floor half landed end-to-end. Advertisement-shape probe asserts `capabilities.multiAgent.executionModel.confidenceEscalationFloor` (when present) is a number in `[0.5, 1.0]`; values below the spec floor are non-conformant. Behavioral assertion drives the `conformance-multi-agent-confidence-escalation` fixture (supervisor `mockDispatchPlan` carries one decision with `confidence: 0.3`) and asserts: parent reaches `waiting-clarification` (NOT `completed` because no dispatch fired); exactly ONE `core.workflowChain.confidence-escalated` event with `payload.confidence === 0.3`, `payload.floor ∈ [0.5, 1.0]`, `payload.escalationKind ∈ {clarify, escalate}`; causationId chains back to the `runOrchestrator.decided` event; ZERO `core.workflowChain.event` records (the load-bearing distinction from `version: 1` — confidence floor MUST fire BEFORE any dispatch.began). Reference workflow-engine advertises `version: 2` + `confidenceEscalationFloor: 0.5` when both `OPENWOP_MULTI_AGENT_EXECUTION_MODEL=true` AND `OPENWOP_MULTI_AGENT_EXECUTION_MODEL_PHASE_2=true` are set; floor tunable via `OPENWOP_MULTI_AGENT_CONFIDENCE_FLOOR`. Path to `Accepted`: non-steward host advertises `version: 2` + the behavioral assertion passes against it. Memory-lifecycle half of RFC 0039 (MAE-2/3) remains explicit follow-up: `crossChildMemoryConcurrency` capability field is schema-landed but the host's MemoryAdapter doesn't yet implement either contract. |
45
45
  | Sandbox execution contract (RFC 0035 — `spec/v1/host-capabilities.md` §"Sandbox execution contract") | `sandbox-no-host-fs-escape.test.ts`, `sandbox-no-host-env-leak.test.ts`, `sandbox-no-network-escape.test.ts`, `sandbox-no-host-process-escape.test.ts`, `sandbox-memory-cap.test.ts`, `sandbox-timeout-cap.test.ts`, `sandbox-capability-gate-respected.test.ts`, `sandbox-no-cross-pack-mutation.test.ts` | C+ (advertisement-shape probes always-on; 8 capability-gated behavioral stubs scaffolded; soft-skip on hosts that don't advertise `capabilities.sandbox.supported`) | RFC 0035 promoted Draft → Active 2026-05-21. 8 scenarios, one per `node-pack-sandbox-*` invariant in `SECURITY/invariants.yaml`. Behavioral assertions remain stubbed with `expect(true).toBe(true)` + docstring expected-wire-shape pending the synthetic `vendor.openwop.misbehaving-sandbox` pack + a first sandbox-executing reference host. Path to `Accepted`: first sandbox-executing host advertises + implements the 8 failure-mode invariants + the 8 scenarios pass; at that point the 8 `node-pack-sandbox-*` SECURITY rows graduate from `reference-impl` → `protocol` tier per RFC 0035 §"Acceptance criteria." |
46
- | Multi-region idempotency + cross-engine append-ordering (RFC 0036 — `spec/v1/idempotency.md` §"`multiRegion` sub-block", `spec/v1/replay.md` §"Cross-region replay") | `multi-region-idempotency.test.ts`, `cross-engine-append-ordering.test.ts` | C+ (2 categorical-shape probes always-on + 1 granular `multiRegion` shape probe + 1 `crossEngineOrdering` shape probe; behavioral assertions deferred to simulator landing per RFC 0036 §C) | RFC 0036 promoted Draft Active 2026-05-21. The existing `multi-region-idempotency.test.ts` covers the categorical `capabilities.idempotency.crossRegion ∈ {single-region, best-effort, strict}` claim plus the matching operator-tier metric names; a third describe block added 2026-05-21 covers the granular `capabilities.idempotency.multiRegion.{supported, replicationLagBoundMs, partitionRecoveryStrategy}` advertisement shape (`replicationLagBoundMs ∈ [0, 60000]`; `partitionRecoveryStrategy ∈ {last-writer-wins, first-writer-wins}` OR `^x-host-<host>-<key>$`). NEW `cross-engine-append-ordering.test.ts` covers `capabilities.eventLog.crossEngineOrdering.{supported, orderingModel {lamport, vector-clock, global-sequencer}}` shape. Behavioral two-engine-append-then-cross-read assertion deferred until the Postgres reference host's multi-region simulator lands per RFC 0036 §C. Path to `Accepted`: simulator + behavioral conformance pass against the reference host; non-steward host advertises the same. |
46
+ | Multi-region idempotency + cross-engine append-ordering (RFC 0036 — `spec/v1/idempotency.md` §"`multiRegion` sub-block", `spec/v1/replay.md` §"Cross-region replay") | `multi-region-idempotency.test.ts`, `cross-engine-append-ordering.test.ts`, **`multi-region-idempotency-behavior.test.ts` (2026-05-22)**, **`cross-engine-append-behavior.test.ts` (2026-05-22)** | A (2 categorical-shape probes always-on + 1 granular `multiRegion` shape probe + 1 `crossEngineOrdering` shape probe + 6 multi-region behavioral assertions + 4 cross-engine Lamport-ordering behavioral assertions; all 10 behavioral assertions PASS against the reference workflow-engine when `OPENWOP_TEST_MULTI_REGION_SIMULATOR=true` + `OPENWOP_TEST_CROSS_ENGINE_HARNESS=true` are set) | RFC 0036 §B + §C behavioral close-out landed 2026-05-22 via the new workflow-engine test seams (`POST /v1/host/sample/test/multi-region/simulate-partition` + `POST/GET /v1/host/sample/test/cross-engine/{append,read,reset}`) — see `spec/v1/host-sample-test-seams.md` §6 + §7. The new `multi-region-idempotency-behavior.test.ts` exercises the canonical lex-min convergence rule + order-invariance + 400-on-mismatch; the new `cross-engine-append-behavior.test.ts` exercises Lamport-clock monotonicity + per-engine order preservation + read-determinism. Path to `Accepted`: non-steward host advertises matching capabilities + the behavioral assertions pass against it. |
47
+ | Secret-leakage telemetry / debug-bundle export (RFC 0034 §B — `spec/v1/host-capabilities.md` §"OTel collector test seam") | **`secret-leakage-otel-attribute.test.ts` (2026-05-22)** | A− (3 capability-gated probes — OTel span scrape + debug-bundle scrape + advertisement-shape; soft-skips honestly until host advertises `capabilities.observability.testSeams.{otelScrape, debugBundleExport}` AND `capabilities.secrets.supported` AND `OPENWOP_CANARY_SECRET_VALUE` env is set) | Broadens the existing protocol-tier `secret-leakage-otel-attribute` + `secret-leakage-debug-bundle-otel` SECURITY invariants from envelope-acceptor-narrow (already covered by `envelope-reasoning-secret-redaction.test.ts`) to executor-side-broad. Drives the existing `openwop-smoke-byok-roundtrip` fixture; scrapes both seams after run completion; hard-fails if the BYOK canary plaintext appears in any OTel span attribute or debug-bundle field. |
48
+ | Experimental capability tier (RFC 0042 — `schemas/capabilities.schema.json` §`multiAgent.executionModel.tier`) | **`experimental-tier-shape.test.ts` (2026-05-22)** | A (6 server-free + helper-routing assertions across §A schema discipline + §D experimentalGate routing; always-on for hosts that advertise `tier: 'experimental'` on any capability sub-block; helper-level behavioral probes for the `experimentalGate()` routing under both default + `OPENWOP_REQUIRE_EXPERIMENTAL` modes) | RFC 0042 (Draft) lands the audit's "Active RFC → carve-out" pattern. Schema diff lands on `multiAgent.executionModel` with optional `tier ∈ {stable, experimental}` + `experimentalUntil` (ISO-8601 sunset) + `if/then` conditional enforcing §B sunset MUST mechanically. New `experimentalGate()` helper in `conformance/src/lib/behavior-gate.ts` routes scenarios under default mode + `OPENWOP_REQUIRE_EXPERIMENTAL=true` strict-mode. |
49
+ | Sandbox MVP behavioral close-out (RFC 0035 §B) | **`sandbox-mvp-behavior.test.ts` (2026-05-22)** | A (10 capability-gated behavioral assertions covering 7 of 8 §B failure-mode invariants — 5 escape kinds + timeout + memory-exceeded + cross-pack-mutation isolation + capability-gate-violation + 2 well-behaved baselines; all 10 PASS against the workflow-engine's node:vm-based sandbox MVP) | Companion to the existing 8 advertisement-shape sandbox scenarios (`sandbox-no-host-fs-escape.test.ts` et al.). Exercises the canonical 4-code error catalog at `spec/v1/host-capabilities.md` §"Error codes" (`sandbox_escape_attempt` + `sandbox_capability_denied` + `sandbox_memory_exceeded` + `sandbox_timeout`) with spec-mandated `details.{escapeKind, requestedCapability, requestedBytes}` populated. Wire-shape per `spec/v1/host-sample-test-seams.md §8`. Production adopters use wasmtime/nsjail behind the same HTTP test-seam contract. |
50
+ | RFC 0041 §B replay-divergence-at-refusal behavioral (`version: 4`) | `replay-divergence-at-refusal.test.ts` (advertisement-shape + behavioral; 3 assertions PASS against workflow-engine when the `multiAgent.executionModel.version: 4` advertisement is enabled) | A (was `it.todo` until 2026-05-23 when the executor wiring landed — see commits `1fce55a` + `bba3b4a`. Behavioral assertions cover both divergence directions: original=valid + replay=refusal AND original=refusal + replay=valid) | Closes Track #4 of the 2026-05-22 multi-agent behavioral-harness close-out. Reference workflow-engine emits `replay.divergedAtRefusal` event + fails run with `error.code: 'replay_diverged_at_refusal'` when source vs replay envelope kinds differ at the same nodeId. Gated on `OPENWOP_MULTI_AGENT_EXECUTION_MODEL_PHASE_4=true` AND `run.forkMode === 'replay'`. Path-to-Accepted for RFC 0041: non-steward host advertises `multiAgent.executionModel.version: 4` end-to-end. |
47
51
 
48
52
  ---
49
53
 
@@ -71,6 +75,23 @@ Twenty-two scenario groups validate optional profiles where the host's discovery
71
75
  | `replay-retention-expiry.test.ts` | `openwop-replay-fork` (`replay.md` §"Retention and garbage collection") | B (capability shape always; 410/422 envelope on expired-range fork gated on `OPENWOP_TEST_EXPIRED_REPLAY_RUN_ID`; details.{sourceRunId, fromSeq, retentionBoundary} soft-checks per spec SHOULD) | `host-pending` | Reference host advertises `replay.supported: true` + operator produces a known-expired run id (no standardized force-expire endpoint per RFC 0009 Q#1). |
72
76
  | `discovery.test.ts` — auth-scoped subtests (3 of them) | `openwop-discovery-auth-scoped` (`capabilities-change-detection.md` §"Scoped capability views", RFC 0011) | B (capability shape + mode/endpointPath typing always; required-field-preservation in authenticated view always; authorization-oracle probe gated on `OPENWOP_TEST_UNAUTHORIZED_API_KEY`) | `host-pending` | Reference host advertises `capabilities.discovery.authScoped.supported: true` + serves an authenticated capability view that satisfies the base schema + a tenant-scoped key pair for the oracle probe. |
73
77
  | `fs-path-traversal.test.ts` | `capabilities.fs` (RFC 0014, `host-fs-capability.md`) | A (advertisement shape + two path-escape probes asserting `path_outside_sandbox`) | host-pass (workflow-engine reference) | Reference host advertises `capabilities.fs.supported: true` with sandboxRoot under `<dataDir>/host-fs`. |
78
+ | `credentials-capability-shape.test.ts` | `capabilities.credentials` (RFC 0046, `host-capabilities.md` §host.credentials) | A (advertisement shape always — `supported` boolean; `scopes` ⊆ {user,workspace,tenant}; `rotation` ∈ {none,two-key-overlap}) | `host-pending` | Always runs; asserts the block is absent or well-formed. No host advertises `capabilities.credentials` yet (RFC 0046 `Draft`). |
79
+ | `credential-payload-redaction.test.ts` | `capabilities.credentials` (RFC 0046) + `SECURITY/invariants.yaml` `credential-payload-redaction` | A (advertisement shape always; redaction MUST-NOT via optional `POST /v1/host/sample/credentials/echo` seam — canary plaintext absent from all observable surfaces) | `host-pending` | Capability-gated on `credentials.supported`; behavioral probe soft-skips on 404 when the seam is unwired, mirroring `fs-path-traversal`. |
80
+ | `oauth-capability-shape.test.ts` | `capabilities.oauth` (RFC 0047, `host-capabilities.md` §host.oauth) | A (advertisement shape always — `supported` boolean; `grants` ⊆ {authorization_code,client_credentials,refresh_token}; every `providers[].id` non-empty) | `host-pending` | Always runs; asserts the block is absent or well-formed. No host advertises `capabilities.oauth` yet (RFC 0047 `Draft`). |
81
+ | `oauth-connector-redaction.test.ts` | `capabilities.oauth` (RFC 0047) + `SECURITY/invariants.yaml` `credential-payload-redaction` | A (advertisement shape always; token-material redaction via optional `POST /v1/host/sample/oauth/connector-echo` seam — canary token absent from all observable surfaces; `connector.authorized` carries the ref not the token) | `host-pending` | Capability-gated on `oauth.supported`; behavioral probe soft-skips on 404. Reuses the RFC 0046 redaction invariant (OAuth tokens are stored as host.credentials entries). |
82
+ | `connector-manifest-validity.test.ts` | `node-pack-manifest.schema.json` §Connector (RFC 0045, `node-packs.md` §Connectors) | Server-free (schema validity of the `connector` block incl. both ConnectorAuth variants + positive/negative round-trip; §B action/trigger typeId-resolution semantics — `connector_action_unresolved` on an unknown typeId) | host-pass (server-free) | Always runs; no host needed. Behavioral idempotency-hint + rate-limit-honored scenarios deferred until a host advertises a connector. |
83
+ | `identity-owner-shape.test.ts` | `run-snapshot.schema.json` properties.owner (RFC 0048 §C, `auth.md` §Identity claims) | Server-free (owner triple schema validity: positive `{tenant}` + full triple; negative missing-tenant + unknown-prop) | host-pass (server-free) | Always runs; no host needed. |
84
+ | `cross-workspace-isolation.test.ts` | RFC 0048 §C/§D (`auth.md` §Identity claims, `rest-endpoints.md` `run_forbidden`) | A (owner-echo shape if a sample run is readable; §D isolation MUST-NOT via optional `POST /v1/host/sample/identity/cross-workspace-read` seam — cross-workspace read fails closed with `run_forbidden`/`not_found`) | `host-pending` | Behavioral probe soft-skips on 404; no host advertises run ownership yet (RFC 0048 `Draft`). |
85
+ | `authorization-roles-shape.test.ts` | `capabilities.authorization` (RFC 0049 §A, `auth.md` §"Role-based authorization") | A (advertisement shape always — `supported` boolean; `failClosed` const true; every `roles[].role` non-empty + `scopes` array) | `host-pending` | Always runs; asserts the block is absent or well-formed. |
86
+ | `authorization-fail-closed.test.ts` | `capabilities.authorization` (RFC 0049 §C) + `SECURITY/invariants.yaml` `authorization-fail-closed` | A (advertisement `failClosed===true` always; fail-closed MUST-NOT via optional `POST /v1/host/sample/authorization/decide` seam — an unseeded-role principal resolves `allowed:false`) | `host-pending` | Capability-gated on `authorization.supported`; behavioral probe soft-skips on 404. Scope-match + denial-audited scenarios deferred to a host. |
87
+ | `auth-saml-profile.test.ts` | `openwop-auth-saml` (RFC 0050, `auth-profiles.md` §`openwop-auth-saml`) | A+B (profile-advertisement shape always; **1-positive + 6-negative reference suite runs server-free** via the bundled synthetic IdP `conformance/src/lib/saml-idp.ts` — `alg:none`/unsigned/bad-sig/expired/not-yet-valid/wrapping; host-ACS validation opt-in via `OPENWOP_TEST_SAML_IDP_URL` + the `auth/saml/validate` seam) | host-pass (server-free reference) | Synthetic IdP bundled (`node:crypto`, no deps). Host-ACS pass is the remaining graduation gate. |
88
+ | `auth-scim-profile.test.ts` | `openwop-auth-scim` (RFC 0050, `auth-profiles.md` §`openwop-auth-scim`) | B (profile-advertisement shape always; SCIM user/group provisioning → principal/role roundtrip opt-in via `OPENWOP_TEST_SCIM_URL` + the `auth/scim/provision` seam) | `host-pending` | Behavior opt-in (operator-supplied SCIM endpoint); deactivate ⇒ subsequent-deny assertion deferred to a host. |
89
+ | `approval-gate-events.test.ts` | `approval.granted` / `.rejected` / `.overridden` (RFC 0051 §B, `interrupt-profiles.md` §approvalGate) | Server-free (event-payload schema validity: required fields incl. mandatory `overridden.reason`; additionalProperties:false negatives) | host-pass (server-free) | Always runs; no host needed. |
90
+ | `approval-gate-flow.test.ts` | `core.openwop.governance.approvalGate` (RFC 0051 §A) + `capabilities.authorization` (RFC 0049) | A (capability-gated on `authorization.supported`; unauthorized-principal-denied + override-audited via the `governance/approval-gate` seam) | `host-pending` | Behavioral probe soft-skips on 404. Grant/reject-loopback/quorum scenarios deferred until a governance host wires the seam. |
91
+ | `scheduling-capability-shape.test.ts` | `capabilities.scheduling` (RFC 0052 §A, `host-capabilities.md` §host.scheduling) | A (advertisement shape always — `supported` boolean; `cron`/`delayed`/`calendar` booleans; `maxFutureHorizon` ISO-8601 duration) | `host-pending` | Always runs; asserts the block is absent or well-formed. |
92
+ | `scheduling-cron-fires-once.test.ts` | `capabilities.scheduling` (RFC 0052 §B) | A (once-per-tick + missed-tick MUST-NOT via optional `POST /v1/host/sample/scheduling/tick` seam — single tick fires exactly one run; missed window never floods) | `host-pending` | Capability-gated on `scheduling.supported` + `cron`; soft-skips on 404. Delayed-horizon + calendar scenarios deferred. |
93
+ | `deadletter-capability-shape.test.ts` | `capabilities.deadLetter` (RFC 0053 §A, `host-capabilities.md` §host.deadLetter) | A (advertisement shape always — `supported` boolean; `retentionDays` integer ≥ 1) | `host-pending` | Always runs; asserts the block is absent or well-formed. |
94
+ | `deadletter-retry-exhaustion.test.ts` | `capabilities.deadLetter` (RFC 0053 §C) + `run.dead_lettered` event | A (retry-exhaustion → `run.dead_lettered` with `attempts` + dead-lettered run fork-eligible, via optional `POST /v1/host/sample/deadletter/exhaust` seam) | `host-pending` | Capability-gated on `deadLetter.supported`; soft-skips on 404. Retention-purge scenario deferred (needs clock seam). |
74
95
  | `kv-cross-tenant-isolation.test.ts`, `kv-atomic-increment.test.ts`, `kv-cas.test.ts` (three scenarios) | `capabilities.kvStorage` (RFC 0015, `host-kv-storage-capability.md`) + `SECURITY/invariants.yaml` `kv-cross-tenant-isolation` | A (advertisement shape always; behavioral cross-tenant `set`/`get`, 50× concurrent atomic increment convergence, CAS matching/stale-expect) | host-pass via opt-in test seam | Reference host exposes `POST /v1/host/sample/test/surface` env-gated on `OPENWOP_TEST_SEAM_ENABLED=true`; hosts that don't expose the seam soft-skip the behavioral assertions and verify advertisement shape only. |
75
96
  | `table-cross-tenant-isolation.test.ts` | `capabilities.tableStorage` (RFC 0016, `host-table-storage-capability.md`) | A (advertisement shape + behavioral cross-tenant insert/query proof) | host-pass via opt-in test seam | Same seam dependency as kv row. |
76
97
  | `queue-cross-tenant-isolation.test.ts` | `capabilities.queueBus` (RFC 0017, `host-queue-bus-capability.md`) + `SECURITY/invariants.yaml` `queue-cross-tenant-isolation` | A (advertisement shape + behavioral cross-tenant publish/consume proof) | host-pass via opt-in test seam | Same seam dependency as kv row. |
@@ -80,6 +101,8 @@ Twenty-two scenario groups validate optional profiles where the host's discovery
80
101
  | `prompt-end-to-end-events.test.ts`, `prompt-resolution-chain-node-wins.test.ts`, `prompt-resolution-chain-fallback-cascade.test.ts` (three scenarios) | `prompts-supported` profile — gates on `capabilities.prompts.supported: true` (RFC 0027 + RFC 0029, `spec/v1/prompts.md`) | A (advertisement shape always + end-to-end resolve + emit during real workflow dispatch; resolution chain Layers 1, 3, 4 exercised) | host-pass (workflow-engine reference) | Reference host advertises `capabilities.prompts.supported: true` since RFC 0027 ref-impl landed; dispatch wiring in `bootstrap/nodes.ts` walks the resolution chain and emits `agent.promptResolved` + `prompt.composed` per spec/v1/prompts.md §"Composition + observability". |
81
102
  | `prompt-pack-install.test.ts`, `prompt-list-and-fetch.test.ts`, `prompt-render-deterministic.test.ts` (three scenarios) | `prompts-endpoints` profile — gates on `capabilities.prompts.endpointsSupported: true` (RFC 0028 §A, `spec/v1/prompts.md` §"Discovery & distribution") | A (advertisement shape always + list/get/render contract + pack-source provenance stamps + ETag honoring when supported) | host-pass (workflow-engine reference) | Reference host serves the six `/v1/prompts*` routes via `routes/prompts.ts` against the in-memory `PromptStore`. Pack-install existence claim opt-in via `OPENWOP_TEST_PROMPT_PACK_INSTALLED=true` (the in-tree `vendor.openwop.prompt-sample` pack auto-installs via `promptPackLoader.ts`). |
82
103
  | `prompt-mutable-lifecycle.test.ts` | `prompts-mutable` profile — gates on `capabilities.prompts.mutableLibrary: true` (RFC 0028 §C) | A (advertisement shape + CRUD lifecycle + pack/host source 403-on-mutation) | host-pass (workflow-engine reference) | Reference host advertises `mutableLibrary: true`; user-source templates accepted, pack + host-built-in templates return 403 on POST/PUT/DELETE. |
104
+ | `prompt-mutation-workspace-membership-enforced.test.ts` | `prompts-mutable` profile — gates on `capabilities.prompts.mutableLibrary: true` (RFC 0028 Tier-2 follow-up, post-promotion) + `SECURITY/invariants.yaml` `prompt-mutation-workspace-membership-enforced` | A (advertisement gate + cross-workspace write refusal — drives `POST /v1/prompts` with a random non-member `workspaceId`, asserts any 4xx/5xx; on 403 specifically, additionally pins canonical `error === "workspace_membership_required"` envelope per `rest-endpoints.md` §"Common error codes"; other refusal codes unconstrained) | capability-gated (no reference-host membership backend yet; soft-skips cleanly until a host wires the workspace-member resolver) | Filed 2026-05-25 in response to a MyndHyve self-disclosed Admin-SDK-bypasses-DB-rules vulnerability on revision `00207-vzq`. T1 canonicalization same-day (2026-05-25) added the 403-envelope check. Operator override via `OPENWOP_TEST_NONMEMBER_WORKSPACE_ID`. |
105
+ | `prompt-read-workspace-membership-enforced.test.ts` | `prompts-supported` profile — gates on `capabilities.prompts.supported: true` (broader than `mutableLibrary` per MyndHyve relay Option B: read-only hosts that expose `?workspaceId=` reads are NOT exempt from the symmetric authz invariant) + `SECURITY/invariants.yaml` `prompt-read-workspace-membership-enforced` | A (advertisement gate + cross-workspace read refusal — drives `GET /v1/prompts?workspaceId=<random-non-member>`, interprets response: 4xx PASS with canonical envelope check on 403; 200 with empty `templates[]` PASS as correct null result; 200 with non-empty `templates[]` FAIL as cross-tenant leak; 200 without `templates[]` field SKIP via response-shape detection — host doesn't expose workspace-scoped reads) | capability-gated (no reference-host workspace-scoped read backend yet; soft-skips cleanly on the response-shape detection) | T2 sister scenario filed 2026-05-25 alongside T1; same threat model as the write scenario but probes the read path. Read paths are NOT exempt from cross-tenant authz — a `GET ?workspaceId=<not-mine>` that returns another workspace's templates is a data leak with the same blast radius as a cross-tenant write. Uses response-shape detection (rather than a new capability field) to self-skip hosts without workspace-scoped reads. Operator override via `OPENWOP_TEST_NONMEMBER_WORKSPACE_ID`. |
83
106
  | `prompt-resolution-chain-agent-intrinsic.test.ts` | `prompts-agent-bindings` profile — gates on `capabilities.prompts.agentBindings: true` (RFC 0029 §A Layer 2) | A (advertisement shape + Layer 2 agent intrinsic / overrides / library-default precedence over Layers 3-4) | host-pass (workflow-engine reference) | Reference host advertises `agentBindings: true` so Layer 2 sub-layers (agent-intrinsic / agent-overrides / agent-library-default) walk per RFC 0029 §B. |
84
107
  | `prompt-composed-secret-redaction.test.ts`, `prompt-composed-trust-marker.test.ts` (two scenarios) | `prompts-observability-full` profile — gates on `prompts.supported + observability: "full"` (RFC 0027 §E + RFC 0020 §D) + `SECURITY/invariants.yaml` `prompt-composed-secret-redaction` + `prompt-composed-trust-marker` | A (advertisement shape + `[REDACTED:<credentialRef>]` markers for secret-source bindings + `<UNTRUSTED>...</UNTRUSTED>` wrapping + `contentTrust: "untrusted"` propagation) | host-pass (workflow-engine reference) | Reference host advertises `observability: "full"` (sourced from `host/promptHostConfig.ts`). Composition pipeline in `host/promptCompose.ts` enforces SR-1 carry-forward + untrusted-content marker per `SECURITY/threat-model-secret-leakage.md` §SR-1. |
85
108
 
@@ -118,18 +141,20 @@ Every OpenAPI operation should have:
118
141
  | `pauseRun` | `pause-resume.test.ts` covers direct route behavior for running → paused, idempotent re-pause, terminal conflict, and pause-during-suspend race | Conflict and race paths covered with `details.runStatus`; endpoint is no longer coverage-missing | Add explicit immediate-vs-drain-current-node policy assertion when a host advertises both drain policies. |
119
142
  | `resumeRun` | `pause-resume.test.ts` covers direct route behavior for paused → running and non-paused conflict | Conflict path covered with `details.runStatus`; endpoint is no longer coverage-missing | Good. |
120
143
  | `forkRun` | `replay-fork.test.ts`, `replayDeterminism.test.ts` | Negative `fromSeq`, past-end, unknown source, invalid overlay | Add arbitrary-event fork and retention-expired source. |
144
+ | `diffRun` | `run-diff.test.ts` (RFC 0054); soft-skips on 404 when the endpoint is unimplemented | Self-diff `divergedAtSeq: null`/empty (determinism floor), two-fixture divergence with `eventDiffs[0].seq === divergedAtSeq`, response-shape + `stateDiff` redaction-safety, `400` (missing `against`) + `404` (nonexistent `against`) | Add a bespoke deterministically-divergent fork fixture for `divergedAtSeq === N`-at-a-chosen-seq; full cross-principal `403` needs a multi-principal harness. |
121
145
  | `resolveInterruptByRun` | `interrupt-approval.test.ts`, `interrupt-clarification.test.ts`, `approval-payload.test.ts`, `interruptRace.test.ts` | Invalid action, unknown node, race cases | Add auth-required and quorum profile scenarios. |
122
146
  | `inspectInterruptByToken` | `interrupt-token-matrix.test.ts` (CF-3, 2026-05-15) covers malformed + unknown token paths | Negative paths covered | Add explicit expired-token case when a host advertises a TTL seam. |
123
147
  | `resolveInterruptByToken` | `interrupt-token-matrix.test.ts` covers replay (already-resolved) + unknown token; `interrupt-external-event-correlation.test.ts` covers positive path | Replay path + unknown-token path covered with explicit assertions | Add wrong-action case once the host advertises a typed allowed-actions vocabulary in the interrupt manifest. |
124
148
  | `getArtifact` | Indirect through approval payload fixtures | `route-coverage.test.ts` covers unknown artifact `404`/`403` envelope; `artifact-auth.test.ts` (CF-4 close-out 2026-05-15; SQLite host 401-before-404 stub landed 2026-05-19, closes the info-leak surface for every HTTP method) covers `401` unauthenticated path. Negative paths covered (401 + 405 non-GET + 404/403) | Add positive artifact-read scenario once a reference host implements `getArtifact` end-to-end. |
125
149
  | `registerWebhook` | Webhook spec exists | `route-coverage.test.ts` covers invalid URL validation envelope | Add positive registration with a test receiver when harness support exists. |
126
150
  | `unregisterWebhook` | Webhook spec exists | `route-coverage.test.ts` covers unknown subscription behavior | Add full register-then-unregister roundtrip with a test receiver. |
127
- | `listPromptTemplates` | `prompt-template-shape.test.ts` covers schema shape + advertisement contract for `capabilities.prompts.*`; capability-gated behavioral list-with-filter scenarios deferred to RFC 0028 acceptance gate | n/a yet endpoint surface in spec only (RFC 0028 Draft); reference host hasn't implemented the route yet | Add positive list-with-filter scenario + auth-failure + invalid-cursor scenarios once a reference host implements the route. |
128
- | `createPromptTemplate` | None endpoint surface in spec only (RFC 0028 Draft) | n/a yet | Add positive create + `409` duplicate + `501` not-mutable-library + auth/scope scenarios once a reference host implements the route. |
129
- | `getPromptTemplate` | None endpoint surface in spec only | n/a yet | Add positive fetch + `404` unknown + `400` ambiguous-libraryId + `ETag` revalidation scenarios. |
130
- | `updatePromptTemplate` | None endpoint surface in spec only | n/a yet | Add positive update + `409` non-monotonic-version + `403` pack-sourced-readonly + `501` not-mutable-library scenarios. |
131
- | `deletePromptTemplate` | None endpoint surface in spec only | n/a yet | Add positive delete + `403` pack-sourced-readonly + `404` unknown + `501` not-mutable-library scenarios. |
132
- | `renderPromptTemplate` | `prompt-composed-secret-redaction.test.ts` + `prompt-composed-trust-marker.test.ts` exercise the compose pipeline via the `/v1/host/sample/prompt/compose` host-extension seam; capability-gated. The spec'd `POST /v1/prompts:render` endpoint shares the same composition pipeline (RFC 0028 §A deterministic-render invariant matches RFC 0027 §F replay invariant). | Composition redaction + trust-marker invariants covered via the seam | Add positive `:render` via the spec'd endpoint + `400 prompt_variable_unresolved` + `404 template_not_found` once a reference host implements the route. |
151
+ | `listPromptTemplates` | `prompt-template-shape.test.ts` + `prompt-list-and-fetch.test.ts` cover schema shape + advertisement contract + list/get contract for `capabilities.prompts.*` against the reference workflow-engine (RFC 0028 `Active` — endpoints live under `apps/workflow-engine/backend/typescript/src/routes/prompts.ts`) | Behavioral list + advertisement-shape covered | Add cross-host list-with-filter parity scenario when a second host advertises `endpointsSupported: true`. |
152
+ | `createPromptTemplate` | `prompt-mutable-lifecycle.test.ts` covers CRUD lifecycle against the reference workflow-engine (gated on `mutableLibrary: true`); user-source POST succeeds, pack + host-built-in templates return 403 | Positive create + readonly-source 403 path covered | Add explicit `409` duplicate-id scenario + auth/scope matrix scenarios. |
153
+ | `getPromptTemplate` | `prompt-list-and-fetch.test.ts` covers positive fetch + ambiguous-libraryId + ETag honoring when host advertises it | Positive fetch + 404 + ETag covered | Good — minor gap is the `400 ambiguous_template_id` cross-library disambiguation matrix. |
154
+ | `updatePromptTemplate` | `prompt-mutable-lifecycle.test.ts` covers positive update + non-monotonic-version conflict + pack-sourced-readonly 403 against the reference workflow-engine | Positive update + 403 readonly-source + 409 conflict covered | Add `501` not-mutable-library negative for hosts that advertise `mutableLibrary: false`. |
155
+ | `deletePromptTemplate` | `prompt-mutable-lifecycle.test.ts` covers positive delete + pack-sourced-readonly 403 against the reference workflow-engine | Positive delete + 403 readonly-source covered | Add `501` not-mutable-library negative + `404` unknown-template scenarios. |
156
+ | `renderPromptTemplate` | `prompt-render-deterministic.test.ts` exercises `POST /v1/prompts:render` end-to-end against the reference workflow-engine; deterministic-hash invariant verified across `:render` + `prompt.composed` event paths. `prompt-composed-secret-redaction.test.ts` + `prompt-composed-trust-marker.test.ts` exercise the shared compose pipeline via the `/v1/host/sample/prompt/compose` seam | Deterministic render + composition redaction + trust-marker invariants covered | Add `400 prompt_variable_unresolved` matrix for missing variables across all four PromptKinds. |
157
+ | `putTestPackTarball`, `getTestPackTarball`, `deleteTestPackVersion`, `getTestPackSignature` | `pack-registry-publish.test.ts` covers the 19-code publish error catalog through the RFC 0025 `/v1/packs-test/*` mirror namespace, gated on `capabilities.packs.testMode.supported: true` (RFC 0025 §A). 26 scenarios soft-skip when the advertisement is absent; when present, the suite exercises URL/scope, body-shape, tarball-extraction, manifest-contents, integrity, auth/conflict, unpublish-window, and signature-endpoint pairing. | Soft-skip on advertisement absence; behavioral on advertisement presence | Add real-tarball-builder fixtures so the manifest_mismatch / pack_integrity_failure / unsupported_runtime branches assert against a meaningful gzip+tar payload (currently soft-skipped with explanatory comments). |
133
158
 
134
159
  ---
135
160
 
@@ -0,0 +1,53 @@
1
+ {
2
+ "id": "conformance-phase4-nondet-tool",
3
+ "name": "Conformance: RFC 0041 §C observable-output-sequence determinism (Phase 4)",
4
+ "version": "1.0",
5
+ "description": "Two-node workflow exercising a nondeterministic tool followed by a structured-output node, used by `replay-observable-sequence-determinism.test.ts` to verify RFC 0041 §C — across original + replay runs of the same workflow against the same engine, the observable RunEventDoc sequence prefix MUST be identical up to and including the nondeterministic-tool node's `node.completed` event. The host's replay path MUST replay the original event log entries (rather than re-executing the tool) for nodes whose `core.tool.*` config carries `nondeterministic: true`. Phase 4 hosts advertising `multiAgent.executionModel.replayDeterminism.supported: true` MUST honor this contract; non-Phase-4 hosts MAY re-execute the tool freely (and consequently observe sequence drift the conformance scenario will not assert against).",
6
+ "nodes": [
7
+ {
8
+ "id": "nondet-tool",
9
+ "typeId": "core.noop",
10
+ "name": "Nondeterministic tool (proxied via core.noop for sample-grade)",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "nondeterministic": true,
14
+ "phase4Probe": true
15
+ },
16
+ "inputs": {}
17
+ },
18
+ {
19
+ "id": "structured-call",
20
+ "typeId": "core.ai.structuredOutput",
21
+ "name": "Structured output via mock provider (consumes nondet-tool result)",
22
+ "position": { "x": 200, "y": 0 },
23
+ "config": {
24
+ "provider": "mock",
25
+ "model": "mock-mini",
26
+ "outputSchema": {
27
+ "type": "object",
28
+ "required": ["valid"],
29
+ "properties": { "valid": { "type": "boolean" } }
30
+ }
31
+ },
32
+ "inputs": {
33
+ "messages": {
34
+ "type": "static",
35
+ "value": [
36
+ { "role": "user", "content": "Please emit a valid envelope." }
37
+ ]
38
+ }
39
+ }
40
+ }
41
+ ],
42
+ "edges": [
43
+ { "id": "e1", "sourceNodeId": "nondet-tool", "targetNodeId": "structured-call" }
44
+ ],
45
+ "triggers": [
46
+ { "id": "manual", "type": "manual", "enabled": true }
47
+ ],
48
+ "variables": [],
49
+ "metadata": {
50
+ "tags": ["conformance", "rfc-0041", "phase-4", "observable-sequence-determinism", "multi-agent-execution"]
51
+ },
52
+ "settings": { "timeout": 30000 }
53
+ }
@@ -0,0 +1,40 @@
1
+ {
2
+ "id": "conformance-phase4-replay-divergence",
3
+ "name": "Conformance: RFC 0041 §B replay-divergence-at-refusal (Phase 4)",
4
+ "version": "1.0",
5
+ "description": "Single `core.ai.structuredOutput` node against the conformance `mock` provider. Conformance scenario `replay-divergence-at-refusal.test.ts` pre-seeds the mock with a two-entry program via `POST /v1/host/sample/test/mock-ai/program` keyed on the structured-call nodeId: entry [0] returns a valid envelope (consumed by the original run); entry [1] returns `stopReason: 'safety'` + `refusalText` (consumed by the `:fork mode: replay`). Phase 4 hosts advertising `multiAgent.executionModel.replayDeterminism.refusalDivergenceEmission: true` MUST detect the divergence at replay time, emit a `replay.divergedAtRefusal` event with `originalEnvelopeKind: 'valid'` + `replayEnvelopeKind: 'refusal'`, and fail the replay with HTTP `422` + `error.code: 'replay_diverged_at_refusal'` per `spec/v1/rest-endpoints.md §\"Common error codes\"`. Silent substitution of the refusal for the original envelope is non-conformant.",
6
+ "nodes": [
7
+ {
8
+ "id": "structured-call",
9
+ "typeId": "core.ai.structuredOutput",
10
+ "name": "Structured output via mock provider (Phase 4 replay-divergence probe)",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "provider": "mock",
14
+ "model": "mock-mini",
15
+ "outputSchema": {
16
+ "type": "object",
17
+ "required": ["valid"],
18
+ "properties": { "valid": { "type": "boolean" } }
19
+ }
20
+ },
21
+ "inputs": {
22
+ "messages": {
23
+ "type": "static",
24
+ "value": [
25
+ { "role": "user", "content": "Please emit a valid envelope." }
26
+ ]
27
+ }
28
+ }
29
+ }
30
+ ],
31
+ "edges": [],
32
+ "triggers": [
33
+ { "id": "manual", "type": "manual", "enabled": true }
34
+ ],
35
+ "variables": [],
36
+ "metadata": {
37
+ "tags": ["conformance", "rfc-0041", "phase-4", "replay-divergence", "multi-agent-execution"]
38
+ },
39
+ "settings": { "timeout": 30000 }
40
+ }
package/fixtures.md CHANGED
@@ -84,9 +84,9 @@ All fixtures MUST advertise:
84
84
  | Dispatch Per-Worker Mapping Override | `conformance-dispatch-per-worker-override` | RFC 0022 §A / HVMAP-1c-override — parent with BOTH a default `inputMapping` (`{ input: 'defaultX' }`) AND `perWorkerInputMappings.child-b: { input: 'sharedVar' }`. Verifies `effectiveInputMapping` precedence per §A: child-a receives the default, child-b receives the override. Reuses `conformance-dispatch-cross-worker-handoff-child-a` + `-child-b`. | `completed` | ≤ 30s |
85
85
  | Dispatch deterministic-fail child | `conformance-dispatch-deterministic-fail-child` | RFC 0022 §B / HVMAP-1b-failed — child workflow that ALWAYS terminates `failed` via `core.fail`. Used by `conformance-dispatch-output-mapping` to verify the parent's `outputMapping` is SKIPPED when the child fails terminally. | `failed` | ≤ 5s |
86
86
  | Dispatch cancellable child | `conformance-dispatch-cancellable-child` | RFC 0022 §B / HVMAP-1b-cancelled — child workflow with a long `core.delay` so the test cancels it externally via `POST /v1/runs/{childRunId}/cancel`. Verifies the parent's `outputMapping` is SKIPPED when the child terminates `cancelled`. | `cancelled` | ≤ 60s |
87
- | Multi-Agent Handoff (parent) | `conformance-multi-agent-handoff` | RFC 0037 Phase 1 — exercises the planner→worker handoff state machine. Supervisor decides one `next-worker`, dispatch spawns the child, harvests outputMapping. Conformance reads the event log for the 4 `core.workflowChain.event` transition records in causation-chained order (`dispatch.began → dispatch.succeeded → child.completed → output.harvested`). Capability-gated on `capabilities.multiAgent.executionModel.supported`. | `completed` | ≤ 30s |
88
- | Multi-Agent Handoff (child) | `conformance-multi-agent-handoff-child` | RFC 0037 Phase 1 — child for `conformance-multi-agent-handoff`. Declares `childOutcome.defaultValue='handoff-complete'`; the parent's outputMapping harvests it onto `parentResult`, triggering the `output.harvested` transition event. | `completed` | ≤ 5s |
89
- | Multi-Agent Confidence Escalation | `conformance-multi-agent-confidence-escalation` | RFC 0039 §A — exercises the Phase 2 confidence-floor escalation contract. Supervisor's `mockDispatchPlan` carries ONE decision with `confidence: 0.3` (below the 0.5 spec floor). The host MUST emit `core.workflowChain.confidence-escalated` AND suspend with a clarification interrupt BEFORE any dispatch.began fires; conformance asserts zero `core.workflowChain.event` records (no dispatch). Capability-gated on `capabilities.multiAgent.executionModel.version >= 2`. | `waiting-clarification` | ≤ 30s |
87
+ | Multi-Agent Handoff (parent) | `conformance-multi-agent-handoff` | RFC 0037 (`version: 1`) — exercises the planner→worker handoff state machine. Supervisor decides one `next-worker`, dispatch spawns the child, harvests outputMapping. Conformance reads the event log for the 4 `core.workflowChain.event` transition records in causation-chained order (`dispatch.began → dispatch.succeeded → child.completed → output.harvested`). Capability-gated on `capabilities.multiAgent.executionModel.supported`. | `completed` | ≤ 30s |
88
+ | Multi-Agent Handoff (child) | `conformance-multi-agent-handoff-child` | RFC 0037 (`version: 1`) — child for `conformance-multi-agent-handoff`. Declares `childOutcome.defaultValue='handoff-complete'`; the parent's outputMapping harvests it onto `parentResult`, triggering the `output.harvested` transition event. | `completed` | ≤ 5s |
89
+ | Multi-Agent Confidence Escalation | `conformance-multi-agent-confidence-escalation` | RFC 0039 §A (`version: 2`) — exercises the confidence-floor escalation contract. Supervisor's `mockDispatchPlan` carries ONE decision with `confidence: 0.3` (below the 0.5 spec floor). The host MUST emit `core.workflowChain.confidence-escalated` AND suspend with a clarification interrupt BEFORE any dispatch.began fires; conformance asserts zero `core.workflowChain.event` records (no dispatch). Capability-gated on `capabilities.multiAgent.executionModel.version >= 2`. | `waiting-clarification` | ≤ 30s |
90
90
  | Agent Memory Round-Trip | `conformance-agent-memory-roundtrip` | Phase 3 — `MemoryAdapter.list/get` write → read | `completed` | ≤ 15s |
91
91
  | Agent Memory Cross-Tenant | `conformance-agent-memory-cross-tenant` | Phase 3 / CTI-1 — cross-tenant probe MUST return `[]` / `null` | `completed` | ≤ 10s |
92
92
  | Agent Memory Redaction | `conformance-agent-memory-redaction` | Phase 3 / SR-1 — BYOK plaintext surfaces as `[REDACTED:<id>]` on read | `completed` | ≤ 15s |
@@ -113,6 +113,8 @@ All fixtures MUST advertise:
113
113
  | Envelope Refusal | `conformance-envelope-refusal` | RFC 0032 §B.3 + RFC 0033 §D + §F end-to-end refusal — mock provider returns `stopReason: 'safety'` with `refusalText`. Host MUST emit exactly one `envelope.refusal` event, NOT retry (RFC 0033 §D), fail with `error.code: 'envelope_refused_by_provider'`, AND keep refusalText off `RunSnapshot.error.message` (SECURITY invariant `envelope-refusal-no-prompt-leak`). | `failed` (`error.code='envelope_refused_by_provider'`) | ≤ 10s |
114
114
  | Envelope Recovery Applied | `conformance-envelope-recovery-applied` | RFC 0032 §B.6 lenient-parse — mock returns a markdown-fenced JSON envelope (```json\\n...\\n```). Host's `dispatchStructured()` lenient-parse fallback (`tryLenientParse()`) strips the fence, emits exactly one `envelope.recovery.applied` with `path: 'markdown-fence'`, and accepts the parsed value WITHOUT counting against the retry budget per RFC 0033 §D. | `completed` | ≤ 10s |
115
115
  | Envelope NL-to-Format Engaged | `conformance-envelope-nl-to-format-engaged` | RFC 0032 §B.5 NL-to-Format fallback — mock returns natural-language prose on the first 3 attempts (exhausting the retry budget); the host detects the NL shape after exhaustion, emits exactly one `envelope.nlToFormat.engaged { originalEnvelopeType, fallbackCalls: 1 }`, then fires ONE additional dispatch with a corrective coercion fragment. The 4th program entry returns valid JSON; the schema validates; the run terminates `completed`. | `completed` | ≤ 10s |
116
+ | Phase 4 Replay Divergence | `conformance-phase4-replay-divergence` | RFC 0041 §B — single `core.ai.structuredOutput` node against mock provider. Conformance scenario pre-seeds a 2-entry program via the existing mock-AI program seam: entry [0] returns a valid envelope (original run consumes); entry [1] returns `stopReason: 'safety'` + `refusalText` (`:fork mode: replay` consumes). Phase 4 hosts advertising `multiAgent.executionModel.replayDeterminism.refusalDivergenceEmission: true` MUST emit `replay.divergedAtRefusal` + fail replay with `error.code: 'replay_diverged_at_refusal'`. Silent substitution is non-conformant. Pairs with `replay-divergence-at-refusal.test.ts`. | original: `completed`; replay: `failed` (`error.code='replay_diverged_at_refusal'`) | ≤ 10s |
117
+ | Phase 4 Nondeterministic Tool | `conformance-phase4-nondet-tool` | RFC 0041 §C — two-node workflow (`core.noop` proxied as a nondeterministic tool → `core.ai.structuredOutput`). Used by `replay-observable-sequence-determinism.test.ts` to verify that across original + replay runs, the observable `RunEventDoc` sequence prefix is identical up to and including the nondeterministic-tool node's `node.completed` event. The host's replay path MUST replay the original event log entries (rather than re-executing the tool) for nodes whose `core.tool.*` config carries `nondeterministic: true`. Phase 4 hosts advertising `multiAgent.executionModel.replayDeterminism.supported: true` honor this contract. | original + replay: `completed`; observable prefixes equal up to the nondet boundary | ≤ 10s |
116
118
 
117
119
  The `messages`-mode stream fixture (AI token streaming) is covered by the deterministic mock-provider surface in `spec/v1/run-options.md`. Hosts that do not advertise `Capabilities.testing.mockProviders` treat those scenarios as skip-equivalent.
118
120
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@openwop/openwop-conformance",
3
- "version": "1.4.0",
3
+ "version": "1.6.0",
4
4
  "description": "Production-ready black-box conformance suite for OpenWOP v1.0 compliant servers.",
5
5
  "repository": {
6
6
  "type": "git",
package/schemas/README.md CHANGED
@@ -17,6 +17,7 @@
17
17
  | `conversation-event.schema.json` | `channels-and-reducers.md` + conversation RFC | Multi-turn conversation event shape for orchestrator-driven HITL flows |
18
18
  | `conversation-turn.schema.json` | `channels-and-reducers.md` + conversation RFC | Conversation turn shape for user/agent/system messages |
19
19
  | `core-conformance-mock-agent-config.schema.json` | `node-packs.md` + RFC 0023 | Config shape for the conformance-only `core.conformance.mock-agent` typeId — drives `agent.*` event emission on cue (`mockReasoning` / `mockToolCalls` / `mockHandoff` / `mockDecision` / `mockConfidence`). Hosts MUST refuse this typeId for production tenants unless `capabilities.conformance.mockAgent` is advertised. |
20
+ | `credential-reference.schema.json` | `host-capabilities.md` §host.credentials + RFC 0046 | Opaque `{ ref, scope }` handle to a host-stored credential — the only credential artifact on the wire; never carries key material |
20
21
  | `debug-bundle.schema.json` | `debug-bundle.md` | Portable run diagnostic export from `GET /v1/runs/{runId}/debug-bundle` |
21
22
  | `dispatch-config.schema.json` | `node-packs.md` + dispatch RFC | Configuration shape for `core.dispatch` / sub-workflow routing |
22
23
  | `error-envelope.schema.json` | `rest-endpoints.md` + `auth.md` | Canonical `{error, message, details?}` shape returned on every non-2xx |
@@ -31,6 +32,7 @@
31
32
  | `registry-version-manifest.schema.json` | `registry-operations.md` | Registry-augmented version manifest served at `GET /v1/packs/{name}/-/{version}.json`. Extends the bare pack-manifest contract with registry-side metadata (integrity hash, signing-block polymorphism, lifecycle flags). Enforced by the `Validate version manifests against registry-version-manifest schema` step in `.github/workflows/registry-publish.yml`. |
32
33
  | `orchestrator-decision.schema.json` | `node-packs.md` + orchestrator RFC | Decision output shape for orchestrator routing nodes |
33
34
  | `run-ancestry-response.schema.json` | `multi-agent-execution.md` + RFC 0040 | Response body for `GET /v1/runs/{runId}/ancestry` — names the run's immediate parent in the cross-host composition chain (or `parent: null` for top-level runs). Capability-gated on `capabilities.multiAgent.executionModel.crossHostCausation.ancestryEndpointSupported`. |
35
+ | `run-diff-response.schema.json` | `rest-endpoints.md` + RFC 0054 | Response body for `GET /v1/runs/{runId}:diff?against={otherRunId}` — deterministic, replay-aware structured diff of two runs (`divergedAtSeq` + `eventDiffs[]` + `stateDiff`). |
34
36
  | `run-event-payloads.schema.json` | `run-event.schema.json` §RunEventType | Per-RunEventType payload contracts, indexed by `$defs.<typeId>` for opt-in strict validation |
35
37
  | `run-event.schema.json` | `version-negotiation.md` + `RunEventDoc` | Event log envelope + event type enum |
36
38
  | `run-options.schema.json` | `run-options.md` | Per-run input overlay (configurable + tags + metadata) on `POST /v1/runs` |
@@ -370,10 +370,10 @@
370
370
  "type": "array",
371
371
  "items": {
372
372
  "type": "string",
373
- "enum": ["tenant", "user", "run"]
373
+ "enum": ["tenant", "user", "run", "workspace"]
374
374
  },
375
375
  "uniqueItems": true,
376
- "description": "Subset of scopes the host implements. Tenant-scoped secrets are workspace-shared; user-scoped are per-end-user; run-scoped are ephemeral per-run."
376
+ "description": "Subset of scopes the host implements. Tenant-scoped secrets are workspace-shared; user-scoped are per-end-user; run-scoped are ephemeral per-run; `workspace` (RFC 0046/0048) is the explicit sub-tenant scope. Appended `workspace` is additive — hosts that omit it are unaffected."
377
377
  },
378
378
  "resolution": {
379
379
  "type": "string",
@@ -383,6 +383,90 @@
383
383
  },
384
384
  "additionalProperties": false
385
385
  },
386
+ "credentials": {
387
+ "type": "object",
388
+ "description": "RFC 0046 (`Draft`). Portable credential resolution + lifecycle contract — sibling to `secrets`, with first-class store-at-rest, workspace sharing, and two-key-overlap rotation. A pack references a credential by `{ ref, scope }` (see `credential-reference.schema.json`); the host resolves it into the node sandbox ONLY — never into inputs, persisted variables, channels, any run.* event payload, the debug bundle, or replay state (SECURITY invariant `credential-payload-redaction`). Supersedes the informal BYOK annex; the `secrets` advertisement stays valid.",
389
+ "required": ["supported"],
390
+ "properties": {
391
+ "supported": {
392
+ "type": "boolean",
393
+ "description": "Host implements the host.credentials resolution + lifecycle contract."
394
+ },
395
+ "scopes": {
396
+ "type": "array",
397
+ "items": { "type": "string", "enum": ["user", "workspace", "tenant"] },
398
+ "uniqueItems": true,
399
+ "description": "Subset of resolution scopes the host implements. `workspace` is the RFC 0048 sub-tenant scope; `tenant` and `user` align with the `secrets.scopes` vocabulary."
400
+ },
401
+ "encryptionAtRest": {
402
+ "type": "boolean",
403
+ "description": "Host encrypts stored credential material at rest."
404
+ },
405
+ "rotation": {
406
+ "type": "string",
407
+ "enum": ["none", "two-key-overlap"],
408
+ "description": "`two-key-overlap`: old + new credential both resolve as valid during a grace window, then the old fails with `credential_not_found` (mirrors `openwop-auth-api-key-rotation`). `none`: no rotation surface."
409
+ },
410
+ "sharing": {
411
+ "type": "boolean",
412
+ "description": "A single stored credential can be referenced by many workflows within a scope (e.g. a workspace-shared key) without copying material between references."
413
+ }
414
+ },
415
+ "additionalProperties": false
416
+ },
417
+ "oauth": {
418
+ "type": "object",
419
+ "description": "RFC 0047 (`Draft`). Host performs OAuth 2.0 grants (authorization-code + refresh) on a user's behalf for connector nodes, stores the acquired token as a `host.credentials` (RFC 0046) entry, refreshes it transparently, and resolves it into the node sandbox as a bearer token. Token material NEVER crosses the wire (SECURITY invariant `credential-payload-redaction`). Distinct from `auth` host-authentication profiles (RFC 0010 = who is the caller; this = what third-party token a node holds).",
420
+ "required": ["supported"],
421
+ "properties": {
422
+ "supported": { "type": "boolean", "description": "Host implements the host.oauth third-party token acquisition + refresh contract." },
423
+ "grants": {
424
+ "type": "array",
425
+ "items": { "type": "string", "enum": ["authorization_code", "client_credentials", "refresh_token"] },
426
+ "uniqueItems": true,
427
+ "description": "OAuth 2.0 grant types the host performs on a node's behalf."
428
+ },
429
+ "providers": {
430
+ "type": "array",
431
+ "description": "Provider catalog the host can acquire tokens for. A connector node's `auth.provider` MUST match an `id` here.",
432
+ "items": {
433
+ "type": "object",
434
+ "required": ["id"],
435
+ "properties": {
436
+ "id": { "type": "string", "minLength": 1, "description": "Stable provider id, e.g. `slack`, `google`." },
437
+ "authUrl": { "type": "string", "format": "uri", "description": "Authorization endpoint." },
438
+ "tokenUrl": { "type": "string", "format": "uri", "description": "Token endpoint." },
439
+ "scopesSupported": { "type": "array", "items": { "type": "string" }, "description": "Scopes this provider exposes." }
440
+ },
441
+ "additionalProperties": false
442
+ }
443
+ }
444
+ },
445
+ "additionalProperties": false
446
+ },
447
+ "authorization": {
448
+ "type": "object",
449
+ "description": "RFC 0049 (`Draft`). Maps an RFC 0048 principal's role to scopes (reusing the API-key scope grammar in `auth.md`) and surfaces authorization decisions as `authorization.decided` events. Fail-closed: an absent/unseeded role denies (SECURITY invariant `authorization-fail-closed`).",
450
+ "required": ["supported"],
451
+ "properties": {
452
+ "supported": { "type": "boolean", "description": "Host implements the role→scope authorization-decision contract." },
453
+ "failClosed": { "const": true, "description": "Absent/unseeded role denies; resolver errors deny. MUST be true when present — see SECURITY invariant `authorization-fail-closed`." },
454
+ "roles": {
455
+ "type": "array",
456
+ "description": "Role catalog. A principal's role resolves to this scope set; a request is authorized when any role-derived scope matches the required scope per the API-key scope-match semantics.",
457
+ "items": {
458
+ "type": "object",
459
+ "required": ["role", "scopes"],
460
+ "properties": {
461
+ "role": { "type": "string", "minLength": 1 },
462
+ "scopes": { "type": "array", "items": { "type": "string", "minLength": 1 }, "uniqueItems": true }
463
+ },
464
+ "additionalProperties": false
465
+ }
466
+ }
467
+ },
468
+ "additionalProperties": false
469
+ },
386
470
  "runtimeCapabilities": {
387
471
  "type": "array",
388
472
  "items": { "type": "string", "minLength": 1 },
@@ -398,11 +482,30 @@
398
482
  "type": "object",
399
483
  "additionalProperties": false,
400
484
  "required": ["supported", "version"],
485
+ "if": {
486
+ "properties": { "tier": { "const": "experimental" } },
487
+ "required": ["tier"]
488
+ },
489
+ "then": {
490
+ "required": ["experimentalUntil"]
491
+ },
401
492
  "properties": {
402
493
  "supported": {
403
494
  "type": "boolean",
404
495
  "description": "Host implements the execution loop + handoff state machine per spec/v1/multi-agent-execution.md §\"Execution loop\" + §\"Handoff state machine\". When true, the host MUST emit `core.workflowChain.event` records on every handoff transition per the §\"Transition events\" table."
405
496
  },
497
+ "tier": {
498
+ "type": "string",
499
+ "enum": ["stable", "experimental"],
500
+ "default": "stable",
501
+ "description": "RFC 0042 — stability claim for this capability advertisement. `stable` (the default when absent) means the host commits to the wire shape across v1.x minors. `experimental` means the host advertises the surface as a preview; the wire shape MAY shift compatibly without notice until the underlying RFC graduates to `Accepted` and the host re-advertises as `stable`. Hosts MUST omit the field for capabilities whose underlying RFC is already `Accepted`."
502
+ },
503
+ "experimentalUntil": {
504
+ "type": "string",
505
+ "format": "date",
506
+ "pattern": "^\\d{4}-\\d{2}-\\d{2}$",
507
+ "description": "RFC 0042 §B — required when `tier` is `experimental`. ISO-8601 `YYYY-MM-DD` date no more than 12 months past the discovery response date. If this date is reached before the underlying RFC graduates to `Accepted`, the host MUST either flip `tier` to `stable` or retract the capability advertisement (or — with an open deprecation RFC — extend it with a new `experimentalUntil` per §B sub-block 2)."
508
+ },
406
509
  "version": {
407
510
  "type": "integer",
408
511
  "minimum": 1,
@@ -415,6 +518,15 @@
415
518
  "maximum": 1.0,
416
519
  "description": "RFC 0039 §A. Operator-declared confidence floor at or above the spec floor of 0.5; when an OrchestratorDecision carries `confidence` below this floor, the host MUST escalate via a `clarify` or `escalate` interrupt instead of executing the decision. Absent: the spec floor 0.5 applies. Values < 0.5 are non-conformant; values > 1.0 are nonsense. Applies only when `version >= 2`."
417
520
  },
521
+ "confidenceEscalationInterruptKind": {
522
+ "type": "string",
523
+ "anyOf": [
524
+ { "const": "clarification" },
525
+ { "const": "approval" },
526
+ { "pattern": "^x-host-[a-z][a-z0-9-]*-[a-z][a-z0-9-]*$" }
527
+ ],
528
+ "description": "RFC 0044 — the literal `interrupt.kind` the host emits when escalating a below-floor confidence decision per RFC 0039 §A. `clarification` and `approval` are the canonical values matching the clarify-OR-approval choice in RFC 0039 §A; vendor-extension kinds use the canonical host-extension namespace `^x-host-<host>-<kind>$` per `spec/v1/host-extensions.md` §\"Canonical prefixes\". When advertised, hosts MUST emit an interrupt of the advertised kind on every confidence-escalation event AND the host's downstream `interrupt.md` mapping determines the `waiting-*` terminal status. Absent: conformance assumes the host uses one of the two canonical kinds (the relaxed assertion accepts either). Hosts using vendor kinds MUST also publish a non-normative kind-mapping document per RFC 0044 §C."
529
+ },
418
530
  "crossChildMemoryConcurrency": {
419
531
  "type": "string",
420
532
  "enum": ["strict", "advisory"],
@@ -827,6 +939,29 @@
827
939
  },
828
940
  "additionalProperties": false
829
941
  },
942
+ "scheduling": {
943
+ "type": "object",
944
+ "description": "RFC 0052 (`Draft`). Time-based run initiation behind the `schedule` trigger — gives the trigger a portable, durable, once-per-tick execution contract. Composes with `queueBus` (RFC 0017) where the host backs scheduling with a queue; orthogonal to the in-DAG `core.control.delay` primitive (which delays a node mid-run, not run initiation).",
945
+ "required": ["supported"],
946
+ "properties": {
947
+ "supported": { "type": "boolean" },
948
+ "cron": { "type": "boolean", "description": "Host honors cron-expression schedules." },
949
+ "delayed": { "type": "boolean", "description": "Host honors one-shot delayed execution." },
950
+ "calendar": { "type": "boolean", "description": "Host honors calendar-reference schedules." },
951
+ "maxFutureHorizon": { "type": "string", "description": "ISO-8601 duration (e.g. `P90D`); the farthest into the future a run may be scheduled. Schedules beyond this horizon MUST be rejected with `schedule_horizon_exceeded`." }
952
+ },
953
+ "additionalProperties": false
954
+ },
955
+ "deadLetter": {
956
+ "type": "object",
957
+ "description": "RFC 0053 (`Draft`). Run-level dead-letter sink for terminally-failed runs/nodes. On retry exhaustion (RFC 0009), the run is routed to a durable, inspectable sink and a `run.dead_lettered` event is emitted; dead-lettered runs remain fork-eligible (RFC 0011) for the retention window. Distinct from `queueBus.deadLetterSupported`, which dead-letters transport *messages*, not *runs*.",
958
+ "required": ["supported"],
959
+ "properties": {
960
+ "supported": { "type": "boolean" },
961
+ "retentionDays": { "type": "integer", "minimum": 1, "description": "Days a dead-lettered run is retained for inspection/fork before purge." }
962
+ },
963
+ "additionalProperties": false
964
+ },
830
965
  "sql": {
831
966
  "type": "object",
832
967
  "description": "RFC 0018 (`Active`). SQL database adapter with parametric-only enforcement. Hosts MUST reject non-parametric queries that inline user input (`sql-parametric-only` invariant — guards against SQL injection across every workflow).",
@@ -910,6 +1045,44 @@
910
1045
  "required": ["supported"],
911
1046
  "additionalProperties": false
912
1047
  },
1048
+ "packs": {
1049
+ "type": "object",
1050
+ "description": "RFC 0025 (`Active`). Pack-registry surface advertisement. The baseline `/v1/packs/*` read surface (per `spec/v1/node-packs.md` §\"Registry HTTP API\") is unconditional for hosts that ship a pack catalog and does NOT require a capability flag; this object carries optional sub-blocks (currently the test-mode mirror namespace). Hosts that don't expose any optional pack-registry sub-block MAY omit this block entirely.",
1051
+ "properties": {
1052
+ "testMode": {
1053
+ "type": "object",
1054
+ "description": "RFC 0025 §A. Optional `/v1/packs-test/*` mirror surface that exposes the production publish/get/delete/sig contract against an isolated catalog. Lets the conformance suite (`pack-registry-publish.test.ts`) exercise the documented 19-code publish error catalog without `packs:publish` scope on the real registry. Hosts that advertise `supported: true` MUST honor the §C isolation guarantees and MUST surface the same error envelopes and HTTP statuses as the production `/v1/packs/*` surface.",
1055
+ "properties": {
1056
+ "supported": {
1057
+ "type": "boolean",
1058
+ "description": "Host exposes `/v1/packs-test/*` per RFC 0025 §B. When `true`, the conformance suite drives publish-error-catalog assertions through the test namespace; when `false` or absent, the 26 scenarios in `pack-registry-publish.test.ts` soft-skip cleanly."
1059
+ },
1060
+ "isolated": {
1061
+ "type": "boolean",
1062
+ "description": "RFC 0025 §C point 1. MUST be `true` when `supported` is `true`. Asserts that the test catalog is persisted separately from the production catalog; a pack PUT via `/v1/packs-test/*` MUST NOT appear in `/v1/packs/*` listings."
1063
+ },
1064
+ "catalogResetEndpoint": {
1065
+ "type": "string",
1066
+ "description": "RFC 0025 §C point 4. Optional URL path (e.g. `/v1/packs-test/reset`) that clears the entire test catalog. When advertised, conformance-suite teardown SHOULD call it; the endpoint MUST be idempotent. Hosts MAY omit this field; in that case the suite leaves disposable timestamped pack names in place and relies on the next host restart to clear in-memory state.",
1067
+ "pattern": "^/"
1068
+ },
1069
+ "scopes": {
1070
+ "type": "array",
1071
+ "items": {
1072
+ "type": "string",
1073
+ "enum": ["core", "vendor", "community", "private", "local"]
1074
+ },
1075
+ "uniqueItems": true,
1076
+ "minItems": 1,
1077
+ "description": "RFC 0025 §A. Which namespace scopes the test catalog accepts in pack names. Public test catalogs SHOULD refuse `private` and `local` (matching the production-registry rule for `packs.openwop.dev`); private dev catalogs MAY accept all five. When omitted, the test catalog defaults to the same scope set as the production namespace it mirrors."
1078
+ }
1079
+ },
1080
+ "required": ["supported"],
1081
+ "additionalProperties": false
1082
+ }
1083
+ },
1084
+ "additionalProperties": false
1085
+ },
913
1086
  "mcp": {
914
1087
  "type": "object",
915
1088
  "description": "RFC 0020 (`Active`). MCP (Model Context Protocol) composition surface. The client half is consumed implicitly via `host.mcp` host-surface; this block adds the optional server half — workflow exposed AS an MCP server with bidirectional sampling/elicitation bridges.",
@@ -1117,7 +1290,7 @@
1117
1290
  "type": "array",
1118
1291
  "items": { "type": "string", "minLength": 1 },
1119
1292
  "uniqueItems": true,
1120
- "description": "Auth profiles the host claims. Canonical ids: `openwop-audit-log-integrity` (auth-profiles.md §Audit-log integrity), `openwop-auth-api-key-rotation`, `openwop-auth-oauth2-client-credentials`, `openwop-auth-oidc-user-bearer`, `openwop-auth-mtls`. Clients SHOULD tolerate unknown profile ids."
1293
+ "description": "Auth profiles the host claims. Canonical ids: `openwop-audit-log-integrity` (auth-profiles.md §Audit-log integrity), `openwop-auth-api-key-rotation`, `openwop-auth-oauth2-client-credentials`, `openwop-auth-oidc-user-bearer`, `openwop-auth-mtls`, and the RFC 0050 enterprise-identity profiles `openwop-auth-saml`, `openwop-auth-scim`, and `openwop-auth-ldap`. Clients SHOULD tolerate unknown profile ids."
1121
1294
  },
1122
1295
  "rotation": {
1123
1296
  "type": "object",
@@ -0,0 +1,21 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://openwop.dev/spec/v1/credential-reference.schema.json",
4
+ "title": "CredentialReference",
5
+ "description": "RFC 0046. An opaque, host-issued handle to a stored credential. This is the ONLY credential artifact permitted on the wire — it NEVER carries key material. The host's host.credentials resolver dereferences it into the node sandbox at execution time (SECURITY invariant `credential-payload-redaction`).",
6
+ "type": "object",
7
+ "required": ["ref"],
8
+ "properties": {
9
+ "ref": {
10
+ "type": "string",
11
+ "minLength": 1,
12
+ "description": "Opaque host-issued identifier, e.g. `cred_a3b9c2`. Hosts MUST NOT encode secret material in the ref."
13
+ },
14
+ "scope": {
15
+ "type": "string",
16
+ "enum": ["user", "workspace", "tenant"],
17
+ "description": "Resolution scope. MUST match a scope in `capabilities.credentials.scopes`. Absent ⇒ the host's default scope."
18
+ }
19
+ },
20
+ "additionalProperties": false
21
+ }