@openwop/openwop-conformance 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/CHANGELOG.md +91 -1
  2. package/README.md +3 -2
  3. package/api/asyncapi.yaml +8 -0
  4. package/api/openapi.yaml +371 -1
  5. package/coverage.md +25 -5
  6. package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
  7. package/fixtures/conformance-envelope-recovery-applied.json +39 -0
  8. package/fixtures/conformance-envelope-refusal.json +38 -0
  9. package/fixtures/conformance-envelope-retry-attempted.json +39 -0
  10. package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
  11. package/fixtures/conformance-envelope-truncated.json +39 -0
  12. package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
  13. package/fixtures/conformance-model-capability-insufficient.json +25 -0
  14. package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
  15. package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
  16. package/fixtures/conformance-multi-agent-handoff.json +49 -0
  17. package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
  18. package/fixtures/conformance-prompt-end-to-end.json +33 -0
  19. package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
  20. package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
  21. package/fixtures/openwop-smoke-cost-emit.json +37 -0
  22. package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
  23. package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
  24. package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
  25. package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
  26. package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
  27. package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
  28. package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
  29. package/fixtures.md +39 -0
  30. package/package.json +1 -1
  31. package/schemas/README.md +5 -0
  32. package/schemas/agent-manifest.schema.json +16 -0
  33. package/schemas/capabilities.schema.json +375 -1
  34. package/schemas/envelopes/clarification.request.schema.json +9 -0
  35. package/schemas/envelopes/error.schema.json +4 -0
  36. package/schemas/envelopes/schema.request.schema.json +4 -0
  37. package/schemas/envelopes/schema.response.schema.json +1 -1
  38. package/schemas/node-pack-manifest.schema.json +28 -0
  39. package/schemas/orchestrator-decision.schema.json +12 -0
  40. package/schemas/prompt-kind.schema.json +8 -0
  41. package/schemas/prompt-pack-manifest.schema.json +80 -0
  42. package/schemas/prompt-ref.schema.json +40 -0
  43. package/schemas/prompt-template.schema.json +149 -0
  44. package/schemas/registry-version-manifest.schema.json +5 -0
  45. package/schemas/run-ancestry-response.schema.json +54 -0
  46. package/schemas/run-event-payloads.schema.json +479 -11
  47. package/schemas/run-event.schema.json +15 -1
  48. package/schemas/run-snapshot.schema.json +3 -2
  49. package/schemas/workflow-definition.schema.json +19 -1
  50. package/src/lib/llm-cache-key-recipe.ts +68 -0
  51. package/src/scenarios/aiEnvelope.contractRefusal.test.ts +104 -13
  52. package/src/scenarios/aiEnvelope.correlationReplay.test.ts +32 -15
  53. package/src/scenarios/aiEnvelope.redaction.test.ts +6 -5
  54. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +5 -5
  55. package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +211 -12
  56. package/src/scenarios/aiEnvelope.universalKinds.test.ts +7 -7
  57. package/src/scenarios/blob-presign-expiry.test.ts +7 -7
  58. package/src/scenarios/cache-ttl-expiry.test.ts +6 -6
  59. package/src/scenarios/cost-attribution.test.ts +124 -11
  60. package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
  61. package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
  62. package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
  63. package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
  64. package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
  65. package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
  66. package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
  67. package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
  68. package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
  69. package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
  70. package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
  71. package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
  72. package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
  73. package/src/scenarios/envelope-truncated.test.ts +136 -0
  74. package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
  75. package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
  76. package/src/scenarios/fixtures-valid.test.ts +123 -15
  77. package/src/scenarios/kv-ttl-expiry.test.ts +7 -7
  78. package/src/scenarios/model-capability-insufficient.test.ts +221 -0
  79. package/src/scenarios/model-capability-substituted.test.ts +203 -0
  80. package/src/scenarios/multi-agent-confidence-escalation.test.ts +164 -0
  81. package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
  82. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
  83. package/src/scenarios/multi-region-idempotency.test.ts +58 -0
  84. package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
  85. package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
  86. package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
  87. package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
  88. package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
  89. package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
  90. package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
  91. package/src/scenarios/prompt-pack-install.test.ts +187 -0
  92. package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
  93. package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
  94. package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
  95. package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
  96. package/src/scenarios/prompt-template-shape.test.ts +359 -0
  97. package/src/scenarios/queue-ack-nack-dlq.test.ts +7 -7
  98. package/src/scenarios/queue-publish-consume-roundtrip.test.ts +7 -7
  99. package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
  100. package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
  101. package/src/scenarios/replay-llm-cache-key.test.ts +1 -40
  102. package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
  103. package/src/scenarios/sandbox-capability-gate-respected.test.ts +31 -0
  104. package/src/scenarios/sandbox-memory-cap.test.ts +61 -0
  105. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +35 -0
  106. package/src/scenarios/sandbox-no-host-env-leak.test.ts +38 -0
  107. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +91 -0
  108. package/src/scenarios/sandbox-no-host-process-escape.test.ts +30 -0
  109. package/src/scenarios/sandbox-no-network-escape.test.ts +49 -0
  110. package/src/scenarios/sandbox-timeout-cap.test.ts +61 -0
  111. package/src/scenarios/search-bm25-roundtrip.test.ts +7 -7
  112. package/src/scenarios/spec-corpus-validity.test.ts +34 -6
  113. package/src/scenarios/sql-transaction-atomicity.test.ts +6 -6
  114. package/src/scenarios/stream-subscribe-from-beginning.test.ts +7 -7
  115. package/src/scenarios/subworkflow-input-mapping.test.ts +70 -4
  116. package/src/scenarios/table-cursor-pagination.test.ts +7 -7
  117. package/src/scenarios/table-schema-enforcement.test.ts +7 -7
  118. package/src/scenarios/vector-knn-roundtrip.test.ts +7 -7
package/coverage.md CHANGED
@@ -34,14 +34,22 @@
34
34
  | Audit-log integrity profile | `audit-log-integrity.test.ts` | A | Profile claim + `/v1/audit/verify` shape + checkpoint-signature verification; chain re-walk with `chainValid` and `checkpointsValid` bits. Tamper detection covered host-internally at `examples/hosts/sqlite/test/audit-tamper.test.ts` (mutate-entry + forge-signature paths). CF-11 close-out 2026-05-15: cross-host checkpoint export via `examples/hosts/postgres/src/audit-export.ts` + standalone verifier at `scripts/verify-audit-checkpoints.mjs`; `examples/hosts/postgres/test/audit-checkpoint-export.test.ts` verifies 7 paths (positive + tampered-signature + non-monotonic-atSequence). |
35
35
  | Multi-region idempotency capability | `multi-region-idempotency.test.ts` | C | Discovery enum coverage; remaining: cross-region partition simulation (requires multi-region harness). |
36
36
  | Public hosted registry (`packs.openwop.dev`) | `registry-public.test.ts` | A− | Discovery, index, and per-pack manifest assertions against the public registry. Opt-in via `OPENWOP_TEST_PUBLIC_REGISTRY=true` so default conformance runs don't depend on outbound `packs.openwop.dev` reachability. Remaining: tarball-fetch + signature-verify roundtrip. |
37
- | Workflow-chain packs (RFC 0013 — `spec/v1/workflow-chain-packs.md`) | `workflow-chain-pack-manifest-validation.test.ts`, `workflow-chain-pack-signature-verification.test.ts`, `workflow-chain-expansion.test.ts`, `workflow-chain-unresolvable-typeid.test.ts`, `workflow-chain-host-expansion.test.ts` | A (4 server-free + 1 live-host) | RFC 0013 promoted Draft → Active 2026-05-18; live-host gate (`workflow-chain-host-expansion.test.ts`) added 2026-05-18 closing the Phase 3 acceptance criterion. The 4 server-free scenarios exercise the pure-library algorithm; the new live-host scenario exercises the reference in-memory host's HTTP wrapper (`POST /v1/host/sample/workflow-chain:expand`) under `OPENWOP_REQUIRE_BEHAVIOR=true`, gated on `capabilities.workflowChainPacks.supported: true`. 6 cases — discovery advertisement / 1-node positive / 2-node positive with edges / unknown-pack 404 / unknown-chain 404 / malformed-body 422. RFC promotes to `Accepted` once the live-host scenario passes against a published host run. |
38
- | AI Envelope (FINAL v1.1 — `spec/v1/ai-envelope.md`, RFC 0021) | `aiEnvelope.universalKinds.test.ts`, `aiEnvelope.schemaDrift.test.ts`, `aiEnvelope.correlationReplay.test.ts`, `aiEnvelope.contractRefusal.test.ts`, `aiEnvelope.trustBoundaryPropagation.test.ts`, `aiEnvelope.redaction.test.ts`, `aiEnvelope.capBreached.test.ts`, `ai-envelope-shape.test.ts` | B (shape + 8 behavioral accept-pipeline assertions) | DRAFT v1.x gap-closure landed 2026-05-17; RFC 0021 promotion to FINAL v1.1 landed 2026-05-18. Closes the long-standing gap where 8 v1 surfaces already referenced AI Envelopes (`Capabilities.supportedEnvelopes` + `schemaVersions` + the three per-turn limits; `host.aiEnvelope.generate`; `envelopeType` on workflow-chain packs; `profiles.md` derivation; `host-extensions.md` namespacing; `positioning.md`; reference host discovery) but the envelope's own wire shape, universal kinds, schema discipline, and Envelope Contract gate were never specified. The 7 `aiEnvelope.*` advertisement-shape probes (gated on `capabilities.envelopeContracts.advertised: true`) cover the host's CAPABILITY claim. `ai-envelope-shape.test.ts` adds 8 BEHAVIORAL assertions through the workflow-engine sample's env-gated `POST /v1/host/sample/envelope/accept` seam: accepted / invalid (shape) / invalid (ISO 8601 timestamp) / gated (vendor kind not advertised) / breached (counter at cap) / universal-kind-always-allowed / normalizedMeta.contentTrust propagation from runTrustBoundary / envelope-supplied contentTrust precedence over runTrustBoundary. Behavior-unlock dependency for the remaining `aiEnvelope.*` `it.todo()` blocks: a reference host wires schema drift + correlationId-replay short-circuit + redaction-carry-forward + contract-refusal mappings + trust-boundary propagation onto RunEventDocs. |
37
+ | Workflow-chain packs (RFC 0013 — `spec/v1/workflow-chain-packs.md`) | `workflow-chain-pack-manifest-validation.test.ts`, `workflow-chain-pack-signature-verification.test.ts`, `workflow-chain-expansion.test.ts`, `workflow-chain-unresolvable-typeid.test.ts`, `workflow-chain-host-expansion.test.ts` | A (4 server-free + 1 live-host) | RFC 0013 promoted Draft → Active → Accepted 2026-05-18 once the live-host gate (`workflow-chain-host-expansion.test.ts`) passed against the reference in-memory host. The 4 server-free scenarios exercise the pure-library algorithm; the live-host scenario exercises the host's HTTP wrapper (`POST /v1/host/sample/workflow-chain:expand`) under `OPENWOP_REQUIRE_BEHAVIOR=true`, gated on `capabilities.workflowChainPacks.supported: true`. 6 cases — discovery advertisement / 1-node positive / 2-node positive with edges / unknown-pack 404 / unknown-chain 404 / malformed-body 422. The matching spec doc `workflow-chain-packs.md` remains at `DRAFT v1.x` pending Phase B/C closure (parameter schema validation + cross-host expansion equivalence). |
38
+ | AI Envelope (FINAL v1.1 — `spec/v1/ai-envelope.md`, RFC 0021) | `aiEnvelope.universalKinds.test.ts`, `aiEnvelope.schemaDrift.test.ts`, `aiEnvelope.correlationReplay.test.ts`, `aiEnvelope.contractRefusal.test.ts`, `aiEnvelope.trustBoundaryPropagation.test.ts`, `aiEnvelope.redaction.test.ts`, `aiEnvelope.capBreached.test.ts`, `ai-envelope-shape.test.ts` | A− (shape + ~84 live behavioral assertions across all 8 files via the envelope-accept seam) | DRAFT v1.x gap-closure landed 2026-05-17; RFC 0021 promotion to FINAL v1.1 landed 2026-05-18. Closes the long-standing gap where 8 v1 surfaces already referenced AI Envelopes (`Capabilities.supportedEnvelopes` + `schemaVersions` + the three per-turn limits; `host.aiEnvelope.generate`; `envelopeType` on workflow-chain packs; `profiles.md` derivation; `host-extensions.md` namespacing; `positioning.md`; reference host discovery) but the envelope's own wire shape, universal kinds, schema discipline, and Envelope Contract gate were never specified. The 7 `aiEnvelope.*` advertisement-shape probes (gated on `capabilities.envelopeContracts.advertised: true`) cover the host's CAPABILITY claim. All 8 files now also run behavioral assertions through the workflow-engine sample's env-gated `POST /v1/host/sample/envelope/accept` seam (drained 2026-05-19): schema-drift refusal under strict mode; correlationId-replay short-circuit + persisted `priorCorrelations` store survives process restart; BYOK redaction-carry-forward returning `redactedPayload` + `redactionCount`; contract-refusal mappings via the capability-toggle seam; trust-boundary propagation from MCP/A2A; cap-breached counters; universal-kind always-allowed; `ai-envelope-shape` end-to-end accept-pipeline. Path to `Accepted` (host-side): live downstream-projection coverage on a published host (OTel scrape + debug-bundle export currently soft-skip on hosts that don't expose those seams). |
39
+ | Envelope `reasoning` field + Tier-1 subset (RFC 0030 — `spec/v1/ai-envelope.md` §"Reasoning field (normative)", `spec/v1/structured-output-subset.md`) | `envelope-reasoning-shape.test.ts`, `envelope-reasoning-secret-redaction.test.ts`, `envelope-tier-one-subset-static.test.ts` | A (shape + load-bearing Tier-1 static checks always-on; 8 live behavioral assertions in `envelope-reasoning-secret-redaction` via the envelope-accept seam, including the OTel + debug-bundle downstream projections) | RFC 0030 promoted Draft → Active 2026-05-20. `envelope-reasoning-shape` (always-on) asserts the OPTIONAL `reasoning` property posture on the three universal-kind schemas + the `schema.response` deliberate omission + with/without backward-compat round-trips + `capabilities.envelopes.reasoning.{supported,promptDirective}` + `tierOneSubsetCompliance` advertisement shape. `envelope-tier-one-subset-static` enforces the load-bearing Tier-1 subset (no `oneOf` / `allOf` / `not` / `prefixItems` / `propertyNames` anywhere — Gemini silently drops these) on every universal-kind schema as always-on; the OpenAI-strict-only constraints (`minLength` / `maxLength` / `minItems` etc.) are checked only under host-advertised `tierOneSubsetCompliance: "strict"` to honor the universal-kind schemas' pre-RFC-0030 open-bag design. `envelope-reasoning-secret-redaction` (capability-gated on `reasoning.supported` + `secrets.supported`) carries 8 live behavioral assertions for SECURITY invariant `envelope-reasoning-secret-redaction`: BYOK canary substitution with the canonical `[REDACTED:<secretId>]` marker on `reasoning`; recursive walk across `reasoning` + sibling fields; passthrough when no canary matches; canary detection inside `clarification.request.reasoning`; downstream OTel-span scrape + debug-bundle export both confirm no canary plaintext leaks (soft-skip on hosts that don't expose those seams); non-routing-on-reasoning invariant (acceptor's routing decision MUST NOT depend on `reasoning` contents). Reference host advertises `capabilities.envelopes.reasoning: { supported: true, promptDirective: "off" }` + `tierOneSubsetCompliance: "warn"`. Path to `Accepted`: reference host injects the system-prompt directive instructing the model to populate `reasoning` (promotes `promptDirective` → `"advisory"`). |
40
+ | Envelope variant discrimination + model capabilities (RFC 0031 — `spec/v1/ai-envelope.md` §"Variant payload discrimination (normative)", `spec/v1/host-capabilities.md` §"Model-capability declarations", `spec/v1/node-packs.md` §"Model-capability declarations on NodeModules") | `envelope-variant-discriminator-static.test.ts`, `model-capability-substituted.test.ts`, `model-capability-insufficient.test.ts`, `node-module-required-capabilities-shape.test.ts` | B+ (discriminator-static + advertisement-shape always-on; 14 live behavioral assertions across substitution + insufficient + authoring-convention, capability-gated) | RFC 0031 promoted Draft → Active 2026-05-20. `envelope-variant-discriminator-static` (always-on) walks every `schemas/envelopes/*.schema.json` asserting no `oneOf` at any nesting depth (Gemini silently drops `oneOf`, producing looser-than-declared schemas — a silent correctness bug) AND every `anyOf` branch declares a single-string-`enum` discriminator in `required` per RFC 0031 §A. `model-capability-substituted` (capability-gated on `capabilities.modelCapabilities.supported` + `substitutionSupported`) carries advertisement-shape check on the `advertised: string[]` pattern (each identifier matches the spec-reserved set OR `^x-host-<host>-<key>$` per RFC 0031 §C) + 4 live behavioral assertions covering substitution emission + SECURITY invariant `model-capability-substituted-no-credential-disclosure`'s all-or-nothing `"[REDACTED]"` redaction option. `model-capability-insufficient` (capability-gated on `modelCapabilities.supported`) carries 6 live behavioral assertions covering refusal emission paths + the no-recursive-fallback constraint (RFC 0031 §"Unresolved questions" #3 — `fallbackAttempted: true` when the declared fallback itself fails; NO chaining). `node-module-required-capabilities-shape` (SHOULD-tier authoring convention check) carries 4 live assertions for the `core.ai.*` typeId-pattern recommendation. Path to `Accepted`: reference host implements `executor/modelCapabilityGate.ts` end-to-end + advertises `capabilities.modelCapabilities: { supported: true, advertised: [...], substitutionSupported: true }` (the live behavioral assertions soft-skip cleanly on hosts that haven't wired the executor yet). |
41
+ | Envelope-reliability run-event vocabulary (RFC 0032 — `spec/v1/ai-envelope.md` §"Envelope-reliability events" + line-448 scope clarification, `spec/v1/observability.md` §"Envelope-reliability events (RFC 0032)") | `envelope-retry-attempted.test.ts`, `envelope-retry-exhausted.test.ts`, `envelope-refusal-shape.test.ts`, `envelope-truncated.test.ts`, `envelope-nl-to-format-engaged.test.ts`, `envelope-recovery-applied.test.ts` | B (1 shared advertisement-shape probe with MUST-events enforcement; 34 live behavioral assertions across the six events, all capability- + fixture-gated) | RFC 0032 promoted Draft → Active 2026-05-20. Carries the central `ai-envelope.md` line-448 scope clarification (per-kind routing events forbidden; cross-kind operational events permitted via RFC). `envelope-retry-attempted` carries the shared advertisement-shape probe: when `capabilities.envelopes.reliability.supported: true`, the host MUST list both `envelope.retry.exhausted` AND `envelope.refusal` in `events[]` (the two MUST-tier events per RFC 0032 §C); `maxRetryAttempts` MUST be in `[1, 16]`. The six scenarios collectively carry 34 live behavioral assertions (drained 2026-05-19 via the conformance `mock` provider + `POST /v1/host/sample/test/mock-ai/program` seam): retry on schema-violation + retry on truncation + retry-exhausted terminal failure + provider refusal (no-retry MUST per RFC 0032 §B.3 + RFC 0033 §D) + truncation cut-off + NL-to-Format escalation (Tam et al. mitigation per arXiv 2408.02442) + lenient-parsing recovery + SECURITY invariants `envelope-refusal-no-prompt-leak` (BYOK + prompt-content redaction on `refusalText`) and `envelope-recovery-no-content-leak` (no pre-recovery substrings in the event payload). Path to `Accepted`: reference host implements `executor/envelopeReliability.ts` end-to-end + advertises `capabilities.envelopes.reliability: { supported: true, events: [...], maxRetryAttempts: <n> }` (the behavioral assertions already pass against the reference host's end-to-end emission path under `OPENWOP_ENVELOPE_RELIABILITY_END_TO_END=true`; the no-flag default still soft-skips). |
42
+ | Envelope-completion retry routing (RFC 0033 — `spec/v1/ai-envelope.md` §"Envelope-completion criteria", `spec/v1/observability.md` §"Envelope-completion retry routing (RFC 0033)") | `envelope-completion-distinguishes-truncation.test.ts`, `envelope-truncation-cap-exhaustion.test.ts` | B− (1 advertisement-shape probe on `completion.{distinguishesTruncation, truncationBudgetMultiplier}`; 9 live behavioral assertions across the two retry paths + the DoS-bound assertion) | RFC 0033 promoted Draft → Active 2026-05-20. Closes `spec/v1/ai-envelope.md` §"Open spec gaps" E5 (refusal-mode + retry-policy interaction). Reuses RFC 0032's event vocabulary; introduces NO new event types. `envelope-completion-distinguishes-truncation` (capability-gated on `completion.distinguishesTruncation: true`) carries 5 live behavioral assertions covering both retry paths — truncation MUST increase output budget (RECOMMENDED 2× per `truncationBudgetMultiplier`) WITHOUT a corrective fragment; schema-violation MUST add a corrective fragment WITHOUT a budget change. `envelope-truncation-cap-exhaustion` carries 4 live behavioral assertions covering the DoS-bound assertion (truncation retries count against `Capabilities.limits.schemaRounds`; exhaustion → `envelope.retry.exhausted { finalReason: "truncation" }` + `cap.breached { kind: "schema" }` + node fails with NEW error code `envelope_truncation_unrecoverable` per RFC 0033 §F). All 9 assertions are fixture- + capability-gated against the conformance `mock` provider via `POST /v1/host/sample/test/mock-ai/program`. Path to `Accepted`: reference host implements the truncation-vs-schema-violation retry-routing branch end-to-end (`executor/envelopeReliability.ts` + `stop_reason` inspection in `aiProviders/aiProvidersHost.ts`) + advertises `capabilities.envelopes.reliability.completion.distinguishesTruncation: true`. |
43
+ | Multi-agent execution model + handoff state machine (RFC 0037 Phase 1 — `spec/v1/multi-agent-execution.md`) | `multi-agent-handoff-state-machine.test.ts` | B (1 advertisement-shape probe + 1 behavioral 4-event causation-chain assertion against the parent+child fixture pair) | RFC 0037 Phase 1 filed Draft → promoted Active 2026-05-21 after spec + schema + scenario landed atomically. Advertisement-shape probe asserts `capabilities.multiAgent.executionModel.{supported, version ∈ [1,4]}` when present. Behavioral assertion drives the `conformance-multi-agent-handoff` parent + `conformance-multi-agent-handoff-child` fixture pair: runs the supervisor → next-worker → child completed loop and asserts the 4 `core.workflowChain.event` records appear in the exact phase sequence `dispatch.began → dispatch.succeeded → child.completed → output.harvested` with each event's `causationId === prior.eventId` and `dispatch.began.causationId === runOrchestrator.decided.eventId`, plus `output.harvested.harvestedKeys === ['parentResult']` (proves the spec §"Transition events" table on real wire). Reference workflow-engine advertises + emits end-to-end when `OPENWOP_MULTI_AGENT_EXECUTION_MODEL=true`; the no-flag default soft-skips honestly. Path to `Accepted`: non-steward host advertises + the behavioral assertion passes against it. |
44
+ | Multi-agent Phase 2 confidence-floor escalation (RFC 0039 — `spec/v1/multi-agent-execution.md` §"Confidence escalation") | `multi-agent-confidence-escalation.test.ts` | B (1 advertisement-shape probe on `confidenceEscalationFloor` + 1 behavioral assertion against the low-confidence fixture) | RFC 0039 Phase 2 filed Draft → promoted Active 2026-05-22 after the confidence-floor half landed end-to-end. Advertisement-shape probe asserts `capabilities.multiAgent.executionModel.confidenceEscalationFloor` (when present) is a number in `[0.5, 1.0]`; values below the spec floor are non-conformant. Behavioral assertion drives the `conformance-multi-agent-confidence-escalation` fixture (supervisor `mockDispatchPlan` carries one decision with `confidence: 0.3`) and asserts: parent reaches `waiting-clarification` (NOT `completed` because no dispatch fired); exactly ONE `core.workflowChain.confidence-escalated` event with `payload.confidence === 0.3`, `payload.floor ∈ [0.5, 1.0]`, `payload.escalationKind ∈ {clarify, escalate}`; causationId chains back to the `runOrchestrator.decided` event; ZERO `core.workflowChain.event` records (the load-bearing distinction from Phase 1 — confidence floor MUST fire BEFORE any dispatch.began). Reference workflow-engine advertises `version: 2` + `confidenceEscalationFloor: 0.5` when both `OPENWOP_MULTI_AGENT_EXECUTION_MODEL=true` AND `OPENWOP_MULTI_AGENT_EXECUTION_MODEL_PHASE_2=true` are set; floor tunable via `OPENWOP_MULTI_AGENT_CONFIDENCE_FLOOR`. Path to `Accepted`: non-steward host advertises `version: 2` + the behavioral assertion passes against it. Memory-lifecycle half of RFC 0039 (MAE-2/3) remains explicit follow-up: `crossChildMemoryConcurrency` capability field is schema-landed but the host's MemoryAdapter doesn't yet implement either contract. |
45
+ | Sandbox execution contract (RFC 0035 — `spec/v1/host-capabilities.md` §"Sandbox execution contract") | `sandbox-no-host-fs-escape.test.ts`, `sandbox-no-host-env-leak.test.ts`, `sandbox-no-network-escape.test.ts`, `sandbox-no-host-process-escape.test.ts`, `sandbox-memory-cap.test.ts`, `sandbox-timeout-cap.test.ts`, `sandbox-capability-gate-respected.test.ts`, `sandbox-no-cross-pack-mutation.test.ts` | C+ (advertisement-shape probes always-on; 8 capability-gated behavioral stubs scaffolded; soft-skip on hosts that don't advertise `capabilities.sandbox.supported`) | RFC 0035 promoted Draft → Active 2026-05-21. 8 scenarios, one per `node-pack-sandbox-*` invariant in `SECURITY/invariants.yaml`. Behavioral assertions remain stubbed with `expect(true).toBe(true)` + docstring expected-wire-shape pending the synthetic `vendor.openwop.misbehaving-sandbox` pack + a first sandbox-executing reference host. Path to `Accepted`: first sandbox-executing host advertises + implements the 8 failure-mode invariants + the 8 scenarios pass; at that point the 8 `node-pack-sandbox-*` SECURITY rows graduate from `reference-impl` → `protocol` tier per RFC 0035 §"Acceptance criteria." |
46
+ | Multi-region idempotency + cross-engine append-ordering (RFC 0036 — `spec/v1/idempotency.md` §"`multiRegion` sub-block", `spec/v1/replay.md` §"Cross-region replay") | `multi-region-idempotency.test.ts`, `cross-engine-append-ordering.test.ts` | C+ (2 categorical-shape probes always-on + 1 granular `multiRegion` shape probe + 1 `crossEngineOrdering` shape probe; behavioral assertions deferred to simulator landing per RFC 0036 §C) | RFC 0036 promoted Draft → Active 2026-05-21. The existing `multi-region-idempotency.test.ts` covers the categorical `capabilities.idempotency.crossRegion ∈ {single-region, best-effort, strict}` claim plus the matching operator-tier metric names; a third describe block added 2026-05-21 covers the granular `capabilities.idempotency.multiRegion.{supported, replicationLagBoundMs, partitionRecoveryStrategy}` advertisement shape (`replicationLagBoundMs ∈ [0, 60000]`; `partitionRecoveryStrategy ∈ {last-writer-wins, first-writer-wins}` OR `^x-host-<host>-<key>$`). NEW `cross-engine-append-ordering.test.ts` covers `capabilities.eventLog.crossEngineOrdering.{supported, orderingModel ∈ {lamport, vector-clock, global-sequencer}}` shape. Behavioral two-engine-append-then-cross-read assertion deferred until the Postgres reference host's multi-region simulator lands per RFC 0036 §C. Path to `Accepted`: simulator + behavioral conformance pass against the reference host; non-steward host advertises the same. |
39
47
 
40
48
  ---
41
49
 
42
50
  ## Capability-gated scenarios: shape vs behavior
43
51
 
44
- Seventeen scenarios (or scenario groups) validate optional profiles where the host's discovery advertisement is well-formed (shape grade) but no reference host yet implements the profile end-to-end (behavior grade is `host-pending`). Default suite runs skip these with a warning; set `OPENWOP_REQUIRE_BEHAVIOR=true` to convert skips into hard failures.
52
+ Twenty-two scenario groups validate optional profiles. The shape grade records whether the host's discovery advertisement is well-formed; the behavior grade is `host-pending` while no reference host yet implements the profile end-to-end, and `host-pass` once one does. Default suite runs skip these with a warning; set `OPENWOP_REQUIRE_BEHAVIOR=true` to convert skips into hard failures.
45
53
 
46
54
  | Scenario | Profile / capability | Shape grade | Behavior grade | Behavior-unlock dependency |
47
55
  |---|---|---|---|---|
@@ -69,6 +77,11 @@ Seventeen scenarios (or scenario groups) validate optional profiles where the ho
69
77
  | `blob-cross-tenant-isolation.test.ts`, `cache-cross-tenant-isolation.test.ts` (two scenarios) | `capabilities.blobStorage` + `capabilities.cache` (RFC 0019, `host-blob-cache-capability.md`) | A (advertisement shape + behavioral cross-tenant put/get isolation for both surfaces) | host-pass via opt-in test seam | Same seam dependency as kv row. |
70
78
  | `sql-injection-rejection.test.ts` | `capabilities.sql` (RFC 0018, `host-sql-vector-search-capability.md`) + `SECURITY/invariants.yaml` `sql-parametric-only` | A (advertisement shape + parametric round-trip + injection-shape input bound as literal returns 0 rows) | host-pass via opt-in test seam | Same seam dependency as kv row. |
71
79
  | `mcp-server-tool-roundtrip.test.ts`, `mcp-server-resource-roundtrip.test.ts`, `mcp-server-prompt-roundtrip.test.ts`, `mcp-server-sampling-bridge.test.ts`, `mcp-server-elicitation-bridge.test.ts`, `mcp-server-untrusted-args.test.ts` (six scenarios) | `capabilities.mcp.serverMount` (RFC 0020, `mcp-integration.md` §"OpenWOP host as MCP server") + `SECURITY/invariants.yaml` `mcp-server-untrusted-args` | A (advertisement shape + JSON-RPC tools/list+tools/call roundtrip, resources/list+read, prompts/list+get, sampling/elicitation bridge dispatch, Ajv2020 args validation rejecting with -32602 before workflow start) | host-pass via opt-in MCP server mount | Reference host exposes `POST /v1/host/sample/mcp` env-gated on `OPENWOP_MCP_SERVER_ENABLED=true`; hosts that don't expose the mount soft-skip the behavioral assertions and verify advertisement shape only. |
80
+ | `prompt-end-to-end-events.test.ts`, `prompt-resolution-chain-node-wins.test.ts`, `prompt-resolution-chain-fallback-cascade.test.ts` (three scenarios) | `prompts-supported` profile — gates on `capabilities.prompts.supported: true` (RFC 0027 + RFC 0029, `spec/v1/prompts.md`) | A (advertisement shape always + end-to-end resolve + emit during real workflow dispatch; resolution chain Layers 1, 3, 4 exercised) | host-pass (workflow-engine reference) | Reference host advertises `capabilities.prompts.supported: true` since RFC 0027 ref-impl landed; dispatch wiring in `bootstrap/nodes.ts` walks the resolution chain and emits `agent.promptResolved` + `prompt.composed` per spec/v1/prompts.md §"Composition + observability". |
81
+ | `prompt-pack-install.test.ts`, `prompt-list-and-fetch.test.ts`, `prompt-render-deterministic.test.ts` (three scenarios) | `prompts-endpoints` profile — gates on `capabilities.prompts.endpointsSupported: true` (RFC 0028 §A, `spec/v1/prompts.md` §"Discovery & distribution") | A (advertisement shape always + list/get/render contract + pack-source provenance stamps + ETag honoring when supported) | host-pass (workflow-engine reference) | Reference host serves the six `/v1/prompts*` routes via `routes/prompts.ts` against the in-memory `PromptStore`. Pack-install existence claim opt-in via `OPENWOP_TEST_PROMPT_PACK_INSTALLED=true` (the in-tree `vendor.openwop.prompt-sample` pack auto-installs via `promptPackLoader.ts`). |
82
+ | `prompt-mutable-lifecycle.test.ts` | `prompts-mutable` profile — gates on `capabilities.prompts.mutableLibrary: true` (RFC 0028 §C) | A (advertisement shape + CRUD lifecycle + pack/host source 403-on-mutation) | host-pass (workflow-engine reference) | Reference host advertises `mutableLibrary: true`; user-source templates accepted, pack + host-built-in templates return 403 on POST/PUT/DELETE. |
83
+ | `prompt-resolution-chain-agent-intrinsic.test.ts` | `prompts-agent-bindings` profile — gates on `capabilities.prompts.agentBindings: true` (RFC 0029 §A Layer 2) | A (advertisement shape + Layer 2 agent intrinsic / overrides / library-default precedence over Layers 3-4) | host-pass (workflow-engine reference) | Reference host advertises `agentBindings: true` so Layer 2 sub-layers (agent-intrinsic / agent-overrides / agent-library-default) walk per RFC 0029 §B. |
84
+ | `prompt-composed-secret-redaction.test.ts`, `prompt-composed-trust-marker.test.ts` (two scenarios) | `prompts-observability-full` profile — gates on `prompts.supported + observability: "full"` (RFC 0027 §E + RFC 0020 §D) + `SECURITY/invariants.yaml` `prompt-composed-secret-redaction` + `prompt-composed-trust-marker` | A (advertisement shape + `[REDACTED:<credentialRef>]` markers for secret-source bindings + `<UNTRUSTED>...</UNTRUSTED>` wrapping + `contentTrust: "untrusted"` propagation) | host-pass (workflow-engine reference) | Reference host advertises `observability: "full"` (sourced from `host/promptHostConfig.ts`). Composition pipeline in `host/promptCompose.ts` enforces SR-1 carry-forward + untrusted-content marker per `SECURITY/threat-model-secret-leakage.md` §SR-1. |
72
85
 
73
86
  Strict-mode runner usage:
74
87
 
@@ -76,7 +89,7 @@ Strict-mode runner usage:
76
89
  OPENWOP_REQUIRE_BEHAVIOR=true npx vitest run
77
90
  ```
78
91
 
79
- The flag is read at scenario startup via `conformance/src/lib/env.ts` → `loadEnv().requireBehavior`. Scenarios use the `behaviorGate(profileName, advertised)` helper from `conformance/src/lib/behavior-gate.ts` so the strict-mode failure message cites the relevant spec section. `audit-log-integrity.test.ts` is the worked example as of 2026-05-11; the remaining nine scenarios will adopt the helper as their host-side profiles land (tracked in `docs/PROTOCOL-GAP-CLOSURE-PLAN.md` Phase-1 tracks T1.1 onward).
92
+ The flag is read at scenario startup via `conformance/src/lib/env.ts` → `loadEnv().requireBehavior`. Scenarios use the `behaviorGate(profileName, advertised)` helper from `conformance/src/lib/behavior-gate.ts` so the strict-mode failure message cites the relevant spec section. The wired-and-passing examples (host-pass behavior grade) include `audit-log-integrity.test.ts` (landed 2026-05-11), the 5 host-capability-surface families gated on the `OPENWOP_TEST_SEAM_ENABLED=true` test seam, and the prompt-* family of 10 scenarios across the five `prompts-*` profile names (landed 2026-05-20). The remaining `host-pending` rows in the table above adopt the helper as their host-side profiles land (tracked in `docs/PROTOCOL-GAP-CLOSURE-PLAN.md` Phase-1 tracks T1.1 onward). Hosts that deliberately don't implement a profile can list it in `OPENWOP_OPTED_OUT_PROFILES=name1,name2,...` to skip even in strict mode — minimal hosts can claim full strict-mode coverage without falsifying advertisements.
80
93
 
81
94
  ---
82
95
 
@@ -96,6 +109,7 @@ Every OpenAPI operation should have:
96
109
  | `getWorkflow` | `route-coverage.test.ts`; fixture-dependent lifecycle tests indirectly require seeded workflow IDs | `route-coverage.test.ts` covers unknown workflow `404`/`403` envelope | Good. |
97
110
  | `createRun` | `runs-lifecycle.test.ts`, `identity-passthrough.test.ts`, `failure-path.test.ts`, fixture scenarios | `auth.test.ts`, `errors.test.ts`, `idempotency.test.ts`, `idempotencyRetry.test.ts` | Strong baseline; add per-field validation matrix. |
98
111
  | `getRun` | Lifecycle, cancellation, interrupt, replay, and subworkflow tests poll snapshots | `failure-path.test.ts`, `errors.test.ts` | Add explicit unknown-run `404` scenario if not already covered through helper assertions. |
112
+ | `getRunAncestry` | `cross-host-ancestry-endpoint.test.ts`, `cross-host-causation-shape.test.ts` (RFC 0040 §C); capability-gated on `capabilities.multiAgent.executionModel.crossHostCausation.ancestryEndpointSupported` | Unadvertised-host 404 path + top-level `parent: null` shape covered | Add positive multi-hop traversal once a reference host implements end-to-end cross-host composition. |
99
113
  | `streamRunEvents` | `stream-modes.test.ts`, `stream-modes-buffer.test.ts`, `stream-modes-mixed.test.ts`, `streamReconnect.test.ts` | Unsupported mode and invalid buffer assertions | Add long-running proxy timeout soak outside fast CI. |
100
114
  | `pollRunEvents` | `multi-node-ordering.test.ts`, `version-negotiation.test.ts`, redaction tests | Past-end and validation assertions | Good. Add malformed `lastSequence` if missing. |
101
115
  | `cancelRun` | `cancellation.test.ts` | Unknown/terminal idempotency cases partial | Add explicit already-terminal cancel behavior. |
@@ -110,6 +124,12 @@ Every OpenAPI operation should have:
110
124
  | `getArtifact` | Indirect through approval payload fixtures | `route-coverage.test.ts` covers unknown artifact `404`/`403` envelope; `artifact-auth.test.ts` (CF-4 close-out 2026-05-15; SQLite host 401-before-404 stub landed 2026-05-19, closes the info-leak surface for every HTTP method) covers `401` unauthenticated path | Negative paths covered (401 + 405 non-GET + 404/403). Add positive artifact-read scenario once a reference host implements `getArtifact` end-to-end. |
111
125
  | `registerWebhook` | Webhook spec exists | `route-coverage.test.ts` covers invalid URL validation envelope | Add positive registration with a test receiver when harness support exists. |
112
126
  | `unregisterWebhook` | Webhook spec exists | `route-coverage.test.ts` covers unknown subscription behavior | Add full register-then-unregister roundtrip with a test receiver. |
127
+ | `listPromptTemplates` | `prompt-template-shape.test.ts` covers schema shape + advertisement contract for `capabilities.prompts.*`; capability-gated behavioral list-with-filter scenarios deferred to RFC 0028 acceptance gate | n/a yet — endpoint surface in spec only (RFC 0028 Draft); reference host hasn't implemented the route yet | Add positive list-with-filter scenario + auth-failure + invalid-cursor scenarios once a reference host implements the route. |
128
+ | `createPromptTemplate` | None — endpoint surface in spec only (RFC 0028 Draft) | n/a yet | Add positive create + `409` duplicate + `501` not-mutable-library + auth/scope scenarios once a reference host implements the route. |
129
+ | `getPromptTemplate` | None — endpoint surface in spec only | n/a yet | Add positive fetch + `404` unknown + `400` ambiguous-libraryId + `ETag` revalidation scenarios. |
130
+ | `updatePromptTemplate` | None — endpoint surface in spec only | n/a yet | Add positive update + `409` non-monotonic-version + `403` pack-sourced-readonly + `501` not-mutable-library scenarios. |
131
+ | `deletePromptTemplate` | None — endpoint surface in spec only | n/a yet | Add positive delete + `403` pack-sourced-readonly + `404` unknown + `501` not-mutable-library scenarios. |
132
+ | `renderPromptTemplate` | `prompt-composed-secret-redaction.test.ts` + `prompt-composed-trust-marker.test.ts` exercise the compose pipeline via the `/v1/host/sample/prompt/compose` host-extension seam; capability-gated. The spec'd `POST /v1/prompts:render` endpoint shares the same composition pipeline (RFC 0028 §A deterministic-render invariant matches RFC 0027 §F replay invariant). | Composition redaction + trust-marker invariants covered via the seam | Add positive `:render` via the spec'd endpoint + `400 prompt_variable_unresolved` + `404 template_not_found` once a reference host implements the route. |
113
133
 
114
134
  ---
115
135
 
@@ -129,4 +149,4 @@ Every OpenAPI operation should have:
129
149
  | P2 | End-to-end webhook signed-delivery test exercising `X-openwop-Signature-Algorithm: v1`. | `webhooks.md` |
130
150
  | P2 | Conformance scenarios that cite normative RFC docs (not just schemas) for the multi-agent surfaces. | RFCS/0002–0007 |
131
151
  | ✅ Closed 2026-05-17 (HV-1) | `agentPackHandoffSchemaValidation.test.ts` verifies RFC 0003 §D — host MUST validate dispatch payloads against `handoff.taskSchemaRef` AND return payloads against `handoff.returnSchemaRef`. Fixture `conformance-agent-pack-handoff-schema-validation` exercises 3 branches (valid-task / invalid-task / mock-return-violation). Capability-gated on `capabilities.agents.{supported,dispatch}`. | RFCS/0003-agent-packs.md §D |
132
- | Placeholders 2026-05-18 (HVMAP-1/2) | RFC 0022 (`Draft`) ships 4 `it.todo()` placeholder scenarios: `dispatch-input-mapping.test.ts`, `dispatch-output-mapping.test.ts`, `dispatch-cross-worker-handoff.test.ts`, `subworkflow-input-mapping.test.ts`. Fixtures: `conformance-dispatch-input-mapping`, `-output-mapping`, `-cross-worker-handoff`, `conformance-subworkflow-input-mapping`. Promote to live assertions when RFC 0022 reaches `Active` AND a reference host advertises `capabilities.agents.dispatchMapping` and/or `capabilities.subWorkflow.inputMapping`. | RFCS/0022-dispatch-input-output-mapping.md §A + §B |
152
+ | Closed 2026-05-19 (HVMAP-1/2) | RFC 0022 conformance fully landed across two cycles: 2026-05-18 promoted HVMAP-1a/1b/1c/2 happy paths from `it.todo()` to live behavioral tests against the Postgres reference host (advertising `capabilities.agents.dispatchMapping: true` + `capabilities.subWorkflow.inputMapping: true`); 2026-05-19 promoted all remaining negative-path cases — HVMAP-1a-null, HVMAP-1a-refusal, HVMAP-1b-failed, HVMAP-1b-cancelled, HVMAP-1c-override, HVMAP-2-unset, HVMAP-2-no-midrun-propagation, HVMAP-2-refusal — via the new capability-toggle seam (`conformance/src/lib/host-toggle.ts` + `POST /v1/host/sample/test/capability-toggle`) and 5 new fixtures (`-no-default` variants, `-per-worker-override`, `-deterministic-fail-child`, `-cancellable-child`). Republished as `@openwop/openwop-conformance@1.3.0`. | RFCS/0022-dispatch-input-output-mapping.md §A + §B + §C |
@@ -0,0 +1,41 @@
1
+ {
2
+ "id": "conformance-envelope-nl-to-format-engaged",
3
+ "name": "Conformance: envelope.nlToFormat.engaged (RFC 0032 §B.5)",
4
+ "version": "1.0",
5
+ "description": "Drives `core.ai.structuredOutput` against the conformance-only `mock` provider. The conformance scenario POSTs a 4-entry program: the first three attempts return natural-language prose (no JSON sigil at the start). The host's `dispatchStructured` retry loop exhausts on parse-error, detects the NL shape, and fires ONE additional dispatch with a corrective coercion fragment — emitting `envelope.nlToFormat.engaged { originalEnvelopeType, fallbackCalls: 1 }` BEFORE the secondary call. The pre-seeded 4th program entry returns valid JSON, the schema validates, and the run terminates `completed`.",
6
+ "nodes": [
7
+ {
8
+ "id": "structured-call",
9
+ "typeId": "core.ai.structuredOutput",
10
+ "name": "Structured output via mock provider (NL responses)",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "provider": "mock",
14
+ "model": "mock-mini",
15
+ "outputSchema": {
16
+ "$id": "https://example.test/spec/v1/conformance-envelope.schema.json",
17
+ "title": "TestEnvelope",
18
+ "type": "object",
19
+ "required": ["result"],
20
+ "properties": { "result": { "type": "string" } }
21
+ },
22
+ "retryOnInvalidJson": 0
23
+ },
24
+ "inputs": {
25
+ "messages": {
26
+ "type": "static",
27
+ "value": [{ "role": "user", "content": "Please emit a structured envelope." }]
28
+ }
29
+ }
30
+ }
31
+ ],
32
+ "edges": [],
33
+ "triggers": [
34
+ { "id": "manual", "type": "manual", "enabled": true }
35
+ ],
36
+ "variables": [],
37
+ "metadata": {
38
+ "tags": ["conformance", "rfc-0032", "envelope-reliability", "nl-to-format-engaged"]
39
+ },
40
+ "settings": { "timeout": 30000 }
41
+ }
@@ -0,0 +1,39 @@
1
+ {
2
+ "id": "conformance-envelope-recovery-applied",
3
+ "name": "Conformance: envelope.recovery.applied (RFC 0032 §B.6)",
4
+ "version": "1.0",
5
+ "description": "Drives `core.ai.structuredOutput` against the conformance-only `mock` provider. The conformance scenario POSTs a 1-entry program to `/v1/host/sample/test/mock-ai/program` (keyed by `nodeId: 'structured-call'`) BEFORE starting the run: the mock returns a markdown-fenced JSON envelope (e.g., ```json\\n{\"result\":\"ok\"}\\n```). The host's `dispatchStructured` lenient-parse fallback strips the fence via `tryLenientParse(text)`, emits exactly one `envelope.recovery.applied` with `path: 'markdown-fence'`, and accepts the parsed value WITHOUT counting against the retry budget per RFC 0033 §D. Run terminates `completed`.",
6
+ "nodes": [
7
+ {
8
+ "id": "structured-call",
9
+ "typeId": "core.ai.structuredOutput",
10
+ "name": "Structured output via mock provider (markdown-fenced)",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "provider": "mock",
14
+ "model": "mock-mini",
15
+ "outputSchema": {
16
+ "type": "object",
17
+ "required": ["result"],
18
+ "properties": { "result": { "type": "string" } }
19
+ },
20
+ "retryOnInvalidJson": 0
21
+ },
22
+ "inputs": {
23
+ "messages": {
24
+ "type": "static",
25
+ "value": [{ "role": "user", "content": "Please emit a markdown-fenced envelope." }]
26
+ }
27
+ }
28
+ }
29
+ ],
30
+ "edges": [],
31
+ "triggers": [
32
+ { "id": "manual", "type": "manual", "enabled": true }
33
+ ],
34
+ "variables": [],
35
+ "metadata": {
36
+ "tags": ["conformance", "rfc-0032", "envelope-reliability", "recovery-applied"]
37
+ },
38
+ "settings": { "timeout": 30000 }
39
+ }
@@ -0,0 +1,38 @@
1
+ {
2
+ "id": "conformance-envelope-refusal",
3
+ "name": "Conformance: envelope.refusal end-to-end (RFC 0032 §B.3 + RFC 0033 §D + §F)",
4
+ "version": "1.0",
5
+ "description": "Single `core.ai.structuredOutput` node against the conformance `mock` provider with a pre-seeded program returning `stopReason: 'safety'` + `refusalText: '...'` on attempt 1. Host's `dispatchStructured()` MUST: (a) emit exactly one `envelope.refusal` event with the canonical payload shape; (b) NOT retry (RFC 0032 §B.3 + RFC 0033 §D — refusal is terminal); (c) fail the node with `error.code: 'envelope_refused_by_provider'` per RFC 0033 §F; (d) NOT echo the refusal text in `RunSnapshot.error.message` (SECURITY invariant `envelope-refusal-no-prompt-leak` — refusal text lives only on the event-log entry, scrubbed via the existing SR-1 redaction harness).",
6
+ "nodes": [
7
+ {
8
+ "id": "structured-call",
9
+ "typeId": "core.ai.structuredOutput",
10
+ "name": "Structured output via mock provider (refusal)",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "provider": "mock",
14
+ "model": "mock-mini",
15
+ "outputSchema": {
16
+ "type": "object",
17
+ "required": ["valid"],
18
+ "properties": { "valid": { "type": "boolean" } }
19
+ }
20
+ },
21
+ "inputs": {
22
+ "messages": {
23
+ "type": "static",
24
+ "value": [{ "role": "user", "content": "Please emit a valid envelope." }]
25
+ }
26
+ }
27
+ }
28
+ ],
29
+ "edges": [],
30
+ "triggers": [
31
+ { "id": "manual", "type": "manual", "enabled": true }
32
+ ],
33
+ "variables": [],
34
+ "metadata": {
35
+ "tags": ["conformance", "rfc-0032", "rfc-0033", "envelope-reliability", "refusal"]
36
+ },
37
+ "settings": { "timeout": 30000 }
38
+ }
@@ -0,0 +1,39 @@
1
+ {
2
+ "id": "conformance-envelope-retry-attempted",
3
+ "name": "Conformance: envelope.retry.attempted (RFC 0032 §B.1)",
4
+ "version": "1.0",
5
+ "description": "Drives `core.ai.structuredOutput` against the conformance-only `mock` provider. The conformance scenario POSTs a 2-entry program to `/v1/host/sample/test/mock-ai/program` (keyed by `nodeId: 'structured-call'`) BEFORE starting the run: attempt 1 returns invalid JSON, attempt 2 returns a valid envelope. The host's `dispatchStructured` retry loop MUST emit exactly one `envelope.retry.attempted` event with `attempt: 2` between the two provider calls (RFC 0032 §B.1). The run terminates `completed` after the second attempt succeeds.",
6
+ "nodes": [
7
+ {
8
+ "id": "structured-call",
9
+ "typeId": "core.ai.structuredOutput",
10
+ "name": "Structured output via mock provider",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "provider": "mock",
14
+ "model": "mock-mini",
15
+ "outputSchema": {
16
+ "type": "object",
17
+ "required": ["valid"],
18
+ "properties": { "valid": { "type": "boolean" } }
19
+ },
20
+ "retryOnInvalidJson": 0
21
+ },
22
+ "inputs": {
23
+ "messages": {
24
+ "type": "static",
25
+ "value": [{ "role": "user", "content": "Please emit a valid envelope." }]
26
+ }
27
+ }
28
+ }
29
+ ],
30
+ "edges": [],
31
+ "triggers": [
32
+ { "id": "manual", "type": "manual", "enabled": true }
33
+ ],
34
+ "variables": [],
35
+ "metadata": {
36
+ "tags": ["conformance", "rfc-0032", "envelope-reliability", "retry-attempted"]
37
+ },
38
+ "settings": { "timeout": 30000 }
39
+ }
@@ -0,0 +1,38 @@
1
+ {
2
+ "id": "conformance-envelope-retry-exhausted",
3
+ "name": "Conformance: envelope.retry.exhausted (RFC 0032 §B.2)",
4
+ "version": "1.0",
5
+ "description": "Drives `core.ai.structuredOutput` against the conformance `mock` provider with a program that returns invalid JSON on EVERY attempt. The host's `dispatchStructured` retry loop MUST exhaust its budget and emit exactly one `envelope.retry.exhausted` event with `finalReason: 'schema-violation'` (or `'parse-error'` per RFC 0032 §B.1 reason enum). The node MUST fail with `error.code: 'envelope_payload_invalid'` (existing RFC 0021 code per RFC 0033 §C). Pairs with `envelope-retry-exhausted.test.ts`.",
6
+ "nodes": [
7
+ {
8
+ "id": "structured-call",
9
+ "typeId": "core.ai.structuredOutput",
10
+ "name": "Structured output via mock provider (always invalid)",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "provider": "mock",
14
+ "model": "mock-mini",
15
+ "outputSchema": {
16
+ "type": "object",
17
+ "required": ["valid"],
18
+ "properties": { "valid": { "type": "boolean" } }
19
+ }
20
+ },
21
+ "inputs": {
22
+ "messages": {
23
+ "type": "static",
24
+ "value": [{ "role": "user", "content": "Please emit a valid envelope." }]
25
+ }
26
+ }
27
+ }
28
+ ],
29
+ "edges": [],
30
+ "triggers": [
31
+ { "id": "manual", "type": "manual", "enabled": true }
32
+ ],
33
+ "variables": [],
34
+ "metadata": {
35
+ "tags": ["conformance", "rfc-0032", "envelope-reliability", "retry-exhausted"]
36
+ },
37
+ "settings": { "timeout": 30000 }
38
+ }
@@ -0,0 +1,39 @@
1
+ {
2
+ "id": "conformance-envelope-truncated",
3
+ "name": "Conformance: envelope.truncated (RFC 0032 §B.4 + RFC 0033 §B)",
4
+ "version": "1.0",
5
+ "description": "Drives `core.ai.structuredOutput` against the conformance `mock` provider with a program: attempt 1 returns `stopReason: 'max_tokens'` (truncation); attempt 2 returns a valid envelope. The host's `dispatchStructured` retry loop MUST: (a) emit exactly one `envelope.truncated` event with `stopReason: 'max_tokens'`; (b) retry with an INCREASED output budget per RFC 0033 §B (the host's `truncationBudgetMultiplier` — default 2×); (c) NOT inject the corrective schema fragment on the truncation retry (truncation is an output-size problem, not a schema problem). Eventually completes after attempt 2 succeeds. Pairs with `envelope-truncated.test.ts`.",
6
+ "nodes": [
7
+ {
8
+ "id": "structured-call",
9
+ "typeId": "core.ai.structuredOutput",
10
+ "name": "Structured output (truncation then success)",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "provider": "mock",
14
+ "model": "mock-mini",
15
+ "maxTokens": 50,
16
+ "outputSchema": {
17
+ "type": "object",
18
+ "required": ["valid"],
19
+ "properties": { "valid": { "type": "boolean" } }
20
+ }
21
+ },
22
+ "inputs": {
23
+ "messages": {
24
+ "type": "static",
25
+ "value": [{ "role": "user", "content": "Please emit a valid envelope." }]
26
+ }
27
+ }
28
+ }
29
+ ],
30
+ "edges": [],
31
+ "triggers": [
32
+ { "id": "manual", "type": "manual", "enabled": true }
33
+ ],
34
+ "variables": [],
35
+ "metadata": {
36
+ "tags": ["conformance", "rfc-0032", "rfc-0033", "envelope-reliability", "truncated"]
37
+ },
38
+ "settings": { "timeout": 30000 }
39
+ }
@@ -0,0 +1,39 @@
1
+ {
2
+ "id": "conformance-envelope-truncation-cap-exhaustion",
3
+ "name": "Conformance: envelope.truncation-cap-exhaustion (RFC 0033 §B + §F)",
4
+ "version": "1.0",
5
+ "description": "Drives `core.ai.structuredOutput` against the conformance `mock` provider with a program that returns `stopReason: 'max_tokens'` on EVERY attempt. The host's `dispatchStructured` retry loop MUST: (a) emit `envelope.truncated` on each attempt (or at least the first one — RFC 0032 §B.4 is per-attempt); (b) double the budget each retry (RFC 0033 §B); (c) exhaust retries after `maxRetryAttempts` (default 3); (d) emit exactly one `envelope.retry.exhausted` with `finalReason: 'truncation'`; (e) emit `cap.breached` with `kind: 'schema'`; (f) fail the node with `error.code: 'envelope_truncation_unrecoverable'` per RFC 0033 §F. The run does NOT exceed `maxRetryAttempts` total LLM calls — DoS-bound assertion. Pairs with `envelope-truncation-cap-exhaustion.test.ts`.",
6
+ "nodes": [
7
+ {
8
+ "id": "structured-call",
9
+ "typeId": "core.ai.structuredOutput",
10
+ "name": "Structured output (perpetual truncation → cap exhaustion)",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "provider": "mock",
14
+ "model": "mock-mini",
15
+ "maxTokens": 50,
16
+ "outputSchema": {
17
+ "type": "object",
18
+ "required": ["valid"],
19
+ "properties": { "valid": { "type": "boolean" } }
20
+ }
21
+ },
22
+ "inputs": {
23
+ "messages": {
24
+ "type": "static",
25
+ "value": [{ "role": "user", "content": "Please emit a valid envelope." }]
26
+ }
27
+ }
28
+ }
29
+ ],
30
+ "edges": [],
31
+ "triggers": [
32
+ { "id": "manual", "type": "manual", "enabled": true }
33
+ ],
34
+ "variables": [],
35
+ "metadata": {
36
+ "tags": ["conformance", "rfc-0032", "rfc-0033", "envelope-reliability", "truncation-cap-exhaustion", "DoS-bound"]
37
+ },
38
+ "settings": { "timeout": 30000 }
39
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "id": "conformance-model-capability-insufficient",
3
+ "name": "Conformance: model capability insufficient (RFC 0031 §B step 4 + §D)",
4
+ "version": "1.0",
5
+ "description": "Single `conformance.modelCapability.insufficient` node whose NodeModule declares `requiredModelCapabilities: ['nonexistent-capability-9b3f']` — an identifier outside the spec-reserved set. The executor's model-capability gate (executor.ts:230-289) MUST refuse at dispatch time, emitting `model.capability.insufficient` BEFORE `node.failed` per RFC 0031 §D. Pairs with `model-capability-insufficient.test.ts` end-to-end branch.",
6
+ "nodes": [
7
+ {
8
+ "id": "insufficient-node",
9
+ "typeId": "conformance.modelCapability.insufficient",
10
+ "name": "Always refuses on capability gate",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {},
13
+ "inputs": {}
14
+ }
15
+ ],
16
+ "edges": [],
17
+ "triggers": [
18
+ { "id": "manual", "type": "manual", "enabled": true }
19
+ ],
20
+ "variables": [],
21
+ "metadata": {
22
+ "tags": ["conformance", "rfc-0031", "model-capability", "insufficient"]
23
+ },
24
+ "settings": { "timeout": 5000 }
25
+ }
@@ -0,0 +1,49 @@
1
+ {
2
+ "id": "conformance-multi-agent-confidence-escalation",
3
+ "name": "Conformance: Multi-Agent Confidence-Floor Escalation (RFC 0039 §A)",
4
+ "version": "1.0",
5
+ "description": "RFC 0039 §A — exercises the confidence-floor escalation contract introduced by Phase 2 of the multi-agent execution model. Supervisor plan: ONE `next-worker` decision carrying `confidence: 0.3` (below the 0.5 spec floor). The `core.dispatch` node MUST escalate via clarification interrupt BEFORE any dispatch.began event fires; conformance asserts the run reaches `waiting-clarification` AND the event log carries exactly one `core.workflowChain.confidence-escalated` AND zero `core.workflowChain.event` records (no dispatch fired). Capability-gated on `capabilities.multiAgent.executionModel.version >= 2`. See multi-agent-confidence-escalation.test.ts.",
6
+ "nodes": [
7
+ {
8
+ "id": "supervisor",
9
+ "typeId": "core.orchestrator.supervisor",
10
+ "name": "Supervisor (mock plan with low-confidence decision)",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "mockDispatchPlan": [
14
+ {
15
+ "kind": "next-worker",
16
+ "nextWorkerIds": ["conformance-multi-agent-handoff-child"],
17
+ "confidence": 0.3
18
+ }
19
+ ]
20
+ },
21
+ "inputs": {}
22
+ },
23
+ {
24
+ "id": "dispatch",
25
+ "typeId": "core.dispatch",
26
+ "name": "Dispatch (confidence gate fires before worker spawn)",
27
+ "position": { "x": 200, "y": 0 },
28
+ "config": {
29
+ "askUserRouting": "auto",
30
+ "workerDispatchModel": "child-run",
31
+ "fanOutPolicy": "sequential"
32
+ },
33
+ "inputs": {}
34
+ }
35
+ ],
36
+ "edges": [
37
+ { "id": "e1", "sourceNodeId": "supervisor", "targetNodeId": "dispatch" },
38
+ { "id": "e2", "sourceNodeId": "dispatch", "targetNodeId": "supervisor" }
39
+ ],
40
+ "triggers": [
41
+ { "id": "manual", "type": "manual", "enabled": true }
42
+ ],
43
+ "variables": [],
44
+ "metadata": {
45
+ "tags": ["conformance", "rfc-0039", "multi-agent", "confidence", "escalation"],
46
+ "requiresCapability": "capabilities.multiAgent.executionModel.version-2"
47
+ },
48
+ "settings": { "timeout": 30000 }
49
+ }
@@ -0,0 +1,27 @@
1
+ {
2
+ "id": "conformance-multi-agent-handoff-child",
3
+ "name": "Conformance: Multi-Agent Handoff State Machine (RFC 0037 Phase 1) — child",
4
+ "version": "1.0",
5
+ "description": "Child fixture for `conformance-multi-agent-handoff`. Declares `childOutcome.defaultValue='handoff-complete'`; the variable is folded into the child's initial `variables_json` at run-create time. On terminal `completed`, the parent's `outputMapping` harvests `childOutcome` onto `parentResult`, which triggers the `output.harvested` transition event per RFC 0037 §\"Handoff state machine\".",
6
+ "nodes": [
7
+ {
8
+ "id": "noop",
9
+ "typeId": "core.identity",
10
+ "name": "Noop (childOutcome seeded by defaultValue)",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {},
13
+ "inputs": {}
14
+ }
15
+ ],
16
+ "edges": [],
17
+ "triggers": [
18
+ { "id": "manual", "type": "manual", "enabled": true }
19
+ ],
20
+ "variables": [
21
+ { "name": "childOutcome", "type": "string", "defaultValue": "handoff-complete" }
22
+ ],
23
+ "metadata": {
24
+ "tags": ["conformance", "rfc-0037", "multi-agent", "handoff", "child"]
25
+ },
26
+ "settings": { "timeout": 5000 }
27
+ }
@@ -0,0 +1,49 @@
1
+ {
2
+ "id": "conformance-multi-agent-handoff",
3
+ "name": "Conformance: Multi-Agent Handoff State Machine (RFC 0037 Phase 1)",
4
+ "version": "1.0",
5
+ "description": "RFC 0037 Phase 1 — exercises the planner→worker handoff state machine. Supervisor plan: `next-worker: ['conformance-multi-agent-handoff-child']` → `terminate`. The dispatch consumes one decision, spawns the child, harvests `childOutcome` into `parentResult` (outputMapping is non-empty so the `output.harvested` transition fires per RFC 0022 §A + RFC 0037 §\"Handoff state machine\" terminal-row constraint). Conformance reads the parent's event log and asserts 4 `core.workflowChain.event` records appear in causation-chained order: dispatch.began → dispatch.succeeded → child.completed → output.harvested. Gated on `capabilities.multiAgent.executionModel.supported: true`. See multi-agent-handoff-state-machine.test.ts.",
6
+ "nodes": [
7
+ {
8
+ "id": "supervisor",
9
+ "typeId": "core.orchestrator.supervisor",
10
+ "name": "Supervisor (mock plan)",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "mockDispatchPlan": [
14
+ { "kind": "next-worker", "nextWorkerIds": ["conformance-multi-agent-handoff-child"] },
15
+ { "kind": "terminate", "reason": "goal-reached" }
16
+ ]
17
+ },
18
+ "inputs": {}
19
+ },
20
+ {
21
+ "id": "dispatch",
22
+ "typeId": "core.dispatch",
23
+ "name": "Dispatch with outputMapping (drives output.harvested)",
24
+ "position": { "x": 200, "y": 0 },
25
+ "config": {
26
+ "askUserRouting": "auto",
27
+ "workerDispatchModel": "child-run",
28
+ "fanOutPolicy": "sequential",
29
+ "outputMapping": { "parentResult": "childOutcome" }
30
+ },
31
+ "inputs": {}
32
+ }
33
+ ],
34
+ "edges": [
35
+ { "id": "e1", "sourceNodeId": "supervisor", "targetNodeId": "dispatch" },
36
+ { "id": "e2", "sourceNodeId": "dispatch", "targetNodeId": "supervisor" }
37
+ ],
38
+ "triggers": [
39
+ { "id": "manual", "type": "manual", "enabled": true }
40
+ ],
41
+ "variables": [
42
+ { "name": "parentResult", "type": "string", "defaultValue": "" }
43
+ ],
44
+ "metadata": {
45
+ "tags": ["conformance", "rfc-0037", "multi-agent", "handoff"],
46
+ "requiresCapability": "capabilities.multiAgent.executionModel.supported"
47
+ },
48
+ "settings": { "timeout": 30000 }
49
+ }
@@ -0,0 +1,39 @@
1
+ {
2
+ "id": "conformance-prompt-all-four-kinds",
3
+ "name": "Conformance: Prompt-Library All Four Kinds End-to-End",
4
+ "version": "1.0",
5
+ "description": "RFC 0027 §A end-to-end coverage of all four `PromptKind` values (system, user, schema-hint, few-shot) with a MULTI-ENTRY `fewShotPromptRefs[]` array so the resolver's per-index lookup (`fewShotPromptRefs[slotIndex]`) is regression-pinned. Single mock-ai node configured with one ref per singular-kind slot and two distinct templateIds in the few-shot array; when the node executes, the host MUST resolve each ref via the four-layer chain (RFC 0029 §A), emit one `agent.promptResolved` event per kind+slot (5 events total: one each for system, user, and schema-hint, plus one for each of the two few-shot slots), compose the body (RFC 0027 §E), emit one `prompt.composed` event per composition (5 events), and complete the run successfully. Capability-gated: host MUST advertise `capabilities.prompts.supported: true`. See conformance/src/scenarios/prompt-all-four-kinds-events.test.ts.",
6
+ "nodes": [
7
+ {
8
+ "id": "all-kinds",
9
+ "typeId": "local.sample.demo.mock-ai",
10
+ "name": "All-kinds (mock AI)",
11
+ "position": { "x": 0, "y": 0 },
12
+ "config": {
13
+ "systemPromptRef": "prompt:conformance.prompt.writer-system@1.0.0",
14
+ "userPromptRef": "prompt:conformance.prompt.writer-user@1.0.0",
15
+ "schemaHintPromptRef": "prompt:conformance.prompt.schema-hint@1.0.0",
16
+ "fewShotPromptRefs": [
17
+ "prompt:conformance.prompt.few-shot@1.0.0",
18
+ "prompt:conformance.prompt.few-shot-2@1.0.0"
19
+ ]
20
+ },
21
+ "inputs": {}
22
+ }
23
+ ],
24
+ "edges": [],
25
+ "triggers": [
26
+ { "id": "manual", "type": "manual", "enabled": true }
27
+ ],
28
+ "variables": [
29
+ {
30
+ "name": "prompt",
31
+ "type": "string",
32
+ "description": "Optional inline prompt content. The mock-ai node prefers the composed bodies (concatenated in system → schema-hint → few-shot exemplars → user order) when refs are present; this variable is the fallback baseline if no ref resolves.",
33
+ "required": false,
34
+ "defaultValue": "conformance baseline prompt"
35
+ }
36
+ ],
37
+ "metadata": { "tags": ["conformance", "prompts", "all-four-kinds"] },
38
+ "settings": { "timeout": 10000 }
39
+ }