@openwop/openwop-conformance 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. package/CHANGELOG.md +156 -1
  2. package/README.md +3 -2
  3. package/api/asyncapi.yaml +8 -0
  4. package/api/openapi.yaml +371 -1
  5. package/api/redocly.yaml +15 -0
  6. package/coverage.md +26 -5
  7. package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
  8. package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
  9. package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
  10. package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
  11. package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
  12. package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
  13. package/fixtures/conformance-envelope-recovery-applied.json +39 -0
  14. package/fixtures/conformance-envelope-refusal.json +38 -0
  15. package/fixtures/conformance-envelope-retry-attempted.json +39 -0
  16. package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
  17. package/fixtures/conformance-envelope-truncated.json +39 -0
  18. package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
  19. package/fixtures/conformance-model-capability-insufficient.json +25 -0
  20. package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
  21. package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
  22. package/fixtures/conformance-multi-agent-handoff.json +49 -0
  23. package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
  24. package/fixtures/conformance-prompt-end-to-end.json +33 -0
  25. package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
  26. package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
  27. package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
  28. package/fixtures/openwop-smoke-cost-emit.json +37 -0
  29. package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
  30. package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
  31. package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
  32. package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
  33. package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
  34. package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
  35. package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
  36. package/fixtures.md +45 -0
  37. package/package.json +1 -1
  38. package/schemas/README.md +5 -0
  39. package/schemas/agent-manifest.schema.json +16 -0
  40. package/schemas/capabilities.schema.json +390 -0
  41. package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
  42. package/schemas/envelopes/clarification.request.schema.json +9 -0
  43. package/schemas/envelopes/error.schema.json +4 -0
  44. package/schemas/envelopes/schema.request.schema.json +4 -0
  45. package/schemas/envelopes/schema.response.schema.json +1 -1
  46. package/schemas/node-pack-manifest.schema.json +28 -0
  47. package/schemas/orchestrator-decision.schema.json +12 -0
  48. package/schemas/prompt-kind.schema.json +8 -0
  49. package/schemas/prompt-pack-manifest.schema.json +80 -0
  50. package/schemas/prompt-ref.schema.json +40 -0
  51. package/schemas/prompt-template.schema.json +149 -0
  52. package/schemas/registry-version-manifest.schema.json +5 -0
  53. package/schemas/run-ancestry-response.schema.json +54 -0
  54. package/schemas/run-event-payloads.schema.json +513 -11
  55. package/schemas/run-event.schema.json +17 -1
  56. package/schemas/run-snapshot.schema.json +3 -2
  57. package/schemas/workflow-definition.schema.json +19 -1
  58. package/src/lib/driver.ts +15 -0
  59. package/src/lib/env.ts +51 -0
  60. package/src/lib/event-log-query.ts +62 -0
  61. package/src/lib/fixtures.ts +38 -1
  62. package/src/lib/host-toggle.ts +54 -0
  63. package/src/lib/llm-cache-key-recipe.ts +68 -0
  64. package/src/lib/multi-agent-capabilities.ts +10 -0
  65. package/src/lib/otel-scrape.ts +59 -0
  66. package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
  67. package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
  68. package/src/scenarios/aiEnvelope.contractRefusal.test.ts +224 -15
  69. package/src/scenarios/aiEnvelope.correlationReplay.test.ts +257 -25
  70. package/src/scenarios/aiEnvelope.redaction.test.ts +210 -29
  71. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +163 -24
  72. package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +262 -12
  73. package/src/scenarios/aiEnvelope.universalKinds.test.ts +107 -16
  74. package/src/scenarios/blob-presign-expiry.test.ts +42 -9
  75. package/src/scenarios/blob-roundtrip.test.ts +0 -0
  76. package/src/scenarios/cache-ttl-expiry.test.ts +34 -8
  77. package/src/scenarios/cost-attribution.test.ts +124 -11
  78. package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
  79. package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
  80. package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
  81. package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
  82. package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
  83. package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
  84. package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
  85. package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
  86. package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
  87. package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
  88. package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
  89. package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
  90. package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
  91. package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
  92. package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
  93. package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
  94. package/src/scenarios/envelope-truncated.test.ts +136 -0
  95. package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
  96. package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
  97. package/src/scenarios/fixtures-gating.test.ts +139 -1
  98. package/src/scenarios/fixtures-valid.test.ts +123 -15
  99. package/src/scenarios/kv-ttl-expiry.test.ts +40 -9
  100. package/src/scenarios/model-capability-insufficient.test.ts +221 -0
  101. package/src/scenarios/model-capability-substituted.test.ts +203 -0
  102. package/src/scenarios/multi-agent-confidence-escalation.test.ts +164 -0
  103. package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
  104. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
  105. package/src/scenarios/multi-region-idempotency.test.ts +58 -0
  106. package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
  107. package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
  108. package/src/scenarios/pack-registry-publish.test.ts +231 -51
  109. package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
  110. package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
  111. package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
  112. package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
  113. package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
  114. package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
  115. package/src/scenarios/prompt-pack-install.test.ts +187 -0
  116. package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
  117. package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
  118. package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
  119. package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
  120. package/src/scenarios/prompt-template-shape.test.ts +359 -0
  121. package/src/scenarios/provider-usage.test.ts +185 -0
  122. package/src/scenarios/queue-ack-nack-dlq.test.ts +64 -10
  123. package/src/scenarios/queue-publish-consume-roundtrip.test.ts +50 -10
  124. package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
  125. package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
  126. package/src/scenarios/replay-llm-cache-key.test.ts +127 -25
  127. package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
  128. package/src/scenarios/sandbox-capability-gate-respected.test.ts +31 -0
  129. package/src/scenarios/sandbox-memory-cap.test.ts +61 -0
  130. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +35 -0
  131. package/src/scenarios/sandbox-no-host-env-leak.test.ts +38 -0
  132. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +91 -0
  133. package/src/scenarios/sandbox-no-host-process-escape.test.ts +30 -0
  134. package/src/scenarios/sandbox-no-network-escape.test.ts +49 -0
  135. package/src/scenarios/sandbox-timeout-cap.test.ts +61 -0
  136. package/src/scenarios/search-bm25-roundtrip.test.ts +54 -9
  137. package/src/scenarios/spec-corpus-validity.test.ts +34 -6
  138. package/src/scenarios/sql-transaction-atomicity.test.ts +37 -8
  139. package/src/scenarios/stream-subscribe-from-beginning.test.ts +46 -9
  140. package/src/scenarios/subworkflow-input-mapping.test.ts +146 -10
  141. package/src/scenarios/table-cursor-pagination.test.ts +47 -9
  142. package/src/scenarios/table-schema-enforcement.test.ts +46 -9
  143. package/src/scenarios/vector-knn-roundtrip.test.ts +50 -10
  144. package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
@@ -0,0 +1,60 @@
1
+ /**
2
+ * cross-host-traceparent-propagation — RFC 0040 §B behavioral (capability-gated).
3
+ *
4
+ * Status: ACTIVE (capability-gated; behavioral assertions remain `it.todo`
5
+ * until a cross-host MCP/A2A composition test fixture ships). Gated on
6
+ * `capabilities.multiAgent.executionModel.version >= 3` AND
7
+ * `capabilities.multiAgent.executionModel.crossHostCausation.supported: true`.
8
+ *
9
+ * Asserts (when host advertises Phase 3 + a real MCP/A2A composition
10
+ * endpoint is reachable):
11
+ *
12
+ * 1. An outbound MCP tool call dispatched from a Phase 3 host MUST
13
+ * carry the parent run's W3C `traceparent` header. The MCP server
14
+ * receives the header AND uses it as the parent trace for any
15
+ * spans it emits (closing the cross-host span linkage that
16
+ * RFC 0023's same-host coverage left open).
17
+ *
18
+ * 2. An inbound MCP tool reply OR A2A message handler MUST adopt the
19
+ * `traceparent` header from the inbound envelope as the trace
20
+ * parent for any subsequent events the receiving agent emits.
21
+ *
22
+ * 3. (Symmetric) Outbound A2A messages MUST carry the parent run's
23
+ * `traceparent`; inbound A2A handlers MUST adopt it.
24
+ *
25
+ * Behavioral wiring requires a cross-host test harness: either a real
26
+ * MCP server peer (`OPENWOP_MCP_REAL_SERVER_URL`) or an A2A peer
27
+ * (`OPENWOP_A2A_REAL_PEER_URL`) the host can call into. Without those,
28
+ * the assertion soft-skips and only the shape probe in
29
+ * cross-host-causation-shape.test.ts applies.
30
+ *
31
+ * @see RFCS/0040-multi-agent-cross-host-causation.md §B
32
+ * @see spec/v1/multi-agent-execution.md §"W3C tracecontext across MCP + A2A composition"
33
+ * @see RFCS/0023-conformance-agent-event-emitters.md (the same-host predecessor)
34
+ */
35
+
36
+ import { describe, it } from 'vitest';
37
+
38
+ // Behavioral assertions in this file are currently `it.todo` placeholders;
39
+ // the cross-host MCP / A2A peer harness (gated on OPENWOP_MCP_REAL_SERVER_URL
40
+ // / OPENWOP_A2A_REAL_PEER_URL) hasn't landed yet. When it does, the
41
+ // `it.todo` calls flip back to runnable `it(...)` bodies that read discovery
42
+ // (via `driver.get('/.well-known/openwop')`), gate on `Phase 3` advertisement,
43
+ // and drive the workflow through the configured real peer.
44
+
45
+ describe('cross-host-traceparent-propagation: behavioral (RFC 0040 §B)', () => {
46
+ // Behavioral assertion drives a workflow that calls an MCP tool via the
47
+ // host's `core.mcp.toolCall` node. The MCP peer (configured via
48
+ // OPENWOP_MCP_REAL_SERVER_URL) records inbound headers; the test reads
49
+ // the recorded headers and asserts `traceparent` is present + matches
50
+ // the format `00-{traceId}-{spanId}-{flags}` per W3C tracecontext.
51
+ // Until the peer harness lands, the assertion is surfaced as `todo` so
52
+ // test reporters track the gap rather than reporting a vacuous PASS.
53
+ it.todo('Phase 3 host MUST inject parent run\'s traceparent into outbound MCP requests');
54
+
55
+ // Behavioral assertion drives a workflow that dispatches an A2A message
56
+ // via the host's `core.a2a.send` (or equivalent) node. The A2A peer
57
+ // (configured via OPENWOP_A2A_REAL_PEER_URL) records inbound headers;
58
+ // the test asserts `traceparent` is present + well-formed.
59
+ it.todo('Phase 3 host MUST inject parent run\'s traceparent into outbound A2A messages');
60
+ });
@@ -92,7 +92,38 @@ describe.skipIf(SKIP)('dispatch-cross-worker-handoff: sequential child→parent
92
92
  )).toBe('hello');
93
93
  });
94
94
 
95
- it.todo(
96
- 'HVMAP-1c-override: per-worker mapping overrides default mapping. dispatch.inputMapping={input:"defaultX"}; perWorkerInputMappings.child-b={input:"sharedVar"}; child-b MUST receive inputs.input from sharedVar, NOT defaultX. Requires a fixture variant carrying both default + per-worker mappings.',
97
- );
95
+ it('HVMAP-1c-override: per-worker mapping overrides default mapping per §A effectiveInputMapping precedence', async () => {
96
+ const PARENT_OVERRIDE = 'conformance-dispatch-per-worker-override';
97
+ if (!isFixtureAdvertised(PARENT_OVERRIDE)) return; // fixture not seeded — soft-skip
98
+ const create = await driver.post('/v1/runs', { workflowId: PARENT_OVERRIDE });
99
+ expect(create.status).toBe(201);
100
+ const parentRunId = (create.json as { runId: string }).runId;
101
+ await pollUntilTerminal(parentRunId);
102
+
103
+ const eventsRes = await driver.get(`/v1/runs/${encodeURIComponent(parentRunId)}/events`);
104
+ const events = ((eventsRes.json as { events?: RunEvent[] } | undefined)?.events ?? []);
105
+ const dispatchedA = events.find((e) => e.type === 'node.dispatched' && e.payload?.childWorkflowId === CHILD_A);
106
+ const dispatchedB = events.find((e) => e.type === 'node.dispatched' && e.payload?.childWorkflowId === CHILD_B);
107
+ if (!dispatchedA || !dispatchedB) return;
108
+
109
+ const childARes = await driver.get(`/v1/runs/${encodeURIComponent(dispatchedA.payload!.childRunId!)}`);
110
+ const childBRes = await driver.get(`/v1/runs/${encodeURIComponent(dispatchedB.payload!.childRunId!)}`);
111
+ const childAInputs = (childARes.json as { inputs?: Record<string, unknown> }).inputs ?? {};
112
+ const childBInputs = (childBRes.json as { inputs?: Record<string, unknown> }).inputs ?? {};
113
+
114
+ expect(
115
+ childAInputs.input,
116
+ driver.describe(
117
+ 'RFCS/0022-dispatch-input-output-mapping.md §A',
118
+ 'child-a uses the DEFAULT inputMapping; input MUST come from parent.defaultX',
119
+ ),
120
+ ).toBe('default-x-value');
121
+ expect(
122
+ childBInputs.input,
123
+ driver.describe(
124
+ 'RFCS/0022-dispatch-input-output-mapping.md §A',
125
+ 'child-b uses the per-worker OVERRIDE; input MUST come from parent.sharedVar (NOT defaultX)',
126
+ ),
127
+ ).toBe('shared-value');
128
+ });
98
129
  });
@@ -24,6 +24,7 @@ import { describe, it, expect } from 'vitest';
24
24
  import { driver } from '../lib/driver.js';
25
25
  import { pollUntilTerminal } from '../lib/polling.js';
26
26
  import { isFixtureAdvertised } from '../lib/fixtures.js';
27
+ import { setHostCapability, resetHostCapabilities, isToggleAvailable } from '../lib/host-toggle.js';
27
28
 
28
29
  const PARENT = 'conformance-dispatch-input-mapping';
29
30
  const CHILD = 'conformance-dispatch-input-mapping-child';
@@ -84,11 +85,79 @@ describe.skipIf(SKIP)('dispatch-input-mapping: parent → child variable project
84
85
  )).toBe('Alice');
85
86
  });
86
87
 
87
- it.todo(
88
- 'HVMAP-1a-null: parent variable unset → child input surfaces as `undefined` (NOT omitted, NOT `null`) per §A normative bullet. Requires a fixture variant omitting parentName.defaultValue.',
89
- );
88
+ it('HVMAP-1a-null: parent variable unset → child input surfaces as `undefined` per §A', async () => {
89
+ const PARENT_NO_DEFAULT = 'conformance-dispatch-input-mapping-no-default';
90
+ if (!isFixtureAdvertised(PARENT_NO_DEFAULT) || !isFixtureAdvertised(CHILD)) return; // fixture not seeded — soft-skip
91
+ const create = await driver.post('/v1/runs', { workflowId: PARENT_NO_DEFAULT });
92
+ expect(create.status).toBe(201);
93
+ const parentRunId = (create.json as { runId: string }).runId;
94
+ await pollUntilTerminal(parentRunId);
90
95
 
91
- it.todo(
92
- 'HVMAP-1a-refusal: host advertises capabilities.agents.dispatch: true but NOT capabilities.agents.dispatchMapping: true; workflow with non-empty inputMapping MUST fail registration with validation_error + details.requiredCapability === "agents.dispatchMapping". Requires a host-capability-toggle hook in the conformance harness.',
93
- );
96
+ const eventsRes = await driver.get(`/v1/runs/${encodeURIComponent(parentRunId)}/events`);
97
+ const events = ((eventsRes.json as { events?: RunEvent[] } | undefined)?.events ?? []);
98
+ const dispatched = events.find(
99
+ (e) => e.type === 'node.dispatched' && e.payload?.childWorkflowId === CHILD,
100
+ );
101
+ if (!dispatched) return; // host doesn't emit node.dispatched — soft-skip
102
+ const childRunId = dispatched.payload?.childRunId;
103
+
104
+ const childRes = await driver.get(`/v1/runs/${encodeURIComponent(childRunId!)}`);
105
+ const child = childRes.json as RunSnapshot;
106
+ // Per RFC 0022 §A: an unset parent variable MUST surface as `undefined`.
107
+ // On the wire, `undefined` becomes either omitted from the JSON object
108
+ // OR explicit `null`; the spec REJECTS the latter. We accept either
109
+ // "key absent" or "key === undefined" but FAIL on `null`.
110
+ const inputs = child.inputs ?? {};
111
+ const v = inputs.childGreeting;
112
+ expect(
113
+ v === undefined || !('childGreeting' in inputs),
114
+ driver.describe(
115
+ 'RFCS/0022-dispatch-input-output-mapping.md §A',
116
+ 'unset parent variable projection MUST surface as undefined (NOT null, NOT a default placeholder)',
117
+ ),
118
+ ).toBe(true);
119
+ expect(v).not.toBe(null);
120
+ });
121
+
122
+ });
123
+
124
+ describe('dispatch-input-mapping: registration refusal (RFC 0022 §C HVMAP-1a-refusal)', () => {
125
+ it('host with agents.dispatchMapping toggled OFF MUST refuse non-empty inputMapping at registration', async () => {
126
+ if (!(await isToggleAvailable())) return; // seam not exposed — soft-skip
127
+ await setHostCapability('agents.dispatchMapping', false);
128
+ try {
129
+ const workflow = {
130
+ workflowId: `hvmap-1a-refusal-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
131
+ nodes: [
132
+ {
133
+ nodeId: 'dispatch-1',
134
+ typeId: 'core.dispatch',
135
+ config: {
136
+ nextWorkerIds: ['child-a'],
137
+ inputMapping: { childInput: 'parentVar' }, // non-empty — refusal trigger
138
+ },
139
+ },
140
+ ],
141
+ };
142
+ const res = await driver.post('/v1/host/sample/workflows', workflow);
143
+ expect(
144
+ res.status,
145
+ driver.describe(
146
+ 'RFCS/0022-dispatch-input-output-mapping.md §C',
147
+ 'workflow with non-empty inputMapping MUST be refused when capabilities.agents.dispatchMapping is not advertised',
148
+ ),
149
+ ).toBe(400);
150
+ const body = res.json as { error?: string; details?: { requiredCapability?: string } };
151
+ expect(body.error).toBe('validation_error');
152
+ expect(
153
+ body.details?.requiredCapability,
154
+ driver.describe(
155
+ 'RFCS/0022-dispatch-input-output-mapping.md §C',
156
+ 'refusal MUST surface requiredCapability: "agents.dispatchMapping"',
157
+ ),
158
+ ).toBe('agents.dispatchMapping');
159
+ } finally {
160
+ await resetHostCapabilities();
161
+ }
162
+ });
94
163
  });
@@ -55,11 +55,101 @@ describe.skipIf(SKIP)('dispatch-output-mapping: child → parent variable harves
55
55
  )).toBe('done');
56
56
  });
57
57
 
58
- it.todo(
59
- 'HVMAP-1b-failed: child terminates with `failed` status; outputMapping MUST be skipped; parent variables stay at pre-dispatch state for that child. Requires a child fixture that fails deterministically.',
60
- );
58
+ });
59
+
60
+ interface RunEvent { readonly type: string; readonly nodeId?: string; readonly payload?: { childRunId?: string; childWorkflowId?: string } & Record<string, unknown>; }
61
+
62
+ /** Register a parent workflow that dispatches to a specific child fixture
63
+ * with outputMapping. Returns the registered parent's workflowId.
64
+ * The parent declares `parentResult` with a sentinel default so the
65
+ * test can verify it stayed at the sentinel (NOT overwritten by
66
+ * outputMapping) when the child terminates non-completed. */
67
+ async function registerParent(childFixtureId: string): Promise<string | null> {
68
+ const workflowId = `hvmap-1b-${childFixtureId.replace(/[^a-zA-Z0-9_.-]/g, '-')}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
69
+ const def = {
70
+ workflowId,
71
+ nodes: [
72
+ {
73
+ nodeId: 'supervisor',
74
+ typeId: 'core.orchestrator.supervisor',
75
+ config: {
76
+ mockDispatchPlan: [
77
+ { kind: 'next-worker', nextWorkerIds: [childFixtureId] },
78
+ { kind: 'terminate', reason: 'goal-reached' },
79
+ ],
80
+ },
81
+ },
82
+ {
83
+ nodeId: 'dispatch',
84
+ typeId: 'core.dispatch',
85
+ config: {
86
+ askUserRouting: 'auto',
87
+ workerDispatchModel: 'child-run',
88
+ fanOutPolicy: 'sequential',
89
+ outputMapping: { parentResult: 'childOutcome' },
90
+ },
91
+ },
92
+ ],
93
+ };
94
+ const res = await driver.post('/v1/host/sample/workflows', def);
95
+ if (res.status !== 201) return null;
96
+ return workflowId;
97
+ }
98
+
99
+ describe.skipIf(!isFixtureAdvertised('conformance-dispatch-deterministic-fail-child'))('dispatch-output-mapping: HVMAP-1b-failed (RFC 0022 §B)', () => {
100
+ it('child terminates `failed` → outputMapping MUST be skipped; parent.parentResult stays at sentinel', async () => {
101
+ const parentId = await registerParent('conformance-dispatch-deterministic-fail-child');
102
+ if (!parentId) return; // workflow-register seam not exposed — soft-skip
103
+ const create = await driver.post('/v1/runs', { workflowId: parentId });
104
+ expect(create.status).toBe(201);
105
+ const parentRunId = (create.json as { runId: string }).runId;
106
+ const terminal = (await pollUntilTerminal(parentRunId)) as RunSnapshot & { variables?: Record<string, unknown> };
107
+ // Parent reaches some terminal state (completed if it tolerates failed children + supervisor terminates; failed if not).
108
+ // Either way, parentResult MUST NOT be overwritten with the child's "this-should-not-be-harvested" sentinel.
109
+ const parentVars = terminal.variables ?? {};
110
+ expect(
111
+ parentVars.parentResult,
112
+ driver.describe(
113
+ 'RFCS/0022-dispatch-input-output-mapping.md §B',
114
+ 'outputMapping MUST be SKIPPED when child terminates failed; parent variable MUST NOT be overwritten',
115
+ ),
116
+ ).not.toBe('this-should-not-be-harvested');
117
+ });
118
+ });
61
119
 
62
- it.todo(
63
- 'HVMAP-1b-cancelled: child terminates with `cancelled` status; outputMapping MUST be skipped; parent variables stay at pre-dispatch state for that child. Requires a child fixture that supports external cancellation.',
64
- );
120
+ describe.skipIf(!isFixtureAdvertised('conformance-dispatch-cancellable-child'))('dispatch-output-mapping: HVMAP-1b-cancelled (RFC 0022 §B)', () => {
121
+ it('child terminates `cancelled` outputMapping MUST be skipped; parent.parentResult stays at sentinel', async () => {
122
+ const parentId = await registerParent('conformance-dispatch-cancellable-child');
123
+ if (!parentId) return; // soft-skip
124
+ const create = await driver.post('/v1/runs', { workflowId: parentId });
125
+ expect(create.status).toBe(201);
126
+ const parentRunId = (create.json as { runId: string }).runId;
127
+
128
+ // Poll for the node.dispatched event so we can cancel the child mid-flight.
129
+ const start = Date.now();
130
+ let childRunId: string | undefined;
131
+ while (Date.now() - start < 10_000) {
132
+ const eventsRes = await driver.get(`/v1/runs/${encodeURIComponent(parentRunId)}/events`);
133
+ const events = ((eventsRes.json as { events?: RunEvent[] } | undefined)?.events ?? []);
134
+ const dispatched = events.find((e) => e.type === 'node.dispatched' && e.payload?.childWorkflowId === 'conformance-dispatch-cancellable-child');
135
+ if (dispatched?.payload?.childRunId) {
136
+ childRunId = dispatched.payload.childRunId;
137
+ break;
138
+ }
139
+ await new Promise((r) => setTimeout(r, 250));
140
+ }
141
+ if (!childRunId) return; // dispatch didn't surface child run id — soft-skip
142
+ const cancelRes = await driver.post(`/v1/runs/${encodeURIComponent(childRunId)}/cancel`, { reason: 'hvmap-1b-cancelled test' });
143
+ expect(cancelRes.status === 200 || cancelRes.status === 202).toBe(true);
144
+
145
+ const terminal = (await pollUntilTerminal(parentRunId)) as RunSnapshot & { variables?: Record<string, unknown> };
146
+ const parentVars = terminal.variables ?? {};
147
+ expect(
148
+ parentVars.parentResult,
149
+ driver.describe(
150
+ 'RFCS/0022-dispatch-input-output-mapping.md §B',
151
+ 'outputMapping MUST be SKIPPED when child terminates cancelled; parent variable MUST NOT be overwritten',
152
+ ),
153
+ ).not.toBe('this-should-not-be-harvested');
154
+ });
65
155
  });
@@ -0,0 +1,223 @@
1
+ /**
2
+ * envelope-completion-distinguishes-truncation — RFC 0033 §A + §B + §C
3
+ * truncation-vs-schema-violation retry-routing distinction.
4
+ *
5
+ * Capability-gated on `capabilities.envelopes.reliability.supported: true`
6
+ * AND `capabilities.envelopes.reliability.completion.distinguishesTruncation: true`
7
+ * AND the host's test seam. Soft-skip cleanly on hosts that conflate the two
8
+ * paths (legacy v1.1 behavior).
9
+ *
10
+ * Asserts two scenarios:
11
+ *
12
+ * 1. **Truncation path** (RFC 0033 §B). Mock LLM stops at `max_tokens` mid-envelope.
13
+ * - `envelope.truncated` event fires.
14
+ * - `envelope.retry.attempted` fires with `reason: 'truncation'`.
15
+ * - The retry's `maxTokens` budget is strictly greater than the initial.
16
+ *
17
+ * 2. **Schema-violation path** (RFC 0033 §C). Mock LLM emits malformed JSON.
18
+ * - NO `envelope.truncated` event.
19
+ * - `envelope.retry.attempted` fires with `reason` ∈ {`schema-violation`, `parse-error`}.
20
+ * - The retry's `maxTokens` budget is UNCHANGED from the initial.
21
+ *
22
+ * Both scenarios share the existing per-path fixtures
23
+ * (`conformance-envelope-truncated` for the truncation case;
24
+ * `conformance-envelope-retry-attempted` for the schema-violation case).
25
+ *
26
+ * @see RFCS/0033-envelope-completion-contract.md §A + §B + §C
27
+ * @see spec/v1/ai-envelope.md §"Envelope-completion criteria"
28
+ */
29
+
30
+ import { describe, it, expect } from 'vitest';
31
+ import { driver } from '../lib/driver.js';
32
+ import { pollUntilTerminal } from '../lib/polling.js';
33
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
34
+
35
+ const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
36
+ const NODE_ID = 'structured-call';
37
+
38
+ interface DiscoveryDoc {
39
+ capabilities?: {
40
+ envelopes?: {
41
+ reliability?: {
42
+ completion?: {
43
+ distinguishesTruncation?: unknown;
44
+ truncationBudgetMultiplier?: unknown;
45
+ };
46
+ };
47
+ };
48
+ };
49
+ }
50
+
51
+ interface RunEvent {
52
+ type: string;
53
+ payload?: Record<string, unknown>;
54
+ nodeId?: string;
55
+ sequence: number;
56
+ }
57
+
58
+ async function readDiscovery(): Promise<DiscoveryDoc | null> {
59
+ try {
60
+ const res = await driver.get('/.well-known/openwop');
61
+ if (res.status !== 200) return null;
62
+ return res.json as DiscoveryDoc;
63
+ } catch {
64
+ return null;
65
+ }
66
+ }
67
+
68
+ async function programMock(program: Array<Record<string, unknown>>): Promise<{ status: number }> {
69
+ const res = await driver.post('/v1/host/sample/test/mock-ai/program', { nodeId: NODE_ID, program });
70
+ return { status: res.status };
71
+ }
72
+
73
+ async function startRunAndRead(workflowId: string): Promise<{ events: RunEvent[]; terminal: unknown } | null> {
74
+ const create = await driver.post('/v1/runs', { workflowId });
75
+ if (create.status !== 201) return null;
76
+ const runId = (create.json as { runId: string }).runId;
77
+ const terminal = await pollUntilTerminal(runId, { timeoutMs: 10_000 });
78
+ const eventsRes = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
79
+ if (eventsRes.status !== 200) return null;
80
+ const events = ((eventsRes.json as { events?: RunEvent[] } | undefined)?.events ?? []) as RunEvent[];
81
+ return { events, terminal };
82
+ }
83
+
84
+ async function lastBudget(): Promise<number | null> {
85
+ const res = await driver.get(`/v1/host/sample/test/mock-ai/last-dispatch-budget?nodeId=${encodeURIComponent(NODE_ID)}`);
86
+ if (res.status !== 200) return null;
87
+ return (res.json as { maxTokens?: number | null }).maxTokens ?? null;
88
+ }
89
+
90
+ describe.skipIf(HTTP_SKIP)('envelope-completion-distinguishes-truncation: advertisement shape (RFC 0033 §E)', () => {
91
+ it('capabilities.envelopes.reliability.completion (when present) conforms to RFC 0033 §E', async () => {
92
+ const d = await readDiscovery();
93
+ if (d === null) return;
94
+ const completion = d.capabilities?.envelopes?.reliability?.completion;
95
+ if (completion === undefined) return;
96
+ expect(
97
+ typeof completion.distinguishesTruncation,
98
+ driver.describe('RFCS/0033-envelope-completion-contract.md §E', 'completion.distinguishesTruncation MUST be boolean when block is advertised'),
99
+ ).toBe('boolean');
100
+ if (completion.truncationBudgetMultiplier !== undefined) {
101
+ const n = completion.truncationBudgetMultiplier as number;
102
+ expect(
103
+ typeof n === 'number' && n >= 1 && n <= 8,
104
+ driver.describe('RFCS/0033-envelope-completion-contract.md §E', 'truncationBudgetMultiplier MUST be a number in [1, 8] (default 2)'),
105
+ ).toBe(true);
106
+ }
107
+ });
108
+ });
109
+
110
+ const TRUNCATED_FIXTURE = 'conformance-envelope-truncated';
111
+ const SCHEMA_VIOLATION_FIXTURE = 'conformance-envelope-retry-attempted';
112
+
113
/**
 * Truncation path (RFC 0033 §B): the first model attempt stops on
 * `max_tokens` with partial JSON, the second completes with valid JSON.
 * Both tests return without asserting when the scenario cannot be exercised.
 */
describe.skipIf(HTTP_SKIP)('envelope-completion-distinguishes-truncation: truncation path (RFC 0033 §B)', () => {
  const SPEC_REF = 'RFCS/0033-envelope-completion-contract.md §B';

  /**
   * Seeds the mock with a truncated-then-valid response pair and runs the
   * truncation fixture.
   *
   * @returns the value of `startRunAndRead`, or `null` when the fixture is not
   *   advertised, the host does not advertise `distinguishesTruncation: true`,
   *   the mock seeding call answers 404, or the run itself yields `null`.
   */
  const runTruncationScenario = async () => {
    if (!isFixtureAdvertised(TRUNCATED_FIXTURE)) return null;

    const discovery = await readDiscovery();
    if (discovery?.capabilities?.envelopes?.reliability?.completion?.distinguishesTruncation !== true) return null;

    const seed = await programMock([
      { stopReason: 'max_tokens', content: '{"partial' },
      { stopReason: 'end_turn', content: '{"valid":true}' },
    ]);
    // NOTE(review): 404 presumably means the host exposes no mock-programming
    // endpoint — confirm against programMock.
    if (seed.status === 404) return null;

    return startRunAndRead(TRUNCATED_FIXTURE);
  };

  it('truncation: emits envelope.truncated + envelope.retry.attempted with reason: "truncation"', async () => {
    const result = await runTruncationScenario();
    if (result === null) return;

    const truncated = result.events.find((e) => e.type === 'envelope.truncated');
    expect(truncated, 'envelope.truncated MUST fire on the truncation path').toBeDefined();

    const retry = result.events.find((e) => e.type === 'envelope.retry.attempted');
    expect(retry, 'envelope.retry.attempted MUST fire between attempts').toBeDefined();
    expect(
      retry?.payload?.reason,
      driver.describe(
        SPEC_REF,
        'truncation-routed retry MUST carry reason: "truncation" (distinct from schema-violation per RFC 0033 §A precedence rule)',
      ),
    ).toBe('truncation');
  });

  it('truncation: retry budget strictly greater than initial (RFC 0033 §B truncationBudgetMultiplier)', async () => {
    // Bail out when the run did not happen: otherwise lastBudget() could only
    // report a value that this test did not produce (presumably left over from
    // an earlier run — verify against lastBudget).
    const result = await runTruncationScenario();
    if (result === null) return;

    const budget = await lastBudget();
    if (budget === null) return;

    expect(
      budget,
      driver.describe(
        SPEC_REF,
        'truncation retry MUST multiply maxTokens by truncationBudgetMultiplier — final budget > initial 50 fixture value',
      ),
    ).toBeGreaterThan(50);
  });
});
161
+
162
/**
 * Schema-violation path (RFC 0033 §C): the first model attempt returns text
 * that is not valid JSON, the second returns valid JSON. This path must stay
 * distinct from the truncation path: no `envelope.truncated` event and no
 * budget multiplication. Both tests return without asserting when the
 * scenario cannot be exercised.
 */
describe.skipIf(HTTP_SKIP)('envelope-completion-distinguishes-truncation: schema-violation path (RFC 0033 §C)', () => {
  const SPEC_REF = 'RFCS/0033-envelope-completion-contract.md §C';

  /**
   * Seeds the mock with an invalid-then-valid response pair and runs the
   * schema-violation fixture.
   *
   * @returns the value of `startRunAndRead`, or `null` when the fixture is not
   *   advertised, the mock seeding call answers 404, or the run itself yields
   *   `null`.
   */
  const runSchemaViolationScenario = async () => {
    if (!isFixtureAdvertised(SCHEMA_VIOLATION_FIXTURE)) return null;

    const seed = await programMock([
      { content: 'not valid json' },
      { content: '{"valid":true}' },
    ]);
    // NOTE(review): 404 presumably means the host exposes no mock-programming
    // endpoint — confirm against programMock.
    if (seed.status === 404) return null;

    return startRunAndRead(SCHEMA_VIOLATION_FIXTURE);
  };

  it('schema-violation: NO envelope.truncated; envelope.retry.attempted reason ∈ {schema-violation, parse-error}', async () => {
    const result = await runSchemaViolationScenario();
    if (result === null) return;

    const truncated = result.events.find((e) => e.type === 'envelope.truncated');
    expect(
      truncated,
      driver.describe(
        SPEC_REF,
        'schema-violation path MUST NOT emit envelope.truncated (truncation and schema-violation are distinct paths per RFC 0033 §A)',
      ),
    ).toBeUndefined();

    const retry = result.events.find((e) => e.type === 'envelope.retry.attempted');
    expect(retry, 'envelope.retry.attempted MUST fire between attempts').toBeDefined();

    const reason = retry?.payload?.reason as string | undefined;
    expect(
      reason === 'schema-violation' || reason === 'parse-error',
      driver.describe(
        SPEC_REF,
        'schema-violation-routed retry MUST carry reason ∈ {schema-violation, parse-error}; truncation reason is reserved for the budget-doubling path',
      ),
    ).toBe(true);
  });

  it('schema-violation: retry budget UNCHANGED from initial (no budget multiplication on this path)', async () => {
    // Bail out when the run did not happen: otherwise lastBudget() could only
    // report a value that this test did not produce (presumably left over from
    // an earlier run — verify against lastBudget).
    const result = await runSchemaViolationScenario();
    if (result === null) return;

    const budget = await lastBudget();
    if (budget === null) return;

    // The schema-violation fixture does not set maxTokens explicitly, so the
    // observed budget is whatever the host defaults to on each call. The key
    // invariant is that the retry call's budget is NOT multiplied (only the
    // truncation path grows it), i.e. last-call budget equals first-call
    // budget. Without a per-call history hook the two cannot be compared
    // directly, so this only asserts that the budget did not grow into the
    // truncation-path range (≥2× the default — typically 8000 for the
    // sample's structuredOutput dispatch path).
    // NOTE(review): the 20_000 ceiling is a heuristic — any multiplied budget
    // that stays below it passes undetected.
    expect(
      budget,
      driver.describe(
        SPEC_REF,
        'schema-violation retry MUST NOT multiply maxTokens — budget stays at the original value (host default)',
      ),
    ).toBeLessThan(20_000);
  });
});