@openwop/openwop-conformance 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. package/CHANGELOG.md +156 -1
  2. package/README.md +3 -2
  3. package/api/asyncapi.yaml +8 -0
  4. package/api/openapi.yaml +371 -1
  5. package/api/redocly.yaml +15 -0
  6. package/coverage.md +26 -5
  7. package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
  8. package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
  9. package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
  10. package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
  11. package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
  12. package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
  13. package/fixtures/conformance-envelope-recovery-applied.json +39 -0
  14. package/fixtures/conformance-envelope-refusal.json +38 -0
  15. package/fixtures/conformance-envelope-retry-attempted.json +39 -0
  16. package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
  17. package/fixtures/conformance-envelope-truncated.json +39 -0
  18. package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
  19. package/fixtures/conformance-model-capability-insufficient.json +25 -0
  20. package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
  21. package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
  22. package/fixtures/conformance-multi-agent-handoff.json +49 -0
  23. package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
  24. package/fixtures/conformance-prompt-end-to-end.json +33 -0
  25. package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
  26. package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
  27. package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
  28. package/fixtures/openwop-smoke-cost-emit.json +37 -0
  29. package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
  30. package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
  31. package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
  32. package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
  33. package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
  34. package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
  35. package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
  36. package/fixtures.md +45 -0
  37. package/package.json +1 -1
  38. package/schemas/README.md +5 -0
  39. package/schemas/agent-manifest.schema.json +16 -0
  40. package/schemas/capabilities.schema.json +390 -0
  41. package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
  42. package/schemas/envelopes/clarification.request.schema.json +9 -0
  43. package/schemas/envelopes/error.schema.json +4 -0
  44. package/schemas/envelopes/schema.request.schema.json +4 -0
  45. package/schemas/envelopes/schema.response.schema.json +1 -1
  46. package/schemas/node-pack-manifest.schema.json +28 -0
  47. package/schemas/orchestrator-decision.schema.json +12 -0
  48. package/schemas/prompt-kind.schema.json +8 -0
  49. package/schemas/prompt-pack-manifest.schema.json +80 -0
  50. package/schemas/prompt-ref.schema.json +40 -0
  51. package/schemas/prompt-template.schema.json +149 -0
  52. package/schemas/registry-version-manifest.schema.json +5 -0
  53. package/schemas/run-ancestry-response.schema.json +54 -0
  54. package/schemas/run-event-payloads.schema.json +513 -11
  55. package/schemas/run-event.schema.json +17 -1
  56. package/schemas/run-snapshot.schema.json +3 -2
  57. package/schemas/workflow-definition.schema.json +19 -1
  58. package/src/lib/driver.ts +15 -0
  59. package/src/lib/env.ts +51 -0
  60. package/src/lib/event-log-query.ts +62 -0
  61. package/src/lib/fixtures.ts +38 -1
  62. package/src/lib/host-toggle.ts +54 -0
  63. package/src/lib/llm-cache-key-recipe.ts +68 -0
  64. package/src/lib/multi-agent-capabilities.ts +10 -0
  65. package/src/lib/otel-scrape.ts +59 -0
  66. package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
  67. package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
  68. package/src/scenarios/aiEnvelope.contractRefusal.test.ts +224 -15
  69. package/src/scenarios/aiEnvelope.correlationReplay.test.ts +257 -25
  70. package/src/scenarios/aiEnvelope.redaction.test.ts +210 -29
  71. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +163 -24
  72. package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +262 -12
  73. package/src/scenarios/aiEnvelope.universalKinds.test.ts +107 -16
  74. package/src/scenarios/blob-presign-expiry.test.ts +42 -9
  75. package/src/scenarios/blob-roundtrip.test.ts +0 -0
  76. package/src/scenarios/cache-ttl-expiry.test.ts +34 -8
  77. package/src/scenarios/cost-attribution.test.ts +124 -11
  78. package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
  79. package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
  80. package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
  81. package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
  82. package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
  83. package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
  84. package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
  85. package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
  86. package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
  87. package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
  88. package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
  89. package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
  90. package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
  91. package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
  92. package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
  93. package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
  94. package/src/scenarios/envelope-truncated.test.ts +136 -0
  95. package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
  96. package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
  97. package/src/scenarios/fixtures-gating.test.ts +139 -1
  98. package/src/scenarios/fixtures-valid.test.ts +123 -15
  99. package/src/scenarios/kv-ttl-expiry.test.ts +40 -9
  100. package/src/scenarios/model-capability-insufficient.test.ts +221 -0
  101. package/src/scenarios/model-capability-substituted.test.ts +203 -0
  102. package/src/scenarios/multi-agent-confidence-escalation.test.ts +164 -0
  103. package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
  104. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
  105. package/src/scenarios/multi-region-idempotency.test.ts +58 -0
  106. package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
  107. package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
  108. package/src/scenarios/pack-registry-publish.test.ts +231 -51
  109. package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
  110. package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
  111. package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
  112. package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
  113. package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
  114. package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
  115. package/src/scenarios/prompt-pack-install.test.ts +187 -0
  116. package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
  117. package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
  118. package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
  119. package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
  120. package/src/scenarios/prompt-template-shape.test.ts +359 -0
  121. package/src/scenarios/provider-usage.test.ts +185 -0
  122. package/src/scenarios/queue-ack-nack-dlq.test.ts +64 -10
  123. package/src/scenarios/queue-publish-consume-roundtrip.test.ts +50 -10
  124. package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
  125. package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
  126. package/src/scenarios/replay-llm-cache-key.test.ts +127 -25
  127. package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
  128. package/src/scenarios/sandbox-capability-gate-respected.test.ts +31 -0
  129. package/src/scenarios/sandbox-memory-cap.test.ts +61 -0
  130. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +35 -0
  131. package/src/scenarios/sandbox-no-host-env-leak.test.ts +38 -0
  132. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +91 -0
  133. package/src/scenarios/sandbox-no-host-process-escape.test.ts +30 -0
  134. package/src/scenarios/sandbox-no-network-escape.test.ts +49 -0
  135. package/src/scenarios/sandbox-timeout-cap.test.ts +61 -0
  136. package/src/scenarios/search-bm25-roundtrip.test.ts +54 -9
  137. package/src/scenarios/spec-corpus-validity.test.ts +34 -6
  138. package/src/scenarios/sql-transaction-atomicity.test.ts +37 -8
  139. package/src/scenarios/stream-subscribe-from-beginning.test.ts +46 -9
  140. package/src/scenarios/subworkflow-input-mapping.test.ts +146 -10
  141. package/src/scenarios/table-cursor-pagination.test.ts +47 -9
  142. package/src/scenarios/table-schema-enforcement.test.ts +46 -9
  143. package/src/scenarios/vector-knn-roundtrip.test.ts +50 -10
  144. package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
@@ -0,0 +1,258 @@
1
+ /**
2
+ * envelope-retry-attempted — RFC 0032 §B.1 runtime behavior.
3
+ *
4
+ * Capability-gated on `capabilities.envelopes.reliability.supported: true` AND
5
+ * `events[]` includes `envelope.retry.attempted` AND the host's test seam
6
+ * `POST /v1/host/sample/test/mock-ai/program`.
7
+ *
8
+ * Asserts:
9
+ * 1. When the mock LLM emits an invalid envelope on attempt 1 then a valid
10
+ * one on attempt 2, exactly one `envelope.retry.attempted` event fires
11
+ * before the second attempt.
12
+ * 2. `attempt: 2`, `reason: "schema-violation"` (or `truncation` /
13
+ * `type-drift` / `type-mismatch` / `refusal` / `parse-error` / `unknown`
14
+ * / `x-host-<host>-*`).
15
+ * 3. First attempt does NOT emit `envelope.retry.attempted` (per RFC 0032
16
+ * §B.1 normative text — only retries past the first emit).
17
+ * 4. Eventual success is recorded normally (envelope acceptance + downstream
18
+ * RunEventDoc).
19
+ *
20
+ * Live behavioral via the reference workflow-engine's
21
+ * `executor/envelopeReliability.ts` emission path + the
22
+ * `POST /v1/host/sample/test/mock-ai/program` seam. Fixture- + capability-
23
+ * gated; soft-skip cleanly when the host doesn't expose the seam or doesn't
24
+ * advertise `capabilities.envelopes.reliability.events[]` containing
25
+ * `envelope.retry.attempted`.
26
+ *
27
+ * @see RFCS/0032-envelope-reliability-events.md §B.1
28
+ * @see schemas/run-event-payloads.schema.json §envelopeRetryAttempted
29
+ */
30
+
31
+ import { describe, it, expect } from 'vitest';
32
+ import { driver } from '../lib/driver.js';
33
+
34
// True when no base URL is configured (unset or empty); the HTTP suites below are then skipped.
const baseUrl = process.env.OPENWOP_BASE_URL;
const HTTP_SKIP = baseUrl === undefined || baseUrl === '';
35
+
36
/**
 * The `capabilities.envelopes.reliability` advertisement as read off the wire.
 * Every field is `unknown` because the tests below validate the shape themselves.
 */
interface ReliabilityAdvertisement {
  supported?: unknown;
  events?: unknown;
  maxRetryAttempts?: unknown;
}

/** The slice of the `/.well-known/openwop` discovery document that this file inspects. */
interface DiscoveryDoc {
  capabilities?: {
    envelopes?: {
      reliability?: ReliabilityAdvertisement;
    };
  };
}
47
+
48
/**
 * Fetches the discovery document from `/.well-known/openwop`.
 *
 * @returns The response body cast to `DiscoveryDoc`, or `null` when the status
 *   is not 200 or the request throws, so that callers can soft-skip.
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  let doc: DiscoveryDoc | null = null;
  try {
    const response = await driver.get('/.well-known/openwop');
    if (response.status === 200) {
      doc = response.json as DiscoveryDoc;
    }
  } catch {
    // Best-effort read: any failure is reported as "no discovery document".
    doc = null;
  }
  return doc;
}
57
+
58
/**
 * Advertisement-shape checks for `capabilities.envelopes.reliability`.
 *
 * The suite is skipped when no base URL is configured. The single test returns
 * early (soft-skip) when the discovery document cannot be read or when the host
 * does not advertise a `reliability` block at all.
 */
describe.skipIf(HTTP_SKIP)('envelope-retry-attempted: advertisement shape (RFC 0032 §C)', () => {
  it('capabilities.envelopes.reliability (when present) conforms to RFC 0032 §C', async () => {
    const d = await readDiscovery();
    if (d === null) return;
    const reliability = d.capabilities?.envelopes?.reliability;
    if (reliability === undefined) return;
    expect(typeof reliability.supported, 'reliability.supported MUST be boolean').toBe('boolean');

    const { events, maxRetryAttempts } = reliability;

    if (events !== undefined) {
      expect(Array.isArray(events), 'reliability.events MUST be an array').toBe(true);
      const RFC_0032_EVENTS = [
        'envelope.retry.attempted',
        'envelope.retry.exhausted',
        'envelope.refusal',
        'envelope.truncated',
        'envelope.nlToFormat.engaged',
        'envelope.recovery.applied',
      ];
      for (const e of events as unknown[]) {
        expect(RFC_0032_EVENTS, `event "${String(e)}" MUST be one of the six RFC 0032 names`).toContain(String(e));
      }
      // When supported: true, MUST include the two MUST-tier events (per
      // RFC 0032 §C). Hosts that have wired end-to-end emission from
      // dispatchStructured (per RFC 0032 §B + §C — the reference host's
      // OPENWOP_ENVELOPE_RELIABILITY_END_TO_END=true path) ALSO populate
      // envelope.retry.attempted + envelope.truncated. Hosts running the
      // legacy undifferentiated retry loop advertise `events: []` —
      // soft-skip this stricter check rather than fail on the legacy
      // posture (the MUST-tier events still appear via the seam).
      if (reliability.supported === true && Array.isArray(events) && events.length > 0) {
        const evts = events as string[];
        expect(
          evts.includes('envelope.retry.exhausted'),
          'RFC 0032 §C: hosts that advertise `supported: true` with non-empty `events[]` MUST include `envelope.retry.exhausted`',
        ).toBe(true);
        expect(
          evts.includes('envelope.refusal'),
          'RFC 0032 §C: hosts that advertise `supported: true` with non-empty `events[]` MUST include `envelope.refusal`',
        ).toBe(true);
      }
    }

    if (maxRetryAttempts !== undefined) {
      // The message requires an *integer*; a bare typeof/range check would let
      // fractional values such as 2.5 through, so test integrality explicitly.
      const isIntegerInRange =
        typeof maxRetryAttempts === 'number' &&
        Number.isInteger(maxRetryAttempts) &&
        maxRetryAttempts >= 1 &&
        maxRetryAttempts <= 16;
      expect(isIntegerInRange, 'maxRetryAttempts MUST be integer in [1, 16]').toBe(true);
    }
  });
});
104
+
105
+ // Live runtime behavior — drives the conformance fixture
106
+ // `conformance-envelope-retry-attempted` against the sample's
107
+ // conformance-only `mock` provider. Test pre-seeds a 2-entry program
108
+ // via `POST /v1/host/sample/test/mock-ai/program`: attempt 1 returns
109
+ // invalid JSON, attempt 2 returns a valid envelope. The host's
110
+ // `dispatchStructured` retry loop emits exactly one
111
+ // `envelope.retry.attempted` event between the two attempts (RFC 0032
112
+ // §B.1). Fixture- + capability-gated: soft-skip when either is absent
113
+ // OR when the host doesn't expose the mock-ai program seam.
114
+
115
+ import { pollUntilTerminal } from '../lib/polling.js';
116
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
117
+
118
// Workflow id of the conformance fixture driven by the runtime tests, and the
// id of the node whose mock-ai program gets seeded.
const FIXTURE = 'conformance-envelope-retry-attempted';
const NODE_ID = 'structured-call';

// Spec-reserved values accepted for the `reason` field of a retry event.
const RFC_0032_REASONS = new Set<string>([
  'schema-violation',
  'truncation',
  'type-drift',
  'type-mismatch',
  'refusal',
  'parse-error',
  'unknown',
]);

// Host extension reasons: `x-host-<host>-<key>`, lower-case alphanumerics and dashes only.
const HOST_REASON_EXT_RE = /^x-host-[a-z0-9][a-z0-9-]*-[a-z0-9][a-z0-9-]*$/;
131
+
132
/** The subset of a run-event record that the assertions in this file read. */
interface RunEvent {
  /** Event name, e.g. `envelope.retry.attempted`. */
  type: string;
  sequence: number;
  /** Id of the node the event relates to, when the event carries one. */
  nodeId?: string;
  payload?: Record<string, unknown>;
}
138
+
139
/**
 * Posts `program` for `NODE_ID` to the mock-ai program seam.
 *
 * @param program Entries forwarded verbatim in the request body.
 * @returns Only the HTTP status of the seam response.
 */
async function programMock(program: Array<Record<string, unknown>>): Promise<{ status: number }> {
  const { status } = await driver.post('/v1/host/sample/test/mock-ai/program', { nodeId: NODE_ID, program });
  return { status };
}
143
+
144
/**
 * Creates a run of the fixture workflow, polls it until terminal, then fetches
 * its event log.
 *
 * @returns The run's events (an empty array when the body has no `events`
 *   field), or `null` when run creation is not 201 or the events read is not 200.
 */
async function startRunAndRead(): Promise<RunEvent[] | null> {
  const created = await driver.post('/v1/runs', { workflowId: FIXTURE });
  if (created.status !== 201) return null;

  const { runId } = created.json as { runId: string };
  await pollUntilTerminal(runId, { timeoutMs: 10_000 });

  const listing = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
  if (listing.status !== 200) return null;

  const body = listing.json as { events?: RunEvent[] } | undefined;
  return body?.events ?? [];
}
153
+
154
/**
 * Seeds the mock-ai program for the fixture node and, when the host accepts it,
 * runs the fixture once and returns the resulting event log.
 *
 * Resolves to `null` in every soft-skip case: the fixture is not advertised,
 * the program seam answers 404, or `startRunAndRead` returns `null`. Any other
 * non-200 seed status fails the calling test, so every test in the suite
 * treats a rejected program the same way.
 */
async function seedProgramAndCollectEvents(
  program: Array<Record<string, unknown>>,
): Promise<RunEvent[] | null> {
  if (!isFixtureAdvertised(FIXTURE)) return null;
  const seed = await programMock(program);
  if (seed.status === 404) return null; // host doesn't expose the seam
  expect(seed.status).toBe(200);
  return startRunAndRead();
}

/**
 * Runtime checks for `envelope.retry.attempted`. Each test seeds a two-entry
 * program (invalid content, then `{"valid":true}`), runs the fixture, and
 * inspects the event log. The suite is skipped when no base URL is configured.
 */
describe.skipIf(HTTP_SKIP)('envelope-retry-attempted: runtime behavior (RFC 0032 §B.1)', () => {
  const RETRY_EVENT = 'envelope.retry.attempted';
  const SPEC_REF = 'RFCS/0032-envelope-reliability-events.md §B.1';
  // Built fresh per call so no test can observe another test's program array.
  const invalidThenValid = (): Array<Record<string, unknown>> => [
    { content: 'invalid' },
    { content: '{"valid":true}' },
  ];

  it('when mock LLM emits invalid envelope on attempt 1 then valid on attempt 2, exactly one `envelope.retry.attempted` event fires before the second attempt', async () => {
    const events = await seedProgramAndCollectEvents([
      { content: 'not valid json — provoke parse-error retry' },
      { content: '{"valid":true}' },
    ]);
    if (events === null) return;
    const retries = events.filter((e) => e.type === RETRY_EVENT);
    expect(
      retries.length,
      driver.describe(SPEC_REF, 'exactly one envelope.retry.attempted event MUST fire between attempts 1 and 2'),
    ).toBe(1);
  });

  it('event payload carries `attempt: 2` (1-indexed; first attempt does not emit)', async () => {
    const events = await seedProgramAndCollectEvents(invalidThenValid());
    if (events === null) return;
    const retry = events.find((e) => e.type === RETRY_EVENT);
    expect(retry, 'envelope.retry.attempted MUST appear in the event log').toBeDefined();
    expect(
      retry?.payload?.attempt,
      driver.describe(SPEC_REF, 'attempt field MUST be 2 (1-indexed; first attempt does not emit)'),
    ).toBe(2);
  });

  it('`reason` is one of the spec-reserved closed-enum values OR matches the `x-host-<host>-<key>` extension pattern', async () => {
    const events = await seedProgramAndCollectEvents(invalidThenValid());
    if (events === null) return;
    const retry = events.find((e) => e.type === RETRY_EVENT);
    expect(retry).toBeDefined();
    const reason = retry?.payload?.reason;
    expect(typeof reason).toBe('string');
    expect(
      RFC_0032_REASONS.has(reason as string) || HOST_REASON_EXT_RE.test(reason as string),
      driver.describe(SPEC_REF, 'reason MUST be in the spec-reserved set OR match x-host-<host>-<key>'),
    ).toBe(true);
  });

  it('eventual success records normally via envelope acceptance + downstream RunEventDoc', async () => {
    const events = await seedProgramAndCollectEvents(invalidThenValid());
    if (events === null) return;
    const nodeCompleted = events.find((e) => e.type === 'node.completed' && e.nodeId === NODE_ID);
    const runCompleted = events.find((e) => e.type === 'run.completed');
    expect(
      nodeCompleted,
      driver.describe(SPEC_REF, 'eventual success MUST produce a node.completed for the dispatching node'),
    ).toBeDefined();
    expect(runCompleted).toBeDefined();
  });

  it('`previousError` (when populated) MUST NOT contain prompt or response substring excerpts — limit to validator output', async () => {
    const PROMPT_CANARY = 'PROMPT-CANARY-RETRY-ATTEMPTED-DO-NOT-LEAK-' + Math.random().toString(36).slice(2, 10);
    const RESPONSE_CANARY = 'RESPONSE-CANARY-' + PROMPT_CANARY;
    // NOTE(review): only the mock *response* carries a canary here; this test
    // never places PROMPT_CANARY in a prompt, so prompt leakage is not exercised.
    const events = await seedProgramAndCollectEvents([
      { content: `not valid json mentioning ${RESPONSE_CANARY}` },
      { content: '{"valid":true}' },
    ]);
    if (events === null) return;
    const retry = events.find((e) => e.type === RETRY_EVENT);
    if (!retry) return;
    const previousError = retry.payload?.previousError;
    if (previousError === undefined || previousError === null) return; // field is optional
    const serialized = typeof previousError === 'string' ? previousError : JSON.stringify(previousError);
    expect(
      serialized.includes(RESPONSE_CANARY),
      driver.describe(
        'RFCS/0032-envelope-reliability-events.md §G',
        'previousError MUST NOT echo provider response substrings — validator output only',
      ),
    ).toBe(false);
  });
});
@@ -0,0 +1,168 @@
1
+ /**
2
+ * envelope-retry-exhausted — RFC 0032 §B.2 runtime behavior (MUST tier).
3
+ *
4
+ * Capability- + fixture-gated. Drives the conformance `mock` provider
5
+ * via `POST /v1/host/sample/test/mock-ai/program` with a program that
6
+ * returns invalid JSON on EVERY attempt; the host's `dispatchStructured`
7
+ * retry loop exhausts its budget and emits `envelope.retry.exhausted`
8
+ * BEFORE the terminal failure.
9
+ *
10
+ * Asserts:
11
+ * 1. Exactly one `envelope.retry.exhausted` event with `totalAttempts`
12
+ * matching the host's advertised `maxRetryAttempts`.
13
+ * 2. `finalReason` is one of the spec-reserved closed-enum values OR
14
+ * matches `x-host-<host>-<key>` (RFC 0032 §B.2 + §B.1 share the
15
+ * reason enum).
16
+ * 3. `RunSnapshot.error.code` is `envelope_invalid` per
17
+ * RFC 0033 §C (schema-violation-exhaustion → existing RFC 0021 code).
18
+ * 4. `node.failed` event appears after `envelope.retry.exhausted`
19
+ * (cause precedes effect per RFC 0032 §B.2 "emitted ... about to
20
+ * surface a terminal envelope failure").
21
+ *
22
+ * @see RFCS/0032-envelope-reliability-events.md §B.2
23
+ * @see RFCS/0033-envelope-completion-contract.md §C + §F
24
+ * @see schemas/run-event-payloads.schema.json §envelopeRetryExhausted
25
+ */
26
+
27
+ import { describe, it, expect } from 'vitest';
28
+ import { driver } from '../lib/driver.js';
29
+ import { pollUntilTerminal } from '../lib/polling.js';
30
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
31
+
32
// Skip the HTTP suite when no base URL is configured (unset or empty).
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;

// Workflow id of the conformance fixture and the node whose mock-ai program is seeded.
const FIXTURE = 'conformance-envelope-retry-exhausted';
const NODE_ID = 'structured-call';

// Spec-reserved values accepted for the `finalReason` field of the exhaustion event.
const RFC_0032_REASONS = new Set<string>([
  'schema-violation',
  'truncation',
  'type-drift',
  'type-mismatch',
  'refusal',
  'parse-error',
  'unknown',
]);

// Host extension reasons: `x-host-<host>-<key>`, lower-case alphanumerics and dashes only.
const HOST_REASON_EXT_RE = /^x-host-[a-z0-9][a-z0-9-]*-[a-z0-9][a-z0-9-]*$/;
46
+
47
/** The subset of a run-event record that the assertions in this file read. */
interface RunEvent {
  /** Event name, e.g. `envelope.retry.exhausted` or `node.failed`. */
  type: string;
  sequence: number;
  /** Id of the node the event relates to, when the event carries one. */
  nodeId?: string;
  payload?: Record<string, unknown>;
}
53
+
54
/**
 * Posts `program` for `NODE_ID` to the mock-ai program seam.
 *
 * @param program Entries forwarded verbatim in the request body.
 * @returns Only the HTTP status of the seam response.
 */
async function programMock(program: Array<Record<string, unknown>>): Promise<{ status: number }> {
  const response = await driver.post('/v1/host/sample/test/mock-ai/program', { nodeId: NODE_ID, program });
  const { status } = response;
  return { status };
}
58
+
59
/**
 * Creates a run of the fixture workflow, polls it until terminal, then fetches
 * its event log.
 *
 * @returns The events (an empty array when the body has no `events` field)
 *   together with whatever `pollUntilTerminal` resolved to, or `null` when run
 *   creation is not 201 or the events read is not 200.
 */
async function startRunAndRead(): Promise<{ events: RunEvent[]; terminal: unknown } | null> {
  const created = await driver.post('/v1/runs', { workflowId: FIXTURE });
  if (created.status !== 201) return null;

  const { runId } = created.json as { runId: string };
  const terminal = await pollUntilTerminal(runId, { timeoutMs: 10_000 });

  const listing = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
  if (listing.status !== 200) return null;

  const body = listing.json as { events?: RunEvent[] } | undefined;
  const events: RunEvent[] = body?.events ?? [];
  return { events, terminal };
}
69
+
70
/**
 * Seeds the mock-ai program for the fixture node and, when the host accepts it,
 * runs the fixture once and returns its events plus the terminal poll result.
 *
 * Resolves to `null` in every soft-skip case: the fixture is not advertised,
 * the program seam answers 404, or `startRunAndRead` returns `null`. Any other
 * non-200 seed status fails the calling test, so every test in the suite
 * treats a rejected program the same way.
 */
async function seedProgramAndRun(
  program: Array<Record<string, unknown>>,
): Promise<{ events: RunEvent[]; terminal: unknown } | null> {
  if (!isFixtureAdvertised(FIXTURE)) return null;
  const seed = await programMock(program);
  if (seed.status === 404) return null; // host doesn't expose the seam
  expect(seed.status).toBe(200);
  return startRunAndRead();
}

/**
 * Runtime checks for `envelope.retry.exhausted`. Each test seeds a four-entry
 * program in which every entry is invalid JSON, runs the fixture, and inspects
 * the event log or the terminal poll result. The suite is skipped when no base
 * URL is configured.
 */
describe.skipIf(HTTP_SKIP)('envelope-retry-exhausted: runtime behavior (RFC 0032 §B.2 MUST)', () => {
  const EXHAUSTED_EVENT = 'envelope.retry.exhausted';
  const SPEC_REF = 'RFCS/0032-envelope-reliability-events.md §B.2';
  // Built fresh per call so no test can observe another test's program array.
  const allInvalid = (): Array<Record<string, unknown>> => [
    { content: 'x' },
    { content: 'y' },
    { content: 'z' },
    { content: 'w' },
  ];

  it('host exhausts retries with all-invalid program → exactly one envelope.retry.exhausted event', async () => {
    // Seed maxRetryAttempts entries of invalid JSON so dispatchStructured
    // hits every retry and then exhausts. The mock returns empty-stop
    // after program exhaustion, but dispatchStructured short-circuits
    // earlier via its own counter.
    const result = await seedProgramAndRun([
      { content: 'not json a' },
      { content: 'not json b' },
      { content: 'not json c' },
      { content: 'not json d' },
    ]);
    if (result === null) return;
    const exhausted = result.events.filter((e) => e.type === EXHAUSTED_EVENT);
    expect(
      exhausted.length,
      driver.describe(SPEC_REF, 'exactly one envelope.retry.exhausted event MUST fire on retry-budget exhaustion'),
    ).toBe(1);
  });

  it('totalAttempts in payload matches the host advertised maxRetryAttempts', async () => {
    const result = await seedProgramAndRun(allInvalid());
    if (result === null) return;
    const exhausted = result.events.find((e) => e.type === EXHAUSTED_EVENT);
    expect(exhausted).toBeDefined();
    const total = exhausted?.payload?.totalAttempts;
    // NOTE(review): despite the title, this does not compare against the
    // advertised `maxRetryAttempts` — it is unclear from this file whether that
    // value counts total attempts or retries after the first. Confirm against
    // the RFC before tightening to an equality check.
    expect(
      typeof total === 'number' && Number.isInteger(total) && total >= 1,
      'totalAttempts MUST be an integer >= 1',
    ).toBe(true);
  });

  it('finalReason is in the spec-reserved enum OR matches x-host-<host>-<key>', async () => {
    const result = await seedProgramAndRun(allInvalid());
    if (result === null) return;
    const exhausted = result.events.find((e) => e.type === EXHAUSTED_EVENT);
    expect(exhausted).toBeDefined();
    const reason = exhausted?.payload?.finalReason;
    expect(typeof reason).toBe('string');
    expect(
      RFC_0032_REASONS.has(reason as string) || HOST_REASON_EXT_RE.test(reason as string),
      driver.describe(SPEC_REF, 'finalReason MUST be in the spec-reserved set OR match x-host-<host>-<key>'),
    ).toBe(true);
  });

  it('RunSnapshot.error.code is envelope_invalid for schema-violation exhaustion (RFC 0033 §C)', async () => {
    const result = await seedProgramAndRun(allInvalid());
    if (result === null) return;
    // Null-safe read: a missing terminal snapshot should surface as the
    // conformance failure below, not as a TypeError from property access.
    const terminal = result.terminal as { error?: { code?: string } } | null | undefined;
    const code = terminal?.error?.code;
    expect(
      code,
      driver.describe(
        'RFCS/0033-envelope-completion-contract.md §C',
        'schema-violation-exhaustion MUST surface as RunSnapshot.error.code = envelope_invalid',
      ),
    ).toBe('envelope_invalid');
  });

  it('envelope.retry.exhausted is emitted BEFORE node.failed (cause precedes effect)', async () => {
    const result = await seedProgramAndRun(allInvalid());
    if (result === null) return;
    // Ordering is judged by position in the returned events array.
    const exhaustedIdx = result.events.findIndex((e) => e.type === EXHAUSTED_EVENT);
    const failedIdx = result.events.findIndex((e) => e.type === 'node.failed');
    expect(exhaustedIdx).toBeGreaterThanOrEqual(0);
    expect(failedIdx).toBeGreaterThanOrEqual(0);
    expect(
      exhaustedIdx < failedIdx,
      driver.describe(
        SPEC_REF,
        'envelope.retry.exhausted MUST be emitted BEFORE node.failed (the event signals the host is about to surface the terminal failure)',
      ),
    ).toBe(true);
  });
});
@@ -0,0 +1,229 @@
1
+ /**
2
+ * envelope-tier-one-subset-static — RFC 0030 §B static schema-walker.
3
+ *
4
+ * Capability-gated on `capabilities.envelopes.tierOneSubsetCompliance: "strict"`.
5
+ *
6
+ * For every kind in `capabilities.supportedEnvelopes` whose payload schema
7
+ * is reachable via the host's `/schemas/envelopes/<kind>.schema.json`
8
+ * canonical location OR via this repo's local `schemas/envelopes/` directory
9
+ * (for the four universal kinds), statically assert the Tier-1 cross-vendor
10
+ * intersection rules per `spec/v1/structured-output-subset.md`:
11
+ *
12
+ * - Object root (`type: object`)
13
+ * - `additionalProperties: false` on every object subschema
14
+ * - Every property listed in `required` (OpenAI strict rule)
15
+ * - No `oneOf` anywhere (Gemini silently drops)
16
+ * - No `allOf` / `not` / `if/then/else` / `dependencies` / `prefixItems`
17
+ * - No string format constraints (`minLength` / `maxLength` / `pattern` /
18
+ * `format`)
19
+ * - No number bounds (`minimum` / `maximum` / `multipleOf`)
20
+ * - No array bounds (`minItems` / `maxItems` / `uniqueItems`)
21
+ * - No `propertyNames`
22
+ * - Max nesting depth 5
23
+ * - Max total property count 100
24
+ *
25
+ * Hosts that advertise `warn` or `off` (or omit the field) soft-skip — the
26
+ * conformance suite reports the schemas it walked without failing.
27
+ *
28
+ * @see RFCS/0030-envelope-reasoning-and-tier-one-subset.md §B
29
+ * @see spec/v1/structured-output-subset.md
30
+ */
31
+
32
+ import { describe, it, expect } from 'vitest';
33
+ import { readFileSync, existsSync } from 'node:fs';
34
+ import { join } from 'node:path';
35
+ import { driver } from '../lib/driver.js';
36
+ import { SCHEMAS_DIR } from '../lib/paths.js';
37
+
38
/**
 * True when no `OPENWOP_BASE_URL` is configured (unset or empty); used to
 * skip the host-facing suite in this file.
 */
const HTTP_SKIP = (process.env.OPENWOP_BASE_URL ?? '') === '';

/**
 * The four universal envelope kinds whose schemas are expected under the
 * local `schemas/envelopes/` directory.
 */
const UNIVERSAL_KINDS = [
  'clarification.request',
  'schema.request',
  'schema.response',
  'error',
] as const;
41
+
42
/**
 * The subset of the host discovery document this file reads. Field values
 * are typed `unknown` because they come from a remote host and are narrowed
 * at the point of use.
 */
interface DiscoveryDoc {
  capabilities?: {
    supportedEnvelopes?: unknown;
    envelopes?: { tierOneSubsetCompliance?: unknown };
  };
}

/**
 * Fetch `/.well-known/openwop` from the host under test.
 *
 * @returns The parsed discovery document, or `null` when the request throws,
 *   the status is not 200, or the body is not a JSON object. Callers treat
 *   `null` as a soft-skip.
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const res = await driver.get('/.well-known/openwop');
    if (res.status !== 200) return null;
    // The body is remote input: only hand it back as a DiscoveryDoc when it is
    // actually a plain object, so callers can dereference it safely.
    const body: unknown = res.json;
    if (body === null || typeof body !== 'object' || Array.isArray(body)) return null;
    return body as DiscoveryDoc;
  } catch {
    // Best-effort by design: any transport/driver failure means "skip".
    return null;
  }
}
58
+
59
/**
 * Load `<SCHEMAS_DIR>/envelopes/<kind>.schema.json` from the local checkout.
 *
 * @param kind Envelope kind; used verbatim as the file-name stem.
 * @returns The parsed JSON document, or `null` when no such local file exists
 *   or when `kind` is not usable as a single file-name segment.
 * @throws SyntaxError when the file exists but is not valid JSON.
 */
function loadLocalSchema(kind: string): Record<string, unknown> | null {
  // `kind` can originate from a host's discovery document. Refuse anything
  // containing a path separator (or NUL) so the lookup cannot escape the
  // envelopes directory.
  if (/[\\/\0]/.test(kind)) return null;
  const p = join(SCHEMAS_DIR, 'envelopes', `${kind}.schema.json`);
  if (!existsSync(p)) return null;
  return JSON.parse(readFileSync(p, 'utf8')) as Record<string, unknown>;
}
64
+
65
/** One rule violation found while walking a schema. */
interface Violation {
  /** Location of the offending subschema, built up from the root path (e.g. `#/properties/x`). */
  path: string;
  /** Identifier of the rule that was broken. */
  rule: string;
  /** Optional extra context, such as the offending keyword. */
  detail?: string;
}

// Load-bearing forbidden keywords — fail across multiple vendors.
// `oneOf` is the canonical case (Gemini silently drops); `propertyNames` is
// dropped by both OpenAI strict and Gemini; `prefixItems` by both Anthropic
// and OpenAI strict; `if/then/else` + `dependencies` + `not` + `allOf` by
// every Tier-1 vendor.
const LOAD_BEARING_KEYWORDS = [
  'oneOf',
  'allOf',
  'not',
  'if',
  'then',
  'else',
  'dependencies',
  'prefixItems',
  'propertyNames',
] as const;

// String/number/array constraints — OpenAI-strict-only restrictions. Only
// flagged in `strict` mode; under load-bearing mode these are permitted
// because Gemini 2.5+ and Anthropic accept them.
const STRICT_ONLY_CONSTRAINTS = [
  {
    types: ['string'],
    rule: 'forbidden-string-constraint-strict-only',
    keywords: ['minLength', 'maxLength', 'pattern', 'format'],
  },
  {
    types: ['number', 'integer'],
    rule: 'forbidden-number-constraint-strict-only',
    keywords: ['minimum', 'maximum', 'multipleOf'],
  },
  {
    types: ['array'],
    rule: 'forbidden-array-constraint-strict-only',
    keywords: ['minItems', 'maxItems', 'uniqueItems'],
  },
] as const;

/** True when `value` is a non-null, non-array object (i.e. an object-form schema or keyword map). */
function isSchemaObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** True when a schema's `type` keyword equals, or (as an array) includes, any of `names`. */
function declaresType(declared: unknown, ...names: string[]): boolean {
  return names.some((name) => declared === name || (Array.isArray(declared) && declared.includes(name)));
}

/**
 * Walk a schema, collecting violations.
 *
 * `mode: "load-bearing"` — only flags rules that fail across MULTIPLE vendors
 * (Gemini silently drops these, producing looser-than-declared schemas — a
 * silent correctness bug). These are the bare-minimum constraints that
 * RFC 0030 §B applies even to schemas that predate the RFC.
 *
 * `mode: "strict"` — flags every rule outside the OpenAI-strict ∩ Anthropic-
 * strict ∩ Gemini intersection. Used only when the host advertises
 * `tierOneSubsetCompliance: "strict"`.
 *
 * Recursion descends into `anyOf` branches, `properties`, an object-valued
 * `additionalProperties`, single-schema `items`, and `$defs`; every descent
 * counts as one nesting level. Subschemas that are not objects (boolean
 * schemas such as `true`, `null`, arrays) carry no keywords and are ignored
 * instead of crashing the walk.
 *
 * @param schema Subschema to inspect.
 * @param path Location prefix recorded on violations (callers start at `#`).
 * @param depth Nesting level of `schema`; callers start at 0. A level above 5
 *   records `max-nesting-depth-5` and stops descending.
 * @param propCount Shared counter, incremented once per entry in any
 *   `properties` map of an object-typed schema.
 * @param violations Output array; violations are appended in walk order.
 * @param mode Which rule set to apply (see above).
 */
function walkSchema(
  schema: Record<string, unknown>,
  path: string,
  depth: number,
  propCount: { n: number },
  violations: Violation[],
  mode: 'load-bearing' | 'strict',
): void {
  if (depth > 5) {
    violations.push({ path, rule: 'max-nesting-depth-5', detail: `depth=${depth}` });
    return;
  }
  // Documents are parsed JSON, so a "schema" slot may hold `true`/`false`/null.
  // The `in` operator throws on primitives, so bail out before using it.
  if (!isSchemaObject(schema)) return;

  for (const kw of LOAD_BEARING_KEYWORDS) {
    if (kw in schema) {
      violations.push({ path, rule: `forbidden-keyword`, detail: kw });
    }
  }

  // anyOf — recurse into branches (anyOf is permitted, but contents are walked)
  if (Array.isArray(schema.anyOf)) {
    for (let i = 0; i < schema.anyOf.length; i++) {
      walkSchema(schema.anyOf[i] as Record<string, unknown>, `${path}/anyOf/${i}`, depth + 1, propCount, violations, mode);
    }
  }

  // Type-specific constraints
  const type = schema.type;
  if (declaresType(type, 'object')) {
    // `additionalProperties: false` is OpenAI-strict + Anthropic-strict required, but
    // the universal-kind schemas (which predate RFC 0030) deliberately use
    // `additionalProperties: true` on open metadata bags (e.g., `clarification.request`
    // `questions[].context` and `error.details`). Treat this as strict-only since
    // Gemini accepts both modes and the open-bag pattern is a deliberate v1.1
    // design choice — vendor-kind authors targeting OpenAI/Anthropic strict
    // mode for portability can satisfy this rule in their own schemas.
    if (mode === 'strict' && schema.additionalProperties !== false) {
      violations.push({ path, rule: 'additionalProperties-must-be-false-on-object-strict-only' });
    }
    // Tolerate malformed `properties` / `required` instead of throwing mid-walk.
    const props = isSchemaObject(schema.properties) ? schema.properties : {};
    const required: unknown[] = Array.isArray(schema.required) ? schema.required : [];
    for (const [propName, propSchema] of Object.entries(props)) {
      propCount.n++;
      const propPath = `${path}/properties/${propName}`;
      if (mode === 'strict' && !required.includes(propName)) {
        // OpenAI strict requires every property in required. Vendor-kind authors
        // who want OpenAI-strict portability use the `["type","null"]` union
        // pattern per RFC 0030 §D. Universal-kind schemas deliberately omit
        // `reasoning` from required per RFC 0030 §A so they don't fail this rule
        // under load-bearing mode; strict-mode advertisement is opt-in.
        violations.push({ path: propPath, rule: 'property-not-in-required-strict-mode-only' });
      }
      walkSchema(propSchema as Record<string, unknown>, propPath, depth + 1, propCount, violations, mode);
    }
    // A schema-valued `additionalProperties` is itself a subschema; walk it so
    // forbidden keywords cannot hide inside it.
    if (isSchemaObject(schema.additionalProperties)) {
      walkSchema(schema.additionalProperties, `${path}/additionalProperties`, depth + 1, propCount, violations, mode);
    }
  }

  if (mode === 'strict') {
    for (const group of STRICT_ONLY_CONSTRAINTS) {
      if (!declaresType(type, ...group.types)) continue;
      for (const kw of group.keywords) {
        if (kw in schema) {
          violations.push({ path, rule: group.rule, detail: kw });
        }
      }
    }
  }

  // Single-schema `items` only; array-form `items` is not descended into.
  if (declaresType(type, 'array') && isSchemaObject(schema.items)) {
    walkSchema(schema.items, `${path}/items`, depth + 1, propCount, violations, mode);
  }

  // $defs — walk to surface violations inside referenced shapes
  const defs = isSchemaObject(schema.$defs) ? schema.$defs : {};
  for (const [defName, defSchema] of Object.entries(defs)) {
    walkSchema(defSchema as Record<string, unknown>, `${path}/$defs/${defName}`, depth + 1, propCount, violations, mode);
  }
}
177
+
178
/**
 * Host-facing suite (skipped when no base URL is configured): when the host
 * advertises `tierOneSubsetCompliance: "strict"`, every advertised envelope
 * kind that has a schema in the local checkout must pass the strict walk.
 * Kinds without a local schema are skipped.
 */
describe.skipIf(HTTP_SKIP)('envelope-tier-one-subset-static (RFC 0030 §B)', () => {
  it('hosts advertising tierOneSubsetCompliance: "strict" have payload schemas that satisfy the Tier-1 intersection', async () => {
    const d = await readDiscovery();
    // host unreachable (or discovery body unusable); soft-skip
    if (d === null || typeof d !== 'object') return;
    const compliance = d.capabilities?.envelopes?.tierOneSubsetCompliance;
    if (compliance !== 'strict') return; // gated on "strict" only

    // `supportedEnvelopes` is host-supplied and untyped: accept only an array,
    // and only its string entries, rather than casting blindly.
    const rawKinds = d.capabilities?.supportedEnvelopes;
    const advertised = Array.isArray(rawKinds)
      ? rawKinds.filter((k): k is string => typeof k === 'string')
      : [];
    if (advertised.length === 0) return;

    const violationsByKind: Record<string, Violation[]> = {};
    for (const kind of advertised) {
      const local = loadLocalSchema(kind);
      if (local === null) continue; // host-served only; skip for now
      const violations: Violation[] = [];
      const propCount = { n: 0 };
      walkSchema(local, `#`, 0, propCount, violations, 'strict');
      // Object-root rule from this file's header: the payload schema root must
      // itself declare `type: object`.
      if (local.type !== 'object') {
        violations.push({ path: '#', rule: 'root-must-be-type-object-strict-only', detail: `type=${JSON.stringify(local.type)}` });
      }
      if (propCount.n > 100) {
        violations.push({ path: '#', rule: 'max-property-count-100-exceeded', detail: `count=${propCount.n}` });
      }
      if (violations.length > 0) {
        violationsByKind[kind] = violations;
      }
    }

    expect(
      violationsByKind,
      `RFC 0030 §B: schemas violating the Tier-1 subset under strict-mode advertisement: ${JSON.stringify(violationsByKind, null, 2)}`,
    ).toEqual({});
  });
});
208
+
209
/**
 * Always-on suite: each universal-kind schema in the local checkout must
 * exist and produce no violations under the `load-bearing` walk.
 */
describe('envelope-tier-one-subset-static: universal-kind schemas satisfy load-bearing rules (always-on)', () => {
  // Only the cross-vendor ("load-bearing") rules are asserted here: Gemini
  // silently drops these keywords, leaving a looser-than-declared schema.
  // OpenAI-strict-only rules (minLength, maxLength, minItems, etc.) are
  // reserved for hosts that advertise "strict", since Gemini 2.5+ and
  // Anthropic accept them.
  UNIVERSAL_KINDS.forEach((kind) => {
    it(`${kind}.schema.json satisfies load-bearing Tier-1 rules (no oneOf/allOf/not/prefixItems/propertyNames anywhere)`, () => {
      const loaded = loadLocalSchema(kind);
      expect(loaded, `schemas/envelopes/${kind}.schema.json MUST exist`).not.toBeNull();
      if (loaded === null) return;

      const found: Violation[] = [];
      const counter = { n: 0 };
      walkSchema(loaded, `#`, 0, counter, found, 'load-bearing');

      const report = JSON.stringify(found, null, 2);
      expect(
        found,
        `${kind}.schema.json load-bearing Tier-1 violations (these fail across multiple vendors): ${report}`,
      ).toEqual([]);
    });
  });
});