@openwop/openwop-conformance 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144):
  1. package/CHANGELOG.md +156 -1
  2. package/README.md +3 -2
  3. package/api/asyncapi.yaml +8 -0
  4. package/api/openapi.yaml +371 -1
  5. package/api/redocly.yaml +15 -0
  6. package/coverage.md +26 -5
  7. package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
  8. package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
  9. package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
  10. package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
  11. package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
  12. package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
  13. package/fixtures/conformance-envelope-recovery-applied.json +39 -0
  14. package/fixtures/conformance-envelope-refusal.json +38 -0
  15. package/fixtures/conformance-envelope-retry-attempted.json +39 -0
  16. package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
  17. package/fixtures/conformance-envelope-truncated.json +39 -0
  18. package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
  19. package/fixtures/conformance-model-capability-insufficient.json +25 -0
  20. package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
  21. package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
  22. package/fixtures/conformance-multi-agent-handoff.json +49 -0
  23. package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
  24. package/fixtures/conformance-prompt-end-to-end.json +33 -0
  25. package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
  26. package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
  27. package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
  28. package/fixtures/openwop-smoke-cost-emit.json +37 -0
  29. package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
  30. package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
  31. package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
  32. package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
  33. package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
  34. package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
  35. package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
  36. package/fixtures.md +45 -0
  37. package/package.json +1 -1
  38. package/schemas/README.md +5 -0
  39. package/schemas/agent-manifest.schema.json +16 -0
  40. package/schemas/capabilities.schema.json +390 -0
  41. package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
  42. package/schemas/envelopes/clarification.request.schema.json +9 -0
  43. package/schemas/envelopes/error.schema.json +4 -0
  44. package/schemas/envelopes/schema.request.schema.json +4 -0
  45. package/schemas/envelopes/schema.response.schema.json +1 -1
  46. package/schemas/node-pack-manifest.schema.json +28 -0
  47. package/schemas/orchestrator-decision.schema.json +12 -0
  48. package/schemas/prompt-kind.schema.json +8 -0
  49. package/schemas/prompt-pack-manifest.schema.json +80 -0
  50. package/schemas/prompt-ref.schema.json +40 -0
  51. package/schemas/prompt-template.schema.json +149 -0
  52. package/schemas/registry-version-manifest.schema.json +5 -0
  53. package/schemas/run-ancestry-response.schema.json +54 -0
  54. package/schemas/run-event-payloads.schema.json +513 -11
  55. package/schemas/run-event.schema.json +17 -1
  56. package/schemas/run-snapshot.schema.json +3 -2
  57. package/schemas/workflow-definition.schema.json +19 -1
  58. package/src/lib/driver.ts +15 -0
  59. package/src/lib/env.ts +51 -0
  60. package/src/lib/event-log-query.ts +62 -0
  61. package/src/lib/fixtures.ts +38 -1
  62. package/src/lib/host-toggle.ts +54 -0
  63. package/src/lib/llm-cache-key-recipe.ts +68 -0
  64. package/src/lib/multi-agent-capabilities.ts +10 -0
  65. package/src/lib/otel-scrape.ts +59 -0
  66. package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
  67. package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
  68. package/src/scenarios/aiEnvelope.contractRefusal.test.ts +224 -15
  69. package/src/scenarios/aiEnvelope.correlationReplay.test.ts +257 -25
  70. package/src/scenarios/aiEnvelope.redaction.test.ts +210 -29
  71. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +163 -24
  72. package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +262 -12
  73. package/src/scenarios/aiEnvelope.universalKinds.test.ts +107 -16
  74. package/src/scenarios/blob-presign-expiry.test.ts +42 -9
  75. package/src/scenarios/blob-roundtrip.test.ts +0 -0
  76. package/src/scenarios/cache-ttl-expiry.test.ts +34 -8
  77. package/src/scenarios/cost-attribution.test.ts +124 -11
  78. package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
  79. package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
  80. package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
  81. package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
  82. package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
  83. package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
  84. package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
  85. package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
  86. package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
  87. package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
  88. package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
  89. package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
  90. package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
  91. package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
  92. package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
  93. package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
  94. package/src/scenarios/envelope-truncated.test.ts +136 -0
  95. package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
  96. package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
  97. package/src/scenarios/fixtures-gating.test.ts +139 -1
  98. package/src/scenarios/fixtures-valid.test.ts +123 -15
  99. package/src/scenarios/kv-ttl-expiry.test.ts +40 -9
  100. package/src/scenarios/model-capability-insufficient.test.ts +221 -0
  101. package/src/scenarios/model-capability-substituted.test.ts +203 -0
  102. package/src/scenarios/multi-agent-confidence-escalation.test.ts +164 -0
  103. package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
  104. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
  105. package/src/scenarios/multi-region-idempotency.test.ts +58 -0
  106. package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
  107. package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
  108. package/src/scenarios/pack-registry-publish.test.ts +231 -51
  109. package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
  110. package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
  111. package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
  112. package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
  113. package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
  114. package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
  115. package/src/scenarios/prompt-pack-install.test.ts +187 -0
  116. package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
  117. package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
  118. package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
  119. package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
  120. package/src/scenarios/prompt-template-shape.test.ts +359 -0
  121. package/src/scenarios/provider-usage.test.ts +185 -0
  122. package/src/scenarios/queue-ack-nack-dlq.test.ts +64 -10
  123. package/src/scenarios/queue-publish-consume-roundtrip.test.ts +50 -10
  124. package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
  125. package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
  126. package/src/scenarios/replay-llm-cache-key.test.ts +127 -25
  127. package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
  128. package/src/scenarios/sandbox-capability-gate-respected.test.ts +31 -0
  129. package/src/scenarios/sandbox-memory-cap.test.ts +61 -0
  130. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +35 -0
  131. package/src/scenarios/sandbox-no-host-env-leak.test.ts +38 -0
  132. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +91 -0
  133. package/src/scenarios/sandbox-no-host-process-escape.test.ts +30 -0
  134. package/src/scenarios/sandbox-no-network-escape.test.ts +49 -0
  135. package/src/scenarios/sandbox-timeout-cap.test.ts +61 -0
  136. package/src/scenarios/search-bm25-roundtrip.test.ts +54 -9
  137. package/src/scenarios/spec-corpus-validity.test.ts +34 -6
  138. package/src/scenarios/sql-transaction-atomicity.test.ts +37 -8
  139. package/src/scenarios/stream-subscribe-from-beginning.test.ts +46 -9
  140. package/src/scenarios/subworkflow-input-mapping.test.ts +146 -10
  141. package/src/scenarios/table-cursor-pagination.test.ts +47 -9
  142. package/src/scenarios/table-schema-enforcement.test.ts +46 -9
  143. package/src/scenarios/vector-knn-roundtrip.test.ts +50 -10
  144. package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
@@ -1,9 +1,11 @@
1
1
  /**
2
- * aiEnvelope.correlationReplay — FINAL v1.1 advertisement-shape verification + behavioral placeholders.
2
+ * aiEnvelope.correlationReplay — FINAL v1.1 advertisement-shape + behavioral.
3
3
  *
4
- * Status: DRAFT (advertisement-shape). `spec/v1/ai-envelope.md` landed
5
- * 2026-05-17 as DRAFT v1.x. Behavioral assertions stay `it.todo()` until a
6
- * reference host wires the accept path and the cross-process replay seam.
4
+ * Status: ACTIVE (advertisement-shape + behavioral). `spec/v1/ai-envelope.md`
5
+ * promoted Draft → FINAL v1.1 2026-05-18. Live behavioral via the
6
+ * `POST /v1/host/sample/envelope/accept` seam with the persisted
7
+ * `priorCorrelations` store (survives process restart between original
8
+ * accept and replay; soft-skip on HTTP 404).
7
9
  *
8
10
  * Summary: two envelopes in the same run with the same `correlationId` MUST
9
11
  * be treated as a re-emission. The second invocation returns the cached
@@ -45,25 +47,255 @@ describe('aiEnvelope.correlationReplay: advertisement shape (FINAL v1.1)', () =>
45
47
  });
46
48
  });
47
49
 
48
- describe('aiEnvelope.correlationReplay: engine-state placeholders', () => {
49
- // The 4 assertions below require the engine to maintain a per-run
50
- // correlationId cached-outcome map AND project envelope acceptance
51
- // onto RunEventDocs with `causationId = envelope.correlationId`.
52
- //
53
- // The reference workflow-engine sample's `acceptEnvelope` is a pure
54
- // function (host/envelopeAcceptor.ts) it validates + categorizes
55
- // a single envelope without tracking state across calls. Promoting
56
- // these to behavioral requires either:
57
- // (a) extending the acceptor with an injected dedup store
58
- // (per-run correlationId map keyed by runId), OR
59
- // (b) a higher-level test seam that wires the acceptor into the
60
- // run lifecycle + event log.
61
- //
62
- // (b) is the spec-faithful path (per ai-envelope.md §"Replay
63
- // determinism" — the dedup is engine-level, not acceptor-level).
64
- // Tracked as host-impl follow-up.
65
- it.todo('emit envelope twice with same correlationId second returns cached outcome; no duplicate RunEventDocs');
66
- it.todo('emit envelope with correlationId C, then with same C and different type → refuse envelope_correlation_conflict');
67
- it.todo('cross-process replay: process-death after accept; recovered process re-emits same correlationId → cached outcome, no handler re-invocation');
68
- it.todo('resulting RunEventDoc.causationId equals the envelope.correlationId (causal chain preserved)');
50
+ // Behavioral assertions through the workflow-engine sample's env-gated
51
+ // `POST /v1/host/sample/envelope/accept` seam. The seam accepts a flat
52
+ // `priorCorrelations` array (each entry: `{correlationId, outcome, envelopeType}`)
53
+ // that the acceptor consumes as the per-run dedup store. Each test
54
+ // soft-skips on HTTP 404 (host doesn't expose the seam).
55
+ //
56
+ // The cross-process replay assertion (process death + recovery) still
57
+ // stays deferred — it requires a higher-level lifecycle seam that
58
+ // persists the dedup state, which is engine scope, not acceptor scope.
59
+ async function accept(envelope: unknown, opts: Record<string, unknown> = {}): Promise<{ status: number; body: { status?: string; reason?: string; envelopeId?: string; normalizedMeta?: { contentTrust?: string } } }> {
60
+ const res = await driver.post('/v1/host/sample/envelope/accept', { envelope, ...opts });
61
+ return { status: res.status, body: res.json as { status?: string; reason?: string; envelopeId?: string; normalizedMeta?: { contentTrust?: string } } };
62
+ }
63
+
64
+ const baseMeta = { source: 'ai-generation' as const, ts: '2026-05-18T10:00:00Z' };
65
+
66
+ describe('aiEnvelope.correlationReplay: behavioral in-process dedup (FINAL v1.1)', () => {
67
+ it('same correlationId re-emission returns the cached outcome unchanged', async () => {
68
+ const envelope = {
69
+ type: 'clarification.request',
70
+ schemaVersion: 1,
71
+ envelopeId: 'env-cr-replay-1',
72
+ correlationId: 'r:n:0:replay1',
73
+ payload: { questions: [{ id: 'q1', question: 'why?' }] },
74
+ meta: baseMeta,
75
+ };
76
+ const first = await accept(envelope);
77
+ if (first.status === 404) return;
78
+ expect(first.body.status).toBe('accepted');
79
+ const cachedOutcome = first.body;
80
+
81
+ const second = await accept(envelope, {
82
+ priorCorrelations: [
83
+ {
84
+ correlationId: 'r:n:0:replay1',
85
+ outcome: cachedOutcome,
86
+ envelopeType: 'clarification.request',
87
+ },
88
+ ],
89
+ });
90
+ expect(
91
+ second.body.status,
92
+ driver.describe(
93
+ 'ai-envelope.md §"Replay determinism"',
94
+ 'second emission with same correlationId MUST return the cached outcome (handler runs at most once per correlationId)',
95
+ ),
96
+ ).toBe('accepted');
97
+ expect(second.body.envelopeId).toBe(cachedOutcome.envelopeId);
98
+ });
99
+
100
+ it('same correlationId, different envelope type → invalid envelope_correlation_conflict', async () => {
101
+ const r = await accept(
102
+ {
103
+ type: 'error', // re-using a correlationId previously bound to clarification.request
104
+ schemaVersion: 1,
105
+ envelopeId: 'env-cr-conflict',
106
+ correlationId: 'r:n:0:conflict',
107
+ payload: { code: 'x', message: 'y' },
108
+ meta: baseMeta,
109
+ },
110
+ {
111
+ priorCorrelations: [
112
+ {
113
+ correlationId: 'r:n:0:conflict',
114
+ outcome: { status: 'accepted', envelopeId: 'env-prior', recordedEventIds: [], normalizedMeta: { contentTrust: 'trusted' } },
115
+ envelopeType: 'clarification.request',
116
+ },
117
+ ],
118
+ },
119
+ );
120
+ if (r.status === 404) return;
121
+ expect(
122
+ r.body.status,
123
+ driver.describe(
124
+ 'ai-envelope.md §"Replay determinism"',
125
+ 'same correlationId with different type MUST refuse envelope_correlation_conflict',
126
+ ),
127
+ ).toBe('invalid');
128
+ expect(r.body.reason).toContain('envelope_correlation_conflict');
129
+ });
130
+
131
+ it('cached outcome of any status (invalid/gated/breached) replays identically', async () => {
132
+ // Plant a `gated` cached outcome; second emission MUST return the same gated outcome
133
+ // (handler MUST NOT re-run, even if conditions might now accept).
134
+ const cached = {
135
+ status: 'gated' as const,
136
+ reason: 'envelope type \'vendor.x.foo\' not advertised',
137
+ allowedKinds: ['clarification.request', 'schema.request', 'schema.response', 'error'],
138
+ };
139
+ const r = await accept(
140
+ {
141
+ type: 'vendor.x.foo',
142
+ schemaVersion: 1,
143
+ envelopeId: 'env-cr-cached-gated',
144
+ correlationId: 'r:n:0:cachedgated',
145
+ payload: {},
146
+ meta: baseMeta,
147
+ },
148
+ {
149
+ hostSupportedEnvelopes: ['vendor.x.foo'], // would otherwise accept
150
+ priorCorrelations: [
151
+ {
152
+ correlationId: 'r:n:0:cachedgated',
153
+ outcome: cached,
154
+ envelopeType: 'vendor.x.foo',
155
+ },
156
+ ],
157
+ },
158
+ );
159
+ if (r.status === 404) return;
160
+ expect(
161
+ r.body.status,
162
+ driver.describe(
163
+ 'ai-envelope.md §"Replay determinism"',
164
+ 'cached non-accepted outcome MUST replay identically (handler at most once per correlationId)',
165
+ ),
166
+ ).toBe('gated');
167
+ });
168
+ });
169
+
170
+ // E.1 engine-projection via the test-only event-log seam.
171
+ import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
172
+
173
+ describe('aiEnvelope.correlationReplay: causationId projection via event-log seam', () => {
174
+ it('resulting RunEventDoc.causationId MUST equal the envelope.correlationId (causal chain preserved)', async () => {
175
+ if (!(await isEventLogSeamAvailable())) return;
176
+ const runId = `r-cr-cause-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
177
+ const correlationId = `${runId}:n:0:causationId-link`;
178
+ await accept(
179
+ {
180
+ type: 'clarification.request',
181
+ schemaVersion: 1,
182
+ envelopeId: 'env-cr-cause-1',
183
+ correlationId,
184
+ payload: { questions: [{ id: 'q1', question: 'why?' }] },
185
+ meta: baseMeta,
186
+ },
187
+ { projectTo: { runId, nodeId: 'n' } },
188
+ );
189
+ const events = await queryTestEvents(runId);
190
+ if (!events.ok || events.events.length === 0) return;
191
+ for (const e of events.events) {
192
+ expect(
193
+ e.causationId,
194
+ driver.describe('ai-envelope.md §"Replay determinism"', 'every event projected from an envelope MUST carry causationId === envelope.correlationId'),
195
+ ).toBe(correlationId);
196
+ }
197
+ await resetTestSeam();
198
+ });
199
+ });
200
+
201
+ describe('aiEnvelope.correlationReplay: cross-process replay via persisted dedup', () => {
202
+ // Cross-process replay proven WITHOUT actually killing the process:
203
+ // when a caller supplies `persistedDedup: { runId }`, the seam reads
204
+ // the persisted store BEFORE consulting the in-memory priorCorrelations
205
+ // and writes the outcome back after a successful accept. A second
206
+ // call from the same (or a hypothetically-restarted) process with
207
+ // ONLY persistedDedup set — no in-memory priorCorrelations — MUST
208
+ // return the same outcome as the first. That is the cross-process
209
+ // semantics: the persisted store is the source of truth, the in-
210
+ // memory map a per-process accelerator.
211
+ it('persisted outcome replays for the same correlationId even with NO in-memory priorCorrelations', async () => {
212
+ const runId = `r-cr-persist-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
213
+ const correlationId = `${runId}:n:0:persist1`;
214
+ // Two envelopes with the SAME correlationId but DIFFERENT
215
+ // envelopeIds. The acceptor reflects the inbound envelopeId on a
216
+ // fresh accept; a cache-hit returns the FIRST call's envelopeId
217
+ // regardless of what the second call carried. The envelopeId
218
+ // divergence is what makes this assertion non-trivial: if the
219
+ // persisted store is consulted, second.envelopeId === 'env-cr-
220
+ // persist-1'; if the handler re-runs (cache miss), it would
221
+ // surface 'env-cr-persist-2'.
222
+ const env1 = {
223
+ type: 'clarification.request',
224
+ schemaVersion: 1,
225
+ envelopeId: 'env-cr-persist-1',
226
+ correlationId,
227
+ payload: { questions: [{ id: 'q1', question: 'why?' }] },
228
+ meta: baseMeta,
229
+ };
230
+ const env2 = {
231
+ type: 'clarification.request',
232
+ schemaVersion: 1,
233
+ envelopeId: 'env-cr-persist-2',
234
+ correlationId,
235
+ payload: { questions: [{ id: 'q1', question: 'why?' }] },
236
+ meta: baseMeta,
237
+ };
238
+ // First accept persists the outcome under (runId, correlationId).
239
+ const first = await accept(env1, { persistedDedup: { runId } });
240
+ if (first.status === 404) return; // seam not exposed — soft-skip
241
+ expect(first.body.status).toBe('accepted');
242
+ expect(first.body.envelopeId).toBe('env-cr-persist-1');
243
+
244
+ // Second accept — same correlationId, NO priorCorrelations passed
245
+ // in-band, DIFFERENT envelopeId. If the persisted store is
246
+ // consulted, the cached outcome's envelopeId (env-cr-persist-1)
247
+ // is returned. If only the in-memory map were used, the handler
248
+ // would re-run and reflect env-cr-persist-2.
249
+ const second = await accept(env2, { persistedDedup: { runId } });
250
+ expect(
251
+ second.body.envelopeId,
252
+ driver.describe(
253
+ 'ai-envelope.md §"Replay determinism"',
254
+ 'persisted outcome MUST replay across calls without an in-memory priorCorrelations map (cross-process recovery: cached envelopeId surfaces even when the inbound envelope carries a different envelopeId)',
255
+ ),
256
+ ).toBe('env-cr-persist-1');
257
+ expect(second.body.status).toBe('accepted');
258
+ });
259
+
260
+ it('persisted store enforces envelope_correlation_conflict across calls', async () => {
261
+ const runId = `r-cr-persist-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
262
+ const correlationId = `${runId}:n:0:conflict1`;
263
+ // First accept: clarification.request.
264
+ const first = await accept(
265
+ {
266
+ type: 'clarification.request',
267
+ schemaVersion: 1,
268
+ envelopeId: 'env-cr-persist-conflict-1',
269
+ correlationId,
270
+ payload: { questions: [{ id: 'q1', question: 'why?' }] },
271
+ meta: baseMeta,
272
+ },
273
+ { persistedDedup: { runId } },
274
+ );
275
+ if (first.status === 404) return;
276
+ expect(first.body.status).toBe('accepted');
277
+
278
+ // Second accept: same correlationId, different envelope type, NO
279
+ // in-memory priorCorrelations — the conflict MUST be served from
280
+ // the persisted store.
281
+ const second = await accept(
282
+ {
283
+ type: 'error',
284
+ schemaVersion: 1,
285
+ envelopeId: 'env-cr-persist-conflict-2',
286
+ correlationId,
287
+ payload: { code: 'x', message: 'y' },
288
+ meta: baseMeta,
289
+ },
290
+ { persistedDedup: { runId } },
291
+ );
292
+ expect(
293
+ second.body.status,
294
+ driver.describe(
295
+ 'ai-envelope.md §"Replay determinism"',
296
+ 'persisted store MUST surface envelope_correlation_conflict on type mismatch without an in-memory priorCorrelations map',
297
+ ),
298
+ ).toBe('invalid');
299
+ expect(second.body.reason).toContain('envelope_correlation_conflict');
300
+ });
69
301
  });
@@ -1,10 +1,11 @@
1
1
  /**
2
- * aiEnvelope.redaction — FINAL v1.1 advertisement-shape verification + behavioral placeholders.
2
+ * aiEnvelope.redaction — FINAL v1.1 advertisement-shape + behavioral.
3
3
  *
4
- * Status: DRAFT (advertisement-shape). `spec/v1/ai-envelope.md` landed
5
- * 2026-05-17 as DRAFT v1.x. Behavioral assertions stay `it.todo()` until a
6
- * reference host wires the envelope accept path through the BYOK redaction
7
- * harness.
4
+ * Status: ACTIVE (advertisement-shape + behavioral). `spec/v1/ai-envelope.md`
5
+ * promoted Draft → FINAL v1.1 2026-05-18. Live behavioral via the
6
+ * `POST /v1/host/sample/envelope/accept` seam, which routes the envelope
7
+ * through the BYOK redaction harness and returns `redactedPayload` +
8
+ * `redactionCount` (soft-skip on HTTP 404).
8
9
  *
9
10
  * Summary: AI Envelopes MUST route through the same BYOK redaction harness
10
11
  * applied to a fresh `MemoryEntry.put` per `agent-memory.md` §"SR-1
@@ -46,28 +47,208 @@ describe('aiEnvelope.redaction: advertisement shape (FINAL v1.1)', () => {
46
47
  });
47
48
  });
48
49
 
49
- describe('aiEnvelope.redaction: BYOK-redaction placeholders', () => {
50
- // The 6 assertions below require the engine's BYOK redaction pipeline
51
- // (per SECURITY/threat-model-secret-leakage.md SR-1 carry-forward) to
52
- // hook into envelope acceptance AND every downstream surface that
53
- // persists envelope content (RunEventDoc, OTel span attributes,
54
- // debug-bundle export, error envelope projection).
55
- //
56
- // The reference workflow-engine sample's `acceptEnvelope` is pure +
57
- // doesn't touch payload contents. Redaction lives at a different
58
- // layer (BYOK secretResolver + event-log sanitizer). Promoting these
59
- // to behavioral requires either:
60
- // (a) chaining the acceptor through `stripSecretsFromPersisted`
61
- // before persisting the recorded view, OR
62
- // (b) an end-to-end test that plants a BYOK canary in an envelope
63
- // payload, runs through the full accept → emit → persist → export
64
- // chain, and asserts the canary is absent on every output.
65
- //
66
- // (b) is the spec-faithful path. Tracked as host-impl follow-up.
67
- it.todo('emit envelope whose payload contains a known BYOK substring substring absent from emitted RunEventDocs');
68
- it.todo('redacted substring absent from OTel envelope_* span attributes');
69
- it.todo('redacted substring absent from debug-bundle export');
70
- it.todo('redacted substring absent from error envelope on validation refusal (no leak via error path)');
71
- it.todo('redaction marker is the canonical [REDACTED:<reason>] form, NOT a model-generated <REDACTED> string');
72
- it.todo('redaction runs AFTER schema validation: a payload with redacted-shaped substrings still validates structurally');
50
+ // Behavioral assertions through the workflow-engine sample's env-gated
51
+ // `POST /v1/host/sample/envelope/accept` seam. The seam threads a
52
+ // `byokCanaries: string[]` option into AcceptOptions so the acceptor
53
+ // can apply the §"Redaction (SR-1 carry-forward)" pass on accepted
54
+ // envelopes, returning `redactedPayload` in the outcome. Each test
55
+ // soft-skips on HTTP 404 (host doesn't expose the seam).
56
+ async function accept(envelope: unknown, opts: Record<string, unknown> = {}): Promise<{ status: number; body: { status?: string; reason?: string; redactedPayload?: unknown; redactionCount?: number; details?: unknown[] } }> {
57
+ const res = await driver.post('/v1/host/sample/envelope/accept', { envelope, ...opts });
58
+ return { status: res.status, body: res.json as { status?: string; reason?: string; redactedPayload?: unknown; redactionCount?: number; details?: unknown[] } };
59
+ }
60
+
61
+ const baseMeta = { source: 'ai-generation' as const, ts: '2026-05-18T10:00:00Z' };
62
+ const CANARY_VALUE = 'sk-canary-DO-NOT-LEAK-9d7f3a2b';
63
+ const CANARY_SECRET_ID = 'byok-secret-test-fixture-001';
64
+ const CANARIES = [{ value: CANARY_VALUE, secretId: CANARY_SECRET_ID }];
65
+ const CANONICAL_MARKER = `[REDACTED:${CANARY_SECRET_ID}]`;
66
+
67
+ describe('aiEnvelope.redaction: behavioral acceptor-level scrub (FINAL v1.1)', () => {
68
+ it('canary in payload substituted with canonical SR-1 [REDACTED:<secretId>] marker per agent-memory.md:66', async () => {
69
+ const r = await accept(
70
+ {
71
+ type: 'error',
72
+ schemaVersion: 1,
73
+ envelopeId: 'env-red-1',
74
+ correlationId: 'r:n:0:red1',
75
+ payload: { code: 'leak_demo', message: `tool result included ${CANARY_VALUE} verbatim` },
76
+ meta: baseMeta,
77
+ },
78
+ { byokCanaries: CANARIES },
79
+ );
80
+ if (r.status === 404) return;
81
+ expect(r.body.status).toBe('accepted');
82
+ expect(r.body.redactionCount, 'redactionCount MUST be > 0 when canary appears').toBeGreaterThan(0);
83
+ expect(
84
+ JSON.stringify(r.body.redactedPayload).includes(CANARY_VALUE),
85
+ driver.describe('ai-envelope.md §"Redaction (SR-1 carry-forward)"', 'canary plaintext MUST be absent from the redacted view'),
86
+ ).toBe(false);
87
+ expect(
88
+ JSON.stringify(r.body.redactedPayload),
89
+ driver.describe('agent-memory.md §SR-1 line 66', 'persisted entry MUST carry [REDACTED:<secretId>] in place of the plaintext'),
90
+ ).toContain(CANONICAL_MARKER);
91
+ });
92
+
93
+ it('canary across nested object fields → all occurrences scrubbed with canonical marker', async () => {
94
+ const r = await accept(
95
+ {
96
+ type: 'clarification.request',
97
+ schemaVersion: 1,
98
+ envelopeId: 'env-red-nested',
99
+ correlationId: 'r:n:0:rednested',
100
+ payload: {
101
+ questions: [
102
+ { id: 'q1', question: `What is ${CANARY_VALUE}?` },
103
+ { id: 'q2', question: 'unrelated', context: { trace: `${CANARY_VALUE}/${CANARY_VALUE}` } },
104
+ ],
105
+ },
106
+ meta: baseMeta,
107
+ },
108
+ { byokCanaries: CANARIES },
109
+ );
110
+ if (r.status === 404) return;
111
+ expect(r.body.status).toBe('accepted');
112
+ expect(
113
+ JSON.stringify(r.body.redactedPayload).includes(CANARY_VALUE),
114
+ 'no canary plaintext remnant anywhere in the redacted view (recursive scrub)',
115
+ ).toBe(false);
116
+ // q1's question (1 occurrence), q2's context.trace (2 occurrences) = total 3
117
+ expect(r.body.redactionCount).toBe(3);
118
+ });
119
+
120
+ it('multiple canaries → each substituted with its own secretId marker', async () => {
121
+ const C1 = { value: 'sk-canary-alpha-xxxx', secretId: 'secret-alpha' };
122
+ const C2 = { value: 'sk-canary-beta-yyyy', secretId: 'secret-beta' };
123
+ const r = await accept(
124
+ {
125
+ type: 'error',
126
+ schemaVersion: 1,
127
+ envelopeId: 'env-red-multi',
128
+ correlationId: 'r:n:0:redmulti',
129
+ payload: { code: 'multi_leak', message: `first=${C1.value}, second=${C2.value}` },
130
+ meta: baseMeta,
131
+ },
132
+ { byokCanaries: [C1, C2] },
133
+ );
134
+ if (r.status === 404) return;
135
+ expect(r.body.status).toBe('accepted');
136
+ const view = JSON.stringify(r.body.redactedPayload);
137
+ expect(view.includes(C1.value)).toBe(false);
138
+ expect(view.includes(C2.value)).toBe(false);
139
+ expect(
140
+ view.includes(`[REDACTED:${C1.secretId}]`) && view.includes(`[REDACTED:${C2.secretId}]`),
141
+ driver.describe('agent-memory.md §SR-1', 'each canary MUST be substituted with its OWN [REDACTED:<secretId>] marker'),
142
+ ).toBe(true);
143
+ });
144
+
145
+ it('redaction runs AFTER schema validation: payload with [REDACTED:...]-shaped substrings still validates', async () => {
146
+ // The error-kind payload schema requires { code, message }. A pre-redacted
147
+ // marker in the message MUST NOT trip validation.
148
+ const r = await accept(
149
+ {
150
+ type: 'error',
151
+ schemaVersion: 1,
152
+ envelopeId: 'env-red-shape',
153
+ correlationId: 'r:n:0:redshape',
154
+ payload: { code: 'demo', message: 'already had [REDACTED:secret-prior] before we saw it' },
155
+ meta: baseMeta,
156
+ },
157
+ { byokCanaries: CANARIES }, // canary NOT in payload; substitution count expected 0
158
+ );
159
+ if (r.status === 404) return;
160
+ expect(
161
+ r.body.status,
162
+ driver.describe('ai-envelope.md §"Redaction (SR-1 carry-forward)"', 'redaction MUST run AFTER schema validation; pre-existing markers do not affect validation'),
163
+ ).toBe('accepted');
164
+ // No canary present → redactionCount absent or 0
165
+ expect(r.body.redactionCount ?? 0).toBe(0);
166
+ });
167
+
168
+ it('canary in invalid envelope (validation refusal) → error response MUST NOT echo the canary plaintext', async () => {
169
+ // ISO 8601 violation triggers an `invalid` outcome BEFORE the redaction
170
+ // pass runs. The acceptor's validation-detail extractor MUST NOT echo
171
+ // the payload contents into the error response.
172
+ const r = await accept(
173
+ {
174
+ type: 'error',
175
+ schemaVersion: 1,
176
+ envelopeId: 'env-red-leak',
177
+ correlationId: 'r:n:0:redleak',
178
+ payload: { code: 'demo', message: `secret value is ${CANARY_VALUE}` },
179
+ meta: { ...baseMeta, ts: 'tomorrow' }, // bad ts → invalid
180
+ },
181
+ { byokCanaries: CANARIES },
182
+ );
183
+ if (r.status === 404) return;
184
+ expect(r.body.status).toBe('invalid');
185
+ const bodyString = JSON.stringify(r.body);
186
+ expect(
187
+ bodyString.includes(CANARY_VALUE),
188
+ driver.describe(
189
+ 'SECURITY/threat-model-secret-leakage.md §SR-1',
190
+ 'error response on validation refusal MUST NOT echo BYOK canary plaintext',
191
+ ),
192
+ ).toBe(false);
193
+ });
194
+ });
195
+
196
+ // E.2 OTel scrape + E.3 debug-bundle seams.
197
+ import { queryTestSpans, exportDebugBundle, isOtelSeamAvailable } from '../lib/otel-scrape.js';
198
+ import { resetTestSeam } from '../lib/event-log-query.js';
199
+
200
+ describe('aiEnvelope.redaction: OTel + debug-bundle scrape (E.2 + E.3)', () => {
201
+ it('redacted canary plaintext MUST be absent from OTel envelope_* span attributes', async () => {
202
+ if (!(await isOtelSeamAvailable())) return;
203
+ const runId = `r-red-otel-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
204
+ await accept(
205
+ {
206
+ type: 'error',
207
+ schemaVersion: 1,
208
+ envelopeId: 'env-red-otel-1',
209
+ correlationId: `${runId}:n:0:red-otel`,
210
+ payload: { code: 'leak_demo', message: `tool result included ${CANARY_VALUE} verbatim` },
211
+ meta: baseMeta,
212
+ },
213
+ { byokCanaries: CANARIES, projectTo: { runId, nodeId: 'n' } },
214
+ );
215
+ const spans = await queryTestSpans({ runId });
216
+ if (!spans.ok) return;
217
+ const allAttrs = spans.data.flatMap((s) => Object.values(s.attributes).map((v) => String(v)));
218
+ expect(
219
+ allAttrs.some((v) => v.includes(CANARY_VALUE)),
220
+ driver.describe(
221
+ 'SECURITY/threat-model-secret-leakage.md §SR-1',
222
+ 'BYOK canary plaintext MUST NOT appear in any OTel envelope_* span attribute',
223
+ ),
224
+ ).toBe(false);
225
+ await resetTestSeam();
226
+ });
227
+
228
+ it('redacted canary plaintext MUST be absent from debug-bundle export', async () => {
229
+ if (!(await isOtelSeamAvailable())) return;
230
+ const runId = `r-red-bundle-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
231
+ await accept(
232
+ {
233
+ type: 'clarification.request',
234
+ schemaVersion: 1,
235
+ envelopeId: 'env-red-bundle-1',
236
+ correlationId: `${runId}:n:0:red-bundle`,
237
+ payload: { questions: [{ id: 'q1', question: `embed ${CANARY_VALUE} here` }] },
238
+ meta: baseMeta,
239
+ },
240
+ { byokCanaries: CANARIES, projectTo: { runId, nodeId: 'n' } },
241
+ );
242
+ const bundle = await exportDebugBundle(runId);
243
+ if (!bundle.ok) return;
244
+ const serialized = JSON.stringify(bundle.data);
245
+ expect(
246
+ serialized.includes(CANARY_VALUE),
247
+ driver.describe(
248
+ 'SECURITY/threat-model-secret-leakage.md §SR-1',
249
+ 'BYOK canary plaintext MUST NOT appear in the debug-bundle export (events + spans)',
250
+ ),
251
+ ).toBe(false);
252
+ await resetTestSeam();
253
+ });
73
254
  });