@openwop/openwop-conformance 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/CHANGELOG.md +65 -0
  2. package/README.md +2 -2
  3. package/api/redocly.yaml +15 -0
  4. package/coverage.md +2 -1
  5. package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
  6. package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
  7. package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
  8. package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
  9. package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
  10. package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
  11. package/fixtures.md +6 -0
  12. package/package.json +1 -1
  13. package/schemas/capabilities.schema.json +16 -0
  14. package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
  15. package/schemas/run-event-payloads.schema.json +35 -1
  16. package/schemas/run-event.schema.json +2 -0
  17. package/src/lib/driver.ts +15 -0
  18. package/src/lib/env.ts +51 -0
  19. package/src/lib/event-log-query.ts +62 -0
  20. package/src/lib/fixtures.ts +38 -1
  21. package/src/lib/host-toggle.ts +54 -0
  22. package/src/lib/multi-agent-capabilities.ts +10 -0
  23. package/src/lib/otel-scrape.ts +59 -0
  24. package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
  25. package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
  26. package/src/scenarios/aiEnvelope.contractRefusal.test.ts +128 -10
  27. package/src/scenarios/aiEnvelope.correlationReplay.test.ts +236 -21
  28. package/src/scenarios/aiEnvelope.redaction.test.ts +204 -24
  29. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +158 -19
  30. package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +59 -8
  31. package/src/scenarios/aiEnvelope.universalKinds.test.ts +100 -9
  32. package/src/scenarios/blob-presign-expiry.test.ts +35 -2
  33. package/src/scenarios/blob-roundtrip.test.ts +0 -0
  34. package/src/scenarios/cache-ttl-expiry.test.ts +28 -2
  35. package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
  36. package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
  37. package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
  38. package/src/scenarios/fixtures-gating.test.ts +139 -1
  39. package/src/scenarios/kv-ttl-expiry.test.ts +33 -2
  40. package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
  41. package/src/scenarios/pack-registry-publish.test.ts +231 -51
  42. package/src/scenarios/provider-usage.test.ts +185 -0
  43. package/src/scenarios/queue-ack-nack-dlq.test.ts +57 -3
  44. package/src/scenarios/queue-publish-consume-roundtrip.test.ts +43 -3
  45. package/src/scenarios/replay-llm-cache-key.test.ts +166 -25
  46. package/src/scenarios/search-bm25-roundtrip.test.ts +47 -2
  47. package/src/scenarios/sql-transaction-atomicity.test.ts +31 -2
  48. package/src/scenarios/stream-subscribe-from-beginning.test.ts +39 -2
  49. package/src/scenarios/subworkflow-input-mapping.test.ts +77 -7
  50. package/src/scenarios/table-cursor-pagination.test.ts +40 -2
  51. package/src/scenarios/table-schema-enforcement.test.ts +39 -2
  52. package/src/scenarios/vector-knn-roundtrip.test.ts +43 -3
  53. package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
@@ -137,14 +137,132 @@ describe('aiEnvelope.contractRefusal: behavioral accept-gate (FINAL v1.1)', () =
137
137
  });
138
138
  });
139
139
 
140
- describe('aiEnvelope.contractRefusal: engine-integration placeholders', () => {
141
- // These require the engine to project gated outcomes onto RunEventDocs
142
- // / node.failed events / log.appended (level: warn) per refusalMode.
143
- // The pure-function acceptor surfaces `gated` outcomes; the engine
144
- // projects them to the event log.
145
- it.todo('node.failed event carries error.code = "envelope_contract_violation"');
146
- it.todo('refused envelope error.details.acceptedTypes lists the declared accepts[]');
147
- it.todo('refused envelope error.details.refusedType names the emitted type');
148
- it.todo('refusalMode:"discard-and-warn" emits log.appended level:"warn" instead of node.failed');
149
- it.todo('capability-gated typeId refusal stacks atop Envelope Contract refusal (host.aiEnvelope absent → typeId refused first)');
140
+ // E.1 engine-projection via the test-only event-log seam.
141
+ import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
142
+
143
+ describe('aiEnvelope.contractRefusal: engine projection via event-log seam', () => {
144
+ it('gated (fail-node) node.failed { error.code: "envelope_contract_violation" }', async () => {
145
+ if (!(await isEventLogSeamAvailable())) return;
146
+ const runId = `r-cr-fail-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
147
+ const r = await accept(
148
+ {
149
+ type: 'vendor.x.bar.create',
150
+ schemaVersion: 1,
151
+ envelopeId: 'env-cr-proj-1',
152
+ correlationId: `${runId}:n:0:cr-proj-1`,
153
+ payload: {},
154
+ meta: baseMeta,
155
+ },
156
+ {
157
+ hostSupportedEnvelopes: ['vendor.x.bar.create', 'vendor.x.foo.create'],
158
+ nodeAllowedKinds: ['vendor.x.foo.create'],
159
+ projectTo: { runId, nodeId: 'n', refusalMode: 'fail-node' },
160
+ },
161
+ );
162
+ if (r.status === 404) return;
163
+ expect(r.body.status).toBe('gated');
164
+ const events = await queryTestEvents(runId, { type: 'node.failed' });
165
+ if (!events.ok || events.events.length === 0) return;
166
+ const err = events.events[0]!.payload.error as { code?: string; details?: { refusedType?: string; acceptedTypes?: string[] } };
167
+ expect(
168
+ err.code,
169
+ driver.describe('ai-envelope.md §"Envelope Contract"', 'gated outcome MUST project to node.failed with error.code = envelope_contract_violation'),
170
+ ).toBe('envelope_contract_violation');
171
+ });
172
+
173
+ it('refused envelope: error.details.refusedType names emitted kind; acceptedTypes lists allowed kinds', async () => {
174
+ if (!(await isEventLogSeamAvailable())) return;
175
+ const runId = `r-cr-details-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
176
+ await accept(
177
+ {
178
+ type: 'vendor.x.bar.create',
179
+ schemaVersion: 1,
180
+ envelopeId: 'env-cr-proj-details',
181
+ correlationId: `${runId}:n:0:cr-details`,
182
+ payload: {},
183
+ meta: baseMeta,
184
+ },
185
+ {
186
+ hostSupportedEnvelopes: ['vendor.x.bar.create', 'vendor.x.foo.create'],
187
+ nodeAllowedKinds: ['vendor.x.foo.create'],
188
+ projectTo: { runId, nodeId: 'n' },
189
+ },
190
+ );
191
+ const events = await queryTestEvents(runId, { type: 'node.failed' });
192
+ if (!events.ok || events.events.length === 0) return;
193
+ const details = (events.events[0]!.payload.error as { details?: { refusedType?: string; acceptedTypes?: string[] } }).details;
194
+ expect(details?.refusedType).toBe('vendor.x.bar.create');
195
+ expect(
196
+ Array.isArray(details?.acceptedTypes) && details!.acceptedTypes!.includes('vendor.x.foo.create'),
197
+ driver.describe('ai-envelope.md §"Envelope Contract"', 'error.details.acceptedTypes MUST list the node\'s declared accepts[] (plus universals)'),
198
+ ).toBe(true);
199
+ });
200
+
201
+ it('refusalMode:"discard-and-warn" → log.appended { level: "warn" } instead of node.failed', async () => {
202
+ if (!(await isEventLogSeamAvailable())) return;
203
+ const runId = `r-cr-warn-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
204
+ await accept(
205
+ {
206
+ type: 'vendor.x.bar.create',
207
+ schemaVersion: 1,
208
+ envelopeId: 'env-cr-proj-warn',
209
+ correlationId: `${runId}:n:0:cr-warn`,
210
+ payload: {},
211
+ meta: baseMeta,
212
+ },
213
+ {
214
+ hostSupportedEnvelopes: ['vendor.x.bar.create'],
215
+ nodeAllowedKinds: ['vendor.x.foo.create'], // gated
216
+ projectTo: { runId, nodeId: 'n', refusalMode: 'discard-and-warn' },
217
+ },
218
+ );
219
+ const warnEvents = await queryTestEvents(runId, { type: 'log.appended' });
220
+ const failEvents = await queryTestEvents(runId, { type: 'node.failed' });
221
+ if (!warnEvents.ok || !failEvents.ok) return;
222
+ expect(
223
+ warnEvents.events.some((e) => (e.payload as { level?: string }).level === 'warn'),
224
+ driver.describe('ai-envelope.md §"Envelope Contract"', 'discard-and-warn MUST emit log.appended at warn level'),
225
+ ).toBe(true);
226
+ expect(
227
+ failEvents.events.length,
228
+ driver.describe('ai-envelope.md §"Envelope Contract"', 'discard-and-warn MUST NOT emit node.failed'),
229
+ ).toBe(0);
230
+ await resetTestSeam();
231
+ });
232
+
233
+ it('host-gate refusal (hostSupportedEnvelopes) projects to node.failed with envelope_contract_violation', async () => {
234
+ if (!(await isEventLogSeamAvailable())) return;
235
+ const runId = `r-cr-host-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
236
+ await accept(
237
+ {
238
+ type: 'vendor.unadvertised.kind',
239
+ schemaVersion: 1,
240
+ envelopeId: 'env-cr-proj-host',
241
+ correlationId: `${runId}:n:0:cr-host`,
242
+ payload: {},
243
+ meta: baseMeta,
244
+ },
245
+ {
246
+ hostSupportedEnvelopes: ['vendor.advertised.only'],
247
+ nodeAllowedKinds: ['vendor.unadvertised.kind'],
248
+ projectTo: { runId, nodeId: 'n' },
249
+ },
250
+ );
251
+ const events = await queryTestEvents(runId, { type: 'node.failed' });
252
+ if (!events.ok || events.events.length === 0) return;
253
+ expect(
254
+ (events.events[0]!.payload.error as { code?: string }).code,
255
+ driver.describe('ai-envelope.md §"Capability handshake integration"', 'host-gate refusal MUST project to node.failed envelope_contract_violation (stacks above node-gate)'),
256
+ ).toBe('envelope_contract_violation');
257
+ });
258
+ });
259
+
260
+ describe('aiEnvelope.contractRefusal: capability-stacking placeholder', () => {
261
+ // Capability-gated typeId refusal stacking (host.aiEnvelope absent →
262
+ // typeId refused FIRST, before envelope contract gate) requires
263
+ // the workflow-register handler to consult host.aiEnvelope BEFORE
264
+ // dispatching envelope acceptance. Tracked under Thread E (engine
265
+ // integration of acceptor into node execution path); the seam
266
+ // alone can't verify the ordering.
267
+ it.todo('capability-gated typeId refusal stacks atop Envelope Contract refusal (host.aiEnvelope absent → typeId refused first; needs node-execution wiring)');
150
268
  });
@@ -45,25 +45,240 @@ describe('aiEnvelope.correlationReplay: advertisement shape (FINAL v1.1)', () =>
45
45
  });
46
46
  });
47
47
 
48
- describe('aiEnvelope.correlationReplay: engine-state placeholders', () => {
49
- // The 4 assertions below require the engine to maintain a per-run
50
- // correlationId cached-outcome map AND project envelope acceptance
51
- // onto RunEventDocs with `causationId = envelope.correlationId`.
52
- //
53
- // The reference workflow-engine sample's `acceptEnvelope` is a pure
54
- // function (host/envelopeAcceptor.ts) — it validates + categorizes
55
- // a single envelope without tracking state across calls. Promoting
56
- // these to behavioral requires either:
57
- // (a) extending the acceptor with an injected dedup store
58
- // (per-run correlationId map keyed by runId), OR
59
- // (b) a higher-level test seam that wires the acceptor into the
60
- // run lifecycle + event log.
61
- //
62
- // (b) is the spec-faithful path (per ai-envelope.md §"Replay
63
- // determinism" — the dedup is engine-level, not acceptor-level).
64
- // Tracked as host-impl follow-up.
65
- it.todo('emit envelope twice with same correlationId second returns cached outcome; no duplicate RunEventDocs');
66
- it.todo('emit envelope with correlationId C, then with same C and different type → refuse envelope_correlation_conflict');
67
- it.todo('cross-process replay: process-death after accept; recovered process re-emits same correlationId → cached outcome, no handler re-invocation');
68
- it.todo('resulting RunEventDoc.causationId equals the envelope.correlationId (causal chain preserved)');
48
+ // Behavioral assertions through the workflow-engine sample's env-gated
49
+ // `POST /v1/host/sample/envelope/accept` seam. The seam accepts a flat
50
+ // `priorCorrelations` array (each entry: `{correlationId, outcome, envelopeType}`)
51
+ // that the acceptor consumes as the per-run dedup store. Each test
52
+ // soft-skips on HTTP 404 (host doesn't expose the seam).
53
+ //
54
+ // The cross-process replay assertion (process death + recovery) still
55
+ // stays deferred — it requires a higher-level lifecycle seam that
56
+ // persists the dedup state, which is engine scope, not acceptor scope.
57
+ async function accept(envelope: unknown, opts: Record<string, unknown> = {}): Promise<{ status: number; body: { status?: string; reason?: string; envelopeId?: string; normalizedMeta?: { contentTrust?: string } } }> {
58
+ const res = await driver.post('/v1/host/sample/envelope/accept', { envelope, ...opts });
59
+ return { status: res.status, body: res.json as { status?: string; reason?: string; envelopeId?: string; normalizedMeta?: { contentTrust?: string } } };
60
+ }
61
+
62
+ const baseMeta = { source: 'ai-generation' as const, ts: '2026-05-18T10:00:00Z' };
63
+
64
+ describe('aiEnvelope.correlationReplay: behavioral in-process dedup (FINAL v1.1)', () => {
65
+ it('same correlationId re-emission returns the cached outcome unchanged', async () => {
66
+ const envelope = {
67
+ type: 'clarification.request',
68
+ schemaVersion: 1,
69
+ envelopeId: 'env-cr-replay-1',
70
+ correlationId: 'r:n:0:replay1',
71
+ payload: { questions: [{ id: 'q1', question: 'why?' }] },
72
+ meta: baseMeta,
73
+ };
74
+ const first = await accept(envelope);
75
+ if (first.status === 404) return;
76
+ expect(first.body.status).toBe('accepted');
77
+ const cachedOutcome = first.body;
78
+
79
+ const second = await accept(envelope, {
80
+ priorCorrelations: [
81
+ {
82
+ correlationId: 'r:n:0:replay1',
83
+ outcome: cachedOutcome,
84
+ envelopeType: 'clarification.request',
85
+ },
86
+ ],
87
+ });
88
+ expect(
89
+ second.body.status,
90
+ driver.describe(
91
+ 'ai-envelope.md §"Replay determinism"',
92
+ 'second emission with same correlationId MUST return the cached outcome (handler runs at most once per correlationId)',
93
+ ),
94
+ ).toBe('accepted');
95
+ expect(second.body.envelopeId).toBe(cachedOutcome.envelopeId);
96
+ });
97
+
98
+ it('same correlationId, different envelope type → invalid envelope_correlation_conflict', async () => {
99
+ const r = await accept(
100
+ {
101
+ type: 'error', // re-using a correlationId previously bound to clarification.request
102
+ schemaVersion: 1,
103
+ envelopeId: 'env-cr-conflict',
104
+ correlationId: 'r:n:0:conflict',
105
+ payload: { code: 'x', message: 'y' },
106
+ meta: baseMeta,
107
+ },
108
+ {
109
+ priorCorrelations: [
110
+ {
111
+ correlationId: 'r:n:0:conflict',
112
+ outcome: { status: 'accepted', envelopeId: 'env-prior', recordedEventIds: [], normalizedMeta: { contentTrust: 'trusted' } },
113
+ envelopeType: 'clarification.request',
114
+ },
115
+ ],
116
+ },
117
+ );
118
+ if (r.status === 404) return;
119
+ expect(
120
+ r.body.status,
121
+ driver.describe(
122
+ 'ai-envelope.md §"Replay determinism"',
123
+ 'same correlationId with different type MUST refuse envelope_correlation_conflict',
124
+ ),
125
+ ).toBe('invalid');
126
+ expect(r.body.reason).toContain('envelope_correlation_conflict');
127
+ });
128
+
129
+ it('cached outcome of any status (invalid/gated/breached) replays identically', async () => {
130
+ // Plant a `gated` cached outcome; second emission MUST return the same gated outcome
131
+ // (handler MUST NOT re-run, even if conditions might now accept).
132
+ const cached = {
133
+ status: 'gated' as const,
134
+ reason: 'envelope type \'vendor.x.foo\' not advertised',
135
+ allowedKinds: ['clarification.request', 'schema.request', 'schema.response', 'error'],
136
+ };
137
+ const r = await accept(
138
+ {
139
+ type: 'vendor.x.foo',
140
+ schemaVersion: 1,
141
+ envelopeId: 'env-cr-cached-gated',
142
+ correlationId: 'r:n:0:cachedgated',
143
+ payload: {},
144
+ meta: baseMeta,
145
+ },
146
+ {
147
+ hostSupportedEnvelopes: ['vendor.x.foo'], // would otherwise accept
148
+ priorCorrelations: [
149
+ {
150
+ correlationId: 'r:n:0:cachedgated',
151
+ outcome: cached,
152
+ envelopeType: 'vendor.x.foo',
153
+ },
154
+ ],
155
+ },
156
+ );
157
+ if (r.status === 404) return;
158
+ expect(
159
+ r.body.status,
160
+ driver.describe(
161
+ 'ai-envelope.md §"Replay determinism"',
162
+ 'cached non-accepted outcome MUST replay identically (handler at most once per correlationId)',
163
+ ),
164
+ ).toBe('gated');
165
+ });
166
+ });
167
+
168
+ // E.1 engine-projection via the test-only event-log seam.
169
+ import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
170
+
171
+ describe('aiEnvelope.correlationReplay: causationId projection via event-log seam', () => {
172
+ it('resulting RunEventDoc.causationId MUST equal the envelope.correlationId (causal chain preserved)', async () => {
173
+ if (!(await isEventLogSeamAvailable())) return;
174
+ const runId = `r-cr-cause-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
175
+ const correlationId = `${runId}:n:0:causationId-link`;
176
+ await accept(
177
+ {
178
+ type: 'clarification.request',
179
+ schemaVersion: 1,
180
+ envelopeId: 'env-cr-cause-1',
181
+ correlationId,
182
+ payload: { questions: [{ id: 'q1', question: 'why?' }] },
183
+ meta: baseMeta,
184
+ },
185
+ { projectTo: { runId, nodeId: 'n' } },
186
+ );
187
+ const events = await queryTestEvents(runId);
188
+ if (!events.ok || events.events.length === 0) return;
189
+ for (const e of events.events) {
190
+ expect(
191
+ e.causationId,
192
+ driver.describe('ai-envelope.md §"Replay determinism"', 'every event projected from an envelope MUST carry causationId === envelope.correlationId'),
193
+ ).toBe(correlationId);
194
+ }
195
+ await resetTestSeam();
196
+ });
197
+ });
198
+
199
+ describe('aiEnvelope.correlationReplay: cross-process replay via persisted dedup', () => {
200
+ // Cross-process replay proven WITHOUT actually killing the process:
201
+ // when a caller supplies `persistedDedup: { runId }`, the seam reads
202
+ // the persisted store BEFORE consulting the in-memory priorCorrelations
203
+ // and writes the outcome back after a successful accept. A second
204
+ // call from the same (or a hypothetically-restarted) process with
205
+ // ONLY persistedDedup set — no in-memory priorCorrelations — MUST
206
+ // return the same outcome as the first. That is the cross-process
207
+ // semantics: the persisted store is the source of truth, the in-
208
+ // memory map a per-process accelerator.
209
+ it('persisted outcome replays for the same correlationId even with NO in-memory priorCorrelations', async () => {
210
+ const runId = `r-cr-persist-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
211
+ const correlationId = `${runId}:n:0:persist1`;
212
+ const envelope = {
213
+ type: 'clarification.request',
214
+ schemaVersion: 1,
215
+ envelopeId: 'env-cr-persist-1',
216
+ correlationId,
217
+ payload: { questions: [{ id: 'q1', question: 'why?' }] },
218
+ meta: baseMeta,
219
+ };
220
+ // First accept persists the outcome under (runId, correlationId).
221
+ const first = await accept(envelope, { persistedDedup: { runId } });
222
+ if (first.status === 404) return; // seam not exposed — soft-skip
223
+ expect(first.body.status).toBe('accepted');
224
+ const cachedEnvelopeId = first.body.envelopeId;
225
+
226
+ // Second accept — same correlationId, NO priorCorrelations passed
227
+ // in-band. If the persisted store is consulted, the cached outcome
228
+ // is returned (same envelopeId). If only the in-memory map were
229
+ // used, the handler would re-run and mint a different envelopeId
230
+ // (or accept again with the original — either way, NOT the proof
231
+ // of cross-process semantics).
232
+ const second = await accept(envelope, { persistedDedup: { runId } });
233
+ expect(
234
+ second.body.envelopeId,
235
+ driver.describe(
236
+ 'ai-envelope.md §"Replay determinism"',
237
+ 'persisted outcome MUST replay across calls without an in-memory priorCorrelations map (cross-process recovery semantics)',
238
+ ),
239
+ ).toBe(cachedEnvelopeId);
240
+ expect(second.body.status).toBe('accepted');
241
+ });
242
+
243
+ it('persisted store enforces envelope_correlation_conflict across calls', async () => {
244
+ const runId = `r-cr-persist-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
245
+ const correlationId = `${runId}:n:0:conflict1`;
246
+ // First accept: clarification.request.
247
+ const first = await accept(
248
+ {
249
+ type: 'clarification.request',
250
+ schemaVersion: 1,
251
+ envelopeId: 'env-cr-persist-conflict-1',
252
+ correlationId,
253
+ payload: { questions: [{ id: 'q1', question: 'why?' }] },
254
+ meta: baseMeta,
255
+ },
256
+ { persistedDedup: { runId } },
257
+ );
258
+ if (first.status === 404) return;
259
+ expect(first.body.status).toBe('accepted');
260
+
261
+ // Second accept: same correlationId, different envelope type, NO
262
+ // in-memory priorCorrelations — the conflict MUST be served from
263
+ // the persisted store.
264
+ const second = await accept(
265
+ {
266
+ type: 'error',
267
+ schemaVersion: 1,
268
+ envelopeId: 'env-cr-persist-conflict-2',
269
+ correlationId,
270
+ payload: { code: 'x', message: 'y' },
271
+ meta: baseMeta,
272
+ },
273
+ { persistedDedup: { runId } },
274
+ );
275
+ expect(
276
+ second.body.status,
277
+ driver.describe(
278
+ 'ai-envelope.md §"Replay determinism"',
279
+ 'persisted store MUST surface envelope_correlation_conflict on type mismatch without an in-memory priorCorrelations map',
280
+ ),
281
+ ).toBe('invalid');
282
+ expect(second.body.reason).toContain('envelope_correlation_conflict');
283
+ });
69
284
  });
@@ -46,28 +46,208 @@ describe('aiEnvelope.redaction: advertisement shape (FINAL v1.1)', () => {
46
46
  });
47
47
  });
48
48
 
49
- describe('aiEnvelope.redaction: BYOK-redaction placeholders', () => {
50
- // The 6 assertions below require the engine's BYOK redaction pipeline
51
- // (per SECURITY/threat-model-secret-leakage.md SR-1 carry-forward) to
52
- // hook into envelope acceptance AND every downstream surface that
53
- // persists envelope content (RunEventDoc, OTel span attributes,
54
- // debug-bundle export, error envelope projection).
55
- //
56
- // The reference workflow-engine sample's `acceptEnvelope` is pure +
57
- // doesn't touch payload contents. Redaction lives at a different
58
- // layer (BYOK secretResolver + event-log sanitizer). Promoting these
59
- // to behavioral requires either:
60
- // (a) chaining the acceptor through `stripSecretsFromPersisted`
61
- // before persisting the recorded view, OR
62
- // (b) an end-to-end test that plants a BYOK canary in an envelope
63
- // payload, runs through the full accept → emit → persist → export
64
- // chain, and asserts the canary is absent on every output.
65
- //
66
- // (b) is the spec-faithful path. Tracked as host-impl follow-up.
67
- it.todo('emit envelope whose payload contains a known BYOK substring substring absent from emitted RunEventDocs');
68
- it.todo('redacted substring absent from OTel envelope_* span attributes');
69
- it.todo('redacted substring absent from debug-bundle export');
70
- it.todo('redacted substring absent from error envelope on validation refusal (no leak via error path)');
71
- it.todo('redaction marker is the canonical [REDACTED:<reason>] form, NOT a model-generated <REDACTED> string');
72
- it.todo('redaction runs AFTER schema validation: a payload with redacted-shaped substrings still validates structurally');
49
+ // Behavioral assertions through the workflow-engine sample's env-gated
50
+ // `POST /v1/host/sample/envelope/accept` seam. The seam threads a
51
+ // `byokCanaries: { value, secretId }[]` option into AcceptOptions so the acceptor
52
+ // can apply the §"Redaction (SR-1 carry-forward)" pass on accepted
53
+ // envelopes, returning `redactedPayload` in the outcome. Each test
54
+ // soft-skips on HTTP 404 (host doesn't expose the seam).
55
+ async function accept(envelope: unknown, opts: Record<string, unknown> = {}): Promise<{ status: number; body: { status?: string; reason?: string; redactedPayload?: unknown; redactionCount?: number; details?: unknown[] } }> {
56
+ const res = await driver.post('/v1/host/sample/envelope/accept', { envelope, ...opts });
57
+ return { status: res.status, body: res.json as { status?: string; reason?: string; redactedPayload?: unknown; redactionCount?: number; details?: unknown[] } };
58
+ }
59
+
60
+ const baseMeta = { source: 'ai-generation' as const, ts: '2026-05-18T10:00:00Z' };
61
+ const CANARY_VALUE = 'sk-canary-DO-NOT-LEAK-9d7f3a2b';
62
+ const CANARY_SECRET_ID = 'byok-secret-test-fixture-001';
63
+ const CANARIES = [{ value: CANARY_VALUE, secretId: CANARY_SECRET_ID }];
64
+ const CANONICAL_MARKER = `[REDACTED:${CANARY_SECRET_ID}]`;
65
+
66
+ describe('aiEnvelope.redaction: behavioral acceptor-level scrub (FINAL v1.1)', () => {
67
+ it('canary in payload substituted with canonical SR-1 [REDACTED:<secretId>] marker per agent-memory.md:66', async () => {
68
+ const r = await accept(
69
+ {
70
+ type: 'error',
71
+ schemaVersion: 1,
72
+ envelopeId: 'env-red-1',
73
+ correlationId: 'r:n:0:red1',
74
+ payload: { code: 'leak_demo', message: `tool result included ${CANARY_VALUE} verbatim` },
75
+ meta: baseMeta,
76
+ },
77
+ { byokCanaries: CANARIES },
78
+ );
79
+ if (r.status === 404) return;
80
+ expect(r.body.status).toBe('accepted');
81
+ expect(r.body.redactionCount, 'redactionCount MUST be > 0 when canary appears').toBeGreaterThan(0);
82
+ expect(
83
+ JSON.stringify(r.body.redactedPayload).includes(CANARY_VALUE),
84
+ driver.describe('ai-envelope.md §"Redaction (SR-1 carry-forward)"', 'canary plaintext MUST be absent from the redacted view'),
85
+ ).toBe(false);
86
+ expect(
87
+ JSON.stringify(r.body.redactedPayload),
88
+ driver.describe('agent-memory.md §SR-1 line 66', 'persisted entry MUST carry [REDACTED:<secretId>] in place of the plaintext'),
89
+ ).toContain(CANONICAL_MARKER);
90
+ });
91
+
92
+ it('canary across nested object fields → all occurrences scrubbed with canonical marker', async () => {
93
+ const r = await accept(
94
+ {
95
+ type: 'clarification.request',
96
+ schemaVersion: 1,
97
+ envelopeId: 'env-red-nested',
98
+ correlationId: 'r:n:0:rednested',
99
+ payload: {
100
+ questions: [
101
+ { id: 'q1', question: `What is ${CANARY_VALUE}?` },
102
+ { id: 'q2', question: 'unrelated', context: { trace: `${CANARY_VALUE}/${CANARY_VALUE}` } },
103
+ ],
104
+ },
105
+ meta: baseMeta,
106
+ },
107
+ { byokCanaries: CANARIES },
108
+ );
109
+ if (r.status === 404) return;
110
+ expect(r.body.status).toBe('accepted');
111
+ expect(
112
+ JSON.stringify(r.body.redactedPayload).includes(CANARY_VALUE),
113
+ 'no canary plaintext remnant anywhere in the redacted view (recursive scrub)',
114
+ ).toBe(false);
115
+ // q1's question (1 occurrence), q2's context.trace (2 occurrences) = total 3
116
+ expect(r.body.redactionCount).toBe(3);
117
+ });
118
+
119
+ it('multiple canaries → each substituted with its own secretId marker', async () => {
120
+ const C1 = { value: 'sk-canary-alpha-xxxx', secretId: 'secret-alpha' };
121
+ const C2 = { value: 'sk-canary-beta-yyyy', secretId: 'secret-beta' };
122
+ const r = await accept(
123
+ {
124
+ type: 'error',
125
+ schemaVersion: 1,
126
+ envelopeId: 'env-red-multi',
127
+ correlationId: 'r:n:0:redmulti',
128
+ payload: { code: 'multi_leak', message: `first=${C1.value}, second=${C2.value}` },
129
+ meta: baseMeta,
130
+ },
131
+ { byokCanaries: [C1, C2] },
132
+ );
133
+ if (r.status === 404) return;
134
+ expect(r.body.status).toBe('accepted');
135
+ const view = JSON.stringify(r.body.redactedPayload);
136
+ expect(view.includes(C1.value)).toBe(false);
137
+ expect(view.includes(C2.value)).toBe(false);
138
+ expect(
139
+ view.includes(`[REDACTED:${C1.secretId}]`) && view.includes(`[REDACTED:${C2.secretId}]`),
140
+ driver.describe('agent-memory.md §SR-1', 'each canary MUST be substituted with its OWN [REDACTED:<secretId>] marker'),
141
+ ).toBe(true);
142
+ });
143
+
144
+ it('redaction runs AFTER schema validation: payload with [REDACTED:...]-shaped substrings still validates', async () => {
145
+ // The error-kind payload schema requires { code, message }. A pre-redacted
146
+ // marker in the message MUST NOT trip validation.
147
+ const r = await accept(
148
+ {
149
+ type: 'error',
150
+ schemaVersion: 1,
151
+ envelopeId: 'env-red-shape',
152
+ correlationId: 'r:n:0:redshape',
153
+ payload: { code: 'demo', message: 'already had [REDACTED:secret-prior] before we saw it' },
154
+ meta: baseMeta,
155
+ },
156
+ { byokCanaries: CANARIES }, // canary NOT in payload; substitution count expected 0
157
+ );
158
+ if (r.status === 404) return;
159
+ expect(
160
+ r.body.status,
161
+ driver.describe('ai-envelope.md §"Redaction (SR-1 carry-forward)"', 'redaction MUST run AFTER schema validation; pre-existing markers do not affect validation'),
162
+ ).toBe('accepted');
163
+ // No canary present → redactionCount absent or 0
164
+ expect(r.body.redactionCount ?? 0).toBe(0);
165
+ });
166
+
167
+ it('canary in invalid envelope (validation refusal) → error response MUST NOT echo the canary plaintext', async () => {
168
+ // ISO 8601 violation triggers an `invalid` outcome BEFORE the redaction
169
+ // pass runs. The acceptor's validation-detail extractor MUST NOT echo
170
+ // the payload contents into the error response.
171
+ const r = await accept(
172
+ {
173
+ type: 'error',
174
+ schemaVersion: 1,
175
+ envelopeId: 'env-red-leak',
176
+ correlationId: 'r:n:0:redleak',
177
+ payload: { code: 'demo', message: `secret value is ${CANARY_VALUE}` },
178
+ meta: { ...baseMeta, ts: 'tomorrow' }, // bad ts → invalid
179
+ },
180
+ { byokCanaries: CANARIES },
181
+ );
182
+ if (r.status === 404) return;
183
+ expect(r.body.status).toBe('invalid');
184
+ const bodyString = JSON.stringify(r.body);
185
+ expect(
186
+ bodyString.includes(CANARY_VALUE),
187
+ driver.describe(
188
+ 'SECURITY/threat-model-secret-leakage.md §SR-1',
189
+ 'error response on validation refusal MUST NOT echo BYOK canary plaintext',
190
+ ),
191
+ ).toBe(false);
192
+ });
193
+ });
194
+
195
+ // E.2 OTel scrape + E.3 debug-bundle seams.
196
+ import { queryTestSpans, exportDebugBundle, isOtelSeamAvailable } from '../lib/otel-scrape.js';
197
+ import { resetTestSeam } from '../lib/event-log-query.js';
198
+
199
+ describe('aiEnvelope.redaction: OTel + debug-bundle scrape (E.2 + E.3)', () => {
200
+ it('redacted canary plaintext MUST be absent from OTel envelope_* span attributes', async () => {
201
+ if (!(await isOtelSeamAvailable())) return;
202
+ const runId = `r-red-otel-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
203
+ await accept(
204
+ {
205
+ type: 'error',
206
+ schemaVersion: 1,
207
+ envelopeId: 'env-red-otel-1',
208
+ correlationId: `${runId}:n:0:red-otel`,
209
+ payload: { code: 'leak_demo', message: `tool result included ${CANARY_VALUE} verbatim` },
210
+ meta: baseMeta,
211
+ },
212
+ { byokCanaries: CANARIES, projectTo: { runId, nodeId: 'n' } },
213
+ );
214
+ const spans = await queryTestSpans({ runId });
215
+ if (!spans.ok) return;
216
+ const allAttrs = spans.data.flatMap((s) => Object.values(s.attributes).map((v) => String(v)));
217
+ expect(
218
+ allAttrs.some((v) => v.includes(CANARY_VALUE)),
219
+ driver.describe(
220
+ 'SECURITY/threat-model-secret-leakage.md §SR-1',
221
+ 'BYOK canary plaintext MUST NOT appear in any OTel envelope_* span attribute',
222
+ ),
223
+ ).toBe(false);
224
+ await resetTestSeam();
225
+ });
226
+
227
+ it('redacted canary plaintext MUST be absent from debug-bundle export', async () => {
228
+ if (!(await isOtelSeamAvailable())) return;
229
+ const runId = `r-red-bundle-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
230
+ await accept(
231
+ {
232
+ type: 'clarification.request',
233
+ schemaVersion: 1,
234
+ envelopeId: 'env-red-bundle-1',
235
+ correlationId: `${runId}:n:0:red-bundle`,
236
+ payload: { questions: [{ id: 'q1', question: `embed ${CANARY_VALUE} here` }] },
237
+ meta: baseMeta,
238
+ },
239
+ { byokCanaries: CANARIES, projectTo: { runId, nodeId: 'n' } },
240
+ );
241
+ const bundle = await exportDebugBundle(runId);
242
+ if (!bundle.ok) return;
243
+ const serialized = JSON.stringify(bundle.data);
244
+ expect(
245
+ serialized.includes(CANARY_VALUE),
246
+ driver.describe(
247
+ 'SECURITY/threat-model-secret-leakage.md §SR-1',
248
+ 'BYOK canary plaintext MUST NOT appear in the debug-bundle export (events + spans)',
249
+ ),
250
+ ).toBe(false);
251
+ await resetTestSeam();
252
+ });
73
253
  });