@openwop/openwop-conformance 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. package/CHANGELOG.md +156 -1
  2. package/README.md +3 -2
  3. package/api/asyncapi.yaml +8 -0
  4. package/api/openapi.yaml +371 -1
  5. package/api/redocly.yaml +15 -0
  6. package/coverage.md +26 -5
  7. package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
  8. package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
  9. package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
  10. package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
  11. package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
  12. package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
  13. package/fixtures/conformance-envelope-recovery-applied.json +39 -0
  14. package/fixtures/conformance-envelope-refusal.json +38 -0
  15. package/fixtures/conformance-envelope-retry-attempted.json +39 -0
  16. package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
  17. package/fixtures/conformance-envelope-truncated.json +39 -0
  18. package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
  19. package/fixtures/conformance-model-capability-insufficient.json +25 -0
  20. package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
  21. package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
  22. package/fixtures/conformance-multi-agent-handoff.json +49 -0
  23. package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
  24. package/fixtures/conformance-prompt-end-to-end.json +33 -0
  25. package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
  26. package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
  27. package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
  28. package/fixtures/openwop-smoke-cost-emit.json +37 -0
  29. package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
  30. package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
  31. package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
  32. package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
  33. package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
  34. package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
  35. package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
  36. package/fixtures.md +45 -0
  37. package/package.json +1 -1
  38. package/schemas/README.md +5 -0
  39. package/schemas/agent-manifest.schema.json +16 -0
  40. package/schemas/capabilities.schema.json +390 -0
  41. package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
  42. package/schemas/envelopes/clarification.request.schema.json +9 -0
  43. package/schemas/envelopes/error.schema.json +4 -0
  44. package/schemas/envelopes/schema.request.schema.json +4 -0
  45. package/schemas/envelopes/schema.response.schema.json +1 -1
  46. package/schemas/node-pack-manifest.schema.json +28 -0
  47. package/schemas/orchestrator-decision.schema.json +12 -0
  48. package/schemas/prompt-kind.schema.json +8 -0
  49. package/schemas/prompt-pack-manifest.schema.json +80 -0
  50. package/schemas/prompt-ref.schema.json +40 -0
  51. package/schemas/prompt-template.schema.json +149 -0
  52. package/schemas/registry-version-manifest.schema.json +5 -0
  53. package/schemas/run-ancestry-response.schema.json +54 -0
  54. package/schemas/run-event-payloads.schema.json +513 -11
  55. package/schemas/run-event.schema.json +17 -1
  56. package/schemas/run-snapshot.schema.json +3 -2
  57. package/schemas/workflow-definition.schema.json +19 -1
  58. package/src/lib/driver.ts +15 -0
  59. package/src/lib/env.ts +51 -0
  60. package/src/lib/event-log-query.ts +62 -0
  61. package/src/lib/fixtures.ts +38 -1
  62. package/src/lib/host-toggle.ts +54 -0
  63. package/src/lib/llm-cache-key-recipe.ts +68 -0
  64. package/src/lib/multi-agent-capabilities.ts +10 -0
  65. package/src/lib/otel-scrape.ts +59 -0
  66. package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
  67. package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
  68. package/src/scenarios/aiEnvelope.contractRefusal.test.ts +224 -15
  69. package/src/scenarios/aiEnvelope.correlationReplay.test.ts +257 -25
  70. package/src/scenarios/aiEnvelope.redaction.test.ts +210 -29
  71. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +163 -24
  72. package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +262 -12
  73. package/src/scenarios/aiEnvelope.universalKinds.test.ts +107 -16
  74. package/src/scenarios/blob-presign-expiry.test.ts +42 -9
  75. package/src/scenarios/blob-roundtrip.test.ts +0 -0
  76. package/src/scenarios/cache-ttl-expiry.test.ts +34 -8
  77. package/src/scenarios/cost-attribution.test.ts +124 -11
  78. package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
  79. package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
  80. package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
  81. package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
  82. package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
  83. package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
  84. package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
  85. package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
  86. package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
  87. package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
  88. package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
  89. package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
  90. package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
  91. package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
  92. package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
  93. package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
  94. package/src/scenarios/envelope-truncated.test.ts +136 -0
  95. package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
  96. package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
  97. package/src/scenarios/fixtures-gating.test.ts +139 -1
  98. package/src/scenarios/fixtures-valid.test.ts +123 -15
  99. package/src/scenarios/kv-ttl-expiry.test.ts +40 -9
  100. package/src/scenarios/model-capability-insufficient.test.ts +221 -0
  101. package/src/scenarios/model-capability-substituted.test.ts +203 -0
  102. package/src/scenarios/multi-agent-confidence-escalation.test.ts +164 -0
  103. package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
  104. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
  105. package/src/scenarios/multi-region-idempotency.test.ts +58 -0
  106. package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
  107. package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
  108. package/src/scenarios/pack-registry-publish.test.ts +231 -51
  109. package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
  110. package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
  111. package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
  112. package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
  113. package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
  114. package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
  115. package/src/scenarios/prompt-pack-install.test.ts +187 -0
  116. package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
  117. package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
  118. package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
  119. package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
  120. package/src/scenarios/prompt-template-shape.test.ts +359 -0
  121. package/src/scenarios/provider-usage.test.ts +185 -0
  122. package/src/scenarios/queue-ack-nack-dlq.test.ts +64 -10
  123. package/src/scenarios/queue-publish-consume-roundtrip.test.ts +50 -10
  124. package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
  125. package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
  126. package/src/scenarios/replay-llm-cache-key.test.ts +127 -25
  127. package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
  128. package/src/scenarios/sandbox-capability-gate-respected.test.ts +31 -0
  129. package/src/scenarios/sandbox-memory-cap.test.ts +61 -0
  130. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +35 -0
  131. package/src/scenarios/sandbox-no-host-env-leak.test.ts +38 -0
  132. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +91 -0
  133. package/src/scenarios/sandbox-no-host-process-escape.test.ts +30 -0
  134. package/src/scenarios/sandbox-no-network-escape.test.ts +49 -0
  135. package/src/scenarios/sandbox-timeout-cap.test.ts +61 -0
  136. package/src/scenarios/search-bm25-roundtrip.test.ts +54 -9
  137. package/src/scenarios/spec-corpus-validity.test.ts +34 -6
  138. package/src/scenarios/sql-transaction-atomicity.test.ts +37 -8
  139. package/src/scenarios/stream-subscribe-from-beginning.test.ts +46 -9
  140. package/src/scenarios/subworkflow-input-mapping.test.ts +146 -10
  141. package/src/scenarios/table-cursor-pagination.test.ts +47 -9
  142. package/src/scenarios/table-schema-enforcement.test.ts +46 -9
  143. package/src/scenarios/vector-knn-roundtrip.test.ts +50 -10
  144. package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
@@ -0,0 +1,152 @@
1
+ /**
2
+ * envelope-nl-to-format-engaged — RFC 0032 §B.5 runtime behavior (MAY tier).
3
+ *
4
+ * Capability-gated on `capabilities.envelopes.reliability.supported: true`
5
+ * AND `events[]` includes `envelope.nlToFormat.engaged`. Soft-skip cleanly
6
+ * on hosts that don't implement NL-to-Format fallback — NL-to-Format is one
7
+ * of many possible recovery strategies; hosts that don't advertise it don't
8
+ * need to emit.
9
+ *
10
+ * Asserts:
11
+ * 1. When retry exhaustion triggers the NL-to-Format fallback (per Tam et al.
12
+ * mitigation: free-form reasoning in the first call → schema coercion
13
+ * in the second call), exactly one `envelope.nlToFormat.engaged` event
14
+ * fires.
15
+ * 2. `originalEnvelopeType` carries the envelope kind the original attempt
16
+ * was trying to emit.
17
+ * 3. `fallbackCalls >= 1` (informational — how many secondary LLM calls
18
+ * the host issued to reformat).
19
+ * 4. The eventual envelope acceptance (when fallback succeeds) records
20
+ * normally via downstream RunEventDoc.
21
+ *
22
+ * @see RFCS/0032-envelope-reliability-events.md §B.5
23
+ * @see Tam et al., "Let Me Speak Freely?" — https://arxiv.org/pdf/2408.02442
24
+ * @see schemas/run-event-payloads.schema.json §envelopeNlToFormatEngaged
25
+ */
26
+
27
+ import { describe, it, expect } from 'vitest';
28
+ import { driver } from '../lib/driver.js';
29
+ import { pollUntilTerminal } from '../lib/polling.js';
30
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
31
+
32
+ const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
33
+ const FIXTURE = 'conformance-envelope-nl-to-format-engaged';
34
+ const NODE_ID = 'structured-call';
35
+
/**
 * Minimal view of one entry in a run's event list, limited to the fields
 * the scenarios in this file read.
 */
interface RunEvent {
  /** Position of the event within the run's event log. */
  sequence: number;
  /** Event discriminator, e.g. `envelope.nlToFormat.engaged` or `node.completed`. */
  type: string;
  /** Id of the node the event belongs to, when the event is node-scoped. */
  nodeId?: string;
  /** Event-specific data; shape depends on `type`, so it stays loosely typed. */
  payload?: Record<string, unknown>;
}
42
+
/**
 * Posts a scripted response list for `NODE_ID` to the sample host's
 * mock-AI programming seam.
 *
 * @param program - Scripted responses, in the order they should be returned.
 * @returns The HTTP status of the seam call only, so callers can soft-skip
 *          on 404 when the host does not expose the seam.
 */
async function programMock(program: Array<Record<string, unknown>>): Promise<{ status: number }> {
  const requestBody = { nodeId: NODE_ID, program };
  const response = await driver.post('/v1/host/sample/test/mock-ai/program', requestBody);
  const { status } = response;
  return { status };
}
47
+
/**
 * Starts a run of the `FIXTURE` workflow, waits (up to 10 s) for it to reach
 * a terminal state, then fetches the run's event list.
 *
 * @returns The events array (empty when the response carries none), or
 *          `null` when run creation did not return 201 or the events read
 *          did not return 200, in which case callers treat it as a soft-skip.
 */
async function runAndReadEvents(): Promise<RunEvent[] | null> {
  const created = await driver.post('/v1/runs', { workflowId: FIXTURE });
  if (created.status !== 201) {
    return null;
  }

  const createdBody = created.json as { runId: string };
  const runId = createdBody.runId;
  await pollUntilTerminal(runId, { timeoutMs: 10_000 });

  const eventsPath = `/v1/runs/${encodeURIComponent(runId)}/events`;
  const fetched = await driver.get(eventsPath);
  if (fetched.status !== 200) {
    return null;
  }

  const fetchedBody = fetched.json as { events?: RunEvent[] } | undefined;
  return (fetchedBody?.events ?? []) as RunEvent[];
}
57
+
// Scripted mock-AI responses, returned in order (the mock replies with
// whatever was programmed for the Nth call):
//   calls 1–3: natural-language answers that exhaust the retry budget;
//   call 4:    the coerced reply for the NL-to-Format fallback's secondary
//              call, which is valid JSON matching the schema and is issued
//              by the host after retry exhaustion.
const NL_THEN_COERCED_PROGRAM = [
  'Sure, here is the result: the answer is OK.',
  'Of course! The result you wanted is okay.',
  'I think the result should be ok-ish.',
  '{"result":"coerced-ok"}',
].map((content) => ({ content }));
69
+
describe.skipIf(HTTP_SKIP)('envelope-nl-to-format-engaged: runtime behavior (RFC 0032 §B.5 MAY)', () => {
  const SPEC_REF = 'RFCS/0032-envelope-reliability-events.md §B.5';
  const ENGAGED_EVENT = 'envelope.nlToFormat.engaged';

  /**
   * Shared arrange step for every test in this suite: seeds the mock with
   * the NL-then-coerced program, starts a run, and returns its events.
   *
   * Returns `null` (soft-skip) when the fixture is not advertised, when the
   * mock-AI seam answers 404, or when the run/events calls do not succeed.
   * Any other non-200 seed status fails the test here, so a broken seam is
   * reported as such instead of surfacing as a missing-event failure later.
   */
  async function seedAndCollectEvents(): Promise<RunEvent[] | null> {
    if (!isFixtureAdvertised(FIXTURE)) return null;
    const seed = await programMock(NL_THEN_COERCED_PROGRAM);
    if (seed.status === 404) return null; // host doesn't expose the mock-AI seam
    expect(
      seed.status,
      'mock-AI program seam is exposed (not 404) but did not accept the scripted program',
    ).toBe(200);
    return runAndReadEvents();
  }

  it('when retry exhaustion triggers the NL-to-Format fallback, exactly one `envelope.nlToFormat.engaged` event fires', async () => {
    const events = await seedAndCollectEvents();
    if (events === null) return;

    const engagements = events.filter((e) => e.type === ENGAGED_EVENT);
    expect(
      engagements.length,
      driver.describe(
        SPEC_REF,
        'exactly one envelope.nlToFormat.engaged event MUST fire when the host detects NL-shape responses after retry exhaustion',
      ),
    ).toBe(1);
  });

  it('`originalEnvelopeType` carries the envelope kind the original attempt targeted', async () => {
    const events = await seedAndCollectEvents();
    if (events === null) return;

    const engagement = events.find((e) => e.type === ENGAGED_EVENT);
    expect(engagement).toBeDefined();
    const originalEnvelopeType = engagement?.payload?.originalEnvelopeType;
    expect(
      typeof originalEnvelopeType,
      driver.describe(
        SPEC_REF,
        'originalEnvelopeType MUST be present and string-typed — derived from the response-schema or wrapping metadata',
      ),
    ).toBe('string');
    // Safe after the typeof assertion above: an empty string is not a usable kind.
    expect((originalEnvelopeType as string).length).toBeGreaterThan(0);
  });

  it('`fallbackCalls >= 1` reports the number of secondary LLM calls used to reformat free-form output into the envelope schema', async () => {
    const events = await seedAndCollectEvents();
    if (events === null) return;

    const engagement = events.find((e) => e.type === ENGAGED_EVENT);
    expect(engagement).toBeDefined();
    const fallbackCalls = engagement?.payload?.fallbackCalls;
    expect(typeof fallbackCalls).toBe('number');
    expect(
      fallbackCalls as number,
      driver.describe(
        SPEC_REF,
        'fallbackCalls MUST be >= 1 — the fallback fired at least one secondary call to reformat the free-form output',
      ),
    ).toBeGreaterThanOrEqual(1);
  });

  it('the eventual envelope acceptance (when fallback succeeds) records normally via downstream RunEventDoc', async () => {
    const events = await seedAndCollectEvents();
    if (events === null) return;

    const nodeCompleted = events.find((e) => e.type === 'node.completed' && e.nodeId === NODE_ID);
    expect(
      nodeCompleted,
      driver.describe(
        SPEC_REF,
        'NL-to-Format fallback success MUST reach node.completed — the coerced envelope flows downstream like any other accepted envelope',
      ),
    ).toBeDefined();

    // The fourth programmed response carries the marker value `coerced-ok`;
    // finding it anywhere in the completion payload shows the coerced data arrived.
    const completedPayload = JSON.stringify(nodeCompleted?.payload ?? {});
    expect(
      completedPayload.includes('coerced-ok'),
      driver.describe(
        SPEC_REF,
        'the coerced structured data from the secondary call MUST flow to the downstream RunEventDoc',
      ),
    ).toBe(true);
  });
});
@@ -0,0 +1,343 @@
1
+ /**
2
+ * envelope-reasoning-secret-redaction — RFC 0030 §E security invariant.
3
+ *
4
+ * SECURITY invariant: `envelope-reasoning-secret-redaction` (gate timing
5
+ * per RFC 0027 §G precedent — lands alongside reference-host emission).
6
+ *
7
+ * Asserts that the envelope-acceptor's BYOK redaction harness walks the
8
+ * `reasoning` field — known credential canaries (the `byokCanaries[]`
9
+ * shape from RFC 0021 §"Redaction (SR-1 carry-forward)", supplied as
10
+ * `{ value, secretId }` pairs) found inside `reasoning` MUST be
11
+ * substituted with `[REDACTED:<secretId>]` markers
12
+ * before the envelope is persisted to `RunEventDoc.payload`. The acceptor's
13
+ * recursive walk per `ai-envelope.md` §"Redaction (SR-1 carry-forward)"
14
+ * covers `reasoning` automatically because it's just another payload
15
+ * field — but the conformance suite asserts it explicitly so a future
16
+ * refactor that adds an early-exit at known-shape boundaries cannot
17
+ * regress the invariant.
18
+ *
19
+ * Behavioral assertions drive the existing envelope-accept test seam
20
+ * (`POST /v1/host/sample/envelope/accept`) introduced by RFC 0021. Each
21
+ * test soft-skips on HTTP 404 (host doesn't expose the seam) and on
22
+ * capability absence.
23
+ *
24
+ * Downstream-projection assertions (OTel-attribute scrape + debug-bundle
25
+ * export + non-routing-on-reasoning invariant) are live behavioral via the
26
+ * `/v1/host/sample/test/otel/spans` + `/v1/host/sample/test/debug-bundle/export`
27
+ * seams (soft-skip on HTTP 404 when the host doesn't expose them). The
28
+ * acceptor-level redaction is verified independently above via the
29
+ * envelope-accept seam.
30
+ *
31
+ * @see RFCS/0030-envelope-reasoning-and-tier-one-subset.md §E
32
+ * @see spec/v1/ai-envelope.md §"Reasoning field (normative)" + §"Redaction (SR-1 carry-forward)"
33
+ * @see SECURITY/threat-model-secret-leakage.md §SR-1
34
+ */
35
+
36
+ import { describe, it, expect } from 'vitest';
37
+ import { driver } from '../lib/driver.js';
38
+
39
+ const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
40
+
/**
 * Subset of the `/.well-known/openwop` discovery document that this suite
 * reads. Leaf flags are typed `unknown`, so callers compare them against
 * `true` explicitly rather than trusting the advertised type.
 */
interface DiscoveryDoc {
  capabilities?: {
    /** Test-seam advertisements gating the downstream-projection tests. */
    observability?: {
      testSeams?: {
        debugBundleExport?: unknown;
        otelScrape?: unknown;
      };
    };
    /** Secrets (BYOK) support flag. */
    secrets?: { supported?: unknown };
    /** Envelope `reasoning` support flag. */
    envelopes?: { reasoning?: { supported?: unknown } };
  };
}
53
+
/**
 * Fetches the host's discovery document from `/.well-known/openwop`.
 *
 * @returns The response body cast to `DiscoveryDoc` on HTTP 200; `null` on
 *          any other status or when the request throws, so callers can
 *          soft-skip instead of failing.
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const response = await driver.get('/.well-known/openwop');
    return response.status === 200 ? (response.json as DiscoveryDoc) : null;
  } catch {
    // Best-effort read: an unreachable host is treated like "no discovery doc".
    return null;
  }
}
63
+
/** Response body of the envelope-accept seam, limited to the fields the tests read. */
type AcceptResponseBody = {
  status?: string;
  reason?: string;
  redactedPayload?: unknown;
  redactionCount?: number;
  details?: unknown[];
};

/**
 * Submits an envelope to the sample host's envelope-accept test seam
 * (`POST /v1/host/sample/envelope/accept`).
 *
 * @param envelope - The envelope to submit, sent under the `envelope` key.
 * @param opts - Extra top-level request fields (e.g. `byokCanaries`), spread
 *               alongside `envelope`.
 * @returns The HTTP status plus the response body cast to the fields the
 *          tests inspect; the body is not validated here.
 */
async function accept(
  envelope: unknown,
  opts: Record<string, unknown> = {},
): Promise<{ status: number; body: AcceptResponseBody }> {
  const request = { envelope, ...opts };
  const response = await driver.post('/v1/host/sample/envelope/accept', request);
  return {
    status: response.status,
    body: response.json as AcceptResponseBody,
  };
}
89
+
/** `meta` block shared by every envelope submitted in this file. */
const baseMeta = { source: 'ai-generation' as const, ts: '2026-05-20T10:00:00Z' };

/** Identifier the host is expected to put inside the redaction marker. */
const CANARY_SECRET_ID = 'byok-reasoning-fixture-001';
/** Plaintext canary credential that must never survive redaction. */
const CANARY_VALUE = 'sk-reasoning-canary-DO-NOT-LEAK-30a1';
/** Canary list passed to the accept seam as `byokCanaries`. */
const CANARIES = [{ value: CANARY_VALUE, secretId: CANARY_SECRET_ID }];
/** Marker expected in place of the canary: `[REDACTED:<secretId>]`. */
const CANONICAL_MARKER = '[REDACTED:' + CANARY_SECRET_ID + ']';
95
+
describe.skipIf(HTTP_SKIP)('envelope-reasoning-secret-redaction: advertisement shape (RFC 0030 §E)', () => {
  it('hosts advertising envelope reasoning + BYOK honor SR-1 carry-forward for the reasoning field', async () => {
    const discovery = await readDiscovery();
    if (discovery === null) return;

    const capabilities = discovery.capabilities;
    const advertisesReasoning = capabilities?.envelopes?.reasoning?.supported === true;
    const advertisesSecrets = capabilities?.secrets?.supported === true;
    // Soft-skip unless the host claims both surfaces.
    if (!(advertisesReasoning && advertisesSecrets)) return;

    // The contract is invariant-based rather than capability-flag-based, so
    // reaching this point only confirms both surfaces are claimed.
    // NOTE(review): the assertion below is a placeholder that cannot fail;
    // the redaction behaviour is exercised by the other suites in this file.
    expect(true).toBe(true);
  });
});
108
+
describe.skipIf(HTTP_SKIP)('envelope-reasoning-secret-redaction: BYOK redaction of `reasoning` (RFC 0030 §E)', () => {
  /**
   * Builds the n-th test envelope of the given kind. Ids follow the
   * `env-reason-red-<n>` / `r:n:0:reasonred<n>` pattern used by this suite.
   */
  function envelopeFor(type: string, n: number, payload: Record<string, unknown>) {
    return {
      type,
      schemaVersion: 1,
      envelopeId: `env-reason-red-${n}`,
      correlationId: `r:n:0:reasonred${n}`,
      payload,
      meta: baseMeta,
    };
  }

  /** Every request in this suite supplies the same canary list. */
  const withCanaries = { byokCanaries: CANARIES };

  it('canary in `reasoning` → substituted with canonical [REDACTED:<secretId>] marker per agent-memory.md:66', async () => {
    const envelope = envelopeFor('error', 1, {
      reasoning: `I analyzed the input and noticed the credential ${CANARY_VALUE} was embedded; the call cannot proceed safely.`,
      code: 'validation_failed',
      message: 'Refusing to act on a credential-bearing input.',
    });
    const r = await accept(envelope, withCanaries);
    if (r.status === 404) return; // host doesn't expose the seam

    expect(r.body.status, 'envelope MUST be accepted; redaction is a post-validation pass').toBe('accepted');
    expect(
      r.body.redactionCount,
      'RFC 0030 §E: redactionCount MUST be > 0 when a canary appears in `reasoning`',
    ).toBeGreaterThan(0);

    const redactedJson = JSON.stringify(r.body.redactedPayload);
    expect(
      redactedJson.includes(CANARY_VALUE),
      driver.describe(
        'ai-envelope.md §"Redaction (SR-1 carry-forward)"',
        'canary plaintext MUST NOT remain anywhere in the redacted view — `reasoning` field included',
      ),
    ).toBe(false);
    expect(
      redactedJson,
      driver.describe(
        'agent-memory.md §SR-1 line 66',
        'persisted entry MUST carry [REDACTED:<secretId>] in place of the plaintext',
      ),
    ).toContain(CANONICAL_MARKER);
  });

  it('canary in `reasoning` AND another payload field → both occurrences scrubbed with single marker', async () => {
    const envelope = envelopeFor('error', 2, {
      reasoning: `The token ${CANARY_VALUE} appeared in two places.`,
      code: 'leak_demo',
      message: `Original tool output: ${CANARY_VALUE}`,
    });
    const r = await accept(envelope, withCanaries);
    if (r.status === 404) return;

    expect(r.body.status).toBe('accepted');
    const redactedJson = JSON.stringify(r.body.redactedPayload);
    expect(
      redactedJson.includes(CANARY_VALUE),
      'no canary plaintext remnant anywhere — `reasoning` + `message` both walked recursively',
    ).toBe(false);
    expect(
      r.body.redactionCount,
      'recursive walk substitutes once per occurrence; 2 occurrences = redactionCount: 2',
    ).toBe(2);
  });

  it('absent canary in `reasoning` → reasoning passes through unchanged (no false-positive redaction)', async () => {
    const untouchedReasoning = 'The input was empty; I declined to fabricate a response.';
    const envelope = envelopeFor('error', 3, {
      reasoning: untouchedReasoning,
      code: 'no_input',
      message: 'Empty input.',
    });
    // The canary list is supplied, but the payload never contains the canary.
    const r = await accept(envelope, withCanaries);
    if (r.status === 404) return;

    expect(r.body.status).toBe('accepted');
    expect(r.body.redactionCount, 'no canary occurrence → redactionCount: 0').toBe(0);
    const redacted = (r.body.redactedPayload ?? {}) as { reasoning?: string };
    expect(
      redacted.reasoning,
      'reasoning field MUST pass through unchanged when no canary substring matches',
    ).toBe(untouchedReasoning);
  });

  it('canary in `clarification.request.reasoning` (universal kind with reasoning property)', async () => {
    const envelope = envelopeFor('clarification.request', 4, {
      reasoning: `I noticed the input contained ${CANARY_VALUE}; I need clarification on whether to proceed.`,
      questions: [{ id: 'q1', question: 'Should I treat embedded credentials as valid input?' }],
    });
    const r = await accept(envelope, withCanaries);
    if (r.status === 404) return;

    expect(r.body.status).toBe('accepted');
    const redactedJson = JSON.stringify(r.body.redactedPayload);
    expect(redactedJson.includes(CANARY_VALUE)).toBe(false);
    expect(redactedJson).toContain(CANONICAL_MARKER);
  });
});
223
+
// Behavioral assertions through the sample host's downstream projection
// paths. The seam-backed tests soft-skip when the host does not advertise
// the corresponding test seam, and every test soft-skips when the
// envelope-accept seam itself answers 404. Acceptor-level redaction is
// verified by the envelope-accept suite; the assertions here verify that
// the redaction also holds on the downstream surfaces.

describe.skipIf(HTTP_SKIP)('envelope-reasoning-secret-redaction: downstream-projection paths (RFC 0030 §E)', () => {
  // Every accept call below passes `projectTo.runId` so the outcome is
  // mirrored to the host's test span + event-log buffers (per
  // `host/envelopeProjection.ts`). The tests then read those buffers via the
  // `/v1/host/sample/test/otel/spans` and `/test/debug-bundle/export` seams
  // and confirm the canary plaintext from `reasoning` appears in neither.
  const RUN_ID = 'reasoning-redaction-test-run';
  const SEAM_SPEC_REF = 'RFCS/0034-otel-collector-test-seam.md §B';

  /**
   * Submits an `error` envelope carrying `reasoning` to the accept seam,
   * projected onto `RUN_ID`, with the canary list attached.
   */
  async function acceptForRun(
    reasoning: string,
    envelopeId: string,
  ): Promise<{ status: number; body: { status?: string; redactedPayload?: unknown } }> {
    const res = await driver.post('/v1/host/sample/envelope/accept', {
      envelope: {
        type: 'error',
        schemaVersion: 1,
        envelopeId,
        correlationId: `r:n:0:${envelopeId}`,
        payload: { reasoning, code: 'validation_failed', message: 'Refusing.' },
        meta: baseMeta,
      },
      byokCanaries: CANARIES,
      projectTo: { runId: RUN_ID, nodeId: 'reasoning-emit-node' },
    });
    return { status: res.status, body: res.json as { status?: string; redactedPayload?: unknown } };
  }

  /** Sorted top-level keys of an object value; `[]` for anything that is not an object. */
  function topLevelKeys(value: unknown): string[] {
    return value !== null && typeof value === 'object' ? Object.keys(value).sort() : [];
  }

  it('OTel span attributes for the envelope-emitting node MUST NOT include plaintext `secret:`-prefixed substrings from `reasoning`', async () => {
    // RFC 0034 §B: gate on capabilities.observability.testSeams.otelScrape.
    // Hosts that don't advertise it soft-skip; hosts that DO advertise it MUST serve a valid response.
    const d = await readDiscovery();
    const otelScrapeAdvertised = d?.capabilities?.observability?.testSeams?.otelScrape === true;
    if (!otelScrapeAdvertised) return; // soft-skip — host honest about not implementing per RFC 0034 §A

    const r = await acceptForRun(
      `Reasoning trace mentioning ${CANARY_VALUE} verbatim.`,
      'env-otel-redact-1',
    );
    if (r.status === 404) return; // envelope-accept seam itself not exposed — separate gate
    expect(r.body.status).toBe('accepted');

    const spansRes = await driver.get(
      `/v1/host/sample/test/otel/spans?runId=${encodeURIComponent(RUN_ID)}`,
    );
    expect(
      spansRes.status,
      driver.describe(
        SEAM_SPEC_REF,
        'host advertising capabilities.observability.testSeams.otelScrape MUST serve the seam (200) — 404 is non-conformant',
      ),
    ).toBe(200);
    const spans = (spansRes.json as { spans?: Array<Record<string, unknown>> } | undefined)?.spans ?? [];
    const serialized = JSON.stringify(spans);
    expect(
      serialized.includes(CANARY_VALUE),
      driver.describe(
        'SECURITY/invariants.yaml secret-leakage-otel-attribute',
        'no canary plaintext from the envelope `reasoning` field may appear in any OTel span attribute',
      ),
    ).toBe(false);
  });

  it("debug-bundle export MUST NOT include plaintext `secret:`-prefixed substrings from envelope.reasoning", async () => {
    // RFC 0034 §B: gate on capabilities.observability.testSeams.debugBundleExport.
    const d = await readDiscovery();
    const debugBundleAdvertised = d?.capabilities?.observability?.testSeams?.debugBundleExport === true;
    if (!debugBundleAdvertised) return; // soft-skip — host honest about not implementing per RFC 0034 §A

    const r = await acceptForRun(
      `Step-by-step reasoning that surfaces ${CANARY_VALUE} from a prior tool result.`,
      'env-bundle-redact-1',
    );
    if (r.status === 404) return; // envelope-accept seam itself not exposed — separate gate
    expect(r.body.status).toBe('accepted');

    const bundleRes = await driver.post('/v1/host/sample/test/debug-bundle/export', { runId: RUN_ID });
    expect(
      bundleRes.status,
      driver.describe(
        SEAM_SPEC_REF,
        'host advertising capabilities.observability.testSeams.debugBundleExport MUST serve the seam (200) — 404 is non-conformant',
      ),
    ).toBe(200);
    const serialized = JSON.stringify(bundleRes.json);
    expect(
      serialized.includes(CANARY_VALUE),
      driver.describe(
        'SECURITY/invariants.yaml secret-leakage-debug-bundle-otel',
        'no canary plaintext from envelope.reasoning may appear in the debug-bundle export',
      ),
    ).toBe(false);
  });

  it("envelope acceptance MUST NOT route on `reasoning` contents (RFC 0030 §A normative MUST NOT) — the host's handler-routing decision MUST be identical regardless of `reasoning` value", async () => {
    // Two envelopes, identical in shape EXCEPT for `reasoning` content and
    // envelopeId. The acceptor's observable decision (status, plus the
    // structure of redactedPayload) MUST be identical, showing that
    // `reasoning` is non-routing per RFC 0030 §A.
    const routingSpecRef = 'RFCS/0030-envelope-reasoning-and-tier-one-subset.md §A';
    const aResp = await acceptForRun('reasoning-variant-A: model thinks the input is benign.', 'env-route-A');
    const bResp = await acceptForRun(
      `reasoning-variant-B with embedded ${CANARY_VALUE} canary — host MUST NOT route differently.`,
      'env-route-B',
    );
    if (aResp.status === 404 || bResp.status === 404) return;

    expect(aResp.body.status).toBe('accepted');
    expect(bResp.body.status).toBe('accepted');
    expect(
      aResp.body.status,
      driver.describe(
        routingSpecRef,
        'reasoning is informational only; routing decision MUST NOT depend on its contents',
      ),
    ).toBe(bResp.body.status);
    // The status comparison above cannot distinguish the two outcomes once
    // both are 'accepted', so also compare the payload structure: the
    // `reasoning` value may differ (variant B is redacted), but the set of
    // fields the acceptor returns must not.
    expect(
      topLevelKeys(bResp.body.redactedPayload),
      driver.describe(
        routingSpecRef,
        'redactedPayload structure MUST be identical regardless of `reasoning` contents — only the `reasoning` value itself may differ',
      ),
    ).toEqual(topLevelKeys(aResp.body.redactedPayload));
  });
});
@@ -0,0 +1,190 @@
1
+ /**
2
+ * envelope-reasoning-shape — RFC 0030 §A wire-shape conformance.
3
+ *
4
+ * Asserts:
5
+ * 1. The three universal-kind payload schemas that carry reasoning
6
+ * (`clarification.request`, `schema.request`, `error`) declare the
7
+ * OPTIONAL `reasoning` property of type `string`.
8
+ * 2. The fourth universal-kind schema (`schema.response`) does NOT
9
+ * declare `reasoning` (side-channel ack per RFC 0030 §A).
10
+ * 3. Each of the three schemas validates payloads with and without
11
+ * `reasoning` populated (preserves v1.1 backward compatibility).
12
+ * 4. `capabilities.envelopes.reasoning` advertisement shape (when
13
+ * present) conforms per RFC 0030 §C.
14
+ * 5. `capabilities.envelopes.tierOneSubsetCompliance` is one of the
15
+ * three documented values when present.
16
+ *
17
+ * NOT capability-gated — schema-shape compilation always runs. Discovery
18
+ * checks soft-skip when no live host is configured.
19
+ *
20
+ * @see RFCS/0030-envelope-reasoning-and-tier-one-subset.md
21
+ * @see spec/v1/ai-envelope.md §"Reasoning field (normative)"
22
+ * @see schemas/envelopes/clarification.request.schema.json
23
+ * @see schemas/envelopes/schema.request.schema.json
24
+ * @see schemas/envelopes/error.schema.json
25
+ * @see schemas/envelopes/schema.response.schema.json
26
+ */
27
+
28
+ import { describe, it, expect } from 'vitest';
29
+ import Ajv2020 from 'ajv/dist/2020.js';
30
+ import addFormats from 'ajv-formats';
31
+ import { readFileSync } from 'node:fs';
32
+ import { join } from 'node:path';
33
+ import { driver } from '../lib/driver.js';
34
+ import { SCHEMAS_DIR } from '../lib/paths.js';
35
+
36
// True when no live host is configured (OPENWOP_BASE_URL unset or empty);
// used to skip the suites that need HTTP access.
const HTTP_SKIP = process.env.OPENWOP_BASE_URL === undefined || process.env.OPENWOP_BASE_URL === '';
37
+
38
/** `capabilities.envelopes.reasoning` block; leaves are `unknown` so tests must check them at runtime. */
interface ReasoningAdvertisement {
  supported?: unknown;
  promptDirective?: unknown;
}

/** `capabilities.envelopes` block as read by this suite. */
interface EnvelopeCapabilities {
  reasoning?: ReasoningAdvertisement;
  tierOneSubsetCompliance?: unknown;
}

/**
 * The slice of the discovery document these tests inspect. Every level is
 * optional, and leaf values are `unknown` because they come from an
 * unvalidated HTTP response.
 */
interface DiscoveryDoc {
  capabilities?: {
    envelopes?: EnvelopeCapabilities;
  };
}
49
+
50
/**
 * Reads and parses a JSON schema file.
 *
 * @param rel - Path of the schema relative to `SCHEMAS_DIR`.
 * @returns The parsed JSON document (not validated beyond the cast).
 * @throws If the file cannot be read or is not valid JSON.
 */
function loadSchema(rel: string): Record<string, unknown> {
  const schemaPath = join(SCHEMAS_DIR, rel);
  const rawText = readFileSync(schemaPath, 'utf8');
  return JSON.parse(rawText) as Record<string, unknown>;
}
53
+
54
/**
 * Builds a fresh draft 2020-12 Ajv instance with non-strict mode,
 * all-errors collection, and the `ajv-formats` plugin applied.
 */
function makeAjv(): Ajv2020 {
  const validatorFactory = new Ajv2020({
    allErrors: true,
    strict: false,
  });
  addFormats(validatorFactory);
  return validatorFactory;
}
59
+
60
/**
 * Fetches `/.well-known/openwop` through the conformance driver.
 *
 * @returns The response body cast to {@link DiscoveryDoc} on HTTP 200;
 *   `null` for any other status or if the request throws, so callers can
 *   soft-skip.
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const reply = await driver.get('/.well-known/openwop');
    return reply.status === 200 ? (reply.json as DiscoveryDoc) : null;
  } catch {
    // Any failure in the request is treated as "discovery unavailable".
    return null;
  }
}
69
+
70
describe('envelope-reasoning-shape: universal-kind schemas (RFC 0030 §A)', () => {
  // The universal kinds whose payload schema is expected to carry `reasoning`.
  const KINDS_WITH_REASONING = ['clarification.request', 'schema.request', 'error'] as const;

  KINDS_WITH_REASONING.forEach((kind) => {
    it(`${kind}.schema.json declares OPTIONAL \`reasoning: string\` per RFC 0030 §A`, () => {
      const doc = loadSchema(`envelopes/${kind}.schema.json`);
      const reasoningDecl = (doc.properties as Record<string, Record<string, unknown>> | undefined)?.reasoning;
      const requiredKeys = (doc.required as string[] | undefined) ?? [];

      // The property must exist ...
      expect(
        reasoningDecl,
        `RFC 0030 §A: ${kind}.schema.json MUST declare a \`reasoning\` property`,
      ).toBeDefined();
      // ... be a plain string ...
      expect(
        reasoningDecl?.type,
        'RFC 0030 §A: `reasoning` MUST be type: string on universal-kind schemas',
      ).toBe('string');
      // ... and stay optional.
      expect(
        requiredKeys.includes('reasoning'),
        `RFC 0030 §A: ${kind}.schema.json MUST NOT list \`reasoning\` in required (OPTIONAL field; absence is a valid envelope shape)`,
      ).toBe(false);
    });
  });

  it('schema.response.schema.json deliberately omits `reasoning` (side-channel ack per RFC 0030 §A)', () => {
    const doc = loadSchema('envelopes/schema.response.schema.json');
    const reasoningDecl = (doc.properties as Record<string, Record<string, unknown>> | undefined)?.reasoning;
    expect(
      reasoningDecl,
      'RFC 0030 §A: `schema.response` is a side-channel ack envelope; it MUST NOT declare `reasoning`',
    ).toBeUndefined();
  });
});
102
+
103
describe('envelope-reasoning-shape: backward-compat round-trip (RFC 0030 §A)', () => {
  // Compile each schema once at describe-scope so re-validation across `it`
  // blocks reuses the same validator (Ajv refuses duplicate $id registration).
  // Each schema gets its own Ajv instance.
  const ajvClarification = makeAjv();
  const validateClarification = ajvClarification.compile(loadSchema('envelopes/clarification.request.schema.json'));
  const ajvError = makeAjv();
  const validateError = ajvError.compile(loadSchema('envelopes/error.schema.json'));
  const ajvSchemaRequest = makeAjv();
  const validateSchemaRequest = ajvSchemaRequest.compile(loadSchema('envelopes/schema.request.schema.json'));

  it('clarification.request validates a payload WITHOUT reasoning (backward compat)', () => {
    const payload = { questions: [{ id: 'q1', question: 'What do you mean by X?' }] };
    expect(
      validateClarification(payload),
      'RFC 0030 §A: existing v1.1 envelope without `reasoning` MUST remain valid',
    ).toBe(true);
  });

  it('clarification.request validates a payload WITH reasoning', () => {
    const payload = {
      reasoning: 'The user mentioned X twice but I am not sure which X they mean.',
      questions: [{ id: 'q1', question: 'What do you mean by X?' }],
    };
    expect(
      validateClarification(payload),
      'RFC 0030 §A: envelope with optional `reasoning` populated MUST be accepted',
    ).toBe(true);
  });

  it('error envelope validates without reasoning (backward compat)', () => {
    const payload = { code: 'validation_failed', message: 'Could not match the schema.' };
    expect(
      validateError(payload),
      'RFC 0030 §A: existing v1.1 envelope without `reasoning` MUST remain valid',
    ).toBe(true);
  });

  it('error envelope validates with reasoning', () => {
    const payload = {
      reasoning: 'I analyzed each required field but the input was missing X.',
      code: 'validation_failed',
      message: 'Could not match the schema.',
    };
    expect(
      validateError(payload),
      'RFC 0030 §A: envelope with optional `reasoning` populated MUST be accepted',
    ).toBe(true);
  });

  it('schema.request validates without reasoning', () => {
    expect(
      validateSchemaRequest({ envelopeType: 'vendor.acme.prd.create' }),
      'RFC 0030 §A: existing v1.1 envelope without `reasoning` MUST remain valid',
    ).toBe(true);
  });

  // Completes the with/without matrix: the file header promises both
  // directions for all three reasoning-carrying kinds.
  it('schema.request validates with reasoning', () => {
    const payload = {
      reasoning: 'I need the target schema before I can produce a conforming payload.',
      envelopeType: 'vendor.acme.prd.create',
    };
    expect(
      validateSchemaRequest(payload),
      'RFC 0030 §A: envelope with optional `reasoning` populated MUST be accepted',
    ).toBe(true);
  });

  it('rejects reasoning of non-string type (universal kinds use plain string, not string|null union)', () => {
    const payload = {
      reasoning: 42,
      questions: [{ id: 'q1', question: '?' }],
    };
    expect(
      validateClarification(payload),
      'RFC 0030 §A: universal-kind `reasoning` MUST be type: string; numbers reject',
    ).toBe(false);
  });
});
161
+
162
describe.skipIf(HTTP_SKIP)('envelope-reasoning-shape: capabilities.envelopes advertisement (RFC 0030 §C)', () => {
  it('capabilities.envelopes.reasoning (when present) conforms to RFC 0030 §C', async () => {
    const d = await readDiscovery();
    if (d === null) return; // discovery unavailable — soft-skip
    const reasoning = d.capabilities?.envelopes?.reasoning;
    if (reasoning === undefined) return; // optional block; host MAY omit
    expect(
      typeof reasoning.supported,
      'RFC 0030 §C: capabilities.envelopes.reasoning.supported MUST be boolean when block is advertised',
    ).toBe('boolean');
    if (reasoning.promptDirective !== undefined) {
      // Compare the raw value: wrapping it in String() would let non-string
      // advertisements such as ['mandatory'] stringify into an allowed
      // value and pass.
      expect(
        ['mandatory', 'advisory', 'off'],
        'RFC 0030 §C: promptDirective MUST be one of the three documented values',
      ).toContain(reasoning.promptDirective);
    }
  });

  it('capabilities.envelopes.tierOneSubsetCompliance (when present) conforms to RFC 0030 §B', async () => {
    const d = await readDiscovery();
    if (d === null) return; // discovery unavailable — soft-skip
    const compliance = d.capabilities?.envelopes?.tierOneSubsetCompliance;
    if (compliance === undefined) return; // optional; host MAY omit
    // Raw-value comparison for the same reason as promptDirective above.
    expect(
      ['strict', 'warn', 'off'],
      'RFC 0030 §B: tierOneSubsetCompliance MUST be one of strict|warn|off',
    ).toContain(compliance);
  });
});