@openwop/openwop-conformance 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +132 -1
- package/README.md +3 -2
- package/api/asyncapi.yaml +8 -0
- package/api/openapi.yaml +371 -1
- package/coverage.md +26 -6
- package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
- package/fixtures/conformance-envelope-recovery-applied.json +39 -0
- package/fixtures/conformance-envelope-refusal.json +38 -0
- package/fixtures/conformance-envelope-retry-attempted.json +39 -0
- package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
- package/fixtures/conformance-envelope-truncated.json +39 -0
- package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
- package/fixtures/conformance-model-capability-insufficient.json +25 -0
- package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
- package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
- package/fixtures/conformance-multi-agent-handoff.json +49 -0
- package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
- package/fixtures/conformance-prompt-end-to-end.json +33 -0
- package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
- package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
- package/fixtures/openwop-smoke-cost-emit.json +37 -0
- package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
- package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
- package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
- package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
- package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
- package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
- package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
- package/fixtures.md +39 -0
- package/package.json +1 -1
- package/schemas/README.md +5 -0
- package/schemas/agent-manifest.schema.json +16 -0
- package/schemas/capabilities.schema.json +384 -1
- package/schemas/envelopes/clarification.request.schema.json +9 -0
- package/schemas/envelopes/error.schema.json +4 -0
- package/schemas/envelopes/schema.request.schema.json +4 -0
- package/schemas/envelopes/schema.response.schema.json +1 -1
- package/schemas/node-pack-manifest.schema.json +28 -0
- package/schemas/orchestrator-decision.schema.json +12 -0
- package/schemas/prompt-kind.schema.json +8 -0
- package/schemas/prompt-pack-manifest.schema.json +80 -0
- package/schemas/prompt-ref.schema.json +40 -0
- package/schemas/prompt-template.schema.json +149 -0
- package/schemas/registry-version-manifest.schema.json +5 -0
- package/schemas/run-ancestry-response.schema.json +54 -0
- package/schemas/run-event-payloads.schema.json +479 -11
- package/schemas/run-event.schema.json +15 -1
- package/schemas/run-snapshot.schema.json +3 -2
- package/schemas/workflow-definition.schema.json +19 -1
- package/src/lib/llm-cache-key-recipe.ts +68 -0
- package/src/scenarios/aiEnvelope.contractRefusal.test.ts +104 -13
- package/src/scenarios/aiEnvelope.correlationReplay.test.ts +32 -15
- package/src/scenarios/aiEnvelope.redaction.test.ts +6 -5
- package/src/scenarios/aiEnvelope.schemaDrift.test.ts +5 -5
- package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +211 -12
- package/src/scenarios/aiEnvelope.universalKinds.test.ts +7 -7
- package/src/scenarios/blob-presign-expiry.test.ts +7 -7
- package/src/scenarios/cache-ttl-expiry.test.ts +6 -6
- package/src/scenarios/cost-attribution.test.ts +124 -11
- package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
- package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
- package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
- package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
- package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
- package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
- package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
- package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
- package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
- package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
- package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
- package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
- package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
- package/src/scenarios/envelope-truncated.test.ts +136 -0
- package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
- package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
- package/src/scenarios/fixtures-valid.test.ts +123 -15
- package/src/scenarios/kv-ttl-expiry.test.ts +7 -7
- package/src/scenarios/model-capability-insufficient.test.ts +221 -0
- package/src/scenarios/model-capability-substituted.test.ts +203 -0
- package/src/scenarios/multi-agent-confidence-escalation.test.ts +201 -0
- package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
- package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
- package/src/scenarios/multi-region-idempotency.test.ts +58 -0
- package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
- package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
- package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
- package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
- package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
- package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
- package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
- package/src/scenarios/prompt-pack-install.test.ts +187 -0
- package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
- package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
- package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
- package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
- package/src/scenarios/prompt-template-shape.test.ts +359 -0
- package/src/scenarios/queue-ack-nack-dlq.test.ts +7 -7
- package/src/scenarios/queue-publish-consume-roundtrip.test.ts +7 -7
- package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
- package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
- package/src/scenarios/replay-llm-cache-key.test.ts +1 -40
- package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
- package/src/scenarios/sandbox-capability-gate-respected.test.ts +27 -0
- package/src/scenarios/sandbox-memory-cap.test.ts +58 -0
- package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +30 -0
- package/src/scenarios/sandbox-no-host-env-leak.test.ts +27 -0
- package/src/scenarios/sandbox-no-host-fs-escape.test.ts +88 -0
- package/src/scenarios/sandbox-no-host-process-escape.test.ts +31 -0
- package/src/scenarios/sandbox-no-network-escape.test.ts +28 -0
- package/src/scenarios/sandbox-timeout-cap.test.ts +58 -0
- package/src/scenarios/search-bm25-roundtrip.test.ts +7 -7
- package/src/scenarios/spec-corpus-validity.test.ts +34 -6
- package/src/scenarios/sql-transaction-atomicity.test.ts +6 -6
- package/src/scenarios/stream-subscribe-from-beginning.test.ts +7 -7
- package/src/scenarios/subworkflow-input-mapping.test.ts +70 -4
- package/src/scenarios/table-cursor-pagination.test.ts +7 -7
- package/src/scenarios/table-schema-enforcement.test.ts +7 -7
- package/src/scenarios/vector-knn-roundtrip.test.ts +7 -7
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* envelope-reasoning-secret-redaction — RFC 0030 §E security invariant.
|
|
3
|
+
*
|
|
4
|
+
* SECURITY invariant: `envelope-reasoning-secret-redaction` (gate timing
|
|
5
|
+
* per RFC 0027 §G precedent — lands alongside reference-host emission).
|
|
6
|
+
*
|
|
7
|
+
* Asserts that the envelope-acceptor's BYOK redaction harness walks the
|
|
8
|
+
* `reasoning` field — known credential canaries (the `byokCanaries[]`
|
|
9
|
+
* shape from RFC 0021 §"Redaction (SR-1 carry-forward)", supplied as
|
|
10
|
+
* `{ value, secretId }` pairs) found inside `reasoning` MUST be
|
|
11
|
+
* substituted with `[REDACTED:<secretId>]` markers
|
|
12
|
+
* before the envelope is persisted to `RunEventDoc.payload`. The acceptor's
|
|
13
|
+
* recursive walk per `ai-envelope.md` §"Redaction (SR-1 carry-forward)"
|
|
14
|
+
* covers `reasoning` automatically because it's just another payload
|
|
15
|
+
* field — but the conformance suite asserts it explicitly so a future
|
|
16
|
+
* refactor that adds an early-exit at known-shape boundaries cannot
|
|
17
|
+
* regress the invariant.
|
|
18
|
+
*
|
|
19
|
+
* Behavioral assertions drive the existing envelope-accept test seam
|
|
20
|
+
* (`POST /v1/host/sample/envelope/accept`) introduced by RFC 0021. Each
|
|
21
|
+
* test soft-skips on HTTP 404 (host doesn't expose the seam) and on
|
|
22
|
+
* capability absence.
|
|
23
|
+
*
|
|
24
|
+
* Downstream-projection assertions (OTel-attribute scrape + debug-bundle
|
|
25
|
+
* export + non-routing-on-reasoning invariant) are live behavioral via the
|
|
26
|
+
* `/v1/host/sample/test/otel/spans` + `/v1/host/sample/test/debug-bundle/export`
|
|
27
|
+
* seams (soft-skip on HTTP 404 when the host doesn't expose them). The
|
|
28
|
+
* acceptor-level redaction is verified independently above via the
|
|
29
|
+
* envelope-accept seam.
|
|
30
|
+
*
|
|
31
|
+
* @see RFCS/0030-envelope-reasoning-and-tier-one-subset.md §E
|
|
32
|
+
* @see spec/v1/ai-envelope.md §"Reasoning field (normative)" + §"Redaction (SR-1 carry-forward)"
|
|
33
|
+
* @see SECURITY/threat-model-secret-leakage.md §SR-1
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
import { describe, it, expect } from 'vitest';
|
|
37
|
+
import { driver } from '../lib/driver.js';
|
|
38
|
+
|
|
39
|
+
// True when OPENWOP_BASE_URL is unset or empty; passed to describe.skipIf() so the live-host suites below are skipped.
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
|
|
40
|
+
|
|
41
|
+
/**
 * Subset of the `/.well-known/openwop` discovery document read by this suite.
 * Leaf values are typed `unknown` so each test compares them explicitly
 * (for example `=== true`) instead of trusting the host's types.
 */
interface DiscoveryDoc {
  capabilities?: {
    envelopes?: {
      reasoning?: {
        supported?: unknown;
      };
    };
    secrets?: {
      supported?: unknown;
    };
    observability?: {
      testSeams?: {
        otelScrape?: unknown;
        debugBundleExport?: unknown;
      };
    };
  };
}
|
|
53
|
+
|
|
54
|
+
/**
 * Fetches the discovery document from `/.well-known/openwop`.
 *
 * @returns the parsed document on HTTP 200; `null` on any other status or
 *   when the request throws (best-effort read — callers soft-skip on `null`).
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const response = await driver.get('/.well-known/openwop');
    return response.status === 200 ? (response.json as DiscoveryDoc) : null;
  } catch {
    return null;
  }
}
|
|
63
|
+
|
|
64
|
+
/** Fields this suite reads from the envelope-accept seam's response body. */
type AcceptSeamBody = {
  status?: string;
  reason?: string;
  redactedPayload?: unknown;
  redactionCount?: number;
  details?: unknown[];
};

/**
 * Posts an envelope to the `/v1/host/sample/envelope/accept` test seam.
 *
 * @param envelope - envelope object sent under the `envelope` key.
 * @param opts - extra top-level request fields (e.g. `byokCanaries`), spread
 *   after `envelope` so they sit alongside it in the request body.
 * @returns the HTTP status and the response JSON cast to the fields above.
 */
async function accept(
  envelope: unknown,
  opts: Record<string, unknown> = {},
): Promise<{ status: number; body: AcceptSeamBody }> {
  const requestBody = { envelope, ...opts };
  const response = await driver.post('/v1/host/sample/envelope/accept', requestBody);
  return { status: response.status, body: response.json as AcceptSeamBody };
}
|
|
89
|
+
|
|
90
|
+
// `meta` block shared by every envelope this suite submits.
const baseMeta = {
  source: 'ai-generation' as const,
  ts: '2026-05-20T10:00:00Z',
};

// Canary credential: the plaintext that must never survive redaction, and the
// secret id that the replacement marker is expected to carry.
const CANARY_VALUE = 'sk-reasoning-canary-DO-NOT-LEAK-30a1';
const CANARY_SECRET_ID = 'byok-reasoning-fixture-001';

// `{ value, secretId }` pairs sent to the seam as `byokCanaries`.
const CANARIES = [
  {
    value: CANARY_VALUE,
    secretId: CANARY_SECRET_ID,
  },
];

// Marker expected in place of the canary plaintext.
const CANONICAL_MARKER = `[REDACTED:${CANARY_SECRET_ID}]`;
|
|
95
|
+
|
|
96
|
+
describe.skipIf(HTTP_SKIP)('envelope-reasoning-secret-redaction: advertisement shape (RFC 0030 §E)', () => {
  it('hosts advertising envelope reasoning + BYOK honor SR-1 carry-forward for the reasoning field', async () => {
    const discovery = await readDiscovery();
    if (discovery === null) return;

    const capabilities = discovery.capabilities;
    const claimsReasoning = capabilities?.envelopes?.reasoning?.supported === true;
    const claimsSecrets = capabilities?.secrets?.supported === true;
    // Soft-skip unless the host claims both surfaces.
    if (!(claimsReasoning && claimsSecrets)) return;

    // The contract is invariant-based rather than capability-flag-based, so
    // reaching this point (both surfaces claimed) is all this check confirms;
    // the assertion below is a placeholder.
    expect(true).toBe(true);
  });
});
|
|
108
|
+
|
|
109
|
+
describe.skipIf(HTTP_SKIP)('envelope-reasoning-secret-redaction: BYOK redaction of `reasoning` (RFC 0030 §E)', () => {
  /**
   * Serializes the redacted view so tests can run substring checks on it.
   *
   * Asserts the payload is present first: `JSON.stringify(undefined)` returns
   * `undefined`, so without this guard a host that omits `redactedPayload`
   * would fail with a bare TypeError on `.includes` instead of a readable
   * conformance failure.
   */
  function serializeRedacted(body: { redactedPayload?: unknown }): string {
    expect(
      body.redactedPayload,
      'accepted response MUST carry `redactedPayload` so the redacted view can be inspected',
    ).toBeDefined();
    return JSON.stringify(body.redactedPayload);
  }

  it('canary in `reasoning` → substituted with canonical [REDACTED:<secretId>] marker per agent-memory.md:66', async () => {
    const r = await accept(
      {
        type: 'error',
        schemaVersion: 1,
        envelopeId: 'env-reason-red-1',
        correlationId: 'r:n:0:reasonred1',
        payload: {
          reasoning: `I analyzed the input and noticed the credential ${CANARY_VALUE} was embedded; the call cannot proceed safely.`,
          code: 'validation_failed',
          message: 'Refusing to act on a credential-bearing input.',
        },
        meta: baseMeta,
      },
      { byokCanaries: CANARIES },
    );
    if (r.status === 404) return; // host doesn't expose the seam
    expect(r.body.status, 'envelope MUST be accepted; redaction is a post-validation pass').toBe('accepted');
    expect(
      r.body.redactionCount,
      'RFC 0030 §E: redactionCount MUST be > 0 when a canary appears in `reasoning`',
    ).toBeGreaterThan(0);

    // Serialize once; both the negative and the positive check read the same view.
    const redactedView = serializeRedacted(r.body);
    expect(
      redactedView.includes(CANARY_VALUE),
      driver.describe(
        'ai-envelope.md §"Redaction (SR-1 carry-forward)"',
        'canary plaintext MUST NOT remain anywhere in the redacted view — `reasoning` field included',
      ),
    ).toBe(false);
    expect(
      redactedView,
      driver.describe(
        'agent-memory.md §SR-1 line 66',
        'persisted entry MUST carry [REDACTED:<secretId>] in place of the plaintext',
      ),
    ).toContain(CANONICAL_MARKER);
  });

  it('canary in `reasoning` AND another payload field → both occurrences scrubbed with single marker', async () => {
    const r = await accept(
      {
        type: 'error',
        schemaVersion: 1,
        envelopeId: 'env-reason-red-2',
        correlationId: 'r:n:0:reasonred2',
        payload: {
          reasoning: `The token ${CANARY_VALUE} appeared in two places.`,
          code: 'leak_demo',
          message: `Original tool output: ${CANARY_VALUE}`,
        },
        meta: baseMeta,
      },
      { byokCanaries: CANARIES },
    );
    if (r.status === 404) return;
    expect(r.body.status).toBe('accepted');
    expect(
      serializeRedacted(r.body).includes(CANARY_VALUE),
      'no canary plaintext remnant anywhere — `reasoning` + `message` both walked recursively',
    ).toBe(false);
    expect(
      r.body.redactionCount,
      'recursive walk substitutes once per occurrence; 2 occurrences = redactionCount: 2',
    ).toBe(2);
  });

  it('absent canary in `reasoning` → reasoning passes through unchanged (no false-positive redaction)', async () => {
    const r = await accept(
      {
        type: 'error',
        schemaVersion: 1,
        envelopeId: 'env-reason-red-3',
        correlationId: 'r:n:0:reasonred3',
        payload: {
          reasoning: 'The input was empty; I declined to fabricate a response.',
          code: 'no_input',
          message: 'Empty input.',
        },
        meta: baseMeta,
      },
      { byokCanaries: CANARIES }, // canary in fixture, but NOT in payload
    );
    if (r.status === 404) return;
    expect(r.body.status).toBe('accepted');
    expect(r.body.redactionCount, 'no canary occurrence → redactionCount: 0').toBe(0);
    // A missing payload falls back to `{}` so the equality check below reports the failure.
    const payload = (r.body.redactedPayload ?? {}) as { reasoning?: string };
    expect(
      payload.reasoning,
      'reasoning field MUST pass through unchanged when no canary substring matches',
    ).toBe('The input was empty; I declined to fabricate a response.');
  });

  it('canary in `clarification.request.reasoning` (universal kind with reasoning property)', async () => {
    const r = await accept(
      {
        type: 'clarification.request',
        schemaVersion: 1,
        envelopeId: 'env-reason-red-4',
        correlationId: 'r:n:0:reasonred4',
        payload: {
          reasoning: `I noticed the input contained ${CANARY_VALUE}; I need clarification on whether to proceed.`,
          questions: [{ id: 'q1', question: 'Should I treat embedded credentials as valid input?' }],
        },
        meta: baseMeta,
      },
      { byokCanaries: CANARIES },
    );
    if (r.status === 404) return;
    expect(r.body.status).toBe('accepted');
    const redactedView = serializeRedacted(r.body);
    expect(redactedView.includes(CANARY_VALUE)).toBe(false);
    expect(redactedView).toContain(CANONICAL_MARKER);
  });
});
|
|
223
|
+
|
|
224
|
+
// Behavioral assertions through the workflow-engine sample's downstream
|
|
225
|
+
// projection paths. Each `it()` soft-skips on HTTP 404 when the host
|
|
226
|
+
// doesn't expose the `/test/otel/spans` or `/test/debug-bundle/export`
|
|
227
|
+
// seam. The envelope-accept seam (above) verifies the acceptor-level
|
|
228
|
+
// redaction; these assertions verify the redaction propagates through
|
|
229
|
+
// the downstream surfaces.
|
|
230
|
+
|
|
231
|
+
describe.skipIf(HTTP_SKIP)('envelope-reasoning-secret-redaction: downstream-projection paths (RFC 0030 §E)', () => {
  // Every envelope here is submitted through the envelope-accept seam with a
  // `projectTo.runId`, so the outcome is mirrored to the host's test span and
  // event-log buffers (per `host/envelopeProjection.ts`). The tests then read
  // those buffers through the `/v1/host/sample/test/otel/spans` and
  // `/test/debug-bundle/export` seams and confirm the canary plaintext from
  // `reasoning` never shows up in either projection.
  const RUN_ID = 'reasoning-redaction-test-run';

  /**
   * Submits an `error` envelope carrying `reasoning` for the shared run and
   * returns the seam's HTTP status plus the response JSON.
   */
  async function acceptForRun(
    reasoning: string,
    envelopeId: string,
  ): Promise<{ status: number; body: { status?: string; redactedPayload?: unknown } }> {
    const envelope = {
      type: 'error',
      schemaVersion: 1,
      envelopeId,
      correlationId: `r:n:0:${envelopeId}`,
      payload: { reasoning, code: 'validation_failed', message: 'Refusing.' },
      meta: baseMeta,
    };
    const response = await driver.post('/v1/host/sample/envelope/accept', {
      envelope,
      byokCanaries: CANARIES,
      projectTo: { runId: RUN_ID, nodeId: 'reasoning-emit-node' },
    });
    return {
      status: response.status,
      body: response.json as { status?: string; redactedPayload?: unknown },
    };
  }

  it('OTel span attributes for the envelope-emitting node MUST NOT include plaintext `secret:`-prefixed substrings from `reasoning`', async () => {
    // RFC 0034 §B: gated on capabilities.observability.testSeams.otelScrape.
    // A host that does not advertise the seam soft-skips (honest per RFC 0034 §A);
    // a host that DOES advertise it MUST serve a valid response.
    const discovery = await readDiscovery();
    if (discovery?.capabilities?.observability?.testSeams?.otelScrape !== true) return;

    const accepted = await acceptForRun(
      `Reasoning trace mentioning ${CANARY_VALUE} verbatim.`,
      'env-otel-redact-1',
    );
    if (accepted.status === 404) return; // envelope-accept seam itself not exposed — separate gate
    expect(accepted.body.status).toBe('accepted');

    const scrapePath = `/v1/host/sample/test/otel/spans?runId=${encodeURIComponent(RUN_ID)}`;
    const spansRes = await driver.get(scrapePath);
    expect(
      spansRes.status,
      driver.describe(
        'RFCS/0034-otel-collector-test-seam.md §B',
        'host advertising capabilities.observability.testSeams.otelScrape MUST serve the seam (200) — 404 is non-conformant',
      ),
    ).toBe(200);

    // A missing body or `spans` key is treated as an empty span list.
    const spansBody = spansRes.json as { spans?: Array<Record<string, unknown>> } | undefined;
    const spansJson = JSON.stringify(spansBody?.spans ?? []);
    expect(
      spansJson.includes(CANARY_VALUE),
      driver.describe(
        'SECURITY/invariants.yaml secret-leakage-otel-attribute',
        'no canary plaintext from the envelope `reasoning` field may appear in any OTel span attribute',
      ),
    ).toBe(false);
  });

  it("debug-bundle export MUST NOT include plaintext `secret:`-prefixed substrings from envelope.reasoning", async () => {
    // RFC 0034 §B: gated on capabilities.observability.testSeams.debugBundleExport.
    const discovery = await readDiscovery();
    // Soft-skip — host honest about not implementing per RFC 0034 §A.
    if (discovery?.capabilities?.observability?.testSeams?.debugBundleExport !== true) return;

    const accepted = await acceptForRun(
      `Step-by-step reasoning that surfaces ${CANARY_VALUE} from a prior tool result.`,
      'env-bundle-redact-1',
    );
    if (accepted.status === 404) return; // envelope-accept seam itself not exposed — separate gate
    expect(accepted.body.status).toBe('accepted');

    const bundleRes = await driver.post('/v1/host/sample/test/debug-bundle/export', { runId: RUN_ID });
    expect(
      bundleRes.status,
      driver.describe(
        'RFCS/0034-otel-collector-test-seam.md §B',
        'host advertising capabilities.observability.testSeams.debugBundleExport MUST serve the seam (200) — 404 is non-conformant',
      ),
    ).toBe(200);

    const bundleJson = JSON.stringify(bundleRes.json);
    expect(
      bundleJson.includes(CANARY_VALUE),
      driver.describe(
        'SECURITY/invariants.yaml secret-leakage-debug-bundle-otel',
        'no canary plaintext from envelope.reasoning may appear in the debug-bundle export',
      ),
    ).toBe(false);
  });

  it("envelope acceptance MUST NOT route on `reasoning` contents (RFC 0030 §A normative MUST NOT) — the host's handler-routing decision MUST be identical regardless of `reasoning` value", async () => {
    // Two envelopes that differ only in `reasoning` content and envelopeId are
    // submitted one after the other. Per RFC 0030 §A reasoning is non-routing,
    // so the acceptor's decision MUST come out the same for both; the check
    // below compares the reported `status`.
    const benign = await acceptForRun('reasoning-variant-A: model thinks the input is benign.', 'env-route-A');
    const withCanary = await acceptForRun(
      `reasoning-variant-B with embedded ${CANARY_VALUE} canary — host MUST NOT route differently.`,
      'env-route-B',
    );
    if (benign.status === 404 || withCanary.status === 404) return;

    expect(benign.body.status).toBe('accepted');
    expect(withCanary.body.status).toBe('accepted');
    expect(
      benign.body.status,
      driver.describe(
        'RFCS/0030-envelope-reasoning-and-tier-one-subset.md §A',
        'reasoning is informational only; routing decision MUST NOT depend on its contents',
      ),
    ).toBe(withCanary.body.status);
  });
});
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* envelope-reasoning-shape — RFC 0030 §A wire-shape conformance.
|
|
3
|
+
*
|
|
4
|
+
* Asserts:
|
|
5
|
+
* 1. The three universal-kind payload schemas that carry reasoning
|
|
6
|
+
* (`clarification.request`, `schema.request`, `error`) declare the
|
|
7
|
+
* OPTIONAL `reasoning` property of type `string`.
|
|
8
|
+
* 2. The fourth universal-kind schema (`schema.response`) does NOT
|
|
9
|
+
* declare `reasoning` (side-channel ack per RFC 0030 §A).
|
|
10
|
+
* 3. Each of the three schemas validates payloads with and without
|
|
11
|
+
* `reasoning` populated (preserves v1.1 backward compatibility).
|
|
12
|
+
* 4. `capabilities.envelopes.reasoning` advertisement shape (when
|
|
13
|
+
* present) conforms per RFC 0030 §C.
|
|
14
|
+
* 5. `capabilities.envelopes.tierOneSubsetCompliance` is one of the
|
|
15
|
+
* three documented values when present.
|
|
16
|
+
*
|
|
17
|
+
* NOT capability-gated — schema-shape compilation always runs. Discovery
|
|
18
|
+
* checks soft-skip when no live host is configured.
|
|
19
|
+
*
|
|
20
|
+
* @see RFCS/0030-envelope-reasoning-and-tier-one-subset.md
|
|
21
|
+
* @see spec/v1/ai-envelope.md §"Reasoning field (normative)"
|
|
22
|
+
* @see schemas/envelopes/clarification.request.schema.json
|
|
23
|
+
* @see schemas/envelopes/schema.request.schema.json
|
|
24
|
+
* @see schemas/envelopes/error.schema.json
|
|
25
|
+
* @see schemas/envelopes/schema.response.schema.json
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
import { describe, it, expect } from 'vitest';
|
|
29
|
+
import Ajv2020 from 'ajv/dist/2020.js';
|
|
30
|
+
import addFormats from 'ajv-formats';
|
|
31
|
+
import { readFileSync } from 'node:fs';
|
|
32
|
+
import { join } from 'node:path';
|
|
33
|
+
import { driver } from '../lib/driver.js';
|
|
34
|
+
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
35
|
+
|
|
36
|
+
// True when OPENWOP_BASE_URL is unset or empty; passed to describe.skipIf() so the discovery-dependent suite is skipped.
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
|
|
37
|
+
|
|
38
|
+
/**
 * Subset of the `/.well-known/openwop` discovery document read by this suite.
 * Leaf values are typed `unknown` because the tests themselves verify their
 * runtime type and allowed values.
 */
interface DiscoveryDoc {
  capabilities?: {
    envelopes?: {
      reasoning?: {
        supported?: unknown;
        promptDirective?: unknown;
      };
      tierOneSubsetCompliance?: unknown;
    };
  };
}
|
|
49
|
+
|
|
50
|
+
/**
 * Reads the schema file at `rel` (resolved against `SCHEMAS_DIR`) as UTF-8
 * and returns the parsed JSON.
 */
function loadSchema(rel: string): Record<string, unknown> {
  const schemaPath = join(SCHEMAS_DIR, rel);
  const rawText = readFileSync(schemaPath, 'utf8');
  return JSON.parse(rawText) as Record<string, unknown>;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Builds a fresh Ajv (draft 2020-12) instance with `allErrors` enabled,
 * strict mode disabled, and `ajv-formats` applied.
 */
function makeAjv(): Ajv2020 {
  const options = { allErrors: true, strict: false };
  const instance = new Ajv2020(options);
  addFormats(instance);
  return instance;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Fetches the discovery document from `/.well-known/openwop`.
 *
 * @returns the parsed document on HTTP 200; `null` on any other status or
 *   when the request throws (best-effort read — callers soft-skip on `null`).
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const response = await driver.get('/.well-known/openwop');
    return response.status === 200 ? (response.json as DiscoveryDoc) : null;
  } catch {
    return null;
  }
}
|
|
69
|
+
|
|
70
|
+
describe('envelope-reasoning-shape: universal-kind schemas (RFC 0030 §A)', () => {
  const KINDS_WITH_REASONING = ['clarification.request', 'schema.request', 'error'] as const;

  // Top-level `properties` map of an envelope schema file (undefined when absent).
  const propertiesOf = (rel: string): Record<string, Record<string, unknown>> | undefined =>
    loadSchema(rel).properties as Record<string, Record<string, unknown>> | undefined;

  // One test per universal kind that carries `reasoning`.
  KINDS_WITH_REASONING.forEach((kind) => {
    it(`${kind}.schema.json declares OPTIONAL \`reasoning: string\` per RFC 0030 §A`, () => {
      const schema = loadSchema(`envelopes/${kind}.schema.json`);
      const reasoningProp = (schema.properties as Record<string, Record<string, unknown>> | undefined)?.reasoning;
      const requiredKeys = (schema.required as string[] | undefined) ?? [];

      expect(
        reasoningProp,
        `RFC 0030 §A: ${kind}.schema.json MUST declare a \`reasoning\` property`,
      ).toBeDefined();
      expect(
        reasoningProp?.type,
        'RFC 0030 §A: `reasoning` MUST be type: string on universal-kind schemas',
      ).toBe('string');
      expect(
        requiredKeys.includes('reasoning'),
        `RFC 0030 §A: ${kind}.schema.json MUST NOT list \`reasoning\` in required (OPTIONAL field; absence is a valid envelope shape)`,
      ).toBe(false);
    });
  });

  it('schema.response.schema.json deliberately omits `reasoning` (side-channel ack per RFC 0030 §A)', () => {
    const responseProps = propertiesOf('envelopes/schema.response.schema.json');
    expect(
      responseProps?.reasoning,
      'RFC 0030 §A: `schema.response` is a side-channel ack envelope; it MUST NOT declare `reasoning`',
    ).toBeUndefined();
  });
});
|
|
102
|
+
|
|
103
|
+
describe('envelope-reasoning-shape: backward-compat round-trip (RFC 0030 §A)', () => {
  // Each schema is compiled once at describe scope, on its own Ajv instance,
  // so every `it` block reuses the same validator (Ajv refuses duplicate $id
  // registration).
  const ajvClarification = makeAjv();
  const validateClarification = ajvClarification.compile(loadSchema('envelopes/clarification.request.schema.json'));
  const ajvError = makeAjv();
  const validateError = ajvError.compile(loadSchema('envelopes/error.schema.json'));
  const ajvSchemaRequest = makeAjv();
  const validateSchemaRequest = ajvSchemaRequest.compile(loadSchema('envelopes/schema.request.schema.json'));

  it('clarification.request validates a payload WITHOUT reasoning (backward compat)', () => {
    const payload = { questions: [{ id: 'q1', question: 'What do you mean by X?' }] };
    expect(
      validateClarification(payload),
      'RFC 0030 §A: existing v1.1 envelope without `reasoning` MUST remain valid',
    ).toBe(true);
  });

  it('clarification.request validates a payload WITH reasoning', () => {
    const payload = {
      reasoning: 'The user mentioned X twice but I am not sure which X they mean.',
      questions: [{ id: 'q1', question: 'What do you mean by X?' }],
    };
    expect(
      validateClarification(payload),
      'RFC 0030 §A: envelope with optional `reasoning` populated MUST be accepted',
    ).toBe(true);
  });

  it('error envelope validates without reasoning (backward compat)', () => {
    const payload = { code: 'validation_failed', message: 'Could not match the schema.' };
    expect(validateError(payload)).toBe(true);
  });

  it('error envelope validates with reasoning', () => {
    const payload = {
      reasoning: 'I analyzed each required field but the input was missing X.',
      code: 'validation_failed',
      message: 'Could not match the schema.',
    };
    expect(validateError(payload)).toBe(true);
  });

  it('schema.request validates without reasoning', () => {
    expect(validateSchemaRequest({ envelopeType: 'vendor.acme.prd.create' })).toBe(true);
  });

  // Completes the with/without pair for the third reasoning-bearing kind, so
  // all three schemas are round-tripped with `reasoning` populated.
  it('schema.request validates with reasoning', () => {
    const payload = {
      reasoning: 'I need the schema for this envelope type before I can emit it.',
      envelopeType: 'vendor.acme.prd.create',
    };
    expect(
      validateSchemaRequest(payload),
      'RFC 0030 §A: envelope with optional `reasoning` populated MUST be accepted',
    ).toBe(true);
  });

  it('rejects reasoning of non-string type (universal kinds use plain string, not string|null union)', () => {
    const payload = {
      reasoning: 42,
      questions: [{ id: 'q1', question: '?' }],
    };
    expect(
      validateClarification(payload),
      'RFC 0030 §A: universal-kind `reasoning` MUST be type: string; numbers reject',
    ).toBe(false);
  });
});
|
|
161
|
+
|
|
162
|
+
describe.skipIf(HTTP_SKIP)('envelope-reasoning-shape: capabilities.envelopes advertisement (RFC 0030 §C)', () => {
  it('capabilities.envelopes.reasoning (when present) conforms to RFC 0030 §C', async () => {
    const d = await readDiscovery();
    if (d === null) return;
    const reasoning = d.capabilities?.envelopes?.reasoning;
    if (reasoning === undefined) return; // optional block; host MAY omit
    expect(
      typeof reasoning.supported,
      'RFC 0030 §C: capabilities.envelopes.reasoning.supported MUST be boolean when block is advertised',
    ).toBe('boolean');
    if (reasoning.promptDirective !== undefined) {
      // Compare the raw advertised value: coercing with String() would let a
      // non-string such as ['off'] stringify to 'off' and pass the enum check.
      expect(
        ['mandatory', 'advisory', 'off'],
        'RFC 0030 §C: promptDirective MUST be one of the three documented values',
      ).toContain(reasoning.promptDirective);
    }
  });

  it('capabilities.envelopes.tierOneSubsetCompliance (when present) conforms to RFC 0030 §B', async () => {
    const d = await readDiscovery();
    if (d === null) return;
    const compliance = d.capabilities?.envelopes?.tierOneSubsetCompliance;
    if (compliance === undefined) return; // optional; host MAY omit
    // Raw value on purpose — see the promptDirective check: String() coercion
    // would accept non-string values whose string form happens to match.
    expect(
      ['strict', 'warn', 'off'],
      'RFC 0030 §B: tierOneSubsetCompliance MUST be one of strict|warn|off',
    ).toContain(compliance);
  });
});
|