@openwop/openwop-conformance 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/CHANGELOG.md +65 -0
  2. package/README.md +2 -2
  3. package/api/redocly.yaml +15 -0
  4. package/coverage.md +2 -1
  5. package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
  6. package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
  7. package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
  8. package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
  9. package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
  10. package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
  11. package/fixtures.md +6 -0
  12. package/package.json +1 -1
  13. package/schemas/capabilities.schema.json +16 -0
  14. package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
  15. package/schemas/run-event-payloads.schema.json +35 -1
  16. package/schemas/run-event.schema.json +2 -0
  17. package/src/lib/driver.ts +15 -0
  18. package/src/lib/env.ts +51 -0
  19. package/src/lib/event-log-query.ts +62 -0
  20. package/src/lib/fixtures.ts +38 -1
  21. package/src/lib/host-toggle.ts +54 -0
  22. package/src/lib/multi-agent-capabilities.ts +10 -0
  23. package/src/lib/otel-scrape.ts +59 -0
  24. package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
  25. package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
  26. package/src/scenarios/aiEnvelope.contractRefusal.test.ts +128 -10
  27. package/src/scenarios/aiEnvelope.correlationReplay.test.ts +236 -21
  28. package/src/scenarios/aiEnvelope.redaction.test.ts +204 -24
  29. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +158 -19
  30. package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +59 -8
  31. package/src/scenarios/aiEnvelope.universalKinds.test.ts +100 -9
  32. package/src/scenarios/blob-presign-expiry.test.ts +35 -2
  33. package/src/scenarios/blob-roundtrip.test.ts +0 -0
  34. package/src/scenarios/cache-ttl-expiry.test.ts +28 -2
  35. package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
  36. package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
  37. package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
  38. package/src/scenarios/fixtures-gating.test.ts +139 -1
  39. package/src/scenarios/kv-ttl-expiry.test.ts +33 -2
  40. package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
  41. package/src/scenarios/pack-registry-publish.test.ts +231 -51
  42. package/src/scenarios/provider-usage.test.ts +185 -0
  43. package/src/scenarios/queue-ack-nack-dlq.test.ts +57 -3
  44. package/src/scenarios/queue-publish-consume-roundtrip.test.ts +43 -3
  45. package/src/scenarios/replay-llm-cache-key.test.ts +166 -25
  46. package/src/scenarios/search-bm25-roundtrip.test.ts +47 -2
  47. package/src/scenarios/sql-transaction-atomicity.test.ts +31 -2
  48. package/src/scenarios/stream-subscribe-from-beginning.test.ts +39 -2
  49. package/src/scenarios/subworkflow-input-mapping.test.ts +77 -7
  50. package/src/scenarios/table-cursor-pagination.test.ts +40 -2
  51. package/src/scenarios/table-schema-enforcement.test.ts +39 -2
  52. package/src/scenarios/vector-knn-roundtrip.test.ts +43 -3
  53. package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
@@ -1,35 +1,176 @@
1
1
  /**
2
- * Cross-host LLM cache-key parity (replay.md §"LLM cache-key recipe").
2
+ * LLM cache-key recipe — `replay.md §"LLM cache-key recipe"` §A + §B.
3
3
  *
4
- * Verifies that two OpenWOP-compliant hosts replaying the same LLM
5
- * provider request compute the same cache key. The recipe is normative
6
- * (replay.md §B): canonical JSON of `(provider, model, messages, tools,
7
- * temperature, topP, topK, responseFormat)` → SHA-256 → lowercase hex.
4
+ * Verifies that an OpenWOP host computes the LLM cache key per the
5
+ * normative recipe: SHA-256 over RFC 8785 JCS-canonicalized JSON of
6
+ * the closed set of recipe fields (`provider, model, messages, tools,
7
+ * temperature, topP, topK, responseFormat`).
8
8
  *
9
- * Status: PLACEHOLDER. As of 2026-05-11, neither reference host
10
- * (`examples/hosts/in-memory/`, `examples/hosts/sqlite/`) implements
11
- * LLM-calling nodes; both execute only `core.noop` / `core.delay` /
12
- * `core.approvalGate` fixtures. This scenario lands as `it.todo()` so
13
- * the contract surface is tracked; assertions land when the first
14
- * reference host ships an LLM-call node.
9
+ * The single-host assertions drive the env-gated test seam at
10
+ * `POST /v1/host/sample/test/llm-cache-key` and recompute the expected
11
+ * key locally per the recipe, asserting equality. Non-recipe fields
12
+ * (`max_tokens`, `stop`, `stream`, `seed`, etc.) MUST NOT influence
13
+ * the key per §A.
15
14
  *
16
- * What the live scenario WILL exercise (when implemented):
17
- * 1. Boot host A against `OPENWOP_BASE_URL`.
18
- * 2. Boot host B against `OPENWOP_BASE_URL_B`.
19
- * 3. Submit the same workflow + inputs (an LLM-calling fixture).
20
- * 4. Read each host's emitted `node.completed.payload.cacheKey` (or
21
- * equivalent debug-bundle surface).
22
- * 5. Assert the two hex strings are equal.
15
+ * The cross-host assertion (two hosts compute the same key) stays
16
+ * deferred — it requires `OPENWOP_BASE_URL_B` for a second-host probe,
17
+ * which is operator-supplied and outside this scenario file's scope.
23
18
  *
24
19
  * @see spec/v1/replay.md §"LLM cache-key recipe"
25
20
  */
26
21
 
27
- import { describe, it } from 'vitest';
22
+ import { describe, it, expect } from 'vitest';
23
+ import { createHash } from 'node:crypto';
24
+ import { driver } from '../lib/driver.js';
28
25
 
29
- describe('replay-llm-cache-key: cross-host determinism (placeholder)', () => {
30
- it.todo(
31
- 'two hosts replaying the same LLM provider request compute the same cache key (replay.md §D)',
32
- );
33
- it.todo('LLM cache key is computed via SHA-256 of canonical JSON per replay.md §B');
34
- it.todo('cache key omits non-recipe fields (max_tokens, stop, stream, seed, etc.) per replay.md §A');
26
+ /** Mirror of the reference impl's `canonicalize` so the conformance
27
+ * scenario can recompute the expected cache key locally and assert
28
+ * equality with what the host returns. RFC 8785 JCS-style:
29
+ * sorted-keys, no whitespace, preserve array order. */
30
+ function canonicalize(value: unknown): string {
31
+ if (value === null) return 'null';
32
+ if (typeof value === 'boolean' || typeof value === 'number') return JSON.stringify(value);
33
+ if (typeof value === 'string') return JSON.stringify(value);
34
+ if (Array.isArray(value)) return '[' + value.map((v) => canonicalize(v)).join(',') + ']';
35
+ if (typeof value === 'object') {
36
+ const obj = value as Record<string, unknown>;
37
+ const keys = Object.keys(obj).sort();
38
+ return '{' + keys.map((k) => `${JSON.stringify(k)}:${canonicalize(obj[k])}`).join(',') + '}';
39
+ }
40
+ return JSON.stringify(value);
41
+ }
42
+
43
+ function projectRecipe(raw: Record<string, unknown>): Record<string, unknown> {
44
+ const out: Record<string, unknown> = { provider: raw.provider, model: raw.model, messages: raw.messages };
45
+ if (Array.isArray(raw.tools) && raw.tools.length > 0) {
46
+ out.tools = [...(raw.tools as Array<{ name: string }>)].sort((a, b) => a.name.localeCompare(b.name));
47
+ }
48
+ if (typeof raw.temperature === 'number') out.temperature = raw.temperature;
49
+ if (typeof raw.topP === 'number') out.topP = raw.topP;
50
+ if (typeof raw.topK === 'number') out.topK = raw.topK;
51
+ if (raw.responseFormat && typeof raw.responseFormat === 'object') out.responseFormat = raw.responseFormat;
52
+ return out;
53
+ }
54
+
55
+ function expectedCacheKey(input: Record<string, unknown>): string {
56
+ return createHash('sha256').update(canonicalize(projectRecipe(input)), 'utf8').digest('hex');
57
+ }
58
+
59
+ async function callSeam(input: Record<string, unknown>): Promise<{ status: number; cacheKey?: string }> {
60
+ const res = await driver.post('/v1/host/sample/test/llm-cache-key', input);
61
+ const cacheKey = (res.json as { cacheKey?: string }).cacheKey;
62
+ return cacheKey !== undefined ? { status: res.status, cacheKey } : { status: res.status };
63
+ }
64
+
65
+ describe('replay-llm-cache-key: SHA-256-over-JCS recipe (replay.md §B)', () => {
66
+ it('host cache key MUST equal locally-recomputed SHA-256 over canonical JSON', async () => {
67
+ const input = {
68
+ provider: 'anthropic',
69
+ model: 'claude-3-5-sonnet-20240620',
70
+ messages: [
71
+ { role: 'system' as const, content: 'You are a helpful assistant.' },
72
+ { role: 'user' as const, content: 'What is 2+2?' },
73
+ ],
74
+ temperature: 0.7,
75
+ };
76
+ const result = await callSeam(input);
77
+ if (result.status === 404) return; // seam not exposed
78
+ expect(result.status).toBe(200);
79
+ expect(
80
+ result.cacheKey,
81
+ driver.describe('replay.md §B', 'host cache key MUST be lowercase-hex SHA-256 of the canonical recipe JSON'),
82
+ ).toBe(expectedCacheKey(input));
83
+ });
84
+
85
+ it('cache key MUST be 64 lowercase-hex characters (SHA-256 output shape)', async () => {
86
+ const result = await callSeam({
87
+ provider: 'openai',
88
+ model: 'gpt-4',
89
+ messages: [{ role: 'user', content: 'hi' }],
90
+ });
91
+ if (result.status === 404) return;
92
+ expect(result.cacheKey).toMatch(/^[0-9a-f]{64}$/);
93
+ });
94
+ });
95
+
96
+ describe('replay-llm-cache-key: non-recipe fields are EXCLUDED (replay.md §A)', () => {
97
+ it('max_tokens / stop / stream / seed / metadata / user MUST NOT influence the cache key', async () => {
98
+ const base = {
99
+ provider: 'openai',
100
+ model: 'gpt-4',
101
+ messages: [{ role: 'user', content: 'unit test' }],
102
+ temperature: 0.5,
103
+ };
104
+ const baseResult = await callSeam(base);
105
+ if (baseResult.status === 404) return;
106
+
107
+ // All these non-recipe fields MUST NOT affect the cache key per §A.
108
+ const noisy = {
109
+ ...base,
110
+ max_tokens: 1000,
111
+ stop: ['STOP'],
112
+ stream: true,
113
+ seed: 42,
114
+ metadata: { traceId: 'abcd' },
115
+ user: 'unit-test-user',
116
+ };
117
+ const noisyResult = await callSeam(noisy);
118
+ expect(
119
+ noisyResult.cacheKey,
120
+ driver.describe(
121
+ 'replay.md §A',
122
+ 'cache key MUST be invariant under non-recipe field changes (max_tokens, stop, stream, seed, metadata, user)',
123
+ ),
124
+ ).toBe(baseResult.cacheKey);
125
+ });
126
+
127
+ it('changing a recipe field (temperature) MUST yield a different cache key', async () => {
128
+ const baseInput = {
129
+ provider: 'openai',
130
+ model: 'gpt-4',
131
+ messages: [{ role: 'user', content: 'diversity-probe' }],
132
+ temperature: 0.0,
133
+ };
134
+ const hotInput = { ...baseInput, temperature: 1.0 };
135
+ const baseResult = await callSeam(baseInput);
136
+ if (baseResult.status === 404) return;
137
+ const hotResult = await callSeam(hotInput);
138
+ expect(
139
+ baseResult.cacheKey === hotResult.cacheKey,
140
+ driver.describe('replay.md §A', 'changing a recipe field MUST yield a different cache key (no false collisions)'),
141
+ ).toBe(false);
142
+ });
143
+ });
144
+
145
+ describe('replay-llm-cache-key: cross-host parity (replay.md §D)', () => {
146
+ it('two hosts compute the same cache key for the same input (when OPENWOP_BASE_URL_B is configured)', async () => {
147
+ const otherBaseUrl = process.env.OPENWOP_BASE_URL_B;
148
+ if (!otherBaseUrl || otherBaseUrl.length === 0) return; // second host not configured — soft-skip
149
+ const input = {
150
+ provider: 'anthropic',
151
+ model: 'claude-3-5-sonnet-20240620',
152
+ messages: [
153
+ { role: 'system' as const, content: 'cross-host parity probe' },
154
+ { role: 'user' as const, content: 'compute the same key' },
155
+ ],
156
+ temperature: 0.5,
157
+ };
158
+ const a = await callSeam(input);
159
+ if (a.status === 404) return; // host A doesn't expose the seam
160
+ const otherApiKey = process.env.OPENWOP_API_KEY_B ?? process.env.OPENWOP_API_KEY ?? '';
161
+ // Issue the second probe directly via fetch since the driver is bound to
162
+ // OPENWOP_BASE_URL. Authorization mirrors the suite's default.
163
+ const resB = await fetch(`${otherBaseUrl.replace(/\/$/, '')}/v1/host/sample/test/llm-cache-key`, {
164
+ method: 'POST',
165
+ headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${otherApiKey}` },
166
+ body: JSON.stringify(input),
167
+ });
168
+ if (resB.status === 404) return; // host B doesn't expose the seam
169
+ expect(resB.status).toBe(200);
170
+ const b = (await resB.json()) as { cacheKey?: string };
171
+ expect(
172
+ a.cacheKey,
173
+ driver.describe('replay.md §D', 'two compliant hosts MUST compute byte-identical cache keys for the same recipe input'),
174
+ ).toBe(b.cacheKey);
175
+ });
35
176
  });
@@ -42,6 +42,51 @@ describe('search-bm25-roundtrip: advertisement shape (RFC 0018)', () => {
42
42
  });
43
43
  });
44
44
 
45
- describe('search-bm25-roundtrip: behavioral assertions (placeholders need host test seam)', () => {
46
- it.todo("index 3 docs query returns relevance-ranked hits");
45
+ async function call(op: string, args: Record<string, unknown>) {
46
+ return driver.post('/v1/host/sample/test/surface', { tenantId: 'tenant-a', surface: 'search', op, args });
47
+ }
48
+
49
+ describe('search-bm25-roundtrip: behavioral (RFC 0018 §A.searchIndex)', () => {
50
+ it('index 3 docs → query for a distinguishing keyword returns the matching doc as top hit', async () => {
51
+ const probe = await call('query', { index: '__probe__', q: 'hello' });
52
+ if (probe.status === 404) return; // seam not exposed
53
+ const index = `idx-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
54
+ const idx = await call('index', {
55
+ index,
56
+ docs: [
57
+ { id: 'doc-1', fields: { title: 'Database engines for vector search', body: 'Pinecone Qdrant Weaviate Milvus pgvector' } },
58
+ { id: 'doc-2', fields: { title: 'Workflow orchestration patterns', body: 'durable runs interrupts replay event log' } },
59
+ { id: 'doc-3', fields: { title: 'Distributed systems primer', body: 'consensus Paxos Raft leader election' } },
60
+ ],
61
+ });
62
+ expect(idx.status).toBe(200);
63
+
64
+ // Query for a distinguishing keyword → doc-2 MUST be top-ranked.
65
+ const q = await call('query', { index, q: 'durable workflow runs', k: 3 });
66
+ expect(q.status).toBe(200);
67
+ const body = q.json as { hits?: Array<{ id: string; score: number }> };
68
+ expect(Array.isArray(body.hits) && body.hits.length > 0).toBe(true);
69
+ expect(
70
+ body.hits![0]!.id,
71
+ driver.describe('RFC 0018 §A.searchIndex', 'query for the doc\'s distinguishing tokens MUST return that doc as top-1'),
72
+ ).toBe('doc-2');
73
+ // Top hit's score MUST be greater than or equal to the next-ranked hit's score (ties allowed).
74
+ if (body.hits!.length > 1) {
75
+ expect(body.hits![0]!.score >= body.hits![1]!.score).toBe(true);
76
+ }
77
+ });
78
+
79
+ it('k limit caps the result set', async () => {
80
+ const probe = await call('query', { index: '__probe__', q: 'hello' });
81
+ if (probe.status === 404) return;
82
+ const index = `idx-k-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
83
+ const docs = Array.from({ length: 5 }, (_, i) => ({ id: `d-${i}`, fields: { body: 'apple orange banana' } }));
84
+ await call('index', { index, docs });
85
+ const q = await call('query', { index, q: 'apple', k: 2 });
86
+ const body = q.json as { hits?: unknown[] };
87
+ expect(
88
+ Array.isArray(body.hits) && body.hits.length <= 2,
89
+ driver.describe('RFC 0018 §A.searchIndex', 'query MUST return at most k hits'),
90
+ ).toBe(true);
91
+ });
47
92
  });
@@ -61,6 +61,35 @@ describe('sql-transaction-atomicity: advertisement shape (RFC 0018)', () => {
61
61
  });
62
62
  });
63
63
 
64
- describe('sql-transaction-atomicity: behavioral assertions (placeholders need host test seam)', () => {
65
- it.todo("transaction with N statements where N-th fails no rows from earlier statements visible");
64
+ async function call(op: string, args: Record<string, unknown>) {
65
+ return driver.post('/v1/host/sample/test/surface', { tenantId: 'tenant-a', surface: 'sql', op, args });
66
+ }
67
+
68
+ describe('sql-transaction-atomicity: behavioral (RFC 0018 §B.sql — transaction atomicity)', () => {
69
+ it('transaction with N statements where N-th fails → earlier writes MUST roll back', async () => {
70
+ const probe = await call('execute', { sql: 'CREATE TABLE IF NOT EXISTS atomicity_probe (id TEXT PRIMARY KEY)', params: [] });
71
+ if (probe.status === 404) return;
72
+ const table = `t_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
73
+ await call('execute', { sql: `CREATE TABLE ${table} (id INTEGER PRIMARY KEY, val TEXT)`, params: [] });
74
+
75
+ const txnRes = await call('transaction', {
76
+ statements: [
77
+ { sql: `INSERT INTO ${table}(id, val) VALUES (?, ?)`, params: [1, 'one'] },
78
+ { sql: `INSERT INTO ${table}(id, val) VALUES (?, ?)`, params: [2, 'two'] },
79
+ { sql: `INSERT INTO ${table}(id, val) VALUES (?, ?)`, params: [1, 'duplicate'] }, // PK violation
80
+ ],
81
+ });
82
+ expect(
83
+ txnRes.status >= 400 && txnRes.status < 500,
84
+ driver.describe('RFC 0018 §B.sql', 'transaction with failing statement MUST surface as 4xx'),
85
+ ).toBe(true);
86
+
87
+ const queryRes = await call('query', { sql: `SELECT id, val FROM ${table}`, params: [] });
88
+ expect(queryRes.status).toBe(200);
89
+ const body = queryRes.json as { rows?: unknown[] };
90
+ expect(
91
+ Array.isArray(body.rows) && body.rows.length === 0,
92
+ driver.describe('RFC 0018 §B.sql', 'rows from earlier statements in a failed transaction MUST NOT be visible'),
93
+ ).toBe(true);
94
+ });
66
95
  });
@@ -61,6 +61,43 @@ describe('stream-subscribe-from-beginning: advertisement shape (RFC 0017)', () =
61
61
  });
62
62
  });
63
63
 
64
- describe('stream-subscribe-from-beginning: behavioral assertions (placeholders need host test seam)', () => {
65
- it.todo("publish 5 records then subscribe(fromBeginning=true) consumer receives all 5");
64
+ async function call(op: string, args: Record<string, unknown>) {
65
+ return driver.post('/v1/host/sample/test/surface', { tenantId: 'tenant-a', surface: 'queueBus', op, args });
66
+ }
67
+
68
+ describe('stream-subscribe-from-beginning: behavioral (RFC 0017 §A stream.fromBeginning)', () => {
69
+ it('streamPublish 5 records then streamSubscribe({fromBeginning:true}) MUST surface all 5 in the snapshot', async () => {
70
+ const probe = await call('streamSubscribe', { stream: '__probe__', fromBeginning: true });
71
+ if (probe.status === 404) return; // seam not exposed
72
+ const stream = `s-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
73
+ for (let i = 1; i <= 5; i++) {
74
+ const r = await call('streamPublish', { stream, record: { seq: i, value: `rec-${i}` } });
75
+ expect(r.status).toBe(200);
76
+ }
77
+ const sub = await call('streamSubscribe', { stream, fromBeginning: true });
78
+ expect(sub.status).toBe(200);
79
+ const body = sub.json as { records?: Array<{ payload?: { seq?: number } }>; fromBeginningSnapshot?: boolean };
80
+ expect(
81
+ Array.isArray(body.records) && body.records.length === 5,
82
+ driver.describe('RFC 0017 §A.stream.fromBeginning', 'subscribe with fromBeginning:true MUST return ALL records previously published on the stream'),
83
+ ).toBe(true);
84
+ // Order MUST be preserved (publish-order = sequential on the same stream).
85
+ const seqs = body.records!.map((r) => r.payload?.seq);
86
+ expect(seqs).toEqual([1, 2, 3, 4, 5]);
87
+ expect(body.fromBeginningSnapshot).toBe(true);
88
+ });
89
+
90
+ it('streamSubscribe({fromBeginning:false}) MUST NOT include pre-subscribe records (live-tail semantics)', async () => {
91
+ const probe = await call('streamSubscribe', { stream: '__probe__', fromBeginning: true });
92
+ if (probe.status === 404) return;
93
+ const stream = `s-live-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
94
+ await call('streamPublish', { stream, record: { v: 'before' } });
95
+ const sub = await call('streamSubscribe', { stream, fromBeginning: false });
96
+ const body = sub.json as { records?: unknown[]; fromBeginningSnapshot?: boolean };
97
+ expect(
98
+ Array.isArray(body.records) && body.records.length === 0,
99
+ driver.describe('RFC 0017 §A.stream.fromBeginning', 'subscribe with fromBeginning:false MUST omit pre-subscribe records'),
100
+ ).toBe(true);
101
+ expect(body.fromBeginningSnapshot).toBe(false);
102
+ });
66
103
  });
@@ -22,6 +22,7 @@ import { describe, it, expect } from 'vitest';
22
22
  import { driver } from '../lib/driver.js';
23
23
  import { pollUntilTerminal } from '../lib/polling.js';
24
24
  import { isFixtureAdvertised } from '../lib/fixtures.js';
25
+ import { setHostCapability, resetHostCapabilities, isToggleAvailable } from '../lib/host-toggle.js';
25
26
 
26
27
  const PARENT = 'conformance-subworkflow-input-mapping';
27
28
  const CHILD = 'conformance-subworkflow-input-mapping-child';
@@ -86,15 +87,84 @@ describe.skipIf(SKIP)('subworkflow-input-mapping: parent → child variable seed
86
87
  )).toBe('prd-1');
87
88
  });
88
89
 
89
- it.todo(
90
- 'HVMAP-2-unset: parent.currentPrdId unset; child receivedPrdId MUST surface as `undefined` (NOT omitted, NOT `null`). Requires a second parent fixture variant that omits currentPrdId\'s defaultValue.',
91
- );
90
+ it('HVMAP-2-unset: parent.currentPrdId unset → child receivedPrdId MUST surface as `undefined`', async () => {
91
+ const PARENT_NO_DEFAULT = 'conformance-subworkflow-input-mapping-no-default';
92
+ if (!isFixtureAdvertised(PARENT_NO_DEFAULT) || !isFixtureAdvertised(CHILD)) return; // soft-skip
93
+ const create = await driver.post('/v1/runs', { workflowId: PARENT_NO_DEFAULT });
94
+ expect(create.status).toBe(201);
95
+ const parentRunId = (create.json as { runId: string }).runId;
96
+ await pollUntilTerminal(parentRunId);
92
97
 
93
- it.todo(
94
- 'HVMAP-2-no-midrun-propagation: child mid-run; parent updates currentPrdId; child receivedPrdId MUST remain at seeded value (one-shot fold per §B normative bullet). Requires a multi-step child that suspends + a parent path that mutates.',
95
- );
98
+ const eventsRes = await driver.get(`/v1/runs/${encodeURIComponent(parentRunId)}/events`);
99
+ const events = ((eventsRes.json as { events?: RunEvent[] } | undefined)?.events ?? []);
100
+ const subwfCompleted = events.find(
101
+ (e) => e.type === 'node.completed' && e.nodeId === 'subwf-call',
102
+ );
103
+ if (!subwfCompleted) return;
104
+ const childRunId = subwfCompleted.payload?.outputs?.childRunId;
105
+
106
+ const childRes = await driver.get(`/v1/runs/${encodeURIComponent(childRunId!)}`);
107
+ const child = childRes.json as RunSnapshot;
108
+ const vars = child.variables ?? {};
109
+ const v = vars.receivedPrdId;
110
+ // Note: the spec says `undefined` (NOT null). On the wire, `undefined`
111
+ // serializes as either key-absent or the child's own defaultValue fold
112
+ // ("baked-in"). The MUST-NOT is `null`. Per RFC 0022 §B, inputMapping
113
+ // override happens AFTER defaultValue fold, so when the projection is
114
+ // undefined the child's defaultValue should remain.
115
+ expect(
116
+ v === 'baked-in' || v === undefined || !('receivedPrdId' in vars),
117
+ driver.describe(
118
+ 'RFCS/0022-dispatch-input-output-mapping.md §B',
119
+ 'unset parent variable MUST surface as undefined-or-defaultValue-fallback (NOT null)',
120
+ ),
121
+ ).toBe(true);
122
+ expect(v).not.toBe(null);
123
+ });
96
124
 
97
125
  it.todo(
98
- 'HVMAP-2-refusal: host advertises core.subWorkflow surface but NOT capabilities.subWorkflow.inputMapping: true; workflow with non-empty inputMapping MUST fail registration with validation_error + details.requiredCapability === "subWorkflow.inputMapping". Requires a host-capability-toggle hook in the conformance harness.',
126
+ 'HVMAP-2-no-midrun-propagation: child mid-run; parent updates currentPrdId; child receivedPrdId MUST remain at seeded value (one-shot fold per §B normative bullet). DEFERRED requires (1) a multi-step child fixture that suspends mid-run on a clarification gate, plus (2) a parent path that mutates `currentPrdId` AFTER the child is suspended. The reference workflow-engine has no parallel-execution model that lets the parent run a separate "mutate-var" node WHILE the subwf-call is blocked on the child; this needs either a new sample-namespaced `POST /v1/host/sample/test/runs/:runId/variables` seam OR a workflow primitive that splits the parent into a fan-out branch that mutates concurrently. Tracked under Phase 3 of the test-coverage plan as a separate "run-state mutation seam" task.',
99
127
  );
128
+
129
+ });
130
+
131
+ describe('subworkflow-input-mapping: registration refusal (RFC 0022 §C HVMAP-2-refusal)', () => {
132
+ it('host with subWorkflow.inputMapping toggled OFF MUST refuse non-empty inputMapping at registration', async () => {
133
+ if (!(await isToggleAvailable())) return; // seam not exposed — soft-skip
134
+ await setHostCapability('subWorkflow.inputMapping', false);
135
+ try {
136
+ const workflow = {
137
+ workflowId: `hvmap-2-refusal-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
138
+ nodes: [
139
+ {
140
+ nodeId: 'subwf-1',
141
+ typeId: 'core.subWorkflow',
142
+ config: {
143
+ childWorkflowId: 'some-child',
144
+ inputMapping: { receivedPrdId: 'currentPrdId' }, // non-empty — refusal trigger
145
+ },
146
+ },
147
+ ],
148
+ };
149
+ const res = await driver.post('/v1/host/sample/workflows', workflow);
150
+ expect(
151
+ res.status,
152
+ driver.describe(
153
+ 'RFCS/0022-dispatch-input-output-mapping.md §C',
154
+ 'workflow with non-empty subWorkflow.inputMapping MUST be refused when capability is not advertised',
155
+ ),
156
+ ).toBe(400);
157
+ const body = res.json as { error?: string; details?: { requiredCapability?: string } };
158
+ expect(body.error).toBe('validation_error');
159
+ expect(
160
+ body.details?.requiredCapability,
161
+ driver.describe(
162
+ 'RFCS/0022-dispatch-input-output-mapping.md §C',
163
+ 'refusal MUST surface requiredCapability: "subWorkflow.inputMapping"',
164
+ ),
165
+ ).toBe('subWorkflow.inputMapping');
166
+ } finally {
167
+ await resetHostCapabilities();
168
+ }
169
+ });
100
170
  });
@@ -42,6 +42,44 @@ describe('table-cursor-pagination: advertisement shape (RFC 0016)', () => {
42
42
  });
43
43
  });
44
44
 
45
- describe('table-cursor-pagination: behavioral assertions (placeholders need host test seam)', () => {
46
- it.todo("first page returns N rows + nextCursor; second page resumes; final page returns nextCursor=null");
45
+ async function call(op: string, args: Record<string, unknown>) {
46
+ return driver.post('/v1/host/sample/test/surface', { tenantId: 'tenant-a', surface: 'table', op, args });
47
+ }
48
+
49
+ describe('table-cursor-pagination: behavioral (RFC 0016 §B point 3)', () => {
50
+ it('first page returns N rows + nextCursor; second page resumes; final page returns nextCursor:null', async () => {
51
+ const probe = await call('query', { table: '__probe__', limit: 1 });
52
+ if (probe.status === 404) return; // seam not exposed
53
+ const table = `pag-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
54
+ // Seed 5 rows with deterministic ids so cursor ordering is testable.
55
+ for (let i = 1; i <= 5; i++) {
56
+ await call('insert', { table, row: { id: `row-${i.toString().padStart(2, '0')}`, n: i } });
57
+ }
58
+ // Page 1: limit=2
59
+ const p1 = await call('query', { table, limit: 2 });
60
+ const b1 = p1.json as { rows?: Array<{ id: string }>; nextCursor?: string | null };
61
+ expect(Array.isArray(b1.rows) && b1.rows.length === 2).toBe(true);
62
+ expect(
63
+ typeof b1.nextCursor === 'string' && b1.nextCursor.length > 0,
64
+ driver.describe('RFC 0016 §B point 3', 'first page MUST surface nextCursor when more results remain'),
65
+ ).toBe(true);
66
+
67
+ // Page 2: cursor from page 1, limit=2
68
+ const p2 = await call('query', { table, limit: 2, cursor: b1.nextCursor });
69
+ const b2 = p2.json as { rows?: Array<{ id: string }>; nextCursor?: string | null };
70
+ expect(b2.rows?.length).toBe(2);
71
+ expect(
72
+ b2.rows![0]!.id > b1.rows![1]!.id,
73
+ driver.describe('RFC 0016 §B point 3', 'second page MUST resume AFTER the last id of the previous page'),
74
+ ).toBe(true);
75
+
76
+ // Page 3: final page — only 1 row left, nextCursor MUST be null
77
+ const p3 = await call('query', { table, limit: 2, cursor: b2.nextCursor });
78
+ const b3 = p3.json as { rows?: Array<{ id: string }>; nextCursor?: string | null };
79
+ expect(b3.rows?.length).toBe(1);
80
+ expect(
81
+ b3.nextCursor,
82
+ driver.describe('RFC 0016 §B point 3', 'final page (no more results) MUST surface nextCursor: null'),
83
+ ).toBe(null);
84
+ });
47
85
  });
@@ -42,6 +42,43 @@ describe('table-schema-enforcement: advertisement shape (RFC 0016)', () => {
42
42
  });
43
43
  });
44
44
 
45
- describe('table-schema-enforcement: behavioral assertions (placeholders need host test seam)', () => {
46
- it.todo("first insert declares schema; subsequent insert with wrong column type is rejected");
45
+ async function call(op: string, args: Record<string, unknown>) {
46
+ return driver.post('/v1/host/sample/test/surface', { tenantId: 'tenant-a', surface: 'table', op, args });
47
+ }
48
+
49
+ describe('table-schema-enforcement: behavioral (RFC 0016 §B point 2)', () => {
50
+ it('first insert declares schema; subsequent insert with wrong column type is rejected', async () => {
51
+ const probe = await call('insert', { table: '__probe__', row: { id: 'probe-0' } });
52
+ if (probe.status === 404) return; // seam not exposed
53
+ const table = `sch-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
54
+ // First insert — declares the schema from this row's columns.
55
+ const first = await call('insert', {
56
+ table,
57
+ row: { id: 'row-1', name: 'alice', count: 42, active: true },
58
+ });
59
+ expect(first.status).toBe(200);
60
+
61
+ // Second insert — matching schema; MUST succeed.
62
+ const second = await call('insert', {
63
+ table,
64
+ row: { id: 'row-2', name: 'bob', count: 7, active: false },
65
+ });
66
+ expect(second.status).toBe(200);
67
+
68
+ // Third insert — `count` declared as number; sending a string MUST be rejected.
69
+ const bad = await call('insert', {
70
+ table,
71
+ row: { id: 'row-3', name: 'mallory', count: 'oops-a-string', active: true },
72
+ });
73
+ expect(
74
+ bad.status >= 400 && bad.status < 500,
75
+ driver.describe('RFC 0016 §B point 2', 'type-divergent insert MUST be rejected with 4xx'),
76
+ ).toBe(true);
77
+ const body = bad.json as { error?: { code?: string } | string };
78
+ const code = typeof body.error === 'string' ? body.error : body.error?.code;
79
+ expect(
80
+ code,
81
+ driver.describe('RFC 0016 §B point 2', 'rejection MUST carry the table_schema_violation error code'),
82
+ ).toBe('table_schema_violation');
83
+ });
47
84
  });
@@ -42,7 +42,47 @@ describe('vector-knn-roundtrip: advertisement shape (RFC 0018)', () => {
42
42
  });
43
43
  });
44
44
 
45
- describe('vector-knn-roundtrip: behavioral assertions (placeholders need host test seam)', () => {
46
- it.todo("upsert 10 vectors query with one of them returns it as top-1");
47
- it.todo("topK respects the configured limit");
45
+ async function call(op: string, args: Record<string, unknown>) {
46
+ return driver.post('/v1/host/sample/test/surface', { tenantId: 'tenant-a', surface: 'vector', op, args });
47
+ }
48
+
49
+ describe('vector-knn-roundtrip: behavioral (RFC 0018 §A.vectorStore)', () => {
50
+ it('upsert 10 vectors → query with one of them returns it as the top match', async () => {
51
+ const probe = await call('query', { namespace: '__probe__', vector: [1, 0], topK: 1 });
52
+ if (probe.status === 404) return; // seam not exposed
53
+ const namespace = `knn-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
54
+ const items = Array.from({ length: 10 }, (_, i) => ({
55
+ id: `vec-${i}`,
56
+ vector: [Math.cos((i * Math.PI) / 5), Math.sin((i * Math.PI) / 5)],
57
+ }));
58
+ const upsertRes = await call('upsert', { namespace, items });
59
+ expect(upsertRes.status).toBe(200);
60
+
61
+ const queryRes = await call('query', { namespace, vector: items[3]!.vector, topK: 1 });
62
+ expect(queryRes.status).toBe(200);
63
+ const body = queryRes.json as { matches?: Array<{ id?: string; score?: number }> };
64
+ expect(Array.isArray(body.matches), 'matches MUST be an array').toBe(true);
65
+ expect(body.matches!.length).toBeGreaterThan(0);
66
+ expect(
67
+ body.matches![0]!.id,
68
+ driver.describe('RFC 0018 §A.vectorStore', 'query with an indexed vector MUST return it as the top match'),
69
+ ).toBe('vec-3');
70
+ });
71
+
72
+ it('topK respects the configured limit', async () => {
73
+ const probe = await call('query', { namespace: '__probe__', vector: [1, 0], topK: 1 });
74
+ if (probe.status === 404) return;
75
+ const namespace = `topk-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
76
+ const items = Array.from({ length: 8 }, (_, i) => ({
77
+ id: `t-${i}`,
78
+ vector: [i / 10, 1 - i / 10],
79
+ }));
80
+ await call('upsert', { namespace, items });
81
+ const r3 = await call('query', { namespace, vector: [0.5, 0.5], topK: 3 });
82
+ const body = r3.json as { matches?: unknown[] };
83
+ expect(
84
+ Array.isArray(body.matches) && body.matches.length <= 3,
85
+ driver.describe('RFC 0018 §A.vectorStore', 'query MUST return at most topK matches'),
86
+ ).toBe(true);
87
+ });
48
88
  });