@openwop/openwop-conformance 1.1.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/CHANGELOG.md +90 -0
  2. package/README.md +2 -2
  3. package/api/redocly.yaml +15 -0
  4. package/coverage.md +27 -14
  5. package/fixtures/conformance-agent-low-confidence.json +7 -4
  6. package/fixtures/conformance-agent-pack-handoff-schema-validation.json +30 -0
  7. package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
  8. package/fixtures/conformance-agent-reasoning.json +23 -4
  9. package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
  10. package/fixtures/conformance-dispatch-cross-worker-handoff-child-a.json +27 -0
  11. package/fixtures/conformance-dispatch-cross-worker-handoff-child-b.json +25 -0
  12. package/fixtures/conformance-dispatch-cross-worker-handoff.json +60 -0
  13. package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
  14. package/fixtures/conformance-dispatch-input-mapping-child.json +25 -0
  15. package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
  16. package/fixtures/conformance-dispatch-input-mapping.json +49 -0
  17. package/fixtures/conformance-dispatch-output-mapping-child.json +27 -0
  18. package/fixtures/conformance-dispatch-output-mapping.json +49 -0
  19. package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
  20. package/fixtures/conformance-subworkflow-input-mapping-child.json +27 -0
  21. package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
  22. package/fixtures/conformance-subworkflow-input-mapping.json +33 -0
  23. package/fixtures.md +18 -2
  24. package/package.json +1 -1
  25. package/schemas/README.md +7 -0
  26. package/schemas/agent-ref.schema.json +1 -1
  27. package/schemas/ai-envelope.schema.json +106 -0
  28. package/schemas/capabilities.schema.json +264 -0
  29. package/schemas/core-conformance-mock-agent-config.schema.json +152 -0
  30. package/schemas/dispatch-config.schema.json +26 -0
  31. package/schemas/envelopes/clarification.request.schema.json +43 -0
  32. package/schemas/envelopes/error.schema.json +26 -0
  33. package/schemas/envelopes/schema.request.schema.json +22 -0
  34. package/schemas/envelopes/schema.response.schema.json +22 -0
  35. package/schemas/node-pack-manifest.schema.json +5 -0
  36. package/schemas/pack-lockfile.schema.json +16 -0
  37. package/schemas/run-event-payloads.schema.json +35 -1
  38. package/schemas/run-event.schema.json +2 -0
  39. package/schemas/workflow-chain-pack-manifest.schema.json +226 -0
  40. package/src/lib/driver.ts +15 -0
  41. package/src/lib/env.ts +51 -0
  42. package/src/lib/event-log-query.ts +62 -0
  43. package/src/lib/fixtures.ts +38 -1
  44. package/src/lib/host-toggle.ts +54 -0
  45. package/src/lib/multi-agent-capabilities.ts +10 -0
  46. package/src/lib/otel-scrape.ts +59 -0
  47. package/src/lib/webhook-receiver.ts +137 -0
  48. package/src/lib/workflow-chain-expansion.ts +213 -0
  49. package/src/scenarios/agentPackCatalog.test.ts +216 -0
  50. package/src/scenarios/agentPackHandoffSchemaValidation.test.ts +146 -0
  51. package/src/scenarios/agentReasoningEvents.test.ts +58 -7
  52. package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
  53. package/src/scenarios/agents-run-tool-allowlist.test.ts +182 -0
  54. package/src/scenarios/ai-envelope-shape.test.ts +362 -0
  55. package/src/scenarios/aiEnvelope.capBreached.test.ts +261 -0
  56. package/src/scenarios/aiEnvelope.contractRefusal.test.ts +268 -0
  57. package/src/scenarios/aiEnvelope.correlationReplay.test.ts +284 -0
  58. package/src/scenarios/aiEnvelope.redaction.test.ts +253 -0
  59. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +226 -0
  60. package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +194 -0
  61. package/src/scenarios/aiEnvelope.universalKinds.test.ts +267 -0
  62. package/src/scenarios/append-ordering.test.ts +44 -0
  63. package/src/scenarios/artifact-auth.test.ts +58 -0
  64. package/src/scenarios/blob-cross-tenant-isolation.test.ts +66 -0
  65. package/src/scenarios/blob-presign-expiry.test.ts +99 -0
  66. package/src/scenarios/blob-roundtrip.test.ts +0 -0
  67. package/src/scenarios/cache-cross-tenant-isolation.test.ts +61 -0
  68. package/src/scenarios/cache-ttl-expiry.test.ts +73 -0
  69. package/src/scenarios/dispatch-cross-worker-handoff.test.ts +129 -0
  70. package/src/scenarios/dispatch-input-mapping.test.ts +163 -0
  71. package/src/scenarios/dispatch-output-mapping.test.ts +155 -0
  72. package/src/scenarios/fixtures-gating.test.ts +139 -1
  73. package/src/scenarios/fs-path-traversal.test.ts +124 -0
  74. package/src/scenarios/idempotency-key-determinism.test.ts +230 -0
  75. package/src/scenarios/interrupt-token-matrix.test.ts +126 -0
  76. package/src/scenarios/kv-atomic-increment.test.ts +74 -0
  77. package/src/scenarios/kv-cas.test.ts +75 -0
  78. package/src/scenarios/kv-cross-tenant-isolation.test.ts +85 -0
  79. package/src/scenarios/kv-ttl-expiry.test.ts +78 -0
  80. package/src/scenarios/mcp-server-elicitation-bridge.test.ts +92 -0
  81. package/src/scenarios/mcp-server-prompt-roundtrip.test.ts +80 -0
  82. package/src/scenarios/mcp-server-resource-roundtrip.test.ts +82 -0
  83. package/src/scenarios/mcp-server-sampling-bridge.test.ts +84 -0
  84. package/src/scenarios/mcp-server-tool-roundtrip.test.ts +107 -0
  85. package/src/scenarios/mcp-server-untrusted-args.test.ts +105 -0
  86. package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
  87. package/src/scenarios/pack-registry-publish.test.ts +231 -51
  88. package/src/scenarios/pause-resume.test.ts +43 -0
  89. package/src/scenarios/provider-usage.test.ts +185 -0
  90. package/src/scenarios/queue-ack-nack-dlq.test.ts +121 -0
  91. package/src/scenarios/queue-cross-tenant-isolation.test.ts +66 -0
  92. package/src/scenarios/queue-publish-consume-roundtrip.test.ts +88 -0
  93. package/src/scenarios/replay-llm-cache-key.test.ts +166 -25
  94. package/src/scenarios/search-bm25-roundtrip.test.ts +92 -0
  95. package/src/scenarios/spec-corpus-validity.test.ts +17 -1
  96. package/src/scenarios/sql-injection-rejection.test.ts +84 -0
  97. package/src/scenarios/sql-transaction-atomicity.test.ts +95 -0
  98. package/src/scenarios/stream-subscribe-from-beginning.test.ts +103 -0
  99. package/src/scenarios/subworkflow-input-mapping.test.ts +170 -0
  100. package/src/scenarios/table-cross-tenant-isolation.test.ts +65 -0
  101. package/src/scenarios/table-cursor-pagination.test.ts +85 -0
  102. package/src/scenarios/table-schema-enforcement.test.ts +84 -0
  103. package/src/scenarios/vector-knn-roundtrip.test.ts +88 -0
  104. package/src/scenarios/webhook-receiver-adversarial.test.ts +210 -0
  105. package/src/scenarios/workflow-chain-expansion.test.ts +366 -0
  106. package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
  107. package/src/scenarios/workflow-chain-pack-manifest-validation.test.ts +232 -0
  108. package/src/scenarios/workflow-chain-pack-signature-verification.test.ts +138 -0
  109. package/src/scenarios/workflow-chain-unresolvable-typeid.test.ts +170 -0
@@ -0,0 +1,185 @@
1
+ /**
2
+ * RFC 0026 — `provider.usage` event conformance.
3
+ *
4
+ * Verifies the new optional event type added to `RunEventType` per RFC
5
+ * 0026. The event MUST fire after every LLM provider invocation,
6
+ * carrying per-call token counts + optional cost estimate. Three
7
+ * describe blocks:
8
+ *
9
+ * 1. Advertisement shape (`capabilities.providerUsage` block).
10
+ * 2. Schema round-trip (positive + negative fixtures).
11
+ * 3. Event presence + shape via the test-only emit seam +
12
+ * event-log query seam (Thread E.1).
13
+ *
14
+ * Each describe block soft-skips when the host doesn't expose the
15
+ * relevant seam OR the matching capability isn't advertised.
16
+ *
17
+ * @see RFCS/0026-provider-usage-event.md
18
+ * @see schemas/run-event-payloads.schema.json#/$defs/providerUsage
19
+ * @see SECURITY/invariants.yaml#provider-usage-no-credential-leak
20
+ */
21
+
22
+ import { describe, it, expect } from 'vitest';
23
+ import Ajv2020 from 'ajv/dist/2020.js';
24
+ import { readFileSync } from 'node:fs';
25
+ import { join } from 'node:path';
26
+ import { driver } from '../lib/driver.js';
27
+ import { SCHEMAS_DIR } from '../lib/paths.js';
28
+ import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
29
+
30
/** Shape of the optional `capabilities.providerUsage` advertisement block. */
interface ProviderUsageCap {
  supported?: boolean;
  costEstimates?: boolean;
  currency?: string;
}

/** The slice of the `/.well-known/openwop` discovery document read by this scenario. */
interface DiscoveryDoc {
  capabilities?: {
    providerUsage?: ProviderUsageCap;
  };
}

/**
 * Fetches the discovery document and extracts `capabilities.providerUsage`.
 *
 * @returns the advertised block, or `null` when it is missing or not an object.
 */
async function readProviderUsageCap(): Promise<ProviderUsageCap | null> {
  const discovery = await driver.get('/.well-known/openwop');
  const doc = discovery.json as DiscoveryDoc | undefined;
  const usageCap = doc?.capabilities?.providerUsage;
  if (!usageCap || typeof usageCap !== 'object') return null;
  return usageCap;
}
42
+
43
// Advertisement-shape checks for the optional `capabilities.providerUsage` block.
describe('provider-usage: capability advertisement (RFC 0026 §E)', () => {
  it('capabilities.providerUsage is either absent or a well-formed object', async () => {
    const advertised = await readProviderUsageCap();
    // The block is optional: when the host does not advertise it there is nothing to verify.
    if (advertised === null) return;

    const { supported, costEstimates, currency } = advertised;

    expect(
      typeof supported,
      driver.describe('RFC 0026 §E', 'capabilities.providerUsage.supported MUST be a boolean when the block is present'),
    ).toBe('boolean');

    if (costEstimates !== undefined) {
      expect(
        typeof costEstimates,
        driver.describe('RFC 0026 §E', 'capabilities.providerUsage.costEstimates MUST be a boolean when present'),
      ).toBe('boolean');
    }

    if (currency !== undefined) {
      // Three uppercase ASCII letters, e.g. `USD`.
      const looksLikeIsoCurrency = /^[A-Z]{3}$/.test(currency);
      expect(
        looksLikeIsoCurrency,
        driver.describe('RFC 0026 §E', 'capabilities.providerUsage.currency MUST be a 3-letter uppercase ISO 4217 code when present'),
      ).toBe(true);
    }
  });
});
65
+
66
// Validates hand-written payloads against the `providerUsage` definition taken
// from `run-event-payloads.schema.json`.
describe('provider-usage: schema round-trip (RFC 0026 §A)', () => {
  // The payloads schema file is parsed in full, but only its
  // `$defs.providerUsage` sub-schema is handed to Ajv.
  // NOTE(review): a `$ref` inside `providerUsage` that targets a sibling
  // `$defs` entry would not resolve from this standalone compile — confirm the
  // definition is self-contained.
  const schemaPath = join(SCHEMAS_DIR, 'run-event-payloads.schema.json');
  const payloadsDoc = JSON.parse(readFileSync(schemaPath, 'utf8')) as Record<string, unknown>;
  const defs = payloadsDoc.$defs as Record<string, unknown>;
  const providerUsageDef = defs.providerUsage as Record<string, unknown>;
  const ajv = new Ajv2020({ strict: false, allErrors: true });
  const validate = ajv.compile(providerUsageDef);

  it('positive fixture validates', () => {
    const wellFormed = {
      provider: 'anthropic',
      model: 'claude-3-5-sonnet-20240620',
      inputTokens: 145,
      outputTokens: 312,
      totalTokens: 457,
      costEstimateUsd: 0.005115,
      currency: 'USD',
      cacheHit: false,
      nodeId: 'chat-respond',
    };
    const ok = validate(wellFormed);
    expect(ok, `positive fixture MUST validate; errors: ${JSON.stringify(validate.errors)}`).toBe(true);
  });

  it('negative fixture (missing required field) MUST be rejected', () => {
    // `outputTokens` is deliberately left out.
    const missingOutputTokens = {
      provider: 'anthropic',
      model: 'claude-3-5-sonnet-20240620',
      inputTokens: 100,
    };
    const ok = validate(missingOutputTokens);
    expect(
      ok,
      driver.describe('RFC 0026 §A', 'payload missing required `outputTokens` MUST fail schema validation'),
    ).toBe(false);
  });

  it('negative fixture (additionalProperties — credentialRef leak) MUST be rejected', () => {
    // `credentialRef` is not part of the payload definition and must be refused.
    const withCredentialRef = {
      provider: 'anthropic',
      model: 'claude-3-5-sonnet-20240620',
      inputTokens: 100,
      outputTokens: 50,
      credentialRef: 'secret:tenant:byok-anthropic:v1',
    };
    const ok = validate(withCredentialRef);
    expect(
      ok,
      driver.describe('RFC 0026 §D', 'additionalProperties:false MUST reject credentialRef-shaped fields per provider-usage-no-credential-leak'),
    ).toBe(false);
  });

  it('negative fixture (non-integer token count) MUST be rejected', () => {
    // `inputTokens` carries a fractional value.
    const fractionalTokens = {
      provider: 'openai',
      model: 'gpt-4o',
      inputTokens: 100.5,
      outputTokens: 50,
    };
    const ok = validate(fractionalTokens);
    expect(ok, 'inputTokens MUST be integer per §A').toBe(false);
  });
});
125
+
126
// Drives the test-only emit seam and reads the result back through the
// event-log query seam. Both tests soft-skip (return early) when either seam
// is not exposed by the host.
describe('provider-usage: event presence via emit-seam + event-log query (RFC 0026 §B)', () => {
  it('emit-seam projects exactly one provider.usage event with required fields populated', async () => {
    if (!(await isEventLogSeamAvailable())) return; // E.1 seam not exposed — soft-skip
    const runId = `r-pu-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    const correlationId = `${runId}:node-1:turn-0:pu-1`;
    const payload = {
      provider: 'anthropic',
      model: 'claude-3-5-sonnet-20240620',
      inputTokens: 200,
      outputTokens: 80,
      totalTokens: 280,
      nodeId: 'node-1',
    };
    const emit = await driver.post('/v1/host/sample/test/emit-provider-usage', { runId, payload, correlationId, nodeId: 'node-1' });
    if (emit.status === 404) return; // emit seam not exposed
    expect(emit.status).toBe(200);

    // The emit was accepted, so from here on the seam is always reset — even
    // when an assertion throws or the query soft-skips — so that a failure
    // here cannot leak state into later tests.
    try {
      const events = await queryTestEvents(runId, { type: 'provider.usage' });
      if (!events.ok) return;
      expect(
        events.events.length,
        driver.describe('RFC 0026 §B', 'emit-seam MUST project exactly one provider.usage event'),
      ).toBe(1);
      const e = events.events[0]!;
      expect(e.payload.provider).toBe('anthropic');
      expect(e.payload.model).toBe('claude-3-5-sonnet-20240620');
      expect(e.payload.inputTokens).toBe(200);
      expect(e.payload.outputTokens).toBe(80);
      expect(e.causationId).toBe(correlationId);
      expect(e.nodeId).toBe('node-1');
    } finally {
      await resetTestSeam();
    }
  });

  it('emit-seam refuses payloads containing credentialRef-shaped content (provider-usage-no-credential-leak invariant)', async () => {
    if (!(await isEventLogSeamAvailable())) return;
    const runId = `r-pu-leak-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    // The secret-shaped string ('secret:…') is smuggled inside `nodeId`, a
    // property the positive schema fixture in this file also sends. The seam
    // is therefore expected to refuse based on the field's content.
    const res = await driver.post('/v1/host/sample/test/emit-provider-usage', {
      runId,
      payload: {
        provider: 'anthropic',
        model: 'claude-3-5-sonnet-20240620',
        inputTokens: 100,
        outputTokens: 50,
        nodeId: 'secret:tenant:byok-anthropic:v1', // banned content
      },
    });
    if (res.status === 404) return;

    // Reset unconditionally: if the host wrongly accepted the payload, the
    // failing assertion must not leave the leaked event behind.
    try {
      expect(
        res.status,
        driver.describe('SECURITY/invariants.yaml provider-usage-no-credential-leak', 'payload with credentialRef-shaped content MUST be refused'),
      ).toBe(400);
      const body = res.json as { error?: { code?: string } };
      expect(body.error?.code).toBe('provider_usage_credential_leak');
    } finally {
      await resetTestSeam();
    }
  });
});
@@ -0,0 +1,121 @@
1
+ /**
2
+ * queue-ack-nack-dlq — RFC 0017 advertisement-shape verification + seam-gated behavioral tests.
3
+ *
4
+ * Status: ACTIVE (advertisement-shape). RFC 0017 promoted to `Active`
5
+ * 2026-05-17. The matching `capabilities.queueBus` block has landed in
6
+ * `schemas/capabilities.schema.json`. This scenario asserts the advertisement
7
+ * shape against any host that boots the conformance suite; the behavioral
8
+ * assertions below soft-skip (return early) when the host does not expose
9
+ * the `/v1/host/sample/test/surface` test seam (probe returns 404).
10
+ *
11
+ * Summary: nack returns for redelivery; deadLetter routes to the configured DLQ.
12
+ *
13
+ * @see RFCS/0017-*.md
14
+ */
15
+
16
+ import { describe, it, expect } from 'vitest';
17
+ import { driver } from '../lib/driver.js';
18
+
19
/** Minimal view of the `/.well-known/openwop` discovery document. */
interface DiscoveryDoc {
  capabilities?: Record<string, unknown>;
}

/**
 * Reads `capabilities.queueBus` from the discovery document.
 *
 * @returns the advertised block, or `null` when `capabilities` or `queueBus`
 *   is missing or is not an object.
 */
async function readCap(): Promise<Record<string, unknown> | null> {
  const discovery = await driver.get('/.well-known/openwop');
  const doc = discovery.json as DiscoveryDoc | undefined;
  const capabilities = doc?.capabilities;
  if (!capabilities || typeof capabilities !== 'object') return null;
  const queueBus = capabilities['queueBus'];
  if (!queueBus || typeof queueBus !== 'object') return null;
  return queueBus as Record<string, unknown>;
}
30
+
31
// Shape checks on the optional `capabilities.queueBus` advertisement.
describe('queue-ack-nack-dlq: advertisement shape (RFC 0017)', () => {
  it('capabilities.queueBus is either absent or a well-formed object', async () => {
    const queueBus = await readCap();
    if (queueBus === null) return; // not advertised — nothing to verify

    const supportedType = typeof queueBus.supported;
    expect(
      supportedType,
      driver.describe(
        'capabilities.schema.json §queueBus',
        'capabilities.queueBus.supported MUST be a boolean when present',
      ),
    ).toBe('boolean');
  });

  it('deadLetterSupported is a boolean when set', async () => {
    const queueBus = await readCap();
    // Only hosts that advertise `supported: true` are inspected further.
    if (queueBus === null || queueBus.supported !== true) return;

    const deadLetterSupported = queueBus.deadLetterSupported;
    if (deadLetterSupported === undefined) return; // optional sub-field

    expect(
      typeof deadLetterSupported,
      driver.describe(
        'RFC 0017 §A',
        'queueBus.deadLetterSupported MUST be boolean when present',
      ),
    ).toBe('boolean');
  });
});
63
+
64
/**
 * Sends one `queueBus` operation to the sample host's test-surface endpoint,
 * always as tenant `tenant-a`.
 *
 * @param op operation name forwarded in the request body
 * @param args operation arguments forwarded in the request body
 */
async function call(op: string, args: Record<string, unknown>) {
  const request = { tenantId: 'tenant-a', surface: 'queueBus', op, args };
  return driver.post('/v1/host/sample/test/surface', request);
}
67
+
68
// Behavioral checks for nack-with-requeue and dead-lettering. Each test first
// probes the test-surface seam and returns early when the probe answers 404.
describe('queue-ack-nack-dlq: behavioral (RFC 0017 §B point 2 — nack + DLQ)', () => {
  it('nack(requeue=true) → message is redelivered on next consume with deliveryCount incremented', async () => {
    const seamProbe = await call('consume', { subject: '__probe__' });
    if (seamProbe.status === 404) return;

    // Unique subject per run so earlier runs cannot interfere.
    const subject = `q-nack-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    await call('publish', { subject, payload: { v: 'redeliver-me' } });

    const firstResponse = await call('consume', { subject });
    const firstDelivery = firstResponse.json as { deliveryToken?: string; payload?: unknown; deliveryCount?: number };
    expect(firstDelivery.deliveryCount).toBe(1);

    const nackResponse = await call('nack', { deliveryToken: firstDelivery.deliveryToken, requeue: true });
    const nackResult = nackResponse.json as { requeued?: boolean };
    expect(nackResult.requeued).toBe(true);

    const secondResponse = await call('consume', { subject });
    const redelivery = secondResponse.json as { found?: boolean; payload?: unknown; deliveryCount?: number };
    expect(
      redelivery.found,
      driver.describe('RFC 0017 §B point 2', 'nack(requeue=true) MUST make the message available to next consume'),
    ).toBe(true);
    expect(redelivery.payload).toEqual(firstDelivery.payload);
    expect(
      redelivery.deliveryCount,
      driver.describe('RFC 0017 §B point 2', 'redelivered message MUST have incremented deliveryCount'),
    ).toBe(2);
  });

  it('deadLetter → message appears on the <subject>.dlq subject; original subject is empty', async () => {
    const seamProbe = await call('consume', { subject: '__probe__' });
    if (seamProbe.status === 404) return;

    const subject = `q-dlq-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    const expectedDlqSubject = `${subject}.dlq`;
    await call('publish', { subject, payload: { v: 'poison' } });

    const consumed = await call('consume', { subject });
    const { deliveryToken } = consumed.json as { deliveryToken?: string };

    const deadLetterResponse = await call('deadLetter', { deliveryToken, reason: 'unparseable_payload' });
    const deadLetterResult = deadLetterResponse.json as { deadLettered?: boolean; dlqSubject?: string };
    expect(deadLetterResult.deadLettered).toBe(true);
    expect(deadLetterResult.dlqSubject).toBe(expectedDlqSubject);

    // The original subject must no longer hold the message.
    const afterDeadLetter = await call('consume', { subject });
    const afterBody = afterDeadLetter.json as { found?: boolean };
    expect(afterBody.found).toBe(false);

    // The DLQ subject must hold the original payload plus the recorded reason.
    const dlqResponse = await call('consume', { subject: expectedDlqSubject });
    const dlqBody = dlqResponse.json as { found?: boolean; payload?: { original?: unknown; deadLetterReason?: string } };
    expect(
      dlqBody.found,
      driver.describe('RFC 0017 §B point 2', 'deadLetter MUST route the message to the <subject>.dlq subject'),
    ).toBe(true);
    expect(dlqBody.payload?.deadLetterReason).toBe('unparseable_payload');
    expect(dlqBody.payload?.original).toEqual({ v: 'poison' });
  });
});
@@ -0,0 +1,66 @@
1
+ /**
2
+ * queue-cross-tenant-isolation — RFC 0017 §C + SECURITY/invariants.yaml
3
+ * `queue-cross-tenant-isolation`.
4
+ *
5
+ * Status: ACTIVE (advertisement + behavioral). Asserts that messages
6
+ * published under tenant A on topic T MUST NOT be consumed under tenant B
7
+ * on the same topic.
8
+ *
9
+ * @see RFCS/0017-host-queue-bus-capability.md
10
+ */
11
+
12
+ import { describe, it, expect } from 'vitest';
13
+ import { driver } from '../lib/driver.js';
14
+
15
/** Minimal view of the `/.well-known/openwop` discovery document. */
interface DiscoveryDoc {
  capabilities?: Record<string, unknown>;
}

/**
 * Looks up the `queueBus` entry under `capabilities` in the discovery document.
 *
 * @returns the entry when it is an object; `null` when either level is absent
 *   or not an object.
 */
async function readCap(): Promise<Record<string, unknown> | null> {
  const response = await driver.get('/.well-known/openwop');
  const discovery = response.json as DiscoveryDoc | undefined;
  const allCaps = discovery?.capabilities;
  if (!allCaps || typeof allCaps !== 'object') return null;
  const queueBus = allCaps['queueBus'];
  if (!queueBus || typeof queueBus !== 'object') return null;
  return queueBus as Record<string, unknown>;
}
26
+
27
/**
 * Sends one `queueBus` operation to the sample host's test-surface endpoint on
 * behalf of the given tenant.
 *
 * @param tenantId tenant the operation is issued as
 * @param op operation name forwarded in the request body
 * @param args operation arguments forwarded in the request body
 */
async function call(tenantId: string, op: string, args: Record<string, unknown>) {
  const request = { tenantId, surface: 'queueBus', op, args };
  return driver.post('/v1/host/sample/test/surface', request);
}
30
+
31
// Shape check on the optional `capabilities.queueBus` advertisement.
describe('queue-cross-tenant-isolation: advertisement shape (RFC 0017)', () => {
  it('capabilities.queueBus is either absent or a well-formed object', async () => {
    const queueBus = await readCap();
    if (queueBus === null) return; // not advertised — nothing to verify

    const supportedType = typeof queueBus.supported;
    expect(
      supportedType,
      driver.describe(
        'capabilities.schema.json §queueBus',
        'capabilities.queueBus.supported MUST be a boolean when present',
      ),
    ).toBe('boolean');
  });
});
44
+
45
// Publishes as `tenant-a`, then consumes as `tenant-b` on the same topic and
// expects nothing to be found. Skipped unless the host advertises
// `queueBus.supported === true` and the test-surface seam answers (non-404).
//
// NOTE(review): this scenario addresses the queue with a `topic` argument,
// whereas the other queueBus scenarios send `subject` — confirm the seam
// accepts `topic`. There is also no positive control showing that tenant A
// itself can consume the message.
describe('queue-cross-tenant-isolation: behavioral (RFC 0017 §C)', () => {
  it('publish under tenant A → consume under tenant B returns not-found', async () => {
    const queueBus = await readCap();
    if (queueBus === null || queueBus.supported !== true) return;

    // Unique topic per run so earlier runs cannot interfere.
    const topic = `xtenant.${Date.now()}.${Math.random().toString(36).slice(2, 6)}`;

    const published = await call('tenant-a', 'publish', { topic, payload: { hello: 'A' } });
    if (published.status === 404) return; // seam not exposed
    expect(published.status, 'publish MUST succeed').toBe(200);

    const foreignConsume = await call('tenant-b', 'consume', { topic, timeoutMs: 100 });
    expect(foreignConsume.status).toBe(200);

    const { found } = foreignConsume.json as { found?: boolean };
    expect(
      found,
      driver.describe(
        'SECURITY/invariants.yaml queue-cross-tenant-isolation',
        'tenant B MUST NOT consume tenant A messages on the same topic',
      ),
    ).toBe(false);
  });
});
@@ -0,0 +1,88 @@
1
+ /**
2
+ * queue-publish-consume-roundtrip — RFC 0017 advertisement-shape verification + seam-gated behavioral tests.
3
+ *
4
+ * Status: ACTIVE (advertisement-shape). RFC 0017 promoted to `Active`
5
+ * 2026-05-17. The matching `capabilities.queueBus` block has landed in
6
+ * `schemas/capabilities.schema.json`. This scenario asserts the advertisement
7
+ * shape against any host that boots the conformance suite; the behavioral
8
+ * assertions below soft-skip (return early) when the host does not expose
9
+ * the `/v1/host/sample/test/surface` test seam (probe returns 404).
10
+ *
11
+ * Summary: publish + consume + ack roundtrip.
12
+ *
13
+ * @see RFCS/0017-*.md
14
+ */
15
+
16
+ import { describe, it, expect } from 'vitest';
17
+ import { driver } from '../lib/driver.js';
18
+
19
/** Minimal view of the `/.well-known/openwop` discovery document. */
interface DiscoveryDoc {
  capabilities?: Record<string, unknown>;
}

/**
 * Retrieves the `capabilities.queueBus` block from the discovery document.
 *
 * @returns the block when it is an object; otherwise `null` (including when
 *   `capabilities` itself is absent or not an object).
 */
async function readCap(): Promise<Record<string, unknown> | null> {
  const wellKnown = await driver.get('/.well-known/openwop');
  const doc = wellKnown.json as DiscoveryDoc | undefined;
  const caps = doc?.capabilities;
  if (!caps || typeof caps !== 'object') return null;
  const queueBus = caps['queueBus'];
  if (!queueBus || typeof queueBus !== 'object') return null;
  return queueBus as Record<string, unknown>;
}
30
+
31
// Shape check on the optional `capabilities.queueBus` advertisement.
describe('queue-publish-consume-roundtrip: advertisement shape (RFC 0017)', () => {
  it('capabilities.queueBus is either absent or a well-formed object', async () => {
    const queueBus = await readCap();
    if (queueBus === null) return; // not advertised — nothing to verify

    const supportedType = typeof queueBus.supported;
    expect(
      supportedType,
      driver.describe(
        'capabilities.schema.json §queueBus',
        'capabilities.queueBus.supported MUST be a boolean when present',
      ),
    ).toBe('boolean');
  });
});
44
+
45
/**
 * Issues a single `queueBus` operation against the sample host's test-surface
 * endpoint, always as tenant `tenant-a`.
 *
 * @param op operation name forwarded in the request body
 * @param args operation arguments forwarded in the request body
 */
async function call(op: string, args: Record<string, unknown>) {
  const request = { tenantId: 'tenant-a', surface: 'queueBus', op, args };
  return driver.post('/v1/host/sample/test/surface', request);
}
48
+
49
// Publish → consume → ack round-trip. Each test first probes the test-surface
// seam and returns early when the probe answers 404.
describe('queue-publish-consume-roundtrip: behavioral (RFC 0017 §B point 2)', () => {
  it('publish → consume returns the same payload + subject', async () => {
    const seamProbe = await call('consume', { subject: '__probe__' });
    if (seamProbe.status === 404) return; // seam not exposed

    // Unique subject per run so earlier runs cannot interfere.
    const subject = `q-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    const payload = { event: 'order.created', orderId: 42 };

    const published = await call('publish', { subject, payload });
    expect(published.status).toBe(200);

    const consumed = await call('consume', { subject });
    expect(consumed.status).toBe(200);

    const delivery = consumed.json as { found?: boolean; subject?: string; payload?: unknown; deliveryToken?: string };
    expect(delivery.found, 'consume MUST find the just-published message').toBe(true);
    expect(delivery.subject).toBe(subject);
    expect(
      delivery.payload,
      driver.describe('RFC 0017 §B point 2', 'consume MUST return the exact published payload'),
    ).toEqual(payload);
    expect(typeof delivery.deliveryToken, 'consume MUST return a deliveryToken for ack/nack').toBe('string');
  });

  it('ack removes the message; subsequent consume on empty queue returns found:false', async () => {
    const seamProbe = await call('consume', { subject: '__probe__' });
    if (seamProbe.status === 404) return;

    const subject = `q-ack-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    await call('publish', { subject, payload: { v: 1 } });

    const consumed = await call('consume', { subject });
    const { deliveryToken } = consumed.json as { deliveryToken?: string };

    const ackResponse = await call('ack', { deliveryToken });
    expect(ackResponse.status).toBe(200);
    const ackResult = ackResponse.json as { acked?: boolean };
    expect(ackResult.acked).toBe(true);

    // With the only message acked, the queue should now report nothing.
    const afterAck = await call('consume', { subject });
    const { found } = afterAck.json as { found?: boolean };
    expect(
      found,
      driver.describe('RFC 0017 §B point 2', 'consume after ack MUST surface as found:false'),
    ).toBe(false);
  });
});
@@ -1,35 +1,176 @@
1
1
  /**
2
- * Cross-host LLM cache-key parity (replay.md §"LLM cache-key recipe").
2
+ * LLM cache-key recipe — `replay.md §"LLM cache-key recipe"` §A + §B.
3
3
  *
4
- * Verifies that two OpenWOP-compliant hosts replaying the same LLM
5
- * provider request compute the same cache key. The recipe is normative
6
- * (replay.md §B): canonical JSON of `(provider, model, messages, tools,
7
- * temperature, topP, topK, responseFormat)` → SHA-256 → lowercase hex.
4
+ * Verifies that an OpenWOP host computes the LLM cache key per the
5
+ * normative recipe: SHA-256 over RFC 8785 JCS-canonicalized JSON of
6
+ * the closed set of recipe fields (`provider, model, messages, tools,
7
+ * temperature, topP, topK, responseFormat`).
8
8
  *
9
- * Status: PLACEHOLDER. As of 2026-05-11, neither reference host
10
- * (`examples/hosts/in-memory/`, `examples/hosts/sqlite/`) implements
11
- * LLM-calling nodes both execute only `core.noop` / `core.delay` /
12
- * `core.approvalGate` fixtures. This scenario lands as `it.todo()` so
13
- * the contract surface is tracked; assertions land when the first
14
- * reference host ships an LLM-call node.
9
+ * The single-host assertions drive the env-gated test seam at
10
+ * `POST /v1/host/sample/test/llm-cache-key` and recompute the expected
11
+ * key locally per the recipe, asserting equality. Non-recipe fields
12
+ * (`max_tokens`, `stop`, `stream`, `seed`, etc.) MUST NOT influence
13
+ * the key per §A.
15
14
  *
16
- * What the live scenario WILL exercise (when implemented):
17
- * 1. Boot host A against `OPENWOP_BASE_URL`.
18
- * 2. Boot host B against `OPENWOP_BASE_URL_B`.
19
- * 3. Submit the same workflow + inputs (an LLM-calling fixture).
20
- * 4. Read each host's emitted `node.completed.payload.cacheKey` (or
21
- * equivalent debug-bundle surface).
22
- * 5. Assert the two hex strings are equal.
15
+ * The cross-host assertion (two hosts compute the same key) stays
16
+ * deferred — it requires `OPENWOP_BASE_URL_B` for a second-host probe,
17
+ * which is operator-supplied and outside this scenario file's scope.
23
18
  *
24
19
  * @see spec/v1/replay.md §"LLM cache-key recipe"
25
20
  */
26
21
 
27
- import { describe, it } from 'vitest';
22
+ import { describe, it, expect } from 'vitest';
23
+ import { createHash } from 'node:crypto';
24
+ import { driver } from '../lib/driver.js';
28
25
 
29
- describe('replay-llm-cache-key: cross-host determinism (placeholder)', () => {
30
- it.todo(
31
- 'two hosts replaying the same LLM provider request compute the same cache key (replay.md §D)',
32
- );
33
- it.todo('LLM cache key is computed via SHA-256 of canonical JSON per replay.md §B');
34
- it.todo('cache key omits non-recipe fields (max_tokens, stop, stream, seed, etc.) per replay.md §A');
26
/**
 * Serialize `value` to canonical JSON text in the style of RFC 8785 (JCS):
 * object keys sorted, no insignificant whitespace, array order preserved.
 *
 * Mirror of the reference impl's `canonicalize` so the conformance scenario
 * can recompute the expected cache key locally and assert equality with what
 * the host returns.
 *
 * `undefined` is handled the way `JSON.stringify` handles it, so the output is
 * always valid JSON: object members whose value is `undefined` are omitted and
 * `undefined` array items (including holes) are written as `null`.
 *
 * @param value - JSON-compatible value to serialize.
 * @returns The canonical JSON text.
 * @throws TypeError if `value` (or a nested value) has no JSON representation,
 *   e.g. a top-level `undefined`, a function, a symbol, or a bigint.
 */
function canonicalize(value: unknown): string {
  if (value === null) return 'null';
  if (typeof value === 'boolean' || typeof value === 'number' || typeof value === 'string') {
    // Primitive serialization is delegated to JSON.stringify.
    return JSON.stringify(value);
  }
  if (Array.isArray(value)) {
    const items: string[] = [];
    // Index loop (rather than map/join) so holes are visited and not emitted
    // as empty strings, which would yield invalid JSON such as `[1,,2]`.
    for (let i = 0; i < value.length; i += 1) {
      const item: unknown = value[i];
      items.push(item === undefined ? 'null' : canonicalize(item));
    }
    return '[' + items.join(',') + ']';
  }
  if (typeof value === 'object') {
    const obj = value as Record<string, unknown>;
    const members: string[] = [];
    // Default sort() orders keys by UTF-16 code units.
    for (const key of Object.keys(obj).sort()) {
      const member = obj[key];
      if (member === undefined) continue; // absent in JSON, so absent here
      members.push(`${JSON.stringify(key)}:${canonicalize(member)}`);
    }
    return '{' + members.join(',') + '}';
  }
  // undefined / function / symbol / bigint: nothing valid to emit.
  throw new TypeError(`canonicalize: value of type ${typeof value} is not representable in JSON`);
}
42
+
43
/**
 * Reduce a raw LLM request to the fields that take part in the cache key.
 *
 * `provider`, `model` and `messages` are always copied (even when absent).
 * `tools` is included only for a non-empty array and is emitted as a copy
 * sorted by `name`; the caller's array is left untouched. `temperature`,
 * `topP` and `topK` are included only when they are numbers, and
 * `responseFormat` only when it is a non-null object. Every other field of
 * `raw` is dropped.
 *
 * NOTE(review): tool ordering uses `localeCompare`, whose result can depend on
 * the runtime's locale data — confirm this matches the ordering the spec
 * requires.
 *
 * @param raw - The request body as sent to the host.
 * @returns A new object holding only the recipe fields.
 */
function projectRecipe(raw: Record<string, unknown>): Record<string, unknown> {
  const { provider, model, messages, tools, responseFormat } = raw;
  const recipe: Record<string, unknown> = { provider, model, messages };

  if (Array.isArray(tools) && tools.length > 0) {
    const byName = (tools as Array<{ name: string }>).slice();
    byName.sort((left, right) => left.name.localeCompare(right.name));
    recipe.tools = byName;
  }

  for (const field of ['temperature', 'topP', 'topK'] as const) {
    const sampling = raw[field];
    if (typeof sampling === 'number') {
      recipe[field] = sampling;
    }
  }

  if (typeof responseFormat === 'object' && responseFormat !== null) {
    recipe.responseFormat = responseFormat;
  }
  return recipe;
}
54
+
55
/**
 * Compute the cache key this scenario expects the host to return for `input`:
 * project the request to its recipe fields, canonicalize that projection, and
 * hash the UTF-8 text with SHA-256.
 *
 * @param input - The raw request body sent to the host.
 * @returns The SHA-256 digest as a hex string.
 */
function expectedCacheKey(input: Record<string, unknown>): string {
  const canonicalJson = canonicalize(projectRecipe(input));
  const hasher = createHash('sha256');
  hasher.update(canonicalJson, 'utf8');
  return hasher.digest('hex');
}
58
+
59
/**
 * POST `input` to the host's LLM cache-key test seam and return the HTTP
 * status together with the reported cache key, if any.
 *
 * The `cacheKey` property is present on the result only when the response
 * body carried a string `cacheKey`; otherwise the result holds just `status`.
 *
 * @param input - Request body forwarded to the seam as-is.
 * @returns `{ status }`, plus `cacheKey` when the host returned one.
 */
async function callSeam(input: Record<string, unknown>): Promise<{ status: number; cacheKey?: string }> {
  const res = await driver.post('/v1/host/sample/test/llm-cache-key', input);
  // The response may have no parsed JSON body (e.g. a bare 404 when the seam is
  // not exposed). Guard the access so callers still get the status and can
  // soft-skip instead of crashing here.
  const body = res.json as { cacheKey?: unknown } | null | undefined;
  const cacheKey = body?.cacheKey;
  // Only surface the key when it really is a string, keeping the declared
  // return type truthful.
  return typeof cacheKey === 'string' ? { status: res.status, cacheKey } : { status: res.status };
}
64
+
65
/**
 * Recipe checks: the key returned by the host must equal the key recomputed
 * locally, and must have the shape of a hex-encoded SHA-256 digest. Both tests
 * return early when the seam answers 404.
 */
describe('replay-llm-cache-key: SHA-256-over-JCS recipe (replay.md §B)', () => {
  it('host cache key MUST equal locally-recomputed SHA-256 over canonical JSON', async () => {
    const request = {
      provider: 'anthropic',
      model: 'claude-3-5-sonnet-20240620',
      messages: [
        { role: 'system' as const, content: 'You are a helpful assistant.' },
        { role: 'user' as const, content: 'What is 2+2?' },
      ],
      temperature: 0.7,
    };

    const { status, cacheKey } = await callSeam(request);
    if (status === 404) return; // seam not exposed
    expect(status).toBe(200);

    const failureMessage = driver.describe(
      'replay.md §B',
      'host cache key MUST be lowercase-hex SHA-256 of the canonical recipe JSON',
    );
    expect(cacheKey, failureMessage).toBe(expectedCacheKey(request));
  });

  it('cache key MUST be 64 lowercase-hex characters (SHA-256 output shape)', async () => {
    const sha256Hex = /^[0-9a-f]{64}$/;
    const response = await callSeam({
      provider: 'openai',
      model: 'gpt-4',
      messages: [{ role: 'user', content: 'hi' }],
    });
    if (response.status === 404) return; // seam not exposed
    expect(response.cacheKey).toMatch(sha256Hex);
  });
});
95
+
96
/**
 * Field-sensitivity checks: fields outside the recipe must not change the
 * cache key, while a change to a recipe field must. Both tests return early
 * when the seam answers 404.
 */
describe('replay-llm-cache-key: non-recipe fields are EXCLUDED (replay.md §A)', () => {
  it('max_tokens / stop / stream / seed / metadata / user MUST NOT influence the cache key', async () => {
    const base = {
      provider: 'openai',
      model: 'gpt-4',
      messages: [{ role: 'user', content: 'unit test' }],
      temperature: 0.5,
    };
    const baseResult = await callSeam(base);
    if (baseResult.status === 404) return; // seam not exposed
    // Require a real key up front: without this, two responses that both lack
    // a cacheKey would compare equal (undefined === undefined) and the test
    // would pass vacuously.
    expect(baseResult.status).toBe(200);
    expect(typeof baseResult.cacheKey, 'host MUST return a cacheKey string').toBe('string');

    // All these non-recipe fields MUST NOT affect the cache key per §A.
    const noisy = {
      ...base,
      max_tokens: 1000,
      stop: ['STOP'],
      stream: true,
      seed: 42,
      metadata: { traceId: 'abcd' },
      user: 'unit-test-user',
    };
    const noisyResult = await callSeam(noisy);
    expect(noisyResult.status).toBe(200);
    expect(
      noisyResult.cacheKey,
      driver.describe(
        'replay.md §A',
        'cache key MUST be invariant under non-recipe field changes (max_tokens, stop, stream, seed, metadata, user)',
      ),
    ).toBe(baseResult.cacheKey);
  });

  it('changing a recipe field (temperature) MUST yield a different cache key', async () => {
    const baseInput = {
      provider: 'openai',
      model: 'gpt-4',
      messages: [{ role: 'user', content: 'diversity-probe' }],
      temperature: 0.0,
    };
    const hotInput = { ...baseInput, temperature: 1.0 };

    const baseResult = await callSeam(baseInput);
    if (baseResult.status === 404) return; // seam not exposed
    expect(baseResult.status).toBe(200);
    expect(typeof baseResult.cacheKey, 'host MUST return a cacheKey string').toBe('string');

    const hotResult = await callSeam(hotInput);
    expect(hotResult.status).toBe(200);
    expect(typeof hotResult.cacheKey, 'host MUST return a cacheKey string').toBe('string');

    // Compare the keys directly so a failure prints both values.
    expect(
      hotResult.cacheKey,
      driver.describe('replay.md §A', 'changing a recipe field MUST yield a different cache key (no false collisions)'),
    ).not.toBe(baseResult.cacheKey);
  });
});
144
+
145
/**
 * Cross-host parity: when a second host is configured via
 * `OPENWOP_BASE_URL_B`, both hosts must return the same cache key for the same
 * request. The test returns early (soft-skip) when the second host is not
 * configured or when either host answers 404 for the seam.
 */
describe('replay-llm-cache-key: cross-host parity (replay.md §D)', () => {
  it('two hosts compute the same cache key for the same input (when OPENWOP_BASE_URL_B is configured)', async () => {
    const otherBaseUrl = process.env.OPENWOP_BASE_URL_B;
    if (!otherBaseUrl) return; // second host not configured (unset or empty) — soft-skip

    const input = {
      provider: 'anthropic',
      model: 'claude-3-5-sonnet-20240620',
      messages: [
        { role: 'system' as const, content: 'cross-host parity probe' },
        { role: 'user' as const, content: 'compute the same key' },
      ],
      temperature: 0.5,
    };

    const a = await callSeam(input);
    if (a.status === 404) return; // host A doesn't expose the seam
    // Require a real key from host A; otherwise two responses without a
    // cacheKey would compare equal and the parity check would pass vacuously.
    expect(a.status).toBe(200);
    expect(typeof a.cacheKey, 'host A MUST return a cacheKey string').toBe('string');

    // Host B's key falls back to the primary key, then to an empty string.
    const otherApiKey = process.env.OPENWOP_API_KEY_B ?? process.env.OPENWOP_API_KEY ?? '';
    // Issue the second probe directly via fetch since the driver is bound to
    // OPENWOP_BASE_URL. Authorization mirrors the suite's default.
    const resB = await fetch(`${otherBaseUrl.replace(/\/$/, '')}/v1/host/sample/test/llm-cache-key`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', Authorization: `Bearer ${otherApiKey}` },
      body: JSON.stringify(input),
    });
    if (resB.status === 404) return; // host B doesn't expose the seam
    expect(resB.status).toBe(200);

    const b = (await resB.json()) as { cacheKey?: string };
    expect(
      a.cacheKey,
      driver.describe('replay.md §D', 'two compliant hosts MUST compute byte-identical cache keys for the same recipe input'),
    ).toBe(b.cacheKey);
  });
});