@openwop/openwop-conformance 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. package/CHANGELOG.md +156 -1
  2. package/README.md +3 -2
  3. package/api/asyncapi.yaml +8 -0
  4. package/api/openapi.yaml +371 -1
  5. package/api/redocly.yaml +15 -0
  6. package/coverage.md +26 -5
  7. package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
  8. package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
  9. package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
  10. package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
  11. package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
  12. package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
  13. package/fixtures/conformance-envelope-recovery-applied.json +39 -0
  14. package/fixtures/conformance-envelope-refusal.json +38 -0
  15. package/fixtures/conformance-envelope-retry-attempted.json +39 -0
  16. package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
  17. package/fixtures/conformance-envelope-truncated.json +39 -0
  18. package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
  19. package/fixtures/conformance-model-capability-insufficient.json +25 -0
  20. package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
  21. package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
  22. package/fixtures/conformance-multi-agent-handoff.json +49 -0
  23. package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
  24. package/fixtures/conformance-prompt-end-to-end.json +33 -0
  25. package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
  26. package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
  27. package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
  28. package/fixtures/openwop-smoke-cost-emit.json +37 -0
  29. package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
  30. package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
  31. package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
  32. package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
  33. package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
  34. package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
  35. package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
  36. package/fixtures.md +45 -0
  37. package/package.json +1 -1
  38. package/schemas/README.md +5 -0
  39. package/schemas/agent-manifest.schema.json +16 -0
  40. package/schemas/capabilities.schema.json +390 -0
  41. package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
  42. package/schemas/envelopes/clarification.request.schema.json +9 -0
  43. package/schemas/envelopes/error.schema.json +4 -0
  44. package/schemas/envelopes/schema.request.schema.json +4 -0
  45. package/schemas/envelopes/schema.response.schema.json +1 -1
  46. package/schemas/node-pack-manifest.schema.json +28 -0
  47. package/schemas/orchestrator-decision.schema.json +12 -0
  48. package/schemas/prompt-kind.schema.json +8 -0
  49. package/schemas/prompt-pack-manifest.schema.json +80 -0
  50. package/schemas/prompt-ref.schema.json +40 -0
  51. package/schemas/prompt-template.schema.json +149 -0
  52. package/schemas/registry-version-manifest.schema.json +5 -0
  53. package/schemas/run-ancestry-response.schema.json +54 -0
  54. package/schemas/run-event-payloads.schema.json +513 -11
  55. package/schemas/run-event.schema.json +17 -1
  56. package/schemas/run-snapshot.schema.json +3 -2
  57. package/schemas/workflow-definition.schema.json +19 -1
  58. package/src/lib/driver.ts +15 -0
  59. package/src/lib/env.ts +51 -0
  60. package/src/lib/event-log-query.ts +62 -0
  61. package/src/lib/fixtures.ts +38 -1
  62. package/src/lib/host-toggle.ts +54 -0
  63. package/src/lib/llm-cache-key-recipe.ts +68 -0
  64. package/src/lib/multi-agent-capabilities.ts +10 -0
  65. package/src/lib/otel-scrape.ts +59 -0
  66. package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
  67. package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
  68. package/src/scenarios/aiEnvelope.contractRefusal.test.ts +224 -15
  69. package/src/scenarios/aiEnvelope.correlationReplay.test.ts +257 -25
  70. package/src/scenarios/aiEnvelope.redaction.test.ts +210 -29
  71. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +163 -24
  72. package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +262 -12
  73. package/src/scenarios/aiEnvelope.universalKinds.test.ts +107 -16
  74. package/src/scenarios/blob-presign-expiry.test.ts +42 -9
  75. package/src/scenarios/blob-roundtrip.test.ts +0 -0
  76. package/src/scenarios/cache-ttl-expiry.test.ts +34 -8
  77. package/src/scenarios/cost-attribution.test.ts +124 -11
  78. package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
  79. package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
  80. package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
  81. package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
  82. package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
  83. package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
  84. package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
  85. package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
  86. package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
  87. package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
  88. package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
  89. package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
  90. package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
  91. package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
  92. package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
  93. package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
  94. package/src/scenarios/envelope-truncated.test.ts +136 -0
  95. package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
  96. package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
  97. package/src/scenarios/fixtures-gating.test.ts +139 -1
  98. package/src/scenarios/fixtures-valid.test.ts +123 -15
  99. package/src/scenarios/kv-ttl-expiry.test.ts +40 -9
  100. package/src/scenarios/model-capability-insufficient.test.ts +221 -0
  101. package/src/scenarios/model-capability-substituted.test.ts +203 -0
  102. package/src/scenarios/multi-agent-confidence-escalation.test.ts +164 -0
  103. package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
  104. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
  105. package/src/scenarios/multi-region-idempotency.test.ts +58 -0
  106. package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
  107. package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
  108. package/src/scenarios/pack-registry-publish.test.ts +231 -51
  109. package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
  110. package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
  111. package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
  112. package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
  113. package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
  114. package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
  115. package/src/scenarios/prompt-pack-install.test.ts +187 -0
  116. package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
  117. package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
  118. package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
  119. package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
  120. package/src/scenarios/prompt-template-shape.test.ts +359 -0
  121. package/src/scenarios/provider-usage.test.ts +185 -0
  122. package/src/scenarios/queue-ack-nack-dlq.test.ts +64 -10
  123. package/src/scenarios/queue-publish-consume-roundtrip.test.ts +50 -10
  124. package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
  125. package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
  126. package/src/scenarios/replay-llm-cache-key.test.ts +127 -25
  127. package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
  128. package/src/scenarios/sandbox-capability-gate-respected.test.ts +31 -0
  129. package/src/scenarios/sandbox-memory-cap.test.ts +61 -0
  130. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +35 -0
  131. package/src/scenarios/sandbox-no-host-env-leak.test.ts +38 -0
  132. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +91 -0
  133. package/src/scenarios/sandbox-no-host-process-escape.test.ts +30 -0
  134. package/src/scenarios/sandbox-no-network-escape.test.ts +49 -0
  135. package/src/scenarios/sandbox-timeout-cap.test.ts +61 -0
  136. package/src/scenarios/search-bm25-roundtrip.test.ts +54 -9
  137. package/src/scenarios/spec-corpus-validity.test.ts +34 -6
  138. package/src/scenarios/sql-transaction-atomicity.test.ts +37 -8
  139. package/src/scenarios/stream-subscribe-from-beginning.test.ts +46 -9
  140. package/src/scenarios/subworkflow-input-mapping.test.ts +146 -10
  141. package/src/scenarios/table-cursor-pagination.test.ts +47 -9
  142. package/src/scenarios/table-schema-enforcement.test.ts +46 -9
  143. package/src/scenarios/vector-knn-roundtrip.test.ts +50 -10
  144. package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
@@ -0,0 +1,185 @@
1
+ /**
2
+ * RFC 0026 — `provider.usage` event conformance.
3
+ *
4
+ * Verifies the new optional event type added to `RunEventType` per RFC
5
+ * 0026. The event MUST fire after every LLM provider invocation,
6
+ * carrying per-call token counts + optional cost estimate. Three
7
+ * describe blocks:
8
+ *
9
+ * 1. Advertisement shape (`capabilities.providerUsage` block).
10
+ * 2. Schema round-trip (positive + negative fixtures).
11
+ * 3. Event presence + shape via the test-only emit seam +
12
+ * event-log query seam (Thread E.1).
13
+ *
14
+ * Each describe block soft-skips when the host doesn't expose the
15
+ * relevant seam OR the matching capability isn't advertised.
16
+ *
17
+ * @see RFCS/0026-provider-usage-event.md
18
+ * @see schemas/run-event-payloads.schema.json#/$defs/providerUsage
19
+ * @see SECURITY/invariants.yaml#provider-usage-no-credential-leak
20
+ */
21
+
22
+ import { describe, it, expect } from 'vitest';
23
+ import Ajv2020 from 'ajv/dist/2020.js';
24
+ import { readFileSync } from 'node:fs';
25
+ import { join } from 'node:path';
26
+ import { driver } from '../lib/driver.js';
27
+ import { SCHEMAS_DIR } from '../lib/paths.js';
28
+ import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
29
+
30
+ interface DiscoveryDoc {
31
+ capabilities?: {
32
+ providerUsage?: { supported?: boolean; costEstimates?: boolean; currency?: string };
33
+ };
34
+ }
35
+
36
+ async function readProviderUsageCap(): Promise<{ supported?: boolean; costEstimates?: boolean; currency?: string } | null> {
37
+ const res = await driver.get('/.well-known/openwop');
38
+ const body = res.json as DiscoveryDoc | undefined;
39
+ const cap = body?.capabilities?.providerUsage;
40
+ return cap && typeof cap === 'object' ? cap : null;
41
+ }
42
+
43
+ describe('provider-usage: capability advertisement (RFC 0026 §E)', () => {
44
+ it('capabilities.providerUsage is either absent or a well-formed object', async () => {
45
+ const cap = await readProviderUsageCap();
46
+ if (cap === null) return; // host doesn't advertise — skip
47
+ expect(
48
+ typeof cap.supported,
49
+ driver.describe('RFC 0026 §E', 'capabilities.providerUsage.supported MUST be a boolean when the block is present'),
50
+ ).toBe('boolean');
51
+ if (cap.costEstimates !== undefined) {
52
+ expect(
53
+ typeof cap.costEstimates,
54
+ driver.describe('RFC 0026 §E', 'capabilities.providerUsage.costEstimates MUST be a boolean when present'),
55
+ ).toBe('boolean');
56
+ }
57
+ if (cap.currency !== undefined) {
58
+ expect(
59
+ /^[A-Z]{3}$/.test(cap.currency),
60
+ driver.describe('RFC 0026 §E', 'capabilities.providerUsage.currency MUST be a 3-letter uppercase ISO 4217 code when present'),
61
+ ).toBe(true);
62
+ }
63
+ });
64
+ });
65
+
66
+ describe('provider-usage: schema round-trip (RFC 0026 §A)', () => {
67
+ const ajv = new Ajv2020({ strict: false, allErrors: true });
68
+ // Load full payloads schema so internal $refs resolve.
69
+ const payloadsDoc = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'run-event-payloads.schema.json'), 'utf8')) as Record<string, unknown>;
70
+ const providerUsageDef = (payloadsDoc.$defs as Record<string, unknown>).providerUsage as Record<string, unknown>;
71
+ const validate = ajv.compile(providerUsageDef);
72
+
73
+ it('positive fixture validates', () => {
74
+ const ok = validate({
75
+ provider: 'anthropic',
76
+ model: 'claude-3-5-sonnet-20240620',
77
+ inputTokens: 145,
78
+ outputTokens: 312,
79
+ totalTokens: 457,
80
+ costEstimateUsd: 0.005115,
81
+ currency: 'USD',
82
+ cacheHit: false,
83
+ nodeId: 'chat-respond',
84
+ });
85
+ expect(ok, `positive fixture MUST validate; errors: ${JSON.stringify(validate.errors)}`).toBe(true);
86
+ });
87
+
88
+ it('negative fixture (missing required field) MUST be rejected', () => {
89
+ const ok = validate({
90
+ provider: 'anthropic',
91
+ model: 'claude-3-5-sonnet-20240620',
92
+ inputTokens: 100,
93
+ // outputTokens missing — required per §A
94
+ });
95
+ expect(
96
+ ok,
97
+ driver.describe('RFC 0026 §A', 'payload missing required `outputTokens` MUST fail schema validation'),
98
+ ).toBe(false);
99
+ });
100
+
101
+ it('negative fixture (additionalProperties — credentialRef leak) MUST be rejected', () => {
102
+ const ok = validate({
103
+ provider: 'anthropic',
104
+ model: 'claude-3-5-sonnet-20240620',
105
+ inputTokens: 100,
106
+ outputTokens: 50,
107
+ credentialRef: 'secret:tenant:byok-anthropic:v1', // banned — additionalProperties:false
108
+ });
109
+ expect(
110
+ ok,
111
+ driver.describe('RFC 0026 §D', 'additionalProperties:false MUST reject credentialRef-shaped fields per provider-usage-no-credential-leak'),
112
+ ).toBe(false);
113
+ });
114
+
115
+ it('negative fixture (non-integer token count) MUST be rejected', () => {
116
+ const ok = validate({
117
+ provider: 'openai',
118
+ model: 'gpt-4o',
119
+ inputTokens: 100.5, // non-integer
120
+ outputTokens: 50,
121
+ });
122
+ expect(ok, 'inputTokens MUST be integer per §A').toBe(false);
123
+ });
124
+ });
125
+
126
+ describe('provider-usage: event presence via emit-seam + event-log query (RFC 0026 §B)', () => {
127
+ it('emit-seam projects exactly one provider.usage event with required fields populated', async () => {
128
+ if (!(await isEventLogSeamAvailable())) return; // E.1 seam not exposed — soft-skip
129
+ const runId = `r-pu-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
130
+ const correlationId = `${runId}:node-1:turn-0:pu-1`;
131
+ const payload = {
132
+ provider: 'anthropic',
133
+ model: 'claude-3-5-sonnet-20240620',
134
+ inputTokens: 200,
135
+ outputTokens: 80,
136
+ totalTokens: 280,
137
+ nodeId: 'node-1',
138
+ };
139
+ const emit = await driver.post('/v1/host/sample/test/emit-provider-usage', { runId, payload, correlationId, nodeId: 'node-1' });
140
+ if (emit.status === 404) return; // emit seam not exposed
141
+ expect(emit.status).toBe(200);
142
+
143
+ const events = await queryTestEvents(runId, { type: 'provider.usage' });
144
+ if (!events.ok) return;
145
+ expect(
146
+ events.events.length,
147
+ driver.describe('RFC 0026 §B', 'emit-seam MUST project exactly one provider.usage event'),
148
+ ).toBe(1);
149
+ const e = events.events[0]!;
150
+ expect(e.payload.provider).toBe('anthropic');
151
+ expect(e.payload.model).toBe('claude-3-5-sonnet-20240620');
152
+ expect(e.payload.inputTokens).toBe(200);
153
+ expect(e.payload.outputTokens).toBe(80);
154
+ expect(e.causationId).toBe(correlationId);
155
+ expect(e.nodeId).toBe('node-1');
156
+ await resetTestSeam();
157
+ });
158
+
159
+ it('emit-seam refuses payloads containing credentialRef-shaped content (provider-usage-no-credential-leak invariant)', async () => {
160
+ if (!(await isEventLogSeamAvailable())) return;
161
+ const runId = `r-pu-leak-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
162
+ // Inject a credentialRef-shaped field via a synthetic payload that
163
+ // contains 'secret:' in a string field. The seam's defense-in-depth
164
+ // check MUST refuse — even though the production emitter's schema
165
+ // validation would also catch this via additionalProperties:false.
166
+ const res = await driver.post('/v1/host/sample/test/emit-provider-usage', {
167
+ runId,
168
+ payload: {
169
+ provider: 'anthropic',
170
+ model: 'claude-3-5-sonnet-20240620',
171
+ inputTokens: 100,
172
+ outputTokens: 50,
173
+ nodeId: 'secret:tenant:byok-anthropic:v1', // banned content
174
+ },
175
+ });
176
+ if (res.status === 404) return;
177
+ expect(
178
+ res.status,
179
+ driver.describe('SECURITY/invariants.yaml provider-usage-no-credential-leak', 'payload with credentialRef-shaped content MUST be refused'),
180
+ ).toBe(400);
181
+ const body = res.json as { error?: { code?: string } };
182
+ expect(body.error?.code).toBe('provider_usage_credential_leak');
183
+ await resetTestSeam();
184
+ });
185
+ });
@@ -1,12 +1,12 @@
1
1
  /**
2
- * queue-ack-nack-dlq — RFC 0017 advertisement-shape verification + behavioral placeholders.
2
+ * queue-ack-nack-dlq — RFC 0017 advertisement-shape verification + behavioral roundtrip.
3
3
  *
4
- * Status: ACTIVE (advertisement-shape). RFC 0017 promoted to `Active`
5
- * 2026-05-17. The matching `capabilities.queueBus` block has landed in
6
- * `schemas/capabilities.schema.json`. This scenario asserts the advertisement
7
- * shape against any host that boots the conformance suite, and keeps the
8
- * deeper behavioral assertions as `it.todo()` until a reference host wires
9
- * a test seam.
4
+ * Status: ACTIVE (advertisement-shape + behavioral). RFC 0017 promoted to
5
+ * `Active` 2026-05-17. The matching `capabilities.queueBus` block has
6
+ * landed in `schemas/capabilities.schema.json`. This scenario asserts the
7
+ * advertisement shape against any host that boots the conformance suite, and
8
+ * exercises the behavioral surface through the `/v1/host/sample/test/surface`
9
+ * seam (soft-skip with HTTP 404 on hosts that don't expose it).
10
10
  *
11
11
  * Summary: nack returns for redelivery; deadLetter routes to the configured DLQ.
12
12
  *
@@ -61,7 +61,61 @@ describe('queue-ack-nack-dlq: advertisement shape (RFC 0017)', () => {
61
61
  });
62
62
  });
63
63
 
64
- describe('queue-ack-nack-dlq: behavioral assertions (placeholders need host test seam)', () => {
65
- it.todo("nack(requeue=true) message is redelivered on next consume");
66
- it.todo("deadLetter → message appears on the configured DLQ");
64
+ async function call(op: string, args: Record<string, unknown>) {
65
+ return driver.post('/v1/host/sample/test/surface', { tenantId: 'tenant-a', surface: 'queueBus', op, args });
66
+ }
67
+
68
+ describe('queue-ack-nack-dlq: behavioral (RFC 0017 §B point 2 — nack + DLQ)', () => {
69
+ it('nack(requeue=true) → message is redelivered on next consume with deliveryCount incremented', async () => {
70
+ const probe = await call('consume', { subject: '__probe__' });
71
+ if (probe.status === 404) return;
72
+ const subject = `q-nack-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
73
+ await call('publish', { subject, payload: { v: 'redeliver-me' } });
74
+
75
+ const first = await call('consume', { subject });
76
+ const firstBody = first.json as { deliveryToken?: string; payload?: unknown; deliveryCount?: number };
77
+ expect(firstBody.deliveryCount).toBe(1);
78
+ const nackRes = await call('nack', { deliveryToken: firstBody.deliveryToken, requeue: true });
79
+ expect((nackRes.json as { requeued?: boolean }).requeued).toBe(true);
80
+
81
+ const second = await call('consume', { subject });
82
+ const secondBody = second.json as { found?: boolean; payload?: unknown; deliveryCount?: number };
83
+ expect(
84
+ secondBody.found,
85
+ driver.describe('RFC 0017 §B point 2', 'nack(requeue=true) MUST make the message available to next consume'),
86
+ ).toBe(true);
87
+ expect(secondBody.payload).toEqual(firstBody.payload);
88
+ expect(
89
+ secondBody.deliveryCount,
90
+ driver.describe('RFC 0017 §B point 2', 'redelivered message MUST have incremented deliveryCount'),
91
+ ).toBe(2);
92
+ });
93
+
94
+ it('deadLetter → message appears on the <subject>.dlq subject; original subject is empty', async () => {
95
+ const probe = await call('consume', { subject: '__probe__' });
96
+ if (probe.status === 404) return;
97
+ const subject = `q-dlq-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
98
+ await call('publish', { subject, payload: { v: 'poison' } });
99
+
100
+ const consumed = await call('consume', { subject });
101
+ const deliveryToken = (consumed.json as { deliveryToken?: string }).deliveryToken;
102
+ const dlqRes = await call('deadLetter', { deliveryToken, reason: 'unparseable_payload' });
103
+ expect((dlqRes.json as { deadLettered?: boolean }).deadLettered).toBe(true);
104
+ const dlqSubject = (dlqRes.json as { dlqSubject?: string }).dlqSubject;
105
+ expect(dlqSubject).toBe(`${subject}.dlq`);
106
+
107
+ // Original subject MUST be empty now
108
+ const originalEmpty = await call('consume', { subject });
109
+ expect((originalEmpty.json as { found?: boolean }).found).toBe(false);
110
+
111
+ // DLQ MUST carry the message + the deadLetterReason
112
+ const dlqMsg = await call('consume', { subject: `${subject}.dlq` });
113
+ const dlqBody = dlqMsg.json as { found?: boolean; payload?: { original?: unknown; deadLetterReason?: string } };
114
+ expect(
115
+ dlqBody.found,
116
+ driver.describe('RFC 0017 §B point 2', 'deadLetter MUST route the message to the <subject>.dlq subject'),
117
+ ).toBe(true);
118
+ expect(dlqBody.payload?.deadLetterReason).toBe('unparseable_payload');
119
+ expect(dlqBody.payload?.original).toEqual({ v: 'poison' });
120
+ });
67
121
  });
@@ -1,12 +1,12 @@
1
1
  /**
2
- * queue-publish-consume-roundtrip — RFC 0017 advertisement-shape verification + behavioral placeholders.
2
+ * queue-publish-consume-roundtrip — RFC 0017 advertisement-shape verification + behavioral roundtrip.
3
3
  *
4
- * Status: ACTIVE (advertisement-shape). RFC 0017 promoted to `Active`
5
- * 2026-05-17. The matching `capabilities.queueBus` block has landed in
6
- * `schemas/capabilities.schema.json`. This scenario asserts the advertisement
7
- * shape against any host that boots the conformance suite, and keeps the
8
- * deeper behavioral assertions as `it.todo()` until a reference host wires
9
- * a test seam.
4
+ * Status: ACTIVE (advertisement-shape + behavioral). RFC 0017 promoted to
5
+ * `Active` 2026-05-17. The matching `capabilities.queueBus` block has
6
+ * landed in `schemas/capabilities.schema.json`. This scenario asserts the
7
+ * advertisement shape against any host that boots the conformance suite, and
8
+ * exercises the behavioral surface through the `/v1/host/sample/test/surface`
9
+ * seam (soft-skip with HTTP 404 on hosts that don't expose it).
10
10
  *
11
11
  * Summary: publish + consume + ack roundtrip.
12
12
  *
@@ -42,7 +42,47 @@ describe('queue-publish-consume-roundtrip: advertisement shape (RFC 0017)', () =
42
42
  });
43
43
  });
44
44
 
45
- describe('queue-publish-consume-roundtrip: behavioral assertions (placeholders need host test seam)', () => {
46
- it.todo("publish consume returns the message with the right payload + headers");
47
- it.todo("ack removes the message; subsequent consume returns not-found within timeout");
45
+ async function call(op: string, args: Record<string, unknown>) {
46
+ return driver.post('/v1/host/sample/test/surface', { tenantId: 'tenant-a', surface: 'queueBus', op, args });
47
+ }
48
+
49
+ describe('queue-publish-consume-roundtrip: behavioral (RFC 0017 §B point 2)', () => {
50
+ it('publish → consume returns the same payload + subject', async () => {
51
+ const probe = await call('consume', { subject: '__probe__' });
52
+ if (probe.status === 404) return; // seam not exposed
53
+ const subject = `q-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
54
+ const payload = { event: 'order.created', orderId: 42 };
55
+ const pub = await call('publish', { subject, payload });
56
+ expect(pub.status).toBe(200);
57
+
58
+ const got = await call('consume', { subject });
59
+ expect(got.status).toBe(200);
60
+ const body = got.json as { found?: boolean; subject?: string; payload?: unknown; deliveryToken?: string };
61
+ expect(body.found, 'consume MUST find the just-published message').toBe(true);
62
+ expect(body.subject).toBe(subject);
63
+ expect(
64
+ body.payload,
65
+ driver.describe('RFC 0017 §B point 2', 'consume MUST return the exact published payload'),
66
+ ).toEqual(payload);
67
+ expect(typeof body.deliveryToken, 'consume MUST return a deliveryToken for ack/nack').toBe('string');
68
+ });
69
+
70
+ it('ack removes the message; subsequent consume on empty queue returns found:false', async () => {
71
+ const probe = await call('consume', { subject: '__probe__' });
72
+ if (probe.status === 404) return;
73
+ const subject = `q-ack-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
74
+ await call('publish', { subject, payload: { v: 1 } });
75
+ const got = await call('consume', { subject });
76
+ const deliveryToken = (got.json as { deliveryToken?: string }).deliveryToken;
77
+ const ackRes = await call('ack', { deliveryToken });
78
+ expect(ackRes.status).toBe(200);
79
+ expect((ackRes.json as { acked?: boolean }).acked).toBe(true);
80
+
81
+ const empty = await call('consume', { subject });
82
+ const emptyBody = empty.json as { found?: boolean };
83
+ expect(
84
+ emptyBody.found,
85
+ driver.describe('RFC 0017 §B point 2', 'consume after ack MUST surface as found:false'),
86
+ ).toBe(false);
87
+ });
48
88
  });
@@ -0,0 +1,134 @@
1
+ /**
2
+ * replay-divergence-at-refusal — RFC 0041 §B behavioral assertion.
3
+ *
4
+ * Status: ACTIVE (capability-gated behavioral; soft-skips when no Phase 4
5
+ * host advertises the contract). Gated on
6
+ * `capabilities.multiAgent.executionModel.version >= 4` AND
7
+ * `capabilities.multiAgent.executionModel.replayDeterminism.refusalDivergenceEmission: true`.
8
+ *
9
+ * Asserts (behavioral, when a Phase 4 host advertises both gates):
10
+ *
11
+ * 1. When the original run obtained a valid LLM envelope but the replay
12
+ * gets a refusal, the host MUST emit a `replay.divergedAtRefusal`
13
+ * event AND fail the replay with `error.code:
14
+ * "replay_diverged_at_refusal"`. Silent substitution is non-conformant.
15
+ *
16
+ * 2. The emitted `replay.divergedAtRefusal` payload MUST carry
17
+ * `originalEnvelopeKind: "valid"` + `replayEnvelopeKind: "refusal"`
18
+ * (or the inverse for the original-refused case). The two MUST
19
+ * differ — otherwise there is no divergence to report.
20
+ *
21
+ * 3. The error envelope MAY carry `details.atSequence`, `details.nodeId`,
22
+ * `details.originalEnvelopeKind`, `details.replayEnvelopeKind` per
23
+ * `spec/v1/rest-endpoints.md` §"Common error codes" — when present,
24
+ * the values MUST be consistent with the emitted event.
25
+ *
26
+ * Driving the assertion requires a host-side test seam that can stage a
27
+ * mock provider returning a valid envelope on the original run and a
28
+ * refusal on the replay (or vice-versa). Reference workflow-engine ships
29
+ * a mock-AI provider (`OPENWOP_MULTI_AGENT_EXECUTION_MODEL=true`); the
30
+ * Phase 4 wiring extends it to honor a "refusal on replay" mode. Until
31
+ * that wiring lands, the assertion is surfaced as `it.todo` so test
32
+ * reporters track the gap rather than reporting a vacuous PASS.
33
+ *
34
+ * @see RFCS/0041-multi-agent-replay-under-nondeterminism.md §B
35
+ * @see spec/v1/replay.md §"Envelope-refusal recovery in replay (MAE-8 closure)"
36
+ * @see spec/v1/multi-agent-execution.md §"Phase 4 replay determinism"
37
+ * @see spec/v1/rest-endpoints.md §"Common error codes" — replay_diverged_at_refusal
38
+ * @see schemas/run-event-payloads.schema.json §replayDivergedAtRefusal
39
+ */
40
+
41
+ import { describe, it, expect } from 'vitest';
42
+ import { driver } from '../lib/driver.js';
43
+
44
+ const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
45
+
46
+ interface DiscoveryDoc {
47
+ capabilities?: {
48
+ multiAgent?: {
49
+ executionModel?: {
50
+ supported?: unknown;
51
+ version?: unknown;
52
+ replayDeterminism?: {
53
+ supported?: unknown;
54
+ refusalDivergenceEmission?: unknown;
55
+ };
56
+ };
57
+ };
58
+ };
59
+ }
60
+
61
+ async function readDiscovery(): Promise<DiscoveryDoc | null> {
62
+ try {
63
+ const res = await driver.get('/.well-known/openwop');
64
+ if (res.status !== 200) return null;
65
+ return res.json as DiscoveryDoc;
66
+ } catch { return null; }
67
+ }
68
+
69
+ describe.skipIf(HTTP_SKIP)('replay-divergence-at-refusal: advertisement shape (RFC 0041 §D)', () => {
70
+ it('replayDeterminism (when present) conforms to RFC 0041 §D', async (ctx) => {
71
+ const d = await readDiscovery();
72
+ if (d === null) {
73
+ ctx.skip();
74
+ return;
75
+ }
76
+ const rd = d.capabilities?.multiAgent?.executionModel?.replayDeterminism;
77
+ if (rd === undefined) {
78
+ ctx.skip(); // optional advertisement — host hasn't opted in
79
+ return;
80
+ }
81
+
82
+ expect(
83
+ typeof rd.supported,
84
+ driver.describe(
85
+ 'RFCS/0041-multi-agent-replay-under-nondeterminism.md §D',
86
+ 'replayDeterminism.supported MUST be boolean when present',
87
+ ),
88
+ ).toBe('boolean');
89
+
90
+ if (rd.supported === true) {
91
+ const version = d.capabilities?.multiAgent?.executionModel?.version as number | undefined;
92
+ expect(
93
+ typeof version === 'number' && version >= 4,
94
+ driver.describe(
95
+ 'RFCS/0041-multi-agent-replay-under-nondeterminism.md §D',
96
+ 'when replayDeterminism.supported: true, multiAgent.executionModel.version MUST be >= 4',
97
+ ),
98
+ ).toBe(true);
99
+
100
+ // Phase 4 hosts MUST commit to refusal-divergence emission per the
101
+ // schema description on capabilities.schema.json §replayDeterminism
102
+ // .refusalDivergenceEmission. The MUST is normative prose on the
103
+ // schema; JSON Schema can't express the conditional, so this
104
+ // assertion closes the conformance-enforcement gap.
105
+ expect(
106
+ rd.refusalDivergenceEmission,
107
+ driver.describe(
108
+ 'schemas/capabilities.schema.json §replayDeterminism.refusalDivergenceEmission',
109
+ 'hosts advertising version: 4 MUST set replayDeterminism.refusalDivergenceEmission to true',
110
+ ),
111
+ ).toBe(true);
112
+ }
113
+ });
114
+ });
115
+
116
+ describe.skipIf(HTTP_SKIP)('replay-divergence-at-refusal: behavioral (RFC 0041 §B MAE-8)', () => {
117
+ // Behavioral assertion drives a workflow whose mock-AI provider returns a
118
+ // valid envelope on the original run + a refusal on the replay (or
119
+ // vice-versa via a second variant). The assertion sequence:
120
+ // 1. Stage mock provider: original returns valid envelope.
121
+ // 2. Run workflow `conformance-phase4-replay-divergence` end-to-end.
122
+ // 3. Re-stage mock provider: replay-of-this-runId returns refusal.
123
+ // 4. POST /v1/runs/{runId}:fork { mode: 'replay' }.
124
+ // 5. Assert the resulting run terminates with
125
+ // error.code === 'replay_diverged_at_refusal'.
126
+ // 6. Assert event log contains a `replay.divergedAtRefusal` event with
127
+ // originalEnvelopeKind === 'valid' AND replayEnvelopeKind === 'refusal'.
128
+ // 7. Assert NO silent substitution: the replay's continuation past the
129
+ // diverging node MUST NOT execute (run terminates at the divergence).
130
+ // Until the reference host wires the staged-refusal seam, surfaced as
131
+ // `todo` so test reporters track the gap.
132
+ it.todo('Phase 4 host MUST emit replay.divergedAtRefusal + fail with replay_diverged_at_refusal when original=valid + replay=refusal');
133
+ it.todo('Phase 4 host MUST emit replay.divergedAtRefusal + fail with replay_diverged_at_refusal when original=refusal + replay=valid (symmetric case)');
134
+ });
@@ -0,0 +1,197 @@
1
+ /**
2
+ * replay-llm-cache-key-portable — RFC 0041 §E SECURITY-invariant probe.
3
+ *
4
+ * Status: ACTIVE (capability-gated behavioral). Gated on
5
+ * `capabilities.multiAgent.executionModel.version >= 4` AND
6
+ * `capabilities.multiAgent.executionModel.replayDeterminism.llmCacheKeyRecipe: "spec-rfc-0041"`.
7
+ *
8
+ * The CROSS-host parity assertion in `replay-llm-cache-key.test.ts §D`
9
+ * (gated on `OPENWOP_BASE_URL_B`) is the cross-instance probe. This file
10
+ * is the SECURITY-tier complement: it asserts that the SINGLE-host
11
+ * recipe is portable in the strict sense — given the recipe input, the
12
+ * host's emitted key is reproducible offline from the recipe alone
13
+ * (no host-internal secrets, sequence numbers, or trace context
14
+ * influence the key).
15
+ *
16
+ * Asserts:
17
+ *
18
+ * 1. Two probes with byte-identical recipe input MUST yield the same
19
+ * cache key (intra-host determinism; subsumes the SECURITY
20
+ * portability requirement at the single-host boundary).
21
+ *
22
+ * 2. The emitted key is reproducible offline: locally recomputed
23
+ * SHA-256-over-RFC-8785-JCS over the canonical recipe MUST equal
24
+ * the host's emission. This is the load-bearing claim — without
25
+ * it, the recipe is private host state masquerading as a content-
26
+ * addressable hash.
27
+ *
28
+ * 3. (Negative) Permuting any non-recipe field (`max_tokens`, `stop`,
29
+ * `stream`, `seed`, `metadata`, `user`, request IDs, trace context)
30
+ * MUST NOT shift the key. This is the security boundary: hosts
31
+ * that mix non-recipe state into the key leak that state across
32
+ * the cache boundary, defeating the portability claim and (via
33
+ * the SR-1 sibling invariant) potentially leaking BYOK plaintexts
34
+ * through the cache.
35
+ *
36
+ * 4. (Gated on Phase 4 advertisement.) The host's discovery doc MUST
37
+ * advertise `replayDeterminism.llmCacheKeyRecipe` matching the
38
+ * recipe it honors — `spec-rfc-0041` for the canonical recipe,
39
+ * `x-host-<host>-<recipe-name>` for vendor variants per
40
+ * `host-extensions.md` §"Canonical prefixes".
41
+ *
42
+ * The behavioral assertions reuse the existing test seam at
43
+ * `POST /v1/host/sample/test/llm-cache-key` (the same seam the sibling
44
+ * `replay-llm-cache-key.test.ts` drives). Hosts that don't expose the
45
+ * seam return 404 and the scenario soft-skips.
46
+ *
47
+ * @see RFCS/0041-multi-agent-replay-under-nondeterminism.md §E
48
+ * @see SECURITY/invariants.yaml §replay-llm-cache-key-portable
49
+ * @see spec/v1/replay.md §"LLM cache-key recipe" §A + §B + §D
50
+ * @see conformance/src/scenarios/replay-llm-cache-key.test.ts (the sibling behavioral suite)
51
+ */
52
+
53
+ import { describe, it, expect } from 'vitest';
54
+ import { driver } from '../lib/driver.js';
55
+ import { expectedCacheKey, callCacheKeySeam as callSeam } from '../lib/llm-cache-key-recipe.js';
56
+
57
+ const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
58
+
59
/**
 * Replay-determinism fields this suite reads from the discovery document.
 * Values are `unknown` because the document is untrusted host output and is
 * narrowed at the point of use.
 */
interface ReplayDeterminismAdvertisement {
  supported?: unknown;
  llmCacheKeyRecipe?: unknown;
}

/** The `executionModel` advertisement nested under `capabilities.multiAgent`. */
interface ExecutionModelAdvertisement {
  version?: unknown;
  replayDeterminism?: ReplayDeterminismAdvertisement;
}

/**
 * Minimal view of the discovery document: only the path
 * `capabilities.multiAgent.executionModel` is modelled, and every level is
 * optional so a host that omits any of it still type-checks.
 */
interface DiscoveryDoc {
  capabilities?: {
    multiAgent?: {
      executionModel?: ExecutionModelAdvertisement;
    };
  };
}
72
+
73
/**
 * Fetches `/.well-known/openwop` through the shared driver.
 *
 * @returns the response body viewed as a {@link DiscoveryDoc} when the status
 *   is 200; `null` for any other status or when the request throws. The body
 *   is cast, not validated, so callers must narrow each field before use.
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const response = await driver.get('/.well-known/openwop');
    return response.status === 200 ? (response.json as DiscoveryDoc) : null;
  } catch {
    // Best-effort read: a failed fetch is reported as "no discovery document".
    return null;
  }
}
80
+
81
describe.skipIf(HTTP_SKIP)('replay-llm-cache-key-portable: intra-host reproducibility (RFC 0041 §E)', () => {
  // Offline reproducibility: the key the host returns from the test seam must
  // equal the key computed locally by `expectedCacheKey` for the same input.
  it('host cache key MUST equal locally-recomputed SHA-256 over canonical JSON (reproducible offline)', async (ctx) => {
    const input = {
      provider: 'anthropic',
      model: 'claude-3-5-sonnet-20240620',
      messages: [
        { role: 'system' as const, content: 'portability probe' },
        { role: 'user' as const, content: 'reproduce offline' },
      ],
      temperature: 0.3,
    };
    const result = await callSeam(input);
    if (result.status === 404) {
      ctx.skip(); // host doesn't expose the test seam
      return;
    }
    expect(result.status).toBe(200);
    expect(
      result.cacheKey,
      driver.describe(
        'SECURITY/invariants.yaml §replay-llm-cache-key-portable + replay.md §B',
        'host cache key MUST be reproducible offline from the recipe alone — no host-internal state',
      ),
    ).toBe(expectedCacheKey(input));
  });

  // Intra-host determinism: the same input sent twice must produce the same key.
  it('two identical probes MUST yield byte-identical keys (intra-host determinism)', async (ctx) => {
    const input = {
      provider: 'openai',
      model: 'gpt-4',
      messages: [{ role: 'user' as const, content: 'idempotence probe' }],
      temperature: 0.0,
    };
    const a = await callSeam(input);
    if (a.status === 404) {
      ctx.skip(); // host doesn't expose the test seam
      return;
    }
    const b = await callSeam(input);

    // Both probes must have succeeded and returned a key. Without these
    // guards, two failing calls (e.g. 500 with no `cacheKey`) would compare
    // `undefined` to `undefined` and the equality below would pass vacuously.
    expect(a.status).toBe(200);
    expect(b.status).toBe(200);
    expect(a.cacheKey).toBeDefined();

    expect(
      a.cacheKey,
      driver.describe(
        'SECURITY/invariants.yaml §replay-llm-cache-key-portable',
        'two byte-identical recipe inputs MUST yield byte-identical keys (no per-request entropy)',
      ),
    ).toBe(b.cacheKey);
  });
});
129
+
130
describe.skipIf(HTTP_SKIP)('replay-llm-cache-key-portable: non-recipe-field invariance (RFC 0041 §E security boundary)', () => {
  // Sends a baseline input and the same input with extra non-recipe fields,
  // then requires the two returned keys to be identical.
  it('non-recipe fields (request ID, trace context, tenant ID) MUST NOT influence the cache key', async (ctx) => {
    const base = {
      provider: 'openai',
      model: 'gpt-4',
      messages: [{ role: 'user' as const, content: 'security-boundary probe' }],
      temperature: 0.5,
    };
    const baseResult = await callSeam(base);
    if (baseResult.status === 404) {
      ctx.skip(); // host doesn't expose the test seam
      return;
    }
    // A non-404 failure must fail the test here rather than leave
    // `baseResult.cacheKey` undefined for the comparison below.
    expect(baseResult.status).toBe(200);
    expect(baseResult.cacheKey).toBeDefined();

    // The security boundary: ANY of these fields leaking into the key
    // would expose tenant/request state through cache-collision behavior.
    const polluted = {
      ...base,
      max_tokens: 1000,
      stop: ['STOP'],
      stream: true,
      seed: 42,
      metadata: { tenantId: 'tenant-A', traceparent: '00-deadbeef-cafe-01' },
      user: 'user-42',
      'x-request-id': 'req-abc-123',
    };
    const pollutedResult = await callSeam(polluted);
    // Surface a rejected polluted request as a status failure, not as a
    // misleading "key shifted" mismatch against an undefined key.
    expect(pollutedResult.status).toBe(200);

    expect(
      pollutedResult.cacheKey,
      driver.describe(
        'SECURITY/invariants.yaml §replay-llm-cache-key-portable + replay.md §A',
        'non-recipe fields (request id, trace context, tenant id) MUST NOT influence the cache key — leaking them defeats the portability invariant',
      ),
    ).toBe(baseResult.cacheKey);
  });
});
166
+
167
describe.skipIf(HTTP_SKIP)('replay-llm-cache-key-portable: Phase 4 advertisement alignment (RFC 0041 §D)', () => {
  // Applies only to hosts whose discovery document advertises a numeric
  // executionModel.version that is not below 4; every other host is skipped.
  it('hosts advertising version: 4 MUST advertise replayDeterminism.llmCacheKeyRecipe', async (ctx) => {
    const discovery = await readDiscovery();
    const executionModel = discovery?.capabilities?.multiAgent?.executionModel;
    const advertisedVersion = executionModel?.version;
    if (typeof advertisedVersion !== 'number' || advertisedVersion < 4) {
      ctx.skip(); // pre-Phase-4 or no multiAgent advertisement
      return;
    }

    // First requirement: the recipe identifier is present and is a string.
    const advertisedRecipe = executionModel?.replayDeterminism?.llmCacheKeyRecipe;
    const recipeIsString = typeof advertisedRecipe === 'string';
    expect(
      recipeIsString,
      driver.describe(
        'RFCS/0041-multi-agent-replay-under-nondeterminism.md §D',
        'Phase 4 host MUST advertise replayDeterminism.llmCacheKeyRecipe (`spec-rfc-0041` or `x-host-<host>-<recipe>`)',
      ),
    ).toBe(true);

    // Second requirement: the identifier is the canonical name or a
    // vendor-prefixed name. The assertion above already failed for non-strings.
    const recipeName = advertisedRecipe as string;
    const vendorPattern = /^x-host-[a-z][a-z0-9-]*-[a-z][a-z0-9-]*$/;
    const isRecognised = recipeName === 'spec-rfc-0041' || vendorPattern.test(recipeName);
    expect(
      isRecognised,
      driver.describe(
        'schemas/capabilities.schema.json §replayDeterminism.llmCacheKeyRecipe',
        'llmCacheKeyRecipe MUST be `spec-rfc-0041` OR match `^x-host-<host>-<recipe>$` per host-extensions.md',
      ),
    ).toBe(true);
  });
});