@openwop/openwop-conformance 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. package/CHANGELOG.md +156 -1
  2. package/README.md +3 -2
  3. package/api/asyncapi.yaml +8 -0
  4. package/api/openapi.yaml +371 -1
  5. package/api/redocly.yaml +15 -0
  6. package/coverage.md +26 -5
  7. package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
  8. package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
  9. package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
  10. package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
  11. package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
  12. package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
  13. package/fixtures/conformance-envelope-recovery-applied.json +39 -0
  14. package/fixtures/conformance-envelope-refusal.json +38 -0
  15. package/fixtures/conformance-envelope-retry-attempted.json +39 -0
  16. package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
  17. package/fixtures/conformance-envelope-truncated.json +39 -0
  18. package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
  19. package/fixtures/conformance-model-capability-insufficient.json +25 -0
  20. package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
  21. package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
  22. package/fixtures/conformance-multi-agent-handoff.json +49 -0
  23. package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
  24. package/fixtures/conformance-prompt-end-to-end.json +33 -0
  25. package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
  26. package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
  27. package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
  28. package/fixtures/openwop-smoke-cost-emit.json +37 -0
  29. package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
  30. package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
  31. package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
  32. package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
  33. package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
  34. package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
  35. package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
  36. package/fixtures.md +45 -0
  37. package/package.json +1 -1
  38. package/schemas/README.md +5 -0
  39. package/schemas/agent-manifest.schema.json +16 -0
  40. package/schemas/capabilities.schema.json +390 -0
  41. package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
  42. package/schemas/envelopes/clarification.request.schema.json +9 -0
  43. package/schemas/envelopes/error.schema.json +4 -0
  44. package/schemas/envelopes/schema.request.schema.json +4 -0
  45. package/schemas/envelopes/schema.response.schema.json +1 -1
  46. package/schemas/node-pack-manifest.schema.json +28 -0
  47. package/schemas/orchestrator-decision.schema.json +12 -0
  48. package/schemas/prompt-kind.schema.json +8 -0
  49. package/schemas/prompt-pack-manifest.schema.json +80 -0
  50. package/schemas/prompt-ref.schema.json +40 -0
  51. package/schemas/prompt-template.schema.json +149 -0
  52. package/schemas/registry-version-manifest.schema.json +5 -0
  53. package/schemas/run-ancestry-response.schema.json +54 -0
  54. package/schemas/run-event-payloads.schema.json +513 -11
  55. package/schemas/run-event.schema.json +17 -1
  56. package/schemas/run-snapshot.schema.json +3 -2
  57. package/schemas/workflow-definition.schema.json +19 -1
  58. package/src/lib/driver.ts +15 -0
  59. package/src/lib/env.ts +51 -0
  60. package/src/lib/event-log-query.ts +62 -0
  61. package/src/lib/fixtures.ts +38 -1
  62. package/src/lib/host-toggle.ts +54 -0
  63. package/src/lib/llm-cache-key-recipe.ts +68 -0
  64. package/src/lib/multi-agent-capabilities.ts +10 -0
  65. package/src/lib/otel-scrape.ts +59 -0
  66. package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
  67. package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
  68. package/src/scenarios/aiEnvelope.contractRefusal.test.ts +224 -15
  69. package/src/scenarios/aiEnvelope.correlationReplay.test.ts +257 -25
  70. package/src/scenarios/aiEnvelope.redaction.test.ts +210 -29
  71. package/src/scenarios/aiEnvelope.schemaDrift.test.ts +163 -24
  72. package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +262 -12
  73. package/src/scenarios/aiEnvelope.universalKinds.test.ts +107 -16
  74. package/src/scenarios/blob-presign-expiry.test.ts +42 -9
  75. package/src/scenarios/blob-roundtrip.test.ts +0 -0
  76. package/src/scenarios/cache-ttl-expiry.test.ts +34 -8
  77. package/src/scenarios/cost-attribution.test.ts +124 -11
  78. package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
  79. package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
  80. package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
  81. package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
  82. package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
  83. package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
  84. package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
  85. package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
  86. package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
  87. package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
  88. package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
  89. package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
  90. package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
  91. package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
  92. package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
  93. package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
  94. package/src/scenarios/envelope-truncated.test.ts +136 -0
  95. package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
  96. package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
  97. package/src/scenarios/fixtures-gating.test.ts +139 -1
  98. package/src/scenarios/fixtures-valid.test.ts +123 -15
  99. package/src/scenarios/kv-ttl-expiry.test.ts +40 -9
  100. package/src/scenarios/model-capability-insufficient.test.ts +221 -0
  101. package/src/scenarios/model-capability-substituted.test.ts +203 -0
  102. package/src/scenarios/multi-agent-confidence-escalation.test.ts +164 -0
  103. package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
  104. package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
  105. package/src/scenarios/multi-region-idempotency.test.ts +58 -0
  106. package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
  107. package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
  108. package/src/scenarios/pack-registry-publish.test.ts +231 -51
  109. package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
  110. package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
  111. package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
  112. package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
  113. package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
  114. package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
  115. package/src/scenarios/prompt-pack-install.test.ts +187 -0
  116. package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
  117. package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
  118. package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
  119. package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
  120. package/src/scenarios/prompt-template-shape.test.ts +359 -0
  121. package/src/scenarios/provider-usage.test.ts +185 -0
  122. package/src/scenarios/queue-ack-nack-dlq.test.ts +64 -10
  123. package/src/scenarios/queue-publish-consume-roundtrip.test.ts +50 -10
  124. package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
  125. package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
  126. package/src/scenarios/replay-llm-cache-key.test.ts +127 -25
  127. package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
  128. package/src/scenarios/sandbox-capability-gate-respected.test.ts +31 -0
  129. package/src/scenarios/sandbox-memory-cap.test.ts +61 -0
  130. package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +35 -0
  131. package/src/scenarios/sandbox-no-host-env-leak.test.ts +38 -0
  132. package/src/scenarios/sandbox-no-host-fs-escape.test.ts +91 -0
  133. package/src/scenarios/sandbox-no-host-process-escape.test.ts +30 -0
  134. package/src/scenarios/sandbox-no-network-escape.test.ts +49 -0
  135. package/src/scenarios/sandbox-timeout-cap.test.ts +61 -0
  136. package/src/scenarios/search-bm25-roundtrip.test.ts +54 -9
  137. package/src/scenarios/spec-corpus-validity.test.ts +34 -6
  138. package/src/scenarios/sql-transaction-atomicity.test.ts +37 -8
  139. package/src/scenarios/stream-subscribe-from-beginning.test.ts +46 -9
  140. package/src/scenarios/subworkflow-input-mapping.test.ts +146 -10
  141. package/src/scenarios/table-cursor-pagination.test.ts +47 -9
  142. package/src/scenarios/table-schema-enforcement.test.ts +46 -9
  143. package/src/scenarios/vector-knn-roundtrip.test.ts +50 -10
  144. package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
@@ -16,20 +16,30 @@ import { FIXTURES_DIR, SCHEMAS_DIR } from '../lib/paths.js';
16
16
  // checkouts (schemas one level above the conformance package) and the
17
17
  // published tarball (schemas vendored at the package root by `prepack`).
18
18
  const PACK_MANIFEST_FIXTURES_DIR = join(FIXTURES_DIR, 'pack-manifests');
19
+ const PROMPT_TEMPLATE_FIXTURES_DIR = join(FIXTURES_DIR, 'prompt-templates');
19
20
  const SCHEMA_PATH = join(SCHEMAS_DIR, 'workflow-definition.schema.json');
20
21
  const PACK_MANIFEST_SCHEMA_PATH = join(SCHEMAS_DIR, 'node-pack-manifest.schema.json');
22
+ const PROMPT_TEMPLATE_SCHEMA_PATH = join(SCHEMAS_DIR, 'prompt-template.schema.json');
21
23
 
22
24
  describe('fixtures: workflow-definition schema validity', () => {
23
25
  const ajv = new Ajv2020({ allErrors: true, strict: false });
24
26
  addFormats(ajv);
25
- // Pre-load the agent-ref peer schema so cross-schema `$ref` in
26
- // workflow-definition (Phase 1 — `WorkflowNode.agent`) resolves.
27
- // The relative file-name `agent-ref.schema.json` is how
28
- // workflow-definition references it; register under that name so
29
- // Ajv's $ref resolver finds it.
30
- const agentRefPath = join(SCHEMAS_DIR, 'agent-ref.schema.json');
31
- const agentRefSchema = JSON.parse(readFileSync(agentRefPath, 'utf8'));
27
+ // Pre-load peer schemas that workflow-definition cross-`$ref`s:
28
+ // - agent-ref.schema.json — `WorkflowNode.agent` (Phase 1 multi-agent)
29
+ // - prompt-ref.schema.json — `WorkflowDefinition.defaults.promptRefs.*`
30
+ // (RFC 0029 §B resolution-chain layer 3)
31
+ // - prompt-kind.schema.json — transitively referenced by prompt-ref's
32
+ // object form when validating PromptRef variants
33
+ // Register each under both the canonical $id and the relative file
34
+ // name so Ajv resolves either way the host schema spelled the ref.
35
+ const agentRefSchema = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'agent-ref.schema.json'), 'utf8'));
36
+ const promptRefSchema = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'prompt-ref.schema.json'), 'utf8'));
37
+ const promptKindSchema = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'prompt-kind.schema.json'), 'utf8'));
32
38
  ajv.addSchema(agentRefSchema, 'agent-ref.schema.json');
39
+ ajv.addSchema(promptRefSchema, 'prompt-ref.schema.json');
40
+ ajv.addSchema(promptRefSchema, './prompt-ref.schema.json');
41
+ ajv.addSchema(promptKindSchema, 'prompt-kind.schema.json');
42
+ ajv.addSchema(promptKindSchema, './prompt-kind.schema.json');
33
43
  const schema = JSON.parse(readFileSync(SCHEMA_PATH, 'utf8'));
34
44
  const validate = ajv.compile(schema);
35
45
 
@@ -85,14 +95,19 @@ describe('fixtures: node-pack-manifest schema validity', () => {
85
95
  // `private.<host>.*` scope is accepted by the canonical schema).
86
96
  const ajv = new Ajv2020({ allErrors: true, strict: false });
87
97
  addFormats(ajv);
88
- // Pre-load the agent-manifest peer schema so the Phase 2 `agents[]`
89
- // $ref in node-pack-manifest resolves under the same name the
90
- // manifest schema uses.
91
- const agentManifestPath = join(SCHEMAS_DIR, 'agent-manifest.schema.json');
92
- ajv.addSchema(
93
- JSON.parse(readFileSync(agentManifestPath, 'utf8')),
94
- 'agent-manifest.schema.json',
95
- );
98
+ // Pre-load peer schemas. agent-manifest references prompt-ref (RFC 0029
99
+ // §B `AgentManifest.promptOverrides[kind]` + `promptLibraryRef`); prompt-ref
100
+ // transitively references prompt-kind. Register each under both the
101
+ // canonical $id and the relative file name so Ajv resolves either way
102
+ // the consumer schema spelled the ref.
103
+ const agentManifestSchema = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'agent-manifest.schema.json'), 'utf8'));
104
+ const promptRefSchema = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'prompt-ref.schema.json'), 'utf8'));
105
+ const promptKindSchema = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'prompt-kind.schema.json'), 'utf8'));
106
+ ajv.addSchema(agentManifestSchema, 'agent-manifest.schema.json');
107
+ ajv.addSchema(promptRefSchema, 'prompt-ref.schema.json');
108
+ ajv.addSchema(promptRefSchema, './prompt-ref.schema.json');
109
+ ajv.addSchema(promptKindSchema, 'prompt-kind.schema.json');
110
+ ajv.addSchema(promptKindSchema, './prompt-kind.schema.json');
96
111
  const schema = JSON.parse(readFileSync(PACK_MANIFEST_SCHEMA_PATH, 'utf8'));
97
112
  const validate = ajv.compile(schema);
98
113
 
@@ -138,3 +153,96 @@ describe('fixtures: node-pack-manifest schema validity', () => {
138
153
  ).toBeGreaterThan(0);
139
154
  });
140
155
  });
156
+
157
+ describe('fixtures: prompt-template schema validity', () => {
158
+ // PromptTemplate fixtures live in `fixtures/prompt-templates/` per
159
+ // RFC 0027 §A. Like pack manifests, they're schema-level proof points,
160
+ // not seeded into a workflow store. They exist so the conformance
161
+ // suite has canonical positive fixtures for the prompt-template-shape
162
+ // scenario, and so future RFCs (0028 prompt packs, 0029 resolution
163
+ // chain) can reference a stable fixture set.
164
+ const ajv = new Ajv2020({ allErrors: true, strict: false });
165
+ addFormats(ajv);
166
+ // Pre-load prompt-kind so the cross-schema `$ref` in
167
+ // prompt-template.schema.json resolves. The template references
168
+ // prompt-kind via `./prompt-kind.schema.json` (relative URI; see
169
+ // RFC 0027 commit notes for the redocly compatibility rationale).
170
+ // Register under both the canonical `$id` and the relative form so
171
+ // Ajv resolves either way.
172
+ const promptKindPath = join(SCHEMAS_DIR, 'prompt-kind.schema.json');
173
+ const promptKindSchema = JSON.parse(readFileSync(promptKindPath, 'utf8'));
174
+ ajv.addSchema(promptKindSchema, 'prompt-kind.schema.json');
175
+ ajv.addSchema(promptKindSchema, './prompt-kind.schema.json');
176
+ const schema = JSON.parse(readFileSync(PROMPT_TEMPLATE_SCHEMA_PATH, 'utf8'));
177
+ const validate = ajv.compile(schema);
178
+
179
+ const files = readdirSync(PROMPT_TEMPLATE_FIXTURES_DIR)
180
+ .filter((f) => f.endsWith('.json'))
181
+ .sort();
182
+
183
+ it('finds at least one prompt-template fixture', () => {
184
+ expect(
185
+ files.length,
186
+ 'Expected at least one PromptTemplate fixture under fixtures/prompt-templates/',
187
+ ).toBeGreaterThan(0);
188
+ });
189
+
190
+ for (const file of files) {
191
+ it(`prompt-templates/${file} validates against prompt-template.schema.json`, () => {
192
+ const data = JSON.parse(
193
+ readFileSync(join(PROMPT_TEMPLATE_FIXTURES_DIR, file), 'utf8'),
194
+ );
195
+ const ok = validate(data);
196
+ const errors = (validate.errors ?? [])
197
+ .map((e: ErrorObject) => `${e.instancePath || '/'}: ${e.message}`)
198
+ .join('\n');
199
+ expect(
200
+ ok,
201
+ `Fixture prompt-templates/${file} fails prompt-template schema:\n${errors}`,
202
+ ).toBe(true);
203
+ });
204
+ }
205
+
206
+ it('every fixture templateId matches its filename', () => {
207
+ // Filename convention: `<templateId-dot-form-with-dots-as-dashes>.json`.
208
+ // The fixture set uses dot-separated templateIds (e.g.,
209
+ // `conformance.prompt.writer-system`) which map directly to filenames
210
+ // with dots replaced by dashes (`conformance-prompt-writer-system.json`). The
211
+ // file→id mapping is loose (the suite doesn't enforce it) but we
212
+ // assert templateId presence so each fixture is self-describing.
213
+ for (const file of files) {
214
+ const data = JSON.parse(
215
+ readFileSync(join(PROMPT_TEMPLATE_FIXTURES_DIR, file), 'utf8'),
216
+ ) as { templateId: string };
217
+ expect(
218
+ typeof data.templateId,
219
+ `Fixture prompt-templates/${file} MUST declare a templateId`,
220
+ ).toBe('string');
221
+ expect(data.templateId.length).toBeGreaterThan(0);
222
+ }
223
+ });
224
+
225
+ it('every secret-source variable lives in a fixture tagged for the secret-redaction scenario', () => {
226
+ // SECURITY regression pin: a fixture that declares a `secret`-source
227
+ // variable but isn't visible to the prompt-composed-secret-redaction
228
+ // scenario could mask a redaction failure. We require every
229
+ // fixture carrying secret-source variables to advertise the
230
+ // `secret-redaction` tag so the scenario discovers it.
231
+ for (const file of files) {
232
+ const data = JSON.parse(
233
+ readFileSync(join(PROMPT_TEMPLATE_FIXTURES_DIR, file), 'utf8'),
234
+ ) as {
235
+ templateId: string;
236
+ variables?: Array<{ name: string; source?: string }>;
237
+ tags?: string[];
238
+ };
239
+ const hasSecretSource = (data.variables ?? []).some((v) => v.source === 'secret');
240
+ if (hasSecretSource) {
241
+ expect(
242
+ (data.tags ?? []).includes('secret-redaction'),
243
+ `Fixture prompt-templates/${file} declares a secret-source variable but lacks the 'secret-redaction' tag`,
244
+ ).toBe(true);
245
+ }
246
+ }
247
+ });
248
+ });
@@ -1,12 +1,12 @@
1
1
  /**
2
- * kv-ttl-expiry — RFC 0015 advertisement-shape verification + behavioral placeholders.
2
+ * kv-ttl-expiry — RFC 0015 advertisement-shape verification + behavioral roundtrip.
3
3
  *
4
- * Status: ACTIVE (advertisement-shape). RFC 0015 promoted to `Active`
5
- * 2026-05-17. The matching `capabilities.kvStorage` block has landed in
6
- * `schemas/capabilities.schema.json`. This scenario asserts the advertisement
7
- * shape against any host that boots the conformance suite, and keeps the
8
- * deeper behavioral assertions as `it.todo()` until a reference host wires
9
- * a test seam.
4
+ * Status: ACTIVE (advertisement-shape + behavioral). RFC 0015 promoted to
5
+ * `Active` 2026-05-17. The matching `capabilities.kvStorage` block has
6
+ * landed in `schemas/capabilities.schema.json`. This scenario asserts the
7
+ * advertisement shape against any host that boots the conformance suite, and
8
+ * exercises the behavioral surface through the `/v1/host/sample/test/surface`
9
+ * seam (soft-skip with HTTP 404 on hosts that don't expose it).
10
10
  *
11
11
  * Summary: TTL honored with at most a 1-second drift on expiry visibility.
12
12
  *
@@ -42,6 +42,37 @@ describe('kv-ttl-expiry: advertisement shape (RFC 0015)', () => {
42
42
  });
43
43
  });
44
44
 
45
- describe('kv-ttl-expiry: behavioral assertions (placeholders need host test seam)', () => {
46
- it.todo("set with ttl=2 get at t+1 returns the value; get at t+3 returns not-found");
45
+ async function call(op: string, args: Record<string, unknown>) {
46
+ return driver.post('/v1/host/sample/test/surface', { tenantId: 'tenant-a', surface: 'kv', op, args });
47
+ }
48
+
49
+ describe('kv-ttl-expiry: behavioral (RFC 0015 §B point 3 — 1s TTL drift)', () => {
50
+ it('set with ttlSeconds=2 → get before expiry returns value; get after expiry returns found:false', async () => {
51
+ const probe = await call('get', { key: '__ttl-probe__' });
52
+ if (probe.status === 404) return; // host doesn't expose the seam
53
+ const key = `ttl-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
54
+ const setRes = await call('set', { key, value: 'expires-soon', ttlSeconds: 2 });
55
+ expect(setRes.status).toBe(200);
56
+
57
+ // Read within the window
58
+ const within = await call('get', { key });
59
+ expect(within.status).toBe(200);
60
+ const withinBody = within.json as { value?: unknown; found?: boolean };
61
+ expect(
62
+ withinBody.value,
63
+ driver.describe('RFC 0015 §B point 3', 'get within TTL window MUST return the stored value'),
64
+ ).toBe('expires-soon');
65
+ expect(withinBody.found).toBe(true);
66
+
67
+ // Wait past expiry (2s TTL + 1s drift allowance per RFC 0015 §B point 3)
68
+ await new Promise((r) => setTimeout(r, 3000));
69
+
70
+ const after = await call('get', { key });
71
+ expect(after.status).toBe(200);
72
+ const afterBody = after.json as { value?: unknown; found?: boolean };
73
+ expect(
74
+ afterBody.found,
75
+ driver.describe('RFC 0015 §B point 3', 'get after TTL expiry MUST surface as found:false (≤1s drift)'),
76
+ ).toBe(false);
77
+ });
47
78
  });
@@ -0,0 +1,221 @@
1
+ /**
2
+ * model-capability-insufficient — RFC 0031 §B step 4 + §D runtime behavior.
3
+ *
4
+ * Capability-gated on `capabilities.modelCapabilities.supported: true`.
5
+ * Drives the host's `POST /v1/host/sample/test/evaluate-model-capability-gate`
6
+ * seam through the refusal branches of the §B 4-step dispatch flow.
7
+ *
8
+ * @see RFCS/0031-envelope-variants-and-model-capabilities.md §B step 4 + §D
9
+ * @see schemas/run-event-payloads.schema.json §modelCapabilityInsufficient
10
+ */
11
+
12
+ import { describe, it, expect } from 'vitest';
13
+ import { driver } from '../lib/driver.js';
14
+
15
+ const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
16
+
17
+ interface GateResponse {
18
+ outcome?: {
19
+ route?: 'dispatch' | 'substitute' | 'refuse';
20
+ missingCapabilities?: string[];
21
+ fallbackAttempted?: boolean;
22
+ };
23
+ event?: { type?: string; payload?: Record<string, unknown> } | null;
24
+ }
25
+
26
+ async function evaluateGate(input: Record<string, unknown>): Promise<{ status: number; body: GateResponse }> {
27
+ const res = await driver.post('/v1/host/sample/test/evaluate-model-capability-gate', input);
28
+ return { status: res.status, body: res.json as GateResponse };
29
+ }
30
+
31
+ describe.skipIf(HTTP_SKIP)('model-capability-insufficient: dispatch refusal (RFC 0031 §B step 4 + §D)', () => {
32
+ it('unmet + NO fallbackModel declared → refuse with fallbackAttempted: false', async () => {
33
+ const r = await evaluateGate({
34
+ module: { requiredModelCapabilities: ['structured-output', 'reasoning'] },
35
+ // no fallbackModel
36
+ activeProvider: 'unknown-vendor',
37
+ activeModel: 'unknown-model',
38
+ substitutionSupported: true,
39
+ supportedProviders: ['unknown-vendor'],
40
+ nodeId: 'editor-node',
41
+ });
42
+ if (r.status === 404) return;
43
+ expect(
44
+ r.body.outcome?.route,
45
+ driver.describe(
46
+ 'RFCS/0031-envelope-variants-and-model-capabilities.md §B step 4',
47
+ 'unmet capability + no fallbackModel declared → host MUST refuse',
48
+ ),
49
+ ).toBe('refuse');
50
+ expect(
51
+ r.body.outcome?.fallbackAttempted,
52
+ driver.describe(
53
+ 'schemas/run-event-payloads.schema.json §modelCapabilityInsufficient',
54
+ 'fallbackAttempted MUST be false when no fallbackModel was declared on the NodeModule',
55
+ ),
56
+ ).toBe(false);
57
+ expect(
58
+ r.body.event?.type,
59
+ driver.describe(
60
+ 'RFCS/0031-envelope-variants-and-model-capabilities.md §D',
61
+ 'refuse path MUST emit `model.capability.insufficient` BEFORE the node failure',
62
+ ),
63
+ ).toBe('model.capability.insufficient');
64
+ const payload = (r.body.event?.payload ?? {}) as Record<string, unknown>;
65
+ expect(payload.nodeId).toBe('editor-node');
66
+ expect(payload.provider).toBe('unknown-vendor');
67
+ expect(payload.fallbackAttempted).toBe(false);
68
+ expect(Array.isArray(payload.missingCapabilities)).toBe(true);
69
+ });
70
+
71
+ it('unmet + fallback declared but provider NOT in supportedProviders → refuse with fallbackAttempted: true', async () => {
72
+ const r = await evaluateGate({
73
+ module: {
74
+ requiredModelCapabilities: ['structured-output'],
75
+ fallbackModel: { provider: 'unauthenticated-vendor', model: 'foo' },
76
+ },
77
+ activeProvider: 'unknown-vendor',
78
+ activeModel: 'unknown-model',
79
+ substitutionSupported: true,
80
+ // Fallback's provider is NOT in supportedProviders — host cannot
81
+ // authenticate per RFC 0031 §B step 3 final clause.
82
+ supportedProviders: ['anthropic', 'unknown-vendor'],
83
+ });
84
+ if (r.status === 404) return;
85
+ expect(r.body.outcome?.route).toBe('refuse');
86
+ expect(
87
+ r.body.outcome?.fallbackAttempted,
88
+ driver.describe(
89
+ 'RFCS/0031-envelope-variants-and-model-capabilities.md §B step 3',
90
+ 'fallback provider NOT in capabilities.aiProviders.supported[] → host cannot authenticate → fallbackAttempted MUST be true (the attempt failed at credential resolution)',
91
+ ),
92
+ ).toBe(true);
93
+ expect(r.body.event?.type).toBe('model.capability.insufficient');
94
+ });
95
+
96
+ it('unmet + substitutionSupported: false (host posture) → refuse with fallbackAttempted: false', async () => {
97
+ const r = await evaluateGate({
98
+ module: {
99
+ requiredModelCapabilities: ['structured-output'],
100
+ fallbackModel: { provider: 'anthropic', model: 'claude-opus-4-7' },
101
+ },
102
+ activeProvider: 'unknown-vendor',
103
+ activeModel: 'unknown-model',
104
+ substitutionSupported: false,
105
+ supportedProviders: ['anthropic', 'unknown-vendor'],
106
+ });
107
+ if (r.status === 404) return;
108
+ expect(r.body.outcome?.route).toBe('refuse');
109
+ expect(
110
+ r.body.outcome?.fallbackAttempted,
111
+ driver.describe(
112
+ 'RFCS/0031-envelope-variants-and-model-capabilities.md §E',
113
+ 'capabilities.modelCapabilities.substitutionSupported: false → host MUST NOT attempt fallback even when NodeModule.fallbackModel is declared → fallbackAttempted MUST be false (no attempt was made)',
114
+ ),
115
+ ).toBe(false);
116
+ });
117
+
118
+ it('recursive fallback NOT permitted — fallback that itself fails capability check → refuse with fallbackAttempted: true', async () => {
119
+ // Construct a scenario where fallback's provider is in supportedProviders
120
+ // BUT the fallback provider itself doesn't advertise the required capability.
121
+ // The probe map's 'unknown-vendor-2' has empty capabilities; the gate
122
+ // refuses with fallbackAttempted: true (RFC 0031 §"Unresolved questions" #3).
123
+ const r = await evaluateGate({
124
+ module: {
125
+ requiredModelCapabilities: ['structured-output'],
126
+ fallbackModel: { provider: 'unknown-vendor-2', model: 'fallback-model' },
127
+ },
128
+ activeProvider: 'unknown-vendor',
129
+ activeModel: 'unknown-model',
130
+ substitutionSupported: true,
131
+ supportedProviders: ['unknown-vendor', 'unknown-vendor-2'],
132
+ });
133
+ if (r.status === 404) return;
134
+ expect(r.body.outcome?.route).toBe('refuse');
135
+ expect(
136
+ r.body.outcome?.fallbackAttempted,
137
+ driver.describe(
138
+ 'RFCS/0031-envelope-variants-and-model-capabilities.md §"Unresolved questions" #3',
139
+ 'recursive fallback NOT permitted — when the declared fallback model itself fails the capability check, host MUST refuse with fallbackAttempted: true (NOT chain to another fallback)',
140
+ ),
141
+ ).toBe(true);
142
+ });
143
+ });
144
+
145
+ // End-to-end pipeline: a fixture-declared workflow whose only node carries a
146
+ // NodeModule with `requiredModelCapabilities: ['nonexistent-capability-9b3f']`
147
+ // (registered as `conformance.modelCapability.insufficient` on the reference
148
+ // host). The executor's model-capability gate at dispatch time refuses with
149
+ // `capability_not_provided` AND emits `model.capability.insufficient` into
150
+ // the run event log per RFC 0031 §D. Capability-gated AND fixture-gated:
151
+ // soft-skips when either is absent.
152
+
153
+ import { pollUntilTerminal } from '../lib/polling.js';
154
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
155
+
156
+ const E2E_FIXTURE = 'conformance-model-capability-insufficient';
157
+
158
+ describe.skipIf(HTTP_SKIP)('model-capability-insufficient: end-to-end refusal through executor', () => {
159
+ it('workflow with a node declaring requiredModelCapabilities the active provider does not satisfy fails with RunSnapshot.error.code = "capability_not_provided" AND emits model.capability.insufficient into the run event log BEFORE node.failed', async () => {
160
+ if (!isFixtureAdvertised(E2E_FIXTURE)) return; // fixture not seeded — soft-skip
161
+
162
+ const create = await driver.post('/v1/runs', { workflowId: E2E_FIXTURE });
163
+ expect(create.status).toBe(201);
164
+ const runId = (create.json as { runId: string }).runId;
165
+
166
+ const terminal = await pollUntilTerminal(runId, { timeoutMs: 10_000 });
167
+ expect(terminal.status).toBe('failed');
168
+ expect(
169
+ (terminal as { error?: { code?: string } }).error?.code,
170
+ driver.describe(
171
+ 'RFCS/0031-envelope-variants-and-model-capabilities.md §B step 4',
172
+ 'unmet capability without viable fallback MUST fail with error.code = "capability_not_provided"',
173
+ ),
174
+ ).toBe('capability_not_provided');
175
+
176
+ const eventsRes = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
177
+ expect(eventsRes.status).toBe(200);
178
+ const events = ((eventsRes.json as { events?: Array<{ type: string }> } | undefined)?.events ?? []);
179
+ const insufficientIdx = events.findIndex((e) => e.type === 'model.capability.insufficient');
180
+ const nodeFailedIdx = events.findIndex((e) => e.type === 'node.failed');
181
+ expect(insufficientIdx, 'model.capability.insufficient MUST appear in the event log').toBeGreaterThanOrEqual(0);
182
+ expect(nodeFailedIdx, 'node.failed MUST appear in the event log').toBeGreaterThanOrEqual(0);
183
+ expect(
184
+ insufficientIdx < nodeFailedIdx,
185
+ driver.describe(
186
+ 'RFCS/0031-envelope-variants-and-model-capabilities.md §D',
187
+ 'model.capability.insufficient MUST be emitted BEFORE node.failed (cause precedes effect)',
188
+ ),
189
+ ).toBe(true);
190
+ });
191
+
192
+ it('NO envelope emission occurs after the refusal (no node.completed, provider.usage, or envelope-reliability events)', async () => {
193
+ if (!isFixtureAdvertised(E2E_FIXTURE)) return; // fixture not seeded — soft-skip
194
+
195
+ const create = await driver.post('/v1/runs', { workflowId: E2E_FIXTURE });
196
+ expect(create.status).toBe(201);
197
+ const runId = (create.json as { runId: string }).runId;
198
+ await pollUntilTerminal(runId, { timeoutMs: 10_000 });
199
+
200
+ const eventsRes = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
201
+ const events = ((eventsRes.json as { events?: Array<{ type: string }> } | undefined)?.events ?? []);
202
+ const forbidden = [
203
+ 'node.completed',
204
+ 'provider.usage',
205
+ 'envelope.retry.attempted',
206
+ 'envelope.retry.exhausted',
207
+ 'envelope.refusal',
208
+ 'envelope.truncated',
209
+ 'envelope.nlToFormat.engaged',
210
+ 'envelope.recovery.applied',
211
+ ];
212
+ const leaked = events.filter((e) => forbidden.includes(e.type)).map((e) => e.type);
213
+ expect(
214
+ leaked,
215
+ driver.describe(
216
+ 'RFCS/0031-envelope-variants-and-model-capabilities.md §B step 4',
217
+ 'a refused dispatch MUST NOT emit any downstream envelope-emission events — the node never ran',
218
+ ),
219
+ ).toEqual([]);
220
+ });
221
+ });
@@ -0,0 +1,203 @@
1
+ /**
2
+ * model-capability-substituted — RFC 0031 §B step 3 + §D + §F runtime behavior.
3
+ *
4
+ * Capability-gated on `capabilities.modelCapabilities.supported: true`.
5
+ *
6
+ * Drives the host's `POST /v1/host/sample/test/evaluate-model-capability-gate`
7
+ * seam with synthetic inputs that hit each branch of the §B 4-step dispatch
8
+ * flow. The seam runs the pure `evaluateModelCapabilityGate()` evaluator
9
+ * and returns both the routing outcome AND the event payload the host
10
+ * would emit. Conformance asserts the decision-matrix + the event payload
11
+ * shape per RFC 0031 §D `modelCapabilitySubstituted`.
12
+ *
13
+ * @see RFCS/0031-envelope-variants-and-model-capabilities.md §B + §D + §F
14
+ * @see spec/v1/host-capabilities.md §"Model-capability declarations"
15
+ * @see schemas/run-event-payloads.schema.json §modelCapabilitySubstituted
16
+ */
17
+
18
+ import { describe, it, expect } from 'vitest';
19
+ import { driver } from '../lib/driver.js';
20
+
21
/** True when `OPENWOP_BASE_URL` is unset or empty; passed to `describe.skipIf` to skip the HTTP suites. */
const HTTP_SKIP =
  process.env.OPENWOP_BASE_URL === undefined || process.env.OPENWOP_BASE_URL === '';
22
+
23
/**
 * The slice of the `/.well-known/openwop` discovery document this suite reads.
 *
 * Leaf values are typed `unknown` on purpose: the tests validate their runtime
 * types, so nothing about them is assumed here.
 */
interface DiscoveryDoc {
  capabilities?: {
    /** Model-capability advertisement block; every member is optional. */
    modelCapabilities?: {
      /** Asserted to be an array of string identifiers when present. */
      advertised?: unknown;
      /** Asserted to be a boolean whenever the block is present. */
      supported?: unknown;
      /** Asserted to be a boolean when present. */
      substitutionSupported?: unknown;
    };
  };
}
32
+
33
/**
 * Routing outcome reported by the capability-gate test seam.
 *
 * Every field is optional; the tests in this file only read `route`.
 * NOTE(review): which fields a host populates for each route is not visible
 * from this file — confirm against the seam implementation.
 */
interface GateOutcome {
  /** Branch the gate selected. */
  route?: 'dispatch' | 'substitute' | 'refuse';

  // Provider/model the dispatch was originally aimed at.
  originalProvider?: string;
  originalModel?: string;

  // Provider/model of the fallback.
  fallbackProvider?: string;
  fallbackModel?: string;

  /** Capability identifiers reported as missing. */
  missingCapabilities?: string[];
  fallbackAttempted?: boolean;
}
42
+
43
/**
 * Response body of the capability-gate test seam: the routing outcome plus
 * the event the host would emit for it.
 */
interface GateResponse {
  outcome?: GateOutcome;
  /** The event the host would emit; the no-op dispatch test expects `null` here. */
  event?: {
    type?: string;
    payload?: Record<string, unknown>;
  } | null;
}
47
+
48
/**
 * Fetches the host's discovery document from `/.well-known/openwop`.
 *
 * @returns The response body cast to {@link DiscoveryDoc}, or `null` when the
 *   status is not 200 or the request throws. Callers treat `null` as a
 *   soft-skip.
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const response = await driver.get('/.well-known/openwop');
    return response.status === 200 ? (response.json as DiscoveryDoc) : null;
  } catch {
    // Best-effort probe: any transport failure is reported as "no document".
    return null;
  }
}
57
+
58
/**
 * POSTs a synthetic input to the host's capability-gate test seam.
 *
 * @param input - Request body forwarded to the seam unchanged.
 * @returns The HTTP status and the response body cast to {@link GateResponse}.
 *   The status is returned rather than asserted so callers can soft-skip on 404.
 */
async function evaluateGate(input: Record<string, unknown>): Promise<{ status: number; body: GateResponse }> {
  const { status, json } = await driver.post('/v1/host/sample/test/evaluate-model-capability-gate', input);
  return { status, body: json as GateResponse };
}
62
+
63
/**
 * Shape checks on the `capabilities.modelCapabilities` block of the discovery
 * document. Soft-skips when the document cannot be read or the block is absent.
 */
describe.skipIf(HTTP_SKIP)('model-capability-substituted: advertisement shape (RFC 0031 §E)', () => {
  // Identifiers this suite accepts as spec-reserved.
  const specReservedIds = ['structured-output', 'discriminator-enum', 'long-context', 'reasoning', 'function-calling'];
  // Host extension identifiers: x-host-<host>-<key>.
  const hostExtensionPattern = /^x-host-[a-z][a-z0-9-]*-[a-z][a-z0-9-]*$/;

  it('capabilities.modelCapabilities (when present) conforms to RFC 0031 §E', async () => {
    const discovery = await readDiscovery();
    if (discovery === null) return;
    const block = discovery.capabilities?.modelCapabilities;
    if (block === undefined) return;

    const supportedType = typeof block.supported;
    expect(
      supportedType,
      driver.describe(
        'schemas/capabilities.schema.json §modelCapabilities',
        'capabilities.modelCapabilities.supported MUST be boolean when the block is advertised',
      ),
    ).toBe('boolean');

    if (block.advertised !== undefined) {
      const advertisedIsArray = Array.isArray(block.advertised);
      expect(
        advertisedIsArray,
        driver.describe('RFCS/0031-envelope-variants-and-model-capabilities.md §E', 'modelCapabilities.advertised MUST be an array of capability identifiers'),
      ).toBe(true);

      for (const entry of block.advertised as unknown[]) {
        expect(typeof entry, 'each advertised identifier MUST be a string').toBe('string');
        const idStr = String(entry);
        const accepted = specReservedIds.includes(idStr) || hostExtensionPattern.test(idStr);
        expect(
          accepted,
          driver.describe(
            'RFCS/0031-envelope-variants-and-model-capabilities.md §C',
            `advertised identifier "${idStr}" MUST be spec-reserved (structured-output, discriminator-enum, long-context, reasoning, function-calling) or match the x-host-<host>-<key> extension pattern`,
          ),
        ).toBe(true);
      }
    }

    if (block.substitutionSupported !== undefined) {
      expect(typeof block.substitutionSupported, 'substitutionSupported MUST be boolean').toBe('boolean');
    }
  });
});
101
+
102
/**
 * Drives the capability-gate test seam through three branches of the dispatch
 * flow (dispatch, substitute, refuse) and checks the reported route and the
 * event the host would emit.
 *
 * Each test soft-skips when the seam answers 404 (host does not expose it).
 */
describe.skipIf(HTTP_SKIP)('model-capability-substituted: dispatch behavior (RFC 0031 §B step 3 + §D)', () => {
  it('all required capabilities met → outcome: dispatch (no event emitted)', async () => {
    const r = await evaluateGate({
      module: { requiredModelCapabilities: ['structured-output', 'function-calling'] },
      activeProvider: 'anthropic',
      activeModel: 'claude-3-5-sonnet',
      substitutionSupported: true,
      supportedProviders: ['anthropic', 'openai'],
    });
    if (r.status === 404) return; // host doesn't expose the seam
    expect(
      r.body.outcome?.route,
      driver.describe(
        'RFCS/0031-envelope-variants-and-model-capabilities.md §B step 2',
        'all required model capabilities met → route MUST be "dispatch" (gate is a no-op)',
      ),
    ).toBe('dispatch');
    expect(r.body.event, 'no event emitted when gate is a no-op').toBeNull();
  });

  it('unmet + fallback declared + authenticatable → outcome: substitute + event with originalProvider/originalModel/fallbackProvider/fallbackModel/missingCapabilities', async () => {
    const r = await evaluateGate({
      module: {
        requiredModelCapabilities: ['structured-output', 'long-context'],
        fallbackModel: { provider: 'anthropic', model: 'claude-opus-4-7' },
      },
      // Simulate an active provider that cannot satisfy the requirements.
      // The seam's probe map returns the spec-known capability set for known
      // providers, so an unknown provider id ('unknown-vendor') makes the gate
      // see an empty advertised set. The declared fallback ('anthropic') IS in
      // supportedProviders and advertises structured-output + long-context,
      // so the gate is expected to substitute rather than refuse.
      activeProvider: 'unknown-vendor',
      activeModel: 'unknown-model',
      substitutionSupported: true,
      supportedProviders: ['anthropic', 'openai', 'unknown-vendor'],
      nodeId: 'writer-node',
    });
    if (r.status === 404) return;
    expect(
      r.body.outcome?.route,
      driver.describe(
        'RFCS/0031-envelope-variants-and-model-capabilities.md §B step 3',
        'unmet capability + declared fallback + fallback provider authenticatable → route MUST be "substitute"',
      ),
    ).toBe('substitute');
    expect(
      r.body.event?.type,
      driver.describe(
        'RFCS/0031-envelope-variants-and-model-capabilities.md §D',
        'substitute path MUST emit `model.capability.substituted`',
      ),
    ).toBe('model.capability.substituted');
    // Default to an empty payload so a missing one fails the field assertions
    // below instead of throwing.
    const payload = (r.body.event?.payload ?? {}) as Record<string, unknown>;
    expect(payload.nodeId, 'payload.nodeId MUST mirror the request').toBe('writer-node');
    expect(payload.originalProvider).toBe('unknown-vendor');
    expect(payload.originalModel).toBe('unknown-model');
    expect(payload.fallbackProvider).toBe('anthropic');
    expect(payload.fallbackModel).toBe('claude-opus-4-7');
    expect(
      Array.isArray(payload.missingCapabilities) &&
        (payload.missingCapabilities as string[]).includes('structured-output'),
      driver.describe(
        'schemas/run-event-payloads.schema.json §modelCapabilitySubstituted',
        'missingCapabilities[] MUST include the subset of required capabilities the active model did not satisfy',
      ),
    ).toBe(true);
  });

  it('unmet + substitutionSupported: false → outcome: refuse with fallbackAttempted: false (host posture override)', async () => {
    const r = await evaluateGate({
      module: {
        requiredModelCapabilities: ['structured-output'],
        // Fallback declared but the gate refuses BEFORE attempting because the
        // host's posture is "no substitution" per RFC 0031 §E.
        fallbackModel: { provider: 'anthropic', model: 'claude-opus-4-7' },
      },
      activeProvider: 'unknown-vendor',
      activeModel: 'unknown-model',
      substitutionSupported: false,
      supportedProviders: ['anthropic', 'unknown-vendor'],
    });
    if (r.status === 404) return;
    expect(
      r.body.outcome?.route,
      driver.describe(
        'RFCS/0031-envelope-variants-and-model-capabilities.md §E',
        'capabilities.modelCapabilities.substitutionSupported: false → host MUST refuse on any unmet capability even when NodeModule.fallbackModel is declared',
      ),
    ).toBe('refuse');
    expect(r.body.event?.type).toBe('model.capability.insufficient');
    // Optional chaining on the payload: an event without a payload must
    // produce the descriptive assertion failure, not a TypeError.
    expect(
      r.body.event?.payload?.fallbackAttempted,
      driver.describe(
        'schemas/run-event-payloads.schema.json §modelCapabilityInsufficient',
        'fallbackAttempted MUST be false when the refusal is driven by substitutionSupported: false (host posture, not fallback failure)',
      ),
    ).toBe(false);
  });
});