@openwop/openwop-conformance 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +156 -1
- package/README.md +3 -2
- package/api/asyncapi.yaml +8 -0
- package/api/openapi.yaml +371 -1
- package/api/redocly.yaml +15 -0
- package/coverage.md +26 -5
- package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
- package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
- package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
- package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
- package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
- package/fixtures/conformance-envelope-nl-to-format-engaged.json +41 -0
- package/fixtures/conformance-envelope-recovery-applied.json +39 -0
- package/fixtures/conformance-envelope-refusal.json +38 -0
- package/fixtures/conformance-envelope-retry-attempted.json +39 -0
- package/fixtures/conformance-envelope-retry-exhausted.json +38 -0
- package/fixtures/conformance-envelope-truncated.json +39 -0
- package/fixtures/conformance-envelope-truncation-cap-exhaustion.json +39 -0
- package/fixtures/conformance-model-capability-insufficient.json +25 -0
- package/fixtures/conformance-multi-agent-confidence-escalation.json +49 -0
- package/fixtures/conformance-multi-agent-handoff-child.json +27 -0
- package/fixtures/conformance-multi-agent-handoff.json +49 -0
- package/fixtures/conformance-prompt-all-four-kinds.json +39 -0
- package/fixtures/conformance-prompt-end-to-end.json +33 -0
- package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
- package/fixtures/conformance-subworkflow-mid-run-mutation-child.json +31 -0
- package/fixtures/conformance-subworkflow-mid-run-mutation.json +33 -0
- package/fixtures/openwop-smoke-cost-emit.json +37 -0
- package/fixtures/prompt-templates/conformance-prompt-few-shot-2.json +14 -0
- package/fixtures/prompt-templates/conformance-prompt-few-shot.json +14 -0
- package/fixtures/prompt-templates/conformance-prompt-schema-hint.json +14 -0
- package/fixtures/prompt-templates/conformance-prompt-secret-redaction.json +23 -0
- package/fixtures/prompt-templates/conformance-prompt-trust-marker.json +23 -0
- package/fixtures/prompt-templates/conformance-prompt-writer-system.json +15 -0
- package/fixtures/prompt-templates/conformance-prompt-writer-user.json +15 -0
- package/fixtures.md +45 -0
- package/package.json +1 -1
- package/schemas/README.md +5 -0
- package/schemas/agent-manifest.schema.json +16 -0
- package/schemas/capabilities.schema.json +390 -0
- package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
- package/schemas/envelopes/clarification.request.schema.json +9 -0
- package/schemas/envelopes/error.schema.json +4 -0
- package/schemas/envelopes/schema.request.schema.json +4 -0
- package/schemas/envelopes/schema.response.schema.json +1 -1
- package/schemas/node-pack-manifest.schema.json +28 -0
- package/schemas/orchestrator-decision.schema.json +12 -0
- package/schemas/prompt-kind.schema.json +8 -0
- package/schemas/prompt-pack-manifest.schema.json +80 -0
- package/schemas/prompt-ref.schema.json +40 -0
- package/schemas/prompt-template.schema.json +149 -0
- package/schemas/registry-version-manifest.schema.json +5 -0
- package/schemas/run-ancestry-response.schema.json +54 -0
- package/schemas/run-event-payloads.schema.json +513 -11
- package/schemas/run-event.schema.json +17 -1
- package/schemas/run-snapshot.schema.json +3 -2
- package/schemas/workflow-definition.schema.json +19 -1
- package/src/lib/driver.ts +15 -0
- package/src/lib/env.ts +51 -0
- package/src/lib/event-log-query.ts +62 -0
- package/src/lib/fixtures.ts +38 -1
- package/src/lib/host-toggle.ts +54 -0
- package/src/lib/llm-cache-key-recipe.ts +68 -0
- package/src/lib/multi-agent-capabilities.ts +10 -0
- package/src/lib/otel-scrape.ts +59 -0
- package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
- package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
- package/src/scenarios/aiEnvelope.contractRefusal.test.ts +224 -15
- package/src/scenarios/aiEnvelope.correlationReplay.test.ts +257 -25
- package/src/scenarios/aiEnvelope.redaction.test.ts +210 -29
- package/src/scenarios/aiEnvelope.schemaDrift.test.ts +163 -24
- package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +262 -12
- package/src/scenarios/aiEnvelope.universalKinds.test.ts +107 -16
- package/src/scenarios/blob-presign-expiry.test.ts +42 -9
- package/src/scenarios/blob-roundtrip.test.ts +0 -0
- package/src/scenarios/cache-ttl-expiry.test.ts +34 -8
- package/src/scenarios/cost-attribution.test.ts +124 -11
- package/src/scenarios/cross-engine-append-ordering.test.ts +99 -0
- package/src/scenarios/cross-host-ancestry-endpoint.test.ts +136 -0
- package/src/scenarios/cross-host-causation-shape.test.ts +117 -0
- package/src/scenarios/cross-host-traceparent-propagation.test.ts +60 -0
- package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
- package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
- package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
- package/src/scenarios/envelope-completion-distinguishes-truncation.test.ts +223 -0
- package/src/scenarios/envelope-nl-to-format-engaged.test.ts +152 -0
- package/src/scenarios/envelope-reasoning-secret-redaction.test.ts +343 -0
- package/src/scenarios/envelope-reasoning-shape.test.ts +190 -0
- package/src/scenarios/envelope-recovery-applied.test.ts +229 -0
- package/src/scenarios/envelope-refusal-shape.test.ts +289 -0
- package/src/scenarios/envelope-retry-attempted.test.ts +258 -0
- package/src/scenarios/envelope-retry-exhausted.test.ts +168 -0
- package/src/scenarios/envelope-tier-one-subset-static.test.ts +229 -0
- package/src/scenarios/envelope-truncated.test.ts +136 -0
- package/src/scenarios/envelope-truncation-cap-exhaustion.test.ts +144 -0
- package/src/scenarios/envelope-variant-discriminator-static.test.ts +152 -0
- package/src/scenarios/fixtures-gating.test.ts +139 -1
- package/src/scenarios/fixtures-valid.test.ts +123 -15
- package/src/scenarios/kv-ttl-expiry.test.ts +40 -9
- package/src/scenarios/model-capability-insufficient.test.ts +221 -0
- package/src/scenarios/model-capability-substituted.test.ts +203 -0
- package/src/scenarios/multi-agent-confidence-escalation.test.ts +164 -0
- package/src/scenarios/multi-agent-handoff-state-machine.test.ts +167 -0
- package/src/scenarios/multi-agent-memory-lifecycle.test.ts +124 -0
- package/src/scenarios/multi-region-idempotency.test.ts +58 -0
- package/src/scenarios/node-module-required-capabilities-shape.test.ts +185 -0
- package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
- package/src/scenarios/pack-registry-publish.test.ts +231 -51
- package/src/scenarios/prompt-all-four-kinds-events.test.ts +198 -0
- package/src/scenarios/prompt-composed-secret-redaction.test.ts +178 -0
- package/src/scenarios/prompt-composed-trust-marker.test.ts +165 -0
- package/src/scenarios/prompt-end-to-end-events.test.ts +202 -0
- package/src/scenarios/prompt-list-and-fetch.test.ts +207 -0
- package/src/scenarios/prompt-mutable-lifecycle.test.ts +216 -0
- package/src/scenarios/prompt-pack-install.test.ts +187 -0
- package/src/scenarios/prompt-render-deterministic.test.ts +240 -0
- package/src/scenarios/prompt-resolution-chain-agent-intrinsic.test.ts +140 -0
- package/src/scenarios/prompt-resolution-chain-fallback-cascade.test.ts +172 -0
- package/src/scenarios/prompt-resolution-chain-node-wins.test.ts +144 -0
- package/src/scenarios/prompt-template-shape.test.ts +359 -0
- package/src/scenarios/provider-usage.test.ts +185 -0
- package/src/scenarios/queue-ack-nack-dlq.test.ts +64 -10
- package/src/scenarios/queue-publish-consume-roundtrip.test.ts +50 -10
- package/src/scenarios/replay-divergence-at-refusal.test.ts +134 -0
- package/src/scenarios/replay-llm-cache-key-portable.test.ts +197 -0
- package/src/scenarios/replay-llm-cache-key.test.ts +127 -25
- package/src/scenarios/replay-observable-sequence-determinism.test.ts +80 -0
- package/src/scenarios/sandbox-capability-gate-respected.test.ts +31 -0
- package/src/scenarios/sandbox-memory-cap.test.ts +61 -0
- package/src/scenarios/sandbox-no-cross-pack-mutation.test.ts +35 -0
- package/src/scenarios/sandbox-no-host-env-leak.test.ts +38 -0
- package/src/scenarios/sandbox-no-host-fs-escape.test.ts +91 -0
- package/src/scenarios/sandbox-no-host-process-escape.test.ts +30 -0
- package/src/scenarios/sandbox-no-network-escape.test.ts +49 -0
- package/src/scenarios/sandbox-timeout-cap.test.ts +61 -0
- package/src/scenarios/search-bm25-roundtrip.test.ts +54 -9
- package/src/scenarios/spec-corpus-validity.test.ts +34 -6
- package/src/scenarios/sql-transaction-atomicity.test.ts +37 -8
- package/src/scenarios/stream-subscribe-from-beginning.test.ts +46 -9
- package/src/scenarios/subworkflow-input-mapping.test.ts +146 -10
- package/src/scenarios/table-cursor-pagination.test.ts +47 -9
- package/src/scenarios/table-schema-enforcement.test.ts +46 -9
- package/src/scenarios/vector-knn-roundtrip.test.ts +50 -10
- package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
|
@@ -16,20 +16,30 @@ import { FIXTURES_DIR, SCHEMAS_DIR } from '../lib/paths.js';
|
|
|
16
16
|
// checkouts (schemas one level above the conformance package) and the
|
|
17
17
|
// published tarball (schemas vendored at the package root by `prepack`).
|
|
18
18
|
const PACK_MANIFEST_FIXTURES_DIR = join(FIXTURES_DIR, 'pack-manifests');
|
|
19
|
+
const PROMPT_TEMPLATE_FIXTURES_DIR = join(FIXTURES_DIR, 'prompt-templates');
|
|
19
20
|
const SCHEMA_PATH = join(SCHEMAS_DIR, 'workflow-definition.schema.json');
|
|
20
21
|
const PACK_MANIFEST_SCHEMA_PATH = join(SCHEMAS_DIR, 'node-pack-manifest.schema.json');
|
|
22
|
+
const PROMPT_TEMPLATE_SCHEMA_PATH = join(SCHEMAS_DIR, 'prompt-template.schema.json');
|
|
21
23
|
|
|
22
24
|
describe('fixtures: workflow-definition schema validity', () => {
|
|
23
25
|
const ajv = new Ajv2020({ allErrors: true, strict: false });
|
|
24
26
|
addFormats(ajv);
|
|
25
|
-
// Pre-load
|
|
26
|
-
//
|
|
27
|
-
//
|
|
28
|
-
//
|
|
29
|
-
//
|
|
30
|
-
|
|
31
|
-
|
|
27
|
+
// Pre-load peer schemas that workflow-definition cross-`$ref`s:
|
|
28
|
+
// - agent-ref.schema.json — `WorkflowNode.agent` (Phase 1 multi-agent)
|
|
29
|
+
// - prompt-ref.schema.json — `WorkflowDefinition.defaults.promptRefs.*`
|
|
30
|
+
// (RFC 0029 §B resolution-chain layer 3)
|
|
31
|
+
// - prompt-kind.schema.json — transitively referenced by prompt-ref's
|
|
32
|
+
// object form when validating PromptRef variants
|
|
33
|
+
// Register each under both the canonical $id and the relative file
|
|
34
|
+
// name so Ajv resolves either way the host schema spelled the ref.
|
|
35
|
+
const agentRefSchema = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'agent-ref.schema.json'), 'utf8'));
|
|
36
|
+
const promptRefSchema = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'prompt-ref.schema.json'), 'utf8'));
|
|
37
|
+
const promptKindSchema = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'prompt-kind.schema.json'), 'utf8'));
|
|
32
38
|
ajv.addSchema(agentRefSchema, 'agent-ref.schema.json');
|
|
39
|
+
ajv.addSchema(promptRefSchema, 'prompt-ref.schema.json');
|
|
40
|
+
ajv.addSchema(promptRefSchema, './prompt-ref.schema.json');
|
|
41
|
+
ajv.addSchema(promptKindSchema, 'prompt-kind.schema.json');
|
|
42
|
+
ajv.addSchema(promptKindSchema, './prompt-kind.schema.json');
|
|
33
43
|
const schema = JSON.parse(readFileSync(SCHEMA_PATH, 'utf8'));
|
|
34
44
|
const validate = ajv.compile(schema);
|
|
35
45
|
|
|
@@ -85,14 +95,19 @@ describe('fixtures: node-pack-manifest schema validity', () => {
|
|
|
85
95
|
// `private.<host>.*` scope is accepted by the canonical schema).
|
|
86
96
|
const ajv = new Ajv2020({ allErrors: true, strict: false });
|
|
87
97
|
addFormats(ajv);
|
|
88
|
-
// Pre-load
|
|
89
|
-
//
|
|
90
|
-
//
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
);
|
|
98
|
+
// Pre-load peer schemas. agent-manifest references prompt-ref (RFC 0029
|
|
99
|
+
// §B `AgentManifest.promptOverrides[kind]` + `promptLibraryRef`); prompt-ref
|
|
100
|
+
// transitively references prompt-kind. Register each under both the
|
|
101
|
+
// canonical $id and the relative file name so Ajv resolves either way
|
|
102
|
+
// the consumer schema spelled the ref.
|
|
103
|
+
const agentManifestSchema = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'agent-manifest.schema.json'), 'utf8'));
|
|
104
|
+
const promptRefSchema = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'prompt-ref.schema.json'), 'utf8'));
|
|
105
|
+
const promptKindSchema = JSON.parse(readFileSync(join(SCHEMAS_DIR, 'prompt-kind.schema.json'), 'utf8'));
|
|
106
|
+
ajv.addSchema(agentManifestSchema, 'agent-manifest.schema.json');
|
|
107
|
+
ajv.addSchema(promptRefSchema, 'prompt-ref.schema.json');
|
|
108
|
+
ajv.addSchema(promptRefSchema, './prompt-ref.schema.json');
|
|
109
|
+
ajv.addSchema(promptKindSchema, 'prompt-kind.schema.json');
|
|
110
|
+
ajv.addSchema(promptKindSchema, './prompt-kind.schema.json');
|
|
96
111
|
const schema = JSON.parse(readFileSync(PACK_MANIFEST_SCHEMA_PATH, 'utf8'));
|
|
97
112
|
const validate = ajv.compile(schema);
|
|
98
113
|
|
|
@@ -138,3 +153,96 @@ describe('fixtures: node-pack-manifest schema validity', () => {
|
|
|
138
153
|
).toBeGreaterThan(0);
|
|
139
154
|
});
|
|
140
155
|
});
|
|
156
|
+
|
|
157
|
+
describe('fixtures: prompt-template schema validity', () => {
|
|
158
|
+
// PromptTemplate fixtures live in `fixtures/prompt-templates/` per
|
|
159
|
+
// RFC 0027 §A. Like pack manifests, they're schema-level proof points,
|
|
160
|
+
// not seeded into a workflow store. They exist so the conformance
|
|
161
|
+
// suite has canonical positive fixtures for the prompt-template-shape
|
|
162
|
+
// scenario, and so future RFCs (0028 prompt packs, 0029 resolution
|
|
163
|
+
// chain) can reference a stable fixture set.
|
|
164
|
+
const ajv = new Ajv2020({ allErrors: true, strict: false });
|
|
165
|
+
addFormats(ajv);
|
|
166
|
+
// Pre-load prompt-kind so the cross-schema `$ref` in
|
|
167
|
+
// prompt-template.schema.json resolves. The template references
|
|
168
|
+
// prompt-kind via `./prompt-kind.schema.json` (relative URI; see
|
|
169
|
+
// RFC 0027 commit notes for the redocly compatibility rationale).
|
|
170
|
+
// Register under both the canonical `$id` and the relative form so
|
|
171
|
+
// Ajv resolves either way.
|
|
172
|
+
const promptKindPath = join(SCHEMAS_DIR, 'prompt-kind.schema.json');
|
|
173
|
+
const promptKindSchema = JSON.parse(readFileSync(promptKindPath, 'utf8'));
|
|
174
|
+
ajv.addSchema(promptKindSchema, 'prompt-kind.schema.json');
|
|
175
|
+
ajv.addSchema(promptKindSchema, './prompt-kind.schema.json');
|
|
176
|
+
const schema = JSON.parse(readFileSync(PROMPT_TEMPLATE_SCHEMA_PATH, 'utf8'));
|
|
177
|
+
const validate = ajv.compile(schema);
|
|
178
|
+
|
|
179
|
+
const files = readdirSync(PROMPT_TEMPLATE_FIXTURES_DIR)
|
|
180
|
+
.filter((f) => f.endsWith('.json'))
|
|
181
|
+
.sort();
|
|
182
|
+
|
|
183
|
+
it('finds at least one prompt-template fixture', () => {
|
|
184
|
+
expect(
|
|
185
|
+
files.length,
|
|
186
|
+
'Expected at least one PromptTemplate fixture under fixtures/prompt-templates/',
|
|
187
|
+
).toBeGreaterThan(0);
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
for (const file of files) {
|
|
191
|
+
it(`prompt-templates/${file} validates against prompt-template.schema.json`, () => {
|
|
192
|
+
const data = JSON.parse(
|
|
193
|
+
readFileSync(join(PROMPT_TEMPLATE_FIXTURES_DIR, file), 'utf8'),
|
|
194
|
+
);
|
|
195
|
+
const ok = validate(data);
|
|
196
|
+
const errors = (validate.errors ?? [])
|
|
197
|
+
.map((e: ErrorObject) => `${e.instancePath || '/'}: ${e.message}`)
|
|
198
|
+
.join('\n');
|
|
199
|
+
expect(
|
|
200
|
+
ok,
|
|
201
|
+
`Fixture prompt-templates/${file} fails prompt-template schema:\n${errors}`,
|
|
202
|
+
).toBe(true);
|
|
203
|
+
});
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
it('every fixture templateId matches its filename', () => {
|
|
207
|
+
// Filename convention: `<templateId-dot-form-with-dots-as-dashes>.json`.
|
|
208
|
+
// The fixture set uses dot-separated templateIds (e.g.,
|
|
209
|
+
// `conformance.prompt.writer-system`) which map directly to filenames
|
|
210
|
+
// with dots replaced by dashes (`conformance-prompt-writer-system.json`). The
|
|
211
|
+
// file→id mapping is loose (the suite doesn't enforce it) but we
|
|
212
|
+
// assert templateId presence so each fixture is self-describing.
|
|
213
|
+
for (const file of files) {
|
|
214
|
+
const data = JSON.parse(
|
|
215
|
+
readFileSync(join(PROMPT_TEMPLATE_FIXTURES_DIR, file), 'utf8'),
|
|
216
|
+
) as { templateId: string };
|
|
217
|
+
expect(
|
|
218
|
+
typeof data.templateId,
|
|
219
|
+
`Fixture prompt-templates/${file} MUST declare a templateId`,
|
|
220
|
+
).toBe('string');
|
|
221
|
+
expect(data.templateId.length).toBeGreaterThan(0);
|
|
222
|
+
}
|
|
223
|
+
});
|
|
224
|
+
|
|
225
|
+
it('every secret-source variable lives in a fixture tagged for the secret-redaction scenario', () => {
|
|
226
|
+
// SECURITY regression pin: a fixture that declares a `secret`-source
|
|
227
|
+
// variable but isn't visible to the prompt-composed-secret-redaction
|
|
228
|
+
// scenario could mask a redaction failure. We require every
|
|
229
|
+
// fixture carrying secret-source variables to advertise the
|
|
230
|
+
// `secret-redaction` tag so the scenario discovers it.
|
|
231
|
+
for (const file of files) {
|
|
232
|
+
const data = JSON.parse(
|
|
233
|
+
readFileSync(join(PROMPT_TEMPLATE_FIXTURES_DIR, file), 'utf8'),
|
|
234
|
+
) as {
|
|
235
|
+
templateId: string;
|
|
236
|
+
variables?: Array<{ name: string; source?: string }>;
|
|
237
|
+
tags?: string[];
|
|
238
|
+
};
|
|
239
|
+
const hasSecretSource = (data.variables ?? []).some((v) => v.source === 'secret');
|
|
240
|
+
if (hasSecretSource) {
|
|
241
|
+
expect(
|
|
242
|
+
(data.tags ?? []).includes('secret-redaction'),
|
|
243
|
+
`Fixture prompt-templates/${file} declares a secret-source variable but lacks the 'secret-redaction' tag`,
|
|
244
|
+
).toBe(true);
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
});
|
|
248
|
+
});
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* kv-ttl-expiry — RFC 0015 advertisement-shape verification + behavioral
|
|
2
|
+
* kv-ttl-expiry — RFC 0015 advertisement-shape verification + behavioral roundtrip.
|
|
3
3
|
*
|
|
4
|
-
* Status: ACTIVE (advertisement-shape). RFC 0015 promoted to
|
|
5
|
-
* 2026-05-17. The matching `capabilities.kvStorage` block has
|
|
6
|
-
* `schemas/capabilities.schema.json`. This scenario asserts the
|
|
7
|
-
* shape against any host that boots the conformance suite, and
|
|
8
|
-
*
|
|
9
|
-
*
|
|
4
|
+
* Status: ACTIVE (advertisement-shape + behavioral). RFC 0015 promoted to
|
|
5
|
+
* `Active` 2026-05-17. The matching `capabilities.kvStorage` block has
|
|
6
|
+
* landed in `schemas/capabilities.schema.json`. This scenario asserts the
|
|
7
|
+
* advertisement shape against any host that boots the conformance suite, and
|
|
8
|
+
* exercises the behavioral surface through the `/v1/host/sample/test/surface`
|
|
9
|
+
* seam (soft-skips when the host answers HTTP 404, i.e. doesn't expose it).
|
|
10
10
|
*
|
|
11
11
|
* Summary: TTL honored with at most a 1-second drift on expiry visibility.
|
|
12
12
|
*
|
|
@@ -42,6 +42,37 @@ describe('kv-ttl-expiry: advertisement shape (RFC 0015)', () => {
|
|
|
42
42
|
});
|
|
43
43
|
});
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
async function call(op: string, args: Record<string, unknown>) {
|
|
46
|
+
return driver.post('/v1/host/sample/test/surface', { tenantId: 'tenant-a', surface: 'kv', op, args });
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
describe('kv-ttl-expiry: behavioral (RFC 0015 §B point 3 — 1s TTL drift)', () => {
|
|
50
|
+
it('set with ttlSeconds=2 → get before expiry returns value; get after expiry returns found:false', async () => {
|
|
51
|
+
const probe = await call('get', { key: '__ttl-probe__' });
|
|
52
|
+
if (probe.status === 404) return; // host doesn't expose the seam
|
|
53
|
+
const key = `ttl-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
54
|
+
const setRes = await call('set', { key, value: 'expires-soon', ttlSeconds: 2 });
|
|
55
|
+
expect(setRes.status).toBe(200);
|
|
56
|
+
|
|
57
|
+
// Read within the window
|
|
58
|
+
const within = await call('get', { key });
|
|
59
|
+
expect(within.status).toBe(200);
|
|
60
|
+
const withinBody = within.json as { value?: unknown; found?: boolean };
|
|
61
|
+
expect(
|
|
62
|
+
withinBody.value,
|
|
63
|
+
driver.describe('RFC 0015 §B point 3', 'get within TTL window MUST return the stored value'),
|
|
64
|
+
).toBe('expires-soon');
|
|
65
|
+
expect(withinBody.found).toBe(true);
|
|
66
|
+
|
|
67
|
+
// Wait past expiry (2s TTL + 1s drift allowance per RFC 0015 §B point 3)
|
|
68
|
+
await new Promise((r) => setTimeout(r, 3000));
|
|
69
|
+
|
|
70
|
+
const after = await call('get', { key });
|
|
71
|
+
expect(after.status).toBe(200);
|
|
72
|
+
const afterBody = after.json as { value?: unknown; found?: boolean };
|
|
73
|
+
expect(
|
|
74
|
+
afterBody.found,
|
|
75
|
+
driver.describe('RFC 0015 §B point 3', 'get after TTL expiry MUST surface as found:false (≤1s drift)'),
|
|
76
|
+
).toBe(false);
|
|
77
|
+
});
|
|
47
78
|
});
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* model-capability-insufficient — RFC 0031 §B step 4 + §D runtime behavior.
|
|
3
|
+
*
|
|
4
|
+
* Capability-gated on `capabilities.modelCapabilities.supported: true`.
|
|
5
|
+
* Drives the host's `POST /v1/host/sample/test/evaluate-model-capability-gate`
|
|
6
|
+
* seam through the refusal branches of the §B 4-step dispatch flow.
|
|
7
|
+
*
|
|
8
|
+
* @see RFCS/0031-envelope-variants-and-model-capabilities.md §B step 4 + §D
|
|
9
|
+
* @see schemas/run-event-payloads.schema.json §modelCapabilityInsufficient
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { describe, it, expect } from 'vitest';
|
|
13
|
+
import { driver } from '../lib/driver.js';
|
|
14
|
+
|
|
15
|
+
const HTTP_SKIP = !process.env.OPENWOP_BASE_URL;
|
|
16
|
+
|
|
17
|
+
interface GateResponse {
|
|
18
|
+
outcome?: {
|
|
19
|
+
route?: 'dispatch' | 'substitute' | 'refuse';
|
|
20
|
+
missingCapabilities?: string[];
|
|
21
|
+
fallbackAttempted?: boolean;
|
|
22
|
+
};
|
|
23
|
+
event?: { type?: string; payload?: Record<string, unknown> } | null;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
async function evaluateGate(input: Record<string, unknown>): Promise<{ status: number; body: GateResponse }> {
|
|
27
|
+
const res = await driver.post('/v1/host/sample/test/evaluate-model-capability-gate', input);
|
|
28
|
+
return { status: res.status, body: res.json as GateResponse };
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
describe.skipIf(HTTP_SKIP)('model-capability-insufficient: dispatch refusal (RFC 0031 §B step 4 + §D)', () => {
|
|
32
|
+
it('unmet + NO fallbackModel declared → refuse with fallbackAttempted: false', async () => {
|
|
33
|
+
const r = await evaluateGate({
|
|
34
|
+
module: { requiredModelCapabilities: ['structured-output', 'reasoning'] },
|
|
35
|
+
// no fallbackModel
|
|
36
|
+
activeProvider: 'unknown-vendor',
|
|
37
|
+
activeModel: 'unknown-model',
|
|
38
|
+
substitutionSupported: true,
|
|
39
|
+
supportedProviders: ['unknown-vendor'],
|
|
40
|
+
nodeId: 'editor-node',
|
|
41
|
+
});
|
|
42
|
+
if (r.status === 404) return;
|
|
43
|
+
expect(
|
|
44
|
+
r.body.outcome?.route,
|
|
45
|
+
driver.describe(
|
|
46
|
+
'RFCS/0031-envelope-variants-and-model-capabilities.md §B step 4',
|
|
47
|
+
'unmet capability + no fallbackModel declared → host MUST refuse',
|
|
48
|
+
),
|
|
49
|
+
).toBe('refuse');
|
|
50
|
+
expect(
|
|
51
|
+
r.body.outcome?.fallbackAttempted,
|
|
52
|
+
driver.describe(
|
|
53
|
+
'schemas/run-event-payloads.schema.json §modelCapabilityInsufficient',
|
|
54
|
+
'fallbackAttempted MUST be false when no fallbackModel was declared on the NodeModule',
|
|
55
|
+
),
|
|
56
|
+
).toBe(false);
|
|
57
|
+
expect(
|
|
58
|
+
r.body.event?.type,
|
|
59
|
+
driver.describe(
|
|
60
|
+
'RFCS/0031-envelope-variants-and-model-capabilities.md §D',
|
|
61
|
+
'refuse path MUST emit `model.capability.insufficient` BEFORE the node failure',
|
|
62
|
+
),
|
|
63
|
+
).toBe('model.capability.insufficient');
|
|
64
|
+
const payload = (r.body.event?.payload ?? {}) as Record<string, unknown>;
|
|
65
|
+
expect(payload.nodeId).toBe('editor-node');
|
|
66
|
+
expect(payload.provider).toBe('unknown-vendor');
|
|
67
|
+
expect(payload.fallbackAttempted).toBe(false);
|
|
68
|
+
expect(Array.isArray(payload.missingCapabilities)).toBe(true);
|
|
69
|
+
});
|
|
70
|
+
|
|
71
|
+
it('unmet + fallback declared but provider NOT in supportedProviders → refuse with fallbackAttempted: true', async () => {
|
|
72
|
+
const r = await evaluateGate({
|
|
73
|
+
module: {
|
|
74
|
+
requiredModelCapabilities: ['structured-output'],
|
|
75
|
+
fallbackModel: { provider: 'unauthenticated-vendor', model: 'foo' },
|
|
76
|
+
},
|
|
77
|
+
activeProvider: 'unknown-vendor',
|
|
78
|
+
activeModel: 'unknown-model',
|
|
79
|
+
substitutionSupported: true,
|
|
80
|
+
// Fallback's provider is NOT in supportedProviders — host cannot
|
|
81
|
+
// authenticate per RFC 0031 §B step 3 final clause.
|
|
82
|
+
supportedProviders: ['anthropic', 'unknown-vendor'],
|
|
83
|
+
});
|
|
84
|
+
if (r.status === 404) return;
|
|
85
|
+
expect(r.body.outcome?.route).toBe('refuse');
|
|
86
|
+
expect(
|
|
87
|
+
r.body.outcome?.fallbackAttempted,
|
|
88
|
+
driver.describe(
|
|
89
|
+
'RFCS/0031-envelope-variants-and-model-capabilities.md §B step 3',
|
|
90
|
+
'fallback provider NOT in capabilities.aiProviders.supported[] → host cannot authenticate → fallbackAttempted MUST be true (the attempt failed at credential resolution)',
|
|
91
|
+
),
|
|
92
|
+
).toBe(true);
|
|
93
|
+
expect(r.body.event?.type).toBe('model.capability.insufficient');
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
it('unmet + substitutionSupported: false (host posture) → refuse with fallbackAttempted: false', async () => {
|
|
97
|
+
const r = await evaluateGate({
|
|
98
|
+
module: {
|
|
99
|
+
requiredModelCapabilities: ['structured-output'],
|
|
100
|
+
fallbackModel: { provider: 'anthropic', model: 'claude-opus-4-7' },
|
|
101
|
+
},
|
|
102
|
+
activeProvider: 'unknown-vendor',
|
|
103
|
+
activeModel: 'unknown-model',
|
|
104
|
+
substitutionSupported: false,
|
|
105
|
+
supportedProviders: ['anthropic', 'unknown-vendor'],
|
|
106
|
+
});
|
|
107
|
+
if (r.status === 404) return;
|
|
108
|
+
expect(r.body.outcome?.route).toBe('refuse');
|
|
109
|
+
expect(
|
|
110
|
+
r.body.outcome?.fallbackAttempted,
|
|
111
|
+
driver.describe(
|
|
112
|
+
'RFCS/0031-envelope-variants-and-model-capabilities.md §E',
|
|
113
|
+
'capabilities.modelCapabilities.substitutionSupported: false → host MUST NOT attempt fallback even when NodeModule.fallbackModel is declared → fallbackAttempted MUST be false (no attempt was made)',
|
|
114
|
+
),
|
|
115
|
+
).toBe(false);
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
it('recursive fallback NOT permitted — fallback that itself fails capability check → refuse with fallbackAttempted: true', async () => {
|
|
119
|
+
// Construct a scenario where fallback's provider is in supportedProviders
|
|
120
|
+
// BUT the fallback provider itself doesn't advertise the required capability.
|
|
121
|
+
// The probe map's 'unknown-vendor-2' has empty capabilities; the gate
|
|
122
|
+
// refuses with fallbackAttempted: true (RFC 0031 §"Unresolved questions" #3).
|
|
123
|
+
const r = await evaluateGate({
|
|
124
|
+
module: {
|
|
125
|
+
requiredModelCapabilities: ['structured-output'],
|
|
126
|
+
fallbackModel: { provider: 'unknown-vendor-2', model: 'fallback-model' },
|
|
127
|
+
},
|
|
128
|
+
activeProvider: 'unknown-vendor',
|
|
129
|
+
activeModel: 'unknown-model',
|
|
130
|
+
substitutionSupported: true,
|
|
131
|
+
supportedProviders: ['unknown-vendor', 'unknown-vendor-2'],
|
|
132
|
+
});
|
|
133
|
+
if (r.status === 404) return;
|
|
134
|
+
expect(r.body.outcome?.route).toBe('refuse');
|
|
135
|
+
expect(
|
|
136
|
+
r.body.outcome?.fallbackAttempted,
|
|
137
|
+
driver.describe(
|
|
138
|
+
'RFCS/0031-envelope-variants-and-model-capabilities.md §"Unresolved questions" #3',
|
|
139
|
+
'recursive fallback NOT permitted — when the declared fallback model itself fails the capability check, host MUST refuse with fallbackAttempted: true (NOT chain to another fallback)',
|
|
140
|
+
),
|
|
141
|
+
).toBe(true);
|
|
142
|
+
});
|
|
143
|
+
});
|
|
144
|
+
|
|
145
|
+
// End-to-end pipeline: a fixture-declared workflow whose only node carries a
|
|
146
|
+
// NodeModule with `requiredModelCapabilities: ['nonexistent-capability-9b3f']`
|
|
147
|
+
// (registered as `conformance.modelCapability.insufficient` on the reference
|
|
148
|
+
// host). The executor's model-capability gate at dispatch time refuses with
|
|
149
|
+
// `capability_not_provided` AND emits `model.capability.insufficient` into
|
|
150
|
+
// the run event log per RFC 0031 §D. Capability-gated AND fixture-gated:
|
|
151
|
+
// soft-skips when either is absent.
|
|
152
|
+
|
|
153
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
154
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
155
|
+
|
|
156
|
+
const E2E_FIXTURE = 'conformance-model-capability-insufficient';
|
|
157
|
+
|
|
158
|
+
describe.skipIf(HTTP_SKIP)('model-capability-insufficient: end-to-end refusal through executor', () => {
|
|
159
|
+
it('workflow with a node declaring requiredModelCapabilities the active provider does not satisfy fails with RunSnapshot.error.code = "capability_not_provided" AND emits model.capability.insufficient into the run event log BEFORE node.failed', async () => {
|
|
160
|
+
if (!isFixtureAdvertised(E2E_FIXTURE)) return; // fixture not seeded — soft-skip
|
|
161
|
+
|
|
162
|
+
const create = await driver.post('/v1/runs', { workflowId: E2E_FIXTURE });
|
|
163
|
+
expect(create.status).toBe(201);
|
|
164
|
+
const runId = (create.json as { runId: string }).runId;
|
|
165
|
+
|
|
166
|
+
const terminal = await pollUntilTerminal(runId, { timeoutMs: 10_000 });
|
|
167
|
+
expect(terminal.status).toBe('failed');
|
|
168
|
+
expect(
|
|
169
|
+
(terminal as { error?: { code?: string } }).error?.code,
|
|
170
|
+
driver.describe(
|
|
171
|
+
'RFCS/0031-envelope-variants-and-model-capabilities.md §B step 4',
|
|
172
|
+
'unmet capability without viable fallback MUST fail with error.code = "capability_not_provided"',
|
|
173
|
+
),
|
|
174
|
+
).toBe('capability_not_provided');
|
|
175
|
+
|
|
176
|
+
const eventsRes = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
|
|
177
|
+
expect(eventsRes.status).toBe(200);
|
|
178
|
+
const events = ((eventsRes.json as { events?: Array<{ type: string }> } | undefined)?.events ?? []);
|
|
179
|
+
const insufficientIdx = events.findIndex((e) => e.type === 'model.capability.insufficient');
|
|
180
|
+
const nodeFailedIdx = events.findIndex((e) => e.type === 'node.failed');
|
|
181
|
+
expect(insufficientIdx, 'model.capability.insufficient MUST appear in the event log').toBeGreaterThanOrEqual(0);
|
|
182
|
+
expect(nodeFailedIdx, 'node.failed MUST appear in the event log').toBeGreaterThanOrEqual(0);
|
|
183
|
+
expect(
|
|
184
|
+
insufficientIdx < nodeFailedIdx,
|
|
185
|
+
driver.describe(
|
|
186
|
+
'RFCS/0031-envelope-variants-and-model-capabilities.md §D',
|
|
187
|
+
'model.capability.insufficient MUST be emitted BEFORE node.failed (cause precedes effect)',
|
|
188
|
+
),
|
|
189
|
+
).toBe(true);
|
|
190
|
+
});
|
|
191
|
+
|
|
192
|
+
// Verifies that a run refused by the model-capability gate leaves no
// node-completion, provider-usage, or envelope-reliability events in its log.
it('NO envelope emission occurs after the refusal (no node.completed, provider.usage, or envelope-reliability events)', async () => {
  if (!isFixtureAdvertised(E2E_FIXTURE)) return; // fixture not seeded — soft-skip

  const create = await driver.post('/v1/runs', { workflowId: E2E_FIXTURE });
  expect(create.status).toBe(201);
  const runId = (create.json as { runId: string }).runId;
  await pollUntilTerminal(runId, { timeoutMs: 10_000 });

  const eventsRes = await driver.get(`/v1/runs/${encodeURIComponent(runId)}/events`);
  // Without this check a failed events fetch degrades to an empty list and
  // the "nothing leaked" assertion below would pass vacuously.
  expect(eventsRes.status).toBe(200);
  const events = ((eventsRes.json as { events?: Array<{ type: string }> } | undefined)?.events ?? []);

  // Event types that may only appear once a node has actually been dispatched.
  const forbidden = [
    'node.completed',
    'provider.usage',
    'envelope.retry.attempted',
    'envelope.retry.exhausted',
    'envelope.refusal',
    'envelope.truncated',
    'envelope.nlToFormat.engaged',
    'envelope.recovery.applied',
  ];
  // Collect the offending types so a failure message lists exactly what leaked.
  const leaked = events.filter((e) => forbidden.includes(e.type)).map((e) => e.type);
  expect(
    leaked,
    driver.describe(
      'RFCS/0031-envelope-variants-and-model-capabilities.md §B step 4',
      'a refused dispatch MUST NOT emit any downstream envelope-emission events — the node never ran',
    ),
  ).toEqual([]);
});
|
|
221
|
+
});
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* model-capability-substituted — RFC 0031 §B step 3 + §D + §F runtime behavior.
|
|
3
|
+
*
|
|
4
|
+
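* Targets hosts advertising `capabilities.modelCapabilities.supported: true`. The suite itself is skipped when OPENWOP_BASE_URL is unset; the advertisement check soft-skips when the block is absent, and dispatch cases soft-skip when the seam returns 404.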
|
|
5
|
+
*
|
|
6
|
+
* Drives the host's `POST /v1/host/sample/test/evaluate-model-capability-gate`
|
|
7
|
+
* seam with synthetic inputs that hit each branch of the §B 4-step dispatch
|
|
8
|
+
* flow. The seam runs the pure `evaluateModelCapabilityGate()` evaluator
|
|
9
|
+
* and returns both the routing outcome AND the event payload the host
|
|
10
|
+
* would emit. Conformance asserts the decision-matrix + the event payload
|
|
11
|
+
* shape per RFC 0031 §D `modelCapabilitySubstituted`.
|
|
12
|
+
*
|
|
13
|
+
* @see RFCS/0031-envelope-variants-and-model-capabilities.md §B + §D + §F
|
|
14
|
+
* @see spec/v1/host-capabilities.md §"Model-capability declarations"
|
|
15
|
+
* @see schemas/run-event-payloads.schema.json §modelCapabilitySubstituted
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { describe, it, expect } from 'vitest';
|
|
19
|
+
import { driver } from '../lib/driver.js';
|
|
20
|
+
|
|
21
|
+
// True when no host base URL is configured; both suites in this file are
// registered with `describe.skipIf(HTTP_SKIP)`.
const HTTP_SKIP = !process.env['OPENWOP_BASE_URL'];
|
|
22
|
+
|
|
23
|
+
/**
 * Partial view of the discovery document served at `/.well-known/openwop`.
 *
 * Only the `modelCapabilities` block is modelled. Its fields are typed
 * `unknown` on purpose: the conformance test checks their runtime types
 * rather than trusting the host.
 */
interface DiscoveryDoc {
  capabilities?: {
    modelCapabilities?: {
      /** Asserted to be a boolean whenever the block is present. */
      supported?: unknown;
      /** When present, asserted to be an array of capability identifier strings. */
      advertised?: unknown;
      /** When present, asserted to be a boolean. */
      substitutionSupported?: unknown;
    };
  };
}
|
|
32
|
+
|
|
33
|
+
/**
 * Routing outcome returned by the model-capability gate test seam.
 *
 * Every field is optional so a malformed host response surfaces as a failed
 * assertion rather than a type error.
 * NOTE(review): the provider/model fields presumably mirror the event payload
 * fields of the same names — confirm against the host implementation.
 */
interface GateOutcome {
  /** Decision taken by the gate. */
  route?: 'dispatch' | 'substitute' | 'refuse';
  missingCapabilities?: string[];
  fallbackAttempted?: boolean;
  originalProvider?: string;
  originalModel?: string;
  fallbackProvider?: string;
  fallbackModel?: string;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Response body of the `evaluate-model-capability-gate` test seam: the
 * routing outcome plus the event the host would emit for it.
 */
interface GateResponse {
  /** `null` is a valid value; the no-op dispatch case asserts it. */
  event?: { type?: string; payload?: Record<string, unknown> } | null;
  outcome?: GateOutcome;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Fetches the host's discovery document from `/.well-known/openwop`.
 *
 * Best-effort by design: any failure results in `null` so callers can
 * soft-skip instead of failing the suite.
 *
 * @returns The parsed document, or `null` when the request throws, the
 *   status is not 200, or the body is not a JSON object.
 */
async function readDiscovery(): Promise<DiscoveryDoc | null> {
  try {
    const res = await driver.get('/.well-known/openwop');
    if (res.status !== 200) return null;
    // A 200 with a missing or non-object body must not slip past callers'
    // `=== null` guard and blow up on property access.
    const body: unknown = res.json;
    if (typeof body !== 'object' || body === null) return null;
    return body as DiscoveryDoc;
  } catch {
    return null;
  }
}
|
|
57
|
+
|
|
58
|
+
/**
 * Posts a synthetic input to the host's model-capability gate test seam.
 *
 * @param input Request body forwarded verbatim to the seam.
 * @returns The HTTP status and the response body. The body falls back to an
 *   empty object when the host returned no JSON, so callers' optional-chained
 *   reads produce assertion failures (or a clean 404 soft-skip) rather than a
 *   TypeError.
 */
async function evaluateGate(input: Record<string, unknown>): Promise<{ status: number; body: GateResponse }> {
  const res = await driver.post('/v1/host/sample/test/evaluate-model-capability-gate', input);
  const body = (res.json ?? {}) as GateResponse;
  return { status: res.status, body };
}
|
|
62
|
+
|
|
63
|
+
// Shape checks for the optional `capabilities.modelCapabilities` block of the
// discovery document.
describe.skipIf(HTTP_SKIP)('model-capability-substituted: advertisement shape (RFC 0031 §E)', () => {
  // Capability identifiers reserved by the spec.
  const reservedIds = ['structured-output', 'discriminator-enum', 'long-context', 'reasoning', 'function-calling'];
  // Host-defined extension identifiers: x-host-<host>-<key>.
  const hostExtensionPattern = /^x-host-[a-z][a-z0-9-]*-[a-z][a-z0-9-]*$/;

  it('capabilities.modelCapabilities (when present) conforms to RFC 0031 §E', async () => {
    const discovery = await readDiscovery();
    // Soft-skip when discovery is unavailable or the block is not advertised.
    const block = discovery === null ? undefined : discovery.capabilities?.modelCapabilities;
    if (block === undefined) return;

    const supportedType = typeof block.supported;
    expect(
      supportedType,
      driver.describe(
        'schemas/capabilities.schema.json §modelCapabilities',
        'capabilities.modelCapabilities.supported MUST be boolean when the block is advertised',
      ),
    ).toBe('boolean');

    const advertised = block.advertised;
    if (advertised !== undefined) {
      expect(
        Array.isArray(advertised),
        driver.describe('RFCS/0031-envelope-variants-and-model-capabilities.md §E', 'modelCapabilities.advertised MUST be an array of capability identifiers'),
      ).toBe(true);

      for (const entry of advertised as unknown[]) {
        expect(typeof entry, 'each advertised identifier MUST be a string').toBe('string');
        const idStr = String(entry);
        // Accept either a spec-reserved identifier or a host extension.
        const accepted = reservedIds.includes(idStr) || hostExtensionPattern.test(idStr);
        expect(
          accepted,
          driver.describe(
            'RFCS/0031-envelope-variants-and-model-capabilities.md §C',
            `advertised identifier "${idStr}" MUST be spec-reserved (structured-output, discriminator-enum, long-context, reasoning, function-calling) or match the x-host-<host>-<key> extension pattern`,
          ),
        ).toBe(true);
      }
    }

    const substitution = block.substitutionSupported;
    if (substitution !== undefined) {
      expect(typeof substitution, 'substitutionSupported MUST be boolean').toBe('boolean');
    }
  });
});
|
|
101
|
+
|
|
102
|
+
// Drives the gate test seam through three branches of the dispatch flow:
// no-op dispatch, substitution, and refusal by host posture. Each case
// soft-skips when the host does not expose the seam (HTTP 404).
describe.skipIf(HTTP_SKIP)('model-capability-substituted: dispatch behavior (RFC 0031 §B step 3 + §D)', () => {
  it('all required capabilities met → outcome: dispatch (no event emitted)', async () => {
    const r = await evaluateGate({
      module: { requiredModelCapabilities: ['structured-output', 'function-calling'] },
      activeProvider: 'anthropic',
      activeModel: 'claude-3-5-sonnet',
      substitutionSupported: true,
      supportedProviders: ['anthropic', 'openai'],
    });
    if (r.status === 404) return; // host doesn't expose the seam
    expect(
      r.body.outcome?.route,
      driver.describe(
        'RFCS/0031-envelope-variants-and-model-capabilities.md §B step 2',
        'all required model capabilities met → route MUST be "dispatch" (gate is a no-op)',
      ),
    ).toBe('dispatch');
    expect(r.body.event, 'no event emitted when gate is a no-op').toBeNull();
  });

  it('unmet + fallback declared + authenticatable → outcome: substitute + event with originalProvider/originalModel/fallbackProvider/fallbackModel/missingCapabilities', async () => {
    const r = await evaluateGate({
      module: {
        requiredModelCapabilities: ['structured-output', 'long-context'],
        fallbackModel: { provider: 'anthropic', model: 'claude-opus-4-7' },
      },
      // The seam's probe map returns the spec-known capability set for known
      // providers only. 'unknown-vendor' is not one of them, so the gate sees
      // an empty advertised set for the active model and every required
      // capability is unmet. The declared fallback ('anthropic') is in
      // supportedProviders and advertises structured-output + long-context,
      // so the gate is expected to substitute rather than refuse.
      activeProvider: 'unknown-vendor',
      activeModel: 'unknown-model',
      substitutionSupported: true,
      supportedProviders: ['anthropic', 'openai', 'unknown-vendor'],
      nodeId: 'writer-node',
    });
    if (r.status === 404) return;
    expect(
      r.body.outcome?.route,
      driver.describe(
        'RFCS/0031-envelope-variants-and-model-capabilities.md §B step 3',
        'unmet capability + declared fallback + fallback provider authenticatable → route MUST be "substitute"',
      ),
    ).toBe('substitute');
    expect(
      r.body.event?.type,
      driver.describe(
        'RFCS/0031-envelope-variants-and-model-capabilities.md §D',
        'substitute path MUST emit `model.capability.substituted`',
      ),
    ).toBe('model.capability.substituted');
    // Default to an empty payload so missing fields fail the assertions below
    // instead of throwing.
    const payload = (r.body.event?.payload ?? {}) as Record<string, unknown>;
    expect(payload.nodeId, 'payload.nodeId MUST mirror the request').toBe('writer-node');
    expect(payload.originalProvider).toBe('unknown-vendor');
    expect(payload.originalModel).toBe('unknown-model');
    expect(payload.fallbackProvider).toBe('anthropic');
    expect(payload.fallbackModel).toBe('claude-opus-4-7');
    expect(
      Array.isArray(payload.missingCapabilities) &&
        (payload.missingCapabilities as string[]).includes('structured-output'),
      driver.describe(
        'schemas/run-event-payloads.schema.json §modelCapabilitySubstituted',
        'missingCapabilities[] MUST include the subset of required capabilities the active model did not satisfy',
      ),
    ).toBe(true);
  });

  it('unmet + substitutionSupported: false → outcome: refuse with fallbackAttempted: false (host posture override)', async () => {
    const r = await evaluateGate({
      module: {
        requiredModelCapabilities: ['structured-output'],
        // Fallback declared but the gate refuses BEFORE attempting because the
        // host's posture is "no substitution" per RFC 0031 §E.
        fallbackModel: { provider: 'anthropic', model: 'claude-opus-4-7' },
      },
      activeProvider: 'unknown-vendor',
      activeModel: 'unknown-model',
      substitutionSupported: false,
      supportedProviders: ['anthropic', 'unknown-vendor'],
    });
    if (r.status === 404) return;
    expect(
      r.body.outcome?.route,
      driver.describe(
        'RFCS/0031-envelope-variants-and-model-capabilities.md §E',
        'capabilities.modelCapabilities.substitutionSupported: false → host MUST refuse on any unmet capability even when NodeModule.fallbackModel is declared',
      ),
    ).toBe('refuse');
    expect(r.body.event?.type).toBe('model.capability.insufficient');
    expect(
      // Optional chaining: a missing payload must fail this assertion with its
      // spec citation, not abort the test with a TypeError.
      (r.body.event?.payload as { fallbackAttempted?: boolean } | undefined)?.fallbackAttempted,
      driver.describe(
        'schemas/run-event-payloads.schema.json §modelCapabilityInsufficient',
        'fallbackAttempted MUST be false when the refusal is driven by substitutionSupported: false (host posture, not fallback failure)',
      ),
    ).toBe(false);
  });
});
|