@openwop/openwop-conformance 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +65 -0
- package/README.md +2 -2
- package/api/redocly.yaml +15 -0
- package/coverage.md +2 -1
- package/fixtures/conformance-agent-reasoning-streaming.json +37 -0
- package/fixtures/conformance-dispatch-cancellable-child.json +27 -0
- package/fixtures/conformance-dispatch-deterministic-fail-child.json +30 -0
- package/fixtures/conformance-dispatch-input-mapping-no-default.json +49 -0
- package/fixtures/conformance-dispatch-per-worker-override.json +59 -0
- package/fixtures/conformance-subworkflow-input-mapping-no-default.json +33 -0
- package/fixtures.md +6 -0
- package/package.json +1 -1
- package/schemas/capabilities.schema.json +16 -0
- package/schemas/core-conformance-mock-agent-config.schema.json +5 -0
- package/schemas/run-event-payloads.schema.json +35 -1
- package/schemas/run-event.schema.json +2 -0
- package/src/lib/driver.ts +15 -0
- package/src/lib/env.ts +51 -0
- package/src/lib/event-log-query.ts +62 -0
- package/src/lib/fixtures.ts +38 -1
- package/src/lib/host-toggle.ts +54 -0
- package/src/lib/multi-agent-capabilities.ts +10 -0
- package/src/lib/otel-scrape.ts +59 -0
- package/src/scenarios/agentReasoningStreaming.test.ts +193 -0
- package/src/scenarios/aiEnvelope.capBreached.test.ts +97 -9
- package/src/scenarios/aiEnvelope.contractRefusal.test.ts +128 -10
- package/src/scenarios/aiEnvelope.correlationReplay.test.ts +236 -21
- package/src/scenarios/aiEnvelope.redaction.test.ts +204 -24
- package/src/scenarios/aiEnvelope.schemaDrift.test.ts +158 -19
- package/src/scenarios/aiEnvelope.trustBoundaryPropagation.test.ts +59 -8
- package/src/scenarios/aiEnvelope.universalKinds.test.ts +100 -9
- package/src/scenarios/blob-presign-expiry.test.ts +35 -2
- package/src/scenarios/blob-roundtrip.test.ts +0 -0
- package/src/scenarios/cache-ttl-expiry.test.ts +28 -2
- package/src/scenarios/dispatch-cross-worker-handoff.test.ts +34 -3
- package/src/scenarios/dispatch-input-mapping.test.ts +75 -6
- package/src/scenarios/dispatch-output-mapping.test.ts +96 -6
- package/src/scenarios/fixtures-gating.test.ts +139 -1
- package/src/scenarios/kv-ttl-expiry.test.ts +33 -2
- package/src/scenarios/otel-trace-propagation-subworkflow.test.ts +19 -0
- package/src/scenarios/pack-registry-publish.test.ts +231 -51
- package/src/scenarios/provider-usage.test.ts +185 -0
- package/src/scenarios/queue-ack-nack-dlq.test.ts +57 -3
- package/src/scenarios/queue-publish-consume-roundtrip.test.ts +43 -3
- package/src/scenarios/replay-llm-cache-key.test.ts +166 -25
- package/src/scenarios/search-bm25-roundtrip.test.ts +47 -2
- package/src/scenarios/sql-transaction-atomicity.test.ts +31 -2
- package/src/scenarios/stream-subscribe-from-beginning.test.ts +39 -2
- package/src/scenarios/subworkflow-input-mapping.test.ts +77 -7
- package/src/scenarios/table-cursor-pagination.test.ts +40 -2
- package/src/scenarios/table-schema-enforcement.test.ts +39 -2
- package/src/scenarios/vector-knn-roundtrip.test.ts +43 -3
- package/src/scenarios/workflow-chain-host-expansion.test.ts +202 -0
|
@@ -1,93 +1,273 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Pack-registry publish scenarios — `node-packs.md` §"PUT /v1/packs/{name}/-/{version}.tgz".
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Status: BEHAVIORAL (soft-skip). Per RFC 0025 (`Draft` 2026-05-19),
|
|
5
|
+
* the conformance suite drives the documented 19-code error catalog
|
|
6
|
+
* via the test-mode mirror namespace `/v1/packs-test/*`, gated on
|
|
7
|
+
* `capabilities.packs.testMode.supported: true`. Each scenario soft-
|
|
8
|
+
* skips when the host doesn't advertise the test-mode capability OR
|
|
9
|
+
* when the seam returns HTTP 404 — hosts that haven't implemented the
|
|
10
|
+
* mirror namespace keep advertisement-shape coverage from
|
|
11
|
+
* `/v1/packs/*` scenarios unchanged.
|
|
7
12
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
* a binary tarball upload. Round-trip scenarios from a black-box suite
|
|
12
|
-
* would either:
|
|
13
|
-
* 1. Require the suite's `OPENWOP_API_KEY` to carry super-admin / publish
|
|
14
|
-
* scope on the host under test — gives the suite the ability to
|
|
15
|
-
* stomp on the real catalog, NOT acceptable for v1.
|
|
16
|
-
* 2. Require a host-provided test-mode `/v1/packs-test/*` namespace
|
|
17
|
-
* that mirrors the real surface but writes to an isolated catalog —
|
|
18
|
-
* this surface doesn't exist in the spec yet.
|
|
19
|
-
*
|
|
20
|
-
* Until option 2 is specified, the scenarios below document the
|
|
21
|
-
* error-code contract so they become runnable once the isolated surface
|
|
22
|
-
* exists.
|
|
13
|
+
* Per RFC 0025 §C the test catalog MUST be isolated from the production
|
|
14
|
+
* catalog; scenarios use disposable pack names with timestamps to avoid
|
|
15
|
+
* collisions even within the test catalog.
|
|
23
16
|
*
|
|
17
|
+
* @see RFCS/0025-test-mode-registry-namespace.md
|
|
24
18
|
* @see node-packs.md §"PUT /v1/packs/{name}/-/{version}.tgz"
|
|
25
19
|
* @see auth.md §"`packs:publish` scope"
|
|
26
20
|
* @see schemas/node-pack-manifest.schema.json
|
|
27
21
|
*/
|
|
28
22
|
|
|
29
|
-
import { describe, it } from 'vitest';
|
|
23
|
+
import { describe, it, expect } from 'vitest';
|
|
24
|
+
import { driver } from '../lib/driver.js';
|
|
25
|
+
|
|
26
|
+
/** Minimal view of the discovery document: only the `capabilities` bag is read here. */
interface DiscoveryDoc {
  capabilities?: Record<string, unknown>;
}

/**
 * Report whether the host advertises `capabilities.packs.testMode.supported === true`
 * in the document served at `/.well-known/openwop`.
 *
 * Any missing or non-object level along that path yields `false`.
 */
async function isTestModeAdvertised(): Promise<boolean> {
  const res = await driver.get('/.well-known/openwop');
  const capabilities = (res.json as DiscoveryDoc | undefined)?.capabilities;
  if (!capabilities || typeof capabilities !== 'object') return false;

  const packs = capabilities['packs'] as Record<string, unknown> | undefined;
  if (!packs || typeof packs !== 'object') return false;

  const testMode = packs['testMode'] as Record<string, unknown> | undefined;
  // Only the literal boolean `true` counts as advertised.
  return testMode ? testMode['supported'] === true : false;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Build a disposable pack name for an isolated test publish.
 *
 * The result has the form `<scope>.openwop.test-publish-<epoch-ms>-<suffix>`,
 * where `<suffix>` is always exactly six base-36 characters.
 *
 * @param scope - leading segment of the pack name; defaults to `'core'`.
 * @returns a fresh pack name combining the current time and a random suffix.
 */
function freshPackName(scope: string = 'core'): string {
  // `Math.random().toString(36)` may produce fewer than six fractional digits
  // (0.5 -> "0.i", 0 -> "0"), which used to yield a short or even empty suffix
  // and a name ending in "-". Pad so the suffix is fixed-width and non-empty.
  const suffix = Math.random().toString(36).slice(2, 8).padEnd(6, '0');
  return `${scope}.openwop.test-publish-${Date.now()}-${suffix}`;
}
|
|
43
|
+
|
|
44
|
+
/**
 * PUT a candidate body to the test-mode mirror namespace
 * (`/v1/packs-test/{name}/-/{version}.tgz`) and return the driver's response.
 *
 * The pack name and version are URI-encoded. `Content-Type` defaults to
 * `application/octet-stream`; entries in `extraHeaders` are merged afterwards
 * and therefore win on conflict. This helper does not inspect the response:
 * callers decide whether to soft-skip on HTTP 404.
 *
 * NOTE(review): the driver is understood to JSON-stringify bodies by default,
 * so true raw tarball uploads may need an octet-stream variant on the driver —
 * confirm against the driver implementation. The shape-only error-catalog
 * scenarios only need the host's first validation step to fire.
 */
async function putTest(name: string, version: string, body: unknown, extraHeaders: Record<string, string> = {}) {
  const target = `/v1/packs-test/${encodeURIComponent(name)}/-/${encodeURIComponent(version)}.tgz`;
  const headers = { 'Content-Type': 'application/octet-stream', ...extraHeaders };
  return driver.put(target, body, { headers });
}
|
|
55
|
+
|
|
56
|
+
/**
 * GET the signature resource for a pack version from the test-mode namespace
 * (`/v1/packs-test/{name}/-/{version}.sig`) and return the driver's response.
 *
 * Callers must distinguish a bare 404 (seam not exposed) from a 404 carrying
 * the `signature_not_available` error code; this helper does neither.
 */
async function getTestSignature(name: string, version: string) {
  const encodedName = encodeURIComponent(name);
  const encodedVersion = encodeURIComponent(version);
  return driver.get(`/v1/packs-test/${encodedName}/-/${encodedVersion}.sig`);
}
|
|
60
|
+
|
|
61
|
+
/**
 * Extract the error code from an error response body.
 *
 * Two body shapes are accepted: `{ error: "code" }` and
 * `{ error: { code: "..." } }`.
 *
 * @param body - parsed response body of unknown shape.
 * @returns the code string, or `undefined` when neither shape matches.
 */
function errorCode(body: unknown): string | undefined {
  if (typeof body !== 'object' || body === null) return undefined;

  const { error } = body as { error?: unknown };
  switch (typeof error) {
    case 'string':
      return error;
    case 'object': {
      // `typeof null` is 'object', so rule it out before reading `.code`.
      if (error === null) return undefined;
      const { code } = error as { code?: unknown };
      return typeof code === 'string' ? code : undefined;
    }
    default:
      return undefined;
  }
}
|
|
30
73
|
|
|
31
|
-
describe('pack-registry-publish: URL / scope error catalog (
|
|
32
|
-
it
|
|
74
|
+
// URL-level validation of the publish endpoint, driven through the test-mode
// mirror namespace. Every scenario soft-skips (returns without asserting) when
// the capability is not advertised or the seam answers 404.
describe('pack-registry-publish: URL / scope error catalog (RFC 0025)', () => {
  it('PUT with non-spec scope MUST return 400 invalid_pack_scope', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;

    const response = await putTest('bogus.unsupported-scope.pack', '1.0.0', Buffer.alloc(0));
    if (response.status === 404) return; // seam not exposed

    // Any 4xx status is tolerated here; the error code is what gets pinned.
    expect(response.status).toBeGreaterThanOrEqual(400);
    expect(response.status).toBeLessThan(500);
    expect(
      errorCode(response.json),
      driver.describe('node-packs.md §"PUT /v1/packs/{name}/-/{version}.tgz"', 'non-spec scope MUST return invalid_pack_scope'),
    ).toBe('invalid_pack_scope');
  });

  it('PUT with a single-segment URL pack name MUST return 400 invalid_pack_name', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;

    const response = await putTest('singleseg', '1.0.0', Buffer.alloc(0));
    if (response.status === 404) return;

    expect(response.status).toBe(400);
    expect(errorCode(response.json)).toBe('invalid_pack_name');
  });

  it('PUT with a non-semver URL version MUST return 400 invalid_version', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;

    const response = await putTest(freshPackName(), 'not-a-semver', Buffer.alloc(0));
    if (response.status === 404) return;

    expect(response.status).toBe(400);
    expect(errorCode(response.json)).toBe('invalid_version');
  });
});
|
|
38
103
|
|
|
39
|
-
describe('pack-registry-publish: body-shape error catalog (
|
|
40
|
-
it
|
|
104
|
+
// Body-presence / body-type validation of the publish endpoint. Scenarios
// soft-skip when test mode is not advertised or the seam answers 404.
describe('pack-registry-publish: body-shape error catalog (RFC 0025)', () => {
  it('PUT with a JSON body (instead of tarball bytes) MUST return 400 invalid_body', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;

    // Bypasses putTest so the request carries an explicit JSON content type.
    const target = `/v1/packs-test/${encodeURIComponent(freshPackName())}/-/1.0.0.tgz`;
    const jsonHeaders = { 'Content-Type': 'application/json' };
    const response = await driver.put(target, JSON.stringify({}), { headers: jsonHeaders });
    if (response.status === 404) return;

    expect(response.status).toBe(400);
    expect(errorCode(response.json)).toBe('invalid_body');
  });

  it('PUT with an empty body MUST return 400 invalid_body', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;

    const response = await putTest(freshPackName(), '1.0.0', Buffer.alloc(0));
    if (response.status === 404) return;

    expect(response.status).toBe(400);
    expect(errorCode(response.json)).toBe('invalid_body');
  });
});
|
|
44
121
|
|
|
45
|
-
describe('pack-registry-publish: tarball extraction error catalog (
|
|
46
|
-
|
|
122
|
+
// Tarball extraction errors. No tar/gzip library is used: the runnable
// scenarios send synthetic byte sequences, and the ones that need a real
// tarball structure are stubs that return after the capability gate.
describe('pack-registry-publish: tarball extraction error catalog (RFC 0025)', () => {
  it("PUT with a body that isn't a valid gzip stream MUST return 400 tarball_gunzip_failed", async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;

    const response = await putTest(freshPackName(), '1.0.0', Buffer.from('not a gzip stream'));
    if (response.status === 404) return;

    expect(response.status).toBe(400);
    expect(errorCode(response.json)).toBe('tarball_gunzip_failed');
  });

  it("PUT with decompressed bytes exceeding the registry's cap MUST return 400 tarball_too_large", async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;

    // Shape-only: rather than building a huge real gzip, send a 60MB body
    // that starts with the gzip magic bytes so it gets past the body-shape check.
    const oversized = Buffer.alloc(60 * 1024 * 1024, 0x1f);
    oversized[0] = 0x1f;
    oversized[1] = 0x8b;

    const response = await putTest(freshPackName(), '1.0.0', oversized);
    if (response.status === 404) return;

    expect(response.status).toBe(400);
    // Either code is accepted, since the payload is not a real gzip stream.
    const acceptable = ['tarball_too_large', 'tarball_gunzip_failed'];
    expect(acceptable.includes(errorCode(response.json) ?? '')).toBe(true);
  });

  it('PUT with no `pack.json` at the tarball root MUST return 400 tarball_manifest_missing', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;
    // Stub: needs a minimal gzip+tar without pack.json. Soft-skips until a
    // tarball builder is available.
  });

  it("PUT with `pack.json` exceeding the registry's per-file cap MUST return 400 tarball_manifest_too_large", async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;
    // Stub: requires a real tarball builder — deferred to host-side tests.
  });

  it("PUT with `pack.json` that isn't valid JSON MUST return 400 tarball_manifest_not_json", async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;
    // Stub: requires a real tarball builder.
  });

  it("PUT with `manifest.runtime.entry` declaring a path that isn't in the tarball MUST return 400 tarball_entry_missing", async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;
    // Stub: requires a real tarball builder.
  });

  it("PUT with an entry source exceeding the registry's per-file cap MUST return 400 tarball_entry_too_large", async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;
    // Stub: requires a real tarball builder.
  });

  it('PUT with a tarball entry whose name contains `..` or otherwise escapes the pack root MUST return 400 tarball_path_traversal', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;
    // Stub: requires a real tarball builder.
  });

  it("PUT with a tar stream that the parser can't read past the gzip layer MUST return 400 tarball_tar_parse_failed", async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;

    // Gzip header bytes followed by a payload that is not a tar archive.
    const garbage = Buffer.from([0x1f, 0x8b, 0x08, 0x00, 0, 0, 0, 0, 0, 0xff, 0x01, 0x02]);
    const response = await putTest(freshPackName(), '1.0.0', garbage);
    if (response.status === 404) return;

    // Only a 4xx answer is inspected; the host may not reach this code path
    // with a garbage gzip stream.
    const isClientError = response.status >= 400 && response.status < 500;
    if (!isClientError) return;

    const code = errorCode(response.json);
    expect(
      ['tarball_tar_parse_failed', 'tarball_gunzip_failed'].includes(code ?? ''),
      driver.describe('node-packs.md', 'garbage gzip stream MUST surface tarball_tar_parse_failed or tarball_gunzip_failed'),
    ).toBe(true);
  });
});
|
|
64
192
|
|
|
65
|
-
describe('pack-registry-publish: manifest contents error catalog (
|
|
66
|
-
it
|
|
193
|
+
// Manifest-content errors. Only the integrity-header scenario is runnable
// without a tarball builder; the rest are stubs behind the capability gate.
describe('pack-registry-publish: manifest contents error catalog (RFC 0025)', () => {
  it('PUT with a `pack.json` that fails schema validation MUST return 400 invalid_manifest', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;
    // Stub: requires a real tarball builder + intentionally-invalid manifest.
  });

  it('PUT with `manifest.name`/`manifest.version` differing from URL MUST return 400 manifest_mismatch (or granular pair)', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;
    // Stub: requires a real tarball builder.
  });

  it('PUT with server-computed SHA-256 not matching `X-Pack-Sha256` MUST return 400 pack_integrity_failure', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;

    // Three bytes starting with the gzip magic, paired with an all-zero digest header.
    const tinyBody = Buffer.from([0x1f, 0x8b, 0]);
    const bogusDigest = { 'X-Pack-Sha256': '0'.repeat(64) };
    const response = await putTest(freshPackName(), '1.0.0', tinyBody, bogusDigest);
    if (response.status === 404) return;
    if (response.status < 400) return; // host may not validate header on garbage gzip

    // The body is not a valid gzip, so earlier validation steps may answer
    // first; their codes are accepted alongside the integrity failure.
    const code = errorCode(response.json);
    expect(
      ['pack_integrity_failure', 'tarball_gunzip_failed', 'invalid_body'].includes(code ?? ''),
      driver.describe('node-packs.md', 'SHA-256 mismatch MUST be detectable; absence of valid gzip masks this case for the test'),
    ).toBe(true);
  });

  it('PUT with `runtime.language` value not accepted by the registry MUST return 400 unsupported_runtime', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;
    // Stub: requires a real tarball builder + manifest with unsupported runtime.
  });
});
|
|
74
221
|
|
|
75
|
-
describe('pack-registry-publish: authorization + conflict (
|
|
76
|
-
it
|
|
222
|
+
// Authorization and conflict scenarios. All three are stubs: they run the
// capability gate and then return without asserting.
describe('pack-registry-publish: authorization + conflict (RFC 0025)', () => {
  it('PUT without `packs:publish` scope or namespace claim MUST return 403 forbidden', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;
    // Stub. The test-mode catalog typically admits the conformance suite's
    // API key by design, and some hosts MAY accept it universally, so the
    // scenario soft-skips rather than asserting 403 with the canonical code.
  });

  it('PUT for an existing (name, version) with DIFFERENT content MUST return 409 conflict', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;
    // Stub: requires a successful first PUT followed by a conflicting second PUT.
  });

  it('PUT for an existing (name, version) with IDENTICAL sha256 content MUST return 200 OK (idempotent re-publish)', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;
    // Stub: requires a successful first PUT followed by an identical second PUT.
  });
});
|
|
82
242
|
|
|
83
|
-
describe('pack-registry-publish: unpublish window (
|
|
84
|
-
it
|
|
243
|
+
// Unpublish-window scenario. Currently a stub behind the capability gate.
describe('pack-registry-publish: unpublish window (RFC 0025)', () => {
  it('DELETE for a version older than the unpublish window MUST return 400 unpublish_window_expired', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;
    // Stub: requires time-travel or an explicit aged-version fixture.
  });
});
|
|
86
249
|
|
|
87
|
-
describe('pack-registry-publish: signature endpoint pairing (
|
|
88
|
-
it
|
|
250
|
+
// Signature endpoint scenarios. None of them asserts yet: each needs a
// successful prior PUT, which the suite cannot produce without a tarball builder.
describe('pack-registry-publish: signature endpoint pairing (RFC 0025)', () => {
  it('after PUT WITHOUT signature, GET /sig MUST return 404 signature_not_available', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;

    const packName = freshPackName();
    const sigResponse = await getTestSignature(packName, '1.0.0');
    if (sigResponse.status === 404) {
      // Ambiguous: either the seam answers 404 for a missing pack, or this is
      // the canonical signature_not_available 404. Both are shape-conformant.
      const code = errorCode(sigResponse.json);
      const shapeConformant = code === 'signature_not_available' || code === undefined;
      if (shapeConformant) return;
    }
    // Soft-skip. Once a real PUT without a signature succeeds, the following
    // GET .sig MUST be 404.
  });

  it('after PUT WITH signature blob, GET /sig MUST return 200 (or 302 to signed URL)', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;
    // Stub: requires a real tarball with signature.sig at root.
  });

  it('after YANK, GET /sig MUST return 404 signature_not_available', async () => {
    const advertised = await isTestModeAdvertised();
    if (!advertised) return;
    // Stub: requires a successful PUT then YANK.
  });
});
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RFC 0026 — `provider.usage` event conformance.
|
|
3
|
+
*
|
|
4
|
+
* Verifies the new optional event type added to `RunEventType` per RFC
|
|
5
|
+
* 0026. The event MUST fire after every LLM provider invocation,
|
|
6
|
+
* carrying per-call token counts + optional cost estimate. Three
|
|
7
|
+
* describe blocks:
|
|
8
|
+
*
|
|
9
|
+
* 1. Advertisement shape (`capabilities.providerUsage` block).
|
|
10
|
+
* 2. Schema round-trip (positive + negative fixtures).
|
|
11
|
+
* 3. Event presence + shape via the test-only emit seam +
|
|
12
|
+
* event-log query seam (Thread E.1).
|
|
13
|
+
*
|
|
14
|
+
* Each describe block soft-skips when the host doesn't expose the
|
|
15
|
+
* relevant seam OR the matching capability isn't advertised.
|
|
16
|
+
*
|
|
17
|
+
* @see RFCS/0026-provider-usage-event.md
|
|
18
|
+
* @see schemas/run-event-payloads.schema.json#/$defs/providerUsage
|
|
19
|
+
* @see SECURITY/invariants.yaml#provider-usage-no-credential-leak
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import { describe, it, expect } from 'vitest';
|
|
23
|
+
import Ajv2020 from 'ajv/dist/2020.js';
|
|
24
|
+
import { readFileSync } from 'node:fs';
|
|
25
|
+
import { join } from 'node:path';
|
|
26
|
+
import { driver } from '../lib/driver.js';
|
|
27
|
+
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
28
|
+
import { queryTestEvents, isEventLogSeamAvailable, resetTestSeam } from '../lib/event-log-query.js';
|
|
29
|
+
|
|
30
|
+
/** Slice of the discovery document read by the provider-usage scenarios. */
interface DiscoveryDoc {
  capabilities?: {
    providerUsage?: { supported?: boolean; costEstimates?: boolean; currency?: string };
  };
}

/**
 * Fetch `/.well-known/openwop` and return its `capabilities.providerUsage`
 * block.
 *
 * @returns the block when it is present and an object; `null` otherwise.
 */
async function readProviderUsageCap(): Promise<{ supported?: boolean; costEstimates?: boolean; currency?: string } | null> {
  const discovery = (await driver.get('/.well-known/openwop')).json as DiscoveryDoc | undefined;
  const block = discovery?.capabilities?.providerUsage;
  if (!block || typeof block !== 'object') return null;
  return block;
}
|
|
42
|
+
|
|
43
|
+
// Shape checks on the optional `capabilities.providerUsage` block. When the
// host does not advertise the block, the scenario returns without asserting.
describe('provider-usage: capability advertisement (RFC 0026 §E)', () => {
  it('capabilities.providerUsage is either absent or a well-formed object', async () => {
    const cap = await readProviderUsageCap();
    if (cap === null) return; // host doesn't advertise — skip

    const { supported, costEstimates, currency } = cap;

    // `supported` is mandatory once the block exists.
    expect(
      typeof supported,
      driver.describe('RFC 0026 §E', 'capabilities.providerUsage.supported MUST be a boolean when the block is present'),
    ).toBe('boolean');

    // The remaining fields are optional and only checked when present.
    if (costEstimates !== undefined) {
      expect(
        typeof costEstimates,
        driver.describe('RFC 0026 §E', 'capabilities.providerUsage.costEstimates MUST be a boolean when present'),
      ).toBe('boolean');
    }

    if (currency !== undefined) {
      const looksLikeIsoCurrency = /^[A-Z]{3}$/.test(currency);
      expect(
        looksLikeIsoCurrency,
        driver.describe('RFC 0026 §E', 'capabilities.providerUsage.currency MUST be a 3-letter uppercase ISO 4217 code when present'),
      ).toBe(true);
    }
  });
});
|
|
65
|
+
|
|
66
|
+
// Validates hand-written payloads against the `providerUsage` definition
// taken from `run-event-payloads.schema.json`.
describe('provider-usage: schema round-trip (RFC 0026 §A)', () => {
  const ajv = new Ajv2020({ strict: false, allErrors: true });

  // The whole payloads document is parsed, but only the `providerUsage`
  // sub-definition is handed to Ajv.
  // NOTE(review): any `$ref` inside that definition pointing at sibling
  // `$defs` would have to resolve without the parent document — confirm the
  // definition is self-contained.
  const schemaPath = join(SCHEMAS_DIR, 'run-event-payloads.schema.json');
  const payloadsDoc = JSON.parse(readFileSync(schemaPath, 'utf8')) as Record<string, unknown>;
  const defs = payloadsDoc.$defs as Record<string, unknown>;
  const providerUsageDef = defs.providerUsage as Record<string, unknown>;
  const validate = ajv.compile(providerUsageDef);

  // Fields shared by the Anthropic-flavoured fixtures below.
  const anthropicCall = { provider: 'anthropic', model: 'claude-3-5-sonnet-20240620' };

  it('positive fixture validates', () => {
    const fixture = {
      ...anthropicCall,
      inputTokens: 145,
      outputTokens: 312,
      totalTokens: 457,
      costEstimateUsd: 0.005115,
      currency: 'USD',
      cacheHit: false,
      nodeId: 'chat-respond',
    };
    const ok = validate(fixture);
    expect(ok, `positive fixture MUST validate; errors: ${JSON.stringify(validate.errors)}`).toBe(true);
  });

  it('negative fixture (missing required field) MUST be rejected', () => {
    // `outputTokens` is deliberately left out — required per §A.
    const fixture = { ...anthropicCall, inputTokens: 100 };
    const ok = validate(fixture);
    expect(
      ok,
      driver.describe('RFC 0026 §A', 'payload missing required `outputTokens` MUST fail schema validation'),
    ).toBe(false);
  });

  it('negative fixture (additionalProperties — credentialRef leak) MUST be rejected', () => {
    const fixture = {
      ...anthropicCall,
      inputTokens: 100,
      outputTokens: 50,
      // Banned: an undeclared property, expected to trip additionalProperties:false.
      credentialRef: 'secret:tenant:byok-anthropic:v1',
    };
    const ok = validate(fixture);
    expect(
      ok,
      driver.describe('RFC 0026 §D', 'additionalProperties:false MUST reject credentialRef-shaped fields per provider-usage-no-credential-leak'),
    ).toBe(false);
  });

  it('negative fixture (non-integer token count) MUST be rejected', () => {
    const fixture = {
      provider: 'openai',
      model: 'gpt-4o',
      inputTokens: 100.5, // fractional on purpose
      outputTokens: 50,
    };
    const ok = validate(fixture);
    expect(ok, 'inputTokens MUST be integer per §A').toBe(false);
  });
});
|
|
125
|
+
|
|
126
|
+
// Drives the test-only emit seam and reads the result back through the
// event-log query seam. Both scenarios soft-skip (return without asserting)
// when either seam is not exposed. Once an emit request has been answered,
// the seam is reset in a `finally` block so a failing assertion or an early
// return cannot leave state behind for later scenarios.
describe('provider-usage: event presence via emit-seam + event-log query (RFC 0026 §B)', () => {
  it('emit-seam projects exactly one provider.usage event with required fields populated', async () => {
    if (!(await isEventLogSeamAvailable())) return; // E.1 seam not exposed — soft-skip

    // A fresh run id per invocation keeps the queried events scoped to this scenario.
    const runId = `r-pu-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    const correlationId = `${runId}:node-1:turn-0:pu-1`;
    const payload = {
      provider: 'anthropic',
      model: 'claude-3-5-sonnet-20240620',
      inputTokens: 200,
      outputTokens: 80,
      totalTokens: 280,
      nodeId: 'node-1',
    };

    const emit = await driver.post('/v1/host/sample/test/emit-provider-usage', { runId, payload, correlationId, nodeId: 'node-1' });
    if (emit.status === 404) return; // emit seam not exposed

    try {
      expect(emit.status).toBe(200);

      const events = await queryTestEvents(runId, { type: 'provider.usage' });
      if (!events.ok) return; // query failed — soft-skip; the finally block still resets

      expect(
        events.events.length,
        driver.describe('RFC 0026 §B', 'emit-seam MUST project exactly one provider.usage event'),
      ).toBe(1);

      // Length was asserted to be 1 just above, so index 0 exists.
      const e = events.events[0]!;
      expect(e.payload.provider).toBe('anthropic');
      expect(e.payload.model).toBe('claude-3-5-sonnet-20240620');
      expect(e.payload.inputTokens).toBe(200);
      expect(e.payload.outputTokens).toBe(80);
      // The correlation id sent to the seam is expected back as the event's causationId.
      expect(e.causationId).toBe(correlationId);
      expect(e.nodeId).toBe('node-1');
    } finally {
      await resetTestSeam();
    }
  });

  it('emit-seam refuses payloads containing credentialRef-shaped content (provider-usage-no-credential-leak invariant)', async () => {
    if (!(await isEventLogSeamAvailable())) return;

    const runId = `r-pu-leak-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    // The payload is otherwise well-formed but carries 'secret:' content in a
    // string field. The seam's defense-in-depth check MUST refuse it — even
    // though the production emitter's schema validation would also catch
    // credentialRef-shaped fields via additionalProperties:false.
    const res = await driver.post('/v1/host/sample/test/emit-provider-usage', {
      runId,
      payload: {
        provider: 'anthropic',
        model: 'claude-3-5-sonnet-20240620',
        inputTokens: 100,
        outputTokens: 50,
        nodeId: 'secret:tenant:byok-anthropic:v1', // banned content
      },
    });
    if (res.status === 404) return;

    try {
      expect(
        res.status,
        driver.describe('SECURITY/invariants.yaml provider-usage-no-credential-leak', 'payload with credentialRef-shaped content MUST be refused'),
      ).toBe(400);

      // Optional-chain from the body itself: a response without a JSON body
      // should fail the assertion below, not throw a TypeError.
      const body = res.json as { error?: { code?: string } } | undefined;
      expect(body?.error?.code).toBe('provider_usage_credential_leak');
    } finally {
      await resetTestSeam();
    }
  });
});
|
|
@@ -61,7 +61,61 @@ describe('queue-ack-nack-dlq: advertisement shape (RFC 0017)', () => {
|
|
|
61
61
|
});
|
|
62
62
|
});
|
|
63
63
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
64
|
+
/**
 * Invoke one `queueBus` operation through the host's test surface seam
 * (`POST /v1/host/sample/test/surface`) on behalf of tenant `tenant-a`.
 *
 * @param op - operation name forwarded to the seam.
 * @param args - operation arguments, forwarded as-is.
 * @returns the driver's response for the POST.
 */
async function call(op: string, args: Record<string, unknown>) {
  const request = { tenantId: 'tenant-a', surface: 'queueBus', op, args };
  return driver.post('/v1/host/sample/test/surface', request);
}
|
|
67
|
+
|
|
68
|
+
// Behavioral nack / dead-letter scenarios, driven through the queueBus test
// surface. Each scenario first probes the seam with a consume on `__probe__`
// and returns without asserting when the seam answers 404.
describe('queue-ack-nack-dlq: behavioral (RFC 0017 §B point 2 — nack + DLQ)', () => {
  it('nack(requeue=true) → message is redelivered on next consume with deliveryCount incremented', async () => {
    const probe = await call('consume', { subject: '__probe__' });
    if (probe.status === 404) return;

    // Unique subject per invocation so earlier runs cannot interfere.
    const subject = `q-nack-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    await call('publish', { subject, payload: { v: 'redeliver-me' } });

    // First delivery: expect count 1, then nack with requeue.
    const firstDelivery = (await call('consume', { subject })).json as {
      deliveryToken?: string;
      payload?: unknown;
      deliveryCount?: number;
    };
    expect(firstDelivery.deliveryCount).toBe(1);

    const nackResult = (await call('nack', { deliveryToken: firstDelivery.deliveryToken, requeue: true })).json as {
      requeued?: boolean;
    };
    expect(nackResult.requeued).toBe(true);

    // Second delivery: same payload, count bumped to 2.
    const redelivery = (await call('consume', { subject })).json as {
      found?: boolean;
      payload?: unknown;
      deliveryCount?: number;
    };
    expect(
      redelivery.found,
      driver.describe('RFC 0017 §B point 2', 'nack(requeue=true) MUST make the message available to next consume'),
    ).toBe(true);
    expect(redelivery.payload).toEqual(firstDelivery.payload);
    expect(
      redelivery.deliveryCount,
      driver.describe('RFC 0017 §B point 2', 'redelivered message MUST have incremented deliveryCount'),
    ).toBe(2);
  });

  it('deadLetter → message appears on the <subject>.dlq subject; original subject is empty', async () => {
    const probe = await call('consume', { subject: '__probe__' });
    if (probe.status === 404) return;

    const subject = `q-dlq-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
    await call('publish', { subject, payload: { v: 'poison' } });

    // Consume once to obtain a delivery token, then dead-letter that delivery.
    const consumed = (await call('consume', { subject })).json as { deliveryToken?: string };
    const deadLetterResult = (await call('deadLetter', {
      deliveryToken: consumed.deliveryToken,
      reason: 'unparseable_payload',
    })).json as { deadLettered?: boolean; dlqSubject?: string };
    expect(deadLetterResult.deadLettered).toBe(true);
    expect(deadLetterResult.dlqSubject).toBe(`${subject}.dlq`);

    // Original subject MUST be empty now.
    const afterDeadLetter = (await call('consume', { subject })).json as { found?: boolean };
    expect(afterDeadLetter.found).toBe(false);

    // The DLQ MUST carry the original message together with the deadLetterReason.
    const dlqDelivery = (await call('consume', { subject: `${subject}.dlq` })).json as {
      found?: boolean;
      payload?: { original?: unknown; deadLetterReason?: string };
    };
    expect(
      dlqDelivery.found,
      driver.describe('RFC 0017 §B point 2', 'deadLetter MUST route the message to the <subject>.dlq subject'),
    ).toBe(true);
    expect(dlqDelivery.payload?.deadLetterReason).toBe('unparseable_payload');
    expect(dlqDelivery.payload?.original).toEqual({ v: 'poison' });
  });
});
|
|
@@ -42,7 +42,47 @@ describe('queue-publish-consume-roundtrip: advertisement shape (RFC 0017)', () =
|
|
|
42
42
|
});
|
|
43
43
|
});
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
45
|
+
/**
 * Sends one `queueBus` operation to the host's sample test-surface endpoint.
 * The request is always issued with `tenantId: 'tenant-a'`.
 *
 * @param op - Operation name forwarded in the request body.
 * @param args - Operation arguments forwarded in the request body.
 * @returns The result of `driver.post` for the request.
 */
async function call(op: string, args: Record<string, unknown>) {
  const requestBody = { tenantId: 'tenant-a', surface: 'queueBus', op, args };
  return driver.post('/v1/host/sample/test/surface', requestBody);
}
|
|
48
|
+
|
|
49
|
+
describe('queue-publish-consume-roundtrip: behavioral (RFC 0017 §B point 2)', () => {
|
|
50
|
+
it('publish → consume returns the same payload + subject', async () => {
  // Seam not exposed: a 404 from the probe ends the test without assertions.
  const { status: probeStatus } = await call('consume', { subject: '__probe__' });
  if (probeStatus === 404) {
    return;
  }

  // Timestamp plus random suffix gives each run its own subject name.
  const stamp = Date.now();
  const salt = Math.random().toString(36).slice(2, 8);
  const topic = `q-${stamp}-${salt}`;
  const message = { event: 'order.created', orderId: 42 };

  const publishReply = await call('publish', { subject: topic, payload: message });
  expect(publishReply.status).toBe(200);

  const consumeReply = await call('consume', { subject: topic });
  expect(consumeReply.status).toBe(200);

  // The consumed envelope must echo the subject and payload and carry a token.
  const delivered = consumeReply.json as {
    found?: boolean;
    subject?: string;
    payload?: unknown;
    deliveryToken?: string;
  };
  expect(delivered.found, 'consume MUST find the just-published message').toBe(true);
  expect(delivered.subject).toBe(topic);
  expect(
    delivered.payload,
    driver.describe('RFC 0017 §B point 2', 'consume MUST return the exact published payload'),
  ).toEqual(message);
  expect(typeof delivered.deliveryToken, 'consume MUST return a deliveryToken for ack/nack').toBe('string');
});
|
|
69
|
+
|
|
70
|
+
it('ack removes the message; subsequent consume on empty queue returns found:false', async () => {
  const probe = await call('consume', { subject: '__probe__' });
  if (probe.status === 404) return; // seam not exposed

  // Timestamp plus random suffix gives each run its own subject name.
  const subject = `q-ack-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;

  // Check the setup steps the same way the roundtrip test does, so a broken
  // publish or consume is reported where it happens rather than at the ack.
  const pub = await call('publish', { subject, payload: { v: 1 } });
  expect(pub.status).toBe(200);

  const got = await call('consume', { subject });
  const deliveryToken = (got.json as { deliveryToken?: string }).deliveryToken;
  expect(typeof deliveryToken, 'consume MUST return a deliveryToken for ack/nack').toBe('string');

  const ackRes = await call('ack', { deliveryToken });
  expect(ackRes.status).toBe(200);
  expect((ackRes.json as { acked?: boolean }).acked).toBe(true);

  // After the ack, the subject must have nothing left to deliver.
  const empty = await call('consume', { subject });
  const emptyBody = empty.json as { found?: boolean };
  expect(
    emptyBody.found,
    driver.describe('RFC 0017 §B point 2', 'consume after ack MUST surface as found:false'),
  ).toBe(false);
});
|
|
48
88
|
});
|