@openwop/openwop-conformance 1.36.0 → 1.43.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/CHANGELOG.md +32 -0
  2. package/README.md +2 -2
  3. package/api/openapi.yaml +62 -5
  4. package/coverage.md +1 -0
  5. package/fixtures/conformance-agent-memory-injection-budget.json +44 -0
  6. package/fixtures/conformance-context-budget-multiturn.json +50 -0
  7. package/fixtures.md +2 -0
  8. package/package.json +1 -1
  9. package/schemas/README.md +3 -0
  10. package/schemas/a2ui-surface-delta-frame.schema.json +48 -0
  11. package/schemas/capabilities.schema.json +128 -1
  12. package/schemas/channel-presence-payload.schema.json +41 -0
  13. package/schemas/compact-tool-descriptor.schema.json +51 -0
  14. package/schemas/conversation-turn.schema.json +10 -0
  15. package/schemas/memory-list-options.schema.json +16 -0
  16. package/schemas/run-event-payloads.schema.json +25 -2
  17. package/schemas/run-event.schema.json +2 -0
  18. package/src/lib/toolCatalog.ts +89 -0
  19. package/src/scenarios/a2ui-surface-delta-transport.test.ts +600 -0
  20. package/src/scenarios/aiproviders-selfhosted-honesty.test.ts +133 -0
  21. package/src/scenarios/channel-presence-behavioral.test.ts +83 -0
  22. package/src/scenarios/channel-presence-shape.test.ts +93 -0
  23. package/src/scenarios/context-budget-transcript-bound.test.ts +253 -0
  24. package/src/scenarios/context-summarization-replay.test.ts +155 -0
  25. package/src/scenarios/conversation-turn-model-provenance-shape.test.ts +120 -0
  26. package/src/scenarios/memory-injection-budget.test.ts +188 -0
  27. package/src/scenarios/prompt-prefix-cache.test.ts +200 -0
  28. package/src/scenarios/run-transport-economy.test.ts +236 -0
  29. package/src/scenarios/tool-catalog-compact-projection.test.ts +149 -0
@@ -0,0 +1,236 @@
1
+ /**
2
+ * run-transport-economy — RFC 0115 behavioral.
3
+ *
4
+ * Status: ACTIVE (capability-gated behavioral). Gated on
5
+ * `capabilities.restTransport.conditionalRunGet === true` (conditional-GET
6
+ * leg) and on a non-empty `capabilities.restTransport.contentEncodings`
7
+ * (compression leg). Both legs soft-skip on hosts that do not advertise the
8
+ * surface (incl. the reference workflow-engine, which has not yet wired the
9
+ * sequence-derived `ETag` path), so they light up the moment a host advertises
10
+ * `restTransport`.
11
+ *
12
+ * Asserts (per `spec/v1/rest-endpoints.md` §"`GET /v1/runs/{runId}`
13
+ * conditional read + Content-Encoding (RFC 0115)"):
14
+ *
15
+ * 1. `GET /v1/runs/{runId}` carries a strong `ETag` on the `200`.
16
+ * 2. A re-`GET` with `If-None-Match: <current ETag>` returns `304 Not
17
+ * Modified` with an empty body while the run has NOT advanced (the
18
+ * validator is stable while no observable transition occurs).
19
+ * 3. After the run advances (the `conformance-approval` fixture is resumed
20
+ * from `waiting-approval` to `completed`), the `ETag` CHANGES — proving
21
+ * it is derived from the run's latest persisted event-log sequence
22
+ * number, not a coarser signal that could leave a `304` stale.
23
+ * 4. For each advertised `contentEncodings` value, requesting it via
24
+ * `Accept-Encoding` yields a `Content-Encoding`-tagged response whose
25
+ * decoded bytes are byte-identical to the identity body.
26
+ *
27
+ * Non-vacuity: the `conformance-approval` fixture gives two deterministic,
28
+ * stable observable states (suspended → completed), so the ETag-stability and
29
+ * ETag-change assertions are exact rather than racing a fast run.
30
+ *
31
+ * @see RFCS/0115-run-transport-economy.md
32
+ * @see spec/v1/rest-endpoints.md §"`GET /v1/runs/{runId}` conditional read + Content-Encoding (RFC 0115)"
33
+ * @see spec/v1/replay.md §"durable event log" (the monotonic sequence the ETag derives from)
34
+ */
35
+
36
+ import { describe, it, expect } from 'vitest';
37
+ import { gunzipSync, brotliDecompressSync } from 'node:zlib';
38
+ import * as zlib from 'node:zlib';
39
+ import { driver } from '../lib/driver.js';
40
+ import { loadEnv } from '../lib/env.js';
41
+ import { capabilityFamily } from '../lib/discovery-capabilities.js';
42
+ import { pollUntilStatus, pollUntilTerminal } from '../lib/polling.js';
43
+
44
+ const HTTP_SKIP = !process.env.OPENWOP_BASE_URL; // true when OPENWOP_BASE_URL is unset or empty; handed to describe.skipIf for both suites
45
+ const APPROVAL_FIXTURE = 'conformance-approval'; // workflowId posted to /v1/runs by the conditional-GET scenario
46
+ const APPROVAL_NODE_ID = 'gate'; // node id placed in the interrupts URL when the approval is resolved
47
+ const NOOP_FIXTURE = 'conformance-noop'; // workflowId posted to /v1/runs by the Content-Encoding scenario
48
+
49
+ type Encoding = 'gzip' | 'br' | 'zstd'; // the only content-encoding tokens this suite exercises; advertisedEncodings() drops anything else
50
+
51
+ interface RestTransportCaps { // view of the `restTransport` capability family read from the discovery document
52
+ conditionalRunGet?: unknown; // typed unknown on purpose: the scenario compares it against `true` at runtime
53
+ contentEncodings?: unknown; // typed unknown on purpose: narrowed via Array.isArray + literal checks in advertisedEncodings()
54
+ }
55
+
56
/**
 * Fetch the discovery document and pull out the `restTransport` capability
 * family.
 *
 * @returns whatever `capabilityFamily` yields for `restTransport`, or
 *   `undefined` when discovery does not answer 200 or anything in the read
 *   throws (the scenarios treat both as "surface not advertised").
 */
async function readRestTransport(): Promise<RestTransportCaps | undefined> {
  try {
    const discovery = await driver.get('/.well-known/openwop');
    const advertised =
      discovery.status === 200
        ? capabilityFamily<RestTransportCaps>(discovery.json, 'restTransport')
        : undefined;
    return advertised;
  } catch {
    // Best-effort read: a failing discovery call means "not advertised".
    return undefined;
  }
}
65
+
66
/**
 * Narrow the advertised `contentEncodings` value to the encodings this suite
 * knows how to exercise.
 *
 * @param caps the `restTransport` capability view (may be `undefined`)
 * @returns the recognised entries in advertised order; `[]` when the value is
 *   missing or not an array. Unrecognised entries are dropped.
 */
function advertisedEncodings(caps: RestTransportCaps | undefined): Encoding[] {
  const advertised = caps?.contentEncodings;
  if (!Array.isArray(advertised)) {
    return [];
  }
  const recognised: Encoding[] = [];
  for (const entry of advertised) {
    if (entry === 'gzip' || entry === 'br' || entry === 'zstd') {
      recognised.push(entry);
    }
  }
  return recognised;
}
72
+
73
/**
 * Pull the `ETag` response header off a run-snapshot response.
 *
 * @param res any response-like object exposing a `Headers` instance
 * @returns the header value, or `null` when the header is absent
 *   (`Headers.get` matches header names case-insensitively)
 */
function etagOf(res: { headers: Headers }): string | null {
  const { headers } = res;
  return headers.get('etag');
}
77
+
78
/**
 * Conditional-GET leg (RFC 0115).
 *
 * Skipped entirely when OPENWOP_BASE_URL is unset, and soft-skipped when the
 * host does not advertise `restTransport.conditionalRunGet === true` or does
 * not serve the approval fixture (404/422 on create).
 *
 * Flow: create the approval run → wait for `waiting-approval` → read (ETag
 * present and strong) → revalidate (304, empty body) → resolve the interrupt →
 * wait for terminal → read again (ETag present, strong, and different) →
 * revalidate the new ETag (304).
 */
describe.skipIf(HTTP_SKIP)('run-transport-economy: conditional GET on run reads (RFC 0115)', () => {
  it('emits a sequence-derived strong ETag, honors If-None-Match with 304, and rotates the ETag when the run advances', async (ctx) => {
    const caps = await readRestTransport();
    if (caps?.conditionalRunGet !== true) {
      ctx.skip(); // host does not advertise restTransport.conditionalRunGet
      return;
    }

    // Use the approval fixture: it parks at a stable `waiting-approval` state,
    // then advances to `completed` on resolve — two deterministic snapshots.
    const create = await driver.post('/v1/runs', { workflowId: APPROVAL_FIXTURE });
    if (create.status === 404 || create.status === 422) {
      ctx.skip(); // fixture not advertised by this host
      return;
    }
    expect(create.status).toBe(201);
    const runId = (create.json as { runId: string }).runId;
    await pollUntilStatus(runId, 'waiting-approval', { timeoutMs: 10_000 });

    // (1) Strong ETag present on the 200.
    const suspendedRead = await driver.get(`/v1/runs/${encodeURIComponent(runId)}`);
    expect(suspendedRead.status).toBe(200);
    const etagSuspended = etagOf(suspendedRead);
    expect(
      etagSuspended,
      driver.describe(
        'rest-endpoints.md §GET /v1/runs/{runId} conditional read (RFC 0115)',
        'a host advertising restTransport.conditionalRunGet MUST return a strong ETag on the 200',
      ),
    ).toBeTruthy();
    // Presence alone does not prove strength: a weak validator is spelled
    // `W/"..."`, so reject that prefix explicitly.
    expect(
      etagSuspended?.startsWith('W/') ?? false,
      driver.describe(
        'rest-endpoints.md §GET /v1/runs/{runId} conditional read (RFC 0115)',
        'the ETag on the 200 MUST be a strong validator (no W/ weak prefix)',
      ),
    ).toBe(false);

    // (2) If-None-Match with the current ETag → 304, empty body, while unchanged.
    const revalidate = await driver.get(`/v1/runs/${encodeURIComponent(runId)}`, {
      headers: { 'If-None-Match': etagSuspended as string },
    });
    expect(
      revalidate.status,
      driver.describe(
        'rest-endpoints.md §GET /v1/runs/{runId} conditional read (RFC 0115)',
        'If-None-Match matching the current ETag MUST return 304 Not Modified',
      ),
    ).toBe(304);
    expect(
      revalidate.text,
      driver.describe(
        'rest-endpoints.md §GET /v1/runs/{runId} conditional read (RFC 0115)',
        '304 Not Modified MUST carry no body',
      ),
    ).toBe('');

    // Advance the run: resolve the approval interrupt → terminal `completed`.
    const resolve = await driver.post(
      `/v1/runs/${encodeURIComponent(runId)}/interrupts/${encodeURIComponent(APPROVAL_NODE_ID)}`,
      { resumeValue: { action: 'accept' } },
    );
    expect(resolve.status).toBe(200);
    const terminal = await pollUntilTerminal(runId, { timeoutMs: 10_000 });
    expect(terminal.status).toBe('completed');

    // (3) ETag rotates after the observable state advanced.
    const completedRead = await driver.get(`/v1/runs/${encodeURIComponent(runId)}`);
    expect(completedRead.status).toBe(200);
    const etagCompleted = etagOf(completedRead);
    expect(etagCompleted).toBeTruthy();
    expect(
      etagCompleted?.startsWith('W/') ?? false,
      driver.describe(
        'rest-endpoints.md §GET /v1/runs/{runId} conditional read (RFC 0115)',
        'the ETag on the 200 MUST be a strong validator (no W/ weak prefix)',
      ),
    ).toBe(false);
    expect(
      etagCompleted,
      driver.describe(
        'rest-endpoints.md §GET /v1/runs/{runId} conditional read (RFC 0115)',
        'the ETag MUST change once the run advances (it is derived from the latest event-log sequence); a stable ETag across an observable transition would leave a 304 stale',
      ),
    ).not.toBe(etagSuspended);

    // The new ETag is itself stable while the (now terminal) run does not change.
    const revalidateTerminal = await driver.get(`/v1/runs/${encodeURIComponent(runId)}`, {
      headers: { 'If-None-Match': etagCompleted as string },
    });
    expect(
      revalidateTerminal.status,
      driver.describe(
        'rest-endpoints.md §GET /v1/runs/{runId} conditional read (RFC 0115)',
        'the terminal ETag MUST be stable — If-None-Match against it returns 304',
      ),
    ).toBe(304);
  });
});
163
+
164
/**
 * Content-Encoding leg (RFC 0115).
 *
 * Skipped entirely when OPENWOP_BASE_URL is unset, and soft-skipped when the
 * host advertises no recognised `restTransport.contentEncodings` or does not
 * serve the noop fixture (404/422 on create).
 *
 * For every advertised encoding the run snapshot is requested with that
 * `Accept-Encoding`; the response must be tagged with the matching
 * `Content-Encoding` and its decoded bytes must equal the identity body.
 */
describe.skipIf(HTTP_SKIP)('run-transport-economy: Content-Encoding round-trips byte-identically (RFC 0115)', () => {
  it('each advertised contentEncodings value decodes to the identity body byte-for-byte', async (ctx) => {
    const caps = await readRestTransport();
    const encodings = advertisedEncodings(caps);
    if (encodings.length === 0) {
      ctx.skip(); // host advertises no run-read content encodings
      return;
    }

    // A terminal run gives a stable body to compare encodings against.
    const create = await driver.post('/v1/runs', { workflowId: NOOP_FIXTURE });
    if (create.status === 404 || create.status === 422) {
      ctx.skip();
      return;
    }
    expect(create.status).toBe(201);
    const runId = (create.json as { runId: string }).runId;
    await pollUntilTerminal(runId, { timeoutMs: 10_000 });

    const env = loadEnv();
    const url = `${env.baseUrl}/v1/runs/${encodeURIComponent(runId)}`;
    const auth = { Authorization: `Bearer ${env.apiKey}`, Accept: 'application/json' };

    // Identity baseline — explicit Accept-Encoding: identity so the host does
    // not compress; raw bytes are the comparison oracle.
    const identityRes = await fetch(url, { headers: { ...auth, 'Accept-Encoding': 'identity' } });
    expect(identityRes.status).toBe(200);
    const identityBytes = Buffer.from(await identityRes.arrayBuffer());
    expect(identityBytes.length).toBeGreaterThan(0);

    // Feature-detect zstd decode (Node >= 22.15 / 23.8); when absent we still
    // assert the host negotiated Content-Encoding but defer the byte-compare.
    // Cast-free: the optional-property view is assignable from the zlib module
    // namespace under structural typing whether or not @types/node declares it.
    const zlibMaybeZstd: { zstdDecompressSync?: (b: Buffer) => Buffer } = zlib;
    const zstdDecode: ((b: Buffer) => Buffer) | undefined =
      typeof zlibMaybeZstd.zstdDecompressSync === 'function'
        ? zlibMaybeZstd.zstdDecompressSync
        : undefined;

    for (const enc of encodings) {
      // Request exactly one encoding so the host's choice is unambiguous.
      const res = await fetch(url, { headers: { ...auth, 'Accept-Encoding': enc } });
      expect(res.status).toBe(200);
      const contentEncoding = res.headers.get('content-encoding');
      expect(
        contentEncoding,
        driver.describe(
          'rest-endpoints.md §GET /v1/runs/{runId} conditional read + Content-Encoding (RFC 0115)',
          `a host advertising restTransport.contentEncodings:["...","${enc}"] MUST set Content-Encoding: ${enc} when that encoding is requested`,
        ),
      ).toBe(enc);

      const receivedBytes = Buffer.from(await res.arrayBuffer());
      const decode = enc === 'gzip' ? gunzipSync : enc === 'br' ? brotliDecompressSync : zstdDecode;
      if (!decode) {
        // zstd decode unavailable in this runtime: negotiation already
        // asserted above; skip only the byte-compare for this encoding.
        expect(receivedBytes.length).toBeGreaterThan(0);
        continue;
      }

      // The Fetch content-coding step decodes the body from the RESPONSE's
      // Content-Encoding header, independent of who set Accept-Encoding, so
      // `receivedBytes` may already be the decoded payload. Accept either
      // delivery: already-decoded bytes, or raw bytes that decode here.
      // Trade-off: on a runtime that does NOT auto-decode, a host that labels
      // an identity body with Content-Encoding would slip through this branch.
      let roundTrips = receivedBytes.equals(identityBytes);
      if (!roundTrips) {
        try {
          roundTrips = Buffer.from(decode(receivedBytes)).equals(identityBytes);
        } catch {
          // Undecodable payload: report through the described assertion below
          // instead of surfacing a bare zlib error.
          roundTrips = false;
        }
      }
      expect(
        roundTrips,
        driver.describe(
          'rest-endpoints.md §GET /v1/runs/{runId} conditional read + Content-Encoding (RFC 0115)',
          `the ${enc}-decoded body MUST be byte-identical to the identity body (Content-Encoding MUST NOT alter decoded bytes or semantics)`,
        ),
      ).toBe(true);
    }
  });
});
@@ -0,0 +1,149 @@
1
+ /**
2
+ * Compact tool projection — `GET /v1/tools?view=compact` (RFC 0112) — behavioral.
3
+ *
4
+ * Capability-gated on `capabilities.toolCatalog.compactView === true` (root-first
5
+ * per RFC 0073). Soft-skips when unadvertised (default) / hard-fails under
6
+ * `OPENWOP_REQUIRE_BEHAVIOR=true`. The standard projection coverage lives in
7
+ * `tool-catalog-projection.test.ts`; this asserts the OPT-IN compact view per
8
+ * `spec/v1/tool-catalog.md` §compact (and the new
9
+ * `compact-tool-descriptor.schema.json`):
10
+ *
11
+ * 1. ENVELOPE (§compact) — `GET /v1/tools?view=compact` returns the
12
+ * `{ tools: CompactToolDescriptor[] }` envelope (NOT a bare array).
13
+ * 2. SCHEMA — every compact descriptor validates against
14
+ * `compact-tool-descriptor.schema.json` (closed field set; the heavy
15
+ * `ToolDescriptor` fields — `outputSchema`/`auth`/`egress`/`approval`/
16
+ * `replayPolicy`/`costHint`/`latencyHint` — are ABSENT).
17
+ * 3. STRUCTURAL SUBSET — every present `inputSchema` satisfies the compact
18
+ * structural subset: top-level `type: "object"` with `properties`, and
19
+ * none of `$ref`/`oneOf`/`allOf`/`anyOf`/`not`/`patternProperties`/
20
+ * `dependentSchemas`. Validated against the schema (no dereference of the
21
+ * informative RFC 0030 Tier-1 table).
22
+ * 4. PROJECTION COMPLETENESS — the compact `tools[]` `toolId` set EQUALS the
23
+ * standard `tools[]` `toolId` set for the same principal (a compact catalog
24
+ * that drops a tool the standard view shows is non-conformant;
25
+ * authorization-scoping preserved).
26
+ * 5. BY-ID — `GET /v1/tools/{toolId}?view=compact` returns one schema-valid
27
+ * CompactToolDescriptor.
28
+ *
29
+ * Spec references:
30
+ * - https://github.com/openwop/openwop/blob/main/spec/v1/tool-catalog.md (§compact)
31
+ * - https://github.com/openwop/openwop/blob/main/RFCS/0112-compact-tool-projection.md
32
+ */
33
+
34
+ import { describe, it, expect } from 'vitest';
35
+ import { readFileSync } from 'node:fs';
36
+ import { join } from 'node:path';
37
+ import Ajv2020 from 'ajv/dist/2020.js';
38
+ import addFormats from 'ajv-formats';
39
+ import { driver } from '../lib/driver.js';
40
+ import { behaviorGate } from '../lib/behavior-gate.js';
41
+ import { SCHEMAS_DIR } from '../lib/paths.js';
42
+ import {
43
+ readToolCatalogCap,
44
+ listToolsCompact,
45
+ COMPACT_DROPPED_FIELDS,
46
+ findBannedInputSchemaKeyword,
47
+ type CompactToolDescriptor,
48
+ } from '../lib/toolCatalog.js';
49
+
50
/**
 * Read and parse one JSON schema file from `SCHEMAS_DIR`.
 *
 * @param name file name relative to `SCHEMAS_DIR`
 * @returns the parsed document (asserted, not validated, to be an object)
 * @throws whatever `readFileSync` / `JSON.parse` throw for a missing or malformed file
 */
function loadSchema(name: string): Record<string, unknown> {
  const schemaPath = join(SCHEMAS_DIR, name);
  const rawText = readFileSync(schemaPath, 'utf8');
  const parsed = JSON.parse(rawText) as Record<string, unknown>;
  return parsed;
}
53
+
54
/**
 * Collect the string `toolId` values out of a `GET /v1/tools` body.
 *
 * Accepts both standard shapes — a bare descriptor array and the
 * `{ tools: [] }` envelope. Any other body, and any entry that is not an
 * object carrying a string `toolId`, contributes nothing.
 *
 * @param body the parsed response body, of unknown shape
 * @returns the distinct `toolId` strings found
 */
function toolIdSet(body: unknown): Set<string> {
  let descriptors: unknown[] = [];
  if (Array.isArray(body)) {
    descriptors = body;
  } else if (body !== null && typeof body === 'object') {
    const wrapped = (body as { tools?: unknown }).tools;
    if (Array.isArray(wrapped)) {
      descriptors = wrapped;
    }
  }

  const collected = new Set<string>();
  for (const entry of descriptors) {
    if (entry === null || typeof entry !== 'object') continue;
    const candidate = (entry as { toolId?: unknown }).toolId;
    if (typeof candidate === 'string') {
      collected.add(candidate);
    }
  }
  return collected;
}
71
+
72
/**
 * Compact tool projection scenario (RFC 0112 §compact).
 *
 * Gated through `behaviorGate` on `toolCatalog.compactView === true`; returns
 * early when the gate says not to run or when `listToolsCompact()` yields
 * `null`. Then, in order: per-descriptor schema validity and dropped heavy
 * fields (Leg 2), the inputSchema structural subset (Leg 3), toolId-set
 * equality against the standard view (Leg 4), and a by-id compact read of the
 * first descriptor (Leg 5, only asserted when that read answers 200).
 */
describe('tool-catalog-compact-projection (RFC 0112 §compact)', () => {
  it('serves the { tools: CompactToolDescriptor[] } projection — closed shape, bounded inputSchema, same toolId set as standard', async () => {
    const cap = await readToolCatalogCap();
    if (!behaviorGate('openwop-tool-catalog-compact', cap?.compactView === true)) return;

    const ajv = new Ajv2020({ strict: false, allErrors: true });
    addFormats(ajv);
    const validate = ajv.compile(loadSchema('compact-tool-descriptor.schema.json'));

    // ---- Leg 1: the compact envelope (§compact) -------------------------
    const compact = await listToolsCompact();
    if (compact === null) return; // advertises the cap but doesn't serve the read — soft-skip the rest

    for (const t of compact) {
      // ---- Leg 2: schema validity + heavy fields dropped ----------------
      expect(
        validate(t),
        driver.describe('compact-tool-descriptor.schema.json', `each CompactToolDescriptor MUST validate (${ajv.errorsText(validate.errors)})`),
      ).toBe(true);
      for (const f of COMPACT_DROPPED_FIELDS) {
        expect(
          !(f in t),
          driver.describe('tool-catalog.md §compact', `CompactToolDescriptor MUST drop the heavy field "${f}"`),
        ).toBe(true);
      }

      // ---- Leg 3: the compact structural subset on inputSchema ----------
      const input = (t as CompactToolDescriptor).inputSchema;
      if (input !== undefined) {
        expect(
          input.type === 'object' && typeof input.properties === 'object' && input.properties !== null,
          driver.describe('tool-catalog.md §compact', 'compact inputSchema MUST be top-level type:"object" with a properties map'),
        ).toBe(true);
        // Total (any-depth), schema-aware: a nested oneOf/$ref under a property
        // schema is exactly the verbosity the compact view exists to drop.
        const banned = findBannedInputSchemaKeyword(input);
        expect(
          banned,
          driver.describe('tool-catalog.md §compact', `compact inputSchema MUST NOT use $ref/oneOf/allOf/anyOf/not/patternProperties/dependentSchemas at any nesting depth (found "${banned ?? 'none'}")`),
        ).toBe(null);
      }
    }

    // ---- Leg 4: projection completeness vs the standard view -------------
    const standardRes = await driver.get('/v1/tools');
    // Without this check a failed standard read yields an empty id set, which
    // compares equal to an empty compact list and passes the leg vacuously.
    expect(
      standardRes.status,
      driver.describe('tool-catalog.md §compact', 'GET /v1/tools (standard view) MUST return 200 so the compact toolId set has a baseline to compare against'),
    ).toBe(200);
    const standardIds = toolIdSet(standardRes.json);
    const compactIds = new Set<string>();
    for (const t of compact) {
      if (typeof t.toolId === 'string') compactIds.add(t.toolId);
    }
    const sameSet =
      standardIds.size === compactIds.size && [...standardIds].every((id) => compactIds.has(id));
    expect(
      sameSet,
      driver.describe(
        'tool-catalog.md §compact',
        `compact tools[] MUST carry the same toolId set as the standard view (standard=${[...standardIds].sort().join(',')} compact=${[...compactIds].sort().join(',')})`,
      ),
    ).toBe(true);

    // ---- Leg 5: by-id compact round-trip --------------------------------
    if (compact.length > 0 && typeof compact[0]!.toolId === 'string') {
      const id = compact[0]!.toolId;
      const one = await driver.get(`/v1/tools/${encodeURIComponent(id)}?view=compact`);
      // A non-200 by-id read is tolerated here (soft leg); only a served
      // descriptor is checked for shape and identity.
      if (one.status === 200) {
        expect(
          validate(one.json),
          driver.describe('compact-tool-descriptor.schema.json', `GET /v1/tools/{toolId}?view=compact MUST return a valid CompactToolDescriptor (${ajv.errorsText(validate.errors)})`),
        ).toBe(true);
        const got = one.json;
        expect(
          got && typeof got === 'object' && (got as { toolId?: unknown }).toolId === id,
          driver.describe('tool-catalog.md §compact', 'GET /v1/tools/{toolId}?view=compact MUST return the requested descriptor'),
        ).toBe(true);
      }
    }
  });
});