@openwop/openwop-conformance 1.37.0 → 1.44.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/CHANGELOG.md +38 -0
  2. package/README.md +2 -2
  3. package/api/openapi.yaml +62 -5
  4. package/fixtures/conformance-agent-memory-injection-budget.json +44 -0
  5. package/fixtures/conformance-context-budget-multiturn.json +50 -0
  6. package/fixtures.md +2 -0
  7. package/package.json +1 -1
  8. package/schemas/README.md +5 -0
  9. package/schemas/a2ui-surface-delta-frame.schema.json +48 -0
  10. package/schemas/capabilities.schema.json +155 -1
  11. package/schemas/channel-presence-payload.schema.json +41 -0
  12. package/schemas/compact-tool-descriptor.schema.json +51 -0
  13. package/schemas/conversation-turn.schema.json +10 -0
  14. package/schemas/frontend-plugin-manifest.schema.json +93 -0
  15. package/schemas/memory-list-options.schema.json +16 -0
  16. package/schemas/run-event-payloads.schema.json +25 -2
  17. package/schemas/run-event.schema.json +2 -0
  18. package/schemas/ui-plugin-message.schema.json +90 -0
  19. package/src/lib/toolCatalog.ts +89 -0
  20. package/src/scenarios/a2ui-surface-delta-transport.test.ts +600 -0
  21. package/src/scenarios/channel-presence-behavioral.test.ts +83 -0
  22. package/src/scenarios/channel-presence-shape.test.ts +93 -0
  23. package/src/scenarios/context-budget-transcript-bound.test.ts +253 -0
  24. package/src/scenarios/context-summarization-replay.test.ts +155 -0
  25. package/src/scenarios/conversation-turn-model-provenance-shape.test.ts +120 -0
  26. package/src/scenarios/frontend-plugin-packs.test.ts +230 -0
  27. package/src/scenarios/memory-injection-budget.test.ts +188 -0
  28. package/src/scenarios/prompt-prefix-cache.test.ts +200 -0
  29. package/src/scenarios/run-transport-economy.test.ts +236 -0
  30. package/src/scenarios/tool-catalog-compact-projection.test.ts +149 -0
@@ -0,0 +1,230 @@
1
+ /**
2
+ * Front-end plugin packs — `frontend-plugin-packs.md` (RFC 0117). Public test for
3
+ * the four protocol-tier SECURITY invariants `frontend-plugin-isolation` /
4
+ * `frontend-plugin-egress` / `frontend-plugin-rpc-allowlist` / `frontend-plugin-no-byok`,
5
+ * plus the manifest shape and the `ui-plugin/1` host-RPC + version-token concurrency
6
+ * contract.
7
+ *
8
+ * Two layers:
9
+ *
10
+ * A. Always-on, server-free schema probe — `frontend-plugin-manifest.schema.json`
11
+ * and `ui-plugin-message.schema.json` enforce the wire shape: a valid manifest /
12
+ * message validates; a backend `runtime` member, a `uiPlugins[]` entry missing
13
+ * `entry`, an out-of-allowlist `method`, and any envelope `additionalProperties`
14
+ * are rejected. The `version`-token concurrency contract (the `artifact_conflict`
15
+ * error code + `currentVersion`) is schema-pinned. No credential-bearing field is
16
+ * admitted on the envelope (`frontend-plugin-no-byok`).
17
+ *
18
+ * B. Capability-gated behavioral leg — on a host advertising
19
+ * `capabilities.uiPlugins.supported: true` that exposes the
20
+ * `POST /v1/host/sample/ui-plugin/rpc` test seam, an undeclared-method request
21
+ * MUST be refused with `method_not_allowed` (`frontend-plugin-rpc-allowlist`), and
22
+ * a stale `artifact.write` MUST be refused with `artifact_conflict` + `currentVersion`
23
+ * and MUST NOT persist (§Concurrency). Hosts without the seam soft-skip (404 or 403);
24
+ * unadvertised hosts skip via the behavior gate. (No conformant host advertises
25
+ * `uiPlugins` yet — these legs soft-skip until the openwop-app reference host (ADR
26
+ * 0153) lands, the first witness that graduates the invariants to protocol.)
27
+ *
28
+ * @see spec/v1/frontend-plugin-packs.md
29
+ * @see SECURITY/invariants.yaml ids: frontend-plugin-{isolation,egress,rpc-allowlist,no-byok}
30
+ * @see RFCS/0117-frontend-plugin-packs.md
31
+ */
32
+
33
+ import { describe, it, expect } from 'vitest';
34
+ import { readFileSync } from 'node:fs';
35
+ import { join } from 'node:path';
36
+ import Ajv2020 from 'ajv/dist/2020.js';
37
+ import addFormats from 'ajv-formats';
38
+ import { SCHEMAS_DIR } from '../lib/paths.js';
39
+ import { driver } from '../lib/driver.js';
40
+ import { behaviorGate } from '../lib/behavior-gate.js';
41
+ import { readCapabilityFamily } from '../lib/discovery-capabilities.js';
42
+
43
/** Resolves a schema file name against the package's `SCHEMAS_DIR`. */
const schemaPath = (fileName: string): string => join(SCHEMAS_DIR, fileName);

// Locations of the two JSON Schemas probed by the server-free layers below.
const MANIFEST_SCHEMA = schemaPath('frontend-plugin-manifest.schema.json');
const MESSAGE_SCHEMA = schemaPath('ui-plugin-message.schema.json');
45
+
46
/**
 * Builds the baseline frontend-plugin manifest the schema probes start from.
 *
 * Every call creates a brand-new object graph (including the `uiPlugins[]`
 * entry), so a test may mutate or delete members without affecting other tests.
 *
 * @returns a manifest declaring a single `artifact-viewer` plugin.
 */
function validManifest(): Record<string, unknown> {
  const appBuilderPlugin = {
    pluginId: 'app-builder',
    surface: 'artifact-viewer',
    entry: 'ui/app-builder.mjs',
    hostApi: ['artifact.read', 'artifact.write'],
  };

  const manifest: Record<string, unknown> = {
    name: 'vendor.acme.canvas-editor',
    version: '1.0.0',
    kind: 'frontend-plugin',
    engines: { openwop: '>=1.2.0' },
    uiPlugins: [appBuilderPlugin],
  };
  return manifest;
}
62
+
63
describe('frontend-plugin manifest: schema layer (always-on, server-free)', () => {
  // One validator, compiled once from the on-disk manifest schema and shared by every case.
  const manifestSchema = JSON.parse(readFileSync(MANIFEST_SCHEMA, 'utf8'));
  const ajv = new Ajv2020({ allErrors: true, strict: false });
  addFormats(ajv);
  const validate = ajv.compile(manifestSchema);

  /** First `uiPlugins[]` entry of a manifest, viewed as a mutable record. */
  const firstUiPlugin = (manifest: Record<string, unknown>): Record<string, unknown> =>
    (manifest.uiPlugins as Array<Record<string, unknown>>)[0];

  it('a well-formed frontend-plugin manifest validates', () => {
    const accepted = validate(validManifest());
    // Built after validating so the message carries this run's Ajv errors.
    const failureNote = `frontend-plugin-packs.md §The pack — a valid manifest MUST validate. Errors: ${JSON.stringify(validate.errors)}`;
    expect(accepted, failureNote).toBe(true);
  });

  it('a backend `runtime` member is rejected (a plugin is sandboxed UI, not a node entry)', () => {
    const withRuntime = {
      ...validManifest(),
      runtime: { language: 'javascript', entry: 'index.mjs' },
    };
    const accepted = validate(withRuntime);
    expect(
      accepted,
      'node-packs.md §Pack kinds — a kind:"frontend-plugin" manifest carrying `runtime` MUST be rejected (pack_kind_invalid)',
    ).toBe(false);
  });

  it('a uiPlugins[] entry missing `entry` is rejected', () => {
    const manifest = validManifest();
    delete firstUiPlugin(manifest).entry;
    const accepted = validate(manifest);
    expect(accepted, 'a uiPlugins[] entry missing `entry` MUST NOT validate').toBe(false);
  });

  it('an `entry` path with `..` traversal is rejected', () => {
    const manifest = validManifest();
    firstUiPlugin(manifest).entry = '../escape.mjs';
    const accepted = validate(manifest);
    expect(accepted, 'an `entry` path MUST NOT contain `..` (path-traversal)').toBe(false);
  });

  it('a hostApi method outside the closed allowlist is rejected (frontend-plugin-rpc-allowlist)', () => {
    const manifest = validManifest();
    firstUiPlugin(manifest).hostApi = ['artifact.read', 'host.exec'];
    const accepted = validate(manifest);
    expect(
      accepted,
      'frontend-plugin-packs.md §Host-RPC — only the closed allowlist methods are permitted; `host.exec` MUST NOT validate',
    ).toBe(false);
  });

  it('an empty uiPlugins[] is rejected (a pack MUST declare at least one plugin)', () => {
    const withoutPlugins = { ...validManifest(), uiPlugins: [] };
    const accepted = validate(withoutPlugins);
    expect(accepted, 'a frontend-plugin pack MUST declare at least one uiPlugins[] entry').toBe(false);
  });
});
109
+
110
describe('ui-plugin/1 message: schema layer (always-on, server-free)', () => {
  const ajv = new Ajv2020({ allErrors: true, strict: false });
  addFormats(ajv);
  const validate = ajv.compile(JSON.parse(readFileSync(MESSAGE_SCHEMA, 'utf8')));

  /**
   * Known-good `artifact.write` request (asserted valid by the first case).
   * Each negative case changes exactly ONE aspect of it, so a rejection can only
   * come from the aspect under test and not from an unrelated missing member.
   */
  const validWriteRequest = (): Record<string, unknown> => ({
    openwop: 'ui-plugin/1',
    type: 'request',
    id: 7,
    method: 'artifact.write',
    params: { artifactId: 'a-1', version: 'opaque-v1', payload: {} },
  });

  it('a valid artifact.write request carrying a version token validates', () => {
    const req = validWriteRequest();
    expect(validate(req), `a valid artifact.write request MUST validate. Errors: ${JSON.stringify(validate.errors)}`).toBe(true);
  });

  it('an artifact_conflict response carries currentVersion (version-token concurrency)', () => {
    const res = {
      openwop: 'ui-plugin/1',
      type: 'response',
      id: 7,
      ok: false,
      error: { code: 'artifact_conflict', currentVersion: 'opaque-v2' },
    };
    expect(
      validate(res),
      `frontend-plugin-packs.md §Concurrency — a stale write surfaces artifact_conflict + currentVersion. Errors: ${JSON.stringify(validate.errors)}`,
    ).toBe(true);
  });

  it('a request with a method outside the allowlist is schema-rejected', () => {
    // Only `method` differs from the known-good request.
    const req = { ...validWriteRequest(), method: 'host.exec' };
    expect(validate(req), 'a method outside the ui-plugin/1 allowlist MUST NOT validate').toBe(false);
  });

  it('a message without the ui-plugin/1 protocol tag is rejected', () => {
    // Only the protocol tag differs from the known-good request.
    const req = { ...validWriteRequest(), openwop: 'ui-plugin/2' };
    expect(validate(req), 'a host MUST ignore messages whose ui-plugin tag it does not recognize').toBe(false);
  });

  it('no credential-bearing field is admitted on the envelope (frontend-plugin-no-byok)', () => {
    // Positive control: without it, a baseline that is invalid for some other
    // reason would make every rejection below vacuous.
    expect(
      validate(validWriteRequest()),
      `control — the baseline request MUST validate before a credential field is added. Errors: ${JSON.stringify(validate.errors)}`,
    ).toBe(true);

    // additionalProperties:false on every envelope variant — a stray apiKey/token at the
    // envelope root cannot ride the boundary.
    for (const leak of ['apiKey', 'token', 'clientSecret', 'authorization']) {
      const req = { ...validWriteRequest(), [leak]: 'xxx' };
      expect(
        validate(req),
        `frontend-plugin-no-byok — a credential-named envelope field ("${leak}") MUST NOT validate (additionalProperties:false)`,
      ).toBe(false);
    }
  });
});
162
+
163
describe('frontend-plugin: isolation advertisement (always-on, capability shape)', () => {
  it('a host advertising uiPlugins MUST pin isolation to cross-origin-iframe', async () => {
    const advertised = await readCapabilityFamily<{ supported?: boolean; isolation?: string }>('uiPlugins');
    // A host that does not advertise the family (or advertises a falsy
    // `supported`) is out of scope for this check.
    if (!advertised?.supported) return;

    const failureNote = driver.describe(
      'frontend-plugin-packs.md §Isolation',
      'frontend-plugin-isolation — the ONLY conformant isolation model is cross-origin-iframe (in-process is a protocol-tier MUST NOT)',
    );
    expect(advertised.isolation, failureNote).toBe('cross-origin-iframe');
  });
});
176
+
177
describe('frontend-plugin: host-RPC behavior (capability-gated)', () => {
  /** Test seam both legs post a `ui-plugin/1` message to. */
  const RPC_SEAM = '/v1/host/sample/ui-plugin/rpc';

  /** 404/403 from the seam is treated as "seam unwired" and soft-skips the leg. */
  const seamUnwired = (status: number): boolean => status === 404 || status === 403;

  it('an undeclared host-RPC method is refused with method_not_allowed', async () => {
    const uiPlugins = await readCapabilityFamily<{ supported?: boolean }>('uiPlugins');
    if (!behaviorGate('uiPlugins.supported', uiPlugins?.supported === true)) return;

    const res = await driver.post(RPC_SEAM, {
      message: { openwop: 'ui-plugin/1', type: 'request', id: 1, method: 'host.exec' },
    });
    if (seamUnwired(res.status)) return; // seam unwired — soft-skip

    const body = res.json as { ok?: boolean; error?: { code?: string } } | undefined;
    expect(
      body?.ok,
      driver.describe('frontend-plugin-packs.md §Host-RPC', 'an undeclared method MUST NOT execute'),
    ).toBe(false);
    expect(
      body?.error?.code,
      driver.describe(
        'frontend-plugin-packs.md §Host-RPC',
        'frontend-plugin-rpc-allowlist — an undeclared method surfaces method_not_allowed',
      ),
    ).toBe('method_not_allowed');
  });

  it('a stale artifact.write is refused with artifact_conflict + currentVersion (no persist)', async () => {
    const uiPlugins = await readCapabilityFamily<{ supported?: boolean; hostApi?: string[] }>('uiPlugins');
    if (!behaviorGate('uiPlugins.supported', uiPlugins?.supported === true)) return;
    if (!(uiPlugins?.hostApi ?? []).includes('artifact.write')) return; // write unsupported → out of scope

    const res = await driver.post(RPC_SEAM, {
      message: {
        openwop: 'ui-plugin/1',
        type: 'request',
        id: 2,
        method: 'artifact.write',
        params: { artifactId: 'conformance-canary', version: 'stale-token', payload: {} },
      },
    });
    if (seamUnwired(res.status)) return; // seam unwired — soft-skip

    const body = res.json as
      | { ok?: boolean; error?: { code?: string; currentVersion?: string } }
      | undefined;
    // The refusal itself: a response carrying an error code alongside `ok: true`
    // would otherwise pass this leg. Mirrors the `ok` check in the allowlist leg.
    expect(
      body?.ok,
      driver.describe(
        'frontend-plugin-packs.md §Concurrency',
        'a stale artifact.write MUST be refused (ok: false), never reported as success',
      ),
    ).toBe(false);
    expect(
      body?.error?.code,
      driver.describe(
        'frontend-plugin-packs.md §Concurrency',
        'a stale artifact.write version surfaces artifact_conflict (host MUST NOT persist)',
      ),
    ).toBe('artifact_conflict');
    expect(
      typeof body?.error?.currentVersion,
      driver.describe('frontend-plugin-packs.md §Concurrency', 'artifact_conflict carries the host currentVersion for re-read/merge'),
    ).toBe('string');
  });
});
@@ -0,0 +1,188 @@
1
+ /**
2
+ * RFC 0113 — Memory Injection Budget.
3
+ *
4
+ * Verifies the new token-denominated bound on the live injection read:
5
+ * `MemoryAdapter.list(memoryRef, { tokenBudget, rank, query })`
6
+ * (`spec/v1/agent-memory.md` §"Injection budget"). The genuinely new
7
+ * contribution is `tokenBudget`; `rank:'relevance'` DELEGATES to the
8
+ * existing `memory.search` semantic mode (RFC 0080) — this scenario does
9
+ * NOT assert a parallel ranking primitive, and the relevance leg soft-skips
10
+ * unless the host ALSO advertises `memory.search` semantic.
11
+ *
12
+ * Capability-gated on `capabilities.memory.injectionBudget.supported === true`
13
+ * (root-first per RFC 0073) via `behaviorGate`. Driven through the host-sample
14
+ * memory seam — the `conformance-agent-memory-injection-budget` fixture (the
15
+ * same `/v1/runs` + run-variable seam the other `agentMemory*` scenarios use to
16
+ * reach the adapter), which seeds a set whose total exceeds the budget AND
17
+ * includes one single entry larger than the whole budget, plus a BYOK-redacted
18
+ * entry and a cross-tenant probe.
19
+ *
20
+ * Asserts: cumulative tokens ≤ `tokenBudget`; an over-budget single entry is
21
+ * omitted (not truncated); `rank:'relevance'` ordering differs from recency on
22
+ * the crafted fixture (only when `memory.search` semantic is advertised, else
23
+ * soft-skip); and re-asserts SR-1 (redacted content) + CTI-1 (cross-tenant
24
+ * probe empty) on the budgeted path as a regression guard.
25
+ *
26
+ * @see RFCS/0113-memory-injection-budget.md
27
+ * @see spec/v1/agent-memory.md §"Injection budget"
28
+ */
29
+
30
+ import { describe, it, expect } from 'vitest';
31
+ import { driver } from '../lib/driver.js';
32
+ import { pollUntilTerminal } from '../lib/polling.js';
33
+ import { behaviorGate } from '../lib/behavior-gate.js';
34
+ import { isFixtureAdvertised } from '../lib/fixtures.js';
35
+ import { readCapabilityFamily } from '../lib/discovery-capabilities.js';
36
+
37
/** Workflow id of the fixture that drives the memory seam for this scenario. */
const FIXTURE = 'conformance-agent-memory-injection-budget';
/** Profile label handed to `behaviorGate` and used as the soft-skip log prefix. */
const PROFILE = 'openwop-memory-injection-budget';

/**
 * Slice of the discovery `memory` capability family this scenario reads.
 * Every member is optional: a host may omit any of the blocks.
 */
interface MemoryCap {
  readonly injectionBudget?: MemoryInjectionBudgetCap;
  readonly search?: MemorySearchCap;
}

/** `memory.injectionBudget` block; `supported === true` gates the whole scenario. */
interface MemoryInjectionBudgetCap {
  readonly supported?: boolean;
  readonly tokenCounter?: string;
}

/** `memory.search` block; the relevance leg needs `supported` plus a `'semantic'` mode. */
interface MemorySearchCap {
  readonly supported?: boolean;
  readonly modes?: readonly string[];
}
52
+
53
// ── cast-free typed accessors (no `as`) ──────────────────────────────────
// Runtime guards narrow `unknown` JSON; the `*Of` readers return the value
// when it has the expected shape and `undefined` otherwise.

/** True for non-null, non-array objects. */
function isRecord(v: unknown): v is Record<string, unknown> {
  if (v === null || Array.isArray(v)) return false;
  return typeof v === 'object';
}

/** True when `v` is a string primitive. */
function isString(v: unknown): v is string {
  return typeof v === 'string';
}

/** True when `v` is a number primitive (no finiteness check). */
function isNumber(v: unknown): v is number {
  return typeof v === 'number';
}

/** True when `v` is a boolean primitive. */
function isBoolean(v: unknown): v is boolean {
  return typeof v === 'boolean';
}

/** `v` if it is a string, else `undefined`. */
function stringOf(v: unknown): string | undefined {
  if (isString(v)) return v;
  return undefined;
}

/** `v` if it is a number, else `undefined`. */
function numberOf(v: unknown): number | undefined {
  if (isNumber(v)) return v;
  return undefined;
}

/** `v` if it is a boolean, else `undefined`. */
function booleanOf(v: unknown): boolean | undefined {
  if (isBoolean(v)) return v;
  return undefined;
}

/** `v` (same array instance) if every element is a string, else `undefined`. */
function stringArrayOf(v: unknown): string[] | undefined {
  if (!Array.isArray(v)) return undefined;
  return v.every((item) => isString(item)) ? v : undefined;
}

/** `v` (same array instance) if every element is a record, else `undefined`. */
function recordArrayOf(v: unknown): Record<string, unknown>[] | undefined {
  if (!Array.isArray(v)) return undefined;
  return v.every((item) => isRecord(item)) ? v : undefined;
}

/** Reads a string `runId` member off a record-shaped value. */
function runIdOf(v: unknown): string | undefined {
  if (!isRecord(v)) return undefined;
  return stringOf(v['runId']);
}

/** Reads a record-shaped `variables` member off a record-shaped value. */
function variablesOf(v: unknown): Record<string, unknown> | undefined {
  if (!isRecord(v)) return undefined;
  const candidate = v['variables'];
  if (!isRecord(candidate)) return undefined;
  return candidate;
}
89
+
90
/**
 * Whether the host advertises `memory.search` with the `'semantic'` mode.
 *
 * @param mem the discovery `memory` capability family, or `undefined` when absent.
 * @returns `true` only when `search.supported` is exactly `true` AND `search.modes`
 *          is an array containing `'semantic'`.
 */
function advertisesSemanticSearch(mem: MemoryCap | undefined): boolean {
  const search = mem?.search;
  if (search?.supported !== true) return false;
  const { modes } = search;
  return Array.isArray(modes) && modes.includes('semantic');
}
94
+
95
/**
 * Runs the injection-budget fixture once and returns the run's variables.
 *
 * Steps: create the run via `POST /v1/runs`, poll it to a terminal state,
 * then read the run snapshot and extract its `variables` record.
 *
 * Each step is asserted as it happens, so a failure is reported at the step
 * that caused it. In particular the snapshot read is now checked for HTTP 200;
 * previously a failed read only surfaced later as a missing-variables failure
 * in the caller.
 *
 * @returns the snapshot's `variables` record, or `undefined` when the create
 *          response carried no `runId` or the snapshot has no record-shaped
 *          `variables` member.
 */
async function driveFixtureVariables(): Promise<Record<string, unknown> | undefined> {
  const create = await driver.post('/v1/runs', { workflowId: FIXTURE });
  expect(create.status).toBe(201);
  const runId = runIdOf(create.json);
  expect(runId, 'POST /v1/runs MUST return a runId').toBeDefined();
  if (runId === undefined) return undefined; // narrowing only — the expect above already failed

  const terminal = await pollUntilTerminal(runId);
  expect(terminal.status).toBe('completed');

  const snap = await driver.get(`/v1/runs/${encodeURIComponent(runId)}`);
  expect(snap.status, 'GET /v1/runs/{runId} MUST return 200 for a completed run').toBe(200);
  return variablesOf(snap.json);
}
108
+
109
describe('memory-injection-budget (RFC 0113)', () => {
  it('token-bounds the injection read, omits the over-budget entry, and preserves SR-1 + CTI-1', async () => {
    // Two gates: the capability must be advertised, then the driving fixture.
    const memoryCap = await readCapabilityFamily<MemoryCap>('memory');
    const budgetSupported = memoryCap?.injectionBudget?.supported === true;
    if (!behaviorGate(PROFILE, budgetSupported)) return;
    if (!isFixtureAdvertised(FIXTURE)) return; // fixture-gated soft-skip

    const vars = await driveFixtureVariables();
    expect(vars, 'fixture MUST surface run variables').toBeDefined();
    if (vars === undefined) return;

    // ── tokenBudget bound ──────────────────────────────────────────────
    // The cumulative token count of the returned entries may not exceed the budget.
    const tokenBudget = numberOf(vars['tokenBudget']);
    const cumulativeTokens = numberOf(vars['budgetedTokenTotal']);
    expect(tokenBudget, 'fixture MUST echo the requested tokenBudget').toBeDefined();
    expect(cumulativeTokens, 'fixture MUST surface the budgeted cumulative token total').toBeDefined();
    if (tokenBudget !== undefined && cumulativeTokens !== undefined) {
      expect(cumulativeTokens).toBeLessThanOrEqual(tokenBudget);
    }

    // ── oversized single entry: dropped whole, never cut short ─────────
    const omittedFlag = booleanOf(vars['overBudgetEntryOmitted']);
    expect(
      omittedFlag,
      'an entry larger than the whole budget MUST be omitted, not truncated mid-entry',
    ).toBe(true);
    const slice = recordArrayOf(vars['budgetedEntries']);
    expect(slice, 'fixture MUST surface the budgeted entry slice').toBeDefined();
    const oversizedId = stringOf(vars['overBudgetEntryId']);
    if (slice !== undefined && oversizedId !== undefined) {
      const returnedIds = slice.map((entry) => stringOf(entry['id']));
      expect(returnedIds, 'the over-budget entry MUST NOT appear in the returned slice').not.toContain(oversizedId);
    }

    // ── SR-1 regression guard on the budgeted path ─────────────────────
    // The surfaced sample must carry a redaction marker.
    const redactedSample = stringOf(vars['redactedContentSample']);
    expect(redactedSample, 'budgeted read MUST surface a redacted-content sample').toBeDefined();
    if (redactedSample !== undefined) {
      expect(redactedSample).toMatch(/\[REDACTED:[^\]]+\]/);
    }

    // ── CTI-1 regression guard on the budgeted path ────────────────────
    // The cross-tenant probe must come back empty: an empty array, or a falsy value.
    const crossTenantProbe = vars['crossTenantBudgetedProbe'];
    if (!Array.isArray(crossTenantProbe)) {
      expect(crossTenantProbe, 'cross-tenant probe on the budgeted path MUST return [] / null').toBeFalsy();
    } else {
      expect(crossTenantProbe.length, 'cross-tenant probe on the budgeted path MUST return []').toBe(0);
    }
  });

  it("rank:'relevance' reorders vs recency — only when memory.search semantic is ALSO advertised", async () => {
    const memoryCap = await readCapabilityFamily<MemoryCap>('memory');
    const budgetSupported = memoryCap?.injectionBudget?.supported === true;
    if (!behaviorGate(PROFILE, budgetSupported)) return;

    // Per RFC 0113, rank:'relevance' delegates to memory.search semantic (RFC 0080).
    // Without that advertisement there is no relevance ranking to assert on,
    // so this leg logs and soft-skips.
    const semanticAdvertised = advertisesSemanticSearch(memoryCap);
    if (!semanticAdvertised) {
      // eslint-disable-next-line no-console
      console.warn(`[${PROFILE}] memory.search semantic not advertised; relevance leg soft-skipped`);
      return;
    }
    if (!isFixtureAdvertised(FIXTURE)) return;

    const vars = await driveFixtureVariables();
    expect(vars, 'fixture MUST surface run variables').toBeDefined();
    if (vars === undefined) return;

    const recencyOrder = stringArrayOf(vars['recencyOrder']);
    const relevanceOrder = stringArrayOf(vars['relevanceOrder']);
    expect(recencyOrder, 'fixture MUST surface the recency ordering').toBeDefined();
    expect(relevanceOrder, 'fixture MUST surface the relevance ordering').toBeDefined();
    // The two orderings surfaced by the fixture must differ: relevance may not
    // simply echo the recency order.
    expect(relevanceOrder).not.toEqual(recencyOrder);
  });
});
@@ -0,0 +1,200 @@
1
+ /**
2
+ * prompt-prefix-cache — RFC 0116 + SECURITY/invariants.yaml
3
+ * `prompt-prefix-cache-cross-tenant-isolation`.
4
+ *
5
+ * Status: ACTIVE (advertisement-shape + behavioral). The behavioral legs drive
6
+ * the host's real envelope/provider generate path through the OPTIONAL test
7
+ * seam `POST /v1/host/sample/ai/generate` (`host-sample-test-seams.md` §16,
8
+ * env-gated on `OPENWOP_TEST_SEAM_ENABLED=true`). Hosts that don't advertise
9
+ * `aiProviders.promptPrefixCache.supported` soft-skip; hosts that advertise it
10
+ * but don't wire the seam (HTTP 404/405) soft-skip the behavioral legs and
11
+ * verify advertisement shape only.
12
+ *
13
+ * RFC 0116 makes the optional `cachePrefixId` generate hint safe + testable via
14
+ * four pillars, each asserted here:
15
+ * (a) outcome-invariance — a generate with `cachePrefixId` and a control
16
+ * without produce the same accepted envelope + identical
17
+ * `inputTokens`/`outputTokens` (cost-hint-only, replay-invariant).
18
+ * (b) cache hit observable — a repeat generate shows
19
+ * `provider.usage.cacheReadTokens > 0`.
20
+ * (c) cross-tenant isolation — tenant B's first use of tenant A's
21
+ * `cachePrefixId` shows `cacheReadTokens == 0` (no cross-tenant share).
22
+ * THIS is the public test for the `prompt-prefix-cache-cross-tenant-isolation`
23
+ * invariant: the host MUST key its provider cache by `(tenant, cachePrefixId)`.
24
+ * (d) secret-free — a `cachePrefixId` is never emitted where SR-1 would
25
+ * redact, and the usage block carries no prompt substrings.
26
+ *
27
+ * @see RFCS/0116-prompt-prefix-cache.md
28
+ * @see spec/v1/ai-envelope.md §"Prompt-prefix cache (RFC 0116)"
29
+ * @see SECURITY/invariants.yaml — prompt-prefix-cache-cross-tenant-isolation
30
+ */
31
+
32
+ import { describe, it, expect } from 'vitest';
33
+ import { driver } from '../lib/driver.js';
34
+ import { readCapabilityFamily } from '../lib/discovery-capabilities.js';
35
+
36
/**
 * `aiProviders.promptPrefixCache` capability block as read from discovery.
 * Members are `unknown` on purpose: the advertisement-shape test checks their
 * runtime types instead of trusting the declaration.
 */
interface PromptPrefixCacheCap {
  supported?: unknown;
  providers?: unknown;
}

/** Slice of the `aiProviders` capability family this scenario reads. */
interface AiProvidersCap {
  promptPrefixCache?: PromptPrefixCacheCap;
}

/** Token counters reported in a generate response's `usage` block. */
interface GenerateUsage {
  inputTokens?: number;
  outputTokens?: number;
  cacheReadTokens?: number;
  cacheWriteTokens?: number;
}

/** Envelope member of a generate response. */
interface GenerateEnvelope {
  envelopeType?: string;
  payload?: unknown;
  envelopeId?: string;
}

/** Body of the `POST /v1/host/sample/ai/generate` seam, as far as this scenario reads it. */
interface GenerateResponse {
  envelope?: GenerateEnvelope;
  usage?: GenerateUsage;
}
56
+
57
/**
 * Reads the `aiProviders.promptPrefixCache` capability block.
 *
 * @returns the block when it is present and object-typed; `null` when the
 *          family or the block is absent, falsy, or not an object.
 */
async function readCap(): Promise<PromptPrefixCacheCap | null> {
  const aiProviders = await readCapabilityFamily<AiProvidersCap>('aiProviders');
  const candidate = aiProviders?.promptPrefixCache;
  if (!candidate || typeof candidate !== 'object') return null;
  return candidate;
}
62
+
63
/**
 * Issues one generate call through the `POST /v1/host/sample/ai/generate` seam.
 *
 * The request always carries the tenant, a fixed envelope type and a fixed
 * system prompt; `cachePrefixId` is added only when the caller supplies one,
 * so a control call sends no such key at all.
 *
 * @param args.tenantId       tenant the call is issued for.
 * @param args.cachePrefixId  optional cache hint; omitted from the body when `undefined`.
 * @returns the HTTP status and the parsed body (`{}` when the response has no JSON).
 */
async function generate(args: {
  tenantId: string;
  cachePrefixId?: string;
}): Promise<{ status: number; body: GenerateResponse }> {
  const { tenantId, cachePrefixId } = args;

  const request: Record<string, unknown> = {
    tenantId,
    envelopeType: 'clarification.request',
    systemPrompt: 'You are a helpful assistant. Answer concisely.',
  };
  if (cachePrefixId !== undefined) {
    request['cachePrefixId'] = cachePrefixId;
  }

  const res = await driver.post('/v1/host/sample/ai/generate', request);
  const body = (res.json ?? {}) as GenerateResponse;
  return { status: res.status, body };
}
75
+
76
describe('prompt-prefix-cache: advertisement shape (RFC 0116)', () => {
  it('aiProviders.promptPrefixCache is either absent or a well-formed object', async () => {
    const cap = await readCap();
    // An absent block is a valid advertisement; there is nothing to check.
    if (cap === null) return;

    const schemaRef = 'capabilities.schema.json §aiProviders.promptPrefixCache';

    const supportedType = typeof cap.supported;
    expect(
      supportedType,
      driver.describe(schemaRef, 'promptPrefixCache.supported MUST be a boolean when the block is present'),
    ).toBe('boolean');

    // `providers` is optional, but must be an array when present.
    if (cap.providers === undefined) return;
    const providersIsArray = Array.isArray(cap.providers);
    expect(
      providersIsArray,
      driver.describe(
        schemaRef,
        'promptPrefixCache.providers MUST be an array of provider ids when present (provider-scoped)',
      ),
    ).toBe(true);
  });
});
98
+
99
describe('prompt-prefix-cache: behavioral (RFC 0116 §"Normative requirements")', () => {
  /** Builds a `cachePrefixId` from a tag, the current time and a short random suffix. */
  const freshPrefixId = (tag: string): string =>
    `${tag}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;

  /** 404/405 from the OPTIONAL generate seam means it is not wired; the leg soft-skips. */
  const seamNotWired = (status: number): boolean => status === 404 || status === 405;

  it('(a) outcome-invariance — cachePrefixId vs control → same envelope + identical input/output tokens', async () => {
    const cap = await readCap();
    if (!cap || cap.supported !== true) return; // not advertised — skip
    const prefixId = freshPrefixId('inv');

    const control = await generate({ tenantId: 'tenant-a' });
    if (seamNotWired(control.status)) return; // seam not wired
    expect(control.status, driver.describe('host-sample-test-seams.md §16', 'generate seam MUST return 200')).toBe(200);

    const withPrefix = await generate({ tenantId: 'tenant-a', cachePrefixId: prefixId });
    expect(withPrefix.status).toBe(200);

    expect(
      withPrefix.body.envelope?.envelopeType,
      driver.describe(
        'ai-envelope.md §"Prompt-prefix cache (RFC 0116)" rule 3',
        'cachePrefixId is a cost hint, never semantic: the accepted envelope MUST be identical hit-vs-miss',
      ),
    ).toBe(control.body.envelope?.envelopeType);
    expect(withPrefix.body.usage?.inputTokens).toBe(control.body.usage?.inputTokens);
    expect(
      withPrefix.body.usage?.outputTokens,
      driver.describe(
        'ai-envelope.md §"Prompt-prefix cache (RFC 0116)" rule 3',
        'provider.usage.inputTokens/outputTokens MUST be identical hit-vs-miss (replay-invariant)',
      ),
    ).toBe(control.body.usage?.outputTokens);
  });

  it('(b) cache hit observable — a repeat generate shows cacheReadTokens > 0 while tokens stay invariant', async () => {
    const cap = await readCap();
    if (!cap || cap.supported !== true) return; // not advertised — skip
    const prefixId = freshPrefixId('hit');

    const prime = await generate({ tenantId: 'tenant-a', cachePrefixId: prefixId });
    if (seamNotWired(prime.status)) return; // seam not wired
    expect(prime.status).toBe(200);

    const repeat = await generate({ tenantId: 'tenant-a', cachePrefixId: prefixId });
    expect(repeat.status).toBe(200);
    expect(
      repeat.body.usage?.cacheReadTokens ?? 0,
      driver.describe(
        'ai-envelope.md §"Prompt-prefix cache (RFC 0116)" rule 4',
        'a repeat generate with the same cachePrefixId for the SAME tenant MUST be an observable cache hit (cacheReadTokens > 0)',
      ),
    ).toBeGreaterThan(0);
    // The cost-only witness MUST NOT have changed the recorded outcome.
    expect(repeat.body.usage?.inputTokens).toBe(prime.body.usage?.inputTokens);
    expect(repeat.body.usage?.outputTokens).toBe(prime.body.usage?.outputTokens);
  });

  it('(c) cross-tenant isolation — tenant B first use of tenant A\'s cachePrefixId → cacheReadTokens == 0', async () => {
    const cap = await readCap();
    if (!cap || cap.supported !== true) return; // not advertised — skip
    const prefixId = freshPrefixId('xtenant');

    // Tenant A primes the cache under the cachePrefixId tenant B will reuse.
    const aPrime = await generate({ tenantId: 'tenant-a', cachePrefixId: prefixId });
    if (seamNotWired(aPrime.status)) return; // seam not wired
    expect(aPrime.status).toBe(200);

    // Tenant B's FIRST use of the SAME cachePrefixId MUST be a miss — the host
    // keys its provider cache by (resolved tenant, cachePrefixId), never global.
    const bFirst = await generate({ tenantId: 'tenant-b', cachePrefixId: prefixId });
    expect(bFirst.status).toBe(200);
    expect(
      bFirst.body.usage?.cacheReadTokens ?? 0,
      driver.describe(
        'SECURITY/invariants.yaml prompt-prefix-cache-cross-tenant-isolation',
        'tenant B\'s first use of tenant A\'s cachePrefixId MUST be a cache MISS (cacheReadTokens == 0) — the cache MUST be keyed by (tenant, cachePrefixId), never global; cross-tenant sharing is context leakage',
      ),
    ).toBe(0);
  });

  it('(d) secret-free — the response never echoes cachePrefixId in a SR-1-sensitive position', async () => {
    const cap = await readCap();
    if (!cap || cap.supported !== true) return; // not advertised — skip
    // Same time + random-suffix id as legs (a)–(c).
    const prefixId = freshPrefixId('secretfree');

    const res = await generate({ tenantId: 'tenant-a', cachePrefixId: prefixId });
    if (seamNotWired(res.status)) return; // seam not wired
    expect(res.status).toBe(200);
    // The usage block is cost-only; it MUST NOT carry prompt/response substrings
    // (SR-1). Each counter that is present must therefore be an integer.
    // `typeof v === 'number'` alone would also accept NaN and fractional
    // values, which the assertion message rules out.
    const usage = res.body.usage ?? {};
    for (const k of ['inputTokens', 'outputTokens', 'cacheReadTokens', 'cacheWriteTokens'] as const) {
      const v = usage[k];
      if (v !== undefined) {
        expect(
          Number.isInteger(v),
          driver.describe(
            'run-event-payloads.schema.json §providerUsage',
            `provider.usage.${k} MUST be a cost-only integer (no prompt substrings per SR-1)`,
          ),
        ).toBe(true);
      }
    }
  });
});