@openwop/openwop-conformance 1.37.0 → 1.44.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -0
- package/README.md +2 -2
- package/api/openapi.yaml +62 -5
- package/fixtures/conformance-agent-memory-injection-budget.json +44 -0
- package/fixtures/conformance-context-budget-multiturn.json +50 -0
- package/fixtures.md +2 -0
- package/package.json +1 -1
- package/schemas/README.md +5 -0
- package/schemas/a2ui-surface-delta-frame.schema.json +48 -0
- package/schemas/capabilities.schema.json +155 -1
- package/schemas/channel-presence-payload.schema.json +41 -0
- package/schemas/compact-tool-descriptor.schema.json +51 -0
- package/schemas/conversation-turn.schema.json +10 -0
- package/schemas/frontend-plugin-manifest.schema.json +93 -0
- package/schemas/memory-list-options.schema.json +16 -0
- package/schemas/run-event-payloads.schema.json +25 -2
- package/schemas/run-event.schema.json +2 -0
- package/schemas/ui-plugin-message.schema.json +90 -0
- package/src/lib/toolCatalog.ts +89 -0
- package/src/scenarios/a2ui-surface-delta-transport.test.ts +600 -0
- package/src/scenarios/channel-presence-behavioral.test.ts +83 -0
- package/src/scenarios/channel-presence-shape.test.ts +93 -0
- package/src/scenarios/context-budget-transcript-bound.test.ts +253 -0
- package/src/scenarios/context-summarization-replay.test.ts +155 -0
- package/src/scenarios/conversation-turn-model-provenance-shape.test.ts +120 -0
- package/src/scenarios/frontend-plugin-packs.test.ts +230 -0
- package/src/scenarios/memory-injection-budget.test.ts +188 -0
- package/src/scenarios/prompt-prefix-cache.test.ts +200 -0
- package/src/scenarios/run-transport-economy.test.ts +236 -0
- package/src/scenarios/tool-catalog-compact-projection.test.ts +149 -0
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Front-end plugin packs — `frontend-plugin-packs.md` (RFC 0117). Public test for
|
|
3
|
+
* the four protocol-tier SECURITY invariants `frontend-plugin-isolation` /
|
|
4
|
+
* `frontend-plugin-egress` / `frontend-plugin-rpc-allowlist` / `frontend-plugin-no-byok`,
|
|
5
|
+
* plus the manifest shape and the `ui-plugin/1` host-RPC + version-token concurrency
|
|
6
|
+
* contract.
|
|
7
|
+
*
|
|
8
|
+
* Two layers:
|
|
9
|
+
*
|
|
10
|
+
* A. Always-on, server-free schema probe — `frontend-plugin-manifest.schema.json`
|
|
11
|
+
* and `ui-plugin-message.schema.json` enforce the wire shape: a valid manifest /
|
|
12
|
+
* message validates; a backend `runtime` member, a `uiPlugins[]` entry missing
|
|
13
|
+
* `entry`, an out-of-allowlist `method`, and any envelope `additionalProperties`
|
|
14
|
+
* are rejected. The `version`-token concurrency contract (the `artifact_conflict`
|
|
15
|
+
* error code + `currentVersion`) is schema-pinned. No credential-bearing field is
|
|
16
|
+
* admitted on the envelope (`frontend-plugin-no-byok`).
|
|
17
|
+
*
|
|
18
|
+
* B. Capability-gated behavioral leg — on a host advertising
|
|
19
|
+
* `capabilities.uiPlugins.supported: true` that exposes the
|
|
20
|
+
* `POST /v1/host/sample/ui-plugin/rpc` test seam, an undeclared-method request
|
|
21
|
+
* MUST be refused with `method_not_allowed` (`frontend-plugin-rpc-allowlist`), and
|
|
22
|
+
* a stale `artifact.write` MUST be refused with `artifact_conflict` + `currentVersion`
|
|
23
|
+
* and MUST NOT persist (§Concurrency). Hosts without the seam soft-skip (404 or 403);
|
|
24
|
+
* unadvertised hosts skip via the behavior gate. (No conformant host advertises
|
|
25
|
+
* `uiPlugins` yet — these legs soft-skip until the openwop-app reference host (ADR
|
|
26
|
+
* 0153) lands, the first witness that graduates the invariants to protocol.)
|
|
27
|
+
*
|
|
28
|
+
* @see spec/v1/frontend-plugin-packs.md
|
|
29
|
+
* @see SECURITY/invariants.yaml ids: frontend-plugin-{isolation,egress,rpc-allowlist,no-byok}
|
|
30
|
+
* @see RFCS/0117-frontend-plugin-packs.md
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
import { describe, it, expect } from 'vitest';
|
|
34
|
+
import { readFileSync } from 'node:fs';
|
|
35
|
+
import { join } from 'node:path';
|
|
36
|
+
import Ajv2020 from 'ajv/dist/2020.js';
|
|
37
|
+
import addFormats from 'ajv-formats';
|
|
38
|
+
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
39
|
+
import { driver } from '../lib/driver.js';
|
|
40
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
41
|
+
import { readCapabilityFamily } from '../lib/discovery-capabilities.js';
|
|
42
|
+
|
|
43
|
+
/** Path, under `SCHEMAS_DIR`, of the JSON Schema for the frontend-plugin pack manifest. */
const MANIFEST_SCHEMA = join(SCHEMAS_DIR, 'frontend-plugin-manifest.schema.json');

/** Path, under `SCHEMAS_DIR`, of the JSON Schema for the `ui-plugin/1` message envelope. */
const MESSAGE_SCHEMA = join(SCHEMAS_DIR, 'ui-plugin-message.schema.json');
|
|
45
|
+
|
|
46
|
+
/**
 * Builds the baseline manifest the schema tests start from.
 *
 * A new object graph is created on every call, so a test may mutate the result
 * (delete a member, overwrite `hostApi`, ...) without affecting other tests.
 *
 * @returns A `kind: "frontend-plugin"` manifest declaring a single `uiPlugins[]` entry.
 */
function validManifest(): Record<string, unknown> {
  return {
    name: 'vendor.acme.canvas-editor',
    version: '1.0.0',
    kind: 'frontend-plugin',
    engines: { openwop: '>=1.2.0' },
    uiPlugins: [
      {
        pluginId: 'app-builder',
        surface: 'artifact-viewer',
        entry: 'ui/app-builder.mjs',
        hostApi: ['artifact.read', 'artifact.write'],
      },
    ],
  };
}
|
|
62
|
+
|
|
63
|
+
/**
 * Schema layer for the frontend-plugin pack manifest.
 *
 * Compiles `frontend-plugin-manifest.schema.json` once with Ajv (draft 2020-12,
 * `allErrors` on, `strict` off) and checks that the baseline manifest validates
 * while each malformed variant is rejected. No host is contacted.
 */
describe('frontend-plugin manifest: schema layer (always-on, server-free)', () => {
  const ajv = new Ajv2020({ allErrors: true, strict: false });
  addFormats(ajv);
  const validate = ajv.compile(JSON.parse(readFileSync(MANIFEST_SCHEMA, 'utf8')));

  it('a well-formed frontend-plugin manifest validates', () => {
    // `validate(...)` runs before the message template is evaluated, so
    // `validate.errors` in the message reflects this call.
    expect(
      validate(validManifest()),
      `frontend-plugin-packs.md §The pack — a valid manifest MUST validate. Errors: ${JSON.stringify(validate.errors)}`,
    ).toBe(true);
  });

  it('a backend `runtime` member is rejected (a plugin is sandboxed UI, not a node entry)', () => {
    const m = { ...validManifest(), runtime: { language: 'javascript', entry: 'index.mjs' } };
    expect(
      validate(m),
      'node-packs.md §Pack kinds — a kind:"frontend-plugin" manifest carrying `runtime` MUST be rejected (pack_kind_invalid)',
    ).toBe(false);
  });

  it('a uiPlugins[] entry missing `entry` is rejected', () => {
    const m = validManifest();
    delete (m.uiPlugins as Array<Record<string, unknown>>)[0].entry;
    expect(validate(m), 'a uiPlugins[] entry missing `entry` MUST NOT validate').toBe(false);
  });

  it('an `entry` path with `..` traversal is rejected', () => {
    const m = validManifest();
    (m.uiPlugins as Array<Record<string, unknown>>)[0].entry = '../escape.mjs';
    expect(validate(m), 'an `entry` path MUST NOT contain `..` (path-traversal)').toBe(false);
  });

  it('a hostApi method outside the closed allowlist is rejected (frontend-plugin-rpc-allowlist)', () => {
    const m = validManifest();
    (m.uiPlugins as Array<Record<string, unknown>>)[0].hostApi = ['artifact.read', 'host.exec'];
    expect(
      validate(m),
      'frontend-plugin-packs.md §Host-RPC — only the closed allowlist methods are permitted; `host.exec` MUST NOT validate',
    ).toBe(false);
  });

  it('an empty uiPlugins[] is rejected (a pack MUST declare at least one plugin)', () => {
    const m = { ...validManifest(), uiPlugins: [] };
    expect(validate(m), 'a frontend-plugin pack MUST declare at least one uiPlugins[] entry').toBe(false);
  });
});
|
|
109
|
+
|
|
110
|
+
/**
 * Schema layer for the `ui-plugin/1` message envelope.
 *
 * Compiles `ui-plugin-message.schema.json` once with Ajv and checks a valid
 * request and an `artifact_conflict` response validate, while an unlisted
 * method, an unrecognized protocol tag, and credential-named extra envelope
 * fields are rejected. No host is contacted.
 */
describe('ui-plugin/1 message: schema layer (always-on, server-free)', () => {
  const ajv = new Ajv2020({ allErrors: true, strict: false });
  addFormats(ajv);
  const validate = ajv.compile(JSON.parse(readFileSync(MESSAGE_SCHEMA, 'utf8')));

  it('a valid artifact.write request carrying a version token validates', () => {
    const req = {
      openwop: 'ui-plugin/1',
      type: 'request',
      id: 7,
      method: 'artifact.write',
      params: { artifactId: 'a-1', version: 'opaque-v1', payload: {} },
    };
    expect(validate(req), `a valid artifact.write request MUST validate. Errors: ${JSON.stringify(validate.errors)}`).toBe(true);
  });

  it('an artifact_conflict response carries currentVersion (version-token concurrency)', () => {
    const res = {
      openwop: 'ui-plugin/1',
      type: 'response',
      id: 7,
      ok: false,
      error: { code: 'artifact_conflict', currentVersion: 'opaque-v2' },
    };
    expect(
      validate(res),
      `frontend-plugin-packs.md §Concurrency — a stale write surfaces artifact_conflict + currentVersion. Errors: ${JSON.stringify(validate.errors)}`,
    ).toBe(true);
  });

  it('a request with a method outside the allowlist is schema-rejected', () => {
    const req = { openwop: 'ui-plugin/1', type: 'request', id: 1, method: 'host.exec' };
    expect(validate(req), 'a method outside the ui-plugin/1 allowlist MUST NOT validate').toBe(false);
  });

  it('a message without the ui-plugin/1 protocol tag is rejected', () => {
    // The tag is present but names a different protocol version.
    const req = { openwop: 'ui-plugin/2', type: 'request', id: 1, method: 'artifact.read' };
    expect(validate(req), 'a host MUST ignore messages whose ui-plugin tag it does not recognize').toBe(false);
  });

  it('no credential-bearing field is admitted on the envelope (frontend-plugin-no-byok)', () => {
    // additionalProperties:false on every envelope variant — a stray apiKey/token at the
    // envelope root cannot ride the boundary.
    for (const leak of ['apiKey', 'token', 'clientSecret', 'authorization']) {
      const req = { openwop: 'ui-plugin/1', type: 'request', id: 1, method: 'artifact.read', [leak]: 'xxx' };
      expect(
        validate(req),
        `frontend-plugin-no-byok — a credential-named envelope field ("${leak}") MUST NOT validate (additionalProperties:false)`,
      ).toBe(false);
    }
  });
});
|
|
162
|
+
|
|
163
|
+
/**
 * Capability-shape check: when the host advertises `uiPlugins.supported`, the
 * advertised `isolation` value must be `cross-origin-iframe`. A host that does
 * not advertise the family passes without any assertion.
 */
describe('frontend-plugin: isolation advertisement (always-on, capability shape)', () => {
  it('a host advertising uiPlugins MUST pin isolation to cross-origin-iframe', async () => {
    const uiPlugins = await readCapabilityFamily<{ supported?: boolean; isolation?: string }>('uiPlugins');
    if (!uiPlugins?.supported) return; // unadvertised → out of scope (graceful degradation)

    expect(
      uiPlugins.isolation,
      driver.describe(
        'frontend-plugin-packs.md §Isolation',
        'frontend-plugin-isolation — the ONLY conformant isolation model is cross-origin-iframe (in-process is a protocol-tier MUST NOT)',
      ),
    ).toBe('cross-origin-iframe');
  });
});
|
|
176
|
+
|
|
177
|
+
/**
 * Behavioral leg, gated on `capabilities.uiPlugins.supported === true`.
 *
 * Posts `ui-plugin/1` request messages to the host-sample RPC seam and checks
 * the refusal codes. A 404 or 403 from the seam is treated as "seam unwired"
 * and the test returns without asserting.
 */
describe('frontend-plugin: host-RPC behavior (capability-gated)', () => {
  // Host-sample test seam both legs post their `ui-plugin/1` message to.
  const RPC_SEAM = '/v1/host/sample/ui-plugin/rpc';

  // Statuses this suite treats as "seam unwired" (soft-skip).
  const seamUnwired = (status: number): boolean => status === 404 || status === 403;

  it('an undeclared host-RPC method is refused with method_not_allowed', async () => {
    const uiPlugins = await readCapabilityFamily<{ supported?: boolean }>('uiPlugins');
    if (!behaviorGate('uiPlugins.supported', uiPlugins?.supported === true)) return;

    const res = await driver.post(RPC_SEAM, {
      message: { openwop: 'ui-plugin/1', type: 'request', id: 1, method: 'host.exec' },
    });
    if (seamUnwired(res.status)) return; // seam unwired — soft-skip

    const body = res.json as { ok?: boolean; error?: { code?: string } } | undefined;
    expect(
      body?.ok,
      driver.describe('frontend-plugin-packs.md §Host-RPC', 'an undeclared method MUST NOT execute'),
    ).toBe(false);
    expect(
      body?.error?.code,
      driver.describe(
        'frontend-plugin-packs.md §Host-RPC',
        'frontend-plugin-rpc-allowlist — an undeclared method surfaces method_not_allowed',
      ),
    ).toBe('method_not_allowed');
  });

  it('a stale artifact.write is refused with artifact_conflict + currentVersion (no persist)', async () => {
    const uiPlugins = await readCapabilityFamily<{ supported?: boolean; hostApi?: string[] }>('uiPlugins');
    if (!behaviorGate('uiPlugins.supported', uiPlugins?.supported === true)) return;
    if (!(uiPlugins?.hostApi ?? []).includes('artifact.write')) return; // write unsupported → out of scope

    const res = await driver.post(RPC_SEAM, {
      message: {
        openwop: 'ui-plugin/1',
        type: 'request',
        id: 2,
        method: 'artifact.write',
        params: { artifactId: 'conformance-canary', version: 'stale-token', payload: {} },
      },
    });
    if (seamUnwired(res.status)) return; // seam unwired — soft-skip

    const body = res.json as { ok?: boolean; error?: { code?: string; currentVersion?: string } } | undefined;
    // A refusal must be reported as a failed response, matching the
    // undeclared-method leg: an error code next to `ok: true` is not a refusal.
    expect(
      body?.ok,
      driver.describe('frontend-plugin-packs.md §Concurrency', 'a stale artifact.write MUST NOT succeed'),
    ).toBe(false);
    expect(
      body?.error?.code,
      driver.describe(
        'frontend-plugin-packs.md §Concurrency',
        'a stale artifact.write version surfaces artifact_conflict (host MUST NOT persist)',
      ),
    ).toBe('artifact_conflict');
    expect(
      typeof body?.error?.currentVersion,
      driver.describe('frontend-plugin-packs.md §Concurrency', 'artifact_conflict carries the host currentVersion for re-read/merge'),
    ).toBe('string');
  });
});
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RFC 0113 — Memory Injection Budget.
|
|
3
|
+
*
|
|
4
|
+
* Verifies the new token-denominated bound on the live injection read:
|
|
5
|
+
* `MemoryAdapter.list(memoryRef, { tokenBudget, rank, query })`
|
|
6
|
+
* (`spec/v1/agent-memory.md` §"Injection budget"). The genuinely new
|
|
7
|
+
* contribution is `tokenBudget`; `rank:'relevance'` DELEGATES to the
|
|
8
|
+
* existing `memory.search` semantic mode (RFC 0080) — this scenario does
|
|
9
|
+
* NOT assert a parallel ranking primitive, and the relevance leg soft-skips
|
|
10
|
+
* unless the host ALSO advertises `memory.search` semantic.
|
|
11
|
+
*
|
|
12
|
+
* Capability-gated on `capabilities.memory.injectionBudget.supported === true`
|
|
13
|
+
* (root-first per RFC 0073) via `behaviorGate`. Driven through the host-sample
|
|
14
|
+
* memory seam — the `conformance-agent-memory-injection-budget` fixture (the
|
|
15
|
+
* same `/v1/runs` + run-variable seam the other `agentMemory*` scenarios use to
|
|
16
|
+
* reach the adapter), which seeds a set whose total exceeds the budget AND
|
|
17
|
+
* includes one single entry larger than the whole budget, plus a BYOK-redacted
|
|
18
|
+
* entry and a cross-tenant probe.
|
|
19
|
+
*
|
|
20
|
+
* Asserts: cumulative tokens ≤ `tokenBudget`; an over-budget single entry is
|
|
21
|
+
* omitted (not truncated); `rank:'relevance'` ordering differs from recency on
|
|
22
|
+
* the crafted fixture (only when `memory.search` semantic is advertised, else
|
|
23
|
+
* soft-skip); and re-asserts SR-1 (redacted content) + CTI-1 (cross-tenant
|
|
24
|
+
* probe empty) on the budgeted path as a regression guard.
|
|
25
|
+
*
|
|
26
|
+
* @see RFCS/0113-memory-injection-budget.md
|
|
27
|
+
* @see spec/v1/agent-memory.md §"Injection budget"
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
import { describe, it, expect } from 'vitest';
|
|
31
|
+
import { driver } from '../lib/driver.js';
|
|
32
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
33
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
34
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
35
|
+
import { readCapabilityFamily } from '../lib/discovery-capabilities.js';
|
|
36
|
+
|
|
37
|
+
/** Workflow id of the conformance fixture posted to `/v1/runs` by this scenario. */
const FIXTURE = 'conformance-agent-memory-injection-budget';

/** Profile name passed to `behaviorGate` and used as the prefix of the soft-skip warning. */
const PROFILE = 'openwop-memory-injection-budget';
|
|
39
|
+
|
|
40
|
+
/** `capabilities.memory.injectionBudget` block as read by this scenario. */
interface MemoryInjectionBudgetCap {
  readonly supported?: boolean;
  readonly tokenCounter?: string;
}

/** `capabilities.memory.search` block as read by this scenario. */
interface MemorySearchCap {
  readonly supported?: boolean;
  readonly modes?: readonly string[];
}

/** The subset of the `memory` capability family this scenario inspects. */
interface MemoryCap {
  readonly injectionBudget?: MemoryInjectionBudgetCap;
  readonly search?: MemorySearchCap;
}
|
|
52
|
+
|
|
53
|
+
// ── cast-free typed accessors (no `as`) ──────────────────────────────────

/** True for non-null, non-array objects. */
function isRecord(v: unknown): v is Record<string, unknown> {
  return typeof v === 'object' && v !== null && !Array.isArray(v);
}

/** True when `v` is a primitive string. */
function isString(v: unknown): v is string {
  return typeof v === 'string';
}

/** True when `v` is a primitive number (this includes `NaN` and infinities). */
function isNumber(v: unknown): v is number {
  return typeof v === 'number';
}

/** True when `v` is a primitive boolean. */
function isBoolean(v: unknown): v is boolean {
  return typeof v === 'boolean';
}

/** Returns `v` when it is a string, otherwise `undefined`. */
function stringOf(v: unknown): string | undefined {
  return isString(v) ? v : undefined;
}

/** Returns `v` when it is a number, otherwise `undefined`. */
function numberOf(v: unknown): number | undefined {
  return isNumber(v) ? v : undefined;
}

/** Returns `v` when it is a boolean, otherwise `undefined`. */
function booleanOf(v: unknown): boolean | undefined {
  return isBoolean(v) ? v : undefined;
}

/** Returns `v` when it is an array whose every element is a string (an empty array qualifies). */
function stringArrayOf(v: unknown): string[] | undefined {
  return Array.isArray(v) && v.every(isString) ? v : undefined;
}

/** Returns `v` when it is an array whose every element is a record (an empty array qualifies). */
function recordArrayOf(v: unknown): Record<string, unknown>[] | undefined {
  return Array.isArray(v) && v.every(isRecord) ? v : undefined;
}

/** Reads a string `runId` member from a record; `undefined` for any other shape. */
function runIdOf(v: unknown): string | undefined {
  return isRecord(v) ? stringOf(v['runId']) : undefined;
}

/** Reads a record-valued `variables` member from a record; `undefined` for any other shape. */
function variablesOf(v: unknown): Record<string, unknown> | undefined {
  if (!isRecord(v)) return undefined;
  const vars = v['variables'];
  return isRecord(vars) ? vars : undefined;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Reports whether the memory capability block advertises semantic search.
 *
 * @param mem The `memory` capability family, or `undefined` when not advertised.
 * @returns `true` only when `search.supported` is exactly `true` and
 *   `search.modes` is an array containing `'semantic'`.
 */
function advertisesSemanticSearch(mem: MemoryCap | undefined): boolean {
  const modes = mem?.search?.modes;
  return mem?.search?.supported === true && Array.isArray(modes) && modes.includes('semantic');
}
|
|
94
|
+
|
|
95
|
+
/**
 * Runs the fixture workflow once and returns the run's `variables` record.
 *
 * Creates a run via `POST /v1/runs`, waits for it with `pollUntilTerminal`,
 * then reads the run snapshot. The create status, the presence of `runId` and
 * the terminal status are asserted along the way.
 *
 * @returns The snapshot's `variables` record, or `undefined` when no `runId`
 *   was returned or the snapshot carries no record-valued `variables`.
 */
async function driveFixtureVariables(): Promise<Record<string, unknown> | undefined> {
  const create = await driver.post('/v1/runs', { workflowId: FIXTURE });
  expect(create.status, `POST /v1/runs for fixture "${FIXTURE}" MUST return 201`).toBe(201);
  const runId = runIdOf(create.json);
  expect(runId, 'POST /v1/runs MUST return a runId').toBeDefined();
  if (runId === undefined) return undefined;

  const terminal = await pollUntilTerminal(runId);
  expect(terminal.status, `fixture run "${runId}" MUST reach the completed terminal status`).toBe('completed');

  const snap = await driver.get(`/v1/runs/${encodeURIComponent(runId)}`);
  return variablesOf(snap.json);
}
|
|
108
|
+
|
|
109
|
+
/**
 * RFC 0113 scenario body. Both tests are gated on
 * `memory.injectionBudget.supported === true` and on the fixture being
 * advertised; the second is additionally gated on semantic `memory.search`.
 * Each test drives the fixture and asserts on the run variables it surfaces.
 */
describe('memory-injection-budget (RFC 0113)', () => {
  it('token-bounds the injection read, omits the over-budget entry, and preserves SR-1 + CTI-1', async () => {
    const mem = await readCapabilityFamily<MemoryCap>('memory');
    if (!behaviorGate(PROFILE, mem?.injectionBudget?.supported === true)) return;
    if (!isFixtureAdvertised(FIXTURE)) return; // fixture-gated soft-skip

    const v = await driveFixtureVariables();
    expect(v, 'fixture MUST surface run variables').toBeDefined();
    if (v === undefined) return;

    // ── tokenBudget bound (the new lever) ──────────────────────────────
    const tokenBudget = numberOf(v['tokenBudget']);
    const total = numberOf(v['budgetedTokenTotal']);
    expect(tokenBudget, 'fixture MUST echo the requested tokenBudget').toBeDefined();
    expect(total, 'fixture MUST surface the budgeted cumulative token total').toBeDefined();
    // Cumulative tokens across the returned prefix MUST NOT exceed the budget.
    if (tokenBudget !== undefined && total !== undefined) {
      expect(total, 'cumulative tokens across the returned entries MUST NOT exceed tokenBudget').toBeLessThanOrEqual(
        tokenBudget,
      );
    }

    // ── over-budget single entry omitted (not truncated) ───────────────
    expect(
      booleanOf(v['overBudgetEntryOmitted']),
      'an entry larger than the whole budget MUST be omitted, not truncated mid-entry',
    ).toBe(true);
    const entries = recordArrayOf(v['budgetedEntries']);
    expect(entries, 'fixture MUST surface the budgeted entry slice').toBeDefined();
    const overId = stringOf(v['overBudgetEntryId']);
    // The id cross-check only runs when the fixture surfaces the over-budget id.
    if (entries !== undefined && overId !== undefined) {
      const ids = entries.map((e) => stringOf(e['id']));
      expect(ids, 'the over-budget entry MUST NOT appear in the returned slice').not.toContain(overId);
    }

    // ── SR-1 re-assertion on the budgeted path ─────────────────────────
    // A budgeted/ranked read ranks over already-redacted content; the read
    // surface MUST carry the redaction marker, never the plaintext.
    const redacted = stringOf(v['redactedContentSample']);
    expect(redacted, 'budgeted read MUST surface a redacted-content sample').toBeDefined();
    if (redacted !== undefined) {
      expect(redacted, 'the redacted-content sample MUST carry a [REDACTED:…] marker').toMatch(/\[REDACTED:[^\]]+\]/);
    }

    // ── CTI-1 re-assertion on the budgeted path ────────────────────────
    // A budget/rank prefix of an already-single-tenant list stays single-tenant:
    // the cross-tenant probe under the budgeted path MUST return empty.
    const probe = v['crossTenantBudgetedProbe'];
    if (Array.isArray(probe)) {
      expect(probe.length, 'cross-tenant probe on the budgeted path MUST return []').toBe(0);
    } else {
      expect(probe, 'cross-tenant probe on the budgeted path MUST return [] / null').toBeFalsy();
    }
  });

  it("rank:'relevance' reorders vs recency — only when memory.search semantic is ALSO advertised", async () => {
    const mem = await readCapabilityFamily<MemoryCap>('memory');
    if (!behaviorGate(PROFILE, mem?.injectionBudget?.supported === true)) return;
    // RFC 0113: rank:'relevance' DELEGATES to memory.search semantic (RFC 0080).
    // A host that does not advertise memory.search semantic MUST NOT fabricate a
    // relevance ranking — the relevance leg soft-skips here (it is not a new
    // ranking surface advertised by injectionBudget).
    if (!advertisesSemanticSearch(mem)) {
      // eslint-disable-next-line no-console
      console.warn(`[${PROFILE}] memory.search semantic not advertised; relevance leg soft-skipped`);
      return;
    }
    if (!isFixtureAdvertised(FIXTURE)) return;

    const v = await driveFixtureVariables();
    expect(v, 'fixture MUST surface run variables').toBeDefined();
    if (v === undefined) return;

    const recencyOrder = stringArrayOf(v['recencyOrder']);
    const relevanceOrder = stringArrayOf(v['relevanceOrder']);
    expect(recencyOrder, 'fixture MUST surface the recency ordering').toBeDefined();
    expect(relevanceOrder, 'fixture MUST surface the relevance ordering').toBeDefined();
    // The crafted fixture pins a query whose semantic top-k differs from the
    // most-recent-first order — relevance MUST reorder (not echo recency).
    expect(relevanceOrder).not.toEqual(recencyOrder);
  });
});
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* prompt-prefix-cache — RFC 0116 + SECURITY/invariants.yaml
|
|
3
|
+
* `prompt-prefix-cache-cross-tenant-isolation`.
|
|
4
|
+
*
|
|
5
|
+
* Status: ACTIVE (advertisement-shape + behavioral). The behavioral legs drive
|
|
6
|
+
* the host's real envelope/provider generate path through the OPTIONAL test
|
|
7
|
+
* seam `POST /v1/host/sample/ai/generate` (`host-sample-test-seams.md` §16,
|
|
8
|
+
* env-gated on `OPENWOP_TEST_SEAM_ENABLED=true`). Hosts that don't advertise
|
|
9
|
+
* `aiProviders.promptPrefixCache.supported` soft-skip; hosts that advertise it
|
|
10
|
+
* but don't wire the seam (HTTP 404/405) soft-skip the behavioral legs and
|
|
11
|
+
* verify advertisement shape only.
|
|
12
|
+
*
|
|
13
|
+
* RFC 0116 makes the optional `cachePrefixId` generate hint safe + testable via
|
|
14
|
+
* four pillars, each asserted here:
|
|
15
|
+
* (a) outcome-invariance — a generate with `cachePrefixId` and a control
|
|
16
|
+
* without produce the same accepted envelope + identical
|
|
17
|
+
* `inputTokens`/`outputTokens` (cost-hint-only, replay-invariant).
|
|
18
|
+
* (b) cache hit observable — a repeat generate shows
|
|
19
|
+
* `provider.usage.cacheReadTokens > 0`.
|
|
20
|
+
* (c) cross-tenant isolation — tenant B's first use of tenant A's
|
|
21
|
+
* `cachePrefixId` shows `cacheReadTokens == 0` (no cross-tenant share).
|
|
22
|
+
* THIS is the public test for the `prompt-prefix-cache-cross-tenant-isolation`
|
|
23
|
+
* invariant: the host MUST key its provider cache by `(tenant, cachePrefixId)`.
|
|
24
|
+
* (d) secret-free — a `cachePrefixId` is never emitted where SR-1 would
|
|
25
|
+
* redact, and the usage block carries no prompt substrings.
|
|
26
|
+
*
|
|
27
|
+
* @see RFCS/0116-prompt-prefix-cache.md
|
|
28
|
+
* @see spec/v1/ai-envelope.md §"Prompt-prefix cache (RFC 0116)"
|
|
29
|
+
* @see SECURITY/invariants.yaml — prompt-prefix-cache-cross-tenant-isolation
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
import { describe, it, expect } from 'vitest';
|
|
33
|
+
import { driver } from '../lib/driver.js';
|
|
34
|
+
import { readCapabilityFamily } from '../lib/discovery-capabilities.js';
|
|
35
|
+
|
|
36
|
+
/**
 * `aiProviders.promptPrefixCache` as advertised. Members are `unknown` so the
 * advertisement-shape test can check their runtime types itself.
 */
interface PromptPrefixCacheCap {
  supported?: unknown;
  providers?: unknown;
}

/** The subset of the `aiProviders` capability family this scenario reads. */
interface AiProvidersCap {
  promptPrefixCache?: PromptPrefixCacheCap;
}

/** Token counters read from the generate seam's `usage` block. */
interface GenerateUsage {
  inputTokens?: number;
  outputTokens?: number;
  cacheReadTokens?: number;
  cacheWriteTokens?: number;
}

/** Response body of the generate seam, as far as this scenario reads it. */
interface GenerateResponse {
  envelope?: { envelopeType?: string; payload?: unknown; envelopeId?: string };
  usage?: GenerateUsage;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Reads `aiProviders.promptPrefixCache` from the host's advertised capabilities.
 *
 * @returns The advertised block when it is a truthy object, otherwise `null`.
 *   An array also passes the `typeof` check and is returned as-is, which lets
 *   the shape test report it as malformed.
 */
async function readCap(): Promise<PromptPrefixCacheCap | null> {
  const fam = await readCapabilityFamily<AiProvidersCap>('aiProviders');
  const block = fam?.promptPrefixCache;
  return block && typeof block === 'object' ? block : null;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Issues one generate call through the `POST /v1/host/sample/ai/generate` seam.
 *
 * The envelope type and system prompt are fixed, so calls differ only by
 * tenant and by the presence of the cache hint.
 *
 * @param args.tenantId Tenant the request is issued for.
 * @param args.cachePrefixId Optional cache hint; the key is omitted from the
 *   request body entirely when this is `undefined`.
 * @returns The HTTP status and the parsed body (`{}` when the response has no JSON).
 */
async function generate(args: {
  tenantId: string;
  cachePrefixId?: string;
}): Promise<{ status: number; body: GenerateResponse }> {
  const res = await driver.post('/v1/host/sample/ai/generate', {
    tenantId: args.tenantId,
    envelopeType: 'clarification.request',
    systemPrompt: 'You are a helpful assistant. Answer concisely.',
    ...(args.cachePrefixId !== undefined ? { cachePrefixId: args.cachePrefixId } : {}),
  });
  return { status: res.status, body: (res.json ?? {}) as GenerateResponse };
}
|
|
75
|
+
|
|
76
|
+
/**
 * Advertisement-shape check. When the `promptPrefixCache` block is present,
 * `supported` must be a boolean and `providers`, if present, must be an array.
 * An absent block passes without any assertion.
 */
describe('prompt-prefix-cache: advertisement shape (RFC 0116)', () => {
  it('aiProviders.promptPrefixCache is either absent or a well-formed object', async () => {
    const cap = await readCap();
    if (cap === null) return; // not advertised — skip

    expect(
      typeof cap.supported,
      driver.describe(
        'capabilities.schema.json §aiProviders.promptPrefixCache',
        'promptPrefixCache.supported MUST be a boolean when the block is present',
      ),
    ).toBe('boolean');

    if (cap.providers !== undefined) {
      expect(
        Array.isArray(cap.providers),
        driver.describe(
          'capabilities.schema.json §aiProviders.promptPrefixCache',
          'promptPrefixCache.providers MUST be an array of provider ids when present (provider-scoped)',
        ),
      ).toBe(true);
    }
  });
});
|
|
98
|
+
|
|
99
|
+
/**
 * Behavioral legs (a)–(d). Each leg returns early when the host does not
 * advertise `promptPrefixCache.supported === true`, or when the first call to
 * the generate seam answers 404 or 405 (seam not wired).
 */
describe('prompt-prefix-cache: behavioral (RFC 0116 §"Normative requirements")', () => {
  // Builds a cachePrefixId that is unique per call (timestamp + random suffix),
  // so a leg never reuses an id from an earlier leg or run.
  const freshPrefixId = (tag: string): string =>
    `${tag}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;

  // Statuses this suite treats as "seam not wired" (soft-skip).
  const seamNotWired = (status: number): boolean => status === 404 || status === 405;

  it('(a) outcome-invariance — cachePrefixId vs control → same envelope + identical input/output tokens', async () => {
    const cap = await readCap();
    if (!cap || cap.supported !== true) return; // not advertised — skip
    const prefixId = freshPrefixId('inv');

    const control = await generate({ tenantId: 'tenant-a' });
    if (seamNotWired(control.status)) return; // seam not wired
    expect(control.status, driver.describe('host-sample-test-seams.md §16', 'generate seam MUST return 200')).toBe(200);

    const withPrefix = await generate({ tenantId: 'tenant-a', cachePrefixId: prefixId });
    expect(withPrefix.status).toBe(200);

    expect(
      withPrefix.body.envelope?.envelopeType,
      driver.describe(
        'ai-envelope.md §"Prompt-prefix cache (RFC 0116)" rule 3',
        'cachePrefixId is a cost hint, never semantic: the accepted envelope MUST be identical hit-vs-miss',
      ),
    ).toBe(control.body.envelope?.envelopeType);
    expect(withPrefix.body.usage?.inputTokens).toBe(control.body.usage?.inputTokens);
    expect(
      withPrefix.body.usage?.outputTokens,
      driver.describe(
        'ai-envelope.md §"Prompt-prefix cache (RFC 0116)" rule 3',
        'provider.usage.inputTokens/outputTokens MUST be identical hit-vs-miss (replay-invariant)',
      ),
    ).toBe(control.body.usage?.outputTokens);
  });

  it('(b) cache hit observable — a repeat generate shows cacheReadTokens > 0 while tokens stay invariant', async () => {
    const cap = await readCap();
    if (!cap || cap.supported !== true) return; // not advertised — skip
    const prefixId = freshPrefixId('hit');

    const prime = await generate({ tenantId: 'tenant-a', cachePrefixId: prefixId });
    if (seamNotWired(prime.status)) return; // seam not wired
    expect(prime.status).toBe(200);

    const repeat = await generate({ tenantId: 'tenant-a', cachePrefixId: prefixId });
    expect(repeat.status).toBe(200);
    expect(
      repeat.body.usage?.cacheReadTokens ?? 0,
      driver.describe(
        'ai-envelope.md §"Prompt-prefix cache (RFC 0116)" rule 4',
        'a repeat generate with the same cachePrefixId for the SAME tenant MUST be an observable cache hit (cacheReadTokens > 0)',
      ),
    ).toBeGreaterThan(0);
    // The cost-only witness MUST NOT have changed the recorded outcome.
    expect(repeat.body.usage?.inputTokens).toBe(prime.body.usage?.inputTokens);
    expect(repeat.body.usage?.outputTokens).toBe(prime.body.usage?.outputTokens);
  });

  it('(c) cross-tenant isolation — tenant B first use of tenant A\'s cachePrefixId → cacheReadTokens == 0', async () => {
    const cap = await readCap();
    if (!cap || cap.supported !== true) return; // not advertised — skip
    const prefixId = freshPrefixId('xtenant');

    // Tenant A primes the cache under a shared, predictable cachePrefixId.
    const aPrime = await generate({ tenantId: 'tenant-a', cachePrefixId: prefixId });
    if (seamNotWired(aPrime.status)) return; // seam not wired
    expect(aPrime.status).toBe(200);

    // Tenant B's FIRST use of the SAME cachePrefixId MUST be a miss — the host
    // keys its provider cache by (resolved tenant, cachePrefixId), never global.
    const bFirst = await generate({ tenantId: 'tenant-b', cachePrefixId: prefixId });
    expect(bFirst.status).toBe(200);
    expect(
      bFirst.body.usage?.cacheReadTokens ?? 0,
      driver.describe(
        'SECURITY/invariants.yaml prompt-prefix-cache-cross-tenant-isolation',
        'tenant B\'s first use of tenant A\'s cachePrefixId MUST be a cache MISS (cacheReadTokens == 0) — the cache MUST be keyed by (tenant, cachePrefixId), never global; cross-tenant sharing is context leakage',
      ),
    ).toBe(0);
  });

  it('(d) secret-free — the response never echoes cachePrefixId in a SR-1-sensitive position', async () => {
    const cap = await readCap();
    if (!cap || cap.supported !== true) return; // not advertised — skip
    const prefixId = freshPrefixId('secretfree');

    const res = await generate({ tenantId: 'tenant-a', cachePrefixId: prefixId });
    if (seamNotWired(res.status)) return; // seam not wired
    expect(res.status).toBe(200);
    // The usage block is cost-only; it MUST NOT carry prompt/response substrings
    // (SR-1). cachePrefixId is a public cache key, but the cost witness fields
    // themselves are integers — assert the usage block is shape-clean.
    const usage = res.body.usage ?? {};
    for (const k of ['inputTokens', 'outputTokens', 'cacheReadTokens', 'cacheWriteTokens'] as const) {
      const v = usage[k];
      if (v !== undefined) {
        // Number.isInteger is false for every non-number, so it covers the
        // former `typeof v === 'number'` check and also rejects fractions and NaN.
        expect(
          Number.isInteger(v),
          driver.describe(
            'run-event-payloads.schema.json §providerUsage',
            `provider.usage.${k} MUST be a cost-only integer (no prompt substrings per SR-1)`,
          ),
        ).toBe(true);
      }
    }
  });
});
|