@openwop/openwop-conformance 1.36.0 → 1.43.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +2 -2
- package/api/openapi.yaml +62 -5
- package/coverage.md +1 -0
- package/fixtures/conformance-agent-memory-injection-budget.json +44 -0
- package/fixtures/conformance-context-budget-multiturn.json +50 -0
- package/fixtures.md +2 -0
- package/package.json +1 -1
- package/schemas/README.md +3 -0
- package/schemas/a2ui-surface-delta-frame.schema.json +48 -0
- package/schemas/capabilities.schema.json +128 -1
- package/schemas/channel-presence-payload.schema.json +41 -0
- package/schemas/compact-tool-descriptor.schema.json +51 -0
- package/schemas/conversation-turn.schema.json +10 -0
- package/schemas/memory-list-options.schema.json +16 -0
- package/schemas/run-event-payloads.schema.json +25 -2
- package/schemas/run-event.schema.json +2 -0
- package/src/lib/toolCatalog.ts +89 -0
- package/src/scenarios/a2ui-surface-delta-transport.test.ts +600 -0
- package/src/scenarios/aiproviders-selfhosted-honesty.test.ts +133 -0
- package/src/scenarios/channel-presence-behavioral.test.ts +83 -0
- package/src/scenarios/channel-presence-shape.test.ts +93 -0
- package/src/scenarios/context-budget-transcript-bound.test.ts +253 -0
- package/src/scenarios/context-summarization-replay.test.ts +155 -0
- package/src/scenarios/conversation-turn-model-provenance-shape.test.ts +120 -0
- package/src/scenarios/memory-injection-budget.test.ts +188 -0
- package/src/scenarios/prompt-prefix-cache.test.ts +200 -0
- package/src/scenarios/run-transport-economy.test.ts +236 -0
- package/src/scenarios/tool-catalog-compact-projection.test.ts +149 -0
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RFC 0111 — Context Economy: declared summarization is replay-deterministic.
|
|
3
|
+
*
|
|
4
|
+
* A host-produced summary is NONDETERMINISTIC host output that breaks the
|
|
5
|
+
* purity of the transcript-as-event-log-projection, so RFC 0111 governs it
|
|
6
|
+
* exactly like an RFC 0041 nondeterministic envelope: each substitution is
|
|
7
|
+
* recorded as a `context.summarized` event whose `summaryRef` artifact a
|
|
8
|
+
* `:fork mode:replay` MUST REUSE — the host MUST NOT re-summarize and produce
|
|
9
|
+
* a different model-facing transcript (`spec/v1/multi-agent-execution.md`
|
|
10
|
+
* §"Context economy" → "Replay determinism").
|
|
11
|
+
*
|
|
12
|
+
* Capability-gated on `multiAgent.executionModel.contextBudget.summarization.supported`
|
|
13
|
+
* (root-first per RFC 0073) via `behaviorGate`. Drives the multi-turn
|
|
14
|
+
* orchestrator fixture, reads the recorded `context.summarized` events from the
|
|
15
|
+
* run event-log (`/v1/host/sample/test/runs/:runId/events`), then replays the
|
|
16
|
+
* run via `POST /v1/runs/{runId}:fork {mode:"replay"}` and asserts the replayed
|
|
17
|
+
* run re-emits the SAME `context.summarized` records (same `summaryRef` +
|
|
18
|
+
* `replacedTurns`) — i.e. the recorded summary is reused, not regenerated.
|
|
19
|
+
*
|
|
20
|
+
* The event-log seam + replay are both OPTIONAL — the scenario soft-skips when
|
|
21
|
+
* the event-log seam is unwired (`404`), when the host advertises no `replay`
|
|
22
|
+
* mode, or when the run produced no summarization (no `context.summarized`).
|
|
23
|
+
* The RFC defers reference-host implementation; the witness comes from a host
|
|
24
|
+
* that runs real orchestrator turns and summarizes.
|
|
25
|
+
*
|
|
26
|
+
* @see RFCS/0111-context-economy.md
|
|
27
|
+
* @see spec/v1/multi-agent-execution.md §"Context economy (RFC 0111)"
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
import { describe, it, expect } from 'vitest';
|
|
31
|
+
import { driver } from '../lib/driver.js';
|
|
32
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
33
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
34
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
35
|
+
import { readCapabilityFamily } from '../lib/discovery-capabilities.js';
|
|
36
|
+
import { queryTestEvents, type TestEvent } from '../lib/event-log-query.js';
|
|
37
|
+
|
|
38
|
+
const FIXTURE = 'conformance-context-budget-multiturn';
|
|
39
|
+
const PROFILE = 'openwop-context-summarization';
|
|
40
|
+
|
|
41
|
+
/** `multiAgent` capability family, narrowed to the single branch this scenario reads. */
interface MultiAgentCap {
  readonly executionModel?: ExecutionModelCap;
}

/** `multiAgent.executionModel` — the parent of the context-budget advertisement. */
interface ExecutionModelCap {
  readonly contextBudget?: ContextBudgetCap;
}

/** `multiAgent.executionModel.contextBudget` — the parent of the summarization advertisement. */
interface ContextBudgetCap {
  readonly summarization?: SummarizationCap;
}

/** `…contextBudget.summarization` — `supported` is the flag the scenario gates on. */
interface SummarizationCap {
  readonly supported?: boolean;
}
|
|
53
|
+
|
|
54
|
+
// ── cast-free typed accessors (no `as`) ──────────────────────────────────
|
|
55
|
+
/**
 * Type guard for a plain keyed object.
 *
 * @param v - any value (typically parsed JSON).
 * @returns `true` only for non-null, non-array values whose `typeof` is `'object'`.
 */
function isRecord(v: unknown): v is Record<string, unknown> {
  // `null` and arrays both report typeof 'object', so rule them out first.
  if (v === null || Array.isArray(v)) return false;
  return typeof v === 'object';
}
|
|
58
|
+
/**
 * Type guard for primitive strings.
 *
 * @param v - any value.
 * @returns `true` when `typeof v` is `'string'`.
 */
function isString(v: unknown): v is string {
  const kind = typeof v;
  return kind === 'string';
}
|
|
61
|
+
/**
 * Narrows an unknown value to a string without a cast.
 *
 * @param v - any value.
 * @returns `v` itself when it is a string, otherwise `undefined`.
 */
function stringOf(v: unknown): string | undefined {
  if (!isString(v)) return undefined;
  return v;
}
|
|
64
|
+
/**
 * Narrows an unknown value to an array of strings without a cast.
 *
 * @param v - any value.
 * @returns the same array when `v` is an array whose every element is a
 *   string (an empty array qualifies), otherwise `undefined`.
 */
function stringArrayOf(v: unknown): string[] | undefined {
  if (!Array.isArray(v)) return undefined;
  return v.every(isString) ? v : undefined;
}
|
|
67
|
+
/**
 * Reads the `runId` field from a response body.
 *
 * @param v - a parsed response body of unknown shape.
 * @returns the string `runId`, or `undefined` when `v` is not a record or the
 *   field is missing / not a string.
 */
function runIdOf(v: unknown): string | undefined {
  if (!isRecord(v)) return undefined;
  return stringOf(v['runId']);
}
|
|
70
|
+
/**
 * Reads `replay.modes` from a discovery document.
 *
 * @param v - a parsed document of unknown shape.
 * @returns the `replay.modes` string array, or `[]` when `v` or `v.replay` is
 *   not a record, or `modes` is not an all-string array.
 */
function replayModesOf(v: unknown): string[] {
  const replay = isRecord(v) ? v['replay'] : undefined;
  const modes = isRecord(replay) ? stringArrayOf(replay['modes']) : undefined;
  return modes ?? [];
}
|
|
76
|
+
|
|
77
|
+
/**
 * A summary fingerprint: `summaryRef` plus the (ordered) replaced-turn ids.
 *
 * The fingerprint is only ever compared against other fingerprints produced by
 * this function, so its exact format is an implementation detail.
 *
 * @param e - an event whose payload is expected to carry `summaryRef` (string)
 *   and `replacedTurns` (string array).
 * @returns an unambiguous string encoding of `[summaryRef, replacedTurns]`, or
 *   `undefined` when either field is missing or has the wrong type.
 */
function summaryFingerprint(e: TestEvent): string | undefined {
  const ref = stringOf(e.payload['summaryRef']);
  const replaced = stringArrayOf(e.payload['replacedTurns']);
  if (ref === undefined || replaced === undefined) return undefined;
  // JSON-encode instead of joining with '::' and ',': with plain delimiters
  // `["a,b"]` and `["a","b"]` (or a '::' inside the ref) collapse to the same
  // string, which could mask a regenerated summary as a reused one.
  return JSON.stringify([ref, replaced]);
}
|
|
84
|
+
|
|
85
|
+
/**
 * Collects the fingerprints of every `context.summarized` event, sorted so two
 * runs can be compared independent of emission order.
 *
 * Asserts (via `expect`) that each `context.summarized` event yields a
 * fingerprint; events of any other type are ignored.
 *
 * @param events - events to scan.
 * @returns the sorted list of fingerprints.
 */
function summaryFingerprints(events: readonly TestEvent[]): string[] {
  const collected: string[] = [];
  events
    .filter((event) => event.type === 'context.summarized')
    .forEach((event) => {
      const fingerprint = summaryFingerprint(event);
      expect(fingerprint, 'a context.summarized event MUST carry summaryRef + replacedTurns').toBeDefined();
      if (fingerprint !== undefined) collected.push(fingerprint);
    });
  collected.sort();
  return collected;
}
|
|
95
|
+
|
|
96
|
+
describe('context-summarization-replay (RFC 0111 §"Replay determinism")', () => {
  it('replay reuses the recorded context.summarized summaryRef — never re-summarizes', async () => {
    // Gate on the capability advertisement, then on the fixture being offered.
    const multiAgent = await readCapabilityFamily<MultiAgentCap>('multiAgent');
    const advertised = multiAgent?.executionModel?.contextBudget?.summarization?.supported === true;
    if (!behaviorGate(PROFILE, advertised)) return;
    if (!isFixtureAdvertised(FIXTURE)) return; // fixture-gated soft-skip

    // Step 1 — run the multi-turn orchestrator fixture to a terminal state.
    const created = await driver.post('/v1/runs', { workflowId: FIXTURE });
    expect(created.status).toBe(201);
    const originalRunId = runIdOf(created.json);
    expect(originalRunId, 'POST /v1/runs MUST return a runId').toBeDefined();
    if (originalRunId === undefined) return;
    await pollUntilTerminal(originalRunId);

    // Step 2 — read back what the source run recorded (OPTIONAL event-log seam).
    const recorded = await queryTestEvents(originalRunId, { type: 'context.summarized' });
    if (!recorded.ok) return; // event-log seam unwired — soft-skip
    const expectedFingerprints = summaryFingerprints(recorded.events);
    if (expectedFingerprints.length === 0) {
      // No summarization happened on this host, so there is no recorded summary
      // whose reuse could be checked. Skip out loud rather than pass vacuously.
      // eslint-disable-next-line no-console
      console.warn(`[${PROFILE}] run produced no context.summarized events; replay-reuse leg soft-skipped`);
      return;
    }

    // Step 3 — replay, but only when the host advertises the replay fork mode.
    const discovery = await driver.get('/.well-known/openwop');
    const replayAdvertised = replayModesOf(discovery.json).includes('replay');
    if (!replayAdvertised) return;

    const forkPath = `/v1/runs/${encodeURIComponent(originalRunId)}:fork`;
    const forked = await driver.post(forkPath, { fromSeq: 0, mode: 'replay' });
    // replay not implemented for this run — soft-skip
    if (forked.status === 404 || forked.status === 501) return;
    expect(
      forked.status,
      driver.describe('rest-endpoints.md POST /v1/runs/{runId}:fork', 'replay fork MUST return 201'),
    ).toBe(201);
    const replayRunId = runIdOf(forked.json);
    expect(replayRunId, 'replay fork MUST return a runId').toBeDefined();
    if (replayRunId === undefined) return;
    await pollUntilTerminal(replayRunId);

    // Step 4 — the replayed run must carry the very same summarization records.
    const replayed = await queryTestEvents(replayRunId, { type: 'context.summarized' });
    if (!replayed.ok) return; // event-log seam unwired for the fork — soft-skip
    const actualFingerprints = summaryFingerprints(replayed.events);

    // Same summaryRef + replacedTurns means the recorded summary was reused and
    // not regenerated — the direct analogue of RFC 0041 envelope-refusal recovery.
    expect(
      actualFingerprints,
      driver.describe(
        'RFC 0111 §"Replay determinism"',
        'a replay fork MUST reuse the recorded context.summarized summaryRef (never re-summarize to a different transcript)',
      ),
    ).toEqual(expectedFingerprints);
  });
});
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Conversation-turn model provenance — `agent.model` (RFC 0109).
|
|
3
|
+
*
|
|
4
|
+
* Always-on, server-free schema-shape probe. Verifies the additive, normative
|
|
5
|
+
* RFC 0109 wire facts on the published schemas:
|
|
6
|
+
*
|
|
7
|
+
* 1. `conversation-turn.schema.json` `agent.model` is an OPTIONAL object that
|
|
8
|
+
* validates a conforming `{ provider, model }`, REQUIRES both fields, and is
|
|
9
|
+
* CLOSED (`additionalProperties: false`) — the SR-1 secret-redaction guard:
|
|
10
|
+
* no credential / endpoint / prompt can ride in the provenance stamp.
|
|
11
|
+
* 2. `agent.model` is OPTIONAL — an agent turn that omits it still validates
|
|
12
|
+
* (additive; pre-RFC-0109 producers + hosts that do not advertise).
|
|
13
|
+
* 3. `capabilities.schema.json` declares the `conversationTurnModelProvenance`
|
|
14
|
+
* block with its `supported` flag, and it is closed (`additionalProperties: false`).
|
|
15
|
+
*
|
|
16
|
+
* The host-side MUST (a host that advertises `supported: true` MUST stamp
|
|
17
|
+
* `agent.model`; one that does NOT advertise MUST omit it) is a behavioral
|
|
18
|
+
* contract gated on `conversationTurnModelProvenance.supported`, landing at the
|
|
19
|
+
* reference-host implementation (RFC 0109 §Conformance — same staging as RFC
|
|
20
|
+
* 0101's non-participant-rejection behavioral leg). This scenario asserts the
|
|
21
|
+
* wire SHAPE; the behavioral leg is gated.
|
|
22
|
+
*
|
|
23
|
+
* Normative references:
|
|
24
|
+
* - RFCS/0109-conversation-turn-model-provenance.md (§Proposal / §Conformance)
|
|
25
|
+
* - RFCS/0005-conversation.md (the conversation primitive this extends)
|
|
26
|
+
* - schemas/conversation-turn.schema.json (agent.model)
|
|
27
|
+
* - schemas/capabilities.schema.json (conversationTurnModelProvenance)
|
|
28
|
+
*
|
|
29
|
+
* @see RFCS/0109-conversation-turn-model-provenance.md
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
import { describe, it, expect } from 'vitest';
|
|
33
|
+
import { readFileSync } from 'node:fs';
|
|
34
|
+
import { join } from 'node:path';
|
|
35
|
+
import Ajv2020 from 'ajv/dist/2020.js';
|
|
36
|
+
import addFormats from 'ajv-formats';
|
|
37
|
+
import { SCHEMAS_DIR } from '../lib/paths.js';
|
|
38
|
+
|
|
39
|
+
/**
 * Builds an assertion message of the form `<specRef> — <requirement>`.
 *
 * @param specRef - the spec section the assertion is grounded in.
 * @param requirement - the requirement being asserted.
 */
const why = (specRef: string, requirement: string): string => [specRef, requirement].join(' — ');
|
|
40
|
+
|
|
41
|
+
/**
 * Reads and parses a JSON schema file from `SCHEMAS_DIR`.
 *
 * @param name - schema file name, resolved relative to `SCHEMAS_DIR`.
 * @returns the parsed document, typed (by assertion, not validation) as a record.
 * @throws whatever `readFileSync` / `JSON.parse` throw on a missing or malformed file.
 */
function loadSchema(name: string): Record<string, unknown> {
  const schemaPath = join(SCHEMAS_DIR, name);
  const text = readFileSync(schemaPath, 'utf8');
  return JSON.parse(text) as Record<string, unknown>;
}
|
|
44
|
+
|
|
45
|
+
describe('conversation-turn-model-provenance-shape: agent.model on a role:agent turn (RFC 0109 §Proposal, server-free)', () => {
  const ajv = new Ajv2020({ strict: false, allErrors: true });
  addFormats(ajv);
  const turn = ajv.compile(loadSchema('conversation-turn.schema.json'));

  // A role:'agent' turn with no `agent` object; each case adds its own.
  const agentBase = {
    messageId: 'council-q1:1:agent',
    from: 'host:advisor-cfo',
    content: 'From a cash-runway view I would push the launch one quarter.',
    ts: 1718900000000,
    role: 'agent' as const,
    turnIndex: 1,
    speakerId: 'host:advisor-cfo',
  };

  /**
   * Builds an agent turn whose `agent` object carries the same `agentId` as the
   * accepted baseline plus the given `model` stamp. Keeping `agentId` constant
   * means a rejection can only come from `agent.model` itself, so the negative
   * cases cannot pass merely because some other `agent` field is missing.
   */
  const turnWithModel = (model: Record<string, unknown>) => ({
    ...agentBase,
    agent: { agentId: 'advisor-cfo', model },
  });

  it('an agent turn carrying a conforming agent.model { provider, model } validates', () => {
    expect(
      turn(turnWithModel({ provider: 'anthropic', model: 'claude-opus-4-8' })),
      why('RFC 0109 §Proposal', "a role:'agent' turn with agent.model { provider, model } MUST validate"),
    ).toBe(true);
  });

  it('agent.model REQUIRES both provider and model', () => {
    expect(
      turn(turnWithModel({ provider: 'anthropic' })),
      why('RFC 0109 §Proposal', 'agent.model without `model` MUST be rejected'),
    ).toBe(false);
    expect(
      turn(turnWithModel({ model: 'claude-opus-4-8' })),
      why('RFC 0109 §Proposal', 'agent.model without `provider` MUST be rejected'),
    ).toBe(false);
  });

  it('agent.model is CLOSED — an extra key (a secret/endpoint/prompt) MUST be rejected (the SR-1 guard)', () => {
    expect(
      turn(turnWithModel({ provider: 'anthropic', model: 'claude-opus-4-8', apiKey: 'sk-secret' })),
      why('RFC 0109 §Proposal', 'agent.model MUST forbid extra keys — no credential/endpoint/prompt rides the provenance stamp'),
    ).toBe(false);
  });

  it('agent.model is OPTIONAL — an agent turn that omits it still validates (additive, back-compat)', () => {
    expect(
      turn({ ...agentBase, agent: { agentId: 'advisor-cfo' } }),
      why('RFC 0109 §Compatibility', 'agent.model is additive — a turn without it MUST still validate'),
    ).toBe(true);
    expect(
      turn(agentBase),
      why('RFC 0109 §Compatibility', 'a turn with no agent object at all MUST still validate'),
    ).toBe(true);
  });
});
|
|
96
|
+
|
|
97
|
+
describe('conversation-turn-model-provenance-shape: capability advertisement (RFC 0109 §Conformance, server-free)', () => {
  it('capabilities.schema.json declares conversationTurnModelProvenance with supported, closed', () => {
    // Inspect the schema document structurally — no validator involved here.
    const capabilities = loadSchema('capabilities.schema.json');
    const declared = capabilities.properties as Record<string, Record<string, unknown>>;
    const provenance = declared.conversationTurnModelProvenance as
      | { properties?: Record<string, unknown>; required?: string[]; additionalProperties?: boolean }
      | undefined;

    expect(
      provenance,
      why('RFC 0109 §Conformance', 'capabilities.conversationTurnModelProvenance MUST be declared'),
    ).toBeDefined();
    expect(
      provenance?.properties?.supported,
      why('RFC 0109 §Conformance', 'conversationTurnModelProvenance.supported MUST be declared'),
    ).toBeDefined();
    expect(
      provenance?.required,
      why('RFC 0109 §Conformance', 'supported MUST be required on the block'),
    ).toContain('supported');
    expect(
      provenance?.additionalProperties,
      why('RFC 0109 §Conformance', 'the block MUST be closed'),
    ).toBe(false);
  });

  it('the conversationTurnModelProvenance block validates a conforming advertisement and rejects extras', () => {
    // Compile just the sub-schema and exercise it with sample advertisements.
    const capabilities = loadSchema('capabilities.schema.json');
    const declared = capabilities.properties as Record<string, Record<string, unknown>>;
    const provenanceSchema = declared.conversationTurnModelProvenance;

    const validator = new Ajv2020({ strict: false, allErrors: true });
    addFormats(validator);
    const accepts = validator.compile(provenanceSchema);

    expect(
      accepts({ supported: true }),
      why('RFC 0109 §Conformance', 'a conforming advertisement MUST validate'),
    ).toBe(true);
    expect(
      accepts({}),
      why('RFC 0109 §Conformance', 'supported is required'),
    ).toBe(false);
    expect(
      accepts({ supported: true, unexpected: 1 }),
      why('RFC 0109 §Conformance', 'an extra key MUST be rejected (closed block)'),
    ).toBe(false);
  });
});
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RFC 0113 — Memory Injection Budget.
|
|
3
|
+
*
|
|
4
|
+
* Verifies the new token-denominated bound on the live injection read:
|
|
5
|
+
* `MemoryAdapter.list(memoryRef, { tokenBudget, rank, query })`
|
|
6
|
+
* (`spec/v1/agent-memory.md` §"Injection budget"). The genuinely new
|
|
7
|
+
* contribution is `tokenBudget`; `rank:'relevance'` DELEGATES to the
|
|
8
|
+
* existing `memory.search` semantic mode (RFC 0080) — this scenario does
|
|
9
|
+
* NOT assert a parallel ranking primitive, and the relevance leg soft-skips
|
|
10
|
+
* unless the host ALSO advertises `memory.search` semantic.
|
|
11
|
+
*
|
|
12
|
+
* Capability-gated on `capabilities.memory.injectionBudget.supported === true`
|
|
13
|
+
* (root-first per RFC 0073) via `behaviorGate`. Driven through the host-sample
|
|
14
|
+
* memory seam — the `conformance-agent-memory-injection-budget` fixture (the
|
|
15
|
+
* same `/v1/runs` + run-variable seam the other `agentMemory*` scenarios use to
|
|
16
|
+
* reach the adapter), which seeds a set whose total exceeds the budget AND
|
|
17
|
+
* includes one single entry larger than the whole budget, plus a BYOK-redacted
|
|
18
|
+
* entry and a cross-tenant probe.
|
|
19
|
+
*
|
|
20
|
+
* Asserts: cumulative tokens ≤ `tokenBudget`; an over-budget single entry is
|
|
21
|
+
* omitted (not truncated); `rank:'relevance'` ordering differs from recency on
|
|
22
|
+
* the crafted fixture (only when `memory.search` semantic is advertised, else
|
|
23
|
+
* soft-skip); and re-asserts SR-1 (redacted content) + CTI-1 (cross-tenant
|
|
24
|
+
* probe empty) on the budgeted path as a regression guard.
|
|
25
|
+
*
|
|
26
|
+
* @see RFCS/0113-memory-injection-budget.md
|
|
27
|
+
* @see spec/v1/agent-memory.md §"Injection budget"
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
import { describe, it, expect } from 'vitest';
|
|
31
|
+
import { driver } from '../lib/driver.js';
|
|
32
|
+
import { pollUntilTerminal } from '../lib/polling.js';
|
|
33
|
+
import { behaviorGate } from '../lib/behavior-gate.js';
|
|
34
|
+
import { isFixtureAdvertised } from '../lib/fixtures.js';
|
|
35
|
+
import { readCapabilityFamily } from '../lib/discovery-capabilities.js';
|
|
36
|
+
|
|
37
|
+
const FIXTURE = 'conformance-agent-memory-injection-budget';
|
|
38
|
+
const PROFILE = 'openwop-memory-injection-budget';
|
|
39
|
+
|
|
40
|
+
/** `memory` capability family, narrowed to the two blocks this scenario reads. */
interface MemoryCap {
  readonly injectionBudget?: MemoryInjectionBudgetCap;
  readonly search?: MemorySearchCap;
}

/** `memory.injectionBudget` — `supported` is the flag the scenario gates on. */
interface MemoryInjectionBudgetCap {
  readonly supported?: boolean;
  readonly tokenCounter?: string;
}

/** `memory.search` — the relevance leg additionally needs `'semantic'` in `modes`. */
interface MemorySearchCap {
  readonly supported?: boolean;
  readonly modes?: readonly string[];
}
|
|
52
|
+
|
|
53
|
+
// ── cast-free typed accessors (no `as`) ──────────────────────────────────
|
|
54
|
+
/**
 * Type guard for a plain keyed object.
 *
 * @param v - any value (typically parsed JSON).
 * @returns `true` only for non-null, non-array values whose `typeof` is `'object'`.
 */
function isRecord(v: unknown): v is Record<string, unknown> {
  const objectLike = v !== null && typeof v === 'object';
  // Arrays are object-like too, but are never treated as records here.
  return objectLike && !Array.isArray(v);
}
|
|
57
|
+
/**
 * Type guard for primitive strings.
 *
 * @param v - any value.
 * @returns `true` when `typeof v` is `'string'`.
 */
function isString(v: unknown): v is string {
  const kind = typeof v;
  return kind === 'string';
}
|
|
60
|
+
/**
 * Type guard for primitive numbers.
 *
 * @param v - any value.
 * @returns `true` when `typeof v` is `'number'` (this includes `NaN` and the infinities).
 */
function isNumber(v: unknown): v is number {
  const kind = typeof v;
  return kind === 'number';
}
|
|
63
|
+
/**
 * Type guard for primitive booleans.
 *
 * @param v - any value.
 * @returns `true` when `typeof v` is `'boolean'`.
 */
function isBoolean(v: unknown): v is boolean {
  const kind = typeof v;
  return kind === 'boolean';
}
|
|
66
|
+
/**
 * Narrows an unknown value to a string without a cast.
 *
 * @param v - any value.
 * @returns `v` itself when it is a string, otherwise `undefined`.
 */
function stringOf(v: unknown): string | undefined {
  if (isString(v)) return v;
  return undefined;
}
|
|
69
|
+
/**
 * Narrows an unknown value to a number without a cast.
 *
 * @param v - any value.
 * @returns `v` itself when it is a number, otherwise `undefined`.
 */
function numberOf(v: unknown): number | undefined {
  if (isNumber(v)) return v;
  return undefined;
}
|
|
72
|
+
/**
 * Narrows an unknown value to a boolean without a cast.
 *
 * @param v - any value.
 * @returns `v` itself when it is a boolean, otherwise `undefined`.
 */
function booleanOf(v: unknown): boolean | undefined {
  if (isBoolean(v)) return v;
  return undefined;
}
|
|
75
|
+
/**
 * Narrows an unknown value to an array of strings without a cast.
 *
 * @param v - any value.
 * @returns the same array when every element is a string (an empty array
 *   qualifies), otherwise `undefined`.
 */
function stringArrayOf(v: unknown): string[] | undefined {
  if (!Array.isArray(v)) return undefined;
  return v.every(isString) ? v : undefined;
}
|
|
78
|
+
/**
 * Narrows an unknown value to an array of records without a cast.
 *
 * @param v - any value.
 * @returns the same array when every element satisfies `isRecord` (an empty
 *   array qualifies), otherwise `undefined`.
 */
function recordArrayOf(v: unknown): Record<string, unknown>[] | undefined {
  if (!Array.isArray(v)) return undefined;
  return v.every(isRecord) ? v : undefined;
}
|
|
81
|
+
/**
 * Reads the `runId` field from a response body.
 *
 * @param v - a parsed response body of unknown shape.
 * @returns the string `runId`, or `undefined` when `v` is not a record or the
 *   field is missing / not a string.
 */
function runIdOf(v: unknown): string | undefined {
  if (!isRecord(v)) return undefined;
  return stringOf(v['runId']);
}
|
|
84
|
+
/**
 * Reads the `variables` record from a run snapshot.
 *
 * @param v - a parsed snapshot body of unknown shape.
 * @returns the `variables` object, or `undefined` when `v` is not a record or
 *   `variables` is missing / not a record.
 */
function variablesOf(v: unknown): Record<string, unknown> | undefined {
  const candidate = isRecord(v) ? v['variables'] : undefined;
  return isRecord(candidate) ? candidate : undefined;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Whether the host advertises semantic memory search.
 *
 * @param mem - the `memory` capability family, or `undefined` when absent.
 * @returns `true` only when `search.supported` is exactly `true` and
 *   `search.modes` is an array containing `'semantic'`.
 */
function advertisesSemanticSearch(mem: MemoryCap | undefined): boolean {
  const search = mem?.search;
  if (search?.supported !== true) return false;
  const modes = search?.modes;
  return Array.isArray(modes) && modes.includes('semantic');
}
|
|
94
|
+
|
|
95
|
+
/**
 * Runs the fixture workflow to completion and returns its run variables.
 *
 * Asserts (via `expect`) that run creation answers 201 with a `runId` and that
 * the run reaches the `completed` status, then reads the run snapshot.
 *
 * @returns the snapshot's `variables` record, or `undefined` when no `runId`
 *   was returned or the snapshot carries no `variables` record.
 */
async function driveFixtureVariables(): Promise<Record<string, unknown> | undefined> {
  const created = await driver.post('/v1/runs', { workflowId: FIXTURE });
  expect(created.status).toBe(201);

  const id = runIdOf(created.json);
  expect(id, 'POST /v1/runs MUST return a runId').toBeDefined();
  if (id === undefined) return undefined;

  const finalState = await pollUntilTerminal(id);
  expect(finalState.status).toBe('completed');

  const snapshotPath = `/v1/runs/${encodeURIComponent(id)}`;
  const snapshot = await driver.get(snapshotPath);
  return variablesOf(snapshot.json);
}
|
|
108
|
+
|
|
109
|
+
describe('memory-injection-budget (RFC 0113)', () => {
  it('token-bounds the injection read, omits the over-budget entry, and preserves SR-1 + CTI-1', async () => {
    const memory = await readCapabilityFamily<MemoryCap>('memory');
    const budgetAdvertised = memory?.injectionBudget?.supported === true;
    if (!behaviorGate(PROFILE, budgetAdvertised)) return;
    if (!isFixtureAdvertised(FIXTURE)) return; // fixture-gated soft-skip

    const vars = await driveFixtureVariables();
    expect(vars, 'fixture MUST surface run variables').toBeDefined();
    if (vars === undefined) return;

    // ── 1. tokenBudget bound (the new lever) ────────────────────────────
    const budget = numberOf(vars['tokenBudget']);
    const spent = numberOf(vars['budgetedTokenTotal']);
    expect(budget, 'fixture MUST echo the requested tokenBudget').toBeDefined();
    expect(spent, 'fixture MUST surface the budgeted cumulative token total').toBeDefined();
    if (budget !== undefined && spent !== undefined) {
      // The returned prefix, summed, has to fit inside the budget.
      expect(spent).toBeLessThanOrEqual(budget);
    }

    // ── 2. over-budget single entry omitted (not truncated) ─────────────
    const omittedFlag = booleanOf(vars['overBudgetEntryOmitted']);
    expect(
      omittedFlag,
      'an entry larger than the whole budget MUST be omitted, not truncated mid-entry',
    ).toBe(true);
    const slice = recordArrayOf(vars['budgetedEntries']);
    expect(slice, 'fixture MUST surface the budgeted entry slice').toBeDefined();
    const oversizedId = stringOf(vars['overBudgetEntryId']);
    if (slice !== undefined && oversizedId !== undefined) {
      const returnedIds = slice.map((entry) => stringOf(entry['id']));
      expect(returnedIds, 'the over-budget entry MUST NOT appear in the returned slice').not.toContain(oversizedId);
    }

    // ── 3. SR-1 re-assertion on the budgeted path ───────────────────────
    // Ranking/budgeting operates on already-redacted content, so the sample
    // must show the redaction marker and never the plaintext.
    const sample = stringOf(vars['redactedContentSample']);
    expect(sample, 'budgeted read MUST surface a redacted-content sample').toBeDefined();
    if (sample !== undefined) {
      expect(sample).toMatch(/\[REDACTED:[^\]]+\]/);
    }

    // ── 4. CTI-1 re-assertion on the budgeted path ──────────────────────
    // A prefix of a single-tenant list is still single-tenant: the
    // cross-tenant probe on the budgeted path has to come back empty.
    const crossTenant = vars['crossTenantBudgetedProbe'];
    if (!Array.isArray(crossTenant)) {
      expect(crossTenant, 'cross-tenant probe on the budgeted path MUST return [] / null').toBeFalsy();
    } else {
      expect(crossTenant.length, 'cross-tenant probe on the budgeted path MUST return []').toBe(0);
    }
  });

  it("rank:'relevance' reorders vs recency — only when memory.search semantic is ALSO advertised", async () => {
    const memory = await readCapabilityFamily<MemoryCap>('memory');
    const budgetAdvertised = memory?.injectionBudget?.supported === true;
    if (!behaviorGate(PROFILE, budgetAdvertised)) return;

    // RFC 0113: rank:'relevance' DELEGATES to memory.search semantic (RFC 0080).
    // Without that advertisement a host MUST NOT fabricate a relevance ranking,
    // so this leg soft-skips — injectionBudget does not advertise a ranking
    // surface of its own.
    const semanticAdvertised = advertisesSemanticSearch(memory);
    if (!semanticAdvertised) {
      // eslint-disable-next-line no-console
      console.warn(`[${PROFILE}] memory.search semantic not advertised; relevance leg soft-skipped`);
      return;
    }
    if (!isFixtureAdvertised(FIXTURE)) return;

    const vars = await driveFixtureVariables();
    expect(vars, 'fixture MUST surface run variables').toBeDefined();
    if (vars === undefined) return;

    const byRecency = stringArrayOf(vars['recencyOrder']);
    const byRelevance = stringArrayOf(vars['relevanceOrder']);
    expect(byRecency, 'fixture MUST surface the recency ordering').toBeDefined();
    expect(byRelevance, 'fixture MUST surface the relevance ordering').toBeDefined();
    // The fixture pins a query whose semantic top-k is not the
    // most-recent-first order, so relevance MUST reorder (not echo recency).
    expect(byRelevance).not.toEqual(byRecency);
  });
});
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* prompt-prefix-cache — RFC 0116 + SECURITY/invariants.yaml
|
|
3
|
+
* `prompt-prefix-cache-cross-tenant-isolation`.
|
|
4
|
+
*
|
|
5
|
+
* Status: ACTIVE (advertisement-shape + behavioral). The behavioral legs drive
|
|
6
|
+
* the host's real envelope/provider generate path through the OPTIONAL test
|
|
7
|
+
* seam `POST /v1/host/sample/ai/generate` (`host-sample-test-seams.md` §16,
|
|
8
|
+
* env-gated on `OPENWOP_TEST_SEAM_ENABLED=true`). Hosts that don't advertise
|
|
9
|
+
* `aiProviders.promptPrefixCache.supported` soft-skip; hosts that advertise it
|
|
10
|
+
* but don't wire the seam (HTTP 404/405) soft-skip the behavioral legs and
|
|
11
|
+
* verify advertisement shape only.
|
|
12
|
+
*
|
|
13
|
+
* RFC 0116 makes the optional `cachePrefixId` generate hint safe + testable via
|
|
14
|
+
* three pillars, each asserted here:
|
|
15
|
+
* (a) outcome-invariance — a generate with `cachePrefixId` and a control
|
|
16
|
+
* without produce the same accepted envelope + identical
|
|
17
|
+
* `inputTokens`/`outputTokens` (cost-hint-only, replay-invariant).
|
|
18
|
+
* (b) cache hit observable — a repeat generate shows
|
|
19
|
+
* `provider.usage.cacheReadTokens > 0`.
|
|
20
|
+
* (c) cross-tenant isolation — tenant B's first use of tenant A's
|
|
21
|
+
* `cachePrefixId` shows `cacheReadTokens == 0` (no cross-tenant share).
|
|
22
|
+
* THIS is the public test for the `prompt-prefix-cache-cross-tenant-isolation`
|
|
23
|
+
* invariant: the host MUST key its provider cache by `(tenant, cachePrefixId)`.
|
|
24
|
+
* (d) secret-free — a `cachePrefixId` is never emitted where SR-1 would
|
|
25
|
+
* redact, and the usage block carries no prompt substrings.
|
|
26
|
+
*
|
|
27
|
+
* @see RFCS/0116-prompt-prefix-cache.md
|
|
28
|
+
* @see spec/v1/ai-envelope.md §"Prompt-prefix cache (RFC 0116)"
|
|
29
|
+
* @see SECURITY/invariants.yaml — prompt-prefix-cache-cross-tenant-isolation
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
import { describe, it, expect } from 'vitest';
|
|
33
|
+
import { driver } from '../lib/driver.js';
|
|
34
|
+
import { readCapabilityFamily } from '../lib/discovery-capabilities.js';
|
|
35
|
+
|
|
36
|
+
/** `aiProviders` capability family, narrowed to the prompt-prefix-cache block. */
interface AiProvidersCap {
  promptPrefixCache?: PromptPrefixCacheCap;
}

/**
 * `aiProviders.promptPrefixCache` as advertised by the host. Both fields are
 * `unknown` on purpose: the shape scenario checks their runtime types itself.
 */
interface PromptPrefixCacheCap {
  supported?: unknown;
  providers?: unknown;
}

/** Body returned by the `POST /v1/host/sample/ai/generate` test seam. */
interface GenerateResponse {
  envelope?: { envelopeType?: string; payload?: unknown; envelopeId?: string };
  usage?: GenerateUsage;
}

/** Token accounting reported alongside a generate response. */
interface GenerateUsage {
  inputTokens?: number;
  outputTokens?: number;
  cacheReadTokens?: number;
  cacheWriteTokens?: number;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Reads the `aiProviders.promptPrefixCache` capability block.
 *
 * @returns the block when it is present and object-typed, otherwise `null`
 *   (family missing, block missing/falsy, or block not an object).
 */
async function readCap(): Promise<PromptPrefixCacheCap | null> {
  const aiProviders = await readCapabilityFamily<AiProvidersCap>('aiProviders');
  const candidate = aiProviders?.promptPrefixCache;
  if (!candidate || typeof candidate !== 'object') return null;
  return candidate;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Issues one generate call through the `POST /v1/host/sample/ai/generate` test seam.
 *
 * The envelope type and system prompt are fixed; only the tenant and the
 * optional cache hint vary between calls.
 *
 * @param args.tenantId - tenant on whose behalf the call is made.
 * @param args.cachePrefixId - optional cache hint; the key is left out of the
 *   request body entirely when this is `undefined`.
 * @returns the HTTP status and the response body (an empty object when the
 *   response carried no JSON). The body type is asserted, not validated.
 */
async function generate(args: {
  tenantId: string;
  cachePrefixId?: string;
}): Promise<{ status: number; body: GenerateResponse }> {
  const { tenantId, cachePrefixId } = args;
  // Only send the hint key when a hint was actually supplied.
  const cacheHint = cachePrefixId === undefined ? {} : { cachePrefixId };
  const res = await driver.post('/v1/host/sample/ai/generate', {
    tenantId,
    envelopeType: 'clarification.request',
    systemPrompt: 'You are a helpful assistant. Answer concisely.',
    ...cacheHint,
  });
  const body = (res.json ?? {}) as GenerateResponse;
  return { status: res.status, body };
}
|
|
75
|
+
|
|
76
|
+
describe('prompt-prefix-cache: advertisement shape (RFC 0116)', () => {
  it('aiProviders.promptPrefixCache is either absent or a well-formed object', async () => {
    const advertised = await readCap();
    if (advertised === null) return; // not advertised — skip

    // `supported` is mandatory once the block is present.
    const supportedKind = typeof advertised.supported;
    expect(
      supportedKind,
      driver.describe(
        'capabilities.schema.json §aiProviders.promptPrefixCache',
        'promptPrefixCache.supported MUST be a boolean when the block is present',
      ),
    ).toBe('boolean');

    // `providers` is optional; when given it has to be an array.
    if (advertised.providers === undefined) return;
    const providersIsArray = Array.isArray(advertised.providers);
    expect(
      providersIsArray,
      driver.describe(
        'capabilities.schema.json §aiProviders.promptPrefixCache',
        'promptPrefixCache.providers MUST be an array of provider ids when present (provider-scoped)',
      ),
    ).toBe(true);
  });
});
|
|
98
|
+
|
|
99
|
+
describe('prompt-prefix-cache: behavioral (RFC 0116 §"Normative requirements")', () => {
  // Spec citations shared by several assertions below.
  const SEAM_REF = 'host-sample-test-seams.md §16';
  const SEAM_OK = 'generate seam MUST return 200';
  const RULE_3 = 'ai-envelope.md §"Prompt-prefix cache (RFC 0116)" rule 3';
  const TOKENS_INVARIANT =
    'provider.usage.inputTokens/outputTokens MUST be identical hit-vs-miss (replay-invariant)';

  /** True only when the host advertises `promptPrefixCache.supported === true`. */
  const isAdvertised = async (): Promise<boolean> => (await readCap())?.supported === true;

  /** 404/405 from the seam means the host has not wired it; the scenario then skips. */
  const isSeamMissing = (status: number): boolean => status === 404 || status === 405;

  /** Builds a cachePrefixId that differs per call, even within one millisecond. */
  const freshPrefixId = (tag: string): string =>
    `${tag}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;

  /** Asserts that a seam call succeeded, citing the seam spec on failure. */
  const expectOk = (status: number): void => {
    expect(status, driver.describe(SEAM_REF, SEAM_OK)).toBe(200);
  };

  it('(a) outcome-invariance — cachePrefixId vs control → same envelope + identical input/output tokens', async () => {
    if (!(await isAdvertised())) return; // not advertised — skip
    const prefixId = freshPrefixId('inv');

    // Control call carries no cache hint at all.
    const control = await generate({ tenantId: 'tenant-a' });
    if (isSeamMissing(control.status)) return; // seam not wired
    expectOk(control.status);

    const withPrefix = await generate({ tenantId: 'tenant-a', cachePrefixId: prefixId });
    expectOk(withPrefix.status);

    expect(
      withPrefix.body.envelope?.envelopeType,
      driver.describe(
        RULE_3,
        'cachePrefixId is a cost hint, never semantic: the accepted envelope MUST be identical hit-vs-miss',
      ),
    ).toBe(control.body.envelope?.envelopeType);
    // Both token counters get the same citation, since the message names both.
    expect(
      withPrefix.body.usage?.inputTokens,
      driver.describe(RULE_3, TOKENS_INVARIANT),
    ).toBe(control.body.usage?.inputTokens);
    expect(
      withPrefix.body.usage?.outputTokens,
      driver.describe(RULE_3, TOKENS_INVARIANT),
    ).toBe(control.body.usage?.outputTokens);
  });

  it('(b) cache hit observable — a repeat generate shows cacheReadTokens > 0 while tokens stay invariant', async () => {
    if (!(await isAdvertised())) return; // not advertised — skip
    const prefixId = freshPrefixId('hit');

    const prime = await generate({ tenantId: 'tenant-a', cachePrefixId: prefixId });
    if (isSeamMissing(prime.status)) return; // seam not wired
    expectOk(prime.status);

    const repeat = await generate({ tenantId: 'tenant-a', cachePrefixId: prefixId });
    expectOk(repeat.status);
    expect(
      repeat.body.usage?.cacheReadTokens ?? 0,
      driver.describe(
        'ai-envelope.md §"Prompt-prefix cache (RFC 0116)" rule 4',
        'a repeat generate with the same cachePrefixId for the SAME tenant MUST be an observable cache hit (cacheReadTokens > 0)',
      ),
    ).toBeGreaterThan(0);
    // The cost-only witness MUST NOT have changed the recorded outcome.
    expect(
      repeat.body.usage?.inputTokens,
      driver.describe(RULE_3, TOKENS_INVARIANT),
    ).toBe(prime.body.usage?.inputTokens);
    expect(
      repeat.body.usage?.outputTokens,
      driver.describe(RULE_3, TOKENS_INVARIANT),
    ).toBe(prime.body.usage?.outputTokens);
  });

  it('(c) cross-tenant isolation — tenant B first use of tenant A\'s cachePrefixId → cacheReadTokens == 0', async () => {
    if (!(await isAdvertised())) return; // not advertised — skip
    const prefixId = freshPrefixId('xtenant');

    // Tenant A primes the cache under a shared, predictable cachePrefixId.
    const aPrime = await generate({ tenantId: 'tenant-a', cachePrefixId: prefixId });
    if (isSeamMissing(aPrime.status)) return; // seam not wired
    expectOk(aPrime.status);

    // Tenant B's FIRST use of the SAME cachePrefixId MUST be a miss — the host
    // keys its provider cache by (resolved tenant, cachePrefixId), never global.
    const bFirst = await generate({ tenantId: 'tenant-b', cachePrefixId: prefixId });
    expectOk(bFirst.status);
    expect(
      bFirst.body.usage?.cacheReadTokens ?? 0,
      driver.describe(
        'SECURITY/invariants.yaml prompt-prefix-cache-cross-tenant-isolation',
        'tenant B\'s first use of tenant A\'s cachePrefixId MUST be a cache MISS (cacheReadTokens == 0) — the cache MUST be keyed by (tenant, cachePrefixId), never global; cross-tenant sharing is context leakage',
      ),
    ).toBe(0);
  });

  it('(d) secret-free — the response never echoes cachePrefixId in a SR-1-sensitive position', async () => {
    if (!(await isAdvertised())) return; // not advertised — skip
    const prefixId = freshPrefixId('secretfree');

    const res = await generate({ tenantId: 'tenant-a', cachePrefixId: prefixId });
    if (isSeamMissing(res.status)) return; // seam not wired
    expectOk(res.status);

    // The usage block is cost-only; it MUST NOT carry prompt/response substrings
    // (SR-1). cachePrefixId is a public cache key, but the cost witness fields
    // themselves are integers — assert the usage block is shape-clean.
    // NOTE(review): only the four known counters are inspected; the title's
    // "never echoes cachePrefixId" is not asserted beyond this shape check.
    const usage: GenerateUsage = res.body.usage ?? {};
    const counters = ['inputTokens', 'outputTokens', 'cacheReadTokens', 'cacheWriteTokens'] as const;
    for (const key of counters) {
      // Widened to unknown: the value is untrusted JSON despite the static type.
      const value: unknown = usage[key];
      if (value === undefined) continue;
      // Number.isInteger rejects strings, null, NaN, Infinity and fractions,
      // which a bare `typeof value === 'number'` check would partly let through.
      expect(
        Number.isInteger(value),
        driver.describe(
          'run-event-payloads.schema.json §providerUsage',
          `provider.usage.${key} MUST be a cost-only integer (no prompt substrings per SR-1)`,
        ),
      ).toBe(true);
    }
  });
});
|