@takk/racs 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +70 -0
- package/LICENSE +190 -0
- package/NOTICE +40 -0
- package/README.md +381 -0
- package/SECURITY.md +57 -0
- package/dist/cli/index.js +3016 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/edge/index.cjs +2000 -0
- package/dist/edge/index.cjs.map +1 -0
- package/dist/edge/index.d.cts +598 -0
- package/dist/edge/index.d.ts +598 -0
- package/dist/edge/index.js +1987 -0
- package/dist/edge/index.js.map +1 -0
- package/dist/index.cjs +2071 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +39 -0
- package/dist/index.d.ts +39 -0
- package/dist/index.js +2057 -0
- package/dist/index.js.map +1 -0
- package/dist/integrations/index.cjs +123 -0
- package/dist/integrations/index.cjs.map +1 -0
- package/dist/integrations/index.d.cts +285 -0
- package/dist/integrations/index.d.ts +285 -0
- package/dist/integrations/index.js +117 -0
- package/dist/integrations/index.js.map +1 -0
- package/dist/otel/index.cjs +93 -0
- package/dist/otel/index.cjs.map +1 -0
- package/dist/otel/index.d.cts +105 -0
- package/dist/otel/index.d.ts +105 -0
- package/dist/otel/index.js +91 -0
- package/dist/otel/index.js.map +1 -0
- package/dist/types-DQ7-9sk3.d.cts +758 -0
- package/dist/types-DQ7-9sk3.d.ts +758 -0
- package/dist/vercel/index.cjs +209 -0
- package/dist/vercel/index.cjs.map +1 -0
- package/dist/vercel/index.d.cts +210 -0
- package/dist/vercel/index.d.ts +210 -0
- package/dist/vercel/index.js +206 -0
- package/dist/vercel/index.js.map +1 -0
- package/dist/web/index.cjs +2000 -0
- package/dist/web/index.cjs.map +1 -0
- package/dist/web/index.d.cts +2 -0
- package/dist/web/index.d.ts +2 -0
- package/dist/web/index.js +1987 -0
- package/dist/web/index.js.map +1 -0
- package/package.json +189 -0
|
@@ -0,0 +1,758 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* The single type contract of RACS (Remote Agent Context Store), provider-faithful
|
|
3
|
+
* prefix-cache management for Massive Intelligence (IM) agent workloads.
|
|
4
|
+
*
|
|
5
|
+
* Product invariant, the one rule everything else follows from:
|
|
6
|
+
*
|
|
7
|
+
* RACS never talks to any provider network API. It plans cache directives, normalizes the
|
|
8
|
+
* usage reports the host application already receives from its own provider calls, and
|
|
9
|
+
* accounts for every cached token. The host application stays in full control of its own
|
|
10
|
+
* API calls, credentials, retries, and transport.
|
|
11
|
+
*
|
|
12
|
+
* Why this invariant exists:
|
|
13
|
+
* - Zero runtime dependencies. No provider SDK ever enters the dependency graph, so the
|
|
14
|
+
* package stays auditable and immune to upstream SDK churn.
|
|
15
|
+
* - Zero credentials. RACS never sees an API key, so it can never leak one, and security
|
|
16
|
+
* review of the package is a pure-logic review.
|
|
17
|
+
* - Works everywhere. Browsers, edge runtimes, workers, and Node all run the same code,
|
|
18
|
+
* because nothing here touches sockets, the filesystem (outside the optional file state
|
|
19
|
+
* backend), or platform-specific globals.
|
|
20
|
+
*
|
|
21
|
+
* Determinism contract: RACS never calls the global random generator. Identifiers derive
|
|
22
|
+
* from a seeded generator ({@link RACSOptions.seed}), and the platform wall clock is read
|
|
23
|
+
* only where a timestamp is part of the public record, always injectable through
|
|
24
|
+
* {@link RACSOptions.clock} for tests.
|
|
25
|
+
*
|
|
26
|
+
* @packageDocumentation
|
|
27
|
+
*/
|
|
28
|
+
/**
 * Names the part a prompt segment plays inside the assembled request.
 *
 * With a role attached, the planner can reason about conventional prompt anatomy without
 * ever parsing content:
 * - `'system'`: system instructions, persona, policies. Usually the most stable text.
 * - `'tools'`: tool and function definitions. These should be byte-stable between calls,
 *   see the `'unstable-tools'` lint.
 * - `'documents'`: retrieved or attached reference material, knowledge bases, file dumps.
 * - `'history'`: prior conversation turns. Grows monotonically in well-behaved agents.
 * - `'dynamic'`: the live tail: the current user turn, scratch state, anything expected
 *   to differ on every call.
 *
 * Minor-extensible union: minor versions may introduce new roles, so consumers must
 * tolerate members they do not recognize.
 */
type SegmentRole =
  | 'system'
  | 'tools'
  | 'documents'
  | 'history'
  | 'dynamic';
|
|
44
|
+
/**
 * How often a segment is declared to change. This is the planner's primary input.
 *
 * - `'stable'`: byte-identical across calls for the lifetime of the agent or deployment.
 * - `'semi'`: changes occasionally, for example a document set refreshed hourly. It may
 *   still be worth caching when expected reuse is dense enough.
 * - `'volatile'`: expected to differ on every call. Nothing cached after a volatile
 *   segment can ever be reused, see the `'breakpoint-after-volatile'` lint.
 *
 * The caller declares stability; it is not inferred from content, because only the host
 * knows its own update cadence. RACS lints for declarations that look wrong.
 */
type Stability =
  | 'stable'
  | 'semi'
  | 'volatile';
|
|
57
|
+
/**
 * A single contiguous span of the prompt. This is the unit RACS plans over.
 *
 * Content contract: supply `content` OR `contentHash`, at least one of the two.
 * - With only `content`, the engine hashes the text itself to derive the deterministic
 *   prefix key and estimates `tokens` at roughly 4 characters per token, the standard
 *   English-text approximation. Estimates are good enough for break-even math; pass real
 *   `tokens` from a tokenizer when precision matters.
 * - With only `contentHash` (hash-only mode), RACS never sees and never stores the text.
 *   This is the privacy mode: plans, drift reports, persisted snapshots, and telemetry
 *   carry hashes and token counts only, never prompt content. Provide `tokens` alongside,
 *   otherwise the segment counts as zero tokens in break-even math.
 * - With both, `contentHash` wins for keying and `content` is used only for
 *   content-shape lints such as `'timestamp-in-stable'`.
 */
interface PromptSegment {
  /**
   * Identifier chosen by the caller, unique within one {@link PlanInput}. Directives,
   * lint findings, and drift reports refer to it, so keep it stable across calls for the
   * same logical segment ("system-prompt", "tool-defs", "kb-v3").
   */
  readonly id: string;

  /** The semantic role this span plays, see {@link SegmentRole}. */
  readonly role: SegmentRole;

  /** The declared change frequency of this span, see {@link Stability}. */
  readonly stability: Stability;

  /**
   * Literal segment text. Optional, see the content contract on {@link PromptSegment}.
   * Never persisted, never emitted in telemetry.
   */
  readonly content?: string;

  /**
   * Digest of the segment text computed by the caller, under any stable scheme the
   * caller likes (sha-256 hex is conventional). Supplying this field without `content`
   * activates hash-only privacy mode for the segment.
   */
  readonly contentHash?: string;

  /**
   * Exact token count from the provider tokenizer, when the caller has one. Overrides
   * the 4-characters-per-token estimate derived from `content`.
   */
  readonly tokens?: number;
}
|
|
100
|
+
/**
 * The caller's expectation of how often this prefix will be replayed. It is the demand
 * side of break-even math and the input to TTL selection and refresh scheduling.
 *
 * The two fields express the same thing from two angles; provide whichever is natural.
 * If both are present, `intervalSeconds` wins because it is the more precise statement.
 */
interface ExpectedReuse {
  /** Seconds expected to elapse between consecutive calls sharing this prefix. */
  readonly intervalSeconds?: number;

  /** Number of calls per hour expected to share this prefix. */
  readonly callsPerHour?: number;
}
|
|
113
|
+
/**
 * All the planner needs in order to produce a {@link CachePlan}. It is pure data with no
 * callbacks and is trivially serializable, so plans can be computed anywhere and shipped
 * anywhere.
 */
interface PlanInput {
  /**
   * Optional logical identity of the agent. Segments from different agents never share
   * prefix keys even when content collides, and drift is tracked per agent lineage.
   */
  readonly agentId?: string;

  /** The target provider; selects the adapter family and profile. */
  readonly provider: ProviderId;

  /** The provider's model identifier, verbatim, for example 'claude-sonnet-4-5'. */
  readonly model: string;

  /**
   * The prompt segments in request order; the first element is the start of the prompt.
   * Order is meaningful: prefix caches are strictly left-anchored on every provider
   * family.
   */
  readonly segments: ReadonlyArray<PromptSegment>;

  /** The expected reuse pattern; drives TTL choice, break-even math, and refresh scheduling. */
  readonly reuse?: ExpectedReuse;
}
|
|
135
|
+
/**
 * The four cache-control mechanisms found across the provider landscape. Each named
 * provider is a thin profile over exactly one family, which is why RACS supports 15+
 * providers without 15 code paths.
 *
 * - `'breakpoint'`: the caller marks explicit cache boundaries inside the request body
 *   and pays a write premium per marked span (Anthropic `cache_control`, Amazon Bedrock
 *   `cachePoint`). Plans emit `'breakpoint'` directives.
 * - `'routing-key'`: the provider caches implicitly server-side, and the caller can only
 *   steer request routing with a key so that identical prefixes land on the same cache
 *   (OpenAI `prompt_cache_key`). Plans emit `'routing-key'` directives.
 * - `'resource'`: the cache is a first-class server resource with its own lifecycle,
 *   created, refreshed, and deleted by the host, often with per-token-hour storage
 *   billing (Google Gemini `cachedContents`). Plans emit `'resource'` directives.
 * - `'passive'`: the provider caches automatically and exposes no control surface at all
 *   (DeepSeek, Groq, local runtimes). RACS still plans segment ordering, lints, and
 *   accounts usage; the ordering itself is the optimization.
 */
type AdapterFamily =
  | 'breakpoint'
  | 'routing-key'
  | 'resource'
  | 'passive';
|
|
154
|
+
/**
 * The named provider profiles that ship with RACS. Each one is a thin parameterization
 * of a single {@link AdapterFamily}.
 *
 * Minor-extensible union: minor versions may add new providers, so consumers must
 * tolerate unknown members and should treat the type as open when switching on it.
 * The TeleologHI provider arrives in 2.0.0.
 *
 * `'custom'` is the escape hatch: combine it with {@link RACSOptions.profiles} to
 * describe any provider RACS does not name yet.
 */
type ProviderId =
  | 'anthropic'
  | 'openai'
  | 'google'
  | 'bedrock'
  | 'xai'
  | 'groq'
  | 'deepseek'
  | 'mistral'
  | 'openrouter'
  | 'moonshot'
  | 'ollama'
  | 'lmstudio'
  | 'huggingface'
  | 'microsoft-foundry'
  | 'hermes'
  | 'custom';
|
|
166
|
+
/**
 * The cache time-to-live tiers that breakpoint-family providers offer. Resource-family
 * TTLs are arbitrary second counts and are expressed as numbers where they occur
 * ({@link RefreshEntry.ttl}, the `'resource'` directive).
 *
 * As of June 2026 the two-tier model of 5 minutes and 1 hour is the breakpoint-family
 * standard; sources are cited in the profiles module.
 *
 * Minor-extensible union: minor versions may add new TTL tiers, and consumers must
 * tolerate unknown members, exactly as with {@link ProviderId}.
 */
type CacheTtl =
  | '5m'
  | '1h';
|
|
178
|
+
/**
 * The cache semantics of a single provider: the numbers the planner reasons with.
 *
 * Each numeric field records provider semantics as researched in June 2026; sources are
 * cited in JSDoc inside the profiles module, where the shipped values live. Every one of
 * them can be overridden per engine instance through {@link RACSOptions.profiles},
 * because providers change terms faster than packages release.
 */
interface ProviderProfile {
  /** Provider described by this profile. */
  readonly id: ProviderId;

  /** The mechanism, out of the four, that this provider implements, see {@link AdapterFamily}. */
  readonly family: AdapterFamily;

  /**
   * Smallest prefix, in tokens, that the provider will cache at all. The provider
   * silently leaves shorter prefixes uncached; the `'below-minimum'` lint fires before
   * that happens. Example as of June 2026: 1024 tokens on most Anthropic and OpenAI
   * models.
   */
  readonly minCacheableTokens?: number;

  /**
   * Upper bound on the number of cache breakpoints a single request may carry.
   * Breakpoint family only. Example as of June 2026: 4 `cache_control` blocks per
   * Anthropic request.
   */
  readonly maxBreakpoints?: number;

  /** The TTL tiers offered by this provider. Breakpoint family only. */
  readonly ttls?: ReadonlyArray<CacheTtl>;

  /**
   * Multiplier on the base input price for writing a 5-minute-TTL cache span.
   * Example as of June 2026: 1.25 on Anthropic, meaning a 25 percent write premium.
   */
  readonly writeMultiplier5m?: number;

  /**
   * Multiplier on the base input price for writing a 1-hour-TTL cache span.
   * Example as of June 2026: 2.0 on Anthropic.
   */
  readonly writeMultiplier1h?: number;

  /**
   * Multiplier on the base input price for reading cached tokens. Example as of
   * June 2026: 0.1 on Anthropic, meaning cached reads cost a tenth of fresh input.
   */
  readonly readMultiplier?: number;

  /**
   * Whether extended cache retention is available behind the provider's routing key,
   * for example the 24-hour retention tier OpenAI attaches to `prompt_cache_key` as of
   * June 2026. Routing-key family only.
   */
  readonly supportsRetention?: boolean;

  /**
   * Storage price, in USD per million tokens per hour, for keeping a resource-family
   * cache alive (Google Gemini `cachedContents` billing model as of June 2026).
   * Resource family only; feeds the net-cost side of break-even math.
   */
  readonly storagePerMTokHour?: number;

  /** Free-form caveats that do not fit a number; surfaced verbatim in plan reasoning. */
  readonly notes?: string;
}
|
|
234
|
+
/**
 * A single provider-faithful instruction that the host applies to its own API call.
 * Discriminated on `kind`: the breakpoint, routing-key, and resource families each have
 * a kind of the same name, and `'none'` is the explicit "nothing to do" case.
 *
 * RACS emits directives and the host executes them. This is the product invariant in
 * type form: nothing here is a network call, everything here is a description of one.
 *
 * Minor-extensible literals, same rule as {@link ProviderId}: the routing-key
 * `retention` literal (`'24h'`) and the resource `action` union
 * (`'create' | 'reuse' | 'refresh' | 'delete'`) may gain new members in minor versions,
 * and consumers must tolerate unknown members when switching on them.
 */
type CacheDirective =
  | {
      /**
       * Breakpoint family: place a cache marker (Anthropic `cache_control`, Bedrock
       * `cachePoint`) at the end of the named segment, with the chosen TTL tier.
       */
      readonly kind: 'breakpoint';

      /** The segment the marker goes after; references {@link PromptSegment.id}. */
      readonly segmentId: string;

      /** The TTL tier to request for the span that ends here. */
      readonly ttl: CacheTtl;
    }
  | {
      /**
       * Routing-key family: send this key with the request (OpenAI `prompt_cache_key`)
       * so identical prefixes route to the same server-side cache.
       */
      readonly kind: 'routing-key';

      /** Deterministic key derived from the stable prefix; send it verbatim. */
      readonly key: string;

      /** Asks for the extended retention tier when the profile supports it. */
      readonly retention?: '24h';
    }
  | {
      /**
       * Resource family: perform one lifecycle action on a server-side cache resource
       * (Google Gemini `cachedContents`). The host owns the actual API call and should
       * report the outcome back through usage recording.
       */
      readonly kind: 'resource';

      /** The lifecycle step: create a new resource, reuse, refresh its TTL, or delete it. */
      readonly action: 'create' | 'reuse' | 'refresh' | 'delete';

      /** The RACS-side identity of the resource, stable across plans for the same prefix. */
      readonly resourceKey: string;

      /** TTL in seconds to set on the resource; resource families take arbitrary values. */
      readonly ttlSeconds: number;
    }
  | {
      /**
       * Nothing to do, with the reason stated. Emitted for passive-family providers,
       * for prefixes below the cacheable minimum, and for plans where caching loses
       * money.
       */
      readonly kind: 'none';

      /** Human-readable explanation of why no directive applies. */
      readonly reason: string;
    };
|
|
288
|
+
/**
 * Machine-readable lint codes. Each one names a cache-efficiency hazard that the planner
 * can detect from structure alone. Minor-extensible: minor versions may add new codes,
 * so consumers must tolerate unknown members.
 *
 * - `'volatile-early'`: a volatile segment sits before stable or semi segments, so every
 *   token after it is unreachable for the cache.
 * - `'below-minimum'`: the stable prefix is shorter than the provider's minimum
 *   cacheable token count, so the provider would silently cache nothing.
 * - `'unstable-tools'`: a `'tools'` segment is declared semi or volatile. Tool
 *   definitions are usually generated and should be byte-stable; instability here is
 *   almost always a serialization bug (key order, timestamps in descriptions).
 * - `'timestamp-in-stable'`: a stable segment's content appears to embed a timestamp or
 *   date, which silently changes the prefix on every call and defeats the cache.
 * - `'identifier-in-stable'`: a stable segment's content appears to embed a per-request
 *   identifier (UUID, request id, session id); same failure mode as a timestamp.
 * - `'breakpoint-after-volatile'`: a breakpoint would land after a volatile segment, so
 *   the written span could never be read back.
 * - `'write-premium-trap'`: given the declared {@link ExpectedReuse}, the cache write
 *   premium exceeds the plausible read savings, so caching this prefix loses money.
 * - `'segment-order'`: segments are not ordered stable-first; reordering would lengthen
 *   the cacheable prefix without changing semantics the planner can see.
 * - `'missing-stability'`: a segment arrived without a usable stability declaration.
 *   Unreachable through this type system; guards untyped JavaScript callers at runtime.
 */
type LintCode =
  | 'volatile-early'
  | 'below-minimum'
  | 'unstable-tools'
  | 'timestamp-in-stable'
  | 'identifier-in-stable'
  | 'breakpoint-after-volatile'
  | 'write-premium-trap'
  | 'segment-order'
  | 'missing-stability';
|
|
314
|
+
/**
 * A single lint result. An error means the plan as declared cannot achieve cache hits,
 * a warning means money or hit ratio is probably being left on the table, and info is
 * advisory.
 */
interface LintFinding {
  /** Severity: 'error' defeats caching, 'warning' degrades it, 'info' advises. */
  readonly severity: 'error' | 'warning' | 'info';

  /** The machine-readable code; the catalog is on {@link LintCode}. */
  readonly code: LintCode;

  /** The offending segment, when the finding is local to one; references {@link PromptSegment.id}. */
  readonly segmentId?: string;

  /** Human-readable explanation including the concrete fix; English prose for logs. */
  readonly message: string;
}
|
|
328
|
+
/**
 * The economics of caching this prefix: what the write premium costs and how many reuses
 * pay it back. Computed from the provider profile multipliers and stated in tokens, so
 * it works even when no {@link PricingTable} was supplied.
 */
interface BreakEven {
  /**
   * The extra tokens the write premium effectively costs: the multiplier surcharge
   * expressed in base-input-token equivalents.
   */
  readonly writePremiumTokens: number;

  /** How many cache reads after the write it takes for cumulative savings to turn positive. */
  readonly minReusesToProfit: number;

  /** Whether the declared {@link ExpectedReuse} reaches that reuse count inside the TTL. */
  readonly profitable: boolean;

  /** Human-readable derivation of the numbers above, suitable for logs and reviews. */
  readonly reasoning: string;
}
|
|
346
|
+
/**
 * The planner's complete answer for a single {@link PlanInput}: the directives to apply,
 * the lints to heed, the economics, and a deterministic identity.
 *
 * Determinism: identical input, options, and seed always yield the same `planId` and
 * `prefixKey`. Plans are pure data and are safe to persist, diff, and replay.
 */
interface CachePlan {
  /** Deterministic identity of the plan, derived from the seeded generator, never random. */
  readonly planId: string;

  /** The provider this plan targets, echoed from the input. */
  readonly provider: ProviderId;

  /** The model this plan targets, echoed from the input. */
  readonly model: string;

  /** The adapter family the directives belong to, resolved from the provider profile. */
  readonly family: AdapterFamily;

  /**
   * Deterministic cache key of the stable prefix, derived from segment hashes,
   * provider, model, and agent identity. Equal keys mean byte-equal cacheable prefixes.
   * This is the join key across plans, usage, stats, drift, and refresh scheduling.
   */
  readonly prefixKey: string;

  /** Token count of the cacheable stable prefix, exact or estimated per segment rules. */
  readonly stableTokens: number;

  /** Token count of the whole prompt, with the same exact-or-estimated provenance. */
  readonly totalTokens: number;

  /** The provider-faithful instructions for the host to apply, in application order. */
  readonly directives: ReadonlyArray<CacheDirective>;

  /** The lint findings for this input, also available standalone via {@link RACS.lint}. */
  readonly findings: ReadonlyArray<LintFinding>;

  /** The cache economics, present when the profile carries the multipliers to compute it. */
  readonly breakEven?: BreakEven;

  /** Human-readable narrative of why the planner chose these directives. */
  readonly reasoning: string;
}
|
|
381
|
+
/**
 * The normalized usage report for a single provider call; the input to hit-ratio
 * analytics.
 *
 * The host pulls these counts out of the provider response it already has (for example
 * Anthropic `usage.cache_read_input_tokens`, OpenAI
 * `usage.prompt_tokens_details.cached_tokens`) and reports them here. RACS normalizes the
 * babel of provider usage shapes into this one record; it never fetches usage itself.
 */
interface CacheUsage {
  /** The provider that served the call. */
  readonly provider: ProviderId;

  /** The model that served the call; must match a {@link PricingTable} key for USD figures. */
  readonly model: string;

  /**
   * The prefix key of the plan this call executed. It links the usage to plan-level
   * stats and drift tracking. Omit it for calls made outside any RACS plan; they still
   * aggregate into ledger totals.
   */
  readonly prefixKey?: string;

  /**
   * Total input tokens billed for the call, ALL-IN: uncached fresh input plus cached
   * reads plus cache writes of both TTL tiers. Hit-ratio math derives the uncached
   * remainder as `inputTokens - cacheReadTokens - cacheWriteTokens5m - cacheWriteTokens1h`,
   * so a source that reports EXCLUSIVE counts (raw Anthropic usage, whose `input_tokens`
   * excludes cache reads and cache writes) must be normalized to the all-in total
   * before recording. The shipped otel and vercel adapters perform that normalization;
   * hosts recording by hand must sum the exclusive counts themselves.
   */
  readonly inputTokens: number;

  /** The input tokens served from cache at the discounted read rate. */
  readonly cacheReadTokens: number;

  /** The tokens written to a 5-minute-TTL cache on this call. Breakpoint family. */
  readonly cacheWriteTokens5m?: number;

  /** The tokens written to a 1-hour-TTL cache on this call. Breakpoint family. */
  readonly cacheWriteTokens1h?: number;

  /**
   * When the call happened, in milliseconds since the Unix epoch. Defaults to the
   * injected clock at recording time; pass it explicitly when replaying historical
   * usage.
   */
  readonly timestamp?: number;
}
|
|
422
|
+
/**
 * The price card for one model, in USD per million tokens. Always user-supplied, see
 * {@link PricingTable}.
 */
interface Pricing {
  /** The base input price, USD per million tokens. */
  readonly inputPerMTok: number;

  /** The discounted cache read price, USD per million tokens. */
  readonly cacheReadPerMTok?: number;

  /** The 5-minute-TTL cache write price, USD per million tokens. */
  readonly cacheWrite5mPerMTok?: number;

  /** The 1-hour-TTL cache write price, USD per million tokens. */
  readonly cacheWrite1hPerMTok?: number;

  /** The output price, USD per million tokens; used only for completeness in reports. */
  readonly outputPerMTok?: number;

  /** The resource-family storage price, USD per million tokens per hour. */
  readonly storagePerMTokHour?: number;
}
|
|
440
|
+
/**
 * Prices keyed by model id; keys are matched against {@link CacheUsage.model}.
 *
 * ALWAYS user-supplied: the engine never hardcodes prices, because prices change without
 * notice and a stale hardcoded number is worse than none. USD figures such as
 * {@link PrefixStats.savedUsd} are reported only when the table covers the model in
 * question; token-denominated statistics are always reported regardless.
 */
type PricingTable = {
  readonly [model: string]: Pricing;
};
|
|
449
|
+
/**
 * The aggregated cache performance of a single prefix across every recorded call.
 */
interface PrefixStats {
  /** The prefix these numbers describe, see {@link CachePlan.prefixKey}. */
  readonly prefixKey: string;

  /** How many usage records were aggregated. */
  readonly calls: number;

  /**
   * Normalized hit ratio in [0, 1]: cache read tokens divided by total input tokens.
   * The formula is the same across all providers, so numbers are comparable between
   * them.
   */
  readonly hitRatio: number;

  /** The total number of tokens served from cache. */
  readonly readTokens: number;

  /** The total number of tokens written to cache, both TTL tiers combined. */
  readonly writeTokens: number;

  /** The total number of input tokens that were neither read from nor written to cache. */
  readonly uncachedTokens: number;

  /**
   * USD saved by cache reads versus paying the base input price. Present only when the
   * {@link PricingTable} covers the model.
   */
  readonly savedUsd?: number;

  /** USD spent on write premiums. Present only when pricing covers the model. */
  readonly writeSpendUsd?: number;
}
|
|
476
|
+
/**
 * The ledger-wide aggregate over every recorded usage, together with the per-prefix
 * breakdown. Returned by {@link RACS.stats}.
 */
interface LedgerStats {
  /** The total number of usage records aggregated under the active filter. */
  readonly calls: number;

  /** Normalized hit ratio in [0, 1], using the same formula as {@link PrefixStats.hitRatio}. */
  readonly hitRatio: number;

  /** The total number of tokens served from cache. */
  readonly readTokens: number;

  /** The total number of tokens written to cache. */
  readonly writeTokens: number;

  /** The total number of input tokens untouched by any cache. */
  readonly uncachedTokens: number;

  /** USD saved by cache reads. Present only when pricing covers the models involved. */
  readonly savedUsd?: number;

  /**
   * The net USD effect of caching: savings minus write premiums and storage. Present
   * only when pricing covers the models involved. Negative means caching lost money.
   */
  readonly netUsd?: number;

  /** The per-prefix breakdown, sorted by the engine for stable output. */
  readonly prefixes: ReadonlyArray<PrefixStats>;
}
|
|
501
|
+
/**
 * The record of one detected prefix drift: the same agent and model lineage produced a
 * different prefix key than last time, so previously cached tokens are dead.
 *
 * Drift is the silent cache killer: a one-byte change in a "stable" segment invalidates
 * the entire left-anchored prefix from that byte onward. RACS detects it by comparing
 * fingerprints across plans, names the segments that changed, and quantifies the loss.
 */
interface DriftReport {
  /**
   * The logical agent the drifting lineage belongs to; present when the plans carried
   * {@link PlanInput.agentId}. Lets downstream consumers, for example parameter-tuning
   * runtimes, map the drift back to the agent whose reward landscape just moved.
   */
  readonly agentId?: string;

  /** The new prefix key, as produced by the latest plan. */
  readonly prefixKey: string;

  /** The prefix key that the same lineage produced previously. */
  readonly previousKey: string;

  /** The ids of the segments whose hashes changed between the two plans. */
  readonly changedSegmentIds: ReadonlyArray<string>;

  /** The stable-prefix tokens whose cached copies the drift invalidated. */
  readonly invalidatedTokens: number;

  /** When the drift was detected, in milliseconds since the Unix epoch, from the clock. */
  readonly timestamp: number;
}
|
|
527
|
+
/**
|
|
528
|
+
* One entry in the keep-warm schedule, the heartbeat pattern as a library primitive.
|
|
529
|
+
*
|
|
530
|
+
* Provider caches expire on a TTL, and a read or refresh shortly before expiry keeps the
|
|
531
|
+
* cache warm for another window at read price instead of paying the write premium again.
|
|
532
|
+
* `refreshAt` is set at 90 percent of the TTL window after the last write, early enough to
|
|
533
|
+
* absorb scheduling jitter, late enough not to waste reads. The host runs the timer and
|
|
534
|
+
* the call; RACS only computes when (see the product invariant).
|
|
535
|
+
*/
|
|
536
|
+
interface RefreshEntry {
|
|
537
|
+
/** Prefix this entry keeps warm, see {@link CachePlan.prefixKey}. */
|
|
538
|
+
readonly prefixKey: string;
|
|
539
|
+
/** Provider the cached prefix lives on. */
|
|
540
|
+
readonly provider: ProviderId;
|
|
541
|
+
/** Model the cached prefix belongs to. */
|
|
542
|
+
readonly model: string;
|
|
543
|
+
/**
|
|
544
|
+
* TTL the cache was written with: a {@link CacheTtl} tier for breakpoint and
|
|
545
|
+
* routing-key families, a plain number of seconds for resource-family entries, which
|
|
546
|
+
* carry the `ttlSeconds` of their directive.
|
|
547
|
+
*/
|
|
548
|
+
readonly ttl: CacheTtl | number;
|
|
549
|
+
/** Milliseconds since the Unix epoch of the last cache write or refresh. */
|
|
550
|
+
readonly lastWriteAt: number;
|
|
551
|
+
/** Milliseconds since the Unix epoch when the keep-warm touch is due, 90 percent of TTL. */
|
|
552
|
+
readonly refreshAt: number;
|
|
553
|
+
}
|
|
554
|
+
/**
|
|
555
|
+
* Everything observable about a running engine, discriminated on `type`. Listeners are
|
|
556
|
+
* synchronous and must not throw, see {@link TelemetryListener}. All timestamps come from
|
|
557
|
+
* the injected clock, so telemetry is deterministic under test.
|
|
558
|
+
*
|
|
559
|
+
* - `'plan.created'`: a plan was produced by {@link RACS.plan}.
|
|
560
|
+
* - `'prefix.drifted'`: a drift was detected, the report carries its own timestamp.
|
|
561
|
+
* - `'usage.recorded'`: a usage record was ingested, `hit` is true when the call read at
|
|
562
|
+
* least one cached token.
|
|
563
|
+
* - `'refresh.due'`: a keep-warm entry crossed its `refreshAt` during {@link RACS.schedule}.
|
|
564
|
+
* - `'resource.action'`: a resource-family lifecycle directive was emitted, mirror this
|
|
565
|
+
* into the host's own resource bookkeeping.
|
|
566
|
+
* - `'limit.reached'`: a bounded internal store hit its cap and evicted or dropped data,
|
|
567
|
+
* `scope` names the store, `detail` says what was sacrificed.
|
|
568
|
+
*/
|
|
569
|
+
type TelemetryEvent = {
|
|
570
|
+
readonly type: 'plan.created';
|
|
571
|
+
readonly plan: CachePlan;
|
|
572
|
+
readonly timestamp: number;
|
|
573
|
+
} | {
|
|
574
|
+
readonly type: 'prefix.drifted';
|
|
575
|
+
readonly report: DriftReport;
|
|
576
|
+
} | {
|
|
577
|
+
readonly type: 'usage.recorded';
|
|
578
|
+
readonly usage: CacheUsage;
|
|
579
|
+
readonly hit: boolean;
|
|
580
|
+
readonly timestamp: number;
|
|
581
|
+
} | {
|
|
582
|
+
readonly type: 'refresh.due';
|
|
583
|
+
readonly entry: RefreshEntry;
|
|
584
|
+
readonly timestamp: number;
|
|
585
|
+
} | {
|
|
586
|
+
readonly type: 'resource.action';
|
|
587
|
+
readonly directive: Extract<CacheDirective, {
|
|
588
|
+
kind: 'resource';
|
|
589
|
+
}>;
|
|
590
|
+
readonly timestamp: number;
|
|
591
|
+
} | {
|
|
592
|
+
readonly type: 'limit.reached';
|
|
593
|
+
readonly scope: 'prefixes' | 'ledger';
|
|
594
|
+
readonly detail: string;
|
|
595
|
+
readonly timestamp: number;
|
|
596
|
+
};
|
|
597
|
+
/**
|
|
598
|
+
* Receives every {@link TelemetryEvent} synchronously, in emission order. Keep it fast and
|
|
599
|
+
* non-throwing: the engine calls it inline on its own hot path. Subscribe via
|
|
600
|
+
* {@link RACS.on}, which returns the matching unsubscribe function.
|
|
601
|
+
*/
|
|
602
|
+
type TelemetryListener = (event: TelemetryEvent) => void;
|
|
603
|
+
/**
|
|
604
|
+
* Minimal structural contract for any string key-value store. Deliberately tiny so that
|
|
605
|
+
* any Redis, Upstash, or Cloudflare KV client wraps into it in one line, for example
|
|
606
|
+
* `{ get: (k) => redis.get(k), set: (k, v) => redis.set(k, v), delete: (k) => redis.del(k) }`.
|
|
607
|
+
*
|
|
608
|
+
* RACS never constructs a client and never sees connection credentials, the host passes a
|
|
609
|
+
* ready object. Returning either `null` or `undefined` from `get` means "absent".
|
|
610
|
+
*/
|
|
611
|
+
interface KvLike {
|
|
612
|
+
/** Reads a value, `undefined` or `null` when the key is absent. */
|
|
613
|
+
get(key: string): Promise<string | undefined | null>;
|
|
614
|
+
/** Writes a value, the return value is ignored. */
|
|
615
|
+
set(key: string, value: string): Promise<unknown>;
|
|
616
|
+
/** Deletes a key, the return value is ignored. */
|
|
617
|
+
delete(key: string): Promise<unknown>;
|
|
618
|
+
}
|
|
619
|
+
/**
|
|
620
|
+
* Serialized engine state: aggregate fingerprints, the resource registry, and ledger
|
|
621
|
+
* aggregates. Never prompt content: the snapshot holds hashes and numbers only, so
|
|
622
|
+
* persisting it leaks nothing even when the backing store is shared.
|
|
623
|
+
*/
|
|
624
|
+
interface StateSnapshot {
|
|
625
|
+
/** Snapshot schema version, currently the literal 1, bumped on breaking layout change. */
|
|
626
|
+
readonly version: 1;
|
|
627
|
+
/** Milliseconds since the Unix epoch when the snapshot was taken, from the clock. */
|
|
628
|
+
readonly savedAt: number;
|
|
629
|
+
/** Opaque engine state, treat as a black box, round-trip it unmodified. */
|
|
630
|
+
readonly data: Readonly<Record<string, unknown>>;
|
|
631
|
+
}
|
|
632
|
+
/**
|
|
633
|
+
* Where snapshots go. Implementations decide the medium: in-memory for tests, a file via
|
|
634
|
+
* the file backend, any {@link KvLike} via the KV backend. The engine calls `save` on
|
|
635
|
+
* {@link RACS.flush} and {@link RACS.close}, and `load` once on startup.
|
|
636
|
+
*/
|
|
637
|
+
interface StateBackend {
|
|
638
|
+
/** Returns the last saved snapshot, or `undefined` when none exists yet. */
|
|
639
|
+
load(): Promise<StateSnapshot | undefined>;
|
|
640
|
+
/** Persists the snapshot, replacing any previous one. */
|
|
641
|
+
save(snapshot: StateSnapshot): Promise<void>;
|
|
642
|
+
}
|
|
643
|
+
/**
|
|
644
|
+
* Construction options for the engine. Every field is optional; the zero-config default
|
|
645
|
+
* is a fully working in-memory engine.
|
|
646
|
+
*/
|
|
647
|
+
interface RACSOptions {
|
|
648
|
+
/**
|
|
649
|
+
* Per-provider overrides merged over the shipped profiles. Use this when a provider
|
|
650
|
+
* changes its terms before RACS releases, or to describe a `'custom'` provider.
|
|
651
|
+
*/
|
|
652
|
+
readonly profiles?: Partial<Readonly<Record<ProviderId, Partial<ProviderProfile>>>>;
|
|
653
|
+
/**
|
|
654
|
+
* Model prices for USD reporting, see {@link PricingTable}. Without it the engine still
|
|
655
|
+
* reports every token-denominated statistic, just no USD figures.
|
|
656
|
+
*/
|
|
657
|
+
readonly pricing?: PricingTable;
|
|
658
|
+
/**
|
|
659
|
+
* Maximum number of distinct prefixes tracked before least-recently-used eviction, with
|
|
660
|
+
* a `'limit.reached'` telemetry event on each eviction. Default 1000.
|
|
661
|
+
*/
|
|
662
|
+
readonly maxPrefixes?: number;
|
|
663
|
+
/**
|
|
664
|
+
* Seed for the deterministic id generator. Same seed, same inputs, same ids. Default 7.
|
|
665
|
+
* RACS never calls the global random generator.
|
|
666
|
+
*/
|
|
667
|
+
readonly seed?: number;
|
|
668
|
+
/** Persistence backend, see {@link StateBackend}. Without it state is in-memory only. */
|
|
669
|
+
readonly state?: StateBackend;
|
|
670
|
+
/**
|
|
671
|
+
* Time source returning milliseconds since the Unix epoch. Default is the platform wall
|
|
672
|
+
* clock. Inject a fake in tests to make every timestamp in the public record
|
|
673
|
+
* deterministic.
|
|
674
|
+
*/
|
|
675
|
+
readonly clock?: () => number;
|
|
676
|
+
}
|
|
677
|
+
/**
|
|
678
|
+
* The engine surface. All planning and accounting methods are synchronous and pure with
|
|
679
|
+
* respect to the network; only `flush` and `close` are asynchronous because persistence
|
|
680
|
+
* may be. See the product invariant at the top of this module: nothing on this interface
|
|
681
|
+
* ever performs a provider API call.
|
|
682
|
+
*/
|
|
683
|
+
interface RACS {
|
|
684
|
+
/**
|
|
685
|
+
* Computes the full cache plan for one prompt: prefix key, directives, lints, and
|
|
686
|
+
* break-even economics. Records the plan fingerprint for drift detection and feeds the
|
|
687
|
+
* refresh schedule. Emits `'plan.created'`, and `'prefix.drifted'` when applicable.
|
|
688
|
+
*
|
|
689
|
+
* @throws RacsError code `'ERR_INVALID_INPUT'` on malformed input.
|
|
690
|
+
*/
|
|
691
|
+
plan(input: PlanInput): CachePlan;
|
|
692
|
+
/**
|
|
693
|
+
* Runs only the lint pass over the input, no fingerprinting, no drift tracking, no
|
|
694
|
+
* telemetry. Use it in CI to gate prompt changes before they ship.
|
|
695
|
+
*
|
|
696
|
+
* @throws RacsError code `'ERR_INVALID_INPUT'` on malformed input.
|
|
697
|
+
*/
|
|
698
|
+
lint(input: PlanInput): readonly LintFinding[];
|
|
699
|
+
/**
|
|
700
|
+
* Ingests one normalized usage report into the ledger and updates per-prefix
|
|
701
|
+
* aggregates. Emits `'usage.recorded'`.
|
|
702
|
+
*
|
|
703
|
+
* @throws RacsError code `'ERR_INVALID_INPUT'` on malformed usage.
|
|
704
|
+
*/
|
|
705
|
+
record(usage: CacheUsage): void;
|
|
706
|
+
/**
|
|
707
|
+
* Returns ledger-wide statistics, optionally narrowed to one prefix or one provider.
|
|
708
|
+
* USD figures appear only where the pricing table covers the models involved.
|
|
709
|
+
*/
|
|
710
|
+
stats(filter?: {
|
|
711
|
+
prefixKey?: string;
|
|
712
|
+
provider?: ProviderId;
|
|
713
|
+
}): LedgerStats;
|
|
714
|
+
/**
|
|
715
|
+
* Returns every keep-warm entry due at or before `now` (default: the injected clock).
|
|
716
|
+
* Emits `'refresh.due'` per returned entry. The host performs the actual warming call,
|
|
717
|
+
* then reports it via {@link RACS.markRefreshed}.
|
|
718
|
+
*/
|
|
719
|
+
schedule(now?: number): readonly RefreshEntry[];
|
|
720
|
+
/**
|
|
721
|
+
* Tells the engine the host touched the cache for this prefix at `now` (default: the
|
|
722
|
+
* injected clock), restarting that entry's TTL window and rescheduling its refresh.
|
|
723
|
+
*/
|
|
724
|
+
markRefreshed(prefixKey: string, now?: number): void;
|
|
725
|
+
/** Returns the most recent drift reports in chronological order, newest last, capped at `limit` when given. */
|
|
726
|
+
drifts(limit?: number): readonly DriftReport[];
|
|
727
|
+
/**
|
|
728
|
+
* Clears engine bookkeeping for every matching prefix: drift fingerprints, keep-warm
|
|
729
|
+
* refresh schedules, and resource registry entries. Emits a `'resource.action'`
|
|
730
|
+
* telemetry event with action `'delete'` for each resource-family entry invalidated, so
|
|
731
|
+
* the host can mirror the deletion onto the provider (Gemini `cachedContents` deletes
|
|
732
|
+
* especially). Returns the number of distinct prefixes invalidated. Without a filter
|
|
733
|
+
* everything is cleared.
|
|
734
|
+
*
|
|
735
|
+
* Built for credential rotation: provider-side cached resources may be scoped to the
|
|
736
|
+
* credential or workspace that created them, so after a key rotates their handles are
|
|
737
|
+
* unreliable or orphaned: invalidate and re-plan from scratch. Ledger statistics and
|
|
738
|
+
* drift history are accounting records, not cache state, and are deliberately left
|
|
739
|
+
* untouched.
|
|
740
|
+
*/
|
|
741
|
+
invalidate(filter?: {
|
|
742
|
+
readonly prefixKey?: string;
|
|
743
|
+
readonly provider?: ProviderId;
|
|
744
|
+
}): number;
|
|
745
|
+
/**
|
|
746
|
+
* Returns the effective profile for a provider, shipped values merged with the
|
|
747
|
+
* {@link RACSOptions.profiles} overrides, the numbers the planner is actually using.
|
|
748
|
+
*/
|
|
749
|
+
profileOf(provider: ProviderId): ProviderProfile;
|
|
750
|
+
/** Subscribes to telemetry. Returns the unsubscribe function, which is safe to call more than once. */
|
|
751
|
+
on(listener: TelemetryListener): () => void;
|
|
752
|
+
/** Persists a snapshot through the configured state backend, no-op without one. */
|
|
753
|
+
flush(): Promise<void>;
|
|
754
|
+
/** Flushes, then releases internal resources. The instance must not be used afterward. */
|
|
755
|
+
close(): Promise<void>;
|
|
756
|
+
}
|
|
757
|
+
|
|
758
|
+
export type { AdapterFamily as A, BreakEven as B, CacheDirective as C, DriftReport as D, ExpectedReuse as E, KvLike as K, LedgerStats as L, PlanInput as P, RACS as R, StateBackend as S, TelemetryEvent as T, CachePlan as a, CacheTtl as b, CacheUsage as c, LintCode as d, LintFinding as e, PrefixStats as f, Pricing as g, PricingTable as h, PromptSegment as i, ProviderId as j, ProviderProfile as k, RACSOptions as l, RefreshEntry as m, SegmentRole as n, Stability as o, StateSnapshot as p, TelemetryListener as q };
|