@takk/racs 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +70 -0
- package/LICENSE +190 -0
- package/NOTICE +40 -0
- package/README.md +381 -0
- package/SECURITY.md +57 -0
- package/dist/cli/index.js +3016 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/edge/index.cjs +2000 -0
- package/dist/edge/index.cjs.map +1 -0
- package/dist/edge/index.d.cts +598 -0
- package/dist/edge/index.d.ts +598 -0
- package/dist/edge/index.js +1987 -0
- package/dist/edge/index.js.map +1 -0
- package/dist/index.cjs +2071 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +39 -0
- package/dist/index.d.ts +39 -0
- package/dist/index.js +2057 -0
- package/dist/index.js.map +1 -0
- package/dist/integrations/index.cjs +123 -0
- package/dist/integrations/index.cjs.map +1 -0
- package/dist/integrations/index.d.cts +285 -0
- package/dist/integrations/index.d.ts +285 -0
- package/dist/integrations/index.js +117 -0
- package/dist/integrations/index.js.map +1 -0
- package/dist/otel/index.cjs +93 -0
- package/dist/otel/index.cjs.map +1 -0
- package/dist/otel/index.d.cts +105 -0
- package/dist/otel/index.d.ts +105 -0
- package/dist/otel/index.js +91 -0
- package/dist/otel/index.js.map +1 -0
- package/dist/types-DQ7-9sk3.d.cts +758 -0
- package/dist/types-DQ7-9sk3.d.ts +758 -0
- package/dist/vercel/index.cjs +209 -0
- package/dist/vercel/index.cjs.map +1 -0
- package/dist/vercel/index.d.cts +210 -0
- package/dist/vercel/index.d.ts +210 -0
- package/dist/vercel/index.js +206 -0
- package/dist/vercel/index.js.map +1 -0
- package/dist/web/index.cjs +2000 -0
- package/dist/web/index.cjs.map +1 -0
- package/dist/web/index.d.cts +2 -0
- package/dist/web/index.d.ts +2 -0
- package/dist/web/index.js +1987 -0
- package/dist/web/index.js.map +1 -0
- package/package.json +189 -0
|
@@ -0,0 +1,598 @@
|
|
|
1
|
+
import { l as RACSOptions, R as RACS, h as PricingTable, c as CacheUsage, j as ProviderId, L as LedgerStats, e as LintFinding, P as PlanInput, k as ProviderProfile, g as Pricing, C as CacheDirective, B as BreakEven, K as KvLike, S as StateBackend } from '../types-DQ7-9sk3.js';
|
|
2
|
+
export { A as AdapterFamily, a as CachePlan, b as CacheTtl, D as DriftReport, E as ExpectedReuse, d as LintCode, f as PrefixStats, i as PromptSegment, m as RefreshEntry, n as SegmentRole, o as Stability, p as StateSnapshot, T as TelemetryEvent, q as TelemetryListener } from '../types-DQ7-9sk3.js';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Engine core of RACS (Remote Agent Context Store): wires the analyzer, the planner, the
|
|
6
|
+
* ledger, drift fingerprints, the keep-warm keeper, and the resource registry behind the
|
|
7
|
+
* one public {@link RACS} surface.
|
|
8
|
+
*
|
|
9
|
+
* The core owns everything stateful: deterministic plan identity, prefix bookkeeping,
|
|
10
|
+
* telemetry fan-out, and persistence. The modules it wires stay pure or self-contained, so
|
|
11
|
+
* this file is the only place where their interactions are decided.
|
|
12
|
+
*
|
|
13
|
+
* Determinism: plan ids derive from a seeded counter ({@link RACSOptions.seed}, default 7)
|
|
14
|
+
* through the seeded short-id generator, never from the platform UUID or the global random
|
|
15
|
+
* generator. The clock is read once per mutating call and is injectable for tests.
|
|
16
|
+
*
|
|
17
|
+
* @packageDocumentation
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Creates one RACS (Remote Agent Context Store) engine, the single entry point of the
|
|
22
|
+
* package. Zero-config by default: no options yields a fully working in-memory engine with
|
|
23
|
+
* the shipped provider profiles, seed 7, a 1000-prefix cap, and the platform wall clock.
|
|
24
|
+
*
|
|
25
|
+
* Persistence note: when {@link RACSOptions.state} is given, the previous snapshot is
|
|
26
|
+
* restored asynchronously after construction, section by section, skipping anything
|
|
27
|
+
* corrupt. Hosts that need restored state before their first plan should `await
|
|
28
|
+
* racs.flush()` once after construction; `flush` waits for the restore to settle.
|
|
29
|
+
*
|
|
30
|
+
* @param options - See {@link RACSOptions}, every field optional.
|
|
31
|
+
* @returns The engine, see {@link RACS} for the full surface contract.
|
|
32
|
+
*
|
|
33
|
+
* @example
|
|
34
|
+
* ```ts
|
|
35
|
+
* const racs = createRACS({ seed: 42 });
|
|
36
|
+
* const plan = racs.plan({
|
|
37
|
+
* provider: 'anthropic',
|
|
38
|
+
* model: 'claude-sonnet-4-5',
|
|
39
|
+
* segments: [
|
|
40
|
+
* { id: 'system', role: 'system', stability: 'stable', content: SYSTEM_PROMPT },
|
|
41
|
+
* { id: 'turn', role: 'dynamic', stability: 'volatile', content: userTurn },
|
|
42
|
+
* ],
|
|
43
|
+
* reuse: { intervalSeconds: 60 },
|
|
44
|
+
* });
|
|
45
|
+
* // Apply plan.directives to the API call the host owns, then report usage back:
|
|
46
|
+
* racs.record({ provider: 'anthropic', model: 'claude-sonnet-4-5', prefixKey: plan.prefixKey,
|
|
47
|
+
* inputTokens: 5000, cacheReadTokens: 4200 });
|
|
48
|
+
* ```
|
|
49
|
+
*/
|
|
50
|
+
declare function createRACS(options?: RACSOptions): RACS;
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* Error model for RACS (Remote Agent Context Store).
|
|
54
|
+
*
|
|
55
|
+
* RACS throws exactly one error class so that host applications can catch and branch on a
|
|
56
|
+
* single type, then dispatch on the stable machine-readable {@link RacsError.code}. New codes
|
|
57
|
+
* may be added in minor versions, so consumers must treat the code space as open and fall back
|
|
58
|
+
* gracefully on codes they do not recognize.
|
|
59
|
+
*
|
|
60
|
+
* @packageDocumentation
|
|
61
|
+
*/
|
|
62
|
+
/**
|
|
63
|
+
* The only error type thrown by RACS.
|
|
64
|
+
*
|
|
65
|
+
* Invariants:
|
|
66
|
+
* - `name` is always the literal string `'RacsError'`, safe for cross-realm checks where
|
|
67
|
+
* `instanceof` fails (multiple bundles, workers, iframes).
|
|
68
|
+
* - `code` is a stable, machine-readable, SCREAMING_SNAKE identifier prefixed with `ERR_`.
|
|
69
|
+
* It never changes meaning across versions, although new codes may appear in minors.
|
|
70
|
+
* - `message` is human-readable English prose intended for logs, never for parsing.
|
|
71
|
+
*
|
|
72
|
+
* @example
|
|
73
|
+
* ```ts
|
|
74
|
+
* try {
|
|
75
|
+
* racs.plan(input);
|
|
76
|
+
* } catch (error) {
|
|
77
|
+
* if (error instanceof RacsError && error.code === 'ERR_INVALID_INPUT') {
|
|
78
|
+
* // The caller sent a malformed PlanInput, fix the call site.
|
|
79
|
+
* }
|
|
80
|
+
* }
|
|
81
|
+
* ```
|
|
82
|
+
*/
|
|
83
|
+
declare class RacsError extends Error {
|
|
84
|
+
/**
|
|
85
|
+
* Stable machine-readable error code, for example `'ERR_INVALID_INPUT'`.
|
|
86
|
+
*
|
|
87
|
+
* Branch on this field, never on `message`. The code space is minor-extensible.
|
|
88
|
+
*/
|
|
89
|
+
readonly code: string;
|
|
90
|
+
/**
|
|
91
|
+
* @param message - Human-readable description of what went wrong and how to fix it.
|
|
92
|
+
* @param code - Stable machine-readable code, see {@link RacsError.code}.
|
|
93
|
+
*/
|
|
94
|
+
constructor(message: string, code: string);
|
|
95
|
+
/**
|
|
96
|
+
* Builds a {@link RacsError} with code `'ERR_INVALID_INPUT'`.
|
|
97
|
+
*
|
|
98
|
+
* Used for every caller-side contract violation: malformed segments, a segment carrying
|
|
99
|
+
* neither `content` nor `contentHash`, negative token counts, unknown TTL strings, and any
|
|
100
|
+
* other input the type system cannot reject for untyped JavaScript callers.
|
|
101
|
+
*
|
|
102
|
+
* @param message - Human-readable description of the invalid input.
|
|
103
|
+
* @returns A new error instance, never thrown by this factory itself.
|
|
104
|
+
*/
|
|
105
|
+
static invalid(message: string): RacsError;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/**
|
|
109
|
+
* Usage ledger of RACS (Remote Agent Context Store): aggregates normalized provider usage
|
|
110
|
+
* reports into per-prefix and ledger-wide hit ratios and USD savings.
|
|
111
|
+
*
|
|
112
|
+
* The ledger holds aggregates only, never prompt content, so its serialized form leaks
|
|
113
|
+
* nothing. It is synchronous, allocation-light, and bounded: at most `maxPrefixes` distinct
|
|
114
|
+
* prefixes are tracked, with least-recently-used eviction beyond that.
|
|
115
|
+
*
|
|
116
|
+
* @packageDocumentation
|
|
117
|
+
*/
|
|
118
|
+
|
|
119
|
+
/**
|
|
120
|
+
* One serialized prefix aggregate, see {@link LedgerJSON}.
|
|
121
|
+
*/
|
|
122
|
+
interface LedgerEntryJSON {
|
|
123
|
+
/** Aggregate key: the usage `prefixKey`, or `provider:model` for plan-less usage. */
|
|
124
|
+
readonly key: string;
|
|
125
|
+
/** Provider of the aggregated calls. */
|
|
126
|
+
readonly provider: ProviderId;
|
|
127
|
+
/** Model of the aggregated calls, the {@link PricingTable} lookup key. */
|
|
128
|
+
readonly model: string;
|
|
129
|
+
/** Number of usage records aggregated. */
|
|
130
|
+
readonly calls: number;
|
|
131
|
+
/** Total tokens served from cache. */
|
|
132
|
+
readonly readTokens: number;
|
|
133
|
+
/** Total tokens written to 5-minute-TTL caches. */
|
|
134
|
+
readonly write5mTokens: number;
|
|
135
|
+
/** Total tokens written to 1-hour-TTL caches. */
|
|
136
|
+
readonly write1hTokens: number;
|
|
137
|
+
/** Total input tokens that were neither read from nor written to cache. */
|
|
138
|
+
readonly uncachedTokens: number;
|
|
139
|
+
}
|
|
140
|
+
/**
|
|
141
|
+
* Serialized ledger state, the shape produced by {@link Ledger.toJSON} and consumed by
|
|
142
|
+
* {@link Ledger.fromJSON}. Entries are ordered least-recently-used first, so a round trip
|
|
143
|
+
* preserves eviction order exactly. Pricing is configuration, not state, and is therefore
|
|
144
|
+
* re-supplied to `fromJSON` instead of being serialized.
|
|
145
|
+
*/
|
|
146
|
+
interface LedgerJSON {
|
|
147
|
+
/** The eviction cap the ledger was running with. */
|
|
148
|
+
readonly maxPrefixes: number;
|
|
149
|
+
/** Per-prefix aggregates, least-recently-used first. */
|
|
150
|
+
readonly entries: readonly LedgerEntryJSON[];
|
|
151
|
+
}
|
|
152
|
+
/**
|
|
153
|
+
* Bounded, LRU-evicting accumulator of {@link CacheUsage} records.
|
|
154
|
+
*
|
|
155
|
+
* Aggregation key: `usage.prefixKey` when the call executed a RACS plan, otherwise the
|
|
156
|
+
* synthetic `provider:model` pair, so plan-less calls still aggregate into ledger totals
|
|
157
|
+
* as {@link CacheUsage.prefixKey} documents.
|
|
158
|
+
*
|
|
159
|
+
* Hit-ratio definition: `readTokens / (readTokens + write5mTokens + write1hTokens + uncachedTokens)`, the
|
|
160
|
+
* share of all input-side token traffic that was served from cache. The denominator counts
|
|
161
|
+
* cached reads, cache writes of both TTL tiers, and uncached input, so a prefix that keeps
|
|
162
|
+
* paying write premiums without ever reading back scores zero, exactly the failure the
|
|
163
|
+
* ratio exists to expose. A zero denominator reports a ratio of zero. Uncached input
|
|
164
|
+
* derives from the all-in {@link CacheUsage.inputTokens} convention, see
|
|
165
|
+
* {@link Ledger.record}, so the denominator equals the all-in billed input when the
|
|
166
|
+
* source reports consistently.
|
|
167
|
+
*
|
|
168
|
+
* USD math, computed only for models the {@link PricingTable} covers:
|
|
169
|
+
* - `savedUsd = readTokens / 1e6 * (inputPerMTok - cacheReadPerMTok)`, what the cached
|
|
170
|
+
* reads would have cost at base input price minus what they actually cost. Requires
|
|
171
|
+
* `cacheReadPerMTok`, without it the model counts as not covered for savings.
|
|
172
|
+
* - `writeSpendUsd` is the write PREMIUM over base input price, not the full write bill:
|
|
173
|
+
* `write5mTokens / 1e6 * (cacheWrite5mPerMTok - inputPerMTok)` plus the 1-hour tier
|
|
174
|
+
* likewise. A mispriced table can make a tier premium negative, which would silently
|
|
175
|
+
* inflate savings, so each tier term is clamped at zero before summing.
|
|
176
|
+
* - `netUsd = savedUsd - writeSpendUsd`, negative when caching lost money.
|
|
177
|
+
*
|
|
178
|
+
* Ledger-wide USD totals sum every prefix whose model the pricing table covers, prefixes
|
|
179
|
+
* without pricing contribute only token statistics, and the USD fields are omitted
|
|
180
|
+
* entirely when no aggregated prefix is covered.
|
|
181
|
+
*/
|
|
182
|
+
declare class Ledger {
|
|
183
|
+
private readonly pricing;
|
|
184
|
+
private readonly maxPrefixes;
|
|
185
|
+
/** Map iteration order doubles as recency order: oldest first, see {@link Ledger.record}. */
|
|
186
|
+
private readonly aggregates;
|
|
187
|
+
/**
|
|
188
|
+
* @param pricing - Per-model price cards for USD figures, always user-supplied. Without
|
|
189
|
+
* it every token-denominated statistic is still reported, just no USD.
|
|
190
|
+
* @param maxPrefixes - Cap on distinct tracked prefixes before LRU eviction.
|
|
191
|
+
*/
|
|
192
|
+
constructor(pricing?: PricingTable, maxPrefixes?: number);
|
|
193
|
+
/**
|
|
194
|
+
* Ingests one normalized usage record into the aggregate for its prefix.
|
|
195
|
+
*
|
|
196
|
+
* Per call, `uncachedTokens` accumulates
|
|
197
|
+
* `max(0, inputTokens - cacheReadTokens - cacheWriteTokens5m - cacheWriteTokens1h)`:
|
|
198
|
+
* {@link CacheUsage.inputTokens} is the ALL-IN billed input including cached reads and
|
|
199
|
+
* cache writes of both tiers, so the uncached remainder subtracts all three. Clamped at
|
|
200
|
+
* zero because a source reporting more cached traffic than billed input is a reporting
|
|
201
|
+
* artifact that must not drive the aggregate negative.
|
|
202
|
+
*
|
|
203
|
+
* @param usage - The normalized usage report, see {@link CacheUsage}.
|
|
204
|
+
* @returns `hit` is true when the call read at least one cached token. `evicted` names
|
|
205
|
+
* the least-recently-used prefix key dropped to stay within `maxPrefixes`, present
|
|
206
|
+
* only when an eviction happened.
|
|
207
|
+
*/
|
|
208
|
+
record(usage: CacheUsage): {
|
|
209
|
+
hit: boolean;
|
|
210
|
+
evicted?: string;
|
|
211
|
+
};
|
|
212
|
+
/**
|
|
213
|
+
* Returns ledger-wide statistics with the per-prefix breakdown, optionally narrowed to
|
|
214
|
+
* one prefix key or one provider. The breakdown is sorted by prefix key ascending for
|
|
215
|
+
* stable, diffable output. USD presence rules are documented on {@link Ledger}.
|
|
216
|
+
*
|
|
217
|
+
* @param filter - Optional narrowing, both fields combine conjunctively when given.
|
|
218
|
+
*/
|
|
219
|
+
stats(filter?: {
|
|
220
|
+
prefixKey?: string;
|
|
221
|
+
provider?: ProviderId;
|
|
222
|
+
}): LedgerStats;
|
|
223
|
+
/**
|
|
224
|
+
* Serializes every aggregate, least-recently-used first. The result is pure JSON data,
|
|
225
|
+
* carries no prompt content, and round-trips through {@link Ledger.fromJSON}.
|
|
226
|
+
*/
|
|
227
|
+
toJSON(): LedgerJSON;
|
|
228
|
+
/**
|
|
229
|
+
* Rebuilds a ledger from {@link Ledger.toJSON} output, restoring aggregates and their
|
|
230
|
+
* recency order. Pricing is configuration, pass the current table, it is deliberately
|
|
231
|
+
* not part of the snapshot so stale prices never resurrect from persistence.
|
|
232
|
+
*
|
|
233
|
+
* @param json - A previously serialized ledger.
|
|
234
|
+
* @param pricing - The pricing table to compute USD figures with from now on.
|
|
235
|
+
*/
|
|
236
|
+
static fromJSON(json: LedgerJSON, pricing?: PricingTable): Ledger;
|
|
237
|
+
/** Computes one {@link PrefixStats} from a live aggregate, USD rules per {@link Ledger}. */
|
|
238
|
+
private prefixStats;
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
/**
|
|
242
|
+
* The directive planner of RACS (Remote Agent Context Store).
|
|
243
|
+
*
|
|
244
|
+
* One pure class maps an analyzed prompt onto the cache-control surface of one provider
|
|
245
|
+
* family: explicit breakpoints, routing keys, server-side cache resources, or a reasoned
|
|
246
|
+
* no-op for passive providers. The planner is deterministic and stateless, it reads no
|
|
247
|
+
* clock and no random source, so identical inputs always yield identical directives. The
|
|
248
|
+
* engine core owns fingerprinting, drift tracking, refresh timing, and telemetry.
|
|
249
|
+
*
|
|
250
|
+
* @packageDocumentation
|
|
251
|
+
*/
|
|
252
|
+
|
|
253
|
+
/**
|
|
254
|
+
* Aggregates that the analysis stage computes once, so the planner never recounts the prompt.
|
|
255
|
+
*/
|
|
256
|
+
interface PlanAnalysis {
|
|
257
|
+
/** Findings the lint pass produced; the planner contributes its own separately through `extraFindings`. */
|
|
258
|
+
findings: LintFinding[];
|
|
259
|
+
/** Token count of the cacheable stable prefix, exact or estimated per segment rules. */
|
|
260
|
+
stableTokens: number;
|
|
261
|
+
/** Token count of the whole prompt, same exact-or-estimated provenance. */
|
|
262
|
+
totalTokens: number;
|
|
263
|
+
/**
|
|
264
|
+
* Count of leading segments, in request order, forming the left-anchored cacheable
|
|
265
|
+
* prefix. Prefix caches reuse nothing past this boundary on any provider family.
|
|
266
|
+
*/
|
|
267
|
+
orderedStableBoundary: number;
|
|
268
|
+
}
|
|
269
|
+
/**
|
|
270
|
+
* The planner's contribution to one cache plan, merged by the engine core with the plan
|
|
271
|
+
* identity, the fingerprints, and the analysis findings.
|
|
272
|
+
*/
|
|
273
|
+
interface PlannerResult {
|
|
274
|
+
/** Provider-faithful instructions in application order. */
|
|
275
|
+
directives: CacheDirective[];
|
|
276
|
+
/** Cache economics, present when profile multipliers or pricing allow computing them. */
|
|
277
|
+
breakEven?: BreakEven;
|
|
278
|
+
/** One dense human-readable sentence per planning decision. */
|
|
279
|
+
reasoning: string;
|
|
280
|
+
/** Findings only the planning stage can detect, appended to the analysis findings. */
|
|
281
|
+
extraFindings: LintFinding[];
|
|
282
|
+
}
|
|
283
|
+
/**
|
|
284
|
+
* Maps one analyzed prompt to provider-faithful cache directives, per adapter family.
|
|
285
|
+
*
|
|
286
|
+
* Family semantics implemented here are research snapshots of June 2026, sources cited on
|
|
287
|
+
* the constants above, and every number flows from the {@link ProviderProfile}, so a
|
|
288
|
+
* profile override updates the planner without a release.
|
|
289
|
+
*/
|
|
290
|
+
declare class Planner {
|
|
291
|
+
/**
|
|
292
|
+
* Produces directives, break-even economics, and planner-stage findings for one input.
|
|
293
|
+
*
|
|
294
|
+
* @param input - The prompt being planned, segments in request order.
|
|
295
|
+
* @param profile - Effective provider profile, overrides already merged by the core.
|
|
296
|
+
* @param analysis - Aggregates computed by the analysis stage, see {@link PlanAnalysis}.
|
|
297
|
+
* @param prefixKey - Deterministic key of the stable prefix, computed by the core.
|
|
298
|
+
* @param pricing - Optional model price card, refines multipliers and storage math.
|
|
299
|
+
* @param knownResource - Resource family only: whether the core already tracks a live
|
|
300
|
+
* cache resource for this prefix. First sight emits `'create'`, later sights `'reuse'`,
|
|
301
|
+
* and the core swaps `'reuse'` for `'refresh'` when its clock sits inside the last 10
|
|
302
|
+
* percent of the TTL window, the planner emits the shape, the core decides the timing.
|
|
303
|
+
*/
|
|
304
|
+
plan(input: PlanInput, profile: ProviderProfile, analysis: PlanAnalysis, prefixKey: string, pricing?: Pricing, knownResource?: boolean): PlannerResult;
|
|
305
|
+
private planFamily;
|
|
306
|
+
/**
|
|
307
|
+
* Breakpoint family (anthropic, bedrock, hermes, microsoft-foundry): explicit markers,
|
|
308
|
+
* one TTL tier chosen from the declared reuse. The deepest stable boundary is always
|
|
309
|
+
* marked because the last marker determines left-anchored coverage, remaining budget
|
|
310
|
+
* goes to the largest stable spans by role weight. Economics follow the refresh-on-use
|
|
311
|
+
* model documented on {@link breakpointBreakEven} inside the TTL window and the
|
|
312
|
+
* touch-cost model on {@link keepWarmBreakEven} beyond it; when neither sustains the
|
|
313
|
+
* cache the planner declines with a reasoned `'none'` plus a `'write-premium-trap'`
|
|
314
|
+
* finding, so an emitted breakpoint plan is never knowingly unprofitable.
|
|
315
|
+
*/
|
|
316
|
+
private planBreakpoint;
|
|
317
|
+
/**
|
|
318
|
+
* Routing-key family (openai, xai, mistral, moonshot, openrouter): the provider caches
|
|
319
|
+
* implicitly, the key only steers identical prefixes to the same cache shard, and the
|
|
320
|
+
* extended 24-hour retention tier rides along when supported and the reuse is sparse
|
|
321
|
+
* (OpenAI `prompt_cache_key` retention as of June 2026).
|
|
322
|
+
*/
|
|
323
|
+
private planRoutingKey;
|
|
324
|
+
/**
|
|
325
|
+
* Resource family (google): the cache is a server resource the host creates, reuses,
|
|
326
|
+
* refreshes, and deletes, billed per token-hour of storage while it stays alive.
|
|
327
|
+
*/
|
|
328
|
+
private planResource;
|
|
329
|
+
/**
|
|
330
|
+
* Passive family (groq, deepseek, ollama, lmstudio, huggingface, custom): no control
|
|
331
|
+
* surface exists, stable-first ordering and accounting are the whole contribution.
|
|
332
|
+
*/
|
|
333
|
+
private planPassive;
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
/**
|
|
337
|
+
* Deterministic structural linting of a {@link PlanInput} segment list, the documented
|
|
338
|
+
* failure modes of production prefix caching caught before a single token is billed.
|
|
339
|
+
*
|
|
340
|
+
* Provider semantics this module leans on, researched June 2026:
|
|
341
|
+
* - Prefix caches are strictly left-anchored on every provider family, so one volatile
|
|
342
|
+
* byte invalidates everything after it (Anthropic prompt caching docs, June 2026;
|
|
343
|
+
* OpenAI prompt caching guide, June 2026).
|
|
344
|
+
* - Breakpoint providers hash tool definitions ahead of the message list, so a volatile
|
|
345
|
+
* `'tools'` segment defeats the cache for the whole request (Anthropic `cache_control`
|
|
346
|
+
* semantics, June 2026).
|
|
347
|
+
* - Prefixes below the provider minimum are silently uncached, no error, no usage signal
|
|
348
|
+
* (1024 tokens on most Anthropic and OpenAI models as of June 2026); the minimum itself
|
|
349
|
+
* always comes from the {@link ProviderProfile}, never from constants here.
|
|
350
|
+
*
|
|
351
|
+
* Privacy contract: content heuristics run only on segments that carry `content`.
|
|
352
|
+
* Hash-only segments (only `contentHash` present) are skipped by design, RACS never sees
|
|
353
|
+
* their text so there is nothing to scan. Finding messages never embed matched substrings,
|
|
354
|
+
* because findings travel inside persisted plans while segment content must never be
|
|
355
|
+
* persisted; matches are referenced by a short content digest instead, which still lets
|
|
356
|
+
* the owner locate the offending text in their own prompt source.
|
|
357
|
+
*
|
|
358
|
+
* Determinism: pure function of the input and profile, no clock, no randomness, findings
|
|
359
|
+
* are emitted in a fixed order (structural lints, then per-segment scans in segment order,
|
|
360
|
+
* then the prefix-level summary).
|
|
361
|
+
*/
|
|
362
|
+
|
|
363
|
+
/** Result of one {@link PrefixAnalyzer.analyze} pass, pure data. */
|
|
364
|
+
interface PrefixAnalysis {
|
|
365
|
+
/** Lint findings in deterministic emission order. */
|
|
366
|
+
findings: LintFinding[];
|
|
367
|
+
/** Token total of the longest stable-or-semi run from the start, the cacheable prefix. */
|
|
368
|
+
stableTokens: number;
|
|
369
|
+
/** Token total of the whole prompt, exact or estimated per segment rules. */
|
|
370
|
+
totalTokens: number;
|
|
371
|
+
/** Index of the first volatile segment, `segments.length` when none is volatile. */
|
|
372
|
+
orderedStableBoundary: number;
|
|
373
|
+
}
|
|
374
|
+
/**
|
|
375
|
+
* Structural linter over a segment list and one provider profile. Stateless, every call
|
|
376
|
+
* is independent, safe to share one instance across plans.
|
|
377
|
+
*/
|
|
378
|
+
declare class PrefixAnalyzer {
|
|
379
|
+
/**
|
|
380
|
+
* Runs every structural lint and computes the prefix geometry the planner needs.
|
|
381
|
+
*
|
|
382
|
+
* @param input - The plan input whose segments are analyzed, in request order.
|
|
383
|
+
* @param profile - Effective provider profile, supplies `minCacheableTokens`.
|
|
384
|
+
* @returns Findings plus the cacheable-prefix token counts and the volatile boundary.
|
|
385
|
+
*/
|
|
386
|
+
analyze(input: PlanInput, profile: ProviderProfile): PrefixAnalysis;
|
|
387
|
+
/** `'segment-order'`: the first volatile segment that precedes a cacheable one. */
|
|
388
|
+
private lintSegmentOrder;
|
|
389
|
+
/**
|
|
390
|
+
* `'volatile-early'`: a volatile segment inside the first half of total tokens and
|
|
391
|
+
* before any breakpoint-eligible boundary, the silent-cache-killer layout.
|
|
392
|
+
*
|
|
393
|
+
* A boundary is breakpoint-eligible only inside the leading stable run (a span that
|
|
394
|
+
* contains volatile content can never be read back), so eligibility reduces to the
|
|
395
|
+
* leading run reaching the provider minimum. The first volatile segment is reported because it
|
|
396
|
+
* is the one that caps the run. Its start offset equals `stableTokens` by construction.
|
|
397
|
+
*/
|
|
398
|
+
private lintVolatileEarly;
|
|
399
|
+
/** Per-segment lints: declaration checks always, content heuristics only with content. */
|
|
400
|
+
private lintSegment;
|
|
401
|
+
/** `'timestamp-in-stable'`: timestamp-like content inside a stable or semi segment. */
|
|
402
|
+
private lintTimestamps;
|
|
403
|
+
/** `'identifier-in-stable'`: per-request identifier shapes inside a stable segment. */
|
|
404
|
+
private lintIdentifiers;
|
|
405
|
+
/** `'below-minimum'`: the stable prefix is silently uncacheable on this provider. */
|
|
406
|
+
private lintBelowMinimum;
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
/**
|
|
410
|
+
* Shipped provider profiles for RACS (Remote Agent Context Store), the numbers the planner
|
|
411
|
+
* reasons with.
|
|
412
|
+
*
|
|
413
|
+
* Every named provider is a thin parameterization of exactly one {@link AdapterFamily}, which
|
|
414
|
+
* is why one table covers the whole provider landscape without per-provider code paths. The
|
|
415
|
+
* values document provider semantics as researched in June 2026, each entry cites its source
|
|
416
|
+
* and retrieval date in JSDoc. Providers change terms faster than packages release, so every
|
|
417
|
+
* value is overridable per engine instance through {@link RACSOptions.profiles}, merged by
|
|
418
|
+
* {@link resolveProfile}.
|
|
419
|
+
*
|
|
420
|
+
* @packageDocumentation
|
|
421
|
+
*/
|
|
422
|
+
|
|
423
|
+
/**
|
|
424
|
+
* The shipped profile table, one entry per {@link ProviderId}, semantics as documented in
|
|
425
|
+
* June 2026. Treat as read-only defaults: the planner must always go through
|
|
426
|
+
* {@link resolveProfile} so per-engine overrides apply.
|
|
427
|
+
*/
|
|
428
|
+
declare const PROVIDER_PROFILES: Readonly<Record<ProviderId, ProviderProfile>>;
|
|
429
|
+
/**
|
|
430
|
+
* Returns the effective profile for one provider: the shipped table entry shallow-merged
|
|
431
|
+
* with the caller's per-engine override from {@link RACSOptions.profiles}.
|
|
432
|
+
*
|
|
433
|
+
* Merge rules:
|
|
434
|
+
* - Shallow: every override field replaces the shipped field wholesale, `ttls` included.
|
|
435
|
+
* - Override fields holding `undefined` at runtime (possible for untyped JavaScript
|
|
436
|
+
* callers) are ignored rather than clobbering shipped values.
|
|
437
|
+
* - `id` is not overridable, the result always names the requested provider.
|
|
438
|
+
*
|
|
439
|
+
* @param id - Provider whose profile to resolve.
|
|
440
|
+
* @param overrides - Per-provider override map, see {@link RACSOptions.profiles}.
|
|
441
|
+
* @returns The merged profile the planner actually uses.
|
|
442
|
+
* @throws RacsError code `'ERR_INVALID_INPUT'` when `id` names no shipped profile (only
|
|
443
|
+
* reachable from untyped callers) or when the merged `family` is not a known
|
|
444
|
+
* {@link AdapterFamily}.
|
|
445
|
+
*/
|
|
446
|
+
declare function resolveProfile(id: ProviderId, overrides?: RACSOptions['profiles']): ProviderProfile;
|
|
447
|
+
|
|
448
|
+
/**
|
|
449
|
+
* Key-value state backend of RACS (Remote Agent Context Store): one JSON string under one
|
|
450
|
+
* key in any structural {@link KvLike} store, the persistence shape for edge runtimes and
|
|
451
|
+
* multi-instance hosts that already run Redis, Upstash, or Cloudflare KV.
|
|
452
|
+
*
|
|
453
|
+
* RACS never constructs the client and never sees connection credentials, the host passes
|
|
454
|
+
* a ready object, per the product invariant.
|
|
455
|
+
*
|
|
456
|
+
* @packageDocumentation
|
|
457
|
+
*/
|
|
458
|
+
|
|
459
|
+
/**
|
|
460
|
+
* Creates a state backend persisting snapshots as one JSON string in a key-value store.
|
|
461
|
+
*
|
|
462
|
+
* Any client exposing string get, set, and delete wraps into {@link KvLike} in one line,
|
|
463
|
+
* no adapter package needed:
|
|
464
|
+
*
|
|
465
|
+
* @example
|
|
466
|
+
* ```ts
|
|
467
|
+
* // Redis (node-redis or ioredis):
|
|
468
|
+
* const state = kvState({
|
|
469
|
+
* get: (k) => redis.get(k),
|
|
470
|
+
* set: (k, v) => redis.set(k, v),
|
|
471
|
+
* delete: (k) => redis.del(k),
|
|
472
|
+
* });
|
|
473
|
+
*
|
|
474
|
+
* // Upstash Redis:
|
|
475
|
+
* const state = kvState({
|
|
476
|
+
* get: (k) => upstash.get<string>(k),
|
|
477
|
+
* set: (k, v) => upstash.set(k, v),
|
|
478
|
+
* delete: (k) => upstash.del(k),
|
|
479
|
+
* });
|
|
480
|
+
*
|
|
481
|
+
* // Cloudflare KV (a binding named RACS_KV):
|
|
482
|
+
* const state = kvState({
|
|
483
|
+
* get: (k) => env.RACS_KV.get(k),
|
|
484
|
+
* set: (k, v) => env.RACS_KV.put(k, v),
|
|
485
|
+
* delete: (k) => env.RACS_KV.delete(k),
|
|
486
|
+
* });
|
|
487
|
+
* ```
|
|
488
|
+
*
|
|
489
|
+
* Load tolerates both `null` and `undefined` from `get`, the two absence conventions in
|
|
490
|
+
* the wild, and returns `undefined` for either, the normal first-run case. A present but
|
|
491
|
+
* unparseable value throws RacsError `'ERR_STATE_LOAD'`, and a parseable value with the
|
|
492
|
+
* wrong snapshot version throws RacsError `'ERR_STATE_VERSION'`.
|
|
493
|
+
*
|
|
494
|
+
* @param kv - The ready client, see {@link KvLike}.
|
|
495
|
+
* @param key - Storage key for the snapshot, namespace it per engine when several engines
|
|
496
|
+
* share one store.
|
|
497
|
+
*/
|
|
498
|
+
declare function kvState(kv: KvLike, key?: string): StateBackend;
|
|
499
|
+
|
|
500
|
+
/**
|
|
501
|
+
* In-memory state backend of RACS (Remote Agent Context Store): the zero-config default
|
|
502
|
+
* persistence shape, a snapshot held in a closure variable. Nothing survives the process,
|
|
503
|
+
* which is exactly right for tests, demos, and hosts that persist elsewhere.
|
|
504
|
+
*
|
|
505
|
+
* @packageDocumentation
|
|
506
|
+
*/
|
|
507
|
+
|
|
508
|
+
/**
|
|
509
|
+
* Creates a state backend that keeps the latest snapshot in memory.
|
|
510
|
+
*
|
|
511
|
+
* Each call returns an independent backend with its own closure variable, two engines
|
|
512
|
+
* given two `memoryState()` results never see each other's snapshots. The snapshot object
|
|
513
|
+
* is stored by reference, callers must treat saved snapshots as immutable, which the
|
|
514
|
+
* {@link StateSnapshot} readonly contract already requires.
|
|
515
|
+
*
|
|
516
|
+
* @returns A {@link StateBackend} whose `load` resolves to the last saved snapshot, or
|
|
517
|
+
* `undefined` before the first save.
|
|
518
|
+
*/
|
|
519
|
+
declare function memoryState(): StateBackend;
|
|
520
|
+
|
|
521
|
+
/**
|
|
522
|
+
* Deterministic, non-cryptographic hashing primitives for RACS (Remote Agent Context
|
|
523
|
+
* Store): prefix keys, combined keys, and seeded short identifiers.
|
|
524
|
+
*
|
|
525
|
+
* Everything here is pure, allocation-light, dependency-free, and runs identically in
|
|
526
|
+
* browsers, edge runtimes, workers, and Node. No randomness, no clock, no platform globals.
|
|
527
|
+
*
|
|
528
|
+
* Security stance: FNV-1a is NOT a cryptographic hash. It is trivially invertible and
|
|
529
|
+
* collision-constructible by an adversary, so values produced by this module must never
|
|
530
|
+
* gate a security decision (authentication, authorization, integrity verification). They
|
|
531
|
+
* exist solely to give byte-equal inputs equal keys for cache bookkeeping.
|
|
532
|
+
*
|
|
533
|
+
* @packageDocumentation
|
|
534
|
+
*/
|
|
535
|
+
/**
|
|
536
|
+
* Hashes `text` with FNV-1a 64-bit over its UTF-16 code units (two bytes per code unit,
|
|
537
|
+
* low byte first) and returns the digest as a fixed-width 16-character lowercase hex
|
|
538
|
+
* string.
|
|
539
|
+
*
|
|
540
|
+
* Determinism: pure function of the input, identical across runtimes and runs.
|
|
541
|
+
*
|
|
542
|
+
* Collision odds, non-adversarial inputs: with a 64-bit digest the birthday bound puts the
|
|
543
|
+
* probability of any collision among n distinct inputs at roughly n^2 / 2^65, about 1 in
|
|
544
|
+
* 37 million for one million distinct prefixes, reaching 50 percent only near 5 billion
|
|
545
|
+
* inputs. Acceptable for cache-key bookkeeping, where a collision costs at worst one
|
|
546
|
+
* misattributed statistic, never a wrong answer to the host.
|
|
547
|
+
*
|
|
548
|
+
* Non-cryptographic: an adversary can construct collisions at will. Never use the result
|
|
549
|
+
* for security decisions; see the module-level security stance.
|
|
550
|
+
*
|
|
551
|
+
* @param text - The string to hash, hashed as-is with no normalization.
|
|
552
|
+
* @returns 16 lowercase hex characters, zero-padded, for example '0a1b2c3d4e5f6789'.
|
|
553
|
+
*/
|
|
554
|
+
declare function fnv1a64(text: string): string;
|
|
555
|
+
/**
|
|
556
|
+
* Derives one key from several parts by joining them with a separator that cannot appear
|
|
557
|
+
* in hex output (U+001F) and hashing the joined string with {@link fnv1a64}.
|
|
558
|
+
*
|
|
559
|
+
* The separator guarantees that part boundaries contribute to the digest, so
|
|
560
|
+
* `combineKeys(['ab', 'c'])` and `combineKeys(['a', 'bc'])` differ even though their
|
|
561
|
+
* concatenations are equal. Used to fuse segment hashes, provider, model, and agent
|
|
562
|
+
* identity into one prefix key.
|
|
563
|
+
*
|
|
564
|
+
* Same non-cryptographic caveats and collision odds as {@link fnv1a64}.
|
|
565
|
+
*
|
|
566
|
+
* @param parts - Key components in significance order; empty parts are preserved.
|
|
567
|
+
* @returns 16 lowercase hex characters identifying the part sequence.
|
|
568
|
+
*/
|
|
569
|
+
declare function combineKeys(parts: readonly string[]): string;
|
|
570
|
+
|
|
571
|
+
/**
|
|
572
|
+
* Token estimation helpers for RACS (Remote Agent Context Store).
|
|
573
|
+
*
|
|
574
|
+
* Estimates exist to gate minimum-token planning decisions (is this prefix long enough to
|
|
575
|
+
* cache at all, does the write premium pay back), never to bill anyone. Exact counts come
|
|
576
|
+
* from provider usage reports recorded after the fact, and callers with a real tokenizer
|
|
577
|
+
* should pass explicit token counts, which always win, see {@link tokensOf}.
|
|
578
|
+
*
|
|
579
|
+
* Pure module: no dependencies, no randomness, no clock, runs everywhere.
|
|
580
|
+
*
|
|
581
|
+
* @packageDocumentation
|
|
582
|
+
*/
|
|
583
|
+
/**
|
|
584
|
+
* Estimates the token count of `content` as the ceiling of its UTF-16 length divided by
|
|
585
|
+
* 4, the industry characters-per-token heuristic (see {@link CHARS_PER_TOKEN}).
|
|
586
|
+
*
|
|
587
|
+
* Accuracy contract: this is a planning estimate, not a measurement. It is typically
|
|
588
|
+
* within tens of percent for English prose and can be off by more for CJK text or dense
|
|
589
|
+
* code. Exact counts come from provider usage reports; estimates only gate minimum-token
|
|
590
|
+
* planning such as the `'below-minimum'` lint and break-even math. Pass exact counts from
|
|
591
|
+
* a provider tokenizer whenever precision matters.
|
|
592
|
+
*
|
|
593
|
+
* @param content - The text to estimate, measured by UTF-16 length, no normalization.
|
|
594
|
+
* @returns Non-negative integer estimate, 0 for the empty string.
|
|
595
|
+
*/
|
|
596
|
+
declare function estimateTokens(content: string): number;
|
|
597
|
+
|
|
598
|
+
export { BreakEven, CacheDirective, CacheUsage, KvLike, Ledger, type LedgerEntryJSON, type LedgerJSON, LedgerStats, LintFinding, PROVIDER_PROFILES, type PlanAnalysis, PlanInput, Planner, type PlannerResult, type PrefixAnalysis, PrefixAnalyzer, Pricing, PricingTable, ProviderId, ProviderProfile, RACS, RACSOptions, RacsError, StateBackend, combineKeys, createRACS, estimateTokens, fnv1a64, kvState, memoryState, resolveProfile };
|