@takk/racs 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/CHANGELOG.md +70 -0
  2. package/LICENSE +190 -0
  3. package/NOTICE +40 -0
  4. package/README.md +381 -0
  5. package/SECURITY.md +57 -0
  6. package/dist/cli/index.js +3016 -0
  7. package/dist/cli/index.js.map +1 -0
  8. package/dist/edge/index.cjs +2000 -0
  9. package/dist/edge/index.cjs.map +1 -0
  10. package/dist/edge/index.d.cts +598 -0
  11. package/dist/edge/index.d.ts +598 -0
  12. package/dist/edge/index.js +1987 -0
  13. package/dist/edge/index.js.map +1 -0
  14. package/dist/index.cjs +2071 -0
  15. package/dist/index.cjs.map +1 -0
  16. package/dist/index.d.cts +39 -0
  17. package/dist/index.d.ts +39 -0
  18. package/dist/index.js +2057 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/integrations/index.cjs +123 -0
  21. package/dist/integrations/index.cjs.map +1 -0
  22. package/dist/integrations/index.d.cts +285 -0
  23. package/dist/integrations/index.d.ts +285 -0
  24. package/dist/integrations/index.js +117 -0
  25. package/dist/integrations/index.js.map +1 -0
  26. package/dist/otel/index.cjs +93 -0
  27. package/dist/otel/index.cjs.map +1 -0
  28. package/dist/otel/index.d.cts +105 -0
  29. package/dist/otel/index.d.ts +105 -0
  30. package/dist/otel/index.js +91 -0
  31. package/dist/otel/index.js.map +1 -0
  32. package/dist/types-DQ7-9sk3.d.cts +758 -0
  33. package/dist/types-DQ7-9sk3.d.ts +758 -0
  34. package/dist/vercel/index.cjs +209 -0
  35. package/dist/vercel/index.cjs.map +1 -0
  36. package/dist/vercel/index.d.cts +210 -0
  37. package/dist/vercel/index.d.ts +210 -0
  38. package/dist/vercel/index.js +206 -0
  39. package/dist/vercel/index.js.map +1 -0
  40. package/dist/web/index.cjs +2000 -0
  41. package/dist/web/index.cjs.map +1 -0
  42. package/dist/web/index.d.cts +2 -0
  43. package/dist/web/index.d.ts +2 -0
  44. package/dist/web/index.js +1987 -0
  45. package/dist/web/index.js.map +1 -0
  46. package/package.json +189 -0
package/dist/index.js ADDED
@@ -0,0 +1,2057 @@
1
+ // src/drift/Fingerprints.ts
2
/**
 * Per-lineage store of the latest plan fingerprint, used to report prefix drift between
 * consecutive plans. A lineage is keyed by `agentId`, or by `provider:model` when no agent
 * id is given. The store is bounded: once it holds more than `capacity` lineages the
 * least-recently-observed one is dropped.
 */
var Fingerprints = class _Fingerprints {
  /** Maximum number of lineages retained before the oldest one is evicted. */
  capacity;
  /** Lineage key to latest record. Insertion order is recency order, oldest first. */
  records = /* @__PURE__ */ new Map();

  /**
   * @param capacity - Cap on distinct tracked lineages before LRU eviction.
   */
  constructor(capacity = 1e3) {
    this.capacity = capacity;
  }

  /**
   * Stores the fingerprint of one plan and diffs it against the previous plan of the same
   * lineage.
   *
   * @param input - The plan input; supplies the lineage key and the segment stabilities.
   * @param prefixKey - Deterministic prefix key the plan produced.
   * @param segmentHashes - Iterable of `[segmentId, contentHash]` pairs.
   * @param totalTokens - Token count of the stable prefix behind `prefixKey`; stored so a
   *   later drift can report it as `invalidatedTokens`.
   * @param now - Timestamp copied verbatim into the report.
   * @returns `undefined` on the first observation of a lineage, and when neither the
   *   prefix key nor any non-volatile segment changed. Otherwise a report listing the
   *   sorted ids of changed, added, or removed non-volatile segments, with
   *   `invalidatedTokens` set to the previous token count when the key changed and to
   *   zero when it survived.
   */
  observe(input, prefixKey, segmentHashes, totalTokens, now) {
    const lineage = input.agentId ?? `${input.provider}:${input.model}`;

    const stabilityById = new Map();
    for (const segment of input.segments) {
      stabilityById.set(segment.id, segment.stability);
    }

    // A hashed segment that the input does not describe is treated as volatile.
    const current = new Map();
    for (const [id, hash] of segmentHashes) {
      current.set(id, { hash, stability: stabilityById.get(id) ?? "volatile" });
    }

    // Delete-then-set moves the lineage to the most-recent end of the map.
    const prior = this.records.get(lineage);
    this.records.delete(lineage);
    this.records.set(lineage, { prefixKey, tokens: totalTokens, segments: current });
    if (this.records.size > this.capacity) {
      const eldest = this.records.keys().next();
      if (!eldest.done) {
        this.records.delete(eldest.value);
      }
    }

    if (prior === undefined) {
      return undefined;
    }

    const drifted = new Set();
    // Changed or newly added non-volatile segments.
    for (const [id, fingerprint] of current) {
      if (fingerprint.stability === "volatile") {
        continue;
      }
      const before = prior.segments.get(id);
      const unchanged = before !== undefined && before.hash === fingerprint.hash;
      if (!unchanged) {
        drifted.add(id);
      }
    }
    // Non-volatile segments that disappeared.
    for (const [id, fingerprint] of prior.segments) {
      if (fingerprint.stability !== "volatile" && !current.has(id)) {
        drifted.add(id);
      }
    }

    const keySurvived = prefixKey === prior.prefixKey;
    if (keySurvived && drifted.size === 0) {
      return undefined;
    }

    const report = {
      prefixKey,
      previousKey: prior.prefixKey,
      changedSegmentIds: Array.from(drifted).sort(),
      invalidatedTokens: keySurvived ? 0 : prior.tokens,
      timestamp: now
    };
    // `agentId` is only present on the report when the input carried one.
    return input.agentId === undefined ? report : { agentId: input.agentId, ...report };
  }

  /**
   * Drops every lineage whose latest prefix key satisfies `predicate`.
   *
   * @param predicate - Called with each record's latest prefix key.
   * @returns The distinct prefix keys that were removed.
   */
  prune(predicate) {
    const removedKeys = new Set();
    // Iterate over a snapshot so the walk is unaffected by the deletions.
    for (const [lineage, record] of Array.from(this.records)) {
      if (!predicate(record.prefixKey)) {
        continue;
      }
      this.records.delete(lineage);
      removedKeys.add(record.prefixKey);
    }
    return Array.from(removedKeys);
  }

  /**
   * Serializes every lineage record in map order (least-recently-observed first) as plain
   * data: keys, prefix keys, token counts, and per-segment hash and stability.
   */
  toJSON() {
    const entries = Array.from(this.records, ([key, record]) => ({
      key,
      prefixKey: record.prefixKey,
      tokens: record.tokens,
      segments: Array.from(record.segments, ([id, fingerprint]) => ({
        id,
        hash: fingerprint.hash,
        stability: fingerprint.stability
      }))
    }));
    return { capacity: this.capacity, entries };
  }

  /**
   * Rebuilds a store from `toJSON` output, restoring records in the serialized order.
   *
   * @param json - A previously serialized store.
   */
  static fromJSON(json) {
    const restored = new _Fingerprints(json.capacity);
    for (const entry of json.entries) {
      const segments = new Map(
        Array.from(entry.segments, (segment) => [
          segment.id,
          { hash: segment.hash, stability: segment.stability }
        ])
      );
      restored.records.set(entry.key, {
        prefixKey: entry.prefixKey,
        tokens: entry.tokens,
        segments
      });
    }
    return restored;
  }
};
130
+
131
+ // src/errors.ts
132
/**
 * Library error type: a regular `Error` named `'RacsError'` that additionally carries a
 * machine-readable `code`.
 */
var RacsError = class _RacsError extends Error {
  /**
   * Stable machine-readable error code, for example `'ERR_INVALID_INPUT'`.
   *
   * Branch on this field rather than on `message`.
   */
  code;

  /**
   * @param message - Human-readable description of what went wrong.
   * @param code - Machine-readable code stored on {@link RacsError.code}.
   */
  constructor(message, code) {
    super(message);
    Object.assign(this, { name: "RacsError", code });
  }

  /**
   * Creates, but does not throw, an error with code `'ERR_INVALID_INPUT'`.
   *
   * @param message - Human-readable description of the invalid input.
   * @returns A new error instance for the caller to throw or return.
   */
  static invalid(message) {
    const code = "ERR_INVALID_INPUT";
    return new _RacsError(message, code);
  }
};
162
+
163
+ // src/ledger/Ledger.ts
164
/**
 * Accumulates cache usage per prefix and derives hit ratios and, when a price card exists
 * for the model, USD figures. Tracks at most `maxPrefixes` prefixes; the
 * least-recently-recorded one is evicted beyond that.
 *
 * USD rules as implemented here: a prefix gets `savedUsd` only when its model's price card
 * defines `cacheReadPerMTok`, and `writeSpendUsd` whenever a price card exists at all.
 * Ledger-wide `savedUsd` and `netUsd` appear only if at least one included prefix
 * contributed a USD figure.
 */
var Ledger = class _Ledger {
  /** Per-model price cards, keyed by model name; may be undefined. */
  pricing;
  /** Maximum number of tracked prefixes before LRU eviction. */
  maxPrefixes;
  /** Prefix key to aggregate. Insertion order is recency order, oldest first. */
  aggregates = /* @__PURE__ */ new Map();

  /**
   * @param pricing - Per-model price cards used for USD figures. Without it only
   *   token-denominated statistics are reported.
   * @param maxPrefixes - Cap on distinct tracked prefixes before LRU eviction.
   */
  constructor(pricing, maxPrefixes = 1e3) {
    this.pricing = pricing;
    this.maxPrefixes = maxPrefixes;
  }

  /**
   * Folds one usage record into the aggregate of its prefix, keyed by `usage.prefixKey`
   * or, when that is absent, by `provider:model`.
   *
   * The uncached share of a call is
   * `max(0, inputTokens - cacheReadTokens - cacheWriteTokens5m - cacheWriteTokens1h)`,
   * with missing write counts treated as zero; the clamp keeps the aggregate from going
   * negative when the cached counts exceed `inputTokens`.
   *
   * @param usage - The usage report for one call.
   * @returns `hit` is true when `cacheReadTokens` is positive. `evicted` is the prefix key
   *   dropped to stay within `maxPrefixes`, present only when an eviction happened.
   */
  record(usage) {
    const key = usage.prefixKey ?? `${usage.provider}:${usage.model}`;
    const written5m = usage.cacheWriteTokens5m ?? 0;
    const written1h = usage.cacheWriteTokens1h ?? 0;

    const aggregate = this.aggregates.get(key) ?? {
      provider: usage.provider,
      model: usage.model,
      calls: 0,
      readTokens: 0,
      write5mTokens: 0,
      write1hTokens: 0,
      uncachedTokens: 0
    };
    // Delete-then-set moves the prefix to the most-recent end of the map.
    this.aggregates.delete(key);
    this.aggregates.set(key, aggregate);

    aggregate.calls += 1;
    aggregate.readTokens += usage.cacheReadTokens;
    aggregate.write5mTokens += written5m;
    aggregate.write1hTokens += written1h;
    aggregate.uncachedTokens += Math.max(
      0,
      usage.inputTokens - usage.cacheReadTokens - written5m - written1h
    );
    // The latest call decides which provider and model the aggregate reports.
    aggregate.provider = usage.provider;
    aggregate.model = usage.model;

    const hit = usage.cacheReadTokens > 0;
    let evicted;
    if (this.aggregates.size > this.maxPrefixes) {
      const eldest = this.aggregates.keys().next();
      if (!eldest.done) {
        evicted = eldest.value;
        this.aggregates.delete(evicted);
      }
    }
    if (evicted === undefined) {
      return { hit };
    }
    return { hit, evicted };
  }

  /**
   * Returns ledger-wide totals plus the per-prefix breakdown, optionally narrowed to one
   * prefix key and/or one provider. The breakdown is sorted by prefix key ascending.
   *
   * @param filter - Optional narrowing; both fields must match when both are given.
   */
  stats(filter) {
    const wantedKey = filter?.prefixKey;
    const wantedProvider = filter?.provider;
    const prefixes = [];
    const totals = { calls: 0, read: 0, write: 0, uncached: 0, saved: 0, writeSpend: 0 };
    let priced = false;

    // Totals are accumulated in map order, before sorting, to keep float sums stable.
    for (const [key, aggregate] of this.aggregates) {
      const keyMatches = wantedKey === undefined || key === wantedKey;
      const providerMatches = wantedProvider === undefined || aggregate.provider === wantedProvider;
      if (!keyMatches || !providerMatches) {
        continue;
      }
      const stat = this.prefixStats(key, aggregate);
      prefixes.push(stat);
      totals.calls += aggregate.calls;
      totals.read += aggregate.readTokens;
      totals.write += aggregate.write5mTokens + aggregate.write1hTokens;
      totals.uncached += aggregate.uncachedTokens;
      if (stat.savedUsd !== undefined) {
        totals.saved += stat.savedUsd;
        priced = true;
      }
      if (stat.writeSpendUsd !== undefined) {
        totals.writeSpend += stat.writeSpendUsd;
        priced = true;
      }
    }

    prefixes.sort((left, right) => {
      if (left.prefixKey < right.prefixKey) {
        return -1;
      }
      return left.prefixKey > right.prefixKey ? 1 : 0;
    });

    const denominator = totals.read + totals.write + totals.uncached;
    const usd = priced
      ? { savedUsd: totals.saved, netUsd: totals.saved - totals.writeSpend }
      : {};
    return {
      calls: totals.calls,
      hitRatio: denominator === 0 ? 0 : totals.read / denominator,
      readTokens: totals.read,
      writeTokens: totals.write,
      uncachedTokens: totals.uncached,
      ...usd,
      prefixes
    };
  }

  /**
   * Serializes every aggregate in map order (least-recently-used first) as plain data.
   * Pricing is not part of the snapshot.
   */
  toJSON() {
    const entries = Array.from(this.aggregates, ([key, aggregate]) => ({
      key,
      provider: aggregate.provider,
      model: aggregate.model,
      calls: aggregate.calls,
      readTokens: aggregate.readTokens,
      write5mTokens: aggregate.write5mTokens,
      write1hTokens: aggregate.write1hTokens,
      uncachedTokens: aggregate.uncachedTokens
    }));
    return { maxPrefixes: this.maxPrefixes, entries };
  }

  /**
   * Rebuilds a ledger from `toJSON` output, restoring aggregates in the serialized order.
   *
   * @param json - A previously serialized ledger.
   * @param pricing - The pricing table to compute USD figures with from now on.
   */
  static fromJSON(json, pricing) {
    const restored = new _Ledger(pricing, json.maxPrefixes);
    for (const entry of json.entries) {
      const { provider, model, calls, readTokens, write5mTokens, write1hTokens, uncachedTokens } = entry;
      restored.aggregates.set(entry.key, {
        provider,
        model,
        calls,
        readTokens,
        write5mTokens,
        write1hTokens,
        uncachedTokens
      });
    }
    return restored;
  }

  /**
   * Computes the statistics of one prefix from its live aggregate.
   *
   * `savedUsd` is the read tokens priced at the gap between the input price and the
   * cached-read price. `writeSpendUsd` is the sum of both write tiers priced at the gap
   * between the tier's write price and the input price, each clamped at zero; a tier
   * without a write price contributes nothing.
   */
  prefixStats(key, aggregate) {
    const writeTokens = aggregate.write5mTokens + aggregate.write1hTokens;
    const denominator = aggregate.readTokens + writeTokens + aggregate.uncachedTokens;
    const stat = {
      prefixKey: key,
      calls: aggregate.calls,
      hitRatio: denominator === 0 ? 0 : aggregate.readTokens / denominator,
      readTokens: aggregate.readTokens,
      writeTokens,
      uncachedTokens: aggregate.uncachedTokens
    };

    const price = this.pricing?.[aggregate.model];
    if (price === undefined) {
      return stat;
    }

    if (price.cacheReadPerMTok !== undefined) {
      stat.savedUsd = aggregate.readTokens / 1e6 * (price.inputPerMTok - price.cacheReadPerMTok);
    }
    const premiumOf = (tokens, writePrice) => {
      if (writePrice === undefined) {
        return 0;
      }
      return Math.max(0, tokens / 1e6 * (writePrice - price.inputPerMTok));
    };
    stat.writeSpendUsd =
      premiumOf(aggregate.write5mTokens, price.cacheWrite5mPerMTok) +
      premiumOf(aggregate.write1hTokens, price.cacheWrite1hPerMTok);
    return stat;
  }
};
354
+
355
+ // src/stats/hash.ts
356
/** Initial hash state of 64-bit FNV-1a. */
var FNV_OFFSET_BASIS = 0xcbf2_9ce4_8422_2325n;
/** Multiplier applied after each byte is mixed in (2^40 + 0x1b3). */
var FNV_PRIME = (1n << 40n) + 0x1b3n;
/** All-ones 64-bit mask; keeps the BigInt state within 64 bits after each multiply. */
var MASK_64 = (1n << 64n) - 1n;
359
+ var KEY_SEPARATOR = "";
360
/**
 * Hashes a string with 64-bit FNV-1a and returns the raw BigInt state.
 *
 * Each UTF-16 code unit contributes two bytes, low byte first, then high byte.
 *
 * @param text - The string to hash.
 * @returns The 64-bit hash as a non-negative BigInt.
 */
function fnv1a64Value(text) {
  // One FNV-1a round: xor the byte in, multiply by the prime, truncate to 64 bits.
  const mix = (state, byte) => ((state ^ BigInt(byte)) * FNV_PRIME) & MASK_64;
  let state = FNV_OFFSET_BASIS;
  let position = 0;
  while (position < text.length) {
    const unit = text.charCodeAt(position);
    state = mix(state, unit & 255);
    state = mix(state, unit >>> 8);
    position += 1;
  }
  return state;
}
371
/**
 * Hashes a string with {@link fnv1a64Value} and renders the result as hexadecimal,
 * left-padded with zeros to 16 characters.
 *
 * @param text - The string to hash.
 */
function fnv1a64(text) {
  const hex = fnv1a64Value(text).toString(16);
  return hex.padStart(16, "0");
}
374
/**
 * Joins the given parts with `KEY_SEPARATOR` and returns the {@link fnv1a64} digest of
 * the joined string.
 *
 * @param parts - Array of key fragments, combined in the given order.
 */
function combineKeys(parts) {
  const joined = parts.join(KEY_SEPARATOR);
  return fnv1a64(joined);
}
377
/**
 * Derives a compact identifier: hashes `salt`, `KEY_SEPARATOR`, and the stringified
 * counter with {@link fnv1a64Value} and renders the result in base 36.
 *
 * @param seededCounter - Counter value, converted with `String(...)`.
 * @param salt - Prefix placed before the separator.
 */
function shortId(seededCounter, salt) {
  const material = `${salt}${KEY_SEPARATOR}${String(seededCounter)}`;
  const digest = fnv1a64Value(material);
  return digest.toString(36);
}
380
+
381
+ // src/stats/tokens.ts
382
/** Characters assumed per token when only the text length is known. */
var CHARS_PER_TOKEN = 4;

/**
 * Estimates a token count from text length: `content.length` divided by
 * `CHARS_PER_TOKEN`, rounded up.
 *
 * @param content - The text to size.
 */
function estimateTokens(content) {
  const ratio = content.length / CHARS_PER_TOKEN;
  return Math.ceil(ratio);
}
386
/**
 * Returns the token count of one segment: its numeric `tokens` field when present,
 * otherwise an estimate from its string `content`, otherwise zero.
 *
 * @param segment - A segment carrying `tokens` and/or `content`.
 */
function tokensOf(segment) {
  if (typeof segment.tokens === "number") {
    return segment.tokens;
  }
  return typeof segment.content === "string" ? estimateTokens(segment.content) : 0;
}
395
+
396
+ // src/plan/Planner.ts
397
/** Ranking weight per segment role; higher-weight spans win spare breakpoint slots. */
var ROLE_WEIGHT = {
  tools: 4,
  system: 3,
  documents: 2,
  history: 1,
  dynamic: 0
};

/** Length of each cache TTL tier in seconds. */
var TTL_SECONDS = {
  "5m": 5 * 60,
  "1h": 60 * 60
};

/** Longest declared reuse interval, in seconds, still assigned to the 5m tier. */
var FIVE_MINUTE_INTERVAL_CEILING = 4 * 60;

/** Longest declared reuse interval, in seconds, still assigned to the 1h tier. */
var ONE_HOUR_INTERVAL_CEILING = 55 * 60;

// NOTE(review): the two resource TTL constants are not referenced in the portion of the
// file reviewed here; presumably they bound the resource-family TTL. Confirm at the use site.
var RESOURCE_TTL_MIN_SECONDS = 5 * 60;
var RESOURCE_TTL_DEFAULT_SECONDS = 60 * 60;
409
/** Renders a number for reasoning text: integers verbatim, anything else with two decimals. */
var formatCount = (value) => {
  if (Number.isInteger(value)) {
    return value.toString();
  }
  return value.toFixed(2);
};
410
/** Renders a USD amount with three significant digits followed by the ` USD` suffix. */
var formatUsd = (value) => {
  const digits = value.toPrecision(3);
  return `${digits} USD`;
};
411
/**
 * Resolves the declared reuse interval in seconds.
 *
 * An explicit `reuse.intervalSeconds` is returned as given. Otherwise a positive
 * `reuse.callsPerHour` is converted to `3600 / callsPerHour`.
 *
 * @param input - Plan input whose optional `reuse` declares the reuse pattern.
 * @returns The interval in seconds, or `undefined` when nothing usable was declared.
 */
var intervalSecondsOf = (input) => {
  const declared = input.reuse;
  if (declared === undefined) {
    return undefined;
  }
  if (declared.intervalSeconds !== undefined) {
    return declared.intervalSeconds;
  }
  const rate = declared.callsPerHour;
  return rate !== undefined && rate > 0 ? 3600 / rate : undefined;
};
424
/**
 * Resolves the declared call rate per hour.
 *
 * A positive `reuse.intervalSeconds` wins and is converted to `3600 / intervalSeconds`.
 * Otherwise `reuse.callsPerHour` is returned as given.
 *
 * @param input - Plan input whose optional `reuse` declares the reuse pattern.
 * @returns Calls per hour, or `undefined` when nothing usable was declared.
 */
var callsPerHourOf = (input) => {
  const declared = input.reuse;
  if (declared === undefined) {
    return undefined;
  }
  const seconds = declared.intervalSeconds;
  if (seconds !== undefined && seconds > 0) {
    return 3600 / seconds;
  }
  // Yields undefined when no rate was declared either.
  return declared.callsPerHour;
};
437
/**
 * Resolves the cache-write price multiplier, relative to the base input price, for a TTL
 * tier.
 *
 * The profile's own multiplier for the tier wins. Otherwise the multiplier is derived
 * from the price card as `writePrice / inputPerMTok`, provided the input price is
 * positive and the tier has a write price.
 *
 * @param ttl - `"1h"` selects the one-hour tier; any other value selects the 5m tier.
 * @param profile - Provider profile, may carry `writeMultiplier5m` / `writeMultiplier1h`.
 * @param pricing - Optional model price card.
 * @returns The multiplier, or `undefined` when it cannot be determined.
 */
var writeMultiplierFor = (ttl, profile, pricing) => {
  const hourTier = ttl === "1h";
  const declared = hourTier ? profile.writeMultiplier1h : profile.writeMultiplier5m;
  if (declared !== undefined) {
    return declared;
  }
  if (pricing === undefined || !(pricing.inputPerMTok > 0)) {
    return undefined;
  }
  const writePrice = hourTier ? pricing.cacheWrite1hPerMTok : pricing.cacheWrite5mPerMTok;
  return writePrice === undefined ? undefined : writePrice / pricing.inputPerMTok;
};
450
/**
 * Resolves the cached-read price multiplier relative to the base input price.
 *
 * The profile's `readMultiplier` wins. Otherwise it is derived from the price card as
 * `cacheReadPerMTok / inputPerMTok`, provided the input price is positive and a
 * cached-read price exists.
 *
 * @param profile - Provider profile, may carry `readMultiplier`.
 * @param pricing - Optional model price card.
 * @returns The multiplier, or `undefined` when it cannot be determined.
 */
var readMultiplierFor = (profile, pricing) => {
  const declared = profile.readMultiplier;
  if (declared !== undefined) {
    return declared;
  }
  const derivable =
    pricing !== undefined && pricing.inputPerMTok > 0 && pricing.cacheReadPerMTok !== undefined;
  return derivable ? pricing.cacheReadPerMTok / pricing.inputPerMTok : undefined;
};
459
/**
 * Groups the leading non-volatile segments into spans of consecutive equal role.
 *
 * Scanning starts at index 0 and stops at `boundary`, at the end of `segments`, or at the
 * first volatile (or missing) segment, whichever comes first.
 *
 * @param segments - Segments in request order.
 * @param boundary - Exclusive upper index of the scan.
 * @returns One `{ role, tokens, endSegmentId, endIndex }` per span, in request order;
 *   `tokens` sums the span's segments and the end fields name its last segment.
 */
var stableSpansOf = (segments, boundary) => {
  const spans = [];
  const limit = Math.min(boundary, segments.length);
  let index = 0;
  while (index < limit) {
    const segment = segments[index];
    if (segment === undefined || segment.stability === "volatile") {
      break;
    }
    const tokens = tokensOf(segment);
    const tail = spans[spans.length - 1];
    if (tail !== undefined && tail.role === segment.role) {
      // Same role as the open span: extend it to end at this segment.
      spans[spans.length - 1] = {
        role: tail.role,
        tokens: tail.tokens + tokens,
        endSegmentId: segment.id,
        endIndex: index
      };
    } else {
      spans.push({ role: segment.role, tokens, endSegmentId: segment.id, endIndex: index });
    }
    index += 1;
  }
  return spans;
};
488
/**
 * Break-even economics of one cache write for the breakpoint family, expressed in
 * base-token equivalents.
 *
 * The write premium is `coveredTokens * (writeMultiplier - 1)`, clamped at zero; each
 * reuse recovers `coveredTokens * (1 - readMultiplier)`. When no call rate is declared
 * the plan is judged against a single assumed reuse. When one is declared, the plan is
 * profitable whenever a finite number of reuses repays the write.
 *
 * @param coveredTokens - Tokens covered by the cached prefix.
 * @param ttl - TTL tier the write is priced at.
 * @param callsPerHour - Declared call rate, or `undefined` when none was declared.
 * @param profile - Provider profile supplying or overriding the multipliers.
 * @param pricing - Optional price card the multipliers may be derived from.
 * @returns `{ writePremiumTokens, minReusesToProfit, profitable, reasoning }`, or
 *   `undefined` when either multiplier is unknown.
 */
var breakpointBreakEven = (coveredTokens, ttl, callsPerHour, profile, pricing) => {
  const writeMultiplier = writeMultiplierFor(ttl, profile, pricing);
  const readMultiplier = readMultiplierFor(profile, pricing);
  if (writeMultiplier === undefined || readMultiplier === undefined) {
    return undefined;
  }

  const writePremiumTokens = Math.max(0, coveredTokens * (writeMultiplier - 1));
  const savingsPerReuse = coveredTokens * (1 - readMultiplier);
  // With nothing saved per reuse the write can never be repaid.
  let minReusesToProfit = Number.POSITIVE_INFINITY;
  if (savingsPerReuse > 0) {
    minReusesToProfit = Math.ceil(writePremiumTokens / savingsPerReuse);
  }

  const verdictOf = (isProfitable) => (isProfitable ? "profitable" : "not profitable");
  // Opening shared by both reasoning variants, ending right before the variant-specific tail.
  const lead = `The ${ttl} write multiplier ${formatCount(writeMultiplier)} prices the premium at ${formatCount(writePremiumTokens)} base-token equivalents, each reuse at read multiplier ${formatCount(readMultiplier)} recovers ${formatCount(savingsPerReuse)}, so ${formatCount(minReusesToProfit)} reuse(s) repay the`;

  if (callsPerHour === undefined) {
    const profitable = 1 >= minReusesToProfit;
    const reasoning = `${lead} write against the single assumed reuse (no reuse pattern was declared; multipliers are price-relative, the math holds with or without a pricing table), ${verdictOf(profitable)}.`;
    return { writePremiumTokens, minReusesToProfit, profitable, reasoning };
  }

  const expectedReuses = Math.max(1, Math.round(callsPerHour));
  const profitable = Number.isFinite(minReusesToProfit);
  const reasoning = `${lead} single write; reads refresh the ${ttl} window at no extra cost (refresh-on-use), steady declared reuse keeps the entry alive indefinitely at about ${formatCount(expectedReuses)} reuse(s) per hour (multipliers are price-relative, the math holds with or without a pricing table), ${verdictOf(profitable)}.`;
  return { writePremiumTokens, minReusesToProfit, profitable, reasoning };
};
507
/**
 * Break-even economics when a cache entry is kept alive by scheduled refresh touches,
 * expressed in base-token equivalents.
 *
 * Each reuse recovers `coveredTokens * (1 - readMultiplier)` and is charged its share of
 * the touches, `refreshesPerHour / callsPerHour * coveredTokens * readMultiplier`. The
 * plan is profitable when that net is positive; the write premium is then repaid after
 * `ceil(writePremium / net)` reuses.
 *
 * @param coveredTokens - Tokens covered by the cached prefix.
 * @param ttl - TTL tier the write is priced at.
 * @param callsPerHour - Declared call rate; must be positive.
 * @param refreshesPerHour - Scheduled keep-warm touches per hour.
 * @param readMultiplier - Cached-read price multiplier.
 * @param profile - Provider profile supplying or overriding the write multiplier.
 * @param pricing - Optional price card the write multiplier may be derived from.
 * @returns `{ writePremiumTokens, minReusesToProfit, profitable, reasoning }`, or
 *   `undefined` when the write multiplier is unknown or `callsPerHour` is not positive.
 */
var keepWarmBreakEven = (coveredTokens, ttl, callsPerHour, refreshesPerHour, readMultiplier, profile, pricing) => {
  const writeMultiplier = writeMultiplierFor(ttl, profile, pricing);
  if (writeMultiplier === undefined || callsPerHour <= 0) {
    return undefined;
  }

  const writePremiumTokens = Math.max(0, coveredTokens * (writeMultiplier - 1));
  const savingsPerReuse = coveredTokens * (1 - readMultiplier);
  const touchCostPerReuse = refreshesPerHour / callsPerHour * coveredTokens * readMultiplier;
  const netPerReuse = savingsPerReuse - touchCostPerReuse;
  const profitable = netPerReuse > 0;

  let minReusesToProfit = Number.POSITIVE_INFINITY;
  if (profitable) {
    minReusesToProfit = Math.ceil(writePremiumTokens / netPerReuse);
  }

  const verdict = profitable ? "profitable" : "not profitable";
  const reasoning = `Keep-warm economics: each reuse recovers ${formatCount(savingsPerReuse)} base-token equivalents and carries ${formatCount(touchCostPerReuse)} of scheduled refresh touches (${formatCount(refreshesPerHour)} touch(es) per hour at read multiplier ${formatCount(readMultiplier)} spread over ${formatCount(callsPerHour)} call(s) per hour), netting ${formatCount(netPerReuse)}, so the single ${ttl} write premium of ${formatCount(writePremiumTokens)} is repaid after ${formatCount(minReusesToProfit)} reuse(s), ${verdict}.`;
  return { writePremiumTokens, minReusesToProfit, profitable, reasoning };
};
521
/**
 * Break-even economics of keeping a cache resource resident, for providers that bill
 * storage per million tokens per hour.
 *
 * The hourly storage bill is compared with the USD saved per cached read; the plan is
 * profitable when the expected calls per hour reach `ceil(storage / savingsPerCall)`.
 * The storage rate comes from the price card, else from the profile. The cached-read
 * price comes from the price card, else from the input price times the profile's read
 * multiplier.
 *
 * @param stableTokens - Tokens kept resident.
 * @param callsPerHour - Expected calls per hour.
 * @param profile - Provider profile, fallback for storage rate and read multiplier.
 * @param pricing - Model price card; required for any USD math.
 * @returns `{ writePremiumTokens, minReusesToProfit, profitable, reasoning }`, where
 *   `writePremiumTokens` is the hourly storage bill converted to base input tokens; or
 *   `undefined` when a needed price is missing, the input price is not positive, or a
 *   cached read saves nothing.
 */
var resourceBreakEven = (stableTokens, callsPerHour, profile, pricing) => {
  const storagePerMTokHour = pricing?.storagePerMTokHour ?? profile.storagePerMTokHour;
  if (storagePerMTokHour === undefined || pricing === undefined || pricing.inputPerMTok <= 0) {
    return undefined;
  }

  let readPerMTok = pricing.cacheReadPerMTok;
  if (readPerMTok === undefined || readPerMTok === null) {
    readPerMTok =
      profile.readMultiplier === undefined ? undefined : pricing.inputPerMTok * profile.readMultiplier;
  }
  if (readPerMTok === undefined) {
    return undefined;
  }

  const residentMTok = stableTokens / 1e6;
  const savingsPerCallUsd = residentMTok * (pricing.inputPerMTok - readPerMTok);
  if (savingsPerCallUsd <= 0) {
    return undefined;
  }

  const storagePerHourUsd = residentMTok * storagePerMTokHour;
  const writePremiumTokens = storagePerHourUsd / pricing.inputPerMTok * 1e6;
  const minReusesToProfit = Math.ceil(storagePerHourUsd / savingsPerCallUsd);
  const profitable = callsPerHour >= minReusesToProfit;
  const verdict = profitable ? "profitable" : "not profitable";
  const reasoning = `Keeping ${formatCount(stableTokens)} tokens resident bills ${formatUsd(storagePerHourUsd)} per hour of storage (${formatCount(writePremiumTokens)} base-token equivalents), each cached read saves ${formatUsd(savingsPerCallUsd)}, so ${formatCount(minReusesToProfit)} reuse(s) per hour break even against ${formatCount(callsPerHour)} expected, ${verdict}.`;
  return { writePremiumTokens, minReusesToProfit, profitable, reasoning };
};
541
+ var Planner = class {
542
+ /**
543
+ * Produces directives, break-even economics, and planner-stage findings for one input.
544
+ *
545
+ * @param input - The prompt being planned, segments in request order.
546
+ * @param profile - Effective provider profile, overrides already merged by the core.
547
+ * @param analysis - Aggregates the analysis stage computed, see {@link PlanAnalysis}.
548
+ * @param prefixKey - Deterministic key of the stable prefix, computed by the core.
549
+ * @param pricing - Optional model price card, refines multipliers and storage math.
550
+ * @param knownResource - Resource family only: whether the core already tracks a live
551
+ * cache resource for this prefix. First sight emits `'create'`, later sights `'reuse'`,
552
+ * and the core swaps `'reuse'` for `'refresh'` when its clock sits inside the last 10
553
+ * percent of the TTL window, the planner emits the shape, the core decides the timing.
554
+ */
555
+ plan(input, profile, analysis, prefixKey, pricing, knownResource = false) {
556
+ const result = this.planFamily(input, profile, analysis, prefixKey, pricing, knownResource);
557
+ if (profile.notes !== void 0 && profile.notes !== "") {
558
+ return { ...result, reasoning: `${result.reasoning} Provider note: ${profile.notes}` };
559
+ }
560
+ return result;
561
+ }
562
+ planFamily(input, profile, analysis, prefixKey, pricing, knownResource) {
563
+ switch (profile.family) {
564
+ case "breakpoint":
565
+ return this.planBreakpoint(input, profile, analysis, pricing);
566
+ case "routing-key":
567
+ return this.planRoutingKey(input, profile, analysis, prefixKey);
568
+ case "resource":
569
+ return this.planResource(input, profile, analysis, prefixKey, knownResource, pricing);
570
+ default:
571
+ return this.planPassive(profile);
572
+ }
573
+ }
574
+ /**
575
+ * Breakpoint family (anthropic, bedrock, hermes, microsoft-foundry): explicit markers,
576
+ * one TTL tier chosen from the declared reuse. The deepest stable boundary is always
577
+ * marked because the last marker determines left-anchored coverage, remaining budget
578
+ * goes to the largest stable spans by role weight. Economics follow the refresh-on-use
579
+ * model documented on {@link breakpointBreakEven} inside the TTL window and the
580
+ * touch-cost model on {@link keepWarmBreakEven} beyond it; when neither sustains the
581
+ * cache the planner declines with a reasoned `'none'` plus a `'write-premium-trap'`
582
+ * finding, so an emitted breakpoint plan is never knowingly unprofitable.
583
+ */
584
+ planBreakpoint(input, profile, analysis, pricing) {
585
+ const extraFindings = [];
586
+ const minCacheable = profile.minCacheableTokens ?? 0;
587
+ if (analysis.stableTokens < minCacheable) {
588
+ return {
589
+ directives: [
590
+ {
591
+ kind: "none",
592
+ reason: `stable prefix is ${formatCount(analysis.stableTokens)} tokens, below the provider minimum of ${formatCount(minCacheable)}, the provider would silently cache nothing`
593
+ }
594
+ ],
595
+ reasoning: `Skipped caching because the ${formatCount(analysis.stableTokens)}-token stable prefix sits below the ${formatCount(minCacheable)}-token provider minimum, a marker there would buy nothing.`,
596
+ extraFindings
597
+ };
598
+ }
599
+ const spans = stableSpansOf(input.segments, analysis.orderedStableBoundary);
600
+ const deepest = spans[spans.length - 1];
601
+ if (deepest === void 0) {
602
+ const first = input.segments[0];
603
+ if (first !== void 0 && first.stability === "volatile") {
604
+ extraFindings.push({
605
+ severity: "error",
606
+ code: "breakpoint-after-volatile",
607
+ segmentId: first.id,
608
+ message: `the only breakpoint candidate follows volatile segment '${first.id}', a span written there could never be read back; move volatile content after the stable prefix`
609
+ });
610
+ return {
611
+ directives: [
612
+ {
613
+ kind: "none",
614
+ reason: "every breakpoint candidate sits after a volatile segment, a written span could never be read back"
615
+ }
616
+ ],
617
+ reasoning: "Refused to place a breakpoint after a volatile segment, the written span could never be read back, so the write premium would be pure loss.",
618
+ extraFindings
619
+ };
620
+ }
621
+ return {
622
+ directives: [
623
+ { kind: "none", reason: "no left-anchored stable prefix exists to mark for caching" }
624
+ ],
625
+ reasoning: "Skipped caching, the prompt carries no left-anchored stable prefix to mark.",
626
+ extraFindings
627
+ };
628
+ }
629
+ const budget = profile.maxBreakpoints ?? 1;
630
+ if (budget <= 0) {
631
+ return {
632
+ directives: [
633
+ { kind: "none", reason: "the provider profile allows zero cache breakpoints" }
634
+ ],
635
+ reasoning: "Skipped caching, the effective profile grants no breakpoint slots.",
636
+ extraFindings
637
+ };
638
+ }
639
+ const sentences = [];
640
+ const interval = intervalSecondsOf(input);
641
+ const callsPerHour = callsPerHourOf(input);
642
+ const tiers = profile.ttls ?? ["5m"];
643
+ const supports1h = tiers.includes("1h");
644
+ let ttl;
645
+ let keepWarm;
646
+ if (interval === void 0) {
647
+ ttl = tiers.includes("5m") || !supports1h ? "5m" : "1h";
648
+ sentences.push(
649
+ `No reuse pattern was declared, defaulting to the ${ttl} tier, the lowest write premium the profile offers, with a single assumed reuse for break-even.`
650
+ );
651
+ } else if (interval <= FIVE_MINUTE_INTERVAL_CEILING && tiers.includes("5m")) {
652
+ ttl = "5m";
653
+ sentences.push(
654
+ `Reuse every ${formatCount(interval)}s fits the 5m tier with ${formatCount(TTL_SECONDS["5m"] - interval)}s of refresh headroom before the 300s window closes, and every read refreshes the window at no cost.`
655
+ );
656
+ } else if (interval <= ONE_HOUR_INTERVAL_CEILING && supports1h) {
657
+ ttl = "1h";
658
+ sentences.push(
659
+ `Reuse every ${formatCount(interval)}s overruns the 5m tier, the 1h tier holds it with ${formatCount(TTL_SECONDS["1h"] - interval)}s of refresh headroom under the 3600s window, and every read refreshes the window at no cost.`
660
+ );
661
+ } else {
662
+ const widest = supports1h ? "1h" : "5m";
663
+ const refreshesPerHour = 3600 / TTL_SECONDS[widest];
664
+ const readMultiplier = readMultiplierFor(profile, pricing);
665
+ if (callsPerHour === void 0 || readMultiplier === void 0 || readMultiplier >= 1 || callsPerHour * (1 - readMultiplier) <= refreshesPerHour * readMultiplier) {
666
+ extraFindings.push({
667
+ severity: "warning",
668
+ code: "write-premium-trap",
669
+ message: `the declared reuse every ${formatCount(interval)}s outruns the widest ${widest} window and the traffic cannot cover keep-warm touches, so the write premium on ${formatCount(analysis.stableTokens)} stable tokens would never be repaid; raise reuse density or skip caching`
670
+ });
671
+ return {
672
+ directives: [
673
+ {
674
+ kind: "none",
675
+ reason: "reuse interval exceeds provider TTL, caching would re-write every call"
676
+ }
677
+ ],
678
+ reasoning: `Reuse every ${formatCount(interval)}s exceeds what the widest ${widest} window holds with refresh headroom, every call would pay the write premium again, so no marker is placed.`,
679
+ extraFindings
680
+ };
681
+ }
682
+ ttl = widest;
683
+ keepWarm = { refreshesPerHour, readMultiplier };
684
+ sentences.push(
685
+ `Reuse every ${formatCount(interval)}s exceeds the ${widest} window, but ${formatCount(callsPerHour)} calls per hour against ${formatCount(refreshesPerHour)} keep-warm touches per hour at read multiplier ${formatCount(readMultiplier)} keep refresh-keeping profitable, so the ${widest} tier stays on with touches scheduled at 90 percent of the TTL.`
686
+ );
687
+ }
688
+ const ranked = [...spans.slice(0, -1)].sort((a, b) => {
689
+ const byWeight = ROLE_WEIGHT[b.role] - ROLE_WEIGHT[a.role];
690
+ if (byWeight !== 0) {
691
+ return byWeight;
692
+ }
693
+ if (b.tokens !== a.tokens) {
694
+ return b.tokens - a.tokens;
695
+ }
696
+ return a.endIndex - b.endIndex;
697
+ });
698
+ const chosen = [deepest, ...ranked.slice(0, Math.max(0, budget - 1))].sort(
699
+ (a, b) => a.endIndex - b.endIndex
700
+ );
701
+ const directives = chosen.map((span) => ({
702
+ kind: "breakpoint",
703
+ segmentId: span.endSegmentId,
704
+ ttl
705
+ }));
706
+ const lastChosen = chosen[chosen.length - 1] ?? deepest;
707
+ let coveredTokens = 0;
708
+ for (const span of spans) {
709
+ if (span.endIndex <= lastChosen.endIndex) {
710
+ coveredTokens += span.tokens;
711
+ }
712
+ }
713
+ const placement = chosen.map(
714
+ (span) => `${span.role} ending at '${span.endSegmentId}', ${formatCount(span.tokens)} tokens`
715
+ ).join("; ");
716
+ sentences.push(
717
+ `Placed ${formatCount(directives.length)} of ${formatCount(budget)} allowed breakpoints: the deepest stable boundary '${deepest.endSegmentId}' is always marked because the last marker determines left-anchored coverage, and the remaining slots go to the largest stable spans weighted tools over system over documents over history (${placement}) because the provider hashes tools, then system, then messages, so each extra marker preserves partial reuse when a later region drifts, covering ${formatCount(coveredTokens)} of ${formatCount(analysis.totalTokens)} prompt tokens.`
718
+ );
719
+ const breakEven = keepWarm !== void 0 && callsPerHour !== void 0 ? keepWarmBreakEven(
720
+ coveredTokens,
721
+ ttl,
722
+ callsPerHour,
723
+ keepWarm.refreshesPerHour,
724
+ keepWarm.readMultiplier,
725
+ profile,
726
+ pricing
727
+ ) : breakpointBreakEven(
728
+ coveredTokens,
729
+ ttl,
730
+ interval === void 0 ? void 0 : callsPerHour,
731
+ profile,
732
+ pricing
733
+ );
734
+ if (breakEven !== void 0) {
735
+ sentences.push(breakEven.reasoning);
736
+ if (!breakEven.profitable) {
737
+ extraFindings.push({
738
+ severity: "warning",
739
+ code: "write-premium-trap",
740
+ message: `expected reuse inside the ${ttl} window does not repay the cache write premium on ${formatCount(coveredTokens)} covered stable tokens; raise reuse density, choose a longer TTL tier, or skip caching`
741
+ });
742
+ }
743
+ }
744
+ return {
745
+ directives,
746
+ ...breakEven !== void 0 ? { breakEven } : {},
747
+ reasoning: sentences.join(" "),
748
+ extraFindings
749
+ };
750
+ }
751
+ /**
752
+ * Routing-key family (openai, xai, mistral, moonshot, openrouter): the provider caches
753
+ * implicitly, the key only steers identical prefixes to the same cache shard, and the
754
+ * extended 24-hour retention tier rides along when supported and the reuse is sparse
755
+ * (OpenAI `prompt_cache_key` retention as of June 2026).
756
+ */
757
+ planRoutingKey(input, profile, analysis, prefixKey) {
758
+ const interval = intervalSecondsOf(input);
759
+ const wantsRetention = profile.supportsRetention === true && interval !== void 0 && interval > 3600;
760
+ const directives = [
761
+ {
762
+ kind: "routing-key",
763
+ key: prefixKey,
764
+ ...wantsRetention ? { retention: "24h" } : {}
765
+ }
766
+ ];
767
+ const retentionNote = wantsRetention && interval !== void 0 ? `, and reuse every ${formatCount(interval)}s outlives the default cache window, so the 24h retention tier is requested behind the key` : "";
768
+ return {
769
+ directives,
770
+ reasoning: `${profile.id} caches prefixes automatically on the server, the routing key only pins the ${formatCount(analysis.stableTokens)}-token stable prefix of ${formatCount(analysis.totalTokens)} prompt tokens to one cache shard, so keeping that prefix byte-stable is the real lever${retentionNote}.`,
771
+ extraFindings: []
772
+ };
773
+ }
774
+ /**
775
+ * Resource family (google): the cache is a server resource the host creates, reuses,
776
+ * refreshes, and deletes, billed per token-hour of storage while it stays alive.
777
+ */
778
+ planResource(input, profile, analysis, prefixKey, knownResource, pricing) {
779
+ const extraFindings = [];
780
+ const interval = intervalSecondsOf(input);
781
+ const ttlSeconds = interval === void 0 ? RESOURCE_TTL_DEFAULT_SECONDS : Math.min(RESOURCE_TTL_DEFAULT_SECONDS, Math.max(RESOURCE_TTL_MIN_SECONDS, interval * 4));
782
+ const resourceKey = prefixKey !== "" ? prefixKey : combineKeys([input.provider, input.model, input.agentId ?? ""]);
783
+ const callsPerHour = callsPerHourOf(input) ?? 1;
784
+ const breakEven = resourceBreakEven(analysis.stableTokens, callsPerHour, profile, pricing);
785
+ if (callsPerHour < 1) {
786
+ extraFindings.push({
787
+ severity: "warning",
788
+ code: "write-premium-trap",
789
+ message: `the storage trap: at ${formatCount(callsPerHour)} reuses per hour, per-token-hour storage on ${formatCount(analysis.stableTokens)} resident tokens outruns the read savings; raise reuse density or rely on implicit provider caching`
790
+ });
791
+ return {
792
+ directives: [
793
+ {
794
+ kind: "none",
795
+ reason: "below one reuse per hour the per-token-hour storage bill outruns the read savings, the storage trap, so no cache resource is worth keeping alive"
796
+ }
797
+ ],
798
+ ...breakEven !== void 0 ? { breakEven } : {},
799
+ reasoning: `Skipped the cache resource because ${formatCount(callsPerHour)} reuses per hour cannot cover storage billed for every hour the resource stays alive, the classic resource-family storage trap.`,
800
+ extraFindings
801
+ };
802
+ }
803
+ const action = knownResource ? "reuse" : "create";
804
+ const directives = [{ kind: "resource", action, resourceKey, ttlSeconds }];
805
+ const sentences = [
806
+ `${action === "create" ? "Creating" : "Reusing"} the server-side cache resource for this prefix with a ${formatCount(ttlSeconds)}s TTL, four times the ${interval === void 0 ? "default-assumed" : `${formatCount(interval)}s`} reuse interval clamped to [${formatCount(RESOURCE_TTL_MIN_SECONDS)}, ${formatCount(RESOURCE_TTL_DEFAULT_SECONDS)}], and the core swaps reuse for refresh inside the last 10 percent of that window.`
807
+ ];
808
+ if (breakEven !== void 0) {
809
+ sentences.push(breakEven.reasoning);
810
+ if (!breakEven.profitable) {
811
+ extraFindings.push({
812
+ severity: "warning",
813
+ code: "write-premium-trap",
814
+ message: `at ${formatCount(callsPerHour)} reuses per hour the per-token-hour storage bill on ${formatCount(analysis.stableTokens)} resident tokens exceeds the read savings; raise reuse density or shorten the resource TTL`
815
+ });
816
+ }
817
+ }
818
+ return {
819
+ directives,
820
+ ...breakEven !== void 0 ? { breakEven } : {},
821
+ reasoning: sentences.join(" "),
822
+ extraFindings
823
+ };
824
+ }
825
+ /**
826
+ * Passive family (groq, deepseek, ollama, lmstudio, huggingface, custom): no control
827
+ * surface exists, stable-first ordering and accounting are the whole contribution.
828
+ */
829
+ planPassive(profile) {
830
+ return {
831
+ directives: [
832
+ {
833
+ kind: "none",
834
+ reason: "provider caches automatically (or exposes no controls); RACS contributes structure linting and analytics"
835
+ }
836
+ ],
837
+ reasoning: `${profile.id} exposes no cache control surface, so stable-first segment ordering is the entire optimization, and the ledger still accounts every cached token the provider reports.`,
838
+ extraFindings: []
839
+ };
840
+ }
841
+ };
842
+
843
+ // src/plan/PrefixAnalyzer.ts
844
/** Date plus time of day, with optional seconds, fraction, and zone designator. */
var ISO_8601_DATETIME = /\b\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}(?::\d{2}(?:\.\d{1,9})?)?(?:Z|[+-]\d{2}:?\d{2})?\b/;
/** A free-standing run of exactly 13 or exactly 10 digits. */
var UNIX_EPOCH = /\b(?:\d{13}|\d{10})\b/;
/** The words "today" or "current time" within 32 non-digit characters of a digit, on one line. */
var RELATIVE_TIME_NEAR_DIGITS = /\b(?:today|current time)\b[^0-9\n\r]{0,32}[0-9]|[0-9][^0-9\n\r]{0,32}\b(?:today|current time)\b/i;
/** UUID shape with version nibble 4 and variant nibble 8, 9, a, or b. */
var UUID_V4 = /\b[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\b/i;
/** 24 or more consecutive hexadecimal characters. */
var HEX_RUN = /\b[0-9a-f]{24,}\b/i;
/** 24 or more base64-alphabet characters with optional padding; global, for `matchAll`. */
var BASE64_RUN = /[A-Za-z0-9+/]{24,}={0,2}/g;

/** Returns the first 8 characters of the `fnv1a64` hash of the matched text. */
var digestOf = (text) => {
  const hash = fnv1a64(text);
  return hash.slice(0, 8);
};

/**
 * Returns the text of the first match of `pattern` in `content`, or `undefined` when
 * the pattern does not match.
 */
var firstMatch = (pattern, content) => pattern.exec(content)?.[0];
855
var PrefixAnalyzer = class {
  /**
   * Walks the segments once to measure the prompt, then runs every structural lint.
   *
   * The leading stable run ends at the first segment declared volatile; only tokens in
   * front of that segment count toward `stableTokens`.
   *
   * @param input - The plan input whose `segments` are analyzed, in request order.
   * @param profile - Effective provider profile, supplies `minCacheableTokens` and `id`.
   * @returns Findings, the stable and total token counts, and the index of the first
   *   volatile segment (`segments.length` when there is none).
   */
  analyze(input, profile) {
    const { segments } = input;
    let totalTokens = 0;
    let stableTokens = 0;
    let orderedStableBoundary = segments.length;
    let stableRunOpen = true;

    for (let position = 0; position < segments.length; position += 1) {
      const segment = segments[position];
      const tokens = tokensOf(segment);
      totalTokens += tokens;
      if (!stableRunOpen) {
        continue;
      }
      if (segment.stability === "volatile") {
        // The first volatile segment closes the cacheable run for good.
        orderedStableBoundary = position;
        stableRunOpen = false;
      } else {
        stableTokens += tokens;
      }
    }

    const findings = [];
    this.lintSegmentOrder(segments, orderedStableBoundary, findings);
    this.lintVolatileEarly(
      segments,
      orderedStableBoundary,
      stableTokens,
      totalTokens,
      profile,
      findings
    );
    for (const segment of segments) {
      this.lintSegment(segment, findings);
    }
    this.lintBelowMinimum(stableTokens, profile, findings);
    return { findings, stableTokens, totalTokens, orderedStableBoundary };
  }

  /**
   * `'segment-order'`: reports the first volatile segment when any later segment is not
   * volatile. At most one finding is pushed, naming the first such later segment.
   */
  lintSegmentOrder(segments, orderedStableBoundary, findings) {
    const volatileSegment = segments[orderedStableBoundary];
    if (volatileSegment === undefined) {
      return;
    }
    const later = segments
      .slice(orderedStableBoundary + 1)
      .find((candidate) => candidate !== undefined && candidate.stability !== "volatile");
    if (later === undefined) {
      return;
    }
    findings.push({
      severity: "warning",
      code: "segment-order",
      segmentId: volatileSegment.id,
      message: `Volatile segment '${volatileSegment.id}' precedes ${later.stability} segment '${later.id}'. Prefix caches are left-anchored, so every token after '${volatileSegment.id}' is unreachable for the cache. Reorder stable-first: move '${later.id}' and every other stable segment ahead of '${volatileSegment.id}'.`
    });
  }

  /**
   * `'volatile-early'`: the first volatile segment starts inside the first half of the
   * prompt's tokens and the stable run in front of it is shorter than the provider
   * minimum, so no breakpoint-eligible boundary exists.
   *
   * The start offset of the first volatile segment is `stableTokens`, which is why the
   * first-half check compares `stableTokens * 2` against `totalTokens`. A missing or
   * non-positive `minCacheableTokens` is treated as a minimum of 1.
   */
  lintVolatileEarly(segments, orderedStableBoundary, stableTokens, totalTokens, profile, findings) {
    const volatileSegment = segments[orderedStableBoundary];
    if (volatileSegment === undefined || totalTokens === 0) {
      return;
    }
    const declaredMinimum = profile.minCacheableTokens;
    const minimumEligible =
      declaredMinimum !== undefined && declaredMinimum > 0 ? declaredMinimum : 1;
    const inFirstHalf = stableTokens * 2 < totalTokens;
    const runTooShort = stableTokens < minimumEligible;
    if (!inFirstHalf || !runTooShort) {
      return;
    }
    findings.push({
      severity: "error",
      code: "volatile-early",
      segmentId: volatileSegment.id,
      message: `Volatile segment '${volatileSegment.id}' sits inside the first half of the prompt (${stableTokens} of ${totalTokens} tokens precede it) and before any breakpoint-eligible boundary (provider minimum ${minimumEligible} tokens on '${profile.id}'). Nothing in this prompt can ever be cached. The usual culprits are timestamps or session metadata interpolated into the system prompt; move every per-call value to the end of the prompt and keep the opening segments byte-stable.`
    });
  }

  /**
   * Per-segment lints. The two declaration checks always run; the content heuristics
   * run only when `segment.content` is a string.
   */
  lintSegment(segment, findings) {
    const { role, stability } = segment;

    if (role === "tools" && stability === "volatile") {
      findings.push({
        severity: "error",
        code: "unstable-tools",
        segmentId: segment.id,
        message: `Tools segment '${segment.id}' is declared volatile. Breakpoint providers hash tool definitions first, so volatile tools defeat the cache for the entire request. Tool instability is almost always a serialization bug, fix key ordering or remove timestamps from descriptions, then declare the segment stable.`
      });
    }
    if (role === "dynamic" && stability === "stable") {
      findings.push({
        severity: "info",
        code: "missing-stability",
        segmentId: segment.id,
        message: `Segment '${segment.id}' has the dynamic role but is declared stable. Dynamic content is expected to differ on every call, which contradicts the declaration. Declare it volatile, or change the role if the content really is byte-stable.`
      });
    }

    const { content } = segment;
    if (typeof content !== "string") {
      return;
    }
    const isStable = stability === "stable";
    if (isStable || stability === "semi") {
      this.lintTimestamps(segment, content, findings);
    }
    if (isStable) {
      this.lintIdentifiers(segment, content, findings);
    }
  }

  /**
   * `'timestamp-in-stable'`: timestamp-like content inside a stable or semi segment.
   * All detected shapes are listed in a single finding; matched text is reported only
   * as a digest.
   */
  lintTimestamps(segment, content, findings) {
    const isoText = firstMatch(ISO_8601_DATETIME, content);
    const epochText = firstMatch(UNIX_EPOCH, content);
    const hits = [
      isoText === undefined ? undefined : `an ISO-8601 datetime (digest ${digestOf(isoText)})`,
      epochText === undefined ? undefined : `a 10-or-13-digit unix epoch (digest ${digestOf(epochText)})`,
      RELATIVE_TIME_NEAR_DIGITS.test(content) ? `the words 'today' or 'current time' near digits` : undefined
    ].filter((hit) => hit !== undefined);

    if (hits.length === 0) {
      return;
    }
    findings.push({
      severity: "warning",
      code: "timestamp-in-stable",
      segmentId: segment.id,
      message: `Segment '${segment.id}' is declared ${segment.stability} but contains ${hits.join(", and ")}. A timestamp changes the prefix on every call and silently defeats the cache. Move live time values into a volatile segment at the prompt tail.`
    });
  }

  /**
   * `'identifier-in-stable'`: per-request identifier shapes inside a stable segment.
   * Shapes are tried in order (UUID v4, long hex run, base64-like run) and only the
   * first hit is reported. A base64-like run counts only when it contains both a digit
   * and a letter.
   */
  lintIdentifiers(segment, content, findings) {
    let description;
    let sample = firstMatch(UUID_V4, content);
    if (sample !== undefined) {
      description = "a UUID v4-like identifier";
    }
    if (sample === undefined) {
      sample = firstMatch(HEX_RUN, content);
      if (sample !== undefined) {
        description = "a hex run of 24 or more characters";
      }
    }
    if (sample === undefined) {
      for (const [candidate] of content.matchAll(BASE64_RUN)) {
        if (candidate !== undefined && /\d/.test(candidate) && /[a-z]/i.test(candidate)) {
          sample = candidate;
          description = "a base64-like run of 24 or more characters";
          break;
        }
      }
    }
    if (sample === undefined || description === undefined) {
      return;
    }
    findings.push({
      severity: "warning",
      code: "identifier-in-stable",
      segmentId: segment.id,
      message: `Segment '${segment.id}' is declared stable but contains ${description} (digest ${digestOf(sample)}). Session ids and request ids churn per call, the same failure mode as a timestamp. Move per-request identifiers into a volatile segment.`
    });
  }

  /**
   * `'below-minimum'`: the stable prefix is shorter than the provider's positive
   * `minCacheableTokens`. Profiles without a positive minimum never trigger it.
   */
  lintBelowMinimum(stableTokens, profile, findings) {
    const minimum = profile.minCacheableTokens;
    const hasNoMinimum = minimum === undefined || minimum <= 0;
    if (hasNoMinimum || stableTokens >= minimum) {
      return;
    }
    findings.push({
      severity: "info",
      code: "below-minimum",
      message: `The stable prefix totals ${stableTokens} tokens, below the ${minimum}-token minimum '${profile.id}' will cache. The provider would silently cache nothing. Lengthen the stable prefix, or accept that this prompt rides uncached on this provider.`
    });
  }
};
1036
+
1037
+ // src/providers/profiles.ts
1038
/** Every adapter family a provider profile may declare, used to validate overrides. */
var ADAPTER_FAMILIES = ["breakpoint", "routing-key", "resource", "passive"];
1044
/**
 * Builds a breakpoint-family profile carrying the Anthropic numbers: 1024-token
 * minimum, 4 breakpoints, 5m and 1h TTL tiers, 1.25x and 2x write multipliers, and a
 * 0.1x read multiplier. Every call returns a fresh object with its own `ttls` array,
 * so profiles never share mutable state.
 */
function anthropicEquivalentProfile(id, notes) {
  return {
    id,
    family: "breakpoint",
    minCacheableTokens: 1024,
    maxBreakpoints: 4,
    ttls: ["5m", "1h"],
    writeMultiplier5m: 1.25,
    writeMultiplier1h: 2,
    readMultiplier: 0.1,
    notes
  };
}

/** Built-in provider profiles, keyed by provider id. */
var PROVIDER_PROFILES = {
  /**
   * Anthropic Claude API: explicit `cache_control` breakpoints, two TTL tiers.
   *
   * Cache writes are billed at 1.25x base input on the 5-minute tier and 2x on the
   * 1-hour tier, cached reads at 0.1x, with at most 4 breakpoints per request. The
   * 1024-token minimum is the conservative common case; the newest frontier models
   * cache from 512 tokens and the small models need 4096, so override per model through
   * {@link RACSOptions.profiles} when targeting those.
   *
   * Source: Anthropic prompt caching documentation,
   * https://docs.claude.com/en/docs/build-with-claude/prompt-caching, retrieved June 2026.
   */
  anthropic: anthropicEquivalentProfile(
    "anthropic",
    "Explicit cache_control breakpoints, up to 4 per request, 5m and 1h TTL tiers. Minimum cacheable prefix is 1024 tokens in the common case, 512 on the newest frontier models, 4096 on small models, override per model via options.profiles."
  ),

  /**
   * OpenAI: automatic server-side prefix caching, steered by `prompt_cache_key`.
   *
   * Caching happens in 128-token increments above a 1024-token minimum, with no write
   * counter and no write premium. The cached-read discount ranges from 50 to 90 percent
   * depending on the model, so the 0.25 read multiplier is a conservative default;
   * override per model when the exact discount is known. Extended 24-hour retention
   * attaches to `prompt_cache_key`, hence `supportsRetention`.
   *
   * Source: OpenAI prompt caching guide,
   * https://platform.openai.com/docs/guides/prompt-caching, retrieved June 2026.
   */
  openai: {
    id: "openai",
    family: "routing-key",
    minCacheableTokens: 1024,
    readMultiplier: 0.25,
    supportsRetention: true,
    notes: "Automatic prefix caching in 128-token increments above 1024 tokens, prompt_cache_key routing, no write counter, read discount varies 50 to 90 percent by model so 0.25 is a conservative default."
  },

  /**
   * Google Gemini: implicit caching on 2.5 and newer models, plus the explicit
   * `cachedContents` resource lifecycle with a caller-set TTL and per-token-hour storage
   * billing at 1.0 USD per million tokens per hour.
   *
   * Source: Google Gemini API context caching documentation,
   * https://ai.google.dev/gemini-api/docs/caching, retrieved June 2026.
   */
  google: {
    id: "google",
    family: "resource",
    minCacheableTokens: 2048,
    readMultiplier: 0.1,
    storagePerMTokHour: 1,
    notes: "Implicit caching on 2.5+ models plus explicit cachedContents lifecycle with TTL and per-token-hour storage billing."
  },

  /**
   * Amazon Bedrock: `cachePoint` blocks on the Converse API, with Anthropic-equivalent
   * breakpoint semantics and multipliers.
   *
   * Source: Amazon Bedrock prompt caching documentation,
   * https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html, retrieved
   * June 2026.
   */
  bedrock: anthropicEquivalentProfile(
    "bedrock",
    "cachePoint blocks on the Converse API, Anthropic-equivalent breakpoint semantics."
  ),

  /**
   * xAI Grok: automatic prefix caching, steerable with the `x-grok-conv-id` header and
   * `prompt_cache_key`; cached reads cost roughly 0.16x base input.
   *
   * Source: xAI API documentation, https://docs.x.ai/, retrieved June 2026.
   */
  xai: {
    id: "xai",
    family: "routing-key",
    minCacheableTokens: 1024,
    readMultiplier: 0.16,
    notes: "Automatic prefix caching, steerable via the x-grok-conv-id header and prompt_cache_key."
  },

  /**
   * Groq: automatic prefix caching on gpt-oss models with no control surface; cached
   * reads cost 0.5x base input and entries expire after roughly 2 hours idle.
   *
   * Source: Groq prompt caching documentation,
   * https://console.groq.com/docs/prompt-caching, retrieved June 2026.
   */
  groq: {
    id: "groq",
    family: "passive",
    minCacheableTokens: 128,
    readMultiplier: 0.5,
    notes: "Automatic on gpt-oss models, no controls, entries expire after 2 hours idle."
  },

  /**
   * DeepSeek: disk-based automatic context caching that reports cache hit and miss
   * tokens; cached reads cost 0.1x base input.
   *
   * Source: DeepSeek context caching documentation,
   * https://api-docs.deepseek.com/guides/kv_cache, retrieved June 2026.
   */
  deepseek: {
    id: "deepseek",
    family: "passive",
    readMultiplier: 0.1,
    notes: "Disk-based automatic context caching with hit and miss token reporting."
  },

  /**
   * Mistral: automatic caching in 64-token blocks with `prompt_cache_key` routing;
   * cached reads cost 0.1x base input.
   *
   * Source: Mistral platform documentation, https://docs.mistral.ai/, retrieved June 2026.
   */
  mistral: {
    id: "mistral",
    family: "routing-key",
    minCacheableTokens: 64,
    readMultiplier: 0.1,
    notes: "64-token cache blocks, prompt_cache_key routing."
  },

  /**
   * OpenRouter: normalizes `cache_control` passthrough and `cached_tokens` reporting
   * across upstream providers. The numbers are conservative defaults because the
   * effective discount is whatever the routed upstream charges.
   *
   * Source: OpenRouter prompt caching documentation,
   * https://openrouter.ai/docs/features/prompt-caching, retrieved June 2026.
   */
  openrouter: {
    id: "openrouter",
    family: "routing-key",
    minCacheableTokens: 1024,
    readMultiplier: 0.25,
    notes: "Normalizes cache_control passthrough and cached_tokens reporting across upstreams, effective discount depends on the routed upstream."
  },

  /**
   * Moonshot Kimi: platform caching reached through the OpenAI-compatible surface. Its
   * public semantics are less documented than its peers as of June 2026, so the profile
   * carries conservative defaults that mirror the OpenAI numbers.
   *
   * Source: Moonshot platform documentation, https://platform.moonshot.ai/docs, retrieved
   * June 2026.
   */
  moonshot: {
    id: "moonshot",
    family: "routing-key",
    minCacheableTokens: 1024,
    readMultiplier: 0.25,
    notes: "Kimi platform caching via the OpenAI-compatible surface, semantics less documented, conservative defaults."
  },

  /**
   * Ollama: local runtime KV reuse with no billing dimension; analytics measure
   * latency-motivated reuse only.
   *
   * Source: Ollama documentation, https://docs.ollama.com/, retrieved June 2026.
   */
  ollama: {
    id: "ollama",
    family: "passive",
    notes: "Local runtime KV reuse, no billing, analytics measure latency-motivated reuse only."
  },

  /**
   * LM Studio: local runtime KV reuse with no billing dimension, same posture as Ollama.
   *
   * Source: LM Studio documentation, https://lmstudio.ai/docs, retrieved June 2026.
   */
  lmstudio: {
    id: "lmstudio",
    family: "passive",
    notes: "Local runtime KV reuse, no billing, analytics measure latency-motivated reuse only."
  },

  /**
   * Hugging Face Inference Endpoints: no public prefix-cache controls as of June 2026,
   * so the profile is passive and carries no numbers.
   *
   * Source: Hugging Face Inference Endpoints documentation,
   * https://huggingface.co/docs/inference-endpoints, retrieved June 2026.
   */
  huggingface: {
    id: "huggingface",
    family: "passive",
    notes: "Inference Endpoints without public prefix-cache controls as of June 2026."
  },

  /**
   * Claude models on Microsoft Foundry honor `cache_control` unchanged, so the profile
   * mirrors the Anthropic numbers.
   *
   * Source: Microsoft Foundry documentation for Anthropic Claude models,
   * https://learn.microsoft.com/en-us/azure/ai-foundry/, retrieved June 2026.
   */
  "microsoft-foundry": anthropicEquivalentProfile(
    "microsoft-foundry",
    "Claude on Microsoft Foundry honors cache_control, Anthropic breakpoint semantics."
  ),

  /**
   * Nous Research Hermes Agent rides Anthropic `cache_control` semantics with its fixed
   * system_and_3 layout (the system prompt plus the last 3 messages). The multipliers,
   * the 1024-token cacheable minimum, and the breakpoint budget are therefore the
   * Anthropic numbers, and RACS plans superior layouts for it.
   *
   * Sources: Hermes Agent system_and_3 cache layout, observed June 2026, and Anthropic
   * prompt caching documentation,
   * https://docs.claude.com/en/docs/build-with-claude/prompt-caching, retrieved June 2026.
   */
  hermes: anthropicEquivalentProfile(
    "hermes",
    "Hermes Agent system_and_3 layout (system plus last 3 messages) rides Anthropic cache_control semantics, RACS plans superior layouts for it."
  ),

  /**
   * Escape hatch for providers RACS does not name yet, fully caller-defined through
   * {@link RACSOptions.profiles}. It defaults to passive so that a bare 'custom' plan
   * still orders segments, lints, and accounts usage without inventing numbers.
   */
  custom: {
    id: "custom",
    family: "passive",
    notes: "Fully caller-defined via the options.profiles override."
  }
};
1289
/**
 * Resolves the effective profile for a provider id, applying an optional per-provider
 * override on top of the built-in profile.
 *
 * Only defined override fields replace built-in values, and the `id` always stays the
 * built-in one. When no override exists for `id`, the built-in profile object itself is
 * returned (not a copy).
 *
 * @param id - Provider id, must be an own key of `PROVIDER_PROFILES`.
 * @param overrides - Optional map of provider id to partial profile.
 * @returns The built-in profile, or a new merged profile object when an override exists.
 * @throws The error built by `RacsError.invalid` when `id` names no built-in profile, or
 *   when the merged `family` is not one of `ADAPTER_FAMILIES`.
 */
function resolveProfile(id, overrides) {
  // Own-property check: a bare bracket lookup would also resolve inherited members such
  // as 'constructor' or 'toString' and let them pass as a provider profile.
  if (!Object.hasOwn(PROVIDER_PROFILES, id)) {
    throw RacsError.invalid(
      `Unknown provider id '${String(id)}', expected one of: ${Object.keys(PROVIDER_PROFILES).join(", ")}.`
    );
  }
  const base = PROVIDER_PROFILES[id];

  const override = overrides?.[id];
  // A null entry is treated like a missing one instead of failing on property access.
  if (override === undefined || override === null) {
    return base;
  }

  // Listed in the order new keys are appended to the merged profile.
  const overridableFields = [
    "family",
    "minCacheableTokens",
    "maxBreakpoints",
    "ttls",
    "writeMultiplier5m",
    "writeMultiplier1h",
    "readMultiplier",
    "supportsRetention",
    "storagePerMTokHour",
    "notes"
  ];
  const merged = { ...base };
  for (const field of overridableFields) {
    const value = override[field];
    if (value !== undefined) {
      merged[field] = value;
    }
  }
  // The id is never overridable, whatever the override carries.
  merged.id = base.id;

  if (!ADAPTER_FAMILIES.includes(merged.family)) {
    throw RacsError.invalid(
      `Profile override for '${id}' sets family '${String(merged.family)}', expected one of: ${ADAPTER_FAMILIES.join(", ")}.`
    );
  }
  return merged;
}
1321
+
1322
+ // src/schedule/TtlKeeper.ts
1323
/** Fraction of the TTL after which a keep-warm refresh is scheduled (90 percent). */
var REFRESH_FRACTION = 0.9;

/**
 * Converts a TTL to milliseconds.
 *
 * @param ttl - The tier name `"5m"` or `"1h"`, or a number of seconds.
 * @returns 300000 for `"5m"`, 3600000 for `"1h"`, otherwise `ttl * 1000`.
 */
function ttlToMillis(ttl) {
  switch (ttl) {
    case "5m":
      return 300000;
    case "1h":
      return 3600000;
    default:
      // Anything else is taken as a count of seconds.
      return ttl * 1000;
  }
}
1333
/**
 * Keep-warm bookkeeping: remembers, per prefix key, when the last cache write happened and
 * when the next refresh touch is due. Holds at most `capacity` prefixes, evicting the least
 * recently tracked one beyond that.
 */
var TtlKeeper = class _TtlKeeper {
  capacity;
  /** Map iteration order doubles as recency order, oldest entry first. */
  entries = /* @__PURE__ */ new Map();
  /**
   * @param capacity - Cap on distinct tracked prefixes before LRU eviction.
   */
  constructor(capacity = 1e3) {
    this.capacity = capacity;
  }
  /**
   * Records or replaces the keep-warm entry for the plan's prefix.
   *
   * The shortest TTL among the plan's breakpoint and resource directives wins, and
   * `refreshAt = lastWriteAt + REFRESH_FRACTION * ttlMillis`. A resource `delete` directive
   * drops the prefix instead, and a plan without any TTL-bearing directive is ignored.
   *
   * @param plan - The plan whose directives describe the cache writes to keep warm.
   * @param now - Milliseconds since the Unix epoch, from the injected engine clock,
   *   taken as the moment of the cache write.
   */
  track(plan, now) {
    let ttl;
    for (const directive of plan.directives) {
      if (directive.kind === "breakpoint") {
        if (ttl === void 0 || ttlToMillis(directive.ttl) < ttlToMillis(ttl)) {
          ttl = directive.ttl;
        }
      } else if (directive.kind === "resource") {
        if (directive.action === "delete") {
          this.remove(plan.prefixKey);
          return;
        }
        if (ttl === void 0 || ttlToMillis(directive.ttlSeconds) < ttlToMillis(ttl)) {
          ttl = directive.ttlSeconds;
        }
      }
    }
    if (ttl === void 0) {
      return;
    }
    const entry = {
      prefixKey: plan.prefixKey,
      provider: plan.provider,
      model: plan.model,
      ttl,
      lastWriteAt: now,
      refreshAt: now + REFRESH_FRACTION * ttlToMillis(ttl)
    };
    // Delete before set so the prefix moves to the most-recent end of the map.
    this.entries.delete(plan.prefixKey);
    this.entries.set(plan.prefixKey, entry);
    if (this.entries.size > this.capacity) {
      const oldest = this.entries.keys().next();
      if (!oldest.done) {
        this.entries.delete(oldest.value);
      }
    }
  }
  /**
   * Returns every entry whose refresh touch is due at or before `now`, most overdue first,
   * ties broken by prefix key for deterministic output. Read-only: the host performs the
   * warming call and then reports it through {@link TtlKeeper.markRefreshed}.
   *
   * @param now - Milliseconds since the Unix epoch, from the injected engine clock.
   */
  due(now) {
    const due = [];
    for (const entry of this.entries.values()) {
      if (entry.refreshAt <= now) {
        due.push(entry);
      }
    }
    due.sort(
      (a, b) => a.refreshAt === b.refreshAt ? a.prefixKey < b.prefixKey ? -1 : a.prefixKey > b.prefixKey ? 1 : 0 : a.refreshAt - b.refreshAt
    );
    return due;
  }
  /**
   * Slides the TTL window after the host touched the cache: `lastWriteAt` becomes `now`
   * and `refreshAt` moves to 90 percent of the entry's TTL after it. Unknown prefixes are
   * ignored, the host may legitimately refresh a prefix the keeper already evicted.
   *
   * @param prefixKey - The prefix the host kept warm.
   * @param now - Milliseconds since the Unix epoch of the touch.
   */
  markRefreshed(prefixKey, now) {
    const entry = this.entries.get(prefixKey);
    if (entry === void 0) {
      return;
    }
    const updated = {
      ...entry,
      lastWriteAt: now,
      refreshAt: now + REFRESH_FRACTION * ttlToMillis(entry.ttl)
    };
    this.entries.delete(prefixKey);
    this.entries.set(prefixKey, updated);
  }
  /**
   * Drops the entry for a prefix, used when the host abandons a cache or a resource
   * directive deletes it. Unknown prefixes are ignored.
   */
  remove(prefixKey) {
    this.entries.delete(prefixKey);
  }
  /**
   * Serializes every entry, least-recently-tracked first. Pure JSON data, round-trips
   * through {@link TtlKeeper.fromJSON}.
   */
  toJSON() {
    return { capacity: this.capacity, entries: [...this.entries.values()] };
  }
  /**
   * Rebuilds a keeper from {@link TtlKeeper.toJSON} output, restoring entries and their
   * recency order.
   *
   * Entries are checked one by one: anything that is not an object with a string
   * `prefixKey`, a finite `refreshAt` and a TTL that converts to a finite duration is
   * skipped, so a partially corrupt snapshot keeps its healthy entries. If the snapshot
   * holds more entries than `capacity`, the oldest ones are dropped.
   *
   * @param json - A previously serialized keeper.
   */
  static fromJSON(json) {
    const keeper = new _TtlKeeper(json.capacity);
    for (const entry of json.entries) {
      const restorable = typeof entry === "object" && entry !== null && typeof entry.prefixKey === "string" && Number.isFinite(entry.refreshAt) && (typeof entry.ttl === "string" || typeof entry.ttl === "number") && Number.isFinite(ttlToMillis(entry.ttl));
      if (!restorable) {
        continue;
      }
      keeper.entries.set(entry.prefixKey, entry);
    }
    // track() evicts only one entry per insert, so an oversized snapshot must be trimmed here.
    while (keeper.entries.size > keeper.capacity) {
      const oldest = keeper.entries.keys().next();
      if (oldest.done) {
        break;
      }
      keeper.entries.delete(oldest.value);
    }
    return keeper;
  }
};
1458
+
1459
+ // src/core/createRACS.ts
1460
/** Seed turned into the plan-id salt when `options.seed` is not supplied. */
const DEFAULT_SEED = 7;
/** Prefix cap applied when `options.maxPrefixes` is not supplied. */
const DEFAULT_MAX_PREFIXES = 1000;
/** Maximum number of drift reports the engine retains in its drift ring. */
const DRIFT_RING_CAPACITY = 200;
/** Fraction of a resource's TTL after which a planned `reuse` is upgraded to `refresh`. */
const RESOURCE_REFRESH_FRACTION = 0.9;
1464
/**
 * Tells whether a value is a non-null object (arrays included), i.e. safe to read
 * properties from.
 */
function isRecord(value) {
  return value !== null && typeof value === "object";
}
1467
/**
 * Structural check for one serialized resource record: an object with string `key` and
 * `provider` plus numeric `ttlSeconds` and `lastWriteAt`.
 */
function isResourceRecordJSON(value) {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  if (typeof value.key !== "string" || typeof value.provider !== "string") {
    return false;
  }
  return typeof value.ttlSeconds === "number" && typeof value.lastWriteAt === "number";
}
1470
/**
 * Structural check for one serialized drift report: optional string `agentId`, string
 * `prefixKey` and `previousKey`, an all-string `changedSegmentIds` array, and numeric
 * `invalidatedTokens` and `timestamp`.
 */
function isDriftReport(value) {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  if (value.agentId !== void 0 && typeof value.agentId !== "string") {
    return false;
  }
  if (typeof value.prefixKey !== "string" || typeof value.previousKey !== "string") {
    return false;
  }
  const segmentIds = value.changedSegmentIds;
  if (!Array.isArray(segmentIds) || !segmentIds.every((id) => typeof id === "string")) {
    return false;
  }
  return typeof value.invalidatedTokens === "number" && typeof value.timestamp === "number";
}
1473
/**
 * Asserts that a value is a usable count.
 *
 * @param value - Candidate count.
 * @param field - Field label placed at the start of the error message.
 * @throws RacsError (via `RacsError.invalid`) unless `value` is a finite number >= 0.
 */
function requireCount(value, field) {
  // Number.isFinite never coerces, so non-numbers fail here as well.
  const isCount = Number.isFinite(value) && value >= 0;
  if (isCount) {
    return;
  }
  throw RacsError.invalid(
    `${field} must be a finite non-negative number, received ${String(value)}.`
  );
}
1480
/**
 * Validates the structure of a plan input before any planning work happens.
 *
 * Checks that the input is an object with a non-empty `model` string and a non-empty
 * `segments` array, and that every segment is an object with a unique non-empty string
 * `id`, either string `content` or a non-empty `contentHash`, and, when present, a finite
 * non-negative `tokens` count.
 *
 * @param input - The candidate plan input.
 * @throws RacsError (via `RacsError.invalid`) describing the first violation found.
 */
function validatePlanInput(input) {
  if (!isRecord(input)) {
    throw RacsError.invalid(`PlanInput must be an object, received ${typeof input}.`);
  }
  if (typeof input.model !== "string" || input.model === "") {
    throw RacsError.invalid("PlanInput.model must be a non-empty model identifier string.");
  }
  if (!Array.isArray(input.segments) || input.segments.length === 0) {
    throw RacsError.invalid("PlanInput.segments must be a non-empty array of prompt segments.");
  }
  const seen = /* @__PURE__ */ new Set();
  for (const segment of input.segments) {
    // Without this guard a null or undefined element surfaces as a raw TypeError on
    // `segment.id` instead of the validation error callers expect.
    if (!isRecord(segment)) {
      throw RacsError.invalid(
        `Every segment must be an object, received ${segment === null ? "null" : typeof segment}.`
      );
    }
    if (typeof segment.id !== "string" || segment.id === "") {
      throw RacsError.invalid("Every segment needs a non-empty string id unique within the plan.");
    }
    if (seen.has(segment.id)) {
      throw RacsError.invalid(
        `Segment id '${segment.id}' appears more than once, ids must be unique within one plan.`
      );
    }
    seen.add(segment.id);
    if (typeof segment.content !== "string" && (typeof segment.contentHash !== "string" || segment.contentHash === "")) {
      throw RacsError.invalid(
        `Segment '${segment.id}' carries neither content nor contentHash, provide at least one so the segment can be keyed.`
      );
    }
    if (segment.tokens !== void 0) {
      requireCount(segment.tokens, `Segment '${segment.id}' tokens`);
    }
  }
}
1511
+ function validateUsage(usage) {
1512
+ if (!isRecord(usage)) {
1513
+ throw RacsError.invalid(`CacheUsage must be an object, received ${typeof usage}.`);
1514
+ }
1515
+ const provider = usage.provider;
1516
+ if (typeof provider !== "string" || provider === "") {
1517
+ throw RacsError.invalid("CacheUsage.provider must be a non-empty provider id string.");
1518
+ }
1519
+ if (typeof usage.model !== "string" || usage.model === "") {
1520
+ throw RacsError.invalid("CacheUsage.model must be a non-empty model identifier string.");
1521
+ }
1522
+ requireCount(usage.inputTokens, "CacheUsage.inputTokens");
1523
+ requireCount(usage.cacheReadTokens, "CacheUsage.cacheReadTokens");
1524
+ if (usage.cacheWriteTokens5m !== void 0) {
1525
+ requireCount(usage.cacheWriteTokens5m, "CacheUsage.cacheWriteTokens5m");
1526
+ }
1527
+ if (usage.cacheWriteTokens1h !== void 0) {
1528
+ requireCount(usage.cacheWriteTokens1h, "CacheUsage.cacheWriteTokens1h");
1529
+ }
1530
+ if (usage.timestamp !== void 0) {
1531
+ requireCount(usage.timestamp, "CacheUsage.timestamp");
1532
+ }
1533
+ }
1534
/**
 * Returns the hash that keys a segment: the caller-supplied `contentHash` when it is a
 * non-empty string, otherwise the `fnv1a64` hash of the segment's content (empty string
 * when content is absent).
 */
function hashOf(segment) {
  const supplied = segment.contentHash;
  const hasSupplied = typeof supplied === "string" && supplied !== "";
  return hasSupplied ? supplied : fnv1a64(segment.content ?? "");
}
1540
/**
 * The engine returned by {@link createRACS}. It plans cache directives for a prompt, records
 * usage into the ledger, tracks keep-warm refreshes and resource-family caches, reports
 * prefix drift, and persists all of that through an optional state backend.
 */
var RacsEngine = class {
  profiles;
  pricing;
  maxPrefixes;
  clock;
  salt;
  state;
  analyzer = new PrefixAnalyzer();
  planner = new Planner();
  /** Replaced wholesale when a persisted section restores, hence not readonly. */
  ledger;
  fingerprints;
  keeper;
  /** Live resource-family caches by resource key, the planner's `knownResource` source. */
  resources = /* @__PURE__ */ new Map();
  /** Chronological drift ring, oldest first, newest last, capacity {@link DRIFT_RING_CAPACITY}. */
  driftRing = [];
  /**
   * Every prefix key registered for keeper and resource bookkeeping, capped at
   * maxPrefixes, each mapped to its provider so {@link RACS.invalidate} can clear by
   * provider, the shape credential rotation needs.
   */
  prefixKeys = /* @__PURE__ */ new Map();
  listeners = [];
  planCounter = 0;
  /** Resolves when the state backend finished restoring, awaited by flush to avoid races. */
  restored;
  /**
   * @param options - Optional `profiles`, `pricing`, `maxPrefixes` (default
   *   {@link DEFAULT_MAX_PREFIXES}), `clock` (default `Date.now`), `seed` (default
   *   {@link DEFAULT_SEED}) and `state` backend. When a backend is given, restoring from it
   *   starts immediately and is tracked by `restored`.
   */
  constructor(options = {}) {
    this.profiles = options.profiles;
    this.pricing = options.pricing;
    this.maxPrefixes = options.maxPrefixes ?? DEFAULT_MAX_PREFIXES;
    this.clock = options.clock ?? (() => Date.now());
    this.salt = String(options.seed ?? DEFAULT_SEED);
    this.state = options.state;
    this.ledger = new Ledger(this.pricing, this.maxPrefixes);
    this.fingerprints = new Fingerprints(this.maxPrefixes);
    this.keeper = new TtlKeeper(this.maxPrefixes);
    this.restored = this.state === void 0 ? Promise.resolve() : this.restore(this.state);
  }
  /**
   * Builds a cache plan for one prompt.
   *
   * The prefix key combines the hashes of the segments before the analyzer's stable
   * boundary with provider, model and agent id. Resource `reuse` directives are upgraded to
   * `refresh` once {@link RESOURCE_REFRESH_FRACTION} of the recorded TTL has elapsed. Drift
   * is observed and reported, and, unless the prefix cap is reached for a new prefix, the
   * plan is registered for keep-warm tracking and resource bookkeeping.
   *
   * @param input - The plan input, validated by `validatePlanInput`.
   * @returns The plan, also emitted as a `plan.created` event.
   * @throws RacsError when the input fails validation.
   */
  plan(input) {
    validatePlanInput(input);
    const profile = resolveProfile(input.provider, this.profiles);
    const now = this.clock();
    const segmentHashes = /* @__PURE__ */ new Map();
    for (const segment of input.segments) {
      segmentHashes.set(segment.id, hashOf(segment));
    }
    const analysis = this.analyzer.analyze(input, profile);
    const stableHashes = [];
    for (const segment of input.segments.slice(0, analysis.orderedStableBoundary)) {
      stableHashes.push(segmentHashes.get(segment.id) ?? "");
    }
    const prefixKey = combineKeys([
      ...stableHashes,
      input.provider,
      input.model,
      input.agentId ?? ""
    ]);
    let knownResource = false;
    if (profile.family === "resource") {
      const record = this.resources.get(prefixKey);
      if (record !== void 0) {
        // A record past its full TTL is dropped rather than offered to the planner.
        if (now >= record.lastWriteAt + record.ttlSeconds * 1e3) {
          this.resources.delete(prefixKey);
        } else {
          knownResource = true;
        }
      }
    }
    const result = this.planner.plan(
      input,
      profile,
      analysis,
      prefixKey,
      this.pricing?.[input.model],
      knownResource
    );
    const directives = result.directives.map((directive) => {
      if (directive.kind !== "resource" || directive.action !== "reuse") {
        return directive;
      }
      const record = this.resources.get(directive.resourceKey);
      if (record !== void 0 && now >= record.lastWriteAt + RESOURCE_REFRESH_FRACTION * record.ttlSeconds * 1e3) {
        return { ...directive, action: "refresh" };
      }
      return directive;
    });
    this.planCounter += 1;
    const plan = {
      planId: `rx-${this.planCounter}-${shortId(this.planCounter, this.salt)}`,
      provider: input.provider,
      model: input.model,
      family: profile.family,
      prefixKey,
      stableTokens: analysis.stableTokens,
      totalTokens: analysis.totalTokens,
      directives,
      findings: [...analysis.findings, ...result.extraFindings],
      ...result.breakEven !== void 0 ? { breakEven: result.breakEven } : {},
      reasoning: result.reasoning
    };
    const report = this.fingerprints.observe(
      input,
      prefixKey,
      segmentHashes,
      analysis.stableTokens,
      now
    );
    if (report !== void 0) {
      this.pushDrift(report);
      this.emit({ type: "prefix.drifted", report });
    }
    if (!this.prefixKeys.has(prefixKey) && this.prefixKeys.size >= this.maxPrefixes) {
      this.emit({
        type: "limit.reached",
        scope: "prefixes",
        detail: `The ${this.maxPrefixes}-prefix cap is reached, plan for new prefix '${prefixKey}' was served without keep-warm tracking or resource bookkeeping.`,
        timestamp: now
      });
    } else {
      this.prefixKeys.set(prefixKey, input.provider);
      this.keeper.track(plan, now);
      this.applyResourceDirectives(input.provider, directives, now);
    }
    this.emit({ type: "plan.created", plan, timestamp: now });
    return plan;
  }
  /**
   * Runs validation and prefix analysis only and returns the analyzer's findings, without
   * touching any engine state.
   *
   * @throws RacsError when the input fails validation.
   */
  lint(input) {
    validatePlanInput(input);
    const profile = resolveProfile(input.provider, this.profiles);
    return this.analyzer.analyze(input, profile).findings;
  }
  /**
   * Records one usage report in the ledger, stamping it with the engine clock when it
   * carries no timestamp. Emits `usage.recorded`, plus `limit.reached` when the ledger
   * evicted an aggregate to make room.
   *
   * @throws RacsError when the usage report fails validation.
   */
  record(usage) {
    validateUsage(usage);
    const timestamp = usage.timestamp ?? this.clock();
    const stamped = usage.timestamp !== void 0 ? usage : { ...usage, timestamp };
    const { hit, evicted } = this.ledger.record(stamped);
    this.emit({ type: "usage.recorded", usage: stamped, hit, timestamp });
    if (evicted !== void 0) {
      this.emit({
        type: "limit.reached",
        scope: "ledger",
        detail: `The ledger evicted least-recently-used aggregate '${evicted}' to stay within its cap.`,
        timestamp
      });
    }
  }
  /** Returns the ledger's statistics for the given filter. */
  stats(filter) {
    return this.ledger.stats(filter);
  }
  /**
   * Returns the keep-warm entries due at `now` (engine clock when omitted) and emits one
   * `refresh.due` event per entry.
   */
  schedule(now) {
    const at = now ?? this.clock();
    const due = this.keeper.due(at);
    for (const entry of due) {
      this.emit({ type: "refresh.due", entry, timestamp: at });
    }
    return due;
  }
  /** Reports that the host touched a prefix at `now` (engine clock when omitted). */
  markRefreshed(prefixKey, now) {
    this.keeper.markRefreshed(prefixKey, now ?? this.clock());
  }
  /**
   * Returns a copy of the retained drift reports, oldest first. With `limit`, only the
   * newest `limit` reports are returned, and a non-positive limit yields an empty array.
   */
  drifts(limit) {
    if (limit === void 0) {
      return [...this.driftRing];
    }
    if (limit <= 0) {
      return [];
    }
    return this.driftRing.slice(-limit);
  }
  /**
   * Forgets every prefix matching the filter across prefix registry, keeper, resources and
   * fingerprints, emitting a `resource.action` delete event for each dropped resource.
   *
   * @param filter - Optional `prefixKey` and/or `provider`; no filter matches everything.
   * @returns The number of distinct keys that were invalidated.
   */
  invalidate(filter) {
    const now = this.clock();
    const keyFilter = filter?.prefixKey;
    const providerFilter = filter?.provider;
    const matchesKey = (key) => keyFilter === void 0 || key === keyFilter;
    const matched = /* @__PURE__ */ new Set();
    for (const [key, provider] of this.prefixKeys) {
      if (matchesKey(key) && (providerFilter === void 0 || provider === providerFilter)) {
        matched.add(key);
      }
    }
    for (const [key, record] of this.resources) {
      if (matchesKey(key) && (providerFilter === void 0 || record.provider === providerFilter)) {
        matched.add(key);
      }
    }
    // With a provider filter, fingerprints are pruned only for keys already matched above.
    const pruned = this.fingerprints.prune(
      (key) => providerFilter === void 0 ? matchesKey(key) : matched.has(key)
    );
    for (const key of pruned) {
      matched.add(key);
    }
    for (const key of matched) {
      this.prefixKeys.delete(key);
      this.keeper.remove(key);
      const record = this.resources.get(key);
      if (record !== void 0) {
        this.resources.delete(key);
        this.emit({
          type: "resource.action",
          directive: {
            kind: "resource",
            action: "delete",
            resourceKey: key,
            ttlSeconds: record.ttlSeconds
          },
          timestamp: now
        });
      }
    }
    return matched.size;
  }
  /** Resolves the provider profile, applying this engine's profile overrides. */
  profileOf(provider) {
    return resolveProfile(provider, this.profiles);
  }
  /**
   * Subscribes a listener to engine events.
   *
   * @returns A function that removes this listener again.
   */
  on(listener) {
    this.listeners.push(listener);
    return () => {
      const index = this.listeners.indexOf(listener);
      if (index !== -1) {
        this.listeners.splice(index, 1);
      }
    };
  }
  /**
   * Saves a version 1 snapshot of ledger, fingerprints, keeper, resources and drifts to the
   * state backend, after the startup restore has finished. A no-op without a backend.
   */
  async flush() {
    if (this.state === void 0) {
      return;
    }
    await this.restored;
    const resources = [];
    for (const [key, record] of this.resources) {
      resources.push({
        key,
        provider: record.provider,
        ttlSeconds: record.ttlSeconds,
        lastWriteAt: record.lastWriteAt
      });
    }
    const snapshot = {
      version: 1,
      savedAt: this.clock(),
      data: {
        ledger: this.ledger.toJSON(),
        fingerprints: this.fingerprints.toJSON(),
        keeper: this.keeper.toJSON(),
        resources,
        drifts: [...this.driftRing]
      }
    };
    await this.state.save(snapshot);
  }
  /** Flushes state and then drops every listener. */
  async close() {
    await this.flush();
    this.listeners.length = 0;
  }
  /** Appends one drift report, dropping the oldest beyond {@link DRIFT_RING_CAPACITY}. */
  pushDrift(report) {
    this.driftRing.push(report);
    if (this.driftRing.length > DRIFT_RING_CAPACITY) {
      this.driftRing.shift();
    }
  }
  /**
   * Mirrors resource directives into the registry and telemetry: create and refresh start
   * a new TTL window, delete drops the record, reuse leaves the window untouched because
   * reading a resource does not rewrite its server-side TTL.
   */
  applyResourceDirectives(provider, directives, now) {
    for (const directive of directives) {
      if (directive.kind !== "resource") {
        continue;
      }
      if (directive.action === "delete") {
        this.resources.delete(directive.resourceKey);
      } else if (directive.action === "create" || directive.action === "refresh") {
        this.resources.set(directive.resourceKey, {
          provider,
          ttlSeconds: directive.ttlSeconds,
          lastWriteAt: now
        });
      }
      this.emit({ type: "resource.action", directive, timestamp: now });
    }
  }
  /** Synchronous fan-out over a copy of the listener list, exceptions swallowed. */
  emit(event) {
    for (const listener of [...this.listeners]) {
      try {
        listener(event);
      } catch {
      }
    }
  }
  /**
   * Defensive startup restore: each snapshot section is applied inside its own try/catch
   * with structural checks first, so one corrupt section never poisons the others and a
   * corrupt snapshot degrades to a fresh engine, never to a crash.
   *
   * A failed load, a missing snapshot (`undefined` or `null`), or a snapshot whose `data`
   * is not an object all leave the engine fresh. Afterwards every restored keeper entry
   * and resource is re-registered in the prefix registry.
   */
  async restore(state) {
    let snapshot;
    try {
      snapshot = await state.load();
    } catch {
      return;
    }
    // A backend may resolve to null for "nothing stored". Reading `.data` off it would
    // reject `restored` and make every later flush() fail.
    if (!isRecord(snapshot) || !isRecord(snapshot.data)) {
      return;
    }
    const data = snapshot.data;
    try {
      const section = data.ledger;
      if (isRecord(section) && typeof section.maxPrefixes === "number" && Array.isArray(section.entries)) {
        this.ledger = Ledger.fromJSON(
          { maxPrefixes: section.maxPrefixes, entries: section.entries },
          this.pricing
        );
      }
    } catch {
    }
    try {
      const section = data.fingerprints;
      if (isRecord(section) && typeof section.capacity === "number" && Array.isArray(section.entries)) {
        this.fingerprints = Fingerprints.fromJSON({
          capacity: section.capacity,
          entries: section.entries
        });
      }
    } catch {
    }
    try {
      const section = data.keeper;
      if (isRecord(section) && typeof section.capacity === "number" && Array.isArray(section.entries)) {
        this.keeper = TtlKeeper.fromJSON({ capacity: section.capacity, entries: section.entries });
      }
    } catch {
    }
    try {
      const section = data.resources;
      if (Array.isArray(section)) {
        for (const item of section) {
          if (isResourceRecordJSON(item)) {
            this.resources.set(item.key, {
              provider: item.provider,
              ttlSeconds: item.ttlSeconds,
              lastWriteAt: item.lastWriteAt
            });
          }
        }
      }
    } catch {
    }
    try {
      const section = data.drifts;
      if (Array.isArray(section)) {
        const reports = [];
        for (const item of section) {
          if (isDriftReport(item)) {
            reports.push(item);
          }
        }
        // Persisted reports are older than anything observed since startup, so they go
        // in front of the ring before it is trimmed back to capacity.
        this.driftRing.unshift(...reports.slice(-DRIFT_RING_CAPACITY));
        while (this.driftRing.length > DRIFT_RING_CAPACITY) {
          this.driftRing.shift();
        }
      }
    } catch {
    }
    for (const entry of this.keeper.toJSON().entries) {
      this.prefixKeys.set(entry.prefixKey, entry.provider);
    }
    for (const [key, record] of this.resources) {
      this.prefixKeys.set(key, record.provider);
    }
  }
};
1915
/**
 * Public factory: creates a new engine from the given options.
 *
 * @param options - Engine options, all optional.
 * @returns A fresh `RacsEngine` instance.
 */
function createRACS(options = {}) {
  const engine = new RacsEngine(options);
  return engine;
}
1918
+
1919
+ // src/state/file.ts
1920
/**
 * Tells whether a caught value is an object carrying a `code` property equal to `code`,
 * the shape Node uses for system errors such as `ENOENT`.
 */
function hasErrnoCode(error, code) {
  if (error === null || typeof error !== "object") {
    return false;
  }
  return "code" in error && error.code === code;
}
1923
/** Renders a caught value as text: the message of an `Error`, else its string form. */
function describe(error) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
1926
/**
 * Parses and structurally validates a serialized version 1 snapshot.
 *
 * @param text - The JSON text read from storage.
 * @param source - Human-readable origin of the text, used in error messages.
 * @returns `{ version: 1, savedAt, data }`, with any other top-level fields dropped.
 * @throws RacsError with code `ERR_STATE_LOAD` for invalid JSON, a non-object payload, or
 *   a missing `savedAt`/`data`, and `ERR_STATE_VERSION` for any version other than 1.
 */
function parseSnapshot(text, source) {
  // Every message shares the same lead-in, only the detail and code differ.
  const failure = (detail, code) => new RacsError(`RACS state at ${source} ${detail}`, code);
  let payload;
  try {
    payload = JSON.parse(text);
  } catch (error) {
    throw failure(`is not valid JSON: ${describe(error)}`, "ERR_STATE_LOAD");
  }
  if (payload === null || typeof payload !== "object") {
    throw failure(`is not a snapshot object, found ${typeof payload}.`, "ERR_STATE_LOAD");
  }
  const { version, savedAt, data } = payload;
  if (version !== 1) {
    throw failure(
      `has unsupported snapshot version ${String(version)}, expected 1.`,
      "ERR_STATE_VERSION"
    );
  }
  const complete = typeof savedAt === "number" && data !== null && typeof data === "object";
  if (!complete) {
    throw failure(
      "is missing the savedAt or data field of a version 1 snapshot.",
      "ERR_STATE_LOAD"
    );
  }
  return { version: 1, savedAt, data };
}
1959
/**
 * State backend that persists the snapshot as JSON in a single file.
 *
 * @param options - `{ path }`, the file the snapshot lives in.
 * @returns A backend with `load` and `save`.
 */
function fileState(options) {
  const { path } = options;
  return {
    /**
     * Reads and parses the snapshot.
     *
     * @returns The snapshot, or `undefined` when the file does not exist.
     * @throws RacsError with code `ERR_STATE_LOAD` when reading fails for any other
     *   reason, plus whatever `parseSnapshot` raises for invalid content.
     */
    async load() {
      const fs = await import('node:fs/promises');
      let text;
      try {
        text = await fs.readFile(path, "utf8");
      } catch (error) {
        if (hasErrnoCode(error, "ENOENT")) {
          return void 0;
        }
        throw new RacsError(
          `Failed to read RACS state from ${path}: ${describe(error)}`,
          "ERR_STATE_LOAD"
        );
      }
      return parseSnapshot(text, path);
    },
    /**
     * Writes the snapshot by creating the parent directory, writing to `${path}.tmp` and
     * renaming that file onto `path`. When the write or the rename fails, the temp file
     * is removed again (best effort) and the original error is rethrown.
     */
    async save(snapshot) {
      const fs = await import('node:fs/promises');
      const { dirname } = await import('node:path');
      await fs.mkdir(dirname(path), { recursive: true });
      const tmpPath = `${path}.tmp`;
      try {
        await fs.writeFile(tmpPath, JSON.stringify(snapshot), "utf8");
        await fs.rename(tmpPath, path);
      } catch (error) {
        // Cleanup must never mask the failure the caller needs to see.
        await fs.rm(tmpPath, { force: true }).catch(() => {
        });
        throw error;
      }
    }
  };
}
1988
+
1989
+ // src/state/kv.ts
1990
/** Text form of a caught value: an `Error`'s message, otherwise `String(value)`. */
function describe2(error) {
  const isError = error instanceof Error;
  if (!isError) {
    return String(error);
  }
  return error.message;
}
1993
/**
 * Parses and structurally validates a serialized version 1 snapshot read from a key-value
 * store.
 *
 * @param text - The JSON text read from storage.
 * @param source - Human-readable origin of the text, used in error messages.
 * @returns `{ version: 1, savedAt, data }`, with any other top-level fields dropped.
 * @throws RacsError with code `ERR_STATE_LOAD` for invalid JSON, a non-object payload, or
 *   a missing `savedAt`/`data`, and `ERR_STATE_VERSION` for any version other than 1.
 */
function parseSnapshot2(text, source) {
  let decoded;
  try {
    decoded = JSON.parse(text);
  } catch (error) {
    const reason = describe2(error);
    throw new RacsError(`RACS state at ${source} is not valid JSON: ${reason}`, "ERR_STATE_LOAD");
  }
  const isObject = decoded !== null && typeof decoded === "object";
  if (!isObject) {
    const found = typeof decoded;
    throw new RacsError(
      `RACS state at ${source} is not a snapshot object, found ${found}.`,
      "ERR_STATE_LOAD"
    );
  }
  if (decoded.version !== 1) {
    const seen = String(decoded.version);
    throw new RacsError(
      `RACS state at ${source} has unsupported snapshot version ${seen}, expected 1.`,
      "ERR_STATE_VERSION"
    );
  }
  const stamp = decoded.savedAt;
  const body = decoded.data;
  if (typeof stamp === "number" && typeof body === "object" && body !== null) {
    return { version: 1, savedAt: stamp, data: body };
  }
  throw new RacsError(
    `RACS state at ${source} is missing the savedAt or data field of a version 1 snapshot.`,
    "ERR_STATE_LOAD"
  );
}
2026
/**
 * State backend that keeps the snapshot as a JSON string under one key of a key-value
 * client exposing `get(key)` and `set(key, value)`.
 *
 * @param kv - The key-value client.
 * @param key - Storage key, `"racs:state"` by default.
 * @returns A backend with `load` and `save`.
 */
function kvState(kv, key = "racs:state") {
  /** Resolves to the parsed snapshot, or `undefined` when the key holds nothing. */
  const load = async () => {
    const stored = await kv.get(key);
    const missing = stored === void 0 || stored === null;
    if (missing) {
      return void 0;
    }
    return parseSnapshot2(stored, `kv key "${key}"`);
  };
  /** Serializes the snapshot and stores it under the key. */
  const save = async (snapshot) => {
    const serialized = JSON.stringify(snapshot);
    await kv.set(key, serialized);
  };
  return { load, save };
}
2040
+
2041
+ // src/state/memory.ts
2042
/**
 * In-memory state backend: `save` keeps a reference to the latest snapshot and `load`
 * hands it back. Nothing is held until the first save.
 *
 * @returns A backend with promise-returning `load` and `save`.
 */
function memoryState() {
  let held;
  const load = () => Promise.resolve(held);
  const save = (next) => {
    held = next;
    return Promise.resolve();
  };
  return { load, save };
}
2054
+
2055
+ export { Ledger, PROVIDER_PROFILES, Planner, PrefixAnalyzer, RacsError, combineKeys, createRACS, estimateTokens, fileState, fnv1a64, kvState, memoryState, resolveProfile };
2056
+ //# sourceMappingURL=index.js.map
2057
+ //# sourceMappingURL=index.js.map