ai-lcr 0.5.2 → 0.5.3

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,27 @@ All notable changes to `ai-lcr` are documented here. The format follows
4
4
  [Keep a Changelog](https://keepachangelog.com/), and the project adheres to
5
5
  [Semantic Versioning](https://semver.org/).
6
6
 
7
+ ## [0.5.3] — 2026-06-03
8
+
9
+ All additions are optional and backward compatible.
10
+
11
+ ### Added
12
+
13
+ - **`defaultCacheReadRatio` — chain-wide fallback price for prompt-cache reads.**
14
+ ai-lcr already detects cache hits from the provider's reported usage and emits
15
+ `cachedInputTokens` for any provider that reports them (Anthropic, Gemini's
16
+ implicit cache, DeepSeek, …). But the *saving* (`cachedSavingUsd`) and the
17
+ cache-discounted `costUsd` were only computed when a leg set an explicit
18
+ `cost.cacheRead` — so a route that forgot it (e.g. a Gemini OpenRouter leg)
19
+ silently reported `$0` saved and billed cached tokens at the full input rate.
20
+
21
+ `createLCR({ defaultCacheReadRatio: 0.1 })` now supplies a fallback cache-read
22
+ price as a fraction of each leg's `input`, applied **only** to legs that omit
23
+ an explicit `cacheRead`. Most providers' cache-read price is ~0.1× input, so
24
+ `0.1` makes cache cost + savings "just work" across every model without each
25
+ route hardcoding a rate. Legs with their own `cacheRead` are untouched (set it
26
+ for outliers like OpenAI's ~0.5×). Unset = previous behavior. Must be in [0, 1].
27
+
7
28
  ## [0.5.0] — 2026-06-02
8
29
 
9
30
  All additions are optional and backward compatible.
package/dist/index.cjs CHANGED
@@ -1224,11 +1224,20 @@ function normalize(entry) {
1224
1224
  function priceKey(p) {
1225
1225
  return p.cost ? p.cost.input + p.cost.output : Number.POSITIVE_INFINITY;
1226
1226
  }
1227
+ function withDefaultCacheRead(p, ratio) {
1228
+ if (ratio === void 0 || !p.cost || p.cost.cacheRead !== void 0) return p;
1229
+ return { ...p, cost: { ...p.cost, cacheRead: p.cost.input * ratio } };
1230
+ }
1227
1231
  function createLCR(config) {
1228
- const { models, autoSort = false, resetIntervalMs, onError, onCost, onCall } = config;
1232
+ const { models, autoSort = false, resetIntervalMs, onError, onCost, onCall, defaultCacheReadRatio } = config;
1233
+ if (defaultCacheReadRatio !== void 0 && (defaultCacheReadRatio < 0 || defaultCacheReadRatio > 1)) {
1234
+ throw new Error(
1235
+ `ai-lcr: defaultCacheReadRatio must be in [0, 1], got ${defaultCacheReadRatio}`
1236
+ );
1237
+ }
1229
1238
  const routed = /* @__PURE__ */ new Map();
1230
1239
  for (const [name, entries] of Object.entries(models)) {
1231
- let providers = entries.map(normalize);
1240
+ let providers = entries.map(normalize).map((p) => withDefaultCacheRead(p, defaultCacheReadRatio));
1232
1241
  if (autoSort) {
1233
1242
  providers = [...providers].sort((a, b) => priceKey(a) - priceKey(b));
1234
1243
  }
package/dist/index.d.cts CHANGED
@@ -589,6 +589,21 @@ interface LCRConfig {
589
589
  * you. Pair with `formatCallRecord` for a one-line log. See {@link CallRecord}.
590
590
  */
591
591
  onCall?: (record: CallRecord) => void;
592
+ /**
593
+ * Fallback prompt-cache read rate, as a fraction of each leg's `input` price,
594
+ * applied ONLY to legs whose `cost` omits an explicit `cacheRead`. So a leg
595
+ * priced `{ input: 0.5, output: 3 }` with `defaultCacheReadRatio: 0.1` bills
596
+ * its cached input tokens at `0.05`/1M and reports the resulting
597
+ * `cachedSavingUsd` — without every route having to hardcode `cacheRead`.
598
+ *
599
+ * Most providers' cache-read price is ~0.1× input (Anthropic, Gemini, DeepSeek);
600
+ * `0.1` is a sane default. Legs with their own `cacheRead` are untouched, so set
601
+ * it explicitly for outliers (e.g. OpenAI's ~0.5×). Unset = pre-existing
602
+ * behavior: cached tokens bill at the full input rate and save nothing.
603
+ * Caching is detected from the provider's reported usage either way; this only
604
+ * controls the *price* applied to it. Must be in [0, 1].
605
+ */
606
+ defaultCacheReadRatio?: number;
592
607
  }
593
608
  /** Resolve a logical model name to a routed model. */
594
609
  type LCRRouter = (modelName: string) => LanguageModelV3;
package/dist/index.d.ts CHANGED
@@ -589,6 +589,21 @@ interface LCRConfig {
589
589
  * you. Pair with `formatCallRecord` for a one-line log. See {@link CallRecord}.
590
590
  */
591
591
  onCall?: (record: CallRecord) => void;
592
+ /**
593
+ * Fallback prompt-cache read rate, as a fraction of each leg's `input` price,
594
+ * applied ONLY to legs whose `cost` omits an explicit `cacheRead`. So a leg
595
+ * priced `{ input: 0.5, output: 3 }` with `defaultCacheReadRatio: 0.1` bills
596
+ * its cached input tokens at `0.05`/1M and reports the resulting
597
+ * `cachedSavingUsd` — without every route having to hardcode `cacheRead`.
598
+ *
599
+ * Most providers' cache-read price is ~0.1× input (Anthropic, Gemini, DeepSeek);
600
+ * `0.1` is a sane default. Legs with their own `cacheRead` are untouched, so set
601
+ * it explicitly for outliers (e.g. OpenAI's ~0.5×). Unset = pre-existing
602
+ * behavior: cached tokens bill at the full input rate and save nothing.
603
+ * Caching is detected from the provider's reported usage either way; this only
604
+ * controls the *price* applied to it. Must be in [0, 1].
605
+ */
606
+ defaultCacheReadRatio?: number;
592
607
  }
593
608
  /** Resolve a logical model name to a routed model. */
594
609
  type LCRRouter = (modelName: string) => LanguageModelV3;
package/dist/index.js CHANGED
@@ -1182,11 +1182,20 @@ function normalize(entry) {
1182
1182
  function priceKey(p) {
1183
1183
  return p.cost ? p.cost.input + p.cost.output : Number.POSITIVE_INFINITY;
1184
1184
  }
1185
+ function withDefaultCacheRead(p, ratio) {
1186
+ if (ratio === void 0 || !p.cost || p.cost.cacheRead !== void 0) return p;
1187
+ return { ...p, cost: { ...p.cost, cacheRead: p.cost.input * ratio } };
1188
+ }
1185
1189
  function createLCR(config) {
1186
- const { models, autoSort = false, resetIntervalMs, onError, onCost, onCall } = config;
1190
+ const { models, autoSort = false, resetIntervalMs, onError, onCost, onCall, defaultCacheReadRatio } = config;
1191
+ if (defaultCacheReadRatio !== void 0 && (defaultCacheReadRatio < 0 || defaultCacheReadRatio > 1)) {
1192
+ throw new Error(
1193
+ `ai-lcr: defaultCacheReadRatio must be in [0, 1], got ${defaultCacheReadRatio}`
1194
+ );
1195
+ }
1187
1196
  const routed = /* @__PURE__ */ new Map();
1188
1197
  for (const [name, entries] of Object.entries(models)) {
1189
- let providers = entries.map(normalize);
1198
+ let providers = entries.map(normalize).map((p) => withDefaultCacheRead(p, defaultCacheReadRatio));
1190
1199
  if (autoSort) {
1191
1200
  providers = [...providers].sort((a, b) => priceKey(a) - priceKey(b));
1192
1201
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-lcr",
3
- "version": "0.5.2",
3
+ "version": "0.5.3",
4
4
  "description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
5
5
  "keywords": [
6
6
  "ai",