npm - ai-lcr - Versions diffs - 0.5.2 → 0.5.3 - Mend

ai-lcr 0.5.2 → 0.5.3

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.

Files changed (6) hide show

package/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,27 @@ All notable changes to `ai-lcr` are documented here. The format follows
 [Keep a Changelog](https://keepachangelog.com/), and the project adheres to
 [Semantic Versioning](https://semver.org/).
+## [0.5.3] — 2026-06-03
+All additions are optional and backward compatible.
+### Added
+- **`defaultCacheReadRatio` — chain-wide fallback price for prompt-cache reads.**
+  ai-lcr already detects cache hits from the provider's reported usage and emits
+  `cachedInputTokens` for any provider that reports them (Anthropic, Gemini's
+  implicit cache, DeepSeek, …). But the *saving* (`cachedSavingUsd`) and the
+  cache-discounted `costUsd` were only computed when a leg set an explicit
+  `cost.cacheRead` — so a route that forgot it (e.g. a Gemini OpenRouter leg)
+  silently reported `$0` saved and billed cached tokens at the full input rate.
+  `createLCR({ defaultCacheReadRatio: 0.1 })` now supplies a fallback cache-read
+  price as a fraction of each leg's `input`, applied **only** to legs that omit
+  an explicit `cacheRead`. Most providers' cache-read price is ~0.1× input, so
+  `0.1` makes cache cost + savings "just work" across every model without each
+  route hardcoding a rate. Legs with their own `cacheRead` are untouched (set it
+  for outliers like OpenAI's ~0.5×). Unset = previous behavior. Must be in [0, 1].
 ## [0.5.0] — 2026-06-02
 All additions are optional and backward compatible.

package/dist/index.cjs CHANGED Viewed

@@ -1224,11 +1224,20 @@ function normalize(entry) {
 function priceKey(p) {
   return p.cost ? p.cost.input + p.cost.output : Number.POSITIVE_INFINITY;
 }
+function withDefaultCacheRead(p, ratio) {
+  if (ratio === void 0 || !p.cost || p.cost.cacheRead !== void 0) return p;
+  return { ...p, cost: { ...p.cost, cacheRead: p.cost.input * ratio } };
+}
 function createLCR(config) {
-  const { models, autoSort = false, resetIntervalMs, onError, onCost, onCall } = config;
+  const { models, autoSort = false, resetIntervalMs, onError, onCost, onCall, defaultCacheReadRatio } = config;
+  if (defaultCacheReadRatio !== void 0 && (defaultCacheReadRatio < 0 || defaultCacheReadRatio > 1)) {
+    throw new Error(
+      `ai-lcr: defaultCacheReadRatio must be in [0, 1], got ${defaultCacheReadRatio}`
+    );
+  }
   const routed = /* @__PURE__ */ new Map();
   for (const [name, entries] of Object.entries(models)) {
-    let providers = entries.map(normalize);
+    let providers = entries.map(normalize).map((p) => withDefaultCacheRead(p, defaultCacheReadRatio));
     if (autoSort) {
       providers = [...providers].sort((a, b) => priceKey(a) - priceKey(b));
     }

package/dist/index.d.cts CHANGED Viewed

@@ -589,6 +589,21 @@ interface LCRConfig {
      * you. Pair with `formatCallRecord` for a one-line log. See {@link CallRecord}.
      */
     onCall?: (record: CallRecord) => void;
+    /**
+     * Fallback prompt-cache read rate, as a fraction of each leg's `input` price,
+     * applied ONLY to legs whose `cost` omits an explicit `cacheRead`. So a leg
+     * priced `{ input: 0.5, output: 3 }` with `defaultCacheReadRatio: 0.1` bills
+     * its cached input tokens at `0.05`/1M and reports the resulting
+     * `cachedSavingUsd` — without every route having to hardcode `cacheRead`.
+     *
+     * Most providers' cache-read price is ~0.1× input (Anthropic, Gemini, DeepSeek);
+     * `0.1` is a sane default. Legs with their own `cacheRead` are untouched, so set
+     * it explicitly for outliers (e.g. OpenAI's ~0.5×). Unset = pre-existing
+     * behavior: cached tokens bill at the full input rate and save nothing.
+     * Caching is detected from the provider's reported usage either way; this only
+     * controls the *price* applied to it. Must be in [0, 1].
+     */
+    defaultCacheReadRatio?: number;
 }
 /** Resolve a logical model name to a routed model. */
 type LCRRouter = (modelName: string) => LanguageModelV3;

package/dist/index.d.ts CHANGED Viewed

@@ -589,6 +589,21 @@ interface LCRConfig {
      * you. Pair with `formatCallRecord` for a one-line log. See {@link CallRecord}.
      */
     onCall?: (record: CallRecord) => void;
+    /**
+     * Fallback prompt-cache read rate, as a fraction of each leg's `input` price,
+     * applied ONLY to legs whose `cost` omits an explicit `cacheRead`. So a leg
+     * priced `{ input: 0.5, output: 3 }` with `defaultCacheReadRatio: 0.1` bills
+     * its cached input tokens at `0.05`/1M and reports the resulting
+     * `cachedSavingUsd` — without every route having to hardcode `cacheRead`.
+     *
+     * Most providers' cache-read price is ~0.1× input (Anthropic, Gemini, DeepSeek);
+     * `0.1` is a sane default. Legs with their own `cacheRead` are untouched, so set
+     * it explicitly for outliers (e.g. OpenAI's ~0.5×). Unset = pre-existing
+     * behavior: cached tokens bill at the full input rate and save nothing.
+     * Caching is detected from the provider's reported usage either way; this only
+     * controls the *price* applied to it. Must be in [0, 1].
+     */
+    defaultCacheReadRatio?: number;
 }
 /** Resolve a logical model name to a routed model. */
 type LCRRouter = (modelName: string) => LanguageModelV3;

package/dist/index.js CHANGED Viewed

@@ -1182,11 +1182,20 @@ function normalize(entry) {
 function priceKey(p) {
   return p.cost ? p.cost.input + p.cost.output : Number.POSITIVE_INFINITY;
 }
+function withDefaultCacheRead(p, ratio) {
+  if (ratio === void 0 || !p.cost || p.cost.cacheRead !== void 0) return p;
+  return { ...p, cost: { ...p.cost, cacheRead: p.cost.input * ratio } };
+}
 function createLCR(config) {
-  const { models, autoSort = false, resetIntervalMs, onError, onCost, onCall } = config;
+  const { models, autoSort = false, resetIntervalMs, onError, onCost, onCall, defaultCacheReadRatio } = config;
+  if (defaultCacheReadRatio !== void 0 && (defaultCacheReadRatio < 0 || defaultCacheReadRatio > 1)) {
+    throw new Error(
+      `ai-lcr: defaultCacheReadRatio must be in [0, 1], got ${defaultCacheReadRatio}`
+    );
+  }
   const routed = /* @__PURE__ */ new Map();
   for (const [name, entries] of Object.entries(models)) {
-    let providers = entries.map(normalize);
+    let providers = entries.map(normalize).map((p) => withDefaultCacheRead(p, defaultCacheReadRatio));
     if (autoSort) {
       providers = [...providers].sort((a, b) => priceKey(a) - priceKey(b));
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ai-lcr",
-  "version": "0.5.2",
+  "version": "0.5.3",
   "description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
   "keywords": [
     "ai",