npm - ai-lcr - Versions diffs - 0.5.0 → 0.5.1 - Mend

ai-lcr 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/index.cjs CHANGED Viewed

@@ -194,6 +194,11 @@ function costForUsage(cost, inputTokens, outputTokens, cacheReadTokens) {
   const cachedRate = cost.cacheRead ?? cost.input;
   return fullInput / 1e6 * cost.input + cached / 1e6 * cachedRate + outputTokens / 1e6 * cost.output;
 }
+function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
+  if (cost.cacheRead === void 0) return 0;
+  const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
+  return cached / 1e6 * (cost.input - cost.cacheRead);
+}
 function requestIdFrom(options) {
   const raw = options.providerOptions?.lcr?.requestId;
   return typeof raw === "string" && raw.length > 0 ? raw : void 0;
@@ -298,19 +303,22 @@ var LcrFallbackModel = class {
     });
   }
   /**
-   * Baseline = what this same usage would have cost on the most expensive
-   * *priced* provider in the chain (typically the OpenRouter fallback leg). The
-   * winner's savings is `baselineUsd - costUsd`. Undefined when no provider in
-   * the chain carries a price (nothing to compare against).
+   * Baseline = what this same usage would have cost on the always-on fallback:
+   * the LAST priced leg of the chain (by convention the list-price provider you'd
+   * use without routing — e.g. OpenRouter, always last). The winner's saving is
+   * `baselineUsd - costUsd`. We take the last priced leg, NOT the most expensive
+   * one: prompt caching can make a sticker-cheaper provider (no `cacheRead`) cost
+   * MORE on a cache-heavy call, and a max-of-chain baseline would then fabricate a
+   * "saving" even on calls the fallback itself served. Undefined when no provider
+   * in the chain carries a price (nothing to compare against).
    */
   baselineUsd(inputTokens, outputTokens, cacheReadTokens) {
-    let max;
+    let baseline;
     for (const p of this.opts.providers) {
       if (!p.cost) continue;
-      const c = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
-      if (max === void 0 || c > max) max = c;
+      baseline = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
     }
-    return max;
+    return baseline;
   }
   /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
   finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
@@ -319,6 +327,7 @@ var LcrFallbackModel = class {
     const outputTokens = usage?.outputTokens?.total ?? 0;
     const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
     const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
+    const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
     const usageMissing = inputTokens === 0 && outputTokens === 0;
     this.emitCost({
       model: this.opts.modelName,
@@ -341,6 +350,7 @@ var LcrFallbackModel = class {
       ...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
       costUsd,
       baselineUsd: this.baselineUsd(inputTokens, outputTokens, cacheReadTokens),
+      ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
       ...ctx.requestId ? { requestId: ctx.requestId } : {},
       ...usageMissing ? { usageMissing: true } : {}
     });

package/dist/index.d.cts CHANGED Viewed

@@ -109,12 +109,25 @@ interface CallRecord {
     /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
     costUsd: number;
     /**
-     * What the same request would have cost on the most expensive *priced*
-     * provider in the chain, on identical token usage — the savings baseline
-     * (`baselineUsd - costUsd`). Set by both routers whenever at least one
-     * provider carries a `cost`; undefined only when no provider was priced.
+     * What this same usage would have cost on the savings baseline, so
+     * `baselineUsd - costUsd` is what routing actually saved. Text router: the
+     * always-on fallback leg — the LAST priced provider in the chain, i.e. the
+     * list-price provider you'd fall back to without routing (e.g. OpenRouter).
+     * Media router: the model-maker's official direct price. NOT the most
+     * expensive leg of the chain: prompt caching can make a sticker-cheaper
+     * provider cost more on a cache-heavy call, and a max-of-chain baseline would
+     * fabricate a "saving" on calls the fallback itself served. Undefined only
+     * when no provider was priced.
      */
     baselineUsd?: number;
+    /**
+     * The slice of `costUsd` that prompt-cache reads saved versus paying the full
+     * input rate for those same tokens (`cachedTokens × (input − cacheRead)`).
+     * Present only when > 0. This is the serving provider's own caching benefit —
+     * it happens with or without routing — so it is NOT a routing saving and must
+     * be surfaced separately, never folded into `baselineUsd - costUsd`.
+     */
+    cachedSavingUsd?: number;
     /**
      * Caller-supplied correlation id, read from `providerOptions.lcr.requestId`
      * on the call. Multi-step tool loops emit one record per `doStream`/

package/dist/index.d.ts CHANGED Viewed

@@ -109,12 +109,25 @@ interface CallRecord {
     /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
     costUsd: number;
     /**
-     * What the same request would have cost on the most expensive *priced*
-     * provider in the chain, on identical token usage — the savings baseline
-     * (`baselineUsd - costUsd`). Set by both routers whenever at least one
-     * provider carries a `cost`; undefined only when no provider was priced.
+     * What this same usage would have cost on the savings baseline, so
+     * `baselineUsd - costUsd` is what routing actually saved. Text router: the
+     * always-on fallback leg — the LAST priced provider in the chain, i.e. the
+     * list-price provider you'd fall back to without routing (e.g. OpenRouter).
+     * Media router: the model-maker's official direct price. NOT the most
+     * expensive leg of the chain: prompt caching can make a sticker-cheaper
+     * provider cost more on a cache-heavy call, and a max-of-chain baseline would
+     * fabricate a "saving" on calls the fallback itself served. Undefined only
+     * when no provider was priced.
      */
     baselineUsd?: number;
+    /**
+     * The slice of `costUsd` that prompt-cache reads saved versus paying the full
+     * input rate for those same tokens (`cachedTokens × (input − cacheRead)`).
+     * Present only when > 0. This is the serving provider's own caching benefit —
+     * it happens with or without routing — so it is NOT a routing saving and must
+     * be surfaced separately, never folded into `baselineUsd - costUsd`.
+     */
+    cachedSavingUsd?: number;
     /**
      * Caller-supplied correlation id, read from `providerOptions.lcr.requestId`
      * on the call. Multi-step tool loops emit one record per `doStream`/

package/dist/index.js CHANGED Viewed

@@ -152,6 +152,11 @@ function costForUsage(cost, inputTokens, outputTokens, cacheReadTokens) {
   const cachedRate = cost.cacheRead ?? cost.input;
   return fullInput / 1e6 * cost.input + cached / 1e6 * cachedRate + outputTokens / 1e6 * cost.output;
 }
+function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
+  if (cost.cacheRead === void 0) return 0;
+  const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
+  return cached / 1e6 * (cost.input - cost.cacheRead);
+}
 function requestIdFrom(options) {
   const raw = options.providerOptions?.lcr?.requestId;
   return typeof raw === "string" && raw.length > 0 ? raw : void 0;
@@ -256,19 +261,22 @@ var LcrFallbackModel = class {
     });
   }
   /**
-   * Baseline = what this same usage would have cost on the most expensive
-   * *priced* provider in the chain (typically the OpenRouter fallback leg). The
-   * winner's savings is `baselineUsd - costUsd`. Undefined when no provider in
-   * the chain carries a price (nothing to compare against).
+   * Baseline = what this same usage would have cost on the always-on fallback:
+   * the LAST priced leg of the chain (by convention the list-price provider you'd
+   * use without routing — e.g. OpenRouter, always last). The winner's saving is
+   * `baselineUsd - costUsd`. We take the last priced leg, NOT the most expensive
+   * one: prompt caching can make a sticker-cheaper provider (no `cacheRead`) cost
+   * MORE on a cache-heavy call, and a max-of-chain baseline would then fabricate a
+   * "saving" even on calls the fallback itself served. Undefined when no provider
+   * in the chain carries a price (nothing to compare against).
    */
   baselineUsd(inputTokens, outputTokens, cacheReadTokens) {
-    let max;
+    let baseline;
     for (const p of this.opts.providers) {
       if (!p.cost) continue;
-      const c = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
-      if (max === void 0 || c > max) max = c;
+      baseline = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
     }
-    return max;
+    return baseline;
   }
   /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
   finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
@@ -277,6 +285,7 @@ var LcrFallbackModel = class {
     const outputTokens = usage?.outputTokens?.total ?? 0;
     const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
     const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
+    const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
     const usageMissing = inputTokens === 0 && outputTokens === 0;
     this.emitCost({
       model: this.opts.modelName,
@@ -299,6 +308,7 @@ var LcrFallbackModel = class {
       ...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
       costUsd,
       baselineUsd: this.baselineUsd(inputTokens, outputTokens, cacheReadTokens),
+      ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
       ...ctx.requestId ? { requestId: ctx.requestId } : {},
       ...usageMissing ? { usageMissing: true } : {}
     });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ai-lcr",
-  "version": "0.5.0",
+  "version": "0.5.1",
   "description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
   "keywords": [
     "ai",