npm - ai-lcr - Versions diffs - 0.6.5 → 0.7.0 - Mend

ai-lcr 0.6.5 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,44 @@ All notable changes to `ai-lcr` are documented here. The format follows
 [Keep a Changelog](https://keepachangelog.com/), and the project adheres to
 [Semantic Versioning](https://semver.org/).
+## [0.7.0] — 2026-06-20
+The text router now records the **provider-reported actual cost** when a provider
+returns one, instead of always estimating from the price table. The table becomes
+the routing input and the drift baseline (`estCostUsd`); the recorded `costUsd` is
+the real bill wherever the provider gives it.
+### Why
+A static price table can only encode one price per model, but an aggregator
+(OpenRouter) routes a single model across many sub-providers whose prices differ
+several-fold, picking one per call — so `tokens × table` is structurally unable to
+match the bill for multi-provider models (measured: `deepseek-v4-pro` reconciled at
+~57% of the real cost, while single-provider models like Gemini/Claude/GPT matched
+at 100%). The provider's own number already accounts for which sub-provider served,
+every token kind (cache read/write, reasoning), and fees — none of which a flat
+table can track.
+### Added
+- **`costUsd` prefers the provider-reported actual cost** (text path). Read from
+  OpenRouter's `providerMetadata.openrouter.usage` —
+  `costDetails.upstreamInferenceCost` (the real upstream / BYOK model spend) when
+  it is a positive number, otherwise `cost` (the credit charge) — and, failing
+  that, from an OpenAI-compatible provider's `estimated_cost` (or `cost`) on the
+  raw usage body. Requires the caller to enable usage accounting on the provider
+  (e.g. OpenRouter `usage: { include: true }`); without it, behavior is unchanged.
+- **`estCostUsd` is now set on text records** (previously media-only) — the
+  price-table prediction for the same usage. `costUsd − estCostUsd` is the
+  price-table drift signal, so a dashboard's drift panel now works for text too.
+### Changed
+- When no provider cost is reported, `costUsd` still equals the price-table
+  estimate (and `estCostUsd` equals it, so no drift is flagged) — a pure fallback,
+  fully backward-compatible. The streaming path reads the reported cost from the
+  `finish` chunk's `providerMetadata`.
 ## [0.6.5] — 2026-06-16
 Bundled price table now covers the open-weights labs, not just the Western

package/dist/index.cjs CHANGED Viewed

@@ -341,6 +341,20 @@ function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
   const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
   return cached / 1e6 * (cost.input - cost.cacheRead);
 }
+function reportedCost(providerMetadata, usage) {
+  const orUsage = providerMetadata?.openrouter?.usage;
+  if (orUsage) {
+    const upstream = orUsage.costDetails?.upstreamInferenceCost;
+    if (typeof upstream === "number" && upstream > 0) return upstream;
+    if (typeof orUsage.cost === "number") return orUsage.cost;
+  }
+  const raw = usage?.raw;
+  if (raw) {
+    const est = raw["estimated_cost"] ?? raw["cost"];
+    if (typeof est === "number") return est;
+  }
+  return void 0;
+}
 function requestIdFrom(options) {
   const raw = options.providerOptions?.lcr?.requestId;
   return typeof raw === "string" && raw.length > 0 ? raw : void 0;
@@ -539,12 +553,13 @@ var LcrFallbackModel = class {
     return baseline;
   }
   /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
-  finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
+  finalizeOk(ctx, provider, attemptStart, usage, ttftMs, providerMetadata) {
     ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
     const inputTokens = usage?.inputTokens?.total ?? 0;
     const outputTokens = usage?.outputTokens?.total ?? 0;
     const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
-    const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
+    const estCostUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : void 0;
+    const costUsd = reportedCost(providerMetadata, usage) ?? estCostUsd ?? 0;
     const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
     const usageMissing = inputTokens === 0 && outputTokens === 0;
     const emptyCompletion = inputTokens > 0 && outputTokens === 0;
@@ -579,6 +594,7 @@ var LcrFallbackModel = class {
       outputTokens,
       ...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
       costUsd,
+      ...estCostUsd !== void 0 ? { estCostUsd } : {},
       ...baselineUsd !== void 0 ? { baselineUsd, baselineKind: "last-leg" } : {},
       ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
       ...ctx.requestId ? { requestId: ctx.requestId } : {},
@@ -635,7 +651,7 @@ var LcrFallbackModel = class {
         }
         this.recordProviderSuccess(idx);
         this.settleSticky(idx);
-        this.finalizeOk(ctx, provider, attemptStart, result.usage);
+        this.finalizeOk(ctx, provider, attemptStart, result.usage, void 0, result.providerMetadata);
         if (cache && cacheKey !== void 0 && ctx.settled?.cacheable) {
           this.storeCache(cacheKey, { kind: "generate", result, meta: ctx.settled.meta });
         }
@@ -767,6 +783,7 @@ var LcrFallbackModel = class {
     const servingIdx = idx;
     const servingPos = p;
     let usage;
+    let finishProviderMetadata;
     let contentStreamed = false;
     let ttftMs;
     const stream = new ReadableStream({
@@ -783,6 +800,7 @@ var LcrFallbackModel = class {
             if (done) break;
             if (value.type === "finish") {
               usage = value.usage;
+              finishProviderMetadata = value.providerMetadata;
               const out = value.usage?.outputTokens?.total ?? 0;
               const inp = value.usage?.inputTokens?.total ?? 0;
               if (inp > 0 && out === 0 && !contentStreamed && servingPos + 1 < n) {
@@ -797,7 +815,7 @@ var LcrFallbackModel = class {
           }
           self.recordProviderSuccess(servingIdx);
           self.settleSticky(servingIdx);
-          self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
+          self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs, finishProviderMetadata);
           controller.close();
         } catch (error) {
           self.emitError(error, servingProvider.label);

package/dist/index.js CHANGED Viewed

@@ -287,6 +287,20 @@ function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
   const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
   return cached / 1e6 * (cost.input - cost.cacheRead);
 }
+function reportedCost(providerMetadata, usage) {
+  const orUsage = providerMetadata?.openrouter?.usage;
+  if (orUsage) {
+    const upstream = orUsage.costDetails?.upstreamInferenceCost;
+    if (typeof upstream === "number" && upstream > 0) return upstream;
+    if (typeof orUsage.cost === "number") return orUsage.cost;
+  }
+  const raw = usage?.raw;
+  if (raw) {
+    const est = raw["estimated_cost"] ?? raw["cost"];
+    if (typeof est === "number") return est;
+  }
+  return void 0;
+}
 function requestIdFrom(options) {
   const raw = options.providerOptions?.lcr?.requestId;
   return typeof raw === "string" && raw.length > 0 ? raw : void 0;
@@ -485,12 +499,13 @@ var LcrFallbackModel = class {
     return baseline;
   }
   /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
-  finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
+  finalizeOk(ctx, provider, attemptStart, usage, ttftMs, providerMetadata) {
     ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
     const inputTokens = usage?.inputTokens?.total ?? 0;
     const outputTokens = usage?.outputTokens?.total ?? 0;
     const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
-    const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
+    const estCostUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : void 0;
+    const costUsd = reportedCost(providerMetadata, usage) ?? estCostUsd ?? 0;
     const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
     const usageMissing = inputTokens === 0 && outputTokens === 0;
     const emptyCompletion = inputTokens > 0 && outputTokens === 0;
@@ -525,6 +540,7 @@ var LcrFallbackModel = class {
       outputTokens,
       ...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
       costUsd,
+      ...estCostUsd !== void 0 ? { estCostUsd } : {},
       ...baselineUsd !== void 0 ? { baselineUsd, baselineKind: "last-leg" } : {},
       ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
       ...ctx.requestId ? { requestId: ctx.requestId } : {},
@@ -581,7 +597,7 @@ var LcrFallbackModel = class {
         }
         this.recordProviderSuccess(idx);
         this.settleSticky(idx);
-        this.finalizeOk(ctx, provider, attemptStart, result.usage);
+        this.finalizeOk(ctx, provider, attemptStart, result.usage, void 0, result.providerMetadata);
         if (cache && cacheKey !== void 0 && ctx.settled?.cacheable) {
           this.storeCache(cacheKey, { kind: "generate", result, meta: ctx.settled.meta });
         }
@@ -713,6 +729,7 @@ var LcrFallbackModel = class {
     const servingIdx = idx;
     const servingPos = p;
     let usage;
+    let finishProviderMetadata;
     let contentStreamed = false;
     let ttftMs;
     const stream = new ReadableStream({
@@ -729,6 +746,7 @@ var LcrFallbackModel = class {
             if (done) break;
             if (value.type === "finish") {
               usage = value.usage;
+              finishProviderMetadata = value.providerMetadata;
               const out = value.usage?.outputTokens?.total ?? 0;
               const inp = value.usage?.inputTokens?.total ?? 0;
               if (inp > 0 && out === 0 && !contentStreamed && servingPos + 1 < n) {
@@ -743,7 +761,7 @@ var LcrFallbackModel = class {
           }
           self.recordProviderSuccess(servingIdx);
           self.settleSticky(servingIdx);
-          self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
+          self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs, finishProviderMetadata);
           controller.close();
         } catch (error) {
           self.emitError(error, servingProvider.label);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ai-lcr",
-  "version": "0.6.5",
+  "version": "0.7.0",
   "description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
   "keywords": [
     "ai",