ai-lcr 0.6.5 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,44 @@ All notable changes to `ai-lcr` are documented here. The format follows
4
4
  [Keep a Changelog](https://keepachangelog.com/), and the project adheres to
5
5
  [Semantic Versioning](https://semver.org/).
6
6
 
7
+ ## [0.7.0] — 2026-06-20
8
+
9
+ The text router now records the **provider-reported actual cost** when a provider
10
+ returns one, instead of always estimating from the price table. The table becomes
11
+ the routing input and the drift baseline (`estCostUsd`); the recorded `costUsd` is
12
+ the real bill wherever the provider gives it.
13
+
14
+ ### Why
15
+
16
+ A static price table can only encode one price per model, but an aggregator
17
+ (OpenRouter) routes a single model across many sub-providers whose prices differ
18
+ several-fold, picking one per call — so `tokens × table` is structurally unable to
19
+ match the bill for multi-provider models (measured: `deepseek-v4-pro` reconciled at
20
+ ~57% of the real cost, while single-provider models like Gemini/Claude/GPT matched
21
+ at 100%). The provider's own number already accounts for which sub-provider served,
22
+ every token kind (cache read/write, reasoning), and fees — none of which a flat
23
+ table can track.
24
+
25
+ ### Added
26
+
27
+ - **`costUsd` prefers the provider-reported actual cost** (text path). Read from
28
+ OpenRouter's `providerMetadata.openrouter.usage` —
29
+ `costDetails.upstreamInferenceCost` (the real upstream / BYOK model spend) when
30
+ present, otherwise `cost` (the credit charge) — and from an OpenAI-compatible
31
+ provider's `estimated_cost` (or, when that is absent, `cost`) on the raw usage body. Requires the caller to enable
32
+ usage accounting on the provider (e.g. OpenRouter `usage: { include: true }`);
33
+ without it, behavior is unchanged.
34
+ - **`estCostUsd` is now set on text records** (previously media-only) — the
35
+ price-table prediction for the same usage. `costUsd − estCostUsd` is the
36
+ price-table drift signal, so a dashboard's drift panel now works for text too.
37
+
38
+ ### Changed
39
+
40
+ - When no provider cost is reported, `costUsd` still equals the price-table
41
+ estimate (and `estCostUsd` equals it, so no drift is flagged) — a pure fallback,
42
+ fully backward-compatible. The streaming path reads the reported cost from the
43
+ `finish` chunk's `providerMetadata`.
44
+
7
45
  ## [0.6.5] — 2026-06-16
8
46
 
9
47
  Bundled price table now covers the open-weights labs, not just the Western
package/dist/index.cjs CHANGED
@@ -341,6 +341,20 @@ function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
341
341
  const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
342
342
  return cached / 1e6 * (cost.input - cost.cacheRead);
343
343
  }
344
/**
 * Extract the provider-reported actual cost (USD) of a call, if one was returned.
 *
 * Candidates are tried in this order and the first usable one wins:
 *   1. `providerMetadata.openrouter.usage.costDetails.upstreamInferenceCost`
 *      (only when strictly positive);
 *   2. `providerMetadata.openrouter.usage.cost`;
 *   3. `usage.raw.estimated_cost`;
 *   4. `usage.raw.cost`.
 *
 * A candidate is usable only when it is a finite, non-negative number.
 * `typeof x === "number"` alone also admits NaN, Infinity and negative values,
 * and because the caller combines this result with `??`, such a value would
 * override the price-table estimate instead of falling back to it.
 *
 * @param {object} [providerMetadata] Provider metadata of the settled call.
 * @param {object} [usage] Usage object of the settled call; only `usage.raw` is read.
 * @returns {number|undefined} The reported cost, or `undefined` when none is usable.
 */
function reportedCost(providerMetadata, usage) {
  const isUsableCost = (value) =>
    typeof value === "number" && Number.isFinite(value) && value >= 0;

  const orUsage = providerMetadata?.openrouter?.usage;
  if (orUsage) {
    const upstream = orUsage.costDetails?.upstreamInferenceCost;
    // A zero upstream cost is treated as "not reported" so `cost` can take over.
    if (isUsableCost(upstream) && upstream > 0) return upstream;
    if (isUsableCost(orUsage.cost)) return orUsage.cost;
  }

  const raw = usage?.raw;
  if (raw) {
    // Check each key on its own so an unusable `estimated_cost` does not hide a valid `cost`.
    for (const key of ["estimated_cost", "cost"]) {
      const candidate = raw[key];
      if (isUsableCost(candidate)) return candidate;
    }
  }
  return undefined;
}
344
358
  function requestIdFrom(options) {
345
359
  const raw = options.providerOptions?.lcr?.requestId;
346
360
  return typeof raw === "string" && raw.length > 0 ? raw : void 0;
@@ -539,12 +553,13 @@ var LcrFallbackModel = class {
539
553
  return baseline;
540
554
  }
541
555
  /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
542
- finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
556
+ finalizeOk(ctx, provider, attemptStart, usage, ttftMs, providerMetadata) {
543
557
  ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
544
558
  const inputTokens = usage?.inputTokens?.total ?? 0;
545
559
  const outputTokens = usage?.outputTokens?.total ?? 0;
546
560
  const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
547
- const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
561
+ const estCostUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : void 0;
562
+ const costUsd = reportedCost(providerMetadata, usage) ?? estCostUsd ?? 0;
548
563
  const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
549
564
  const usageMissing = inputTokens === 0 && outputTokens === 0;
550
565
  const emptyCompletion = inputTokens > 0 && outputTokens === 0;
@@ -579,6 +594,7 @@ var LcrFallbackModel = class {
579
594
  outputTokens,
580
595
  ...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
581
596
  costUsd,
597
+ ...estCostUsd !== void 0 ? { estCostUsd } : {},
582
598
  ...baselineUsd !== void 0 ? { baselineUsd, baselineKind: "last-leg" } : {},
583
599
  ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
584
600
  ...ctx.requestId ? { requestId: ctx.requestId } : {},
@@ -635,7 +651,7 @@ var LcrFallbackModel = class {
635
651
  }
636
652
  this.recordProviderSuccess(idx);
637
653
  this.settleSticky(idx);
638
- this.finalizeOk(ctx, provider, attemptStart, result.usage);
654
+ this.finalizeOk(ctx, provider, attemptStart, result.usage, void 0, result.providerMetadata);
639
655
  if (cache && cacheKey !== void 0 && ctx.settled?.cacheable) {
640
656
  this.storeCache(cacheKey, { kind: "generate", result, meta: ctx.settled.meta });
641
657
  }
@@ -767,6 +783,7 @@ var LcrFallbackModel = class {
767
783
  const servingIdx = idx;
768
784
  const servingPos = p;
769
785
  let usage;
786
+ let finishProviderMetadata;
770
787
  let contentStreamed = false;
771
788
  let ttftMs;
772
789
  const stream = new ReadableStream({
@@ -783,6 +800,7 @@ var LcrFallbackModel = class {
783
800
  if (done) break;
784
801
  if (value.type === "finish") {
785
802
  usage = value.usage;
803
+ finishProviderMetadata = value.providerMetadata;
786
804
  const out = value.usage?.outputTokens?.total ?? 0;
787
805
  const inp = value.usage?.inputTokens?.total ?? 0;
788
806
  if (inp > 0 && out === 0 && !contentStreamed && servingPos + 1 < n) {
@@ -797,7 +815,7 @@ var LcrFallbackModel = class {
797
815
  }
798
816
  self.recordProviderSuccess(servingIdx);
799
817
  self.settleSticky(servingIdx);
800
- self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
818
+ self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs, finishProviderMetadata);
801
819
  controller.close();
802
820
  } catch (error) {
803
821
  self.emitError(error, servingProvider.label);
package/dist/index.js CHANGED
@@ -287,6 +287,20 @@ function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
287
287
  const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
288
288
  return cached / 1e6 * (cost.input - cost.cacheRead);
289
289
  }
290
/**
 * Extract the provider-reported actual cost (USD) of a call, if one was returned.
 *
 * Candidates are tried in this order and the first usable one wins:
 *   1. `providerMetadata.openrouter.usage.costDetails.upstreamInferenceCost`
 *      (only when strictly positive);
 *   2. `providerMetadata.openrouter.usage.cost`;
 *   3. `usage.raw.estimated_cost`;
 *   4. `usage.raw.cost`.
 *
 * A candidate is usable only when it is a finite, non-negative number.
 * `typeof x === "number"` alone also admits NaN, Infinity and negative values,
 * and because the caller combines this result with `??`, such a value would
 * override the price-table estimate instead of falling back to it.
 *
 * @param {object} [providerMetadata] Provider metadata of the settled call.
 * @param {object} [usage] Usage object of the settled call; only `usage.raw` is read.
 * @returns {number|undefined} The reported cost, or `undefined` when none is usable.
 */
function reportedCost(providerMetadata, usage) {
  const isUsableCost = (value) =>
    typeof value === "number" && Number.isFinite(value) && value >= 0;

  const orUsage = providerMetadata?.openrouter?.usage;
  if (orUsage) {
    const upstream = orUsage.costDetails?.upstreamInferenceCost;
    // A zero upstream cost is treated as "not reported" so `cost` can take over.
    if (isUsableCost(upstream) && upstream > 0) return upstream;
    if (isUsableCost(orUsage.cost)) return orUsage.cost;
  }

  const raw = usage?.raw;
  if (raw) {
    // Check each key on its own so an unusable `estimated_cost` does not hide a valid `cost`.
    for (const key of ["estimated_cost", "cost"]) {
      const candidate = raw[key];
      if (isUsableCost(candidate)) return candidate;
    }
  }
  return undefined;
}
290
304
  function requestIdFrom(options) {
291
305
  const raw = options.providerOptions?.lcr?.requestId;
292
306
  return typeof raw === "string" && raw.length > 0 ? raw : void 0;
@@ -485,12 +499,13 @@ var LcrFallbackModel = class {
485
499
  return baseline;
486
500
  }
487
501
  /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
488
- finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
502
+ finalizeOk(ctx, provider, attemptStart, usage, ttftMs, providerMetadata) {
489
503
  ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
490
504
  const inputTokens = usage?.inputTokens?.total ?? 0;
491
505
  const outputTokens = usage?.outputTokens?.total ?? 0;
492
506
  const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
493
- const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
507
+ const estCostUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : void 0;
508
+ const costUsd = reportedCost(providerMetadata, usage) ?? estCostUsd ?? 0;
494
509
  const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
495
510
  const usageMissing = inputTokens === 0 && outputTokens === 0;
496
511
  const emptyCompletion = inputTokens > 0 && outputTokens === 0;
@@ -525,6 +540,7 @@ var LcrFallbackModel = class {
525
540
  outputTokens,
526
541
  ...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
527
542
  costUsd,
543
+ ...estCostUsd !== void 0 ? { estCostUsd } : {},
528
544
  ...baselineUsd !== void 0 ? { baselineUsd, baselineKind: "last-leg" } : {},
529
545
  ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
530
546
  ...ctx.requestId ? { requestId: ctx.requestId } : {},
@@ -581,7 +597,7 @@ var LcrFallbackModel = class {
581
597
  }
582
598
  this.recordProviderSuccess(idx);
583
599
  this.settleSticky(idx);
584
- this.finalizeOk(ctx, provider, attemptStart, result.usage);
600
+ this.finalizeOk(ctx, provider, attemptStart, result.usage, void 0, result.providerMetadata);
585
601
  if (cache && cacheKey !== void 0 && ctx.settled?.cacheable) {
586
602
  this.storeCache(cacheKey, { kind: "generate", result, meta: ctx.settled.meta });
587
603
  }
@@ -713,6 +729,7 @@ var LcrFallbackModel = class {
713
729
  const servingIdx = idx;
714
730
  const servingPos = p;
715
731
  let usage;
732
+ let finishProviderMetadata;
716
733
  let contentStreamed = false;
717
734
  let ttftMs;
718
735
  const stream = new ReadableStream({
@@ -729,6 +746,7 @@ var LcrFallbackModel = class {
729
746
  if (done) break;
730
747
  if (value.type === "finish") {
731
748
  usage = value.usage;
749
+ finishProviderMetadata = value.providerMetadata;
732
750
  const out = value.usage?.outputTokens?.total ?? 0;
733
751
  const inp = value.usage?.inputTokens?.total ?? 0;
734
752
  if (inp > 0 && out === 0 && !contentStreamed && servingPos + 1 < n) {
@@ -743,7 +761,7 @@ var LcrFallbackModel = class {
743
761
  }
744
762
  self.recordProviderSuccess(servingIdx);
745
763
  self.settleSticky(servingIdx);
746
- self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
764
+ self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs, finishProviderMetadata);
747
765
  controller.close();
748
766
  } catch (error) {
749
767
  self.emitError(error, servingProvider.label);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-lcr",
3
- "version": "0.6.5",
3
+ "version": "0.7.0",
4
4
  "description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
5
5
  "keywords": [
6
6
  "ai",