ai-lcr 0.6.5 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -0
- package/dist/index.cjs +22 -4
- package/dist/index.js +22 -4
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,44 @@ All notable changes to `ai-lcr` are documented here. The format follows
|
|
|
4
4
|
[Keep a Changelog](https://keepachangelog.com/), and the project adheres to
|
|
5
5
|
[Semantic Versioning](https://semver.org/).
|
|
6
6
|
|
|
7
|
+
## [0.7.0] — 2026-06-20
|
|
8
|
+
|
|
9
|
+
The text router now records the **provider-reported actual cost** when a provider
|
|
10
|
+
returns one, instead of always estimating from the price table. The table becomes
|
|
11
|
+
the routing input and the drift baseline (`estCostUsd`); the recorded `costUsd` is
|
|
12
|
+
the real bill wherever the provider gives it.
|
|
13
|
+
|
|
14
|
+
### Why
|
|
15
|
+
|
|
16
|
+
A static price table can only encode one price per model, but an aggregator
|
|
17
|
+
(OpenRouter) routes a single model across many sub-providers whose prices differ
|
|
18
|
+
several-fold, picking one per call — so `tokens × table` is structurally unable to
|
|
19
|
+
match the bill for multi-provider models (measured: `deepseek-v4-pro` reconciled at
|
|
20
|
+
~57% of the real cost, while single-provider models like Gemini/Claude/GPT matched
|
|
21
|
+
at 100%). The provider's own number already accounts for which sub-provider served,
|
|
22
|
+
every token kind (cache read/write, reasoning), and fees — none of which a flat
|
|
23
|
+
table can track.
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
|
|
27
|
+
- **`costUsd` prefers the provider-reported actual cost** (text path). Read from
|
|
28
|
+
OpenRouter's `providerMetadata.openrouter.usage` —
|
|
29
|
+
`costDetails.upstreamInferenceCost` (the real upstream / BYOK model spend) when
|
|
30
|
+
present, otherwise `cost` (the credit charge) — and from an OpenAI-compatible
|
|
31
|
+
provider's `estimated_cost` on the raw usage body. Requires the caller to enable
|
|
32
|
+
usage accounting on the provider (e.g. OpenRouter `usage: { include: true }`);
|
|
33
|
+
without it, behavior is unchanged.
|
|
34
|
+
- **`estCostUsd` is now set on text records** (previously media-only) — the
|
|
35
|
+
price-table prediction for the same usage. `costUsd − estCostUsd` is the
|
|
36
|
+
price-table drift signal, so a dashboard's drift panel now works for text too.
|
|
37
|
+
|
|
38
|
+
### Changed
|
|
39
|
+
|
|
40
|
+
- When no provider cost is reported, `costUsd` still equals the price-table
|
|
41
|
+
estimate (and `estCostUsd` equals it, so no drift is flagged) — a pure fallback,
|
|
42
|
+
fully backward-compatible. The streaming path reads the reported cost from the
|
|
43
|
+
`finish` chunk's `providerMetadata`.
|
|
44
|
+
|
|
7
45
|
## [0.6.5] — 2026-06-16
|
|
8
46
|
|
|
9
47
|
Bundled price table now covers the open-weights labs, not just the Western
|
package/dist/index.cjs
CHANGED
|
@@ -341,6 +341,20 @@ function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
|
|
|
341
341
|
const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
|
|
342
342
|
return cached / 1e6 * (cost.input - cost.cacheRead);
|
|
343
343
|
}
|
|
344
|
+
function reportedCost(providerMetadata, usage) {
|
|
345
|
+
const orUsage = providerMetadata?.openrouter?.usage;
|
|
346
|
+
if (orUsage) {
|
|
347
|
+
const upstream = orUsage.costDetails?.upstreamInferenceCost;
|
|
348
|
+
if (typeof upstream === "number" && upstream > 0) return upstream;
|
|
349
|
+
if (typeof orUsage.cost === "number") return orUsage.cost;
|
|
350
|
+
}
|
|
351
|
+
const raw = usage?.raw;
|
|
352
|
+
if (raw) {
|
|
353
|
+
const est = raw["estimated_cost"] ?? raw["cost"];
|
|
354
|
+
if (typeof est === "number") return est;
|
|
355
|
+
}
|
|
356
|
+
return void 0;
|
|
357
|
+
}
|
|
344
358
|
function requestIdFrom(options) {
|
|
345
359
|
const raw = options.providerOptions?.lcr?.requestId;
|
|
346
360
|
return typeof raw === "string" && raw.length > 0 ? raw : void 0;
|
|
@@ -539,12 +553,13 @@ var LcrFallbackModel = class {
|
|
|
539
553
|
return baseline;
|
|
540
554
|
}
|
|
541
555
|
/** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
|
|
542
|
-
finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
|
|
556
|
+
finalizeOk(ctx, provider, attemptStart, usage, ttftMs, providerMetadata) {
|
|
543
557
|
ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
|
|
544
558
|
const inputTokens = usage?.inputTokens?.total ?? 0;
|
|
545
559
|
const outputTokens = usage?.outputTokens?.total ?? 0;
|
|
546
560
|
const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
|
|
547
|
-
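const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;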
|
|
561
|
+
const estCostUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : void 0;
|
|
562
|
+
const costUsd = reportedCost(providerMetadata, usage) ?? estCostUsd ?? 0;
|
|
548
563
|
const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
|
|
549
564
|
const usageMissing = inputTokens === 0 && outputTokens === 0;
|
|
550
565
|
const emptyCompletion = inputTokens > 0 && outputTokens === 0;
|
|
@@ -579,6 +594,7 @@ var LcrFallbackModel = class {
|
|
|
579
594
|
outputTokens,
|
|
580
595
|
...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
|
|
581
596
|
costUsd,
|
|
597
|
+
...estCostUsd !== void 0 ? { estCostUsd } : {},
|
|
582
598
|
...baselineUsd !== void 0 ? { baselineUsd, baselineKind: "last-leg" } : {},
|
|
583
599
|
...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
|
|
584
600
|
...ctx.requestId ? { requestId: ctx.requestId } : {},
|
|
@@ -635,7 +651,7 @@ var LcrFallbackModel = class {
|
|
|
635
651
|
}
|
|
636
652
|
this.recordProviderSuccess(idx);
|
|
637
653
|
this.settleSticky(idx);
|
|
638
|
-
this.finalizeOk(ctx, provider, attemptStart, result.usage);
|
|
654
|
+
this.finalizeOk(ctx, provider, attemptStart, result.usage, void 0, result.providerMetadata);
|
|
639
655
|
if (cache && cacheKey !== void 0 && ctx.settled?.cacheable) {
|
|
640
656
|
this.storeCache(cacheKey, { kind: "generate", result, meta: ctx.settled.meta });
|
|
641
657
|
}
|
|
@@ -767,6 +783,7 @@ var LcrFallbackModel = class {
|
|
|
767
783
|
const servingIdx = idx;
|
|
768
784
|
const servingPos = p;
|
|
769
785
|
let usage;
|
|
786
|
+
let finishProviderMetadata;
|
|
770
787
|
let contentStreamed = false;
|
|
771
788
|
let ttftMs;
|
|
772
789
|
const stream = new ReadableStream({
|
|
@@ -783,6 +800,7 @@ var LcrFallbackModel = class {
|
|
|
783
800
|
if (done) break;
|
|
784
801
|
if (value.type === "finish") {
|
|
785
802
|
usage = value.usage;
|
|
803
|
+
finishProviderMetadata = value.providerMetadata;
|
|
786
804
|
const out = value.usage?.outputTokens?.total ?? 0;
|
|
787
805
|
const inp = value.usage?.inputTokens?.total ?? 0;
|
|
788
806
|
if (inp > 0 && out === 0 && !contentStreamed && servingPos + 1 < n) {
|
|
@@ -797,7 +815,7 @@ var LcrFallbackModel = class {
|
|
|
797
815
|
}
|
|
798
816
|
self.recordProviderSuccess(servingIdx);
|
|
799
817
|
self.settleSticky(servingIdx);
|
|
800
|
-
self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
|
|
818
|
+
self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs, finishProviderMetadata);
|
|
801
819
|
controller.close();
|
|
802
820
|
} catch (error) {
|
|
803
821
|
self.emitError(error, servingProvider.label);
|
package/dist/index.js
CHANGED
|
@@ -287,6 +287,20 @@ function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
|
|
|
287
287
|
const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
|
|
288
288
|
return cached / 1e6 * (cost.input - cost.cacheRead);
|
|
289
289
|
}
|
|
290
|
+
function reportedCost(providerMetadata, usage) {
|
|
291
|
+
const orUsage = providerMetadata?.openrouter?.usage;
|
|
292
|
+
if (orUsage) {
|
|
293
|
+
const upstream = orUsage.costDetails?.upstreamInferenceCost;
|
|
294
|
+
if (typeof upstream === "number" && upstream > 0) return upstream;
|
|
295
|
+
if (typeof orUsage.cost === "number") return orUsage.cost;
|
|
296
|
+
}
|
|
297
|
+
const raw = usage?.raw;
|
|
298
|
+
if (raw) {
|
|
299
|
+
const est = raw["estimated_cost"] ?? raw["cost"];
|
|
300
|
+
if (typeof est === "number") return est;
|
|
301
|
+
}
|
|
302
|
+
return void 0;
|
|
303
|
+
}
|
|
290
304
|
function requestIdFrom(options) {
|
|
291
305
|
const raw = options.providerOptions?.lcr?.requestId;
|
|
292
306
|
return typeof raw === "string" && raw.length > 0 ? raw : void 0;
|
|
@@ -485,12 +499,13 @@ var LcrFallbackModel = class {
|
|
|
485
499
|
return baseline;
|
|
486
500
|
}
|
|
487
501
|
/** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
|
|
488
|
-
finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
|
|
502
|
+
finalizeOk(ctx, provider, attemptStart, usage, ttftMs, providerMetadata) {
|
|
489
503
|
ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
|
|
490
504
|
const inputTokens = usage?.inputTokens?.total ?? 0;
|
|
491
505
|
const outputTokens = usage?.outputTokens?.total ?? 0;
|
|
492
506
|
const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
|
|
493
|
-
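const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;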
|
|
507
|
+
const estCostUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : void 0;
|
|
508
|
+
const costUsd = reportedCost(providerMetadata, usage) ?? estCostUsd ?? 0;
|
|
494
509
|
const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
|
|
495
510
|
const usageMissing = inputTokens === 0 && outputTokens === 0;
|
|
496
511
|
const emptyCompletion = inputTokens > 0 && outputTokens === 0;
|
|
@@ -525,6 +540,7 @@ var LcrFallbackModel = class {
|
|
|
525
540
|
outputTokens,
|
|
526
541
|
...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
|
|
527
542
|
costUsd,
|
|
543
|
+
...estCostUsd !== void 0 ? { estCostUsd } : {},
|
|
528
544
|
...baselineUsd !== void 0 ? { baselineUsd, baselineKind: "last-leg" } : {},
|
|
529
545
|
...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
|
|
530
546
|
...ctx.requestId ? { requestId: ctx.requestId } : {},
|
|
@@ -581,7 +597,7 @@ var LcrFallbackModel = class {
|
|
|
581
597
|
}
|
|
582
598
|
this.recordProviderSuccess(idx);
|
|
583
599
|
this.settleSticky(idx);
|
|
584
|
-
this.finalizeOk(ctx, provider, attemptStart, result.usage);
|
|
600
|
+
this.finalizeOk(ctx, provider, attemptStart, result.usage, void 0, result.providerMetadata);
|
|
585
601
|
if (cache && cacheKey !== void 0 && ctx.settled?.cacheable) {
|
|
586
602
|
this.storeCache(cacheKey, { kind: "generate", result, meta: ctx.settled.meta });
|
|
587
603
|
}
|
|
@@ -713,6 +729,7 @@ var LcrFallbackModel = class {
|
|
|
713
729
|
const servingIdx = idx;
|
|
714
730
|
const servingPos = p;
|
|
715
731
|
let usage;
|
|
732
|
+
let finishProviderMetadata;
|
|
716
733
|
let contentStreamed = false;
|
|
717
734
|
let ttftMs;
|
|
718
735
|
const stream = new ReadableStream({
|
|
@@ -729,6 +746,7 @@ var LcrFallbackModel = class {
|
|
|
729
746
|
if (done) break;
|
|
730
747
|
if (value.type === "finish") {
|
|
731
748
|
usage = value.usage;
|
|
749
|
+
finishProviderMetadata = value.providerMetadata;
|
|
732
750
|
const out = value.usage?.outputTokens?.total ?? 0;
|
|
733
751
|
const inp = value.usage?.inputTokens?.total ?? 0;
|
|
734
752
|
if (inp > 0 && out === 0 && !contentStreamed && servingPos + 1 < n) {
|
|
@@ -743,7 +761,7 @@ var LcrFallbackModel = class {
|
|
|
743
761
|
}
|
|
744
762
|
self.recordProviderSuccess(servingIdx);
|
|
745
763
|
self.settleSticky(servingIdx);
|
|
746
|
-
self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
|
|
764
|
+
self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs, finishProviderMetadata);
|
|
747
765
|
controller.close();
|
|
748
766
|
} catch (error) {
|
|
749
767
|
self.emitError(error, servingProvider.label);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ai-lcr",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.7.0",
|
|
4
4
|
"description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|