ai-lcr 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/dist/index.cjs +11 -2
- package/dist/index.d.cts +15 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +11 -2
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,27 @@ All notable changes to `ai-lcr` are documented here. The format follows
|
|
|
4
4
|
[Keep a Changelog](https://keepachangelog.com/), and the project adheres to
|
|
5
5
|
[Semantic Versioning](https://semver.org/).
|
|
6
6
|
|
|
7
|
+
## [0.5.3] — 2026-06-03
|
|
8
|
+
|
|
9
|
+
All additions are optional and backward compatible.
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
- **`defaultCacheReadRatio` — chain-wide fallback price for prompt-cache reads.**
|
|
14
|
+
ai-lcr already detects cache hits from the provider's reported usage and emits
|
|
15
|
+
`cachedInputTokens` for any provider that reports them (Anthropic, Gemini's
|
|
16
|
+
implicit cache, DeepSeek, …). But the *saving* (`cachedSavingUsd`) and the
|
|
17
|
+
cache-discounted `costUsd` were only computed when a leg set an explicit
|
|
18
|
+
`cost.cacheRead` — so a route that forgot it (e.g. a Gemini OpenRouter leg)
|
|
19
|
+
silently reported `$0` saved and billed cached tokens at the full input rate.
|
|
20
|
+
|
|
21
|
+
`createLCR({ defaultCacheReadRatio: 0.1 })` now supplies a fallback cache-read
|
|
22
|
+
price as a fraction of each leg's `input`, applied **only** to legs that omit
|
|
23
|
+
an explicit `cacheRead`. Most providers' cache-read price is ~0.1× input, so
|
|
24
|
+
`0.1` makes cache cost + savings "just work" across every model without each
|
|
25
|
+
route hardcoding a rate. Legs with their own `cacheRead` are untouched (set it
|
|
26
|
+
for outliers like OpenAI's ~0.5×). Unset = previous behavior. Must be in [0, 1].
|
|
27
|
+
|
|
7
28
|
## [0.5.0] — 2026-06-02
|
|
8
29
|
|
|
9
30
|
All additions are optional and backward compatible.
|
package/dist/index.cjs
CHANGED
|
@@ -1224,11 +1224,20 @@ function normalize(entry) {
|
|
|
1224
1224
|
function priceKey(p) {
|
|
1225
1225
|
return p.cost ? p.cost.input + p.cost.output : Number.POSITIVE_INFINITY;
|
|
1226
1226
|
}
|
|
1227
|
+
function withDefaultCacheRead(p, ratio) {
|
|
1228
|
+
if (ratio === void 0 || !p.cost || p.cost.cacheRead !== void 0) return p;
|
|
1229
|
+
return { ...p, cost: { ...p.cost, cacheRead: p.cost.input * ratio } };
|
|
1230
|
+
}
|
|
1227
1231
|
function createLCR(config) {
|
|
1228
|
-
const { models, autoSort = false, resetIntervalMs, onError, onCost, onCall } = config;
|
|
1232
|
+
const { models, autoSort = false, resetIntervalMs, onError, onCost, onCall, defaultCacheReadRatio } = config;
|
|
1233
|
+
if (defaultCacheReadRatio !== void 0 && (defaultCacheReadRatio < 0 || defaultCacheReadRatio > 1)) {
|
|
1234
|
+
throw new Error(
|
|
1235
|
+
`ai-lcr: defaultCacheReadRatio must be in [0, 1], got ${defaultCacheReadRatio}`
|
|
1236
|
+
);
|
|
1237
|
+
}
|
|
1229
1238
|
const routed = /* @__PURE__ */ new Map();
|
|
1230
1239
|
for (const [name, entries] of Object.entries(models)) {
|
|
1231
|
-
let providers = entries.map(normalize);
|
|
1240
|
+
let providers = entries.map(normalize).map((p) => withDefaultCacheRead(p, defaultCacheReadRatio));
|
|
1232
1241
|
if (autoSort) {
|
|
1233
1242
|
providers = [...providers].sort((a, b) => priceKey(a) - priceKey(b));
|
|
1234
1243
|
}
|
package/dist/index.d.cts
CHANGED
|
@@ -589,6 +589,21 @@ interface LCRConfig {
|
|
|
589
589
|
* you. Pair with `formatCallRecord` for a one-line log. See {@link CallRecord}.
|
|
590
590
|
*/
|
|
591
591
|
onCall?: (record: CallRecord) => void;
|
|
592
|
+
/**
|
|
593
|
+
* Fallback prompt-cache read rate, as a fraction of each leg's `input` price,
|
|
594
|
+
* applied ONLY to legs whose `cost` omits an explicit `cacheRead`. So a leg
|
|
595
|
+
* priced `{ input: 0.5, output: 3 }` with `defaultCacheReadRatio: 0.1` bills
|
|
596
|
+
* its cached input tokens at `0.05`/1M and reports the resulting
|
|
597
|
+
* `cachedSavingUsd` — without every route having to hardcode `cacheRead`.
|
|
598
|
+
*
|
|
599
|
+
* Most providers' cache-read price is ~0.1× input (Anthropic, Gemini, DeepSeek);
|
|
600
|
+
* `0.1` is a sane default. Legs with their own `cacheRead` are untouched, so set
|
|
601
|
+
* it explicitly for outliers (e.g. OpenAI's ~0.5×). Unset = pre-existing
|
|
602
|
+
* behavior: cached tokens bill at the full input rate and save nothing.
|
|
603
|
+
* Caching is detected from the provider's reported usage either way; this only
|
|
604
|
+
* controls the *price* applied to it. Must be in [0, 1].
|
|
605
|
+
*/
|
|
606
|
+
defaultCacheReadRatio?: number;
|
|
592
607
|
}
|
|
593
608
|
/** Resolve a logical model name to a routed model. */
|
|
594
609
|
type LCRRouter = (modelName: string) => LanguageModelV3;
|
package/dist/index.d.ts
CHANGED
|
@@ -589,6 +589,21 @@ interface LCRConfig {
|
|
|
589
589
|
* you. Pair with `formatCallRecord` for a one-line log. See {@link CallRecord}.
|
|
590
590
|
*/
|
|
591
591
|
onCall?: (record: CallRecord) => void;
|
|
592
|
+
/**
|
|
593
|
+
* Fallback prompt-cache read rate, as a fraction of each leg's `input` price,
|
|
594
|
+
* applied ONLY to legs whose `cost` omits an explicit `cacheRead`. So a leg
|
|
595
|
+
* priced `{ input: 0.5, output: 3 }` with `defaultCacheReadRatio: 0.1` bills
|
|
596
|
+
* its cached input tokens at `0.05`/1M and reports the resulting
|
|
597
|
+
* `cachedSavingUsd` — without every route having to hardcode `cacheRead`.
|
|
598
|
+
*
|
|
599
|
+
* Most providers' cache-read price is ~0.1× input (Anthropic, Gemini, DeepSeek);
|
|
600
|
+
* `0.1` is a sane default. Legs with their own `cacheRead` are untouched, so set
|
|
601
|
+
* it explicitly for outliers (e.g. OpenAI's ~0.5×). Unset = pre-existing
|
|
602
|
+
* behavior: cached tokens bill at the full input rate and save nothing.
|
|
603
|
+
* Caching is detected from the provider's reported usage either way; this only
|
|
604
|
+
* controls the *price* applied to it. Must be in [0, 1].
|
|
605
|
+
*/
|
|
606
|
+
defaultCacheReadRatio?: number;
|
|
592
607
|
}
|
|
593
608
|
/** Resolve a logical model name to a routed model. */
|
|
594
609
|
type LCRRouter = (modelName: string) => LanguageModelV3;
|
package/dist/index.js
CHANGED
|
@@ -1182,11 +1182,20 @@ function normalize(entry) {
|
|
|
1182
1182
|
function priceKey(p) {
|
|
1183
1183
|
return p.cost ? p.cost.input + p.cost.output : Number.POSITIVE_INFINITY;
|
|
1184
1184
|
}
|
|
1185
|
+
function withDefaultCacheRead(p, ratio) {
|
|
1186
|
+
if (ratio === void 0 || !p.cost || p.cost.cacheRead !== void 0) return p;
|
|
1187
|
+
return { ...p, cost: { ...p.cost, cacheRead: p.cost.input * ratio } };
|
|
1188
|
+
}
|
|
1185
1189
|
function createLCR(config) {
|
|
1186
|
-
const { models, autoSort = false, resetIntervalMs, onError, onCost, onCall } = config;
|
|
1190
|
+
const { models, autoSort = false, resetIntervalMs, onError, onCost, onCall, defaultCacheReadRatio } = config;
|
|
1191
|
+
if (defaultCacheReadRatio !== void 0 && (defaultCacheReadRatio < 0 || defaultCacheReadRatio > 1)) {
|
|
1192
|
+
throw new Error(
|
|
1193
|
+
`ai-lcr: defaultCacheReadRatio must be in [0, 1], got ${defaultCacheReadRatio}`
|
|
1194
|
+
);
|
|
1195
|
+
}
|
|
1187
1196
|
const routed = /* @__PURE__ */ new Map();
|
|
1188
1197
|
for (const [name, entries] of Object.entries(models)) {
|
|
1189
|
-
let providers = entries.map(normalize);
|
|
1198
|
+
let providers = entries.map(normalize).map((p) => withDefaultCacheRead(p, defaultCacheReadRatio));
|
|
1190
1199
|
if (autoSort) {
|
|
1191
1200
|
providers = [...providers].sort((a, b) => priceKey(a) - priceKey(b));
|
|
1192
1201
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ai-lcr",
|
|
3
|
-
"version": "0.5.2",
|
|
3
|
+
"version": "0.5.3",
|
|
4
4
|
"description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|