ai-lcr 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +18 -8
- package/dist/index.d.cts +17 -4
- package/dist/index.d.ts +17 -4
- package/dist/index.js +18 -8
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -194,6 +194,11 @@ function costForUsage(cost, inputTokens, outputTokens, cacheReadTokens) {
|
|
|
194
194
|
const cachedRate = cost.cacheRead ?? cost.input;
|
|
195
195
|
return fullInput / 1e6 * cost.input + cached / 1e6 * cachedRate + outputTokens / 1e6 * cost.output;
|
|
196
196
|
}
|
|
197
|
+
function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
|
|
198
|
+
if (cost.cacheRead === void 0) return 0;
|
|
199
|
+
const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
|
|
200
|
+
return cached / 1e6 * (cost.input - cost.cacheRead);
|
|
201
|
+
}
|
|
197
202
|
function requestIdFrom(options) {
|
|
198
203
|
const raw = options.providerOptions?.lcr?.requestId;
|
|
199
204
|
return typeof raw === "string" && raw.length > 0 ? raw : void 0;
|
|
@@ -298,19 +303,22 @@ var LcrFallbackModel = class {
|
|
|
298
303
|
});
|
|
299
304
|
}
|
|
300
305
|
/**
|
|
301
|
-
* Baseline = what this same usage would have cost on the
|
|
302
|
-
*
|
|
303
|
-
*
|
|
304
|
-
* the
|
|
306
|
+
* Baseline = what this same usage would have cost on the always-on fallback:
|
|
307
|
+
* the LAST priced leg of the chain (by convention the list-price provider you'd
|
|
308
|
+
* use without routing — e.g. OpenRouter, always last). The winner's saving is
|
|
309
|
+
* `baselineUsd - costUsd`. We take the last priced leg, NOT the most expensive
|
|
310
|
+
* one: prompt caching can make a sticker-cheaper provider (no `cacheRead`) cost
|
|
311
|
+
* MORE on a cache-heavy call, and a max-of-chain baseline would then fabricate a
|
|
312
|
+
* "saving" even on calls the fallback itself served. Undefined when no provider
|
|
313
|
+
* in the chain carries a price (nothing to compare against).
|
|
305
314
|
*/
|
|
306
315
|
baselineUsd(inputTokens, outputTokens, cacheReadTokens) {
|
|
307
|
-
let max;
|
|
316
|
+
let baseline;
|
|
308
317
|
for (const p of this.opts.providers) {
|
|
309
318
|
if (!p.cost) continue;
|
|
310
|
-
const c = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
|
|
311
|
-
if (max === void 0 || c > max) max = c;
|
|
319
|
+
baseline = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
|
|
312
320
|
}
|
|
313
|
-
return max;
|
|
321
|
+
return baseline;
|
|
314
322
|
}
|
|
315
323
|
/** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
|
|
316
324
|
finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
|
|
@@ -319,6 +327,7 @@ var LcrFallbackModel = class {
|
|
|
319
327
|
const outputTokens = usage?.outputTokens?.total ?? 0;
|
|
320
328
|
const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
|
|
321
329
|
const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
|
|
330
|
+
const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
|
|
322
331
|
const usageMissing = inputTokens === 0 && outputTokens === 0;
|
|
323
332
|
this.emitCost({
|
|
324
333
|
model: this.opts.modelName,
|
|
@@ -341,6 +350,7 @@ var LcrFallbackModel = class {
|
|
|
341
350
|
...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
|
|
342
351
|
costUsd,
|
|
343
352
|
baselineUsd: this.baselineUsd(inputTokens, outputTokens, cacheReadTokens),
|
|
353
|
+
...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
|
|
344
354
|
...ctx.requestId ? { requestId: ctx.requestId } : {},
|
|
345
355
|
...usageMissing ? { usageMissing: true } : {}
|
|
346
356
|
});
|
package/dist/index.d.cts
CHANGED
|
@@ -109,12 +109,25 @@ interface CallRecord {
|
|
|
109
109
|
/** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
|
|
110
110
|
costUsd: number;
|
|
111
111
|
/**
|
|
112
|
-
* What
|
|
113
|
-
*
|
|
114
|
-
*
|
|
115
|
-
* provider
|
|
112
|
+
* What this same usage would have cost on the savings baseline, so
|
|
113
|
+
* `baselineUsd - costUsd` is what routing actually saved. Text router: the
|
|
114
|
+
* always-on fallback leg — the LAST priced provider in the chain, i.e. the
|
|
115
|
+
* list-price provider you'd fall back to without routing (e.g. OpenRouter).
|
|
116
|
+
* Media router: the model-maker's official direct price. NOT the most
|
|
117
|
+
* expensive leg of the chain: prompt caching can make a sticker-cheaper
|
|
118
|
+
* provider cost more on a cache-heavy call, and a max-of-chain baseline would
|
|
119
|
+
* fabricate a "saving" on calls the fallback itself served. Undefined only
|
|
120
|
+
* when no provider was priced.
|
|
116
121
|
*/
|
|
117
122
|
baselineUsd?: number;
|
|
123
|
+
/**
|
|
124
|
+
* The slice of `costUsd` that prompt-cache reads saved versus paying the full
|
|
125
|
+
* input rate for those same tokens (`cachedTokens × (input − cacheRead)`).
|
|
126
|
+
* Present only when > 0. This is the serving provider's own caching benefit —
|
|
127
|
+
* it happens with or without routing — so it is NOT a routing saving and must
|
|
128
|
+
* be surfaced separately, never folded into `baselineUsd - costUsd`.
|
|
129
|
+
*/
|
|
130
|
+
cachedSavingUsd?: number;
|
|
118
131
|
/**
|
|
119
132
|
* Caller-supplied correlation id, read from `providerOptions.lcr.requestId`
|
|
120
133
|
* on the call. Multi-step tool loops emit one record per `doStream`/
|
package/dist/index.d.ts
CHANGED
|
@@ -109,12 +109,25 @@ interface CallRecord {
|
|
|
109
109
|
/** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
|
|
110
110
|
costUsd: number;
|
|
111
111
|
/**
|
|
112
|
-
* What
|
|
113
|
-
*
|
|
114
|
-
*
|
|
115
|
-
* provider
|
|
112
|
+
* What this same usage would have cost on the savings baseline, so
|
|
113
|
+
* `baselineUsd - costUsd` is what routing actually saved. Text router: the
|
|
114
|
+
* always-on fallback leg — the LAST priced provider in the chain, i.e. the
|
|
115
|
+
* list-price provider you'd fall back to without routing (e.g. OpenRouter).
|
|
116
|
+
* Media router: the model-maker's official direct price. NOT the most
|
|
117
|
+
* expensive leg of the chain: prompt caching can make a sticker-cheaper
|
|
118
|
+
* provider cost more on a cache-heavy call, and a max-of-chain baseline would
|
|
119
|
+
* fabricate a "saving" on calls the fallback itself served. Undefined only
|
|
120
|
+
* when no provider was priced.
|
|
116
121
|
*/
|
|
117
122
|
baselineUsd?: number;
|
|
123
|
+
/**
|
|
124
|
+
* The slice of `costUsd` that prompt-cache reads saved versus paying the full
|
|
125
|
+
* input rate for those same tokens (`cachedTokens × (input − cacheRead)`).
|
|
126
|
+
* Present only when > 0. This is the serving provider's own caching benefit —
|
|
127
|
+
* it happens with or without routing — so it is NOT a routing saving and must
|
|
128
|
+
* be surfaced separately, never folded into `baselineUsd - costUsd`.
|
|
129
|
+
*/
|
|
130
|
+
cachedSavingUsd?: number;
|
|
118
131
|
/**
|
|
119
132
|
* Caller-supplied correlation id, read from `providerOptions.lcr.requestId`
|
|
120
133
|
* on the call. Multi-step tool loops emit one record per `doStream`/
|
package/dist/index.js
CHANGED
|
@@ -152,6 +152,11 @@ function costForUsage(cost, inputTokens, outputTokens, cacheReadTokens) {
|
|
|
152
152
|
const cachedRate = cost.cacheRead ?? cost.input;
|
|
153
153
|
return fullInput / 1e6 * cost.input + cached / 1e6 * cachedRate + outputTokens / 1e6 * cost.output;
|
|
154
154
|
}
|
|
155
|
+
function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
|
|
156
|
+
if (cost.cacheRead === void 0) return 0;
|
|
157
|
+
const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
|
|
158
|
+
return cached / 1e6 * (cost.input - cost.cacheRead);
|
|
159
|
+
}
|
|
155
160
|
function requestIdFrom(options) {
|
|
156
161
|
const raw = options.providerOptions?.lcr?.requestId;
|
|
157
162
|
return typeof raw === "string" && raw.length > 0 ? raw : void 0;
|
|
@@ -256,19 +261,22 @@ var LcrFallbackModel = class {
|
|
|
256
261
|
});
|
|
257
262
|
}
|
|
258
263
|
/**
|
|
259
|
-
* Baseline = what this same usage would have cost on the
|
|
260
|
-
*
|
|
261
|
-
*
|
|
262
|
-
* the
|
|
264
|
+
* Baseline = what this same usage would have cost on the always-on fallback:
|
|
265
|
+
* the LAST priced leg of the chain (by convention the list-price provider you'd
|
|
266
|
+
* use without routing — e.g. OpenRouter, always last). The winner's saving is
|
|
267
|
+
* `baselineUsd - costUsd`. We take the last priced leg, NOT the most expensive
|
|
268
|
+
* one: prompt caching can make a sticker-cheaper provider (no `cacheRead`) cost
|
|
269
|
+
* MORE on a cache-heavy call, and a max-of-chain baseline would then fabricate a
|
|
270
|
+
* "saving" even on calls the fallback itself served. Undefined when no provider
|
|
271
|
+
* in the chain carries a price (nothing to compare against).
|
|
263
272
|
*/
|
|
264
273
|
baselineUsd(inputTokens, outputTokens, cacheReadTokens) {
|
|
265
|
-
let max;
|
|
274
|
+
let baseline;
|
|
266
275
|
for (const p of this.opts.providers) {
|
|
267
276
|
if (!p.cost) continue;
|
|
268
|
-
const c = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
|
|
269
|
-
if (max === void 0 || c > max) max = c;
|
|
277
|
+
baseline = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
|
|
270
278
|
}
|
|
271
|
-
return max;
|
|
279
|
+
return baseline;
|
|
272
280
|
}
|
|
273
281
|
/** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
|
|
274
282
|
finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
|
|
@@ -277,6 +285,7 @@ var LcrFallbackModel = class {
|
|
|
277
285
|
const outputTokens = usage?.outputTokens?.total ?? 0;
|
|
278
286
|
const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
|
|
279
287
|
const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
|
|
288
|
+
const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
|
|
280
289
|
const usageMissing = inputTokens === 0 && outputTokens === 0;
|
|
281
290
|
this.emitCost({
|
|
282
291
|
model: this.opts.modelName,
|
|
@@ -299,6 +308,7 @@ var LcrFallbackModel = class {
|
|
|
299
308
|
...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
|
|
300
309
|
costUsd,
|
|
301
310
|
baselineUsd: this.baselineUsd(inputTokens, outputTokens, cacheReadTokens),
|
|
311
|
+
...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
|
|
302
312
|
...ctx.requestId ? { requestId: ctx.requestId } : {},
|
|
303
313
|
...usageMissing ? { usageMissing: true } : {}
|
|
304
314
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ai-lcr",
|
|
3
|
-
"version": "0.5.0",
|
|
3
|
+
"version": "0.5.1",
|
|
4
4
|
"description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|