ai-lcr 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -194,6 +194,11 @@ function costForUsage(cost, inputTokens, outputTokens, cacheReadTokens) {
194
194
  const cachedRate = cost.cacheRead ?? cost.input;
195
195
  return fullInput / 1e6 * cost.input + cached / 1e6 * cachedRate + outputTokens / 1e6 * cost.output;
196
196
  }
197
+ function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
198
+ if (cost.cacheRead === void 0) return 0;
199
+ const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
200
+ return cached / 1e6 * (cost.input - cost.cacheRead);
201
+ }
197
202
  function requestIdFrom(options) {
198
203
  const raw = options.providerOptions?.lcr?.requestId;
199
204
  return typeof raw === "string" && raw.length > 0 ? raw : void 0;
@@ -298,19 +303,22 @@ var LcrFallbackModel = class {
298
303
  });
299
304
  }
300
305
  /**
301
- * Baseline = what this same usage would have cost on the most expensive
302
- * *priced* provider in the chain (typically the OpenRouter fallback leg). The
303
- * winner's savings is `baselineUsd - costUsd`. Undefined when no provider in
304
- * the chain carries a price (nothing to compare against).
306
+ * Baseline = what this same usage would have cost on the always-on fallback:
307
+ * the LAST priced leg of the chain (by convention the list-price provider you'd
308
+ * use without routing — e.g. OpenRouter, always last). The winner's saving is
309
+ * `baselineUsd - costUsd`. We take the last priced leg, NOT the most expensive
310
+ * one: prompt caching can make a sticker-cheaper provider (no `cacheRead`) cost
311
+ * MORE on a cache-heavy call, and a max-of-chain baseline would then fabricate a
312
+ * "saving" even on calls the fallback itself served. Undefined when no provider
313
+ * in the chain carries a price (nothing to compare against).
305
314
  */
306
315
  baselineUsd(inputTokens, outputTokens, cacheReadTokens) {
307
- let max;
316
+ let baseline;
308
317
  for (const p of this.opts.providers) {
309
318
  if (!p.cost) continue;
310
- const c = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
311
- if (max === void 0 || c > max) max = c;
319
+ baseline = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
312
320
  }
313
- return max;
321
+ return baseline;
314
322
  }
315
323
  /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
316
324
  finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
@@ -319,6 +327,7 @@ var LcrFallbackModel = class {
319
327
  const outputTokens = usage?.outputTokens?.total ?? 0;
320
328
  const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
321
329
  const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
330
+ const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
322
331
  const usageMissing = inputTokens === 0 && outputTokens === 0;
323
332
  this.emitCost({
324
333
  model: this.opts.modelName,
@@ -341,6 +350,7 @@ var LcrFallbackModel = class {
341
350
  ...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
342
351
  costUsd,
343
352
  baselineUsd: this.baselineUsd(inputTokens, outputTokens, cacheReadTokens),
353
+ ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
344
354
  ...ctx.requestId ? { requestId: ctx.requestId } : {},
345
355
  ...usageMissing ? { usageMissing: true } : {}
346
356
  });
package/dist/index.d.cts CHANGED
@@ -109,12 +109,25 @@ interface CallRecord {
109
109
  /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
110
110
  costUsd: number;
111
111
  /**
112
- * What the same request would have cost on the most expensive *priced*
113
- * provider in the chain, on identical token usage — the savings baseline
114
- * (`baselineUsd - costUsd`). Set by both routers whenever at least one
115
- * provider carries a `cost`; undefined only when no provider was priced.
112
+ * What this same usage would have cost on the savings baseline, so
113
+ * `baselineUsd - costUsd` is what routing actually saved. Text router: the
114
+ * always-on fallback leg — the LAST priced provider in the chain, i.e. the
115
+ * list-price provider you'd fall back to without routing (e.g. OpenRouter).
116
+ * Media router: the model-maker's official direct price. NOT the most
117
+ * expensive leg of the chain: prompt caching can make a sticker-cheaper
118
+ * provider cost more on a cache-heavy call, and a max-of-chain baseline would
119
+ * fabricate a "saving" on calls the fallback itself served. Undefined only
120
+ * when no provider was priced.
116
121
  */
117
122
  baselineUsd?: number;
123
+ /**
124
+ * The slice of `costUsd` that prompt-cache reads saved versus paying the full
125
+ * input rate for those same tokens (`cachedTokens / 1e6 × (input − cacheRead)`).
126
+ * Present only when > 0. This is the serving provider's own caching benefit —
127
+ * it happens with or without routing — so it is NOT a routing saving and must
128
+ * be surfaced separately, never folded into `baselineUsd - costUsd`.
129
+ */
130
+ cachedSavingUsd?: number;
118
131
  /**
119
132
  * Caller-supplied correlation id, read from `providerOptions.lcr.requestId`
120
133
  * on the call. Multi-step tool loops emit one record per `doStream`/
package/dist/index.d.ts CHANGED
@@ -109,12 +109,25 @@ interface CallRecord {
109
109
  /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
110
110
  costUsd: number;
111
111
  /**
112
- * What the same request would have cost on the most expensive *priced*
113
- * provider in the chain, on identical token usage — the savings baseline
114
- * (`baselineUsd - costUsd`). Set by both routers whenever at least one
115
- * provider carries a `cost`; undefined only when no provider was priced.
112
+ * What this same usage would have cost on the savings baseline, so
113
+ * `baselineUsd - costUsd` is what routing actually saved. Text router: the
114
+ * always-on fallback leg — the LAST priced provider in the chain, i.e. the
115
+ * list-price provider you'd fall back to without routing (e.g. OpenRouter).
116
+ * Media router: the model-maker's official direct price. NOT the most
117
+ * expensive leg of the chain: prompt caching can make a sticker-cheaper
118
+ * provider cost more on a cache-heavy call, and a max-of-chain baseline would
119
+ * fabricate a "saving" on calls the fallback itself served. Undefined only
120
+ * when no provider was priced.
116
121
  */
117
122
  baselineUsd?: number;
123
+ /**
124
+ * The slice of `costUsd` that prompt-cache reads saved versus paying the full
125
+ * input rate for those same tokens (`cachedTokens / 1e6 × (input − cacheRead)`).
126
+ * Present only when > 0. This is the serving provider's own caching benefit —
127
+ * it happens with or without routing — so it is NOT a routing saving and must
128
+ * be surfaced separately, never folded into `baselineUsd - costUsd`.
129
+ */
130
+ cachedSavingUsd?: number;
118
131
  /**
119
132
  * Caller-supplied correlation id, read from `providerOptions.lcr.requestId`
120
133
  * on the call. Multi-step tool loops emit one record per `doStream`/
package/dist/index.js CHANGED
@@ -152,6 +152,11 @@ function costForUsage(cost, inputTokens, outputTokens, cacheReadTokens) {
152
152
  const cachedRate = cost.cacheRead ?? cost.input;
153
153
  return fullInput / 1e6 * cost.input + cached / 1e6 * cachedRate + outputTokens / 1e6 * cost.output;
154
154
  }
155
+ function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
156
+ if (cost.cacheRead === void 0) return 0;
157
+ const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
158
+ return cached / 1e6 * (cost.input - cost.cacheRead);
159
+ }
155
160
  function requestIdFrom(options) {
156
161
  const raw = options.providerOptions?.lcr?.requestId;
157
162
  return typeof raw === "string" && raw.length > 0 ? raw : void 0;
@@ -256,19 +261,22 @@ var LcrFallbackModel = class {
256
261
  });
257
262
  }
258
263
  /**
259
- * Baseline = what this same usage would have cost on the most expensive
260
- * *priced* provider in the chain (typically the OpenRouter fallback leg). The
261
- * winner's savings is `baselineUsd - costUsd`. Undefined when no provider in
262
- * the chain carries a price (nothing to compare against).
264
+ * Baseline = what this same usage would have cost on the always-on fallback:
265
+ * the LAST priced leg of the chain (by convention the list-price provider you'd
266
+ * use without routing — e.g. OpenRouter, always last). The winner's saving is
267
+ * `baselineUsd - costUsd`. We take the last priced leg, NOT the most expensive
268
+ * one: prompt caching can make a sticker-cheaper provider (no `cacheRead`) cost
269
+ * MORE on a cache-heavy call, and a max-of-chain baseline would then fabricate a
270
+ * "saving" even on calls the fallback itself served. Undefined when no provider
271
+ * in the chain carries a price (nothing to compare against).
263
272
  */
264
273
  baselineUsd(inputTokens, outputTokens, cacheReadTokens) {
265
- let max;
274
+ let baseline;
266
275
  for (const p of this.opts.providers) {
267
276
  if (!p.cost) continue;
268
- const c = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
269
- if (max === void 0 || c > max) max = c;
277
+ baseline = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
270
278
  }
271
- return max;
279
+ return baseline;
272
280
  }
273
281
  /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
274
282
  finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
@@ -277,6 +285,7 @@ var LcrFallbackModel = class {
277
285
  const outputTokens = usage?.outputTokens?.total ?? 0;
278
286
  const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
279
287
  const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
288
+ const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
280
289
  const usageMissing = inputTokens === 0 && outputTokens === 0;
281
290
  this.emitCost({
282
291
  model: this.opts.modelName,
@@ -299,6 +308,7 @@ var LcrFallbackModel = class {
299
308
  ...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
300
309
  costUsd,
301
310
  baselineUsd: this.baselineUsd(inputTokens, outputTokens, cacheReadTokens),
311
+ ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
302
312
  ...ctx.requestId ? { requestId: ctx.requestId } : {},
303
313
  ...usageMissing ? { usageMissing: true } : {}
304
314
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-lcr",
3
- "version": "0.5.0",
3
+ "version": "0.5.1",
4
4
  "description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
5
5
  "keywords": [
6
6
  "ai",