ai-lcr 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -15,7 +15,7 @@
15
15
  </p>
16
16
 
17
17
  <p align="center">
18
- <img src="assets/ai-lcr-hero.svg" alt="ai-lcr routes each model to its own cheapest provider — Gemini to Kunavo, DeepSeek to OpenRouter, Seedream to fal, Flux Schnell to Runware — and falls back on failure" width="820">
18
+ <img src="assets/ai-lcr-hero.svg" alt="ai-lcr keeps a cheapest-first list of providers per model — serves the cheapest (saving ~40%), fails over to the next on error, and snaps back to the cheapest after ~60s" width="720">
19
19
  </p>
20
20
 
21
21
  The same model costs different amounts on different providers — and no single provider is cheapest for everything. `ai-lcr` keeps a cheapest-first list per model, routes to the cheapest healthy one (⭐ below), and falls through on failure — the way phone carriers have done [Least Cost Routing](https://en.wikipedia.org/wiki/Least-cost_routing) for decades.
@@ -144,10 +144,6 @@ DeepInfra carries open weights only — no first-party Claude / GPT / Gemini. Fo
144
144
  2. **Fall through on failure.** On a retryable error — rate limit, 5xx, timeout, or a **billing cap** (402 / out-of-credit / quota) — it advances to the next provider, streaming-safe. A caller's own bad request (e.g. 400, 422) passes through immediately.
145
145
  3. **Recover.** After an idle window (`resetIntervalMs`, default 60s) it snaps back to the cheapest provider.
146
146
 
147
- <p align="center">
148
- <img src="assets/ai-lcr-routing.svg" alt="routing diagram: cheapest first, fallback on failure, recover after idle" width="820">
149
- </p>
150
-
151
147
  ## See what happened (`onCall`)
152
148
 
153
149
  `onError`/`onCost` fire separately and uncorrelated, so a failover is hard to read after the fact. `onCall` gives you **one record per request** — the full chain, the winner, the reason for each failed hop, latency, and cost — and `formatCallRecord` turns it into a one-liner you can scan:
package/README.zh-CN.md CHANGED
@@ -15,7 +15,7 @@
15
15
  </p>
16
16
 
17
17
  <p align="center">
18
- <img src="assets/ai-lcr-hero.svg" alt="ai-lcr 把每个模型路由到各自最便宜的 provider——Gemini 走 Kunavo,DeepSeek 走 OpenRouter,Seedream 走 fal,Flux Schnell 走 Runware——失败时自动 fallback" width="820">
18
+ <img src="assets/ai-lcr-hero.svg" alt="ai-lcr 为每个模型维护一份「最便宜优先」的 provider 列表——默认走最便宜的(省约 40%),出错时切到下一个,约 60 秒后自动切回最便宜" width="720">
19
19
  </p>
20
20
 
21
21
  同一个模型在不同 provider 上的价格不同——而且没有任何单一 provider 在所有模型上都最便宜。`ai-lcr` 为每个模型维护一份「最便宜优先」的列表,路由到其中最便宜且健康的 provider(下表中的 ⭐),失败时向下穿透——这正是电话运营商几十年来一直在做的 [最低成本路由(Least Cost Routing)](https://en.wikipedia.org/wiki/Least-cost_routing)。
@@ -144,10 +144,6 @@ DeepInfra 只承载开源权重——没有第一方 Claude / GPT / Gemini。那
144
144
  2. **失败时向下穿透。** 遇到可重试的错误(限流、5xx、超时)时,前进到下一个 provider,且对流式安全。硬错误(400、401、403、422)会直接透传,不做重试。
145
145
  3. **恢复。** 在一段空闲窗口(`resetIntervalMs`,默认 60s)之后,自动回到最便宜的 provider。
146
146
 
147
- <p align="center">
148
- <img src="assets/ai-lcr-routing.svg" alt="路由示意图:最便宜优先、失败时 fallback、空闲后恢复" width="820">
149
- </p>
150
-
151
147
  ## 支持的 provider
152
148
 
153
149
  任何 OpenAI 兼容的 endpoint 都可用——任何 AI SDK 的 provider 包也都可用,包括模型厂商自己的官方 API。
package/dist/index.cjs CHANGED
@@ -41,6 +41,12 @@ __export(index_exports, {
41
41
  module.exports = __toCommonJS(index_exports);
42
42
 
43
43
  // src/fallback.ts
44
+ var EmptyCompletionError = class extends Error {
45
+ constructor(provider) {
46
+ super(`ai-lcr: provider "${provider}" returned an empty completion (0 output tokens, no content)`);
47
+ this.name = "EmptyCompletionError";
48
+ }
49
+ };
44
50
  var RETRYABLE_STATUS = /* @__PURE__ */ new Set([401, 402, 403, 408, 409, 413, 429, 498, 500]);
45
51
  var RETRYABLE_PATTERNS = [
46
52
  "overloaded",
@@ -153,6 +159,7 @@ function isRetryableError(error) {
153
159
  return RETRYABLE_PATTERNS.some((p) => text.includes(p));
154
160
  }
155
161
  function classifyError(error) {
162
+ if (error instanceof EmptyCompletionError) return "empty_completion";
156
163
  const e = error;
157
164
  const status = e?.statusCode ?? e?.status;
158
165
  if (typeof status === "number") return String(status);
@@ -175,6 +182,7 @@ var BILLING_PATTERNS = [
175
182
  "\u6263\u6B3E"
176
183
  ];
177
184
  function classifyErrorKind(error) {
185
+ if (error instanceof EmptyCompletionError) return "empty";
178
186
  const e = error;
179
187
  const status = e?.statusCode ?? e?.status;
180
188
  const { text } = errorSignals(error);
@@ -194,10 +202,27 @@ function costForUsage(cost, inputTokens, outputTokens, cacheReadTokens) {
194
202
  const cachedRate = cost.cacheRead ?? cost.input;
195
203
  return fullInput / 1e6 * cost.input + cached / 1e6 * cachedRate + outputTokens / 1e6 * cost.output;
196
204
  }
205
+ function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
206
+ if (cost.cacheRead === void 0) return 0;
207
+ const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
208
+ return cached / 1e6 * (cost.input - cost.cacheRead);
209
+ }
197
210
  function requestIdFrom(options) {
198
211
  const raw = options.providerOptions?.lcr?.requestId;
199
212
  return typeof raw === "string" && raw.length > 0 ? raw : void 0;
200
213
  }
214
+ var CONTENT_PART_TYPES = /* @__PURE__ */ new Set([
215
+ "text-delta",
216
+ "reasoning-delta",
217
+ "tool-call",
218
+ "tool-input-start",
219
+ "tool-input-delta",
220
+ "tool-input-end",
221
+ "file",
222
+ "source",
223
+ "tool-result",
224
+ "raw"
225
+ ]);
201
226
  var LcrFallbackModel = class {
202
227
  constructor(opts) {
203
228
  this.opts = opts;
@@ -298,19 +323,22 @@ var LcrFallbackModel = class {
298
323
  });
299
324
  }
300
325
  /**
301
- * Baseline = what this same usage would have cost on the most expensive
302
- * *priced* provider in the chain (typically the OpenRouter fallback leg). The
303
- * winner's savings is `baselineUsd - costUsd`. Undefined when no provider in
304
- * the chain carries a price (nothing to compare against).
326
+ * Baseline = what this same usage would have cost on the always-on fallback:
327
+ * the LAST priced leg of the chain (by convention the list-price provider you'd
328
+ * use without routing — e.g. OpenRouter, always last). The winner's saving is
329
+ * `baselineUsd - costUsd`. We take the last priced leg, NOT the most expensive
330
+ * one: prompt caching can make a sticker-cheaper provider (no `cacheRead`) cost
331
+ * MORE on a cache-heavy call, and a max-of-chain baseline would then fabricate a
332
+ * "saving" even on calls the fallback itself served. Undefined when no provider
333
+ * in the chain carries a price (nothing to compare against).
305
334
  */
306
335
  baselineUsd(inputTokens, outputTokens, cacheReadTokens) {
307
- let max;
336
+ let baseline;
308
337
  for (const p of this.opts.providers) {
309
338
  if (!p.cost) continue;
310
- const c = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
311
- if (max === void 0 || c > max) max = c;
339
+ baseline = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
312
340
  }
313
- return max;
341
+ return baseline;
314
342
  }
315
343
  /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
316
344
  finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
@@ -319,7 +347,9 @@ var LcrFallbackModel = class {
319
347
  const outputTokens = usage?.outputTokens?.total ?? 0;
320
348
  const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
321
349
  const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
350
+ const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
322
351
  const usageMissing = inputTokens === 0 && outputTokens === 0;
352
+ const emptyCompletion = inputTokens > 0 && outputTokens === 0;
323
353
  this.emitCost({
324
354
  model: this.opts.modelName,
325
355
  provider: provider.label,
@@ -341,8 +371,10 @@ var LcrFallbackModel = class {
341
371
  ...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
342
372
  costUsd,
343
373
  baselineUsd: this.baselineUsd(inputTokens, outputTokens, cacheReadTokens),
374
+ ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
344
375
  ...ctx.requestId ? { requestId: ctx.requestId } : {},
345
- ...usageMissing ? { usageMissing: true } : {}
376
+ ...usageMissing ? { usageMissing: true } : {},
377
+ ...emptyCompletion ? { emptyCompletion: true } : {}
346
378
  });
347
379
  }
348
380
  /** Every provider failed: fire `onCall` with no winner. */
@@ -373,6 +405,15 @@ var LcrFallbackModel = class {
373
405
  const attemptStart = Date.now();
374
406
  try {
375
407
  const result = await provider.model.doGenerate(options);
408
+ const out = result.usage?.outputTokens?.total ?? 0;
409
+ const inp = result.usage?.inputTokens?.total ?? 0;
410
+ if (inp > 0 && out === 0 && tried < n - 1) {
411
+ const emptyErr = new EmptyCompletionError(provider.label);
412
+ lastError = emptyErr;
413
+ this.emitError(emptyErr, provider.label);
414
+ this.recordFail(ctx, provider, attemptStart, emptyErr);
415
+ continue;
416
+ }
376
417
  this.settleSticky(idx);
377
418
  this.finalizeOk(ctx, provider, attemptStart, result.usage);
378
419
  return result;
@@ -434,7 +475,7 @@ var LcrFallbackModel = class {
434
475
  const servingIdx = idx;
435
476
  const triedBeforeServing = tried;
436
477
  let usage;
437
- let streamedAny = false;
478
+ let contentStreamed = false;
438
479
  let ttftMs;
439
480
  const stream = new ReadableStream({
440
481
  async start(controller) {
@@ -443,17 +484,24 @@ var LcrFallbackModel = class {
443
484
  reader = result.stream.getReader();
444
485
  for (; ; ) {
445
486
  const { done, value } = await reader.read();
446
- if (!streamedAny && value && typeof value === "object" && "error" in value) {
487
+ if (!contentStreamed && value && typeof value === "object" && "error" in value) {
447
488
  const err = value.error;
448
489
  if (self.shouldRetry(err)) throw err;
449
490
  }
450
491
  if (done) break;
451
- if (value.type === "finish") usage = value.usage;
492
+ if (value.type === "finish") {
493
+ usage = value.usage;
494
+ const out = value.usage?.outputTokens?.total ?? 0;
495
+ const inp = value.usage?.inputTokens?.total ?? 0;
496
+ if (inp > 0 && out === 0 && !contentStreamed && triedBeforeServing + 1 < n) {
497
+ throw new EmptyCompletionError(servingProvider.label);
498
+ }
499
+ }
452
500
  if (ttftMs === void 0 && (value.type === "text-delta" || value.type === "reasoning-delta")) {
453
501
  ttftMs = Date.now() - servingAttemptStart;
454
502
  }
455
503
  controller.enqueue(value);
456
- if (value.type !== "stream-start") streamedAny = true;
504
+ if (CONTENT_PART_TYPES.has(value.type)) contentStreamed = true;
457
505
  }
458
506
  self.settleSticky(servingIdx);
459
507
  self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
@@ -461,7 +509,7 @@ var LcrFallbackModel = class {
461
509
  } catch (error) {
462
510
  self.emitError(error, servingProvider.label);
463
511
  self.recordFail(ctx, servingProvider, servingAttemptStart, error);
464
- if (!streamedAny) {
512
+ if (!contentStreamed) {
465
513
  const nextTried = triedBeforeServing + 1;
466
514
  if (nextTried >= n) {
467
515
  self.finalizeFail(ctx);
@@ -524,6 +572,7 @@ function formatCallRecord(record, opts = {}) {
524
572
  line += ` (saved $${(record.baselineUsd - record.costUsd).toFixed(4)})`;
525
573
  }
526
574
  if (record.usageMissing) line += ` \u26A0no-usage`;
575
+ if (record.emptyCompletion) line += ` \u26A0empty`;
527
576
  const failed = record.attempts.filter((a) => !a.ok);
528
577
  if (failed.length > 0) {
529
578
  const reasons = failed.map((a) => `${a.provider} ${a.errorClass ?? "error"}`).join(", ");
package/dist/index.d.cts CHANGED
@@ -49,8 +49,15 @@ interface CostEvent {
49
49
  * - "auth": 401 / 403 — a misconfigured or revoked key.
50
50
  * - "billing": 402 / out-of-credit / quota — account needs topping up.
51
51
  * - "client": a non-retryable caller error (e.g. 400 bad request).
52
+ * - "empty": provider returned a clean 200 but generated nothing
53
+ * (zero output tokens, no content) — a *content*-integrity
54
+ * failure, not a transport one. The provider looks healthy to
55
+ * every status/network check yet hands the user a blank. We
56
+ * fail over on it like a transient error, but tag it separately
57
+ * so a run of `"empty"` attempts (a quietly degraded model)
58
+ * doesn't hide inside the transient noise.
52
59
  */
53
- type ErrorKind = "transient" | "auth" | "billing" | "client";
60
+ type ErrorKind = "transient" | "auth" | "billing" | "client" | "empty";
54
61
  /** One provider attempt within a single request. */
55
62
  interface RouteAttempt {
56
63
  /** Provider label that was tried (e.g. "tokenmart"). */
@@ -109,12 +116,25 @@ interface CallRecord {
109
116
  /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
110
117
  costUsd: number;
111
118
  /**
112
- * What the same request would have cost on the most expensive *priced*
113
- * provider in the chain, on identical token usage — the savings baseline
114
- * (`baselineUsd - costUsd`). Set by both routers whenever at least one
115
- * provider carries a `cost`; undefined only when no provider was priced.
119
+ * What this same usage would have cost on the savings baseline, so
120
+ * `baselineUsd - costUsd` is what routing actually saved. Text router: the
121
+ * always-on fallback leg the LAST priced provider in the chain, i.e. the
122
+ * list-price provider you'd fall back to without routing (e.g. OpenRouter).
123
+ * Media router: the model-maker's official direct price. NOT the most
124
+ * expensive leg of the chain: prompt caching can make a sticker-cheaper
125
+ * provider cost more on a cache-heavy call, and a max-of-chain baseline would
126
+ * fabricate a "saving" on calls the fallback itself served. Undefined only
127
+ * when no provider was priced.
116
128
  */
117
129
  baselineUsd?: number;
130
+ /**
131
+ * The slice of `costUsd` that prompt-cache reads saved versus paying the full
132
+ * input rate for those same tokens (`cachedTokens × (input − cacheRead)`).
133
+ * Present only when > 0. This is the serving provider's own caching benefit —
134
+ * it happens with or without routing — so it is NOT a routing saving and must
135
+ * be surfaced separately, never folded into `baselineUsd - costUsd`.
136
+ */
137
+ cachedSavingUsd?: number;
118
138
  /**
119
139
  * Caller-supplied correlation id, read from `providerOptions.lcr.requestId`
120
140
  * on the call. Multi-step tool loops emit one record per `doStream`/
@@ -129,6 +149,21 @@ interface CallRecord {
129
149
  * other signal. Treat a flagged record as "cost unknown", not "free".
130
150
  */
131
151
  usageMissing?: boolean;
152
+ /**
153
+ * True when the winner served a clean, error-free response that nonetheless
154
+ * generated **nothing**: zero output tokens with a non-empty prompt (and, for
155
+ * streams, not one content part). The user asked and got a blank. Distinct
156
+ * from {@link usageMissing} (which is input *and* output both zero — usage not
157
+ * reported); here the prompt was billed but the model produced no output.
158
+ *
159
+ * Set only when this empty response is what the caller actually received —
160
+ * i.e. every provider in the chain came back empty, so failover couldn't
161
+ * rescue it. (When an earlier provider returns empty but a later one produces
162
+ * content, that earlier attempt is recorded as a failed `empty_completion` hop
163
+ * and this flag stays unset, because the winner did produce output.) Alert on
164
+ * it: a provider that quietly returns blanks passes every health check.
165
+ */
166
+ emptyCompletion?: boolean;
132
167
  }
133
168
  /**
134
169
  * Normalize an error into a short, log-friendly class for {@link CallRecord}.
package/dist/index.d.ts CHANGED
@@ -49,8 +49,15 @@ interface CostEvent {
49
49
  * - "auth": 401 / 403 — a misconfigured or revoked key.
50
50
  * - "billing": 402 / out-of-credit / quota — account needs topping up.
51
51
  * - "client": a non-retryable caller error (e.g. 400 bad request).
52
+ * - "empty": provider returned a clean 200 but generated nothing
53
+ * (zero output tokens, no content) — a *content*-integrity
54
+ * failure, not a transport one. The provider looks healthy to
55
+ * every status/network check yet hands the user a blank. We
56
+ * fail over on it like a transient error, but tag it separately
57
+ * so a run of `"empty"` attempts (a quietly degraded model)
58
+ * doesn't hide inside the transient noise.
52
59
  */
53
- type ErrorKind = "transient" | "auth" | "billing" | "client";
60
+ type ErrorKind = "transient" | "auth" | "billing" | "client" | "empty";
54
61
  /** One provider attempt within a single request. */
55
62
  interface RouteAttempt {
56
63
  /** Provider label that was tried (e.g. "tokenmart"). */
@@ -109,12 +116,25 @@ interface CallRecord {
109
116
  /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
110
117
  costUsd: number;
111
118
  /**
112
- * What the same request would have cost on the most expensive *priced*
113
- * provider in the chain, on identical token usage — the savings baseline
114
- * (`baselineUsd - costUsd`). Set by both routers whenever at least one
115
- * provider carries a `cost`; undefined only when no provider was priced.
119
+ * What this same usage would have cost on the savings baseline, so
120
+ * `baselineUsd - costUsd` is what routing actually saved. Text router: the
121
+ * always-on fallback leg the LAST priced provider in the chain, i.e. the
122
+ * list-price provider you'd fall back to without routing (e.g. OpenRouter).
123
+ * Media router: the model-maker's official direct price. NOT the most
124
+ * expensive leg of the chain: prompt caching can make a sticker-cheaper
125
+ * provider cost more on a cache-heavy call, and a max-of-chain baseline would
126
+ * fabricate a "saving" on calls the fallback itself served. Undefined only
127
+ * when no provider was priced.
116
128
  */
117
129
  baselineUsd?: number;
130
+ /**
131
+ * The slice of `costUsd` that prompt-cache reads saved versus paying the full
132
+ * input rate for those same tokens (`cachedTokens × (input − cacheRead)`).
133
+ * Present only when > 0. This is the serving provider's own caching benefit —
134
+ * it happens with or without routing — so it is NOT a routing saving and must
135
+ * be surfaced separately, never folded into `baselineUsd - costUsd`.
136
+ */
137
+ cachedSavingUsd?: number;
118
138
  /**
119
139
  * Caller-supplied correlation id, read from `providerOptions.lcr.requestId`
120
140
  * on the call. Multi-step tool loops emit one record per `doStream`/
@@ -129,6 +149,21 @@ interface CallRecord {
129
149
  * other signal. Treat a flagged record as "cost unknown", not "free".
130
150
  */
131
151
  usageMissing?: boolean;
152
+ /**
153
+ * True when the winner served a clean, error-free response that nonetheless
154
+ * generated **nothing**: zero output tokens with a non-empty prompt (and, for
155
+ * streams, not one content part). The user asked and got a blank. Distinct
156
+ * from {@link usageMissing} (which is input *and* output both zero — usage not
157
+ * reported); here the prompt was billed but the model produced no output.
158
+ *
159
+ * Set only when this empty response is what the caller actually received —
160
+ * i.e. every provider in the chain came back empty, so failover couldn't
161
+ * rescue it. (When an earlier provider returns empty but a later one produces
162
+ * content, that earlier attempt is recorded as a failed `empty_completion` hop
163
+ * and this flag stays unset, because the winner did produce output.) Alert on
164
+ * it: a provider that quietly returns blanks passes every health check.
165
+ */
166
+ emptyCompletion?: boolean;
132
167
  }
133
168
  /**
134
169
  * Normalize an error into a short, log-friendly class for {@link CallRecord}.
package/dist/index.js CHANGED
@@ -1,4 +1,10 @@
1
1
  // src/fallback.ts
2
+ var EmptyCompletionError = class extends Error {
3
+ constructor(provider) {
4
+ super(`ai-lcr: provider "${provider}" returned an empty completion (0 output tokens, no content)`);
5
+ this.name = "EmptyCompletionError";
6
+ }
7
+ };
2
8
  var RETRYABLE_STATUS = /* @__PURE__ */ new Set([401, 402, 403, 408, 409, 413, 429, 498, 500]);
3
9
  var RETRYABLE_PATTERNS = [
4
10
  "overloaded",
@@ -111,6 +117,7 @@ function isRetryableError(error) {
111
117
  return RETRYABLE_PATTERNS.some((p) => text.includes(p));
112
118
  }
113
119
  function classifyError(error) {
120
+ if (error instanceof EmptyCompletionError) return "empty_completion";
114
121
  const e = error;
115
122
  const status = e?.statusCode ?? e?.status;
116
123
  if (typeof status === "number") return String(status);
@@ -133,6 +140,7 @@ var BILLING_PATTERNS = [
133
140
  "\u6263\u6B3E"
134
141
  ];
135
142
  function classifyErrorKind(error) {
143
+ if (error instanceof EmptyCompletionError) return "empty";
136
144
  const e = error;
137
145
  const status = e?.statusCode ?? e?.status;
138
146
  const { text } = errorSignals(error);
@@ -152,10 +160,27 @@ function costForUsage(cost, inputTokens, outputTokens, cacheReadTokens) {
152
160
  const cachedRate = cost.cacheRead ?? cost.input;
153
161
  return fullInput / 1e6 * cost.input + cached / 1e6 * cachedRate + outputTokens / 1e6 * cost.output;
154
162
  }
163
+ function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
164
+ if (cost.cacheRead === void 0) return 0;
165
+ const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
166
+ return cached / 1e6 * (cost.input - cost.cacheRead);
167
+ }
155
168
  function requestIdFrom(options) {
156
169
  const raw = options.providerOptions?.lcr?.requestId;
157
170
  return typeof raw === "string" && raw.length > 0 ? raw : void 0;
158
171
  }
172
+ var CONTENT_PART_TYPES = /* @__PURE__ */ new Set([
173
+ "text-delta",
174
+ "reasoning-delta",
175
+ "tool-call",
176
+ "tool-input-start",
177
+ "tool-input-delta",
178
+ "tool-input-end",
179
+ "file",
180
+ "source",
181
+ "tool-result",
182
+ "raw"
183
+ ]);
159
184
  var LcrFallbackModel = class {
160
185
  constructor(opts) {
161
186
  this.opts = opts;
@@ -256,19 +281,22 @@ var LcrFallbackModel = class {
256
281
  });
257
282
  }
258
283
  /**
259
- * Baseline = what this same usage would have cost on the most expensive
260
- * *priced* provider in the chain (typically the OpenRouter fallback leg). The
261
- * winner's savings is `baselineUsd - costUsd`. Undefined when no provider in
262
- * the chain carries a price (nothing to compare against).
284
+ * Baseline = what this same usage would have cost on the always-on fallback:
285
+ * the LAST priced leg of the chain (by convention the list-price provider you'd
286
+ * use without routing — e.g. OpenRouter, always last). The winner's saving is
287
+ * `baselineUsd - costUsd`. We take the last priced leg, NOT the most expensive
288
+ * one: prompt caching can make a sticker-cheaper provider (no `cacheRead`) cost
289
+ * MORE on a cache-heavy call, and a max-of-chain baseline would then fabricate a
290
+ * "saving" even on calls the fallback itself served. Undefined when no provider
291
+ * in the chain carries a price (nothing to compare against).
263
292
  */
264
293
  baselineUsd(inputTokens, outputTokens, cacheReadTokens) {
265
- let max;
294
+ let baseline;
266
295
  for (const p of this.opts.providers) {
267
296
  if (!p.cost) continue;
268
- const c = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
269
- if (max === void 0 || c > max) max = c;
297
+ baseline = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
270
298
  }
271
- return max;
299
+ return baseline;
272
300
  }
273
301
  /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
274
302
  finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
@@ -277,7 +305,9 @@ var LcrFallbackModel = class {
277
305
  const outputTokens = usage?.outputTokens?.total ?? 0;
278
306
  const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
279
307
  const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
308
+ const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
280
309
  const usageMissing = inputTokens === 0 && outputTokens === 0;
310
+ const emptyCompletion = inputTokens > 0 && outputTokens === 0;
281
311
  this.emitCost({
282
312
  model: this.opts.modelName,
283
313
  provider: provider.label,
@@ -299,8 +329,10 @@ var LcrFallbackModel = class {
299
329
  ...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
300
330
  costUsd,
301
331
  baselineUsd: this.baselineUsd(inputTokens, outputTokens, cacheReadTokens),
332
+ ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
302
333
  ...ctx.requestId ? { requestId: ctx.requestId } : {},
303
- ...usageMissing ? { usageMissing: true } : {}
334
+ ...usageMissing ? { usageMissing: true } : {},
335
+ ...emptyCompletion ? { emptyCompletion: true } : {}
304
336
  });
305
337
  }
306
338
  /** Every provider failed: fire `onCall` with no winner. */
@@ -331,6 +363,15 @@ var LcrFallbackModel = class {
331
363
  const attemptStart = Date.now();
332
364
  try {
333
365
  const result = await provider.model.doGenerate(options);
366
+ const out = result.usage?.outputTokens?.total ?? 0;
367
+ const inp = result.usage?.inputTokens?.total ?? 0;
368
+ if (inp > 0 && out === 0 && tried < n - 1) {
369
+ const emptyErr = new EmptyCompletionError(provider.label);
370
+ lastError = emptyErr;
371
+ this.emitError(emptyErr, provider.label);
372
+ this.recordFail(ctx, provider, attemptStart, emptyErr);
373
+ continue;
374
+ }
334
375
  this.settleSticky(idx);
335
376
  this.finalizeOk(ctx, provider, attemptStart, result.usage);
336
377
  return result;
@@ -392,7 +433,7 @@ var LcrFallbackModel = class {
392
433
  const servingIdx = idx;
393
434
  const triedBeforeServing = tried;
394
435
  let usage;
395
- let streamedAny = false;
436
+ let contentStreamed = false;
396
437
  let ttftMs;
397
438
  const stream = new ReadableStream({
398
439
  async start(controller) {
@@ -401,17 +442,24 @@ var LcrFallbackModel = class {
401
442
  reader = result.stream.getReader();
402
443
  for (; ; ) {
403
444
  const { done, value } = await reader.read();
404
- if (!streamedAny && value && typeof value === "object" && "error" in value) {
445
+ if (!contentStreamed && value && typeof value === "object" && "error" in value) {
405
446
  const err = value.error;
406
447
  if (self.shouldRetry(err)) throw err;
407
448
  }
408
449
  if (done) break;
409
- if (value.type === "finish") usage = value.usage;
450
+ if (value.type === "finish") {
451
+ usage = value.usage;
452
+ const out = value.usage?.outputTokens?.total ?? 0;
453
+ const inp = value.usage?.inputTokens?.total ?? 0;
454
+ if (inp > 0 && out === 0 && !contentStreamed && triedBeforeServing + 1 < n) {
455
+ throw new EmptyCompletionError(servingProvider.label);
456
+ }
457
+ }
410
458
  if (ttftMs === void 0 && (value.type === "text-delta" || value.type === "reasoning-delta")) {
411
459
  ttftMs = Date.now() - servingAttemptStart;
412
460
  }
413
461
  controller.enqueue(value);
414
- if (value.type !== "stream-start") streamedAny = true;
462
+ if (CONTENT_PART_TYPES.has(value.type)) contentStreamed = true;
415
463
  }
416
464
  self.settleSticky(servingIdx);
417
465
  self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
@@ -419,7 +467,7 @@ var LcrFallbackModel = class {
419
467
  } catch (error) {
420
468
  self.emitError(error, servingProvider.label);
421
469
  self.recordFail(ctx, servingProvider, servingAttemptStart, error);
422
- if (!streamedAny) {
470
+ if (!contentStreamed) {
423
471
  const nextTried = triedBeforeServing + 1;
424
472
  if (nextTried >= n) {
425
473
  self.finalizeFail(ctx);
@@ -482,6 +530,7 @@ function formatCallRecord(record, opts = {}) {
482
530
  line += ` (saved $${(record.baselineUsd - record.costUsd).toFixed(4)})`;
483
531
  }
484
532
  if (record.usageMissing) line += ` \u26A0no-usage`;
533
+ if (record.emptyCompletion) line += ` \u26A0empty`;
485
534
  const failed = record.attempts.filter((a) => !a.ok);
486
535
  if (failed.length > 0) {
487
536
  const reasons = failed.map((a) => `${a.provider} ${a.errorClass ?? "error"}`).join(", ");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-lcr",
3
- "version": "0.5.0",
3
+ "version": "0.5.2",
4
4
  "description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
5
5
  "keywords": [
6
6
  "ai",