npm - ai-lcr - Versions diffs - 0.5.0 → 0.5.2 - Mend

ai-lcr 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/README.md CHANGED Viewed

@@ -15,7 +15,7 @@
 </p>
 <p align="center">
-  <img src="assets/ai-lcr-hero.svg" alt="ai-lcr routes each model to its own cheapest provider — Gemini to Kunavo, DeepSeek to OpenRouter, Seedream to fal, Flux Schnell to Runware — and falls back on failure" width="820">
+  <img src="assets/ai-lcr-hero.svg" alt="ai-lcr keeps a cheapest-first list of providers per model — serves the cheapest (saving ~40%), fails over to the next on error, and snaps back to the cheapest after ~60s" width="720">
 </p>
 The same model costs different amounts on different providers — and no single provider is cheapest for everything. `ai-lcr` keeps a cheapest-first list per model, routes to the cheapest healthy one (⭐ below), and falls through on failure — the way phone carriers have done [Least Cost Routing](https://en.wikipedia.org/wiki/Least-cost_routing) for decades.
@@ -144,10 +144,6 @@ DeepInfra carries open weights only — no first-party Claude / GPT / Gemini. Fo
 2. **Fall through on failure.** On a retryable error — rate limit, 5xx, timeout, or a **billing cap** (402 / out-of-credit / quota) — it advances to the next provider, streaming-safe. A caller's own bad request (e.g. 400, 422) passes through immediately.
 3. **Recover.** After an idle window (`resetIntervalMs`, default 60s) it snaps back to the cheapest provider.
-<p align="center">
-  <img src="assets/ai-lcr-routing.svg" alt="routing diagram: cheapest first, fallback on failure, recover after idle" width="820">
-</p>
 ## See what happened (`onCall`)
 `onError`/`onCost` fire separately and uncorrelated, so a failover is hard to read after the fact. `onCall` gives you **one record per request** — the full chain, the winner, the reason for each failed hop, latency, and cost — and `formatCallRecord` turns it into a one-liner you can scan:

package/README.zh-CN.md CHANGED Viewed

@@ -15,7 +15,7 @@
 </p>
 <p align="center">
-  <img src="assets/ai-lcr-hero.svg" alt="ai-lcr 把每个模型路由到各自最便宜的 provider——Gemini 走 Kunavo，DeepSeek 走 OpenRouter，Seedream 走 fal，Flux Schnell 走 Runware——失败时自动 fallback" width="820">
+  <img src="assets/ai-lcr-hero.svg" alt="ai-lcr 为每个模型维护一份「最便宜优先」的 provider 列表——默认走最便宜的（省约 40%），出错时切到下一个，约 60 秒后自动切回最便宜" width="720">
 </p>
 同一个模型在不同 provider 上的价格不同——而且没有任何单一 provider 在所有模型上都最便宜。`ai-lcr` 为每个模型维护一份「最便宜优先」的列表，路由到其中最便宜且健康的 provider（下表中的 ⭐），失败时向下穿透——这正是电话运营商几十年来一直在做的 [最低成本路由（Least Cost Routing）](https://en.wikipedia.org/wiki/Least-cost_routing)。
@@ -144,10 +144,6 @@ DeepInfra 只承载开源权重——没有第一方 Claude / GPT / Gemini。那
 2. **失败时向下穿透。** 遇到可重试的错误（限流、5xx、超时）时，前进到下一个 provider，且对流式安全。硬错误（400、401、403、422）会直接透传，不做重试。
 3. **恢复。** 在一段空闲窗口（`resetIntervalMs`，默认 60s）之后，自动回到最便宜的 provider。
-<p align="center">
-  <img src="assets/ai-lcr-routing.svg" alt="路由示意图：最便宜优先、失败时 fallback、空闲后恢复" width="820">
-</p>
 ## 支持的 provider
 任何 OpenAI 兼容的 endpoint 都可用——任何 AI SDK 的 provider 包也都可用，包括模型厂商自己的官方 API。

package/dist/index.cjs CHANGED Viewed

@@ -41,6 +41,12 @@ __export(index_exports, {
 module.exports = __toCommonJS(index_exports);
 // src/fallback.ts
+var EmptyCompletionError = class extends Error {
+  constructor(provider) {
+    super(`ai-lcr: provider "${provider}" returned an empty completion (0 output tokens, no content)`);
+    this.name = "EmptyCompletionError";
+  }
+};
 var RETRYABLE_STATUS = /* @__PURE__ */ new Set([401, 402, 403, 408, 409, 413, 429, 498, 500]);
 var RETRYABLE_PATTERNS = [
   "overloaded",
@@ -153,6 +159,7 @@ function isRetryableError(error) {
   return RETRYABLE_PATTERNS.some((p) => text.includes(p));
 }
 function classifyError(error) {
+  if (error instanceof EmptyCompletionError) return "empty_completion";
   const e = error;
   const status = e?.statusCode ?? e?.status;
   if (typeof status === "number") return String(status);
@@ -175,6 +182,7 @@ var BILLING_PATTERNS = [
   "\u6263\u6B3E"
 ];
 function classifyErrorKind(error) {
+  if (error instanceof EmptyCompletionError) return "empty";
   const e = error;
   const status = e?.statusCode ?? e?.status;
   const { text } = errorSignals(error);
@@ -194,10 +202,27 @@ function costForUsage(cost, inputTokens, outputTokens, cacheReadTokens) {
   const cachedRate = cost.cacheRead ?? cost.input;
   return fullInput / 1e6 * cost.input + cached / 1e6 * cachedRate + outputTokens / 1e6 * cost.output;
 }
+function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
+  if (cost.cacheRead === void 0) return 0;
+  const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
+  return cached / 1e6 * (cost.input - cost.cacheRead);
+}
 function requestIdFrom(options) {
   const raw = options.providerOptions?.lcr?.requestId;
   return typeof raw === "string" && raw.length > 0 ? raw : void 0;
 }
+var CONTENT_PART_TYPES = /* @__PURE__ */ new Set([
+  "text-delta",
+  "reasoning-delta",
+  "tool-call",
+  "tool-input-start",
+  "tool-input-delta",
+  "tool-input-end",
+  "file",
+  "source",
+  "tool-result",
+  "raw"
+]);
 var LcrFallbackModel = class {
   constructor(opts) {
     this.opts = opts;
@@ -298,19 +323,22 @@ var LcrFallbackModel = class {
     });
   }
   /**
-   * Baseline = what this same usage would have cost on the most expensive
-   * *priced* provider in the chain (typically the OpenRouter fallback leg). The
-   * winner's savings is `baselineUsd - costUsd`. Undefined when no provider in
-   * the chain carries a price (nothing to compare against).
+   * Baseline = what this same usage would have cost on the always-on fallback:
+   * the LAST priced leg of the chain (by convention the list-price provider you'd
+   * use without routing — e.g. OpenRouter, always last). The winner's saving is
+   * `baselineUsd - costUsd`. We take the last priced leg, NOT the most expensive
+   * one: prompt caching can make a sticker-cheaper provider (no `cacheRead`) cost
+   * MORE on a cache-heavy call, and a max-of-chain baseline would then fabricate a
+   * "saving" even on calls the fallback itself served. Undefined when no provider
+   * in the chain carries a price (nothing to compare against).
    */
   baselineUsd(inputTokens, outputTokens, cacheReadTokens) {
-    let max;
+    let baseline;
     for (const p of this.opts.providers) {
       if (!p.cost) continue;
-      const c = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
-      if (max === void 0 || c > max) max = c;
+      baseline = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
     }
-    return max;
+    return baseline;
   }
   /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
   finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
@@ -319,7 +347,9 @@ var LcrFallbackModel = class {
     const outputTokens = usage?.outputTokens?.total ?? 0;
     const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
     const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
+    const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
     const usageMissing = inputTokens === 0 && outputTokens === 0;
+    const emptyCompletion = inputTokens > 0 && outputTokens === 0;
     this.emitCost({
       model: this.opts.modelName,
       provider: provider.label,
@@ -341,8 +371,10 @@ var LcrFallbackModel = class {
       ...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
       costUsd,
       baselineUsd: this.baselineUsd(inputTokens, outputTokens, cacheReadTokens),
+      ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
       ...ctx.requestId ? { requestId: ctx.requestId } : {},
-      ...usageMissing ? { usageMissing: true } : {}
+      ...usageMissing ? { usageMissing: true } : {},
+      ...emptyCompletion ? { emptyCompletion: true } : {}
     });
   }
   /** Every provider failed: fire `onCall` with no winner. */
@@ -373,6 +405,15 @@ var LcrFallbackModel = class {
       const attemptStart = Date.now();
       try {
         const result = await provider.model.doGenerate(options);
+        const out = result.usage?.outputTokens?.total ?? 0;
+        const inp = result.usage?.inputTokens?.total ?? 0;
+        if (inp > 0 && out === 0 && tried < n - 1) {
+          const emptyErr = new EmptyCompletionError(provider.label);
+          lastError = emptyErr;
+          this.emitError(emptyErr, provider.label);
+          this.recordFail(ctx, provider, attemptStart, emptyErr);
+          continue;
+        }
         this.settleSticky(idx);
         this.finalizeOk(ctx, provider, attemptStart, result.usage);
         return result;
@@ -434,7 +475,7 @@ var LcrFallbackModel = class {
     const servingIdx = idx;
     const triedBeforeServing = tried;
     let usage;
-    let streamedAny = false;
+    let contentStreamed = false;
     let ttftMs;
     const stream = new ReadableStream({
       async start(controller) {
@@ -443,17 +484,24 @@ var LcrFallbackModel = class {
           reader = result.stream.getReader();
           for (; ; ) {
             const { done, value } = await reader.read();
-            if (!streamedAny && value && typeof value === "object" && "error" in value) {
+            if (!contentStreamed && value && typeof value === "object" && "error" in value) {
               const err = value.error;
               if (self.shouldRetry(err)) throw err;
             }
             if (done) break;
-            if (value.type === "finish") usage = value.usage;
+            if (value.type === "finish") {
+              usage = value.usage;
+              const out = value.usage?.outputTokens?.total ?? 0;
+              const inp = value.usage?.inputTokens?.total ?? 0;
+              if (inp > 0 && out === 0 && !contentStreamed && triedBeforeServing + 1 < n) {
+                throw new EmptyCompletionError(servingProvider.label);
+              }
+            }
             if (ttftMs === void 0 && (value.type === "text-delta" || value.type === "reasoning-delta")) {
               ttftMs = Date.now() - servingAttemptStart;
             }
             controller.enqueue(value);
-            if (value.type !== "stream-start") streamedAny = true;
+            if (CONTENT_PART_TYPES.has(value.type)) contentStreamed = true;
           }
           self.settleSticky(servingIdx);
           self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
@@ -461,7 +509,7 @@ var LcrFallbackModel = class {
         } catch (error) {
           self.emitError(error, servingProvider.label);
           self.recordFail(ctx, servingProvider, servingAttemptStart, error);
-          if (!streamedAny) {
+          if (!contentStreamed) {
             const nextTried = triedBeforeServing + 1;
             if (nextTried >= n) {
               self.finalizeFail(ctx);
@@ -524,6 +572,7 @@ function formatCallRecord(record, opts = {}) {
     line += `  (saved $${(record.baselineUsd - record.costUsd).toFixed(4)})`;
   }
   if (record.usageMissing) line += `  \u26A0no-usage`;
+  if (record.emptyCompletion) line += `  \u26A0empty`;
   const failed = record.attempts.filter((a) => !a.ok);
   if (failed.length > 0) {
     const reasons = failed.map((a) => `${a.provider} ${a.errorClass ?? "error"}`).join(", ");

package/dist/index.d.cts CHANGED Viewed

@@ -49,8 +49,15 @@ interface CostEvent {
  *   - "auth":      401 / 403 — a misconfigured or revoked key.
  *   - "billing":   402 / out-of-credit / quota — account needs topping up.
  *   - "client":    a non-retryable caller error (e.g. 400 bad request).
+ *   - "empty":     provider returned a clean 200 but generated nothing
+ *                  (zero output tokens, no content) — a *content*-integrity
+ *                  failure, not a transport one. The provider looks healthy to
+ *                  every status/network check yet hands the user a blank. We
+ *                  fail over on it like a transient error, but tag it separately
+ *                  so a run of `"empty"` attempts (a quietly degraded model)
+ *                  doesn't hide inside the transient noise.
  */
-type ErrorKind = "transient" | "auth" | "billing" | "client";
+type ErrorKind = "transient" | "auth" | "billing" | "client" | "empty";
 /** One provider attempt within a single request. */
 interface RouteAttempt {
     /** Provider label that was tried (e.g. "tokenmart"). */
@@ -109,12 +116,25 @@ interface CallRecord {
     /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
     costUsd: number;
     /**
-     * What the same request would have cost on the most expensive *priced*
-     * provider in the chain, on identical token usage — the savings baseline
-     * (`baselineUsd - costUsd`). Set by both routers whenever at least one
-     * provider carries a `cost`; undefined only when no provider was priced.
+     * What this same usage would have cost on the savings baseline, so
+     * `baselineUsd - costUsd` is what routing actually saved. Text router: the
+     * always-on fallback leg — the LAST priced provider in the chain, i.e. the
+     * list-price provider you'd fall back to without routing (e.g. OpenRouter).
+     * Media router: the model-maker's official direct price. NOT the most
+     * expensive leg of the chain: prompt caching can make a sticker-cheaper
+     * provider cost more on a cache-heavy call, and a max-of-chain baseline would
+     * fabricate a "saving" on calls the fallback itself served. Undefined only
+     * when no provider was priced.
      */
     baselineUsd?: number;
+    /**
+     * The slice of `costUsd` that prompt-cache reads saved versus paying the full
+     * input rate for those same tokens (`cachedTokens × (input − cacheRead)`).
+     * Present only when > 0. This is the serving provider's own caching benefit —
+     * it happens with or without routing — so it is NOT a routing saving and must
+     * be surfaced separately, never folded into `baselineUsd - costUsd`.
+     */
+    cachedSavingUsd?: number;
     /**
      * Caller-supplied correlation id, read from `providerOptions.lcr.requestId`
      * on the call. Multi-step tool loops emit one record per `doStream`/
@@ -129,6 +149,21 @@ interface CallRecord {
      * other signal. Treat a flagged record as "cost unknown", not "free".
      */
     usageMissing?: boolean;
+    /**
+     * True when the winner served a clean, error-free response that nonetheless
+     * generated **nothing**: zero output tokens with a non-empty prompt (and, for
+     * streams, not one content part). The user asked and got a blank. Distinct
+     * from {@link usageMissing} (which is input *and* output both zero — usage not
+     * reported); here the prompt was billed but the model produced no output.
+     *
+     * Set only when this empty response is what the caller actually received —
+     * i.e. every provider in the chain came back empty, so failover couldn't
+     * rescue it. (When an earlier provider returns empty but a later one produces
+     * content, that earlier attempt is recorded as a failed `empty_completion` hop
+     * and this flag stays unset, because the winner did produce output.) Alert on
+     * it: a provider that quietly returns blanks passes every health check.
+     */
+    emptyCompletion?: boolean;
 }
 /**
  * Normalize an error into a short, log-friendly class for {@link CallRecord}.

package/dist/index.d.ts CHANGED Viewed

@@ -49,8 +49,15 @@ interface CostEvent {
  *   - "auth":      401 / 403 — a misconfigured or revoked key.
  *   - "billing":   402 / out-of-credit / quota — account needs topping up.
  *   - "client":    a non-retryable caller error (e.g. 400 bad request).
+ *   - "empty":     provider returned a clean 200 but generated nothing
+ *                  (zero output tokens, no content) — a *content*-integrity
+ *                  failure, not a transport one. The provider looks healthy to
+ *                  every status/network check yet hands the user a blank. We
+ *                  fail over on it like a transient error, but tag it separately
+ *                  so a run of `"empty"` attempts (a quietly degraded model)
+ *                  doesn't hide inside the transient noise.
  */
-type ErrorKind = "transient" | "auth" | "billing" | "client";
+type ErrorKind = "transient" | "auth" | "billing" | "client" | "empty";
 /** One provider attempt within a single request. */
 interface RouteAttempt {
     /** Provider label that was tried (e.g. "tokenmart"). */
@@ -109,12 +116,25 @@ interface CallRecord {
     /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
     costUsd: number;
     /**
-     * What the same request would have cost on the most expensive *priced*
-     * provider in the chain, on identical token usage — the savings baseline
-     * (`baselineUsd - costUsd`). Set by both routers whenever at least one
-     * provider carries a `cost`; undefined only when no provider was priced.
+     * What this same usage would have cost on the savings baseline, so
+     * `baselineUsd - costUsd` is what routing actually saved. Text router: the
+     * always-on fallback leg — the LAST priced provider in the chain, i.e. the
+     * list-price provider you'd fall back to without routing (e.g. OpenRouter).
+     * Media router: the model-maker's official direct price. NOT the most
+     * expensive leg of the chain: prompt caching can make a sticker-cheaper
+     * provider cost more on a cache-heavy call, and a max-of-chain baseline would
+     * fabricate a "saving" on calls the fallback itself served. Undefined only
+     * when no provider was priced.
      */
     baselineUsd?: number;
+    /**
+     * The slice of `costUsd` that prompt-cache reads saved versus paying the full
+     * input rate for those same tokens (`cachedTokens × (input − cacheRead)`).
+     * Present only when > 0. This is the serving provider's own caching benefit —
+     * it happens with or without routing — so it is NOT a routing saving and must
+     * be surfaced separately, never folded into `baselineUsd - costUsd`.
+     */
+    cachedSavingUsd?: number;
     /**
      * Caller-supplied correlation id, read from `providerOptions.lcr.requestId`
      * on the call. Multi-step tool loops emit one record per `doStream`/
@@ -129,6 +149,21 @@ interface CallRecord {
      * other signal. Treat a flagged record as "cost unknown", not "free".
      */
     usageMissing?: boolean;
+    /**
+     * True when the winner served a clean, error-free response that nonetheless
+     * generated **nothing**: zero output tokens with a non-empty prompt (and, for
+     * streams, not one content part). The user asked and got a blank. Distinct
+     * from {@link usageMissing} (which is input *and* output both zero — usage not
+     * reported); here the prompt was billed but the model produced no output.
+     *
+     * Set only when this empty response is what the caller actually received —
+     * i.e. every provider in the chain came back empty, so failover couldn't
+     * rescue it. (When an earlier provider returns empty but a later one produces
+     * content, that earlier attempt is recorded as a failed `empty_completion` hop
+     * and this flag stays unset, because the winner did produce output.) Alert on
+     * it: a provider that quietly returns blanks passes every health check.
+     */
+    emptyCompletion?: boolean;
 }
 /**
  * Normalize an error into a short, log-friendly class for {@link CallRecord}.

package/dist/index.js CHANGED Viewed

@@ -1,4 +1,10 @@
 // src/fallback.ts
+var EmptyCompletionError = class extends Error {
+  constructor(provider) {
+    super(`ai-lcr: provider "${provider}" returned an empty completion (0 output tokens, no content)`);
+    this.name = "EmptyCompletionError";
+  }
+};
 var RETRYABLE_STATUS = /* @__PURE__ */ new Set([401, 402, 403, 408, 409, 413, 429, 498, 500]);
 var RETRYABLE_PATTERNS = [
   "overloaded",
@@ -111,6 +117,7 @@ function isRetryableError(error) {
   return RETRYABLE_PATTERNS.some((p) => text.includes(p));
 }
 function classifyError(error) {
+  if (error instanceof EmptyCompletionError) return "empty_completion";
   const e = error;
   const status = e?.statusCode ?? e?.status;
   if (typeof status === "number") return String(status);
@@ -133,6 +140,7 @@ var BILLING_PATTERNS = [
   "\u6263\u6B3E"
 ];
 function classifyErrorKind(error) {
+  if (error instanceof EmptyCompletionError) return "empty";
   const e = error;
   const status = e?.statusCode ?? e?.status;
   const { text } = errorSignals(error);
@@ -152,10 +160,27 @@ function costForUsage(cost, inputTokens, outputTokens, cacheReadTokens) {
   const cachedRate = cost.cacheRead ?? cost.input;
   return fullInput / 1e6 * cost.input + cached / 1e6 * cachedRate + outputTokens / 1e6 * cost.output;
 }
+function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
+  if (cost.cacheRead === void 0) return 0;
+  const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
+  return cached / 1e6 * (cost.input - cost.cacheRead);
+}
 function requestIdFrom(options) {
   const raw = options.providerOptions?.lcr?.requestId;
   return typeof raw === "string" && raw.length > 0 ? raw : void 0;
 }
+var CONTENT_PART_TYPES = /* @__PURE__ */ new Set([
+  "text-delta",
+  "reasoning-delta",
+  "tool-call",
+  "tool-input-start",
+  "tool-input-delta",
+  "tool-input-end",
+  "file",
+  "source",
+  "tool-result",
+  "raw"
+]);
 var LcrFallbackModel = class {
   constructor(opts) {
     this.opts = opts;
@@ -256,19 +281,22 @@ var LcrFallbackModel = class {
     });
   }
   /**
-   * Baseline = what this same usage would have cost on the most expensive
-   * *priced* provider in the chain (typically the OpenRouter fallback leg). The
-   * winner's savings is `baselineUsd - costUsd`. Undefined when no provider in
-   * the chain carries a price (nothing to compare against).
+   * Baseline = what this same usage would have cost on the always-on fallback:
+   * the LAST priced leg of the chain (by convention the list-price provider you'd
+   * use without routing — e.g. OpenRouter, always last). The winner's saving is
+   * `baselineUsd - costUsd`. We take the last priced leg, NOT the most expensive
+   * one: prompt caching can make a sticker-cheaper provider (no `cacheRead`) cost
+   * MORE on a cache-heavy call, and a max-of-chain baseline would then fabricate a
+   * "saving" even on calls the fallback itself served. Undefined when no provider
+   * in the chain carries a price (nothing to compare against).
    */
   baselineUsd(inputTokens, outputTokens, cacheReadTokens) {
-    let max;
+    let baseline;
     for (const p of this.opts.providers) {
       if (!p.cost) continue;
-      const c = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
-      if (max === void 0 || c > max) max = c;
+      baseline = costForUsage(p.cost, inputTokens, outputTokens, cacheReadTokens);
     }
-    return max;
+    return baseline;
   }
   /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
   finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
@@ -277,7 +305,9 @@ var LcrFallbackModel = class {
     const outputTokens = usage?.outputTokens?.total ?? 0;
     const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
     const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
+    const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
     const usageMissing = inputTokens === 0 && outputTokens === 0;
+    const emptyCompletion = inputTokens > 0 && outputTokens === 0;
     this.emitCost({
       model: this.opts.modelName,
       provider: provider.label,
@@ -299,8 +329,10 @@ var LcrFallbackModel = class {
       ...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
       costUsd,
       baselineUsd: this.baselineUsd(inputTokens, outputTokens, cacheReadTokens),
+      ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
       ...ctx.requestId ? { requestId: ctx.requestId } : {},
-      ...usageMissing ? { usageMissing: true } : {}
+      ...usageMissing ? { usageMissing: true } : {},
+      ...emptyCompletion ? { emptyCompletion: true } : {}
     });
   }
   /** Every provider failed: fire `onCall` with no winner. */
@@ -331,6 +363,15 @@ var LcrFallbackModel = class {
       const attemptStart = Date.now();
       try {
         const result = await provider.model.doGenerate(options);
+        const out = result.usage?.outputTokens?.total ?? 0;
+        const inp = result.usage?.inputTokens?.total ?? 0;
+        if (inp > 0 && out === 0 && tried < n - 1) {
+          const emptyErr = new EmptyCompletionError(provider.label);
+          lastError = emptyErr;
+          this.emitError(emptyErr, provider.label);
+          this.recordFail(ctx, provider, attemptStart, emptyErr);
+          continue;
+        }
         this.settleSticky(idx);
         this.finalizeOk(ctx, provider, attemptStart, result.usage);
         return result;
@@ -392,7 +433,7 @@ var LcrFallbackModel = class {
     const servingIdx = idx;
     const triedBeforeServing = tried;
     let usage;
-    let streamedAny = false;
+    let contentStreamed = false;
     let ttftMs;
     const stream = new ReadableStream({
       async start(controller) {
@@ -401,17 +442,24 @@ var LcrFallbackModel = class {
           reader = result.stream.getReader();
           for (; ; ) {
             const { done, value } = await reader.read();
-            if (!streamedAny && value && typeof value === "object" && "error" in value) {
+            if (!contentStreamed && value && typeof value === "object" && "error" in value) {
               const err = value.error;
               if (self.shouldRetry(err)) throw err;
             }
             if (done) break;
-            if (value.type === "finish") usage = value.usage;
+            if (value.type === "finish") {
+              usage = value.usage;
+              const out = value.usage?.outputTokens?.total ?? 0;
+              const inp = value.usage?.inputTokens?.total ?? 0;
+              if (inp > 0 && out === 0 && !contentStreamed && triedBeforeServing + 1 < n) {
+                throw new EmptyCompletionError(servingProvider.label);
+              }
+            }
             if (ttftMs === void 0 && (value.type === "text-delta" || value.type === "reasoning-delta")) {
               ttftMs = Date.now() - servingAttemptStart;
             }
             controller.enqueue(value);
-            if (value.type !== "stream-start") streamedAny = true;
+            if (CONTENT_PART_TYPES.has(value.type)) contentStreamed = true;
           }
           self.settleSticky(servingIdx);
           self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
@@ -419,7 +467,7 @@ var LcrFallbackModel = class {
         } catch (error) {
           self.emitError(error, servingProvider.label);
           self.recordFail(ctx, servingProvider, servingAttemptStart, error);
-          if (!streamedAny) {
+          if (!contentStreamed) {
             const nextTried = triedBeforeServing + 1;
             if (nextTried >= n) {
               self.finalizeFail(ctx);
@@ -482,6 +530,7 @@ function formatCallRecord(record, opts = {}) {
     line += `  (saved $${(record.baselineUsd - record.costUsd).toFixed(4)})`;
   }
   if (record.usageMissing) line += `  \u26A0no-usage`;
+  if (record.emptyCompletion) line += `  \u26A0empty`;
   const failed = record.attempts.filter((a) => !a.ok);
   if (failed.length > 0) {
     const reasons = failed.map((a) => `${a.provider} ${a.errorClass ?? "error"}`).join(", ");

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ai-lcr",
-  "version": "0.5.0",
+  "version": "0.5.2",
   "description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
   "keywords": [
     "ai",