ai-lcr 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -15,7 +15,7 @@
15
15
  </p>
16
16
 
17
17
  <p align="center">
18
- <img src="assets/ai-lcr-hero.svg" alt="ai-lcr routes each model to its own cheapest provider — Gemini to Kunavo, DeepSeek to OpenRouter, Seedream to fal, Flux Schnell to Runware — and falls back on failure" width="820">
18
+ <img src="assets/ai-lcr-hero.svg" alt="ai-lcr keeps a cheapest-first list of providers per model — serves the cheapest (saving ~40%), fails over to the next on error, and snaps back to the cheapest after ~60s" width="720">
19
19
  </p>
20
20
 
21
21
  The same model costs different amounts on different providers — and no single provider is cheapest for everything. `ai-lcr` keeps a cheapest-first list per model, routes to the cheapest healthy one (⭐ below), and falls through on failure — the way phone carriers have done [Least Cost Routing](https://en.wikipedia.org/wiki/Least-cost_routing) for decades.
@@ -144,10 +144,6 @@ DeepInfra carries open weights only — no first-party Claude / GPT / Gemini. Fo
144
144
  2. **Fall through on failure.** On a retryable error — rate limit, 5xx, timeout, or a **billing cap** (402 / out-of-credit / quota) — it advances to the next provider, streaming-safe. A caller's own bad request (e.g. 400, 422) passes through immediately.
145
145
  3. **Recover.** After an idle window (`resetIntervalMs`, default 60s) it snaps back to the cheapest provider.
146
146
 
147
- <p align="center">
148
- <img src="assets/ai-lcr-routing.svg" alt="routing diagram: cheapest first, fallback on failure, recover after idle" width="820">
149
- </p>
150
-
151
147
  ## See what happened (`onCall`)
152
148
 
153
149
  `onError`/`onCost` fire separately and uncorrelated, so a failover is hard to read after the fact. `onCall` gives you **one record per request** — the full chain, the winner, the reason for each failed hop, latency, and cost — and `formatCallRecord` turns it into a one-liner you can scan:
package/README.zh-CN.md CHANGED
@@ -15,7 +15,7 @@
15
15
  </p>
16
16
 
17
17
  <p align="center">
18
- <img src="assets/ai-lcr-hero.svg" alt="ai-lcr 把每个模型路由到各自最便宜的 provider——Gemini 走 Kunavo,DeepSeek 走 OpenRouter,Seedream 走 fal,Flux Schnell 走 Runware——失败时自动 fallback" width="820">
18
+ <img src="assets/ai-lcr-hero.svg" alt="ai-lcr 为每个模型维护一份「最便宜优先」的 provider 列表——默认走最便宜的(省约 40%),出错时切到下一个,约 60 秒后自动切回最便宜" width="720">
19
19
  </p>
20
20
 
21
21
  同一个模型在不同 provider 上的价格不同——而且没有任何单一 provider 在所有模型上都最便宜。`ai-lcr` 为每个模型维护一份「最便宜优先」的列表,路由到其中最便宜且健康的 provider(下表中的 ⭐),失败时向下穿透——这正是电话运营商几十年来一直在做的 [最低成本路由(Least Cost Routing)](https://en.wikipedia.org/wiki/Least-cost_routing)。
@@ -144,10 +144,6 @@ DeepInfra 只承载开源权重——没有第一方 Claude / GPT / Gemini。那
144
144
  2. **失败时向下穿透。** 遇到可重试的错误——限流、5xx、超时,或**账单上限**(402 / 余额不足 / 配额)——时,前进到下一个 provider,且对流式安全。调用方自身的错误请求(如 400、422)会直接透传,不做重试。
145
145
  3. **恢复。** 在一段空闲窗口(`resetIntervalMs`,默认 60s)之后,自动回到最便宜的 provider。
146
146
 
147
- <p align="center">
148
- <img src="assets/ai-lcr-routing.svg" alt="路由示意图:最便宜优先、失败时 fallback、空闲后恢复" width="820">
149
- </p>
150
-
151
147
  ## 支持的 provider
152
148
 
153
149
  任何 OpenAI 兼容的 endpoint 都可用——任何 AI SDK 的 provider 包也都可用,包括模型厂商自己的官方 API。
package/dist/index.cjs CHANGED
@@ -41,6 +41,12 @@ __export(index_exports, {
41
41
  module.exports = __toCommonJS(index_exports);
42
42
 
43
43
  // src/fallback.ts
44
+ var EmptyCompletionError = class extends Error {
45
+ constructor(provider) {
46
+ super(`ai-lcr: provider "${provider}" returned an empty completion (0 output tokens, no content)`);
47
+ this.name = "EmptyCompletionError";
48
+ }
49
+ };
44
50
  var RETRYABLE_STATUS = /* @__PURE__ */ new Set([401, 402, 403, 408, 409, 413, 429, 498, 500]);
45
51
  var RETRYABLE_PATTERNS = [
46
52
  "overloaded",
@@ -153,6 +159,7 @@ function isRetryableError(error) {
153
159
  return RETRYABLE_PATTERNS.some((p) => text.includes(p));
154
160
  }
155
161
  function classifyError(error) {
162
+ if (error instanceof EmptyCompletionError) return "empty_completion";
156
163
  const e = error;
157
164
  const status = e?.statusCode ?? e?.status;
158
165
  if (typeof status === "number") return String(status);
@@ -175,6 +182,7 @@ var BILLING_PATTERNS = [
175
182
  "\u6263\u6B3E"
176
183
  ];
177
184
  function classifyErrorKind(error) {
185
+ if (error instanceof EmptyCompletionError) return "empty";
178
186
  const e = error;
179
187
  const status = e?.statusCode ?? e?.status;
180
188
  const { text } = errorSignals(error);
@@ -203,6 +211,18 @@ function requestIdFrom(options) {
203
211
  const raw = options.providerOptions?.lcr?.requestId;
204
212
  return typeof raw === "string" && raw.length > 0 ? raw : void 0;
205
213
  }
214
+ var CONTENT_PART_TYPES = /* @__PURE__ */ new Set([
215
+ "text-delta",
216
+ "reasoning-delta",
217
+ "tool-call",
218
+ "tool-input-start",
219
+ "tool-input-delta",
220
+ "tool-input-end",
221
+ "file",
222
+ "source",
223
+ "tool-result",
224
+ "raw"
225
+ ]);
206
226
  var LcrFallbackModel = class {
207
227
  constructor(opts) {
208
228
  this.opts = opts;
@@ -329,6 +349,7 @@ var LcrFallbackModel = class {
329
349
  const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
330
350
  const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
331
351
  const usageMissing = inputTokens === 0 && outputTokens === 0;
352
+ const emptyCompletion = inputTokens > 0 && outputTokens === 0;
332
353
  this.emitCost({
333
354
  model: this.opts.modelName,
334
355
  provider: provider.label,
@@ -352,7 +373,8 @@ var LcrFallbackModel = class {
352
373
  baselineUsd: this.baselineUsd(inputTokens, outputTokens, cacheReadTokens),
353
374
  ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
354
375
  ...ctx.requestId ? { requestId: ctx.requestId } : {},
355
- ...usageMissing ? { usageMissing: true } : {}
376
+ ...usageMissing ? { usageMissing: true } : {},
377
+ ...emptyCompletion ? { emptyCompletion: true } : {}
356
378
  });
357
379
  }
358
380
  /** Every provider failed: fire `onCall` with no winner. */
@@ -383,6 +405,15 @@ var LcrFallbackModel = class {
383
405
  const attemptStart = Date.now();
384
406
  try {
385
407
  const result = await provider.model.doGenerate(options);
408
+ const out = result.usage?.outputTokens?.total ?? 0;
409
+ const inp = result.usage?.inputTokens?.total ?? 0;
410
+ if (inp > 0 && out === 0 && tried < n - 1) {
411
+ const emptyErr = new EmptyCompletionError(provider.label);
412
+ lastError = emptyErr;
413
+ this.emitError(emptyErr, provider.label);
414
+ this.recordFail(ctx, provider, attemptStart, emptyErr);
415
+ continue;
416
+ }
386
417
  this.settleSticky(idx);
387
418
  this.finalizeOk(ctx, provider, attemptStart, result.usage);
388
419
  return result;
@@ -444,7 +475,7 @@ var LcrFallbackModel = class {
444
475
  const servingIdx = idx;
445
476
  const triedBeforeServing = tried;
446
477
  let usage;
447
- let streamedAny = false;
478
+ let contentStreamed = false;
448
479
  let ttftMs;
449
480
  const stream = new ReadableStream({
450
481
  async start(controller) {
@@ -453,17 +484,24 @@ var LcrFallbackModel = class {
453
484
  reader = result.stream.getReader();
454
485
  for (; ; ) {
455
486
  const { done, value } = await reader.read();
456
- if (!streamedAny && value && typeof value === "object" && "error" in value) {
487
+ if (!contentStreamed && value && typeof value === "object" && "error" in value) {
457
488
  const err = value.error;
458
489
  if (self.shouldRetry(err)) throw err;
459
490
  }
460
491
  if (done) break;
461
- if (value.type === "finish") usage = value.usage;
492
+ if (value.type === "finish") {
493
+ usage = value.usage;
494
+ const out = value.usage?.outputTokens?.total ?? 0;
495
+ const inp = value.usage?.inputTokens?.total ?? 0;
496
+ if (inp > 0 && out === 0 && !contentStreamed && triedBeforeServing + 1 < n) {
497
+ throw new EmptyCompletionError(servingProvider.label);
498
+ }
499
+ }
462
500
  if (ttftMs === void 0 && (value.type === "text-delta" || value.type === "reasoning-delta")) {
463
501
  ttftMs = Date.now() - servingAttemptStart;
464
502
  }
465
503
  controller.enqueue(value);
466
- if (value.type !== "stream-start") streamedAny = true;
504
+ if (CONTENT_PART_TYPES.has(value.type)) contentStreamed = true;
467
505
  }
468
506
  self.settleSticky(servingIdx);
469
507
  self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
@@ -471,7 +509,7 @@ var LcrFallbackModel = class {
471
509
  } catch (error) {
472
510
  self.emitError(error, servingProvider.label);
473
511
  self.recordFail(ctx, servingProvider, servingAttemptStart, error);
474
- if (!streamedAny) {
512
+ if (!contentStreamed) {
475
513
  const nextTried = triedBeforeServing + 1;
476
514
  if (nextTried >= n) {
477
515
  self.finalizeFail(ctx);
@@ -534,6 +572,7 @@ function formatCallRecord(record, opts = {}) {
534
572
  line += ` (saved $${(record.baselineUsd - record.costUsd).toFixed(4)})`;
535
573
  }
536
574
  if (record.usageMissing) line += ` \u26A0no-usage`;
575
+ if (record.emptyCompletion) line += ` \u26A0empty`;
537
576
  const failed = record.attempts.filter((a) => !a.ok);
538
577
  if (failed.length > 0) {
539
578
  const reasons = failed.map((a) => `${a.provider} ${a.errorClass ?? "error"}`).join(", ");
package/dist/index.d.cts CHANGED
@@ -49,8 +49,15 @@ interface CostEvent {
49
49
  * - "auth": 401 / 403 — a misconfigured or revoked key.
50
50
  * - "billing": 402 / out-of-credit / quota — account needs topping up.
51
51
  * - "client": a non-retryable caller error (e.g. 400 bad request).
52
+ * - "empty": provider returned a clean 200 but generated nothing
53
+ * (zero output tokens, no content) — a *content*-integrity
54
+ * failure, not a transport one. The provider looks healthy to
55
+ * every status/network check yet hands the user a blank. We
56
+ * fail over on it like a transient error, but tag it separately
57
+ * so a run of `"empty"` attempts (a quietly degraded model)
58
+ * doesn't hide inside the transient noise.
52
59
  */
53
- type ErrorKind = "transient" | "auth" | "billing" | "client";
60
+ type ErrorKind = "transient" | "auth" | "billing" | "client" | "empty";
54
61
  /** One provider attempt within a single request. */
55
62
  interface RouteAttempt {
56
63
  /** Provider label that was tried (e.g. "tokenmart"). */
@@ -142,6 +149,21 @@ interface CallRecord {
142
149
  * other signal. Treat a flagged record as "cost unknown", not "free".
143
150
  */
144
151
  usageMissing?: boolean;
152
+ /**
153
+ * True when the winner served a clean, error-free response that nonetheless
154
+ * generated **nothing**: zero output tokens with a non-empty prompt (and, for
155
+ * streams, not one content part). The user asked and got a blank. Distinct
156
+ * from {@link usageMissing} (which is input *and* output both zero — usage not
157
+ * reported); here the prompt was billed but the model produced no output.
158
+ *
159
+ * Set only when this empty response is what the caller actually received —
160
+ * i.e. every provider in the chain came back empty, so failover couldn't
161
+ * rescue it. (When an earlier provider returns empty but a later one produces
162
+ * content, that earlier attempt is recorded as a failed `empty_completion` hop
163
+ * and this flag stays unset, because the winner did produce output.) Alert on
164
+ * it: a provider that quietly returns blanks passes every health check.
165
+ */
166
+ emptyCompletion?: boolean;
145
167
  }
146
168
  /**
147
169
  * Normalize an error into a short, log-friendly class for {@link CallRecord}.
package/dist/index.d.ts CHANGED
@@ -49,8 +49,15 @@ interface CostEvent {
49
49
  * - "auth": 401 / 403 — a misconfigured or revoked key.
50
50
  * - "billing": 402 / out-of-credit / quota — account needs topping up.
51
51
  * - "client": a non-retryable caller error (e.g. 400 bad request).
52
+ * - "empty": provider returned a clean 200 but generated nothing
53
+ * (zero output tokens, no content) — a *content*-integrity
54
+ * failure, not a transport one. The provider looks healthy to
55
+ * every status/network check yet hands the user a blank. We
56
+ * fail over on it like a transient error, but tag it separately
57
+ * so a run of `"empty"` attempts (a quietly degraded model)
58
+ * doesn't hide inside the transient noise.
52
59
  */
53
- type ErrorKind = "transient" | "auth" | "billing" | "client";
60
+ type ErrorKind = "transient" | "auth" | "billing" | "client" | "empty";
54
61
  /** One provider attempt within a single request. */
55
62
  interface RouteAttempt {
56
63
  /** Provider label that was tried (e.g. "tokenmart"). */
@@ -142,6 +149,21 @@ interface CallRecord {
142
149
  * other signal. Treat a flagged record as "cost unknown", not "free".
143
150
  */
144
151
  usageMissing?: boolean;
152
+ /**
153
+ * True when the winner served a clean, error-free response that nonetheless
154
+ * generated **nothing**: zero output tokens with a non-empty prompt (and, for
155
+ * streams, not one content part). The user asked and got a blank. Distinct
156
+ * from {@link usageMissing} (which is input *and* output both zero — usage not
157
+ * reported); here the prompt was billed but the model produced no output.
158
+ *
159
+ * Set only when this empty response is what the caller actually received —
160
+ * i.e. every provider in the chain came back empty, so failover couldn't
161
+ * rescue it. (When an earlier provider returns empty but a later one produces
162
+ * content, that earlier attempt is recorded as a failed `empty_completion` hop
163
+ * and this flag stays unset, because the winner did produce output.) Alert on
164
+ * it: a provider that quietly returns blanks passes every health check.
165
+ */
166
+ emptyCompletion?: boolean;
145
167
  }
146
168
  /**
147
169
  * Normalize an error into a short, log-friendly class for {@link CallRecord}.
package/dist/index.js CHANGED
@@ -1,4 +1,10 @@
1
1
  // src/fallback.ts
2
+ var EmptyCompletionError = class extends Error {
3
+ constructor(provider) {
4
+ super(`ai-lcr: provider "${provider}" returned an empty completion (0 output tokens, no content)`);
5
+ this.name = "EmptyCompletionError";
6
+ }
7
+ };
2
8
  var RETRYABLE_STATUS = /* @__PURE__ */ new Set([401, 402, 403, 408, 409, 413, 429, 498, 500]);
3
9
  var RETRYABLE_PATTERNS = [
4
10
  "overloaded",
@@ -111,6 +117,7 @@ function isRetryableError(error) {
111
117
  return RETRYABLE_PATTERNS.some((p) => text.includes(p));
112
118
  }
113
119
  function classifyError(error) {
120
+ if (error instanceof EmptyCompletionError) return "empty_completion";
114
121
  const e = error;
115
122
  const status = e?.statusCode ?? e?.status;
116
123
  if (typeof status === "number") return String(status);
@@ -133,6 +140,7 @@ var BILLING_PATTERNS = [
133
140
  "\u6263\u6B3E"
134
141
  ];
135
142
  function classifyErrorKind(error) {
143
+ if (error instanceof EmptyCompletionError) return "empty";
136
144
  const e = error;
137
145
  const status = e?.statusCode ?? e?.status;
138
146
  const { text } = errorSignals(error);
@@ -161,6 +169,18 @@ function requestIdFrom(options) {
161
169
  const raw = options.providerOptions?.lcr?.requestId;
162
170
  return typeof raw === "string" && raw.length > 0 ? raw : void 0;
163
171
  }
172
+ var CONTENT_PART_TYPES = /* @__PURE__ */ new Set([
173
+ "text-delta",
174
+ "reasoning-delta",
175
+ "tool-call",
176
+ "tool-input-start",
177
+ "tool-input-delta",
178
+ "tool-input-end",
179
+ "file",
180
+ "source",
181
+ "tool-result",
182
+ "raw"
183
+ ]);
164
184
  var LcrFallbackModel = class {
165
185
  constructor(opts) {
166
186
  this.opts = opts;
@@ -287,6 +307,7 @@ var LcrFallbackModel = class {
287
307
  const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
288
308
  const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
289
309
  const usageMissing = inputTokens === 0 && outputTokens === 0;
310
+ const emptyCompletion = inputTokens > 0 && outputTokens === 0;
290
311
  this.emitCost({
291
312
  model: this.opts.modelName,
292
313
  provider: provider.label,
@@ -310,7 +331,8 @@ var LcrFallbackModel = class {
310
331
  baselineUsd: this.baselineUsd(inputTokens, outputTokens, cacheReadTokens),
311
332
  ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
312
333
  ...ctx.requestId ? { requestId: ctx.requestId } : {},
313
- ...usageMissing ? { usageMissing: true } : {}
334
+ ...usageMissing ? { usageMissing: true } : {},
335
+ ...emptyCompletion ? { emptyCompletion: true } : {}
314
336
  });
315
337
  }
316
338
  /** Every provider failed: fire `onCall` with no winner. */
@@ -341,6 +363,15 @@ var LcrFallbackModel = class {
341
363
  const attemptStart = Date.now();
342
364
  try {
343
365
  const result = await provider.model.doGenerate(options);
366
+ const out = result.usage?.outputTokens?.total ?? 0;
367
+ const inp = result.usage?.inputTokens?.total ?? 0;
368
+ if (inp > 0 && out === 0 && tried < n - 1) {
369
+ const emptyErr = new EmptyCompletionError(provider.label);
370
+ lastError = emptyErr;
371
+ this.emitError(emptyErr, provider.label);
372
+ this.recordFail(ctx, provider, attemptStart, emptyErr);
373
+ continue;
374
+ }
344
375
  this.settleSticky(idx);
345
376
  this.finalizeOk(ctx, provider, attemptStart, result.usage);
346
377
  return result;
@@ -402,7 +433,7 @@ var LcrFallbackModel = class {
402
433
  const servingIdx = idx;
403
434
  const triedBeforeServing = tried;
404
435
  let usage;
405
- let streamedAny = false;
436
+ let contentStreamed = false;
406
437
  let ttftMs;
407
438
  const stream = new ReadableStream({
408
439
  async start(controller) {
@@ -411,17 +442,24 @@ var LcrFallbackModel = class {
411
442
  reader = result.stream.getReader();
412
443
  for (; ; ) {
413
444
  const { done, value } = await reader.read();
414
- if (!streamedAny && value && typeof value === "object" && "error" in value) {
445
+ if (!contentStreamed && value && typeof value === "object" && "error" in value) {
415
446
  const err = value.error;
416
447
  if (self.shouldRetry(err)) throw err;
417
448
  }
418
449
  if (done) break;
419
- if (value.type === "finish") usage = value.usage;
450
+ if (value.type === "finish") {
451
+ usage = value.usage;
452
+ const out = value.usage?.outputTokens?.total ?? 0;
453
+ const inp = value.usage?.inputTokens?.total ?? 0;
454
+ if (inp > 0 && out === 0 && !contentStreamed && triedBeforeServing + 1 < n) {
455
+ throw new EmptyCompletionError(servingProvider.label);
456
+ }
457
+ }
420
458
  if (ttftMs === void 0 && (value.type === "text-delta" || value.type === "reasoning-delta")) {
421
459
  ttftMs = Date.now() - servingAttemptStart;
422
460
  }
423
461
  controller.enqueue(value);
424
- if (value.type !== "stream-start") streamedAny = true;
462
+ if (CONTENT_PART_TYPES.has(value.type)) contentStreamed = true;
425
463
  }
426
464
  self.settleSticky(servingIdx);
427
465
  self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
@@ -429,7 +467,7 @@ var LcrFallbackModel = class {
429
467
  } catch (error) {
430
468
  self.emitError(error, servingProvider.label);
431
469
  self.recordFail(ctx, servingProvider, servingAttemptStart, error);
432
- if (!streamedAny) {
470
+ if (!contentStreamed) {
433
471
  const nextTried = triedBeforeServing + 1;
434
472
  if (nextTried >= n) {
435
473
  self.finalizeFail(ctx);
@@ -492,6 +530,7 @@ function formatCallRecord(record, opts = {}) {
492
530
  line += ` (saved $${(record.baselineUsd - record.costUsd).toFixed(4)})`;
493
531
  }
494
532
  if (record.usageMissing) line += ` \u26A0no-usage`;
533
+ if (record.emptyCompletion) line += ` \u26A0empty`;
495
534
  const failed = record.attempts.filter((a) => !a.ok);
496
535
  if (failed.length > 0) {
497
536
  const reasons = failed.map((a) => `${a.provider} ${a.errorClass ?? "error"}`).join(", ");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-lcr",
3
- "version": "0.5.1",
3
+ "version": "0.5.2",
4
4
  "description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
5
5
  "keywords": [
6
6
  "ai",