ai-lcr 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -5
- package/README.zh-CN.md +1 -5
- package/dist/index.cjs +45 -6
- package/dist/index.d.cts +23 -1
- package/dist/index.d.ts +23 -1
- package/dist/index.js +45 -6
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
</p>
|
|
16
16
|
|
|
17
17
|
<p align="center">
|
|
18
|
-
<img src="assets/ai-lcr-hero.svg" alt="ai-lcr
|
|
18
|
+
<img src="assets/ai-lcr-hero.svg" alt="ai-lcr keeps a cheapest-first list of providers per model — serves the cheapest (saving ~40%), fails over to the next on error, and snaps back to the cheapest after ~60s" width="720">
|
|
19
19
|
</p>
|
|
20
20
|
|
|
21
21
|
The same model costs different amounts on different providers — and no single provider is cheapest for everything. `ai-lcr` keeps a cheapest-first list per model, routes to the cheapest healthy one (⭐ below), and falls through on failure — the way phone carriers have done [Least Cost Routing](https://en.wikipedia.org/wiki/Least-cost_routing) for decades.
|
|
@@ -144,10 +144,6 @@ DeepInfra carries open weights only — no first-party Claude / GPT / Gemini. Fo
|
|
|
144
144
|
2. **Fall through on failure.** On a retryable error — rate limit, 5xx, timeout, or a **billing cap** (402 / out-of-credit / quota) — it advances to the next provider, streaming-safe. A caller's own bad request (e.g. 400, 422) passes through immediately.
|
|
145
145
|
3. **Recover.** After an idle window (`resetIntervalMs`, default 60s) it snaps back to the cheapest provider.
|
|
146
146
|
|
|
147
|
-
<p align="center">
|
|
148
|
-
<img src="assets/ai-lcr-routing.svg" alt="routing diagram: cheapest first, fallback on failure, recover after idle" width="820">
|
|
149
|
-
</p>
|
|
150
|
-
|
|
151
147
|
## See what happened (`onCall`)
|
|
152
148
|
|
|
153
149
|
`onError`/`onCost` fire separately and uncorrelated, so a failover is hard to read after the fact. `onCall` gives you **one record per request** — the full chain, the winner, the reason for each failed hop, latency, and cost — and `formatCallRecord` turns it into a one-liner you can scan:
|
package/README.zh-CN.md
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
</p>
|
|
16
16
|
|
|
17
17
|
<p align="center">
|
|
18
|
-
<img src="assets/ai-lcr-hero.svg" alt="ai-lcr
|
|
18
|
+
<img src="assets/ai-lcr-hero.svg" alt="ai-lcr 为每个模型维护一份「最便宜优先」的 provider 列表——默认走最便宜的(省约 40%),出错时切到下一个,约 60 秒后自动切回最便宜" width="720">
|
|
19
19
|
</p>
|
|
20
20
|
|
|
21
21
|
同一个模型在不同 provider 上的价格不同——而且没有任何单一 provider 在所有模型上都最便宜。`ai-lcr` 为每个模型维护一份「最便宜优先」的列表,路由到其中最便宜且健康的 provider(下表中的 ⭐),失败时向下穿透——这正是电话运营商几十年来一直在做的 [最低成本路由(Least Cost Routing)](https://en.wikipedia.org/wiki/Least-cost_routing)。
|
|
@@ -144,10 +144,6 @@ DeepInfra 只承载开源权重——没有第一方 Claude / GPT / Gemini。那
|
|
|
144
144
|
2. **失败时向下穿透。** 遇到可重试的错误(限流、5xx、超时)时,前进到下一个 provider,且对流式安全。硬错误(400、401、403、422)会直接透传,不做重试。
|
|
145
145
|
3. **恢复。** 在一段空闲窗口(`resetIntervalMs`,默认 60s)之后,自动回到最便宜的 provider。
|
|
146
146
|
|
|
147
|
-
<p align="center">
|
|
148
|
-
<img src="assets/ai-lcr-routing.svg" alt="路由示意图:最便宜优先、失败时 fallback、空闲后恢复" width="820">
|
|
149
|
-
</p>
|
|
150
|
-
|
|
151
147
|
## 支持的 provider
|
|
152
148
|
|
|
153
149
|
任何 OpenAI 兼容的 endpoint 都可用——任何 AI SDK 的 provider 包也都可用,包括模型厂商自己的官方 API。
|
package/dist/index.cjs
CHANGED
|
@@ -41,6 +41,12 @@ __export(index_exports, {
|
|
|
41
41
|
module.exports = __toCommonJS(index_exports);
|
|
42
42
|
|
|
43
43
|
// src/fallback.ts
|
|
44
|
+
var EmptyCompletionError = class extends Error {
|
|
45
|
+
constructor(provider) {
|
|
46
|
+
super(`ai-lcr: provider "${provider}" returned an empty completion (0 output tokens, no content)`);
|
|
47
|
+
this.name = "EmptyCompletionError";
|
|
48
|
+
}
|
|
49
|
+
};
|
|
44
50
|
var RETRYABLE_STATUS = /* @__PURE__ */ new Set([401, 402, 403, 408, 409, 413, 429, 498, 500]);
|
|
45
51
|
var RETRYABLE_PATTERNS = [
|
|
46
52
|
"overloaded",
|
|
@@ -153,6 +159,7 @@ function isRetryableError(error) {
|
|
|
153
159
|
return RETRYABLE_PATTERNS.some((p) => text.includes(p));
|
|
154
160
|
}
|
|
155
161
|
function classifyError(error) {
|
|
162
|
+
if (error instanceof EmptyCompletionError) return "empty_completion";
|
|
156
163
|
const e = error;
|
|
157
164
|
const status = e?.statusCode ?? e?.status;
|
|
158
165
|
if (typeof status === "number") return String(status);
|
|
@@ -175,6 +182,7 @@ var BILLING_PATTERNS = [
|
|
|
175
182
|
"\u6263\u6B3E"
|
|
176
183
|
];
|
|
177
184
|
function classifyErrorKind(error) {
|
|
185
|
+
if (error instanceof EmptyCompletionError) return "empty";
|
|
178
186
|
const e = error;
|
|
179
187
|
const status = e?.statusCode ?? e?.status;
|
|
180
188
|
const { text } = errorSignals(error);
|
|
@@ -203,6 +211,18 @@ function requestIdFrom(options) {
|
|
|
203
211
|
const raw = options.providerOptions?.lcr?.requestId;
|
|
204
212
|
return typeof raw === "string" && raw.length > 0 ? raw : void 0;
|
|
205
213
|
}
|
|
214
|
+
var CONTENT_PART_TYPES = /* @__PURE__ */ new Set([
|
|
215
|
+
"text-delta",
|
|
216
|
+
"reasoning-delta",
|
|
217
|
+
"tool-call",
|
|
218
|
+
"tool-input-start",
|
|
219
|
+
"tool-input-delta",
|
|
220
|
+
"tool-input-end",
|
|
221
|
+
"file",
|
|
222
|
+
"source",
|
|
223
|
+
"tool-result",
|
|
224
|
+
"raw"
|
|
225
|
+
]);
|
|
206
226
|
var LcrFallbackModel = class {
|
|
207
227
|
constructor(opts) {
|
|
208
228
|
this.opts = opts;
|
|
@@ -329,6 +349,7 @@ var LcrFallbackModel = class {
|
|
|
329
349
|
const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
|
|
330
350
|
const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
|
|
331
351
|
const usageMissing = inputTokens === 0 && outputTokens === 0;
|
|
352
|
+
const emptyCompletion = inputTokens > 0 && outputTokens === 0;
|
|
332
353
|
this.emitCost({
|
|
333
354
|
model: this.opts.modelName,
|
|
334
355
|
provider: provider.label,
|
|
@@ -352,7 +373,8 @@ var LcrFallbackModel = class {
|
|
|
352
373
|
baselineUsd: this.baselineUsd(inputTokens, outputTokens, cacheReadTokens),
|
|
353
374
|
...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
|
|
354
375
|
...ctx.requestId ? { requestId: ctx.requestId } : {},
|
|
355
|
-
...usageMissing ? { usageMissing: true } : {}
|
|
376
|
+
...usageMissing ? { usageMissing: true } : {},
|
|
377
|
+
...emptyCompletion ? { emptyCompletion: true } : {}
|
|
356
378
|
});
|
|
357
379
|
}
|
|
358
380
|
/** Every provider failed: fire `onCall` with no winner. */
|
|
@@ -383,6 +405,15 @@ var LcrFallbackModel = class {
|
|
|
383
405
|
const attemptStart = Date.now();
|
|
384
406
|
try {
|
|
385
407
|
const result = await provider.model.doGenerate(options);
|
|
408
|
+
const out = result.usage?.outputTokens?.total ?? 0;
|
|
409
|
+
const inp = result.usage?.inputTokens?.total ?? 0;
|
|
410
|
+
if (inp > 0 && out === 0 && tried < n - 1) {
|
|
411
|
+
const emptyErr = new EmptyCompletionError(provider.label);
|
|
412
|
+
lastError = emptyErr;
|
|
413
|
+
this.emitError(emptyErr, provider.label);
|
|
414
|
+
this.recordFail(ctx, provider, attemptStart, emptyErr);
|
|
415
|
+
continue;
|
|
416
|
+
}
|
|
386
417
|
this.settleSticky(idx);
|
|
387
418
|
this.finalizeOk(ctx, provider, attemptStart, result.usage);
|
|
388
419
|
return result;
|
|
@@ -444,7 +475,7 @@ var LcrFallbackModel = class {
|
|
|
444
475
|
const servingIdx = idx;
|
|
445
476
|
const triedBeforeServing = tried;
|
|
446
477
|
let usage;
|
|
447
|
-
let
|
|
478
|
+
let contentStreamed = false;
|
|
448
479
|
let ttftMs;
|
|
449
480
|
const stream = new ReadableStream({
|
|
450
481
|
async start(controller) {
|
|
@@ -453,17 +484,24 @@ var LcrFallbackModel = class {
|
|
|
453
484
|
reader = result.stream.getReader();
|
|
454
485
|
for (; ; ) {
|
|
455
486
|
const { done, value } = await reader.read();
|
|
456
|
-
if (!
|
|
487
|
+
if (!contentStreamed && value && typeof value === "object" && "error" in value) {
|
|
457
488
|
const err = value.error;
|
|
458
489
|
if (self.shouldRetry(err)) throw err;
|
|
459
490
|
}
|
|
460
491
|
if (done) break;
|
|
461
|
-
if (value.type === "finish") usage = value.usage;
|
|
492
|
+
if (value.type === "finish") {
|
|
493
|
+
usage = value.usage;
|
|
494
|
+
const out = value.usage?.outputTokens?.total ?? 0;
|
|
495
|
+
const inp = value.usage?.inputTokens?.total ?? 0;
|
|
496
|
+
if (inp > 0 && out === 0 && !contentStreamed && triedBeforeServing + 1 < n) {
|
|
497
|
+
throw new EmptyCompletionError(servingProvider.label);
|
|
498
|
+
}
|
|
499
|
+
}
|
|
462
500
|
if (ttftMs === void 0 && (value.type === "text-delta" || value.type === "reasoning-delta")) {
|
|
463
501
|
ttftMs = Date.now() - servingAttemptStart;
|
|
464
502
|
}
|
|
465
503
|
controller.enqueue(value);
|
|
466
|
-
if (value.type
|
|
504
|
+
if (CONTENT_PART_TYPES.has(value.type)) contentStreamed = true;
|
|
467
505
|
}
|
|
468
506
|
self.settleSticky(servingIdx);
|
|
469
507
|
self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
|
|
@@ -471,7 +509,7 @@ var LcrFallbackModel = class {
|
|
|
471
509
|
} catch (error) {
|
|
472
510
|
self.emitError(error, servingProvider.label);
|
|
473
511
|
self.recordFail(ctx, servingProvider, servingAttemptStart, error);
|
|
474
|
-
if (!
|
|
512
|
+
if (!contentStreamed) {
|
|
475
513
|
const nextTried = triedBeforeServing + 1;
|
|
476
514
|
if (nextTried >= n) {
|
|
477
515
|
self.finalizeFail(ctx);
|
|
@@ -534,6 +572,7 @@ function formatCallRecord(record, opts = {}) {
|
|
|
534
572
|
line += ` (saved $${(record.baselineUsd - record.costUsd).toFixed(4)})`;
|
|
535
573
|
}
|
|
536
574
|
if (record.usageMissing) line += ` \u26A0no-usage`;
|
|
575
|
+
if (record.emptyCompletion) line += ` \u26A0empty`;
|
|
537
576
|
const failed = record.attempts.filter((a) => !a.ok);
|
|
538
577
|
if (failed.length > 0) {
|
|
539
578
|
const reasons = failed.map((a) => `${a.provider} ${a.errorClass ?? "error"}`).join(", ");
|
package/dist/index.d.cts
CHANGED
|
@@ -49,8 +49,15 @@ interface CostEvent {
|
|
|
49
49
|
* - "auth": 401 / 403 — a misconfigured or revoked key.
|
|
50
50
|
* - "billing": 402 / out-of-credit / quota — account needs topping up.
|
|
51
51
|
* - "client": a non-retryable caller error (e.g. 400 bad request).
|
|
52
|
+
* - "empty": provider returned a clean 200 but generated nothing
|
|
53
|
+
* (zero output tokens, no content) — a *content*-integrity
|
|
54
|
+
* failure, not a transport one. The provider looks healthy to
|
|
55
|
+
* every status/network check yet hands the user a blank. We
|
|
56
|
+
* fail over on it like a transient error, but tag it separately
|
|
57
|
+
* so a run of `"empty"` attempts (a quietly degraded model)
|
|
58
|
+
* doesn't hide inside the transient noise.
|
|
52
59
|
*/
|
|
53
|
-
type ErrorKind = "transient" | "auth" | "billing" | "client";
|
|
60
|
+
type ErrorKind = "transient" | "auth" | "billing" | "client" | "empty";
|
|
54
61
|
/** One provider attempt within a single request. */
|
|
55
62
|
interface RouteAttempt {
|
|
56
63
|
/** Provider label that was tried (e.g. "tokenmart"). */
|
|
@@ -142,6 +149,21 @@ interface CallRecord {
|
|
|
142
149
|
* other signal. Treat a flagged record as "cost unknown", not "free".
|
|
143
150
|
*/
|
|
144
151
|
usageMissing?: boolean;
|
|
152
|
+
/**
|
|
153
|
+
* True when the winner served a clean, error-free response that nonetheless
|
|
154
|
+
* generated **nothing**: zero output tokens with a non-empty prompt (and, for
|
|
155
|
+
* streams, not one content part). The user asked and got a blank. Distinct
|
|
156
|
+
* from {@link usageMissing} (which is input *and* output both zero — usage not
|
|
157
|
+
* reported); here the prompt was billed but the model produced no output.
|
|
158
|
+
*
|
|
159
|
+
* Set only when this empty response is what the caller actually received —
|
|
160
|
+
* i.e. every provider in the chain came back empty, so failover couldn't
|
|
161
|
+
* rescue it. (When an earlier provider returns empty but a later one produces
|
|
162
|
+
* content, that earlier attempt is recorded as a failed `empty_completion` hop
|
|
163
|
+
* and this flag stays unset, because the winner did produce output.) Alert on
|
|
164
|
+
* it: a provider that quietly returns blanks passes every health check.
|
|
165
|
+
*/
|
|
166
|
+
emptyCompletion?: boolean;
|
|
145
167
|
}
|
|
146
168
|
/**
|
|
147
169
|
* Normalize an error into a short, log-friendly class for {@link CallRecord}.
|
package/dist/index.d.ts
CHANGED
|
@@ -49,8 +49,15 @@ interface CostEvent {
|
|
|
49
49
|
* - "auth": 401 / 403 — a misconfigured or revoked key.
|
|
50
50
|
* - "billing": 402 / out-of-credit / quota — account needs topping up.
|
|
51
51
|
* - "client": a non-retryable caller error (e.g. 400 bad request).
|
|
52
|
+
* - "empty": provider returned a clean 200 but generated nothing
|
|
53
|
+
* (zero output tokens, no content) — a *content*-integrity
|
|
54
|
+
* failure, not a transport one. The provider looks healthy to
|
|
55
|
+
* every status/network check yet hands the user a blank. We
|
|
56
|
+
* fail over on it like a transient error, but tag it separately
|
|
57
|
+
* so a run of `"empty"` attempts (a quietly degraded model)
|
|
58
|
+
* doesn't hide inside the transient noise.
|
|
52
59
|
*/
|
|
53
|
-
type ErrorKind = "transient" | "auth" | "billing" | "client";
|
|
60
|
+
type ErrorKind = "transient" | "auth" | "billing" | "client" | "empty";
|
|
54
61
|
/** One provider attempt within a single request. */
|
|
55
62
|
interface RouteAttempt {
|
|
56
63
|
/** Provider label that was tried (e.g. "tokenmart"). */
|
|
@@ -142,6 +149,21 @@ interface CallRecord {
|
|
|
142
149
|
* other signal. Treat a flagged record as "cost unknown", not "free".
|
|
143
150
|
*/
|
|
144
151
|
usageMissing?: boolean;
|
|
152
|
+
/**
|
|
153
|
+
* True when the winner served a clean, error-free response that nonetheless
|
|
154
|
+
* generated **nothing**: zero output tokens with a non-empty prompt (and, for
|
|
155
|
+
* streams, not one content part). The user asked and got a blank. Distinct
|
|
156
|
+
* from {@link usageMissing} (which is input *and* output both zero — usage not
|
|
157
|
+
* reported); here the prompt was billed but the model produced no output.
|
|
158
|
+
*
|
|
159
|
+
* Set only when this empty response is what the caller actually received —
|
|
160
|
+
* i.e. every provider in the chain came back empty, so failover couldn't
|
|
161
|
+
* rescue it. (When an earlier provider returns empty but a later one produces
|
|
162
|
+
* content, that earlier attempt is recorded as a failed `empty_completion` hop
|
|
163
|
+
* and this flag stays unset, because the winner did produce output.) Alert on
|
|
164
|
+
* it: a provider that quietly returns blanks passes every health check.
|
|
165
|
+
*/
|
|
166
|
+
emptyCompletion?: boolean;
|
|
145
167
|
}
|
|
146
168
|
/**
|
|
147
169
|
* Normalize an error into a short, log-friendly class for {@link CallRecord}.
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
1
|
// src/fallback.ts
|
|
2
|
+
var EmptyCompletionError = class extends Error {
|
|
3
|
+
constructor(provider) {
|
|
4
|
+
super(`ai-lcr: provider "${provider}" returned an empty completion (0 output tokens, no content)`);
|
|
5
|
+
this.name = "EmptyCompletionError";
|
|
6
|
+
}
|
|
7
|
+
};
|
|
2
8
|
var RETRYABLE_STATUS = /* @__PURE__ */ new Set([401, 402, 403, 408, 409, 413, 429, 498, 500]);
|
|
3
9
|
var RETRYABLE_PATTERNS = [
|
|
4
10
|
"overloaded",
|
|
@@ -111,6 +117,7 @@ function isRetryableError(error) {
|
|
|
111
117
|
return RETRYABLE_PATTERNS.some((p) => text.includes(p));
|
|
112
118
|
}
|
|
113
119
|
function classifyError(error) {
|
|
120
|
+
if (error instanceof EmptyCompletionError) return "empty_completion";
|
|
114
121
|
const e = error;
|
|
115
122
|
const status = e?.statusCode ?? e?.status;
|
|
116
123
|
if (typeof status === "number") return String(status);
|
|
@@ -133,6 +140,7 @@ var BILLING_PATTERNS = [
|
|
|
133
140
|
"\u6263\u6B3E"
|
|
134
141
|
];
|
|
135
142
|
function classifyErrorKind(error) {
|
|
143
|
+
if (error instanceof EmptyCompletionError) return "empty";
|
|
136
144
|
const e = error;
|
|
137
145
|
const status = e?.statusCode ?? e?.status;
|
|
138
146
|
const { text } = errorSignals(error);
|
|
@@ -161,6 +169,18 @@ function requestIdFrom(options) {
|
|
|
161
169
|
const raw = options.providerOptions?.lcr?.requestId;
|
|
162
170
|
return typeof raw === "string" && raw.length > 0 ? raw : void 0;
|
|
163
171
|
}
|
|
172
|
+
var CONTENT_PART_TYPES = /* @__PURE__ */ new Set([
|
|
173
|
+
"text-delta",
|
|
174
|
+
"reasoning-delta",
|
|
175
|
+
"tool-call",
|
|
176
|
+
"tool-input-start",
|
|
177
|
+
"tool-input-delta",
|
|
178
|
+
"tool-input-end",
|
|
179
|
+
"file",
|
|
180
|
+
"source",
|
|
181
|
+
"tool-result",
|
|
182
|
+
"raw"
|
|
183
|
+
]);
|
|
164
184
|
var LcrFallbackModel = class {
|
|
165
185
|
constructor(opts) {
|
|
166
186
|
this.opts = opts;
|
|
@@ -287,6 +307,7 @@ var LcrFallbackModel = class {
|
|
|
287
307
|
const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
|
|
288
308
|
const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
|
|
289
309
|
const usageMissing = inputTokens === 0 && outputTokens === 0;
|
|
310
|
+
const emptyCompletion = inputTokens > 0 && outputTokens === 0;
|
|
290
311
|
this.emitCost({
|
|
291
312
|
model: this.opts.modelName,
|
|
292
313
|
provider: provider.label,
|
|
@@ -310,7 +331,8 @@ var LcrFallbackModel = class {
|
|
|
310
331
|
baselineUsd: this.baselineUsd(inputTokens, outputTokens, cacheReadTokens),
|
|
311
332
|
...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
|
|
312
333
|
...ctx.requestId ? { requestId: ctx.requestId } : {},
|
|
313
|
-
...usageMissing ? { usageMissing: true } : {}
|
|
334
|
+
...usageMissing ? { usageMissing: true } : {},
|
|
335
|
+
...emptyCompletion ? { emptyCompletion: true } : {}
|
|
314
336
|
});
|
|
315
337
|
}
|
|
316
338
|
/** Every provider failed: fire `onCall` with no winner. */
|
|
@@ -341,6 +363,15 @@ var LcrFallbackModel = class {
|
|
|
341
363
|
const attemptStart = Date.now();
|
|
342
364
|
try {
|
|
343
365
|
const result = await provider.model.doGenerate(options);
|
|
366
|
+
const out = result.usage?.outputTokens?.total ?? 0;
|
|
367
|
+
const inp = result.usage?.inputTokens?.total ?? 0;
|
|
368
|
+
if (inp > 0 && out === 0 && tried < n - 1) {
|
|
369
|
+
const emptyErr = new EmptyCompletionError(provider.label);
|
|
370
|
+
lastError = emptyErr;
|
|
371
|
+
this.emitError(emptyErr, provider.label);
|
|
372
|
+
this.recordFail(ctx, provider, attemptStart, emptyErr);
|
|
373
|
+
continue;
|
|
374
|
+
}
|
|
344
375
|
this.settleSticky(idx);
|
|
345
376
|
this.finalizeOk(ctx, provider, attemptStart, result.usage);
|
|
346
377
|
return result;
|
|
@@ -402,7 +433,7 @@ var LcrFallbackModel = class {
|
|
|
402
433
|
const servingIdx = idx;
|
|
403
434
|
const triedBeforeServing = tried;
|
|
404
435
|
let usage;
|
|
405
|
-
let
|
|
436
|
+
let contentStreamed = false;
|
|
406
437
|
let ttftMs;
|
|
407
438
|
const stream = new ReadableStream({
|
|
408
439
|
async start(controller) {
|
|
@@ -411,17 +442,24 @@ var LcrFallbackModel = class {
|
|
|
411
442
|
reader = result.stream.getReader();
|
|
412
443
|
for (; ; ) {
|
|
413
444
|
const { done, value } = await reader.read();
|
|
414
|
-
if (!
|
|
445
|
+
if (!contentStreamed && value && typeof value === "object" && "error" in value) {
|
|
415
446
|
const err = value.error;
|
|
416
447
|
if (self.shouldRetry(err)) throw err;
|
|
417
448
|
}
|
|
418
449
|
if (done) break;
|
|
419
|
-
if (value.type === "finish") usage = value.usage;
|
|
450
|
+
if (value.type === "finish") {
|
|
451
|
+
usage = value.usage;
|
|
452
|
+
const out = value.usage?.outputTokens?.total ?? 0;
|
|
453
|
+
const inp = value.usage?.inputTokens?.total ?? 0;
|
|
454
|
+
if (inp > 0 && out === 0 && !contentStreamed && triedBeforeServing + 1 < n) {
|
|
455
|
+
throw new EmptyCompletionError(servingProvider.label);
|
|
456
|
+
}
|
|
457
|
+
}
|
|
420
458
|
if (ttftMs === void 0 && (value.type === "text-delta" || value.type === "reasoning-delta")) {
|
|
421
459
|
ttftMs = Date.now() - servingAttemptStart;
|
|
422
460
|
}
|
|
423
461
|
controller.enqueue(value);
|
|
424
|
-
if (value.type
|
|
462
|
+
if (CONTENT_PART_TYPES.has(value.type)) contentStreamed = true;
|
|
425
463
|
}
|
|
426
464
|
self.settleSticky(servingIdx);
|
|
427
465
|
self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
|
|
@@ -429,7 +467,7 @@ var LcrFallbackModel = class {
|
|
|
429
467
|
} catch (error) {
|
|
430
468
|
self.emitError(error, servingProvider.label);
|
|
431
469
|
self.recordFail(ctx, servingProvider, servingAttemptStart, error);
|
|
432
|
-
if (!
|
|
470
|
+
if (!contentStreamed) {
|
|
433
471
|
const nextTried = triedBeforeServing + 1;
|
|
434
472
|
if (nextTried >= n) {
|
|
435
473
|
self.finalizeFail(ctx);
|
|
@@ -492,6 +530,7 @@ function formatCallRecord(record, opts = {}) {
|
|
|
492
530
|
line += ` (saved $${(record.baselineUsd - record.costUsd).toFixed(4)})`;
|
|
493
531
|
}
|
|
494
532
|
if (record.usageMissing) line += ` \u26A0no-usage`;
|
|
533
|
+
if (record.emptyCompletion) line += ` \u26A0empty`;
|
|
495
534
|
const failed = record.attempts.filter((a) => !a.ok);
|
|
496
535
|
if (failed.length > 0) {
|
|
497
536
|
const reasons = failed.map((a) => `${a.provider} ${a.errorClass ?? "error"}`).join(", ");
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ai-lcr",
|
|
3
|
-
"version": "0.5.1",
|
|
3
|
+
"version": "0.5.2",
|
|
4
4
|
"description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|