ai-lcr 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +60 -0
- package/dist/index.cjs +84 -81
- package/dist/index.d.cts +30 -57
- package/dist/index.d.ts +30 -57
- package/dist/index.js +83 -80
- package/package.json +5 -3
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to `ai-lcr` are documented here. The format follows
|
|
4
|
+
[Keep a Changelog](https://keepachangelog.com/), and the project adheres to
|
|
5
|
+
[Semantic Versioning](https://semver.org/).
|
|
6
|
+
|
|
7
|
+
## [0.2.3] — 2026-06-01
|
|
8
|
+
|
|
9
|
+
Release-quality and engine-correctness pass.
|
|
10
|
+
|
|
11
|
+
### Fixed
|
|
12
|
+
|
|
13
|
+
- **Build was red on `main`.** `media.ts` set `CallRecord.baselineUsd` but the
|
|
14
|
+
type never declared it, so `tsc`/`npm run build` failed while `npm test`
|
|
15
|
+
(which doesn't typecheck) stayed green. `baselineUsd?: number` is now part of
|
|
16
|
+
`CallRecord`. The text router leaves it `undefined`; the media router sets it.
|
|
17
|
+
- **Failover used shared mutable state across concurrent requests.** The active
|
|
18
|
+
provider index was an instance field used both as the per-request loop cursor
|
|
19
|
+
and the loop's termination check. Two requests sharing one model instance
|
|
20
|
+
could clobber each other's cursor mid-flight (skipped providers, wrong
|
|
21
|
+
termination). Each request now walks providers on a fully local cursor; the
|
|
22
|
+
only shared state is a "where to start next" hint, read once and written once.
|
|
23
|
+
- **Cheapest provider was never re-probed under sustained traffic.** The
|
|
24
|
+
snap-back-to-cheapest timer reset on *every* call, so with calls more frequent
|
|
25
|
+
than `resetIntervalMs` it never fired — one blip pinned you on the expensive
|
|
26
|
+
fallback indefinitely (exactly when spend is highest). The timer now measures
|
|
27
|
+
from the last *failover*, so re-probe fires under load too.
|
|
28
|
+
|
|
29
|
+
### Added
|
|
30
|
+
|
|
31
|
+
- **`classifyErrorKind(error)` and `RouteAttempt.kind`** (`"transient" | "auth"
|
|
32
|
+
| "billing" | "client"`). 401/403 (auth) and 402/out-of-credit (billing)
|
|
33
|
+
still fail over so the request survives — but they're now tagged distinctly
|
|
34
|
+
from transient 429/5xx, so a misconfigured key silently burning the pricey
|
|
35
|
+
fallback is something you can alert on instead of mistaking for healthy
|
|
36
|
+
routing.
|
|
37
|
+
- **Continuous Integration** (`.github/workflows/ci.yml`): `build` +
|
|
38
|
+
`typecheck` + `test` on Node 20 & 22, plus a `pack-smoke` job that installs
|
|
39
|
+
the actual `npm pack` tarball into a clean directory and imports it (ESM and
|
|
40
|
+
CJS) — catching dropped exports and broken `dist` that an in-repo test can't.
|
|
41
|
+
- **`prepublishOnly` gate**: `npm publish` now runs build + typecheck + test
|
|
42
|
+
first, so a red tree can't be published.
|
|
43
|
+
- **Public-export surface test** (`public-api.test.ts`): pins every runtime
|
|
44
|
+
export by name, so removing one fails loudly and adding one is deliberate.
|
|
45
|
+
|
|
46
|
+
## [0.2.1] — earlier
|
|
47
|
+
|
|
48
|
+
- `onCall`-correlated `CallRecord` and the `formatCallRecord` one-liner for the text
|
|
49
|
+
router, now extended to the media router (image/video).
|
|
50
|
+
|
|
51
|
+
## [0.2.0] — earlier
|
|
52
|
+
|
|
53
|
+
- Observability: `onCall` / `CallRecord`, `formatCallRecord`.
|
|
54
|
+
|
|
55
|
+
## [0.1.x] — earlier
|
|
56
|
+
|
|
57
|
+
- Dual ESM/CJS build. Media (image/video) least-cost routing with the Runware
|
|
58
|
+
and Kunavo adapters; cap-aware failover for the text router.
|
|
59
|
+
|
|
60
|
+
[0.2.3]: https://github.com/victorzhrn/ai-lcr/releases/tag/v0.2.3
|
package/dist/index.cjs
CHANGED
|
@@ -24,8 +24,8 @@ __export(index_exports, {
|
|
|
24
24
|
MEDIA_PRICING: () => MEDIA_PRICING,
|
|
25
25
|
cheapestRoute: () => cheapestRoute,
|
|
26
26
|
classifyError: () => classifyError,
|
|
27
|
+
classifyErrorKind: () => classifyErrorKind,
|
|
27
28
|
comparePrices: () => comparePrices,
|
|
28
|
-
createHttpSink: () => createHttpSink,
|
|
29
29
|
createKunavoMediaAdapter: () => createKunavoMediaAdapter,
|
|
30
30
|
createLCR: () => createLCR,
|
|
31
31
|
createMediaLCR: () => createMediaLCR,
|
|
@@ -86,6 +86,16 @@ function classifyError(error) {
|
|
|
86
86
|
const text = (e?.message ? String(e.message) : safeStringify(error)).toLowerCase();
|
|
87
87
|
return RETRYABLE_PATTERNS.find((p) => text.includes(p)) ?? "error";
|
|
88
88
|
}
|
|
89
|
+
var AUTH_STATUS = /* @__PURE__ */ new Set([401, 403]);
|
|
90
|
+
var BILLING_PATTERNS = ["insufficient", "credit", "quota", "billing", "payment required"];
|
|
91
|
+
function classifyErrorKind(error) {
|
|
92
|
+
const e = error;
|
|
93
|
+
const status = e?.statusCode ?? e?.status;
|
|
94
|
+
const text = (e?.message ? String(e.message) : safeStringify(error)).toLowerCase();
|
|
95
|
+
if (typeof status === "number" && AUTH_STATUS.has(status)) return "auth";
|
|
96
|
+
if (status === 402 || BILLING_PATTERNS.some((p) => text.includes(p))) return "billing";
|
|
97
|
+
return isRetryableError(error) ? "transient" : "client";
|
|
98
|
+
}
|
|
89
99
|
var callSeq = 0;
|
|
90
100
|
function newCallId() {
|
|
91
101
|
const c = globalThis.crypto;
|
|
@@ -102,11 +112,20 @@ var LcrFallbackModel = class {
|
|
|
102
112
|
}
|
|
103
113
|
opts;
|
|
104
114
|
specificationVersion = "v3";
|
|
105
|
-
|
|
106
|
-
|
|
115
|
+
// Cross-request *hint* for where the next request starts: after a failover we
|
|
116
|
+
// remember the provider that worked so we don't re-probe a dead cheap one on
|
|
117
|
+
// every call. This is the ONLY shared mutable state — and crucially it is read
|
|
118
|
+
// once per request (snapshotted into a local cursor) and written once on
|
|
119
|
+
// settle, never used as a per-request loop bound. The within-request iteration
|
|
120
|
+
// is fully local, so concurrent requests can't corrupt each other's routing.
|
|
121
|
+
sticky = 0;
|
|
122
|
+
// When `sticky` was last advanced (a failover). The re-probe timer measures
|
|
123
|
+
// from THIS, not from the last call — so it fires under sustained traffic too,
|
|
124
|
+
// instead of being pushed forward forever by a busy stream of requests.
|
|
125
|
+
lastFailoverAt = Date.now();
|
|
107
126
|
resetIntervalMs;
|
|
108
127
|
get current() {
|
|
109
|
-
return this.opts.providers[this.
|
|
128
|
+
return this.opts.providers[this.sticky];
|
|
110
129
|
}
|
|
111
130
|
get modelId() {
|
|
112
131
|
return this.current.model.modelId;
|
|
@@ -117,14 +136,28 @@ var LcrFallbackModel = class {
|
|
|
117
136
|
get supportedUrls() {
|
|
118
137
|
return this.current.model.supportedUrls;
|
|
119
138
|
}
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
139
|
+
/**
|
|
140
|
+
* Index a new request should start at. If we're parked on a non-cheapest
|
|
141
|
+
* provider and it's been `resetIntervalMs` since the failover, snap back to
|
|
142
|
+
* the cheapest and re-probe it — this is what lets routing recover to the
|
|
143
|
+
* cheap source even during continuous traffic.
|
|
144
|
+
*/
|
|
145
|
+
startIndex() {
|
|
146
|
+
if (this.sticky !== 0 && Date.now() - this.lastFailoverAt >= this.resetIntervalMs) {
|
|
147
|
+
this.sticky = 0;
|
|
123
148
|
}
|
|
124
|
-
this.
|
|
149
|
+
return this.sticky;
|
|
125
150
|
}
|
|
126
|
-
|
|
127
|
-
|
|
151
|
+
/**
|
|
152
|
+
* A request settled on `winIndex`. Park there so the next request skips the
|
|
153
|
+
* providers we just learned are down. Stamp the failover time only when the
|
|
154
|
+
* parked provider actually CHANGES — so a steady stream of successful calls
|
|
155
|
+
* on the same fallback doesn't keep pushing the re-probe timer forward.
|
|
156
|
+
*/
|
|
157
|
+
settleSticky(winIndex) {
|
|
158
|
+
if (winIndex === this.sticky) return;
|
|
159
|
+
this.sticky = winIndex;
|
|
160
|
+
this.lastFailoverAt = Date.now();
|
|
128
161
|
}
|
|
129
162
|
shouldRetry(error) {
|
|
130
163
|
return (this.opts.shouldRetry ?? isRetryableError)(error);
|
|
@@ -138,23 +171,16 @@ var LcrFallbackModel = class {
|
|
|
138
171
|
provider: provider.label,
|
|
139
172
|
ok: false,
|
|
140
173
|
latencyMs: Date.now() - attemptStart,
|
|
141
|
-
errorClass: classifyError(error)
|
|
174
|
+
errorClass: classifyError(error),
|
|
175
|
+
kind: classifyErrorKind(error)
|
|
142
176
|
});
|
|
143
177
|
}
|
|
144
|
-
/** Cost of one route for the given token counts; 0 if it has no price. */
|
|
145
|
-
routeCost(p, inputTokens, outputTokens) {
|
|
146
|
-
return p.cost ? inputTokens / 1e6 * p.cost.input + outputTokens / 1e6 * p.cost.output : 0;
|
|
147
|
-
}
|
|
148
178
|
/** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
|
|
149
179
|
finalizeOk(ctx, provider, attemptStart, usage) {
|
|
150
180
|
ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
|
|
151
181
|
const inputTokens = usage?.inputTokens?.total ?? 0;
|
|
152
182
|
const outputTokens = usage?.outputTokens?.total ?? 0;
|
|
153
|
-
const costUsd =
|
|
154
|
-
const baselineUsd = this.opts.providers.reduce(
|
|
155
|
-
(max, p) => Math.max(max, this.routeCost(p, inputTokens, outputTokens)),
|
|
156
|
-
costUsd
|
|
157
|
-
);
|
|
183
|
+
const costUsd = provider.cost ? inputTokens / 1e6 * provider.cost.input + outputTokens / 1e6 * provider.cost.output : 0;
|
|
158
184
|
this.opts.onCost?.({
|
|
159
185
|
model: this.opts.modelName,
|
|
160
186
|
provider: provider.label,
|
|
@@ -172,8 +198,7 @@ var LcrFallbackModel = class {
|
|
|
172
198
|
latencyMs: Date.now() - ctx.startedAt,
|
|
173
199
|
inputTokens,
|
|
174
200
|
outputTokens,
|
|
175
|
-
costUsd
|
|
176
|
-
baselineUsd
|
|
201
|
+
costUsd
|
|
177
202
|
});
|
|
178
203
|
}
|
|
179
204
|
/** Every provider failed: fire `onCall` with no winner. */
|
|
@@ -188,20 +213,22 @@ var LcrFallbackModel = class {
|
|
|
188
213
|
latencyMs: Date.now() - ctx.startedAt,
|
|
189
214
|
inputTokens: 0,
|
|
190
215
|
outputTokens: 0,
|
|
191
|
-
costUsd: 0
|
|
192
|
-
baselineUsd: 0
|
|
216
|
+
costUsd: 0
|
|
193
217
|
});
|
|
194
218
|
}
|
|
195
219
|
async doGenerate(options) {
|
|
196
|
-
this.checkReset();
|
|
197
220
|
const ctx = this.startCall();
|
|
198
|
-
const
|
|
221
|
+
const providers = this.opts.providers;
|
|
222
|
+
const n = providers.length;
|
|
223
|
+
const start = this.startIndex();
|
|
199
224
|
let lastError;
|
|
200
|
-
for (; ; ) {
|
|
201
|
-
const
|
|
225
|
+
for (let tried = 0; tried < n; tried++) {
|
|
226
|
+
const idx = (start + tried) % n;
|
|
227
|
+
const provider = providers[idx];
|
|
202
228
|
const attemptStart = Date.now();
|
|
203
229
|
try {
|
|
204
230
|
const result = await provider.model.doGenerate(options);
|
|
231
|
+
this.settleSticky(idx);
|
|
205
232
|
this.finalizeOk(ctx, provider, attemptStart, result.usage);
|
|
206
233
|
return result;
|
|
207
234
|
} catch (error) {
|
|
@@ -213,29 +240,30 @@ var LcrFallbackModel = class {
|
|
|
213
240
|
}
|
|
214
241
|
this.opts.onError?.(error, provider.label);
|
|
215
242
|
this.recordFail(ctx, provider, attemptStart, error);
|
|
216
|
-
this.switchNext();
|
|
217
|
-
if (this.index === start) {
|
|
218
|
-
this.finalizeFail(ctx);
|
|
219
|
-
throw lastError;
|
|
220
|
-
}
|
|
221
243
|
}
|
|
222
244
|
}
|
|
245
|
+
this.finalizeFail(ctx);
|
|
246
|
+
throw lastError;
|
|
223
247
|
}
|
|
224
248
|
async doStream(options) {
|
|
225
|
-
this.
|
|
226
|
-
return this.doStreamWithCtx(options, this.startCall());
|
|
249
|
+
return this.doStreamWithCtx(options, this.startCall(), this.startIndex(), 0);
|
|
227
250
|
}
|
|
228
|
-
// The stream's failover recursion re-enters here with the SAME `ctx
|
|
229
|
-
//
|
|
230
|
-
//
|
|
231
|
-
|
|
251
|
+
// The stream's failover recursion re-enters here with the SAME `ctx` and a
|
|
252
|
+
// threaded-through local cursor (`idx`/`tried`), so a mid-stream switch keeps
|
|
253
|
+
// appending to one CallRecord and bounds itself on the local `tried` count —
|
|
254
|
+
// never on shared instance state. `finalizeOk`/`finalizeFail` fire exactly
|
|
255
|
+
// once per outer request.
|
|
256
|
+
async doStreamWithCtx(options, ctx, startIdx, alreadyTried) {
|
|
232
257
|
const self = this;
|
|
233
|
-
const
|
|
258
|
+
const providers = this.opts.providers;
|
|
259
|
+
const n = providers.length;
|
|
234
260
|
let result;
|
|
235
261
|
let serving;
|
|
236
262
|
let servingStart;
|
|
263
|
+
let idx = startIdx;
|
|
264
|
+
let tried = alreadyTried;
|
|
237
265
|
for (; ; ) {
|
|
238
|
-
serving =
|
|
266
|
+
serving = providers[idx];
|
|
239
267
|
servingStart = Date.now();
|
|
240
268
|
try {
|
|
241
269
|
result = await serving.model.doStream(options);
|
|
@@ -248,15 +276,18 @@ var LcrFallbackModel = class {
|
|
|
248
276
|
}
|
|
249
277
|
this.opts.onError?.(error, serving.label);
|
|
250
278
|
this.recordFail(ctx, serving, servingStart, error);
|
|
251
|
-
|
|
252
|
-
if (
|
|
279
|
+
tried++;
|
|
280
|
+
if (tried >= n) {
|
|
253
281
|
this.finalizeFail(ctx);
|
|
254
282
|
throw error;
|
|
255
283
|
}
|
|
284
|
+
idx = (idx + 1) % n;
|
|
256
285
|
}
|
|
257
286
|
}
|
|
258
287
|
const servingProvider = serving;
|
|
259
288
|
const servingAttemptStart = servingStart;
|
|
289
|
+
const servingIdx = idx;
|
|
290
|
+
const triedBeforeServing = tried;
|
|
260
291
|
let usage;
|
|
261
292
|
let streamedAny = false;
|
|
262
293
|
const stream = new ReadableStream({
|
|
@@ -275,20 +306,26 @@ var LcrFallbackModel = class {
|
|
|
275
306
|
controller.enqueue(value);
|
|
276
307
|
if (value.type !== "stream-start") streamedAny = true;
|
|
277
308
|
}
|
|
309
|
+
self.settleSticky(servingIdx);
|
|
278
310
|
self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage);
|
|
279
311
|
controller.close();
|
|
280
312
|
} catch (error) {
|
|
281
313
|
self.opts.onError?.(error, servingProvider.label);
|
|
282
314
|
self.recordFail(ctx, servingProvider, servingAttemptStart, error);
|
|
283
315
|
if (!streamedAny) {
|
|
284
|
-
|
|
285
|
-
if (
|
|
316
|
+
const nextTried = triedBeforeServing + 1;
|
|
317
|
+
if (nextTried >= n) {
|
|
286
318
|
self.finalizeFail(ctx);
|
|
287
319
|
controller.error(error);
|
|
288
320
|
return;
|
|
289
321
|
}
|
|
290
322
|
try {
|
|
291
|
-
const next = await self.doStreamWithCtx(
|
|
323
|
+
const next = await self.doStreamWithCtx(
|
|
324
|
+
options,
|
|
325
|
+
ctx,
|
|
326
|
+
(servingIdx + 1) % n,
|
|
327
|
+
nextTried
|
|
328
|
+
);
|
|
292
329
|
const nextReader = next.stream.getReader();
|
|
293
330
|
try {
|
|
294
331
|
for (; ; ) {
|
|
@@ -345,40 +382,6 @@ function formatCallRecord(record, opts = {}) {
|
|
|
345
382
|
return line;
|
|
346
383
|
}
|
|
347
384
|
|
|
348
|
-
// src/sink.ts
|
|
349
|
-
function createHttpSink(options) {
|
|
350
|
-
const {
|
|
351
|
-
url,
|
|
352
|
-
headers,
|
|
353
|
-
project,
|
|
354
|
-
dispatch = (task) => {
|
|
355
|
-
void task();
|
|
356
|
-
},
|
|
357
|
-
fetchImpl,
|
|
358
|
-
onError
|
|
359
|
-
} = options;
|
|
360
|
-
const doFetch = fetchImpl ?? globalThis.fetch;
|
|
361
|
-
return (record) => {
|
|
362
|
-
if (!doFetch) {
|
|
363
|
-
onError?.(new Error("ai-lcr: no fetch available for createHttpSink"));
|
|
364
|
-
return;
|
|
365
|
-
}
|
|
366
|
-
const payload = project ? { project, ...record } : record;
|
|
367
|
-
dispatch(async () => {
|
|
368
|
-
try {
|
|
369
|
-
await doFetch(url, {
|
|
370
|
-
method: "POST",
|
|
371
|
-
headers: { "content-type": "application/json", ...headers },
|
|
372
|
-
body: JSON.stringify(payload),
|
|
373
|
-
keepalive: true
|
|
374
|
-
});
|
|
375
|
-
} catch (err) {
|
|
376
|
-
onError?.(err);
|
|
377
|
-
}
|
|
378
|
-
});
|
|
379
|
-
};
|
|
380
|
-
}
|
|
381
|
-
|
|
382
385
|
// src/media.ts
|
|
383
386
|
var DEFAULT_REFERENCE = {
|
|
384
387
|
image: { width: 1920, height: 1080 },
|
|
@@ -822,8 +825,8 @@ function createLCR(config) {
|
|
|
822
825
|
MEDIA_PRICING,
|
|
823
826
|
cheapestRoute,
|
|
824
827
|
classifyError,
|
|
828
|
+
classifyErrorKind,
|
|
825
829
|
comparePrices,
|
|
826
|
-
createHttpSink,
|
|
827
830
|
createKunavoMediaAdapter,
|
|
828
831
|
createLCR,
|
|
829
832
|
createMediaLCR,
|
package/dist/index.d.cts
CHANGED
|
@@ -5,8 +5,10 @@ import { LanguageModelV3 } from '@ai-sdk/provider';
|
|
|
5
5
|
*
|
|
6
6
|
* A LanguageModelV3 that wraps an ordered, cheapest-first list of providers:
|
|
7
7
|
* it serves from the first healthy one, switches to the next on a retryable
|
|
8
|
-
* error (streaming-safe), and
|
|
9
|
-
*
|
|
8
|
+
* error (streaming-safe), and periodically re-probes the cheapest provider
|
|
9
|
+
* (every `resetIntervalMs` after a failover — under load too, not only when
|
|
10
|
+
* idle). It also computes per-call cost from each provider's price and fires
|
|
11
|
+
* `onCost`.
|
|
10
12
|
*
|
|
11
13
|
* The switching loop is adapted from `ai-fallback` (MIT, © remorses) — its
|
|
12
14
|
* streaming-safe fallback approach — reimplemented here so ai-lcr owns its core
|
|
@@ -28,6 +30,17 @@ interface CostEvent {
|
|
|
28
30
|
/** Computed from the serving provider's `cost`; 0 if no price was given. */
|
|
29
31
|
costUsd: number;
|
|
30
32
|
}
|
|
33
|
+
/**
|
|
34
|
+
* Coarse error category for a failed attempt — distinct from `errorClass`
|
|
35
|
+
* (which is the raw status/pattern). Use it to alert: `"auth"` and `"billing"`
|
|
36
|
+
* mean a config/account problem masquerading as a healthy failover, the thing
|
|
37
|
+
* you want to page on rather than silently keep burning the pricey fallback.
|
|
38
|
+
* - "transient": rate limit / overload / 5xx — expected, self-healing.
|
|
39
|
+
* - "auth": 401 / 403 — a misconfigured or revoked key.
|
|
40
|
+
* - "billing": 402 / out-of-credit / quota — account needs topping up.
|
|
41
|
+
* - "client": a non-retryable caller error (e.g. 400 bad request).
|
|
42
|
+
*/
|
|
43
|
+
type ErrorKind = "transient" | "auth" | "billing" | "client";
|
|
31
44
|
/** One provider attempt within a single request. */
|
|
32
45
|
interface RouteAttempt {
|
|
33
46
|
/** Provider label that was tried (e.g. "tokenmart"). */
|
|
@@ -38,6 +51,8 @@ interface RouteAttempt {
|
|
|
38
51
|
latencyMs: number;
|
|
39
52
|
/** Normalized failure reason when `ok` is false (e.g. "502", "rate_limit", "timeout"). */
|
|
40
53
|
errorClass?: string;
|
|
54
|
+
/** Coarse category of the failure when `ok` is false. See {@link ErrorKind}. */
|
|
55
|
+
kind?: ErrorKind;
|
|
41
56
|
}
|
|
42
57
|
/**
|
|
43
58
|
* One settled request, with its full failover chain. Emitted exactly once per
|
|
@@ -65,13 +80,12 @@ interface CallRecord {
|
|
|
65
80
|
/** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
|
|
66
81
|
costUsd: number;
|
|
67
82
|
/**
|
|
68
|
-
* What
|
|
69
|
-
* provider
|
|
70
|
-
*
|
|
71
|
-
*
|
|
72
|
-
* external price table needed.
|
|
83
|
+
* What the same request would have cost on the most expensive configured
|
|
84
|
+
* provider — the savings baseline (`baselineUsd - costUsd`). Set by the media
|
|
85
|
+
* router; the text router omits it (left undefined) until a per-call text
|
|
86
|
+
* baseline lands. Optional so both routers share one {@link CallRecord} shape.
|
|
73
87
|
*/
|
|
74
|
-
baselineUsd
|
|
88
|
+
baselineUsd?: number;
|
|
75
89
|
}
|
|
76
90
|
/**
|
|
77
91
|
* Normalize an error into a short, log-friendly class for {@link CallRecord}.
|
|
@@ -80,6 +94,13 @@ interface CallRecord {
|
|
|
80
94
|
* Reuses the same signals as {@link isRetryableError} — no new vocabulary.
|
|
81
95
|
*/
|
|
82
96
|
declare function classifyError(error: unknown): string;
|
|
97
|
+
/**
|
|
98
|
+
* Categorize an error for alerting. Orthogonal to {@link isRetryableError}
|
|
99
|
+
* (which decides *whether* to fail over) — this decides *how alarming* the
|
|
100
|
+
* failover is. A run of `"auth"`/`"billing"` attempts means you're silently
|
|
101
|
+
* burning the pricey fallback because a key/account is broken: page on it.
|
|
102
|
+
*/
|
|
103
|
+
declare function classifyErrorKind(error: unknown): ErrorKind;
|
|
83
104
|
|
|
84
105
|
/**
|
|
85
106
|
* Human-readable one-liner for a {@link CallRecord}.
|
|
@@ -101,54 +122,6 @@ interface FormatOptions {
|
|
|
101
122
|
}
|
|
102
123
|
declare function formatCallRecord(record: CallRecord, opts?: FormatOptions): string;
|
|
103
124
|
|
|
104
|
-
/**
|
|
105
|
-
* Optional HTTP sink for `onCall` — ship each {@link CallRecord} as JSON to a
|
|
106
|
-
* collector (e.g. a self-hosted ai-lcr-dashboard `/api/ingest`, or any endpoint
|
|
107
|
-
* that accepts the CallRecord shape).
|
|
108
|
-
*
|
|
109
|
-
* Fully optional and dashboard-agnostic: omit it and ai-lcr stores nothing;
|
|
110
|
-
* point `url` at whatever you run. Logging must never break your app, so a
|
|
111
|
-
* failed POST is swallowed by default (surface it via `onError` if you want).
|
|
112
|
-
*
|
|
113
|
-
* import { createLCR, createHttpSink } from "ai-lcr";
|
|
114
|
-
* import { after } from "next/server"; // serverless: don't block the response
|
|
115
|
-
*
|
|
116
|
-
* const lcr = createLCR({
|
|
117
|
-
* models: { ... },
|
|
118
|
-
* onCall: createHttpSink({
|
|
119
|
-
* url: process.env.LCR_INGEST_URL + "/api/ingest",
|
|
120
|
-
* headers: { authorization: `Bearer ${process.env.LCR_INGEST_KEY}` },
|
|
121
|
-
* project: process.env.LCR_PROJECT,
|
|
122
|
-
* dispatch: after, // run after the response is sent
|
|
123
|
-
* }),
|
|
124
|
-
* });
|
|
125
|
-
*/
|
|
126
|
-
|
|
127
|
-
interface HttpSinkOptions {
|
|
128
|
-
/** Where to POST each CallRecord (a collector that accepts the JSON shape). */
|
|
129
|
-
url: string;
|
|
130
|
-
/** Extra headers, e.g. `{ authorization: ` + "`Bearer ${key}`" + ` }`. */
|
|
131
|
-
headers?: Record<string, string>;
|
|
132
|
-
/** Optional tenant/project tag merged into each payload (`{ project, ...record }`). */
|
|
133
|
-
project?: string;
|
|
134
|
-
/**
|
|
135
|
-
* Wrap the dispatch so it survives a serverless function returning. On
|
|
136
|
-
* Next.js pass `after` from "next/server"; elsewhere pass a `waitUntil`-style
|
|
137
|
-
* function. Defaults to running immediately — correct for long-lived servers,
|
|
138
|
-
* but on serverless an un-awaited POST may be cut off, so pass `after`.
|
|
139
|
-
*/
|
|
140
|
-
dispatch?: (task: () => void | Promise<void>) => void;
|
|
141
|
-
/** Custom fetch (tests / runtimes without a global `fetch`). */
|
|
142
|
-
fetchImpl?: typeof fetch;
|
|
143
|
-
/** Called if the POST fails. Failures are swallowed by default. */
|
|
144
|
-
onError?: (error: unknown) => void;
|
|
145
|
-
}
|
|
146
|
-
/**
|
|
147
|
-
* Build an `onCall` handler that POSTs each {@link CallRecord} to `url`.
|
|
148
|
-
* Returns a plain `(record) => void` — pass it straight to `createLCR`'s `onCall`.
|
|
149
|
-
*/
|
|
150
|
-
declare function createHttpSink(options: HttpSinkOptions): (record: CallRecord) => void;
|
|
151
|
-
|
|
152
125
|
/**
|
|
153
126
|
* ai-lcr media routing — Least Cost Routing for image & video models.
|
|
154
127
|
*
|
|
@@ -438,4 +411,4 @@ type LCRRouter = (modelName: string) => LanguageModelV3;
|
|
|
438
411
|
*/
|
|
439
412
|
declare function createLCR(config: LCRConfig): LCRRouter;
|
|
440
413
|
|
|
441
|
-
export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type
|
|
414
|
+
export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type ErrorKind, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, classifyErrorKind, comparePrices, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
|
package/dist/index.d.ts
CHANGED
|
@@ -5,8 +5,10 @@ import { LanguageModelV3 } from '@ai-sdk/provider';
|
|
|
5
5
|
*
|
|
6
6
|
* A LanguageModelV3 that wraps an ordered, cheapest-first list of providers:
|
|
7
7
|
* it serves from the first healthy one, switches to the next on a retryable
|
|
8
|
-
* error (streaming-safe), and
|
|
9
|
-
*
|
|
8
|
+
* error (streaming-safe), and periodically re-probes the cheapest provider
|
|
9
|
+
* (every `resetIntervalMs` after a failover — under load too, not only when
|
|
10
|
+
* idle). It also computes per-call cost from each provider's price and fires
|
|
11
|
+
* `onCost`.
|
|
10
12
|
*
|
|
11
13
|
* The switching loop is adapted from `ai-fallback` (MIT, © remorses) — its
|
|
12
14
|
* streaming-safe fallback approach — reimplemented here so ai-lcr owns its core
|
|
@@ -28,6 +30,17 @@ interface CostEvent {
|
|
|
28
30
|
/** Computed from the serving provider's `cost`; 0 if no price was given. */
|
|
29
31
|
costUsd: number;
|
|
30
32
|
}
|
|
33
|
+
/**
|
|
34
|
+
* Coarse error category for a failed attempt — distinct from `errorClass`
|
|
35
|
+
* (which is the raw status/pattern). Use it to alert: `"auth"` and `"billing"`
|
|
36
|
+
* mean a config/account problem masquerading as a healthy failover, the thing
|
|
37
|
+
* you want to page on rather than silently keep burning the pricey fallback.
|
|
38
|
+
* - "transient": rate limit / overload / 5xx — expected, self-healing.
|
|
39
|
+
* - "auth": 401 / 403 — a misconfigured or revoked key.
|
|
40
|
+
* - "billing": 402 / out-of-credit / quota — account needs topping up.
|
|
41
|
+
* - "client": a non-retryable caller error (e.g. 400 bad request).
|
|
42
|
+
*/
|
|
43
|
+
type ErrorKind = "transient" | "auth" | "billing" | "client";
|
|
31
44
|
/** One provider attempt within a single request. */
|
|
32
45
|
interface RouteAttempt {
|
|
33
46
|
/** Provider label that was tried (e.g. "tokenmart"). */
|
|
@@ -38,6 +51,8 @@ interface RouteAttempt {
|
|
|
38
51
|
latencyMs: number;
|
|
39
52
|
/** Normalized failure reason when `ok` is false (e.g. "502", "rate_limit", "timeout"). */
|
|
40
53
|
errorClass?: string;
|
|
54
|
+
/** Coarse category of the failure when `ok` is false. See {@link ErrorKind}. */
|
|
55
|
+
kind?: ErrorKind;
|
|
41
56
|
}
|
|
42
57
|
/**
|
|
43
58
|
* One settled request, with its full failover chain. Emitted exactly once per
|
|
@@ -65,13 +80,12 @@ interface CallRecord {
|
|
|
65
80
|
/** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
|
|
66
81
|
costUsd: number;
|
|
67
82
|
/**
|
|
68
|
-
* What
|
|
69
|
-
* provider
|
|
70
|
-
*
|
|
71
|
-
*
|
|
72
|
-
* external price table needed.
|
|
83
|
+
* What the same request would have cost on the most expensive configured
|
|
84
|
+
* provider — the savings baseline (`baselineUsd - costUsd`). Set by the media
|
|
85
|
+
* router; the text router omits it (left undefined) until a per-call text
|
|
86
|
+
* baseline lands. Optional so both routers share one {@link CallRecord} shape.
|
|
73
87
|
*/
|
|
74
|
-
baselineUsd
|
|
88
|
+
baselineUsd?: number;
|
|
75
89
|
}
|
|
76
90
|
/**
|
|
77
91
|
* Normalize an error into a short, log-friendly class for {@link CallRecord}.
|
|
@@ -80,6 +94,13 @@ interface CallRecord {
|
|
|
80
94
|
* Reuses the same signals as {@link isRetryableError} — no new vocabulary.
|
|
81
95
|
*/
|
|
82
96
|
declare function classifyError(error: unknown): string;
|
|
97
|
+
/**
|
|
98
|
+
* Categorize an error for alerting. Orthogonal to {@link isRetryableError}
|
|
99
|
+
* (which decides *whether* to fail over) — this decides *how alarming* the
|
|
100
|
+
* failover is. A run of `"auth"`/`"billing"` attempts means you're silently
|
|
101
|
+
* burning the pricey fallback because a key/account is broken: page on it.
|
|
102
|
+
*/
|
|
103
|
+
declare function classifyErrorKind(error: unknown): ErrorKind;
|
|
83
104
|
|
|
84
105
|
/**
|
|
85
106
|
* Human-readable one-liner for a {@link CallRecord}.
|
|
@@ -101,54 +122,6 @@ interface FormatOptions {
|
|
|
101
122
|
}
|
|
102
123
|
declare function formatCallRecord(record: CallRecord, opts?: FormatOptions): string;
|
|
103
124
|
|
|
104
|
-
/**
|
|
105
|
-
* Optional HTTP sink for `onCall` — ship each {@link CallRecord} as JSON to a
|
|
106
|
-
* collector (e.g. a self-hosted ai-lcr-dashboard `/api/ingest`, or any endpoint
|
|
107
|
-
* that accepts the CallRecord shape).
|
|
108
|
-
*
|
|
109
|
-
* Fully optional and dashboard-agnostic: omit it and ai-lcr stores nothing;
|
|
110
|
-
* point `url` at whatever you run. Logging must never break your app, so a
|
|
111
|
-
* failed POST is swallowed by default (surface it via `onError` if you want).
|
|
112
|
-
*
|
|
113
|
-
* import { createLCR, createHttpSink } from "ai-lcr";
|
|
114
|
-
* import { after } from "next/server"; // serverless: don't block the response
|
|
115
|
-
*
|
|
116
|
-
* const lcr = createLCR({
|
|
117
|
-
* models: { ... },
|
|
118
|
-
* onCall: createHttpSink({
|
|
119
|
-
* url: process.env.LCR_INGEST_URL + "/api/ingest",
|
|
120
|
-
* headers: { authorization: `Bearer ${process.env.LCR_INGEST_KEY}` },
|
|
121
|
-
* project: process.env.LCR_PROJECT,
|
|
122
|
-
* dispatch: after, // run after the response is sent
|
|
123
|
-
* }),
|
|
124
|
-
* });
|
|
125
|
-
*/
|
|
126
|
-
|
|
127
|
-
interface HttpSinkOptions {
|
|
128
|
-
/** Where to POST each CallRecord (a collector that accepts the JSON shape). */
|
|
129
|
-
url: string;
|
|
130
|
-
/** Extra headers, e.g. `{ authorization: ` + "`Bearer ${key}`" + ` }`. */
|
|
131
|
-
headers?: Record<string, string>;
|
|
132
|
-
/** Optional tenant/project tag merged into each payload (`{ project, ...record }`). */
|
|
133
|
-
project?: string;
|
|
134
|
-
/**
|
|
135
|
-
* Wrap the dispatch so it survives a serverless function returning. On
|
|
136
|
-
* Next.js pass `after` from "next/server"; elsewhere pass a `waitUntil`-style
|
|
137
|
-
* function. Defaults to running immediately — correct for long-lived servers,
|
|
138
|
-
* but on serverless an un-awaited POST may be cut off, so pass `after`.
|
|
139
|
-
*/
|
|
140
|
-
dispatch?: (task: () => void | Promise<void>) => void;
|
|
141
|
-
/** Custom fetch (tests / runtimes without a global `fetch`). */
|
|
142
|
-
fetchImpl?: typeof fetch;
|
|
143
|
-
/** Called if the POST fails. Failures are swallowed by default. */
|
|
144
|
-
onError?: (error: unknown) => void;
|
|
145
|
-
}
|
|
146
|
-
/**
|
|
147
|
-
* Build an `onCall` handler that POSTs each {@link CallRecord} to `url`.
|
|
148
|
-
* Returns a plain `(record) => void` — pass it straight to `createLCR`'s `onCall`.
|
|
149
|
-
*/
|
|
150
|
-
declare function createHttpSink(options: HttpSinkOptions): (record: CallRecord) => void;
|
|
151
|
-
|
|
152
125
|
/**
|
|
153
126
|
* ai-lcr media routing — Least Cost Routing for image & video models.
|
|
154
127
|
*
|
|
@@ -438,4 +411,4 @@ type LCRRouter = (modelName: string) => LanguageModelV3;
|
|
|
438
411
|
*/
|
|
439
412
|
declare function createLCR(config: LCRConfig): LCRRouter;
|
|
440
413
|
|
|
441
|
-
export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type
|
|
414
|
+
export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type ErrorKind, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, classifyErrorKind, comparePrices, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
|
package/dist/index.js
CHANGED
|
@@ -47,6 +47,16 @@ function classifyError(error) {
|
|
|
47
47
|
const text = (e?.message ? String(e.message) : safeStringify(error)).toLowerCase();
|
|
48
48
|
return RETRYABLE_PATTERNS.find((p) => text.includes(p)) ?? "error";
|
|
49
49
|
}
|
|
50
|
+
var AUTH_STATUS = /* @__PURE__ */ new Set([401, 403]);
|
|
51
|
+
var BILLING_PATTERNS = ["insufficient", "credit", "quota", "billing", "payment required"];
|
|
52
|
+
function classifyErrorKind(error) {
|
|
53
|
+
const e = error;
|
|
54
|
+
const status = e?.statusCode ?? e?.status;
|
|
55
|
+
const text = (e?.message ? String(e.message) : safeStringify(error)).toLowerCase();
|
|
56
|
+
if (typeof status === "number" && AUTH_STATUS.has(status)) return "auth";
|
|
57
|
+
if (status === 402 || BILLING_PATTERNS.some((p) => text.includes(p))) return "billing";
|
|
58
|
+
return isRetryableError(error) ? "transient" : "client";
|
|
59
|
+
}
|
|
50
60
|
var callSeq = 0;
|
|
51
61
|
function newCallId() {
|
|
52
62
|
const c = globalThis.crypto;
|
|
@@ -63,11 +73,20 @@ var LcrFallbackModel = class {
|
|
|
63
73
|
}
|
|
64
74
|
opts;
|
|
65
75
|
specificationVersion = "v3";
|
|
66
|
-
|
|
67
|
-
|
|
76
|
+
// Cross-request *hint* for where the next request starts: after a failover we
|
|
77
|
+
// remember the provider that worked so we don't re-probe a dead cheap one on
|
|
78
|
+
// every call. This is the ONLY shared mutable state — and crucially it is read
|
|
79
|
+
// once per request (snapshotted into a local cursor) and written once on
|
|
80
|
+
// settle, never used as a per-request loop bound. The within-request iteration
|
|
81
|
+
// is fully local, so concurrent requests can't corrupt each other's routing.
|
|
82
|
+
sticky = 0;
|
|
83
|
+
// When `sticky` was last advanced (a failover). The re-probe timer measures
|
|
84
|
+
// from THIS, not from the last call — so it fires under sustained traffic too,
|
|
85
|
+
// instead of being pushed forward forever by a busy stream of requests.
|
|
86
|
+
lastFailoverAt = Date.now();
|
|
68
87
|
resetIntervalMs;
|
|
69
88
|
get current() {
|
|
70
|
-
return this.opts.providers[this.
|
|
89
|
+
return this.opts.providers[this.sticky];
|
|
71
90
|
}
|
|
72
91
|
get modelId() {
|
|
73
92
|
return this.current.model.modelId;
|
|
@@ -78,14 +97,28 @@ var LcrFallbackModel = class {
|
|
|
78
97
|
get supportedUrls() {
|
|
79
98
|
return this.current.model.supportedUrls;
|
|
80
99
|
}
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
100
|
+
/**
|
|
101
|
+
* Index a new request should start at. If we're parked on a non-cheapest
|
|
102
|
+
* provider and it's been `resetIntervalMs` since the failover, snap back to
|
|
103
|
+
* the cheapest and re-probe it — this is what lets routing recover to the
|
|
104
|
+
* cheap source even during continuous traffic.
|
|
105
|
+
*/
|
|
106
|
+
startIndex() {
|
|
107
|
+
if (this.sticky !== 0 && Date.now() - this.lastFailoverAt >= this.resetIntervalMs) {
|
|
108
|
+
this.sticky = 0;
|
|
84
109
|
}
|
|
85
|
-
this.
|
|
110
|
+
return this.sticky;
|
|
86
111
|
}
|
|
87
|
-
|
|
88
|
-
|
|
112
|
+
/**
|
|
113
|
+
* A request settled on `winIndex`. Park there so the next request skips the
|
|
114
|
+
* providers we just learned are down. Stamp the failover time only when the
|
|
115
|
+
* parked provider actually CHANGES — so a steady stream of successful calls
|
|
116
|
+
* on the same fallback doesn't keep pushing the re-probe timer forward.
|
|
117
|
+
*/
|
|
118
|
+
settleSticky(winIndex) {
|
|
119
|
+
if (winIndex === this.sticky) return;
|
|
120
|
+
this.sticky = winIndex;
|
|
121
|
+
this.lastFailoverAt = Date.now();
|
|
89
122
|
}
|
|
90
123
|
shouldRetry(error) {
|
|
91
124
|
return (this.opts.shouldRetry ?? isRetryableError)(error);
|
|
@@ -99,23 +132,16 @@ var LcrFallbackModel = class {
|
|
|
99
132
|
provider: provider.label,
|
|
100
133
|
ok: false,
|
|
101
134
|
latencyMs: Date.now() - attemptStart,
|
|
102
|
-
errorClass: classifyError(error)
|
|
135
|
+
errorClass: classifyError(error),
|
|
136
|
+
kind: classifyErrorKind(error)
|
|
103
137
|
});
|
|
104
138
|
}
|
|
105
|
-
/** Cost of one route for the given token counts; 0 if it has no price. */
|
|
106
|
-
routeCost(p, inputTokens, outputTokens) {
|
|
107
|
-
return p.cost ? inputTokens / 1e6 * p.cost.input + outputTokens / 1e6 * p.cost.output : 0;
|
|
108
|
-
}
|
|
109
139
|
/** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
|
|
110
140
|
finalizeOk(ctx, provider, attemptStart, usage) {
|
|
111
141
|
ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
|
|
112
142
|
const inputTokens = usage?.inputTokens?.total ?? 0;
|
|
113
143
|
const outputTokens = usage?.outputTokens?.total ?? 0;
|
|
114
|
-
const costUsd =
|
|
115
|
-
const baselineUsd = this.opts.providers.reduce(
|
|
116
|
-
(max, p) => Math.max(max, this.routeCost(p, inputTokens, outputTokens)),
|
|
117
|
-
costUsd
|
|
118
|
-
);
|
|
144
|
+
const costUsd = provider.cost ? inputTokens / 1e6 * provider.cost.input + outputTokens / 1e6 * provider.cost.output : 0;
|
|
119
145
|
this.opts.onCost?.({
|
|
120
146
|
model: this.opts.modelName,
|
|
121
147
|
provider: provider.label,
|
|
@@ -133,8 +159,7 @@ var LcrFallbackModel = class {
|
|
|
133
159
|
latencyMs: Date.now() - ctx.startedAt,
|
|
134
160
|
inputTokens,
|
|
135
161
|
outputTokens,
|
|
136
|
-
costUsd
|
|
137
|
-
baselineUsd
|
|
162
|
+
costUsd
|
|
138
163
|
});
|
|
139
164
|
}
|
|
140
165
|
/** Every provider failed: fire `onCall` with no winner. */
|
|
@@ -149,20 +174,22 @@ var LcrFallbackModel = class {
|
|
|
149
174
|
latencyMs: Date.now() - ctx.startedAt,
|
|
150
175
|
inputTokens: 0,
|
|
151
176
|
outputTokens: 0,
|
|
152
|
-
costUsd: 0
|
|
153
|
-
baselineUsd: 0
|
|
177
|
+
costUsd: 0
|
|
154
178
|
});
|
|
155
179
|
}
|
|
156
180
|
async doGenerate(options) {
|
|
157
|
-
this.checkReset();
|
|
158
181
|
const ctx = this.startCall();
|
|
159
|
-
const
|
|
182
|
+
const providers = this.opts.providers;
|
|
183
|
+
const n = providers.length;
|
|
184
|
+
const start = this.startIndex();
|
|
160
185
|
let lastError;
|
|
161
|
-
for (; ; ) {
|
|
162
|
-
const
|
|
186
|
+
for (let tried = 0; tried < n; tried++) {
|
|
187
|
+
const idx = (start + tried) % n;
|
|
188
|
+
const provider = providers[idx];
|
|
163
189
|
const attemptStart = Date.now();
|
|
164
190
|
try {
|
|
165
191
|
const result = await provider.model.doGenerate(options);
|
|
192
|
+
this.settleSticky(idx);
|
|
166
193
|
this.finalizeOk(ctx, provider, attemptStart, result.usage);
|
|
167
194
|
return result;
|
|
168
195
|
} catch (error) {
|
|
@@ -174,29 +201,30 @@ var LcrFallbackModel = class {
|
|
|
174
201
|
}
|
|
175
202
|
this.opts.onError?.(error, provider.label);
|
|
176
203
|
this.recordFail(ctx, provider, attemptStart, error);
|
|
177
|
-
this.switchNext();
|
|
178
|
-
if (this.index === start) {
|
|
179
|
-
this.finalizeFail(ctx);
|
|
180
|
-
throw lastError;
|
|
181
|
-
}
|
|
182
204
|
}
|
|
183
205
|
}
|
|
206
|
+
this.finalizeFail(ctx);
|
|
207
|
+
throw lastError;
|
|
184
208
|
}
|
|
185
209
|
async doStream(options) {
|
|
186
|
-
this.
|
|
187
|
-
return this.doStreamWithCtx(options, this.startCall());
|
|
210
|
+
return this.doStreamWithCtx(options, this.startCall(), this.startIndex(), 0);
|
|
188
211
|
}
|
|
189
|
-
// The stream's failover recursion re-enters here with the SAME `ctx
|
|
190
|
-
//
|
|
191
|
-
//
|
|
192
|
-
|
|
212
|
+
// The stream's failover recursion re-enters here with the SAME `ctx` and a
|
|
213
|
+
// threaded-through local cursor (`idx`/`tried`), so a mid-stream switch keeps
|
|
214
|
+
// appending to one CallRecord and bounds itself on the local `tried` count —
|
|
215
|
+
// never on shared instance state. `finalizeOk`/`finalizeFail` fire exactly
|
|
216
|
+
// once per outer request.
|
|
217
|
+
async doStreamWithCtx(options, ctx, startIdx, alreadyTried) {
|
|
193
218
|
const self = this;
|
|
194
|
-
const
|
|
219
|
+
const providers = this.opts.providers;
|
|
220
|
+
const n = providers.length;
|
|
195
221
|
let result;
|
|
196
222
|
let serving;
|
|
197
223
|
let servingStart;
|
|
224
|
+
let idx = startIdx;
|
|
225
|
+
let tried = alreadyTried;
|
|
198
226
|
for (; ; ) {
|
|
199
|
-
serving =
|
|
227
|
+
serving = providers[idx];
|
|
200
228
|
servingStart = Date.now();
|
|
201
229
|
try {
|
|
202
230
|
result = await serving.model.doStream(options);
|
|
@@ -209,15 +237,18 @@ var LcrFallbackModel = class {
|
|
|
209
237
|
}
|
|
210
238
|
this.opts.onError?.(error, serving.label);
|
|
211
239
|
this.recordFail(ctx, serving, servingStart, error);
|
|
212
|
-
|
|
213
|
-
if (
|
|
240
|
+
tried++;
|
|
241
|
+
if (tried >= n) {
|
|
214
242
|
this.finalizeFail(ctx);
|
|
215
243
|
throw error;
|
|
216
244
|
}
|
|
245
|
+
idx = (idx + 1) % n;
|
|
217
246
|
}
|
|
218
247
|
}
|
|
219
248
|
const servingProvider = serving;
|
|
220
249
|
const servingAttemptStart = servingStart;
|
|
250
|
+
const servingIdx = idx;
|
|
251
|
+
const triedBeforeServing = tried;
|
|
221
252
|
let usage;
|
|
222
253
|
let streamedAny = false;
|
|
223
254
|
const stream = new ReadableStream({
|
|
@@ -236,20 +267,26 @@ var LcrFallbackModel = class {
|
|
|
236
267
|
controller.enqueue(value);
|
|
237
268
|
if (value.type !== "stream-start") streamedAny = true;
|
|
238
269
|
}
|
|
270
|
+
self.settleSticky(servingIdx);
|
|
239
271
|
self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage);
|
|
240
272
|
controller.close();
|
|
241
273
|
} catch (error) {
|
|
242
274
|
self.opts.onError?.(error, servingProvider.label);
|
|
243
275
|
self.recordFail(ctx, servingProvider, servingAttemptStart, error);
|
|
244
276
|
if (!streamedAny) {
|
|
245
|
-
|
|
246
|
-
if (
|
|
277
|
+
const nextTried = triedBeforeServing + 1;
|
|
278
|
+
if (nextTried >= n) {
|
|
247
279
|
self.finalizeFail(ctx);
|
|
248
280
|
controller.error(error);
|
|
249
281
|
return;
|
|
250
282
|
}
|
|
251
283
|
try {
|
|
252
|
-
const next = await self.doStreamWithCtx(
|
|
284
|
+
const next = await self.doStreamWithCtx(
|
|
285
|
+
options,
|
|
286
|
+
ctx,
|
|
287
|
+
(servingIdx + 1) % n,
|
|
288
|
+
nextTried
|
|
289
|
+
);
|
|
253
290
|
const nextReader = next.stream.getReader();
|
|
254
291
|
try {
|
|
255
292
|
for (; ; ) {
|
|
@@ -306,40 +343,6 @@ function formatCallRecord(record, opts = {}) {
|
|
|
306
343
|
return line;
|
|
307
344
|
}
|
|
308
345
|
|
|
309
|
-
// src/sink.ts
|
|
310
|
-
function createHttpSink(options) {
|
|
311
|
-
const {
|
|
312
|
-
url,
|
|
313
|
-
headers,
|
|
314
|
-
project,
|
|
315
|
-
dispatch = (task) => {
|
|
316
|
-
void task();
|
|
317
|
-
},
|
|
318
|
-
fetchImpl,
|
|
319
|
-
onError
|
|
320
|
-
} = options;
|
|
321
|
-
const doFetch = fetchImpl ?? globalThis.fetch;
|
|
322
|
-
return (record) => {
|
|
323
|
-
if (!doFetch) {
|
|
324
|
-
onError?.(new Error("ai-lcr: no fetch available for createHttpSink"));
|
|
325
|
-
return;
|
|
326
|
-
}
|
|
327
|
-
const payload = project ? { project, ...record } : record;
|
|
328
|
-
dispatch(async () => {
|
|
329
|
-
try {
|
|
330
|
-
await doFetch(url, {
|
|
331
|
-
method: "POST",
|
|
332
|
-
headers: { "content-type": "application/json", ...headers },
|
|
333
|
-
body: JSON.stringify(payload),
|
|
334
|
-
keepalive: true
|
|
335
|
-
});
|
|
336
|
-
} catch (err) {
|
|
337
|
-
onError?.(err);
|
|
338
|
-
}
|
|
339
|
-
});
|
|
340
|
-
};
|
|
341
|
-
}
|
|
342
|
-
|
|
343
346
|
// src/media.ts
|
|
344
347
|
var DEFAULT_REFERENCE = {
|
|
345
348
|
image: { width: 1920, height: 1080 },
|
|
@@ -782,8 +785,8 @@ export {
|
|
|
782
785
|
MEDIA_PRICING,
|
|
783
786
|
cheapestRoute,
|
|
784
787
|
classifyError,
|
|
788
|
+
classifyErrorKind,
|
|
785
789
|
comparePrices,
|
|
786
|
-
createHttpSink,
|
|
787
790
|
createKunavoMediaAdapter,
|
|
788
791
|
createLCR,
|
|
789
792
|
createMediaLCR,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ai-lcr",
|
|
3
|
-
"version": "0.2.
|
|
3
|
+
"version": "0.2.3",
|
|
4
4
|
"description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|
|
@@ -39,13 +39,15 @@
|
|
|
39
39
|
"files": [
|
|
40
40
|
"dist",
|
|
41
41
|
"README.md",
|
|
42
|
-
"LICENSE"
|
|
42
|
+
"LICENSE",
|
|
43
|
+
"CHANGELOG.md"
|
|
43
44
|
],
|
|
44
45
|
"scripts": {
|
|
45
46
|
"build": "tsup src/index.ts --format esm,cjs --dts --clean",
|
|
46
47
|
"typecheck": "tsc --noEmit",
|
|
47
48
|
"test": "vitest run",
|
|
48
|
-
"test:watch": "vitest"
|
|
49
|
+
"test:watch": "vitest",
|
|
50
|
+
"prepublishOnly": "npm run build && npm run typecheck && npm test"
|
|
49
51
|
},
|
|
50
52
|
"peerDependencies": {
|
|
51
53
|
"ai": "^6.0.0"
|