ai-lcr 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,60 @@
1
+ # Changelog
2
+
3
+ All notable changes to `ai-lcr` are documented here. The format follows
4
+ [Keep a Changelog](https://keepachangelog.com/), and the project adheres to
5
+ [Semantic Versioning](https://semver.org/).
6
+
7
+ ## [0.2.3] — 2026-06-01
8
+
9
+ Release-quality and engine-correctness pass.
10
+
11
+ ### Fixed
12
+
13
+ - **Build was red on `main`.** `media.ts` set `CallRecord.baselineUsd` but the
14
+ type never declared it, so `tsc`/`npm run build` failed while `npm test`
15
+ (which doesn't typecheck) stayed green. `baselineUsd?: number` is now part of
16
+ `CallRecord`. The text router leaves it `undefined`; the media router sets it.
17
+ - **Failover used shared mutable state across concurrent requests.** The active
18
+ provider index was an instance field used both as the per-request loop cursor
19
+ and the loop's termination check. Two requests sharing one model instance
20
+ could clobber each other's cursor mid-flight (skipped providers, wrong
21
+ termination). Each request now walks providers on a fully local cursor; the
22
+ only shared state is a "where to start next" hint, read once and written once.
23
+ - **Cheapest provider was never re-probed under sustained traffic.** The
24
+ snap-back-to-cheapest timer reset on *every* call, so with calls more frequent
25
+ than `resetIntervalMs` it never fired — one blip pinned you on the expensive
26
+ fallback indefinitely (exactly when spend is highest). The timer now measures
27
+ from the last *failover*, so re-probe fires under load too.
28
+
29
+ ### Added
30
+
31
+ - **`classifyErrorKind(error)` and `RouteAttempt.kind`** (`"transient" | "auth"
32
+ | "billing" | "client"`). 401/403 (auth) and 402/out-of-credit (billing)
33
+ still fail over so the request survives — but they're now tagged distinctly
34
+ from transient 429/5xx, so a misconfigured key silently burning the pricey
35
+ fallback is something you can alert on instead of mistaking for healthy
36
+ routing.
37
+ - **Continuous Integration** (`.github/workflows/ci.yml`): `build` +
38
+ `typecheck` + `test` on Node 20 & 22, plus a `pack-smoke` job that installs
39
+ the actual `npm pack` tarball into a clean directory and imports it (ESM and
40
+ CJS) — catching dropped exports and broken `dist` that an in-repo test can't.
41
+ - **`prepublishOnly` gate**: `npm publish` now runs build + typecheck + test
42
+ first, so a red tree can't be published.
43
+ - **Public-export surface test** (`public-api.test.ts`): pins every runtime
44
+ export by name, so removing one fails loudly and adding one is deliberate.
45
+
46
+ ## [0.2.1] — earlier
47
+
48
+ - `onCall` correlated `CallRecord` + `formatCallRecord` one-liner for the text
49
+ router, extended to the media router (image/video).
50
+
51
+ ## [0.2.0] — earlier
52
+
53
+ - Observability: `onCall` / `CallRecord`, `formatCallRecord`.
54
+
55
+ ## [0.1.x] — earlier
56
+
57
+ - Dual ESM/CJS build. Media (image/video) least-cost routing with the Runware
58
+ and Kunavo adapters; cap-aware failover for the text router.
59
+
60
+ [0.2.3]: https://github.com/victorzhrn/ai-lcr/releases/tag/v0.2.3
package/dist/index.cjs CHANGED
@@ -24,8 +24,8 @@ __export(index_exports, {
24
24
  MEDIA_PRICING: () => MEDIA_PRICING,
25
25
  cheapestRoute: () => cheapestRoute,
26
26
  classifyError: () => classifyError,
27
+ classifyErrorKind: () => classifyErrorKind,
27
28
  comparePrices: () => comparePrices,
28
- createHttpSink: () => createHttpSink,
29
29
  createKunavoMediaAdapter: () => createKunavoMediaAdapter,
30
30
  createLCR: () => createLCR,
31
31
  createMediaLCR: () => createMediaLCR,
@@ -86,6 +86,16 @@ function classifyError(error) {
86
86
  const text = (e?.message ? String(e.message) : safeStringify(error)).toLowerCase();
87
87
  return RETRYABLE_PATTERNS.find((p) => text.includes(p)) ?? "error";
88
88
  }
89
+ var AUTH_STATUS = /* @__PURE__ */ new Set([401, 403]);
90
+ var BILLING_PATTERNS = ["insufficient", "credit", "quota", "billing", "payment required"];
91
+ function classifyErrorKind(error) {
92
+ const e = error;
93
+ const status = e?.statusCode ?? e?.status;
94
+ const text = (e?.message ? String(e.message) : safeStringify(error)).toLowerCase();
95
+ if (typeof status === "number" && AUTH_STATUS.has(status)) return "auth";
96
+ if (status === 402 || BILLING_PATTERNS.some((p) => text.includes(p))) return "billing";
97
+ return isRetryableError(error) ? "transient" : "client";
98
+ }
89
99
  var callSeq = 0;
90
100
  function newCallId() {
91
101
  const c = globalThis.crypto;
@@ -102,11 +112,20 @@ var LcrFallbackModel = class {
102
112
  }
103
113
  opts;
104
114
  specificationVersion = "v3";
105
- index = 0;
106
- lastReset = Date.now();
115
+ // Cross-request *hint* for where the next request starts: after a failover we
116
+ // remember the provider that worked so we don't re-probe a dead cheap one on
117
+ // every call. This is the ONLY shared mutable state — and crucially it is read
118
+ // once per request (snapshotted into a local cursor) and written once on
119
+ // settle, never used as a per-request loop bound. The within-request iteration
120
+ // is fully local, so concurrent requests can't corrupt each other's routing.
121
+ sticky = 0;
122
+ // When `sticky` was last advanced (a failover). The re-probe timer measures
123
+ // from THIS, not from the last call — so it fires under sustained traffic too,
124
+ // instead of being pushed forward forever by a busy stream of requests.
125
+ lastFailoverAt = Date.now();
107
126
  resetIntervalMs;
108
127
  get current() {
109
- return this.opts.providers[this.index];
128
+ return this.opts.providers[this.sticky];
110
129
  }
111
130
  get modelId() {
112
131
  return this.current.model.modelId;
@@ -117,14 +136,28 @@ var LcrFallbackModel = class {
117
136
  get supportedUrls() {
118
137
  return this.current.model.supportedUrls;
119
138
  }
120
- checkReset() {
121
- if (this.index !== 0 && Date.now() - this.lastReset >= this.resetIntervalMs) {
122
- this.index = 0;
139
+ /**
140
+ * Index a new request should start at. If we're parked on a non-cheapest
141
+ * provider and it's been `resetIntervalMs` since the failover, snap back to
142
+ * the cheapest and re-probe it — this is what lets routing recover to the
143
+ * cheap source even during continuous traffic.
144
+ */
145
+ startIndex() {
146
+ if (this.sticky !== 0 && Date.now() - this.lastFailoverAt >= this.resetIntervalMs) {
147
+ this.sticky = 0;
123
148
  }
124
- this.lastReset = Date.now();
149
+ return this.sticky;
125
150
  }
126
- switchNext() {
127
- this.index = (this.index + 1) % this.opts.providers.length;
151
+ /**
152
+ * A request settled on `winIndex`. Park there so the next request skips the
153
+ * providers we just learned are down. Stamp the failover time only when the
154
+ * parked provider actually CHANGES — so a steady stream of successful calls
155
+ * on the same fallback doesn't keep pushing the re-probe timer forward.
156
+ */
157
+ settleSticky(winIndex) {
158
+ if (winIndex === this.sticky) return;
159
+ this.sticky = winIndex;
160
+ this.lastFailoverAt = Date.now();
128
161
  }
129
162
  shouldRetry(error) {
130
163
  return (this.opts.shouldRetry ?? isRetryableError)(error);
@@ -138,23 +171,16 @@ var LcrFallbackModel = class {
138
171
  provider: provider.label,
139
172
  ok: false,
140
173
  latencyMs: Date.now() - attemptStart,
141
- errorClass: classifyError(error)
174
+ errorClass: classifyError(error),
175
+ kind: classifyErrorKind(error)
142
176
  });
143
177
  }
144
- /** Cost of one route for the given token counts; 0 if it has no price. */
145
- routeCost(p, inputTokens, outputTokens) {
146
- return p.cost ? inputTokens / 1e6 * p.cost.input + outputTokens / 1e6 * p.cost.output : 0;
147
- }
148
178
  /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
149
179
  finalizeOk(ctx, provider, attemptStart, usage) {
150
180
  ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
151
181
  const inputTokens = usage?.inputTokens?.total ?? 0;
152
182
  const outputTokens = usage?.outputTokens?.total ?? 0;
153
- const costUsd = this.routeCost(provider, inputTokens, outputTokens);
154
- const baselineUsd = this.opts.providers.reduce(
155
- (max, p) => Math.max(max, this.routeCost(p, inputTokens, outputTokens)),
156
- costUsd
157
- );
183
+ const costUsd = provider.cost ? inputTokens / 1e6 * provider.cost.input + outputTokens / 1e6 * provider.cost.output : 0;
158
184
  this.opts.onCost?.({
159
185
  model: this.opts.modelName,
160
186
  provider: provider.label,
@@ -172,8 +198,7 @@ var LcrFallbackModel = class {
172
198
  latencyMs: Date.now() - ctx.startedAt,
173
199
  inputTokens,
174
200
  outputTokens,
175
- costUsd,
176
- baselineUsd
201
+ costUsd
177
202
  });
178
203
  }
179
204
  /** Every provider failed: fire `onCall` with no winner. */
@@ -188,20 +213,22 @@ var LcrFallbackModel = class {
188
213
  latencyMs: Date.now() - ctx.startedAt,
189
214
  inputTokens: 0,
190
215
  outputTokens: 0,
191
- costUsd: 0,
192
- baselineUsd: 0
216
+ costUsd: 0
193
217
  });
194
218
  }
195
219
  async doGenerate(options) {
196
- this.checkReset();
197
220
  const ctx = this.startCall();
198
- const start = this.index;
221
+ const providers = this.opts.providers;
222
+ const n = providers.length;
223
+ const start = this.startIndex();
199
224
  let lastError;
200
- for (; ; ) {
201
- const provider = this.current;
225
+ for (let tried = 0; tried < n; tried++) {
226
+ const idx = (start + tried) % n;
227
+ const provider = providers[idx];
202
228
  const attemptStart = Date.now();
203
229
  try {
204
230
  const result = await provider.model.doGenerate(options);
231
+ this.settleSticky(idx);
205
232
  this.finalizeOk(ctx, provider, attemptStart, result.usage);
206
233
  return result;
207
234
  } catch (error) {
@@ -213,29 +240,30 @@ var LcrFallbackModel = class {
213
240
  }
214
241
  this.opts.onError?.(error, provider.label);
215
242
  this.recordFail(ctx, provider, attemptStart, error);
216
- this.switchNext();
217
- if (this.index === start) {
218
- this.finalizeFail(ctx);
219
- throw lastError;
220
- }
221
243
  }
222
244
  }
245
+ this.finalizeFail(ctx);
246
+ throw lastError;
223
247
  }
224
248
  async doStream(options) {
225
- this.checkReset();
226
- return this.doStreamWithCtx(options, this.startCall());
249
+ return this.doStreamWithCtx(options, this.startCall(), this.startIndex(), 0);
227
250
  }
228
- // The stream's failover recursion re-enters here with the SAME `ctx`, so a
229
- // mid-stream switch keeps appending to one CallRecord instead of starting a
230
- // fresh one. `finalizeOk`/`finalizeFail` fire exactly once per outer request.
231
- async doStreamWithCtx(options, ctx) {
251
+ // The stream's failover recursion re-enters here with the SAME `ctx` and a
252
+ // threaded-through local cursor (`idx`/`tried`), so a mid-stream switch keeps
253
+ // appending to one CallRecord and bounds itself on the local `tried` count —
254
+ // never on shared instance state. `finalizeOk`/`finalizeFail` fire exactly
255
+ // once per outer request.
256
+ async doStreamWithCtx(options, ctx, startIdx, alreadyTried) {
232
257
  const self = this;
233
- const start = this.index;
258
+ const providers = this.opts.providers;
259
+ const n = providers.length;
234
260
  let result;
235
261
  let serving;
236
262
  let servingStart;
263
+ let idx = startIdx;
264
+ let tried = alreadyTried;
237
265
  for (; ; ) {
238
- serving = this.current;
266
+ serving = providers[idx];
239
267
  servingStart = Date.now();
240
268
  try {
241
269
  result = await serving.model.doStream(options);
@@ -248,15 +276,18 @@ var LcrFallbackModel = class {
248
276
  }
249
277
  this.opts.onError?.(error, serving.label);
250
278
  this.recordFail(ctx, serving, servingStart, error);
251
- this.switchNext();
252
- if (this.index === start) {
279
+ tried++;
280
+ if (tried >= n) {
253
281
  this.finalizeFail(ctx);
254
282
  throw error;
255
283
  }
284
+ idx = (idx + 1) % n;
256
285
  }
257
286
  }
258
287
  const servingProvider = serving;
259
288
  const servingAttemptStart = servingStart;
289
+ const servingIdx = idx;
290
+ const triedBeforeServing = tried;
260
291
  let usage;
261
292
  let streamedAny = false;
262
293
  const stream = new ReadableStream({
@@ -275,20 +306,26 @@ var LcrFallbackModel = class {
275
306
  controller.enqueue(value);
276
307
  if (value.type !== "stream-start") streamedAny = true;
277
308
  }
309
+ self.settleSticky(servingIdx);
278
310
  self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage);
279
311
  controller.close();
280
312
  } catch (error) {
281
313
  self.opts.onError?.(error, servingProvider.label);
282
314
  self.recordFail(ctx, servingProvider, servingAttemptStart, error);
283
315
  if (!streamedAny) {
284
- self.switchNext();
285
- if (self.index === start) {
316
+ const nextTried = triedBeforeServing + 1;
317
+ if (nextTried >= n) {
286
318
  self.finalizeFail(ctx);
287
319
  controller.error(error);
288
320
  return;
289
321
  }
290
322
  try {
291
- const next = await self.doStreamWithCtx(options, ctx);
323
+ const next = await self.doStreamWithCtx(
324
+ options,
325
+ ctx,
326
+ (servingIdx + 1) % n,
327
+ nextTried
328
+ );
292
329
  const nextReader = next.stream.getReader();
293
330
  try {
294
331
  for (; ; ) {
@@ -345,40 +382,6 @@ function formatCallRecord(record, opts = {}) {
345
382
  return line;
346
383
  }
347
384
 
348
- // src/sink.ts
349
- function createHttpSink(options) {
350
- const {
351
- url,
352
- headers,
353
- project,
354
- dispatch = (task) => {
355
- void task();
356
- },
357
- fetchImpl,
358
- onError
359
- } = options;
360
- const doFetch = fetchImpl ?? globalThis.fetch;
361
- return (record) => {
362
- if (!doFetch) {
363
- onError?.(new Error("ai-lcr: no fetch available for createHttpSink"));
364
- return;
365
- }
366
- const payload = project ? { project, ...record } : record;
367
- dispatch(async () => {
368
- try {
369
- await doFetch(url, {
370
- method: "POST",
371
- headers: { "content-type": "application/json", ...headers },
372
- body: JSON.stringify(payload),
373
- keepalive: true
374
- });
375
- } catch (err) {
376
- onError?.(err);
377
- }
378
- });
379
- };
380
- }
381
-
382
385
  // src/media.ts
383
386
  var DEFAULT_REFERENCE = {
384
387
  image: { width: 1920, height: 1080 },
@@ -822,8 +825,8 @@ function createLCR(config) {
822
825
  MEDIA_PRICING,
823
826
  cheapestRoute,
824
827
  classifyError,
828
+ classifyErrorKind,
825
829
  comparePrices,
826
- createHttpSink,
827
830
  createKunavoMediaAdapter,
828
831
  createLCR,
829
832
  createMediaLCR,
package/dist/index.d.cts CHANGED
@@ -5,8 +5,10 @@ import { LanguageModelV3 } from '@ai-sdk/provider';
5
5
  *
6
6
  * A LanguageModelV3 that wraps an ordered, cheapest-first list of providers:
7
7
  * it serves from the first healthy one, switches to the next on a retryable
8
- * error (streaming-safe), and snaps back to the cheapest after an idle window.
9
- * It also computes per-call cost from each provider's price and fires `onCost`.
8
+ * error (streaming-safe), and periodically re-probes the cheapest provider
9
+ * (every `resetIntervalMs` after a failover — under load too, not only when
10
+ * idle). It also computes per-call cost from each provider's price and fires
11
+ * `onCost`.
10
12
  *
11
13
  * The switching loop is adapted from `ai-fallback` (MIT, © remorses) — its
12
14
  * streaming-safe fallback approach — reimplemented here so ai-lcr owns its core
@@ -28,6 +30,17 @@ interface CostEvent {
28
30
  /** Computed from the serving provider's `cost`; 0 if no price was given. */
29
31
  costUsd: number;
30
32
  }
33
+ /**
34
+ * Coarse error category for a failed attempt — distinct from `errorClass`
35
+ * (which is the raw status/pattern). Use it to alert: `"auth"` and `"billing"`
36
+ * mean a config/account problem masquerading as a healthy failover, the thing
37
+ * you want to page on rather than silently keep burning the pricey fallback.
38
+ * - "transient": rate limit / overload / 5xx — expected, self-healing.
39
+ * - "auth": 401 / 403 — a misconfigured or revoked key.
40
+ * - "billing": 402 / out-of-credit / quota — account needs topping up.
41
+ * - "client": a non-retryable caller error (e.g. 400 bad request).
42
+ */
43
+ type ErrorKind = "transient" | "auth" | "billing" | "client";
31
44
  /** One provider attempt within a single request. */
32
45
  interface RouteAttempt {
33
46
  /** Provider label that was tried (e.g. "tokenmart"). */
@@ -38,6 +51,8 @@ interface RouteAttempt {
38
51
  latencyMs: number;
39
52
  /** Normalized failure reason when `ok` is false (e.g. "502", "rate_limit", "timeout"). */
40
53
  errorClass?: string;
54
+ /** Coarse category of the failure when `ok` is false. See {@link ErrorKind}. */
55
+ kind?: ErrorKind;
41
56
  }
42
57
  /**
43
58
  * One settled request, with its full failover chain. Emitted exactly once per
@@ -65,13 +80,12 @@ interface CallRecord {
65
80
  /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
66
81
  costUsd: number;
67
82
  /**
68
- * What these same tokens would have cost at the **most expensive** configured
69
- * provider for this model the "if you never routed cheap" baseline. Savings
70
- * = `baselineUsd - costUsd`. Equals `costUsd` (savings 0) when prices are
71
- * missing or the priciest route is the one that served. Self-contained: no
72
- * external price table needed.
83
+ * What the same request would have cost on the most expensive configured
84
+ * provider the savings baseline (`baselineUsd - costUsd`). Set by the media
85
+ * router; the text router omits it (left undefined) until a per-call text
86
+ * baseline lands. Optional so both routers share one {@link CallRecord} shape.
73
87
  */
74
- baselineUsd: number;
88
+ baselineUsd?: number;
75
89
  }
76
90
  /**
77
91
  * Normalize an error into a short, log-friendly class for {@link CallRecord}.
@@ -80,6 +94,13 @@ interface CallRecord {
80
94
  * Reuses the same signals as {@link isRetryableError} — no new vocabulary.
81
95
  */
82
96
  declare function classifyError(error: unknown): string;
97
+ /**
98
+ * Categorize an error for alerting. Orthogonal to {@link isRetryableError}
99
+ * (which decides *whether* to fail over) — this decides *how alarming* the
100
+ * failover is. A run of `"auth"`/`"billing"` attempts means you're silently
101
+ * burning the pricey fallback because a key/account is broken: page on it.
102
+ */
103
+ declare function classifyErrorKind(error: unknown): ErrorKind;
83
104
 
84
105
  /**
85
106
  * Human-readable one-liner for a {@link CallRecord}.
@@ -101,54 +122,6 @@ interface FormatOptions {
101
122
  }
102
123
  declare function formatCallRecord(record: CallRecord, opts?: FormatOptions): string;
103
124
 
104
- /**
105
- * Optional HTTP sink for `onCall` — ship each {@link CallRecord} as JSON to a
106
- * collector (e.g. a self-hosted ai-lcr-dashboard `/api/ingest`, or any endpoint
107
- * that accepts the CallRecord shape).
108
- *
109
- * Fully optional and dashboard-agnostic: omit it and ai-lcr stores nothing;
110
- * point `url` at whatever you run. Logging must never break your app, so a
111
- * failed POST is swallowed by default (surface it via `onError` if you want).
112
- *
113
- * import { createLCR, createHttpSink } from "ai-lcr";
114
- * import { after } from "next/server"; // serverless: don't block the response
115
- *
116
- * const lcr = createLCR({
117
- * models: { ... },
118
- * onCall: createHttpSink({
119
- * url: process.env.LCR_INGEST_URL + "/api/ingest",
120
- * headers: { authorization: `Bearer ${process.env.LCR_INGEST_KEY}` },
121
- * project: process.env.LCR_PROJECT,
122
- * dispatch: after, // run after the response is sent
123
- * }),
124
- * });
125
- */
126
-
127
- interface HttpSinkOptions {
128
- /** Where to POST each CallRecord (a collector that accepts the JSON shape). */
129
- url: string;
130
- /** Extra headers, e.g. `{ authorization: ` + "`Bearer ${key}`" + ` }`. */
131
- headers?: Record<string, string>;
132
- /** Optional tenant/project tag merged into each payload (`{ project, ...record }`). */
133
- project?: string;
134
- /**
135
- * Wrap the dispatch so it survives a serverless function returning. On
136
- * Next.js pass `after` from "next/server"; elsewhere pass a `waitUntil`-style
137
- * function. Defaults to running immediately — correct for long-lived servers,
138
- * but on serverless an un-awaited POST may be cut off, so pass `after`.
139
- */
140
- dispatch?: (task: () => void | Promise<void>) => void;
141
- /** Custom fetch (tests / runtimes without a global `fetch`). */
142
- fetchImpl?: typeof fetch;
143
- /** Called if the POST fails. Failures are swallowed by default. */
144
- onError?: (error: unknown) => void;
145
- }
146
- /**
147
- * Build an `onCall` handler that POSTs each {@link CallRecord} to `url`.
148
- * Returns a plain `(record) => void` — pass it straight to `createLCR`'s `onCall`.
149
- */
150
- declare function createHttpSink(options: HttpSinkOptions): (record: CallRecord) => void;
151
-
152
125
  /**
153
126
  * ai-lcr media routing — Least Cost Routing for image & video models.
154
127
  *
@@ -438,4 +411,4 @@ type LCRRouter = (modelName: string) => LanguageModelV3;
438
411
  */
439
412
  declare function createLCR(config: LCRConfig): LCRRouter;
440
413
 
441
- export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type FormatOptions, type HttpSinkOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, comparePrices, createHttpSink, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
414
+ export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type ErrorKind, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, classifyErrorKind, comparePrices, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
package/dist/index.d.ts CHANGED
@@ -5,8 +5,10 @@ import { LanguageModelV3 } from '@ai-sdk/provider';
5
5
  *
6
6
  * A LanguageModelV3 that wraps an ordered, cheapest-first list of providers:
7
7
  * it serves from the first healthy one, switches to the next on a retryable
8
- * error (streaming-safe), and snaps back to the cheapest after an idle window.
9
- * It also computes per-call cost from each provider's price and fires `onCost`.
8
+ * error (streaming-safe), and periodically re-probes the cheapest provider
9
+ * (every `resetIntervalMs` after a failover — under load too, not only when
10
+ * idle). It also computes per-call cost from each provider's price and fires
11
+ * `onCost`.
10
12
  *
11
13
  * The switching loop is adapted from `ai-fallback` (MIT, © remorses) — its
12
14
  * streaming-safe fallback approach — reimplemented here so ai-lcr owns its core
@@ -28,6 +30,17 @@ interface CostEvent {
28
30
  /** Computed from the serving provider's `cost`; 0 if no price was given. */
29
31
  costUsd: number;
30
32
  }
33
+ /**
34
+ * Coarse error category for a failed attempt — distinct from `errorClass`
35
+ * (which is the raw status/pattern). Use it to alert: `"auth"` and `"billing"`
36
+ * mean a config/account problem masquerading as a healthy failover, the thing
37
+ * you want to page on rather than silently keep burning the pricey fallback.
38
+ * - "transient": rate limit / overload / 5xx — expected, self-healing.
39
+ * - "auth": 401 / 403 — a misconfigured or revoked key.
40
+ * - "billing": 402 / out-of-credit / quota — account needs topping up.
41
+ * - "client": a non-retryable caller error (e.g. 400 bad request).
42
+ */
43
+ type ErrorKind = "transient" | "auth" | "billing" | "client";
31
44
  /** One provider attempt within a single request. */
32
45
  interface RouteAttempt {
33
46
  /** Provider label that was tried (e.g. "tokenmart"). */
@@ -38,6 +51,8 @@ interface RouteAttempt {
38
51
  latencyMs: number;
39
52
  /** Normalized failure reason when `ok` is false (e.g. "502", "rate_limit", "timeout"). */
40
53
  errorClass?: string;
54
+ /** Coarse category of the failure when `ok` is false. See {@link ErrorKind}. */
55
+ kind?: ErrorKind;
41
56
  }
42
57
  /**
43
58
  * One settled request, with its full failover chain. Emitted exactly once per
@@ -65,13 +80,12 @@ interface CallRecord {
65
80
  /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
66
81
  costUsd: number;
67
82
  /**
68
- * What these same tokens would have cost at the **most expensive** configured
69
- * provider for this model the "if you never routed cheap" baseline. Savings
70
- * = `baselineUsd - costUsd`. Equals `costUsd` (savings 0) when prices are
71
- * missing or the priciest route is the one that served. Self-contained: no
72
- * external price table needed.
83
+ * What the same request would have cost on the most expensive configured
84
+ * provider the savings baseline (`baselineUsd - costUsd`). Set by the media
85
+ * router; the text router omits it (left undefined) until a per-call text
86
+ * baseline lands. Optional so both routers share one {@link CallRecord} shape.
73
87
  */
74
- baselineUsd: number;
88
+ baselineUsd?: number;
75
89
  }
76
90
  /**
77
91
  * Normalize an error into a short, log-friendly class for {@link CallRecord}.
@@ -80,6 +94,13 @@ interface CallRecord {
80
94
  * Reuses the same signals as {@link isRetryableError} — no new vocabulary.
81
95
  */
82
96
  declare function classifyError(error: unknown): string;
97
+ /**
98
+ * Categorize an error for alerting. Orthogonal to {@link isRetryableError}
99
+ * (which decides *whether* to fail over) — this decides *how alarming* the
100
+ * failover is. A run of `"auth"`/`"billing"` attempts means you're silently
101
+ * burning the pricey fallback because a key/account is broken: page on it.
102
+ */
103
+ declare function classifyErrorKind(error: unknown): ErrorKind;
83
104
 
84
105
  /**
85
106
  * Human-readable one-liner for a {@link CallRecord}.
@@ -101,54 +122,6 @@ interface FormatOptions {
101
122
  }
102
123
  declare function formatCallRecord(record: CallRecord, opts?: FormatOptions): string;
103
124
 
104
- /**
105
- * Optional HTTP sink for `onCall` — ship each {@link CallRecord} as JSON to a
106
- * collector (e.g. a self-hosted ai-lcr-dashboard `/api/ingest`, or any endpoint
107
- * that accepts the CallRecord shape).
108
- *
109
- * Fully optional and dashboard-agnostic: omit it and ai-lcr stores nothing;
110
- * point `url` at whatever you run. Logging must never break your app, so a
111
- * failed POST is swallowed by default (surface it via `onError` if you want).
112
- *
113
- * import { createLCR, createHttpSink } from "ai-lcr";
114
- * import { after } from "next/server"; // serverless: don't block the response
115
- *
116
- * const lcr = createLCR({
117
- * models: { ... },
118
- * onCall: createHttpSink({
119
- * url: process.env.LCR_INGEST_URL + "/api/ingest",
120
- * headers: { authorization: `Bearer ${process.env.LCR_INGEST_KEY}` },
121
- * project: process.env.LCR_PROJECT,
122
- * dispatch: after, // run after the response is sent
123
- * }),
124
- * });
125
- */
126
-
127
- interface HttpSinkOptions {
128
- /** Where to POST each CallRecord (a collector that accepts the JSON shape). */
129
- url: string;
130
- /** Extra headers, e.g. `{ authorization: ` + "`Bearer ${key}`" + ` }`. */
131
- headers?: Record<string, string>;
132
- /** Optional tenant/project tag merged into each payload (`{ project, ...record }`). */
133
- project?: string;
134
- /**
135
- * Wrap the dispatch so it survives a serverless function returning. On
136
- * Next.js pass `after` from "next/server"; elsewhere pass a `waitUntil`-style
137
- * function. Defaults to running immediately — correct for long-lived servers,
138
- * but on serverless an un-awaited POST may be cut off, so pass `after`.
139
- */
140
- dispatch?: (task: () => void | Promise<void>) => void;
141
- /** Custom fetch (tests / runtimes without a global `fetch`). */
142
- fetchImpl?: typeof fetch;
143
- /** Called if the POST fails. Failures are swallowed by default. */
144
- onError?: (error: unknown) => void;
145
- }
146
- /**
147
- * Build an `onCall` handler that POSTs each {@link CallRecord} to `url`.
148
- * Returns a plain `(record) => void` — pass it straight to `createLCR`'s `onCall`.
149
- */
150
- declare function createHttpSink(options: HttpSinkOptions): (record: CallRecord) => void;
151
-
152
125
  /**
153
126
  * ai-lcr media routing — Least Cost Routing for image & video models.
154
127
  *
@@ -438,4 +411,4 @@ type LCRRouter = (modelName: string) => LanguageModelV3;
438
411
  */
439
412
  declare function createLCR(config: LCRConfig): LCRRouter;
440
413
 
441
- export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type FormatOptions, type HttpSinkOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, comparePrices, createHttpSink, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
414
+ export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type ErrorKind, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, classifyErrorKind, comparePrices, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
package/dist/index.js CHANGED
@@ -47,6 +47,16 @@ function classifyError(error) {
47
47
  const text = (e?.message ? String(e.message) : safeStringify(error)).toLowerCase();
48
48
  return RETRYABLE_PATTERNS.find((p) => text.includes(p)) ?? "error";
49
49
  }
50
+ var AUTH_STATUS = /* @__PURE__ */ new Set([401, 403]);
51
+ var BILLING_PATTERNS = ["insufficient", "credit", "quota", "billing", "payment required"];
52
+ function classifyErrorKind(error) {
53
+ const e = error;
54
+ const status = e?.statusCode ?? e?.status;
55
+ const text = (e?.message ? String(e.message) : safeStringify(error)).toLowerCase();
56
+ if (typeof status === "number" && AUTH_STATUS.has(status)) return "auth";
57
+ if (status === 402 || BILLING_PATTERNS.some((p) => text.includes(p))) return "billing";
58
+ return isRetryableError(error) ? "transient" : "client";
59
+ }
50
60
  var callSeq = 0;
51
61
  function newCallId() {
52
62
  const c = globalThis.crypto;
@@ -63,11 +73,20 @@ var LcrFallbackModel = class {
63
73
  }
64
74
  opts;
65
75
  specificationVersion = "v3";
66
- index = 0;
67
- lastReset = Date.now();
76
+ // Cross-request *hint* for where the next request starts: after a failover we
77
+ // remember the provider that worked so we don't re-probe a dead cheap one on
78
+ // every call. This is the ONLY shared mutable state — and crucially it is read
79
+ // once per request (snapshotted into a local cursor) and written once on
80
+ // settle, never used as a per-request loop bound. The within-request iteration
81
+ // is fully local, so concurrent requests can't corrupt each other's routing.
82
+ sticky = 0;
83
+ // When `sticky` was last advanced (a failover). The re-probe timer measures
84
+ // from THIS, not from the last call — so it fires under sustained traffic too,
85
+ // instead of being pushed forward forever by a busy stream of requests.
86
+ lastFailoverAt = Date.now();
68
87
  resetIntervalMs;
69
88
  get current() {
70
- return this.opts.providers[this.index];
89
+ return this.opts.providers[this.sticky];
71
90
  }
72
91
  get modelId() {
73
92
  return this.current.model.modelId;
@@ -78,14 +97,28 @@ var LcrFallbackModel = class {
78
97
  get supportedUrls() {
79
98
  return this.current.model.supportedUrls;
80
99
  }
81
- checkReset() {
82
- if (this.index !== 0 && Date.now() - this.lastReset >= this.resetIntervalMs) {
83
- this.index = 0;
100
+ /**
101
+ * Index a new request should start at. If we're parked on a non-cheapest
102
+ * provider and it's been `resetIntervalMs` since the failover, snap back to
103
+ * the cheapest and re-probe it — this is what lets routing recover to the
104
+ * cheap source even during continuous traffic.
105
+ */
106
+ startIndex() {
107
+ if (this.sticky !== 0 && Date.now() - this.lastFailoverAt >= this.resetIntervalMs) {
108
+ this.sticky = 0;
84
109
  }
85
- this.lastReset = Date.now();
110
+ return this.sticky;
86
111
  }
87
- switchNext() {
88
- this.index = (this.index + 1) % this.opts.providers.length;
112
+ /**
113
+ * A request settled on `winIndex`. Park there so the next request skips the
114
+ * providers we just learned are down. Stamp the failover time only when the
115
+ * parked provider actually CHANGES — so a steady stream of successful calls
116
+ * on the same fallback doesn't keep pushing the re-probe timer forward.
117
+ */
118
+ settleSticky(winIndex) {
119
+ if (winIndex === this.sticky) return;
120
+ this.sticky = winIndex;
121
+ this.lastFailoverAt = Date.now();
89
122
  }
90
123
  shouldRetry(error) {
91
124
  return (this.opts.shouldRetry ?? isRetryableError)(error);
@@ -99,23 +132,16 @@ var LcrFallbackModel = class {
99
132
  provider: provider.label,
100
133
  ok: false,
101
134
  latencyMs: Date.now() - attemptStart,
102
- errorClass: classifyError(error)
135
+ errorClass: classifyError(error),
136
+ kind: classifyErrorKind(error)
103
137
  });
104
138
  }
105
- /** Cost of one route for the given token counts; 0 if it has no price. */
106
- routeCost(p, inputTokens, outputTokens) {
107
- return p.cost ? inputTokens / 1e6 * p.cost.input + outputTokens / 1e6 * p.cost.output : 0;
108
- }
109
139
  /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
110
140
  finalizeOk(ctx, provider, attemptStart, usage) {
111
141
  ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
112
142
  const inputTokens = usage?.inputTokens?.total ?? 0;
113
143
  const outputTokens = usage?.outputTokens?.total ?? 0;
114
- const costUsd = this.routeCost(provider, inputTokens, outputTokens);
115
- const baselineUsd = this.opts.providers.reduce(
116
- (max, p) => Math.max(max, this.routeCost(p, inputTokens, outputTokens)),
117
- costUsd
118
- );
144
+ const costUsd = provider.cost ? inputTokens / 1e6 * provider.cost.input + outputTokens / 1e6 * provider.cost.output : 0;
119
145
  this.opts.onCost?.({
120
146
  model: this.opts.modelName,
121
147
  provider: provider.label,
@@ -133,8 +159,7 @@ var LcrFallbackModel = class {
133
159
  latencyMs: Date.now() - ctx.startedAt,
134
160
  inputTokens,
135
161
  outputTokens,
136
- costUsd,
137
- baselineUsd
162
+ costUsd
138
163
  });
139
164
  }
140
165
  /** Every provider failed: fire `onCall` with no winner. */
@@ -149,20 +174,22 @@ var LcrFallbackModel = class {
149
174
  latencyMs: Date.now() - ctx.startedAt,
150
175
  inputTokens: 0,
151
176
  outputTokens: 0,
152
- costUsd: 0,
153
- baselineUsd: 0
177
+ costUsd: 0
154
178
  });
155
179
  }
156
180
  async doGenerate(options) {
157
- this.checkReset();
158
181
  const ctx = this.startCall();
159
- const start = this.index;
182
+ const providers = this.opts.providers;
183
+ const n = providers.length;
184
+ const start = this.startIndex();
160
185
  let lastError;
161
- for (; ; ) {
162
- const provider = this.current;
186
+ for (let tried = 0; tried < n; tried++) {
187
+ const idx = (start + tried) % n;
188
+ const provider = providers[idx];
163
189
  const attemptStart = Date.now();
164
190
  try {
165
191
  const result = await provider.model.doGenerate(options);
192
+ this.settleSticky(idx);
166
193
  this.finalizeOk(ctx, provider, attemptStart, result.usage);
167
194
  return result;
168
195
  } catch (error) {
@@ -174,29 +201,30 @@ var LcrFallbackModel = class {
174
201
  }
175
202
  this.opts.onError?.(error, provider.label);
176
203
  this.recordFail(ctx, provider, attemptStart, error);
177
- this.switchNext();
178
- if (this.index === start) {
179
- this.finalizeFail(ctx);
180
- throw lastError;
181
- }
182
204
  }
183
205
  }
206
+ this.finalizeFail(ctx);
207
+ throw lastError;
184
208
  }
185
209
  async doStream(options) {
186
- this.checkReset();
187
- return this.doStreamWithCtx(options, this.startCall());
210
+ return this.doStreamWithCtx(options, this.startCall(), this.startIndex(), 0);
188
211
  }
189
- // The stream's failover recursion re-enters here with the SAME `ctx`, so a
190
- // mid-stream switch keeps appending to one CallRecord instead of starting a
191
- // fresh one. `finalizeOk`/`finalizeFail` fire exactly once per outer request.
192
- async doStreamWithCtx(options, ctx) {
212
+ // The stream's failover recursion re-enters here with the SAME `ctx` and a
213
+ // threaded-through local cursor (`idx`/`tried`), so a mid-stream switch keeps
214
+ // appending to one CallRecord and bounds itself on the local `tried` count —
215
+ // never on shared instance state. `finalizeOk`/`finalizeFail` fire exactly
216
+ // once per outer request.
217
+ async doStreamWithCtx(options, ctx, startIdx, alreadyTried) {
193
218
  const self = this;
194
- const start = this.index;
219
+ const providers = this.opts.providers;
220
+ const n = providers.length;
195
221
  let result;
196
222
  let serving;
197
223
  let servingStart;
224
+ let idx = startIdx;
225
+ let tried = alreadyTried;
198
226
  for (; ; ) {
199
- serving = this.current;
227
+ serving = providers[idx];
200
228
  servingStart = Date.now();
201
229
  try {
202
230
  result = await serving.model.doStream(options);
@@ -209,15 +237,18 @@ var LcrFallbackModel = class {
209
237
  }
210
238
  this.opts.onError?.(error, serving.label);
211
239
  this.recordFail(ctx, serving, servingStart, error);
212
- this.switchNext();
213
- if (this.index === start) {
240
+ tried++;
241
+ if (tried >= n) {
214
242
  this.finalizeFail(ctx);
215
243
  throw error;
216
244
  }
245
+ idx = (idx + 1) % n;
217
246
  }
218
247
  }
219
248
  const servingProvider = serving;
220
249
  const servingAttemptStart = servingStart;
250
+ const servingIdx = idx;
251
+ const triedBeforeServing = tried;
221
252
  let usage;
222
253
  let streamedAny = false;
223
254
  const stream = new ReadableStream({
@@ -236,20 +267,26 @@ var LcrFallbackModel = class {
236
267
  controller.enqueue(value);
237
268
  if (value.type !== "stream-start") streamedAny = true;
238
269
  }
270
+ self.settleSticky(servingIdx);
239
271
  self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage);
240
272
  controller.close();
241
273
  } catch (error) {
242
274
  self.opts.onError?.(error, servingProvider.label);
243
275
  self.recordFail(ctx, servingProvider, servingAttemptStart, error);
244
276
  if (!streamedAny) {
245
- self.switchNext();
246
- if (self.index === start) {
277
+ const nextTried = triedBeforeServing + 1;
278
+ if (nextTried >= n) {
247
279
  self.finalizeFail(ctx);
248
280
  controller.error(error);
249
281
  return;
250
282
  }
251
283
  try {
252
- const next = await self.doStreamWithCtx(options, ctx);
284
+ const next = await self.doStreamWithCtx(
285
+ options,
286
+ ctx,
287
+ (servingIdx + 1) % n,
288
+ nextTried
289
+ );
253
290
  const nextReader = next.stream.getReader();
254
291
  try {
255
292
  for (; ; ) {
@@ -306,40 +343,6 @@ function formatCallRecord(record, opts = {}) {
306
343
  return line;
307
344
  }
308
345
 
309
- // src/sink.ts
310
- function createHttpSink(options) {
311
- const {
312
- url,
313
- headers,
314
- project,
315
- dispatch = (task) => {
316
- void task();
317
- },
318
- fetchImpl,
319
- onError
320
- } = options;
321
- const doFetch = fetchImpl ?? globalThis.fetch;
322
- return (record) => {
323
- if (!doFetch) {
324
- onError?.(new Error("ai-lcr: no fetch available for createHttpSink"));
325
- return;
326
- }
327
- const payload = project ? { project, ...record } : record;
328
- dispatch(async () => {
329
- try {
330
- await doFetch(url, {
331
- method: "POST",
332
- headers: { "content-type": "application/json", ...headers },
333
- body: JSON.stringify(payload),
334
- keepalive: true
335
- });
336
- } catch (err) {
337
- onError?.(err);
338
- }
339
- });
340
- };
341
- }
342
-
343
346
  // src/media.ts
344
347
  var DEFAULT_REFERENCE = {
345
348
  image: { width: 1920, height: 1080 },
@@ -782,8 +785,8 @@ export {
782
785
  MEDIA_PRICING,
783
786
  cheapestRoute,
784
787
  classifyError,
788
+ classifyErrorKind,
785
789
  comparePrices,
786
- createHttpSink,
787
790
  createKunavoMediaAdapter,
788
791
  createLCR,
789
792
  createMediaLCR,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-lcr",
3
- "version": "0.2.1",
3
+ "version": "0.2.3",
4
4
  "description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
5
5
  "keywords": [
6
6
  "ai",
@@ -39,13 +39,15 @@
39
39
  "files": [
40
40
  "dist",
41
41
  "README.md",
42
- "LICENSE"
42
+ "LICENSE",
43
+ "CHANGELOG.md"
43
44
  ],
44
45
  "scripts": {
45
46
  "build": "tsup src/index.ts --format esm,cjs --dts --clean",
46
47
  "typecheck": "tsc --noEmit",
47
48
  "test": "vitest run",
48
- "test:watch": "vitest"
49
+ "test:watch": "vitest",
50
+ "prepublishOnly": "npm run build && npm run typecheck && npm test"
49
51
  },
50
52
  "peerDependencies": {
51
53
  "ai": "^6.0.0"