npm - ai-lcr - Versions diffs - 0.2.1 → 0.2.3 - Mend

ai-lcr 0.2.1 → 0.2.3

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (6) hide show

package/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,60 @@
+# Changelog
+All notable changes to `ai-lcr` are documented here. The format follows
+[Keep a Changelog](https://keepachangelog.com/), and the project adheres to
+[Semantic Versioning](https://semver.org/).
+## [0.2.3] — 2026-06-01
+Release-quality and engine-correctness pass.
+### Fixed
+- **Build was red on `main`.** `media.ts` set `CallRecord.baselineUsd` but the
+  type never declared it, so `tsc`/`npm run build` failed while `npm test`
+  (which doesn't typecheck) stayed green. `baselineUsd?: number` is now part of
+  `CallRecord`. The text router leaves it `undefined`; the media router sets it.
+- **Failover used shared mutable state across concurrent requests.** The active
+  provider index was an instance field used both as the per-request loop cursor
+  and the loop's termination check. Two requests sharing one model instance
+  could clobber each other's cursor mid-flight (skipped providers, wrong
+  termination). Each request now walks providers on a fully local cursor; the
+  only shared state is a "where to start next" hint, read once and written once.
+- **Cheapest provider was never re-probed under sustained traffic.** The
+  snap-back-to-cheapest timer reset on *every* call, so with calls more frequent
+  than `resetIntervalMs` it never fired — one blip pinned you on the expensive
+  fallback indefinitely (exactly when spend is highest). The timer now measures
+  from the last *failover*, so re-probe fires under load too.
+### Added
+- **`classifyErrorKind(error)` and `RouteAttempt.kind`** (`"transient" | "auth"
+  | "billing" | "client"`). 401/403 (auth) and 402/out-of-credit (billing)
+  still fail over so the request survives — but they're now tagged distinctly
+  from transient 429/5xx, so a misconfigured key silently burning the pricey
+  fallback is something you can alert on instead of mistaking for healthy
+  routing.
+- **Continuous Integration** (`.github/workflows/ci.yml`): `build` +
+  `typecheck` + `test` on Node 20 & 22, plus a `pack-smoke` job that installs
+  the actual `npm pack` tarball into a clean directory and imports it (ESM and
+  CJS) — catching dropped exports and broken `dist` that an in-repo test can't.
+- **`prepublishOnly` gate**: `npm publish` now runs build + typecheck + test
+  first, so a red tree can't be published.
+- **Public-export surface test** (`public-api.test.ts`): pins every runtime
+  export by name, so removing one fails loudly and adding one is deliberate.
+## [0.2.1] — earlier
+- `onCall` correlated `CallRecord` + `formatCallRecord` one-liner for the text
+  router, extended to the media router (image/video).
+## [0.2.0] — earlier
+- Observability: `onCall` / `CallRecord`, `formatCallRecord`.
+## [0.1.x] — earlier
+- Dual ESM/CJS build. Media (image/video) least-cost routing with the Runware
+  and Kunavo adapters; cap-aware failover for the text router.
+[0.2.3]: https://github.com/victorzhrn/ai-lcr/releases/tag/v0.2.3

package/dist/index.cjs CHANGED Viewed

@@ -24,8 +24,8 @@ __export(index_exports, {
   MEDIA_PRICING: () => MEDIA_PRICING,
   cheapestRoute: () => cheapestRoute,
   classifyError: () => classifyError,
+  classifyErrorKind: () => classifyErrorKind,
   comparePrices: () => comparePrices,
-  createHttpSink: () => createHttpSink,
   createKunavoMediaAdapter: () => createKunavoMediaAdapter,
   createLCR: () => createLCR,
   createMediaLCR: () => createMediaLCR,
@@ -86,6 +86,16 @@ function classifyError(error) {
   const text = (e?.message ? String(e.message) : safeStringify(error)).toLowerCase();
   return RETRYABLE_PATTERNS.find((p) => text.includes(p)) ?? "error";
 }
+var AUTH_STATUS = /* @__PURE__ */ new Set([401, 403]);
+var BILLING_PATTERNS = ["insufficient", "credit", "quota", "billing", "payment required"];
+function classifyErrorKind(error) {
+  const e = error;
+  const status = e?.statusCode ?? e?.status;
+  const text = (e?.message ? String(e.message) : safeStringify(error)).toLowerCase();
+  if (typeof status === "number" && AUTH_STATUS.has(status)) return "auth";
+  if (status === 402 || BILLING_PATTERNS.some((p) => text.includes(p))) return "billing";
+  return isRetryableError(error) ? "transient" : "client";
+}
 var callSeq = 0;
 function newCallId() {
   const c = globalThis.crypto;
@@ -102,11 +112,20 @@ var LcrFallbackModel = class {
   }
   opts;
   specificationVersion = "v3";
-  index = 0;
-  lastReset = Date.now();
+  // Cross-request *hint* for where the next request starts: after a failover we
+  // remember the provider that worked so we don't re-probe a dead cheap one on
+  // every call. This is the ONLY shared mutable state — and crucially it is read
+  // once per request (snapshotted into a local cursor) and written once on
+  // settle, never used as a per-request loop bound. The within-request iteration
+  // is fully local, so concurrent requests can't corrupt each other's routing.
+  sticky = 0;
+  // When `sticky` was last advanced (a failover). The re-probe timer measures
+  // from THIS, not from the last call — so it fires under sustained traffic too,
+  // instead of being pushed forward forever by a busy stream of requests.
+  lastFailoverAt = Date.now();
   resetIntervalMs;
   get current() {
-    return this.opts.providers[this.index];
+    return this.opts.providers[this.sticky];
   }
   get modelId() {
     return this.current.model.modelId;
@@ -117,14 +136,28 @@ var LcrFallbackModel = class {
   get supportedUrls() {
     return this.current.model.supportedUrls;
   }
-  checkReset() {
-    if (this.index !== 0 && Date.now() - this.lastReset >= this.resetIntervalMs) {
-      this.index = 0;
+  /**
+   * Index a new request should start at. If we're parked on a non-cheapest
+   * provider and it's been `resetIntervalMs` since the failover, snap back to
+   * the cheapest and re-probe it — this is what lets routing recover to the
+   * cheap source even during continuous traffic.
+   */
+  startIndex() {
+    if (this.sticky !== 0 && Date.now() - this.lastFailoverAt >= this.resetIntervalMs) {
+      this.sticky = 0;
     }
-    this.lastReset = Date.now();
+    return this.sticky;
   }
-  switchNext() {
-    this.index = (this.index + 1) % this.opts.providers.length;
+  /**
+   * A request settled on `winIndex`. Park there so the next request skips the
+   * providers we just learned are down. Stamp the failover time only when the
+   * parked provider actually CHANGES — so a steady stream of successful calls
+   * on the same fallback doesn't keep pushing the re-probe timer forward.
+   */
+  settleSticky(winIndex) {
+    if (winIndex === this.sticky) return;
+    this.sticky = winIndex;
+    this.lastFailoverAt = Date.now();
   }
   shouldRetry(error) {
     return (this.opts.shouldRetry ?? isRetryableError)(error);
@@ -138,23 +171,16 @@ var LcrFallbackModel = class {
       provider: provider.label,
       ok: false,
       latencyMs: Date.now() - attemptStart,
-      errorClass: classifyError(error)
+      errorClass: classifyError(error),
+      kind: classifyErrorKind(error)
     });
   }
-  /** Cost of one route for the given token counts; 0 if it has no price. */
-  routeCost(p, inputTokens, outputTokens) {
-    return p.cost ? inputTokens / 1e6 * p.cost.input + outputTokens / 1e6 * p.cost.output : 0;
-  }
   /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
   finalizeOk(ctx, provider, attemptStart, usage) {
     ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
     const inputTokens = usage?.inputTokens?.total ?? 0;
     const outputTokens = usage?.outputTokens?.total ?? 0;
-    const costUsd = this.routeCost(provider, inputTokens, outputTokens);
-    const baselineUsd = this.opts.providers.reduce(
-      (max, p) => Math.max(max, this.routeCost(p, inputTokens, outputTokens)),
-      costUsd
-    );
+    const costUsd = provider.cost ? inputTokens / 1e6 * provider.cost.input + outputTokens / 1e6 * provider.cost.output : 0;
     this.opts.onCost?.({
       model: this.opts.modelName,
       provider: provider.label,
@@ -172,8 +198,7 @@ var LcrFallbackModel = class {
       latencyMs: Date.now() - ctx.startedAt,
       inputTokens,
       outputTokens,
-      costUsd,
-      baselineUsd
+      costUsd
     });
   }
   /** Every provider failed: fire `onCall` with no winner. */
@@ -188,20 +213,22 @@ var LcrFallbackModel = class {
       latencyMs: Date.now() - ctx.startedAt,
       inputTokens: 0,
       outputTokens: 0,
-      costUsd: 0,
-      baselineUsd: 0
+      costUsd: 0
     });
   }
   async doGenerate(options) {
-    this.checkReset();
     const ctx = this.startCall();
-    const start = this.index;
+    const providers = this.opts.providers;
+    const n = providers.length;
+    const start = this.startIndex();
     let lastError;
-    for (; ; ) {
-      const provider = this.current;
+    for (let tried = 0; tried < n; tried++) {
+      const idx = (start + tried) % n;
+      const provider = providers[idx];
       const attemptStart = Date.now();
       try {
         const result = await provider.model.doGenerate(options);
+        this.settleSticky(idx);
         this.finalizeOk(ctx, provider, attemptStart, result.usage);
         return result;
       } catch (error) {
@@ -213,29 +240,30 @@ var LcrFallbackModel = class {
         }
         this.opts.onError?.(error, provider.label);
         this.recordFail(ctx, provider, attemptStart, error);
-        this.switchNext();
-        if (this.index === start) {
-          this.finalizeFail(ctx);
-          throw lastError;
-        }
       }
     }
+    this.finalizeFail(ctx);
+    throw lastError;
   }
   async doStream(options) {
-    this.checkReset();
-    return this.doStreamWithCtx(options, this.startCall());
+    return this.doStreamWithCtx(options, this.startCall(), this.startIndex(), 0);
   }
-  // The stream's failover recursion re-enters here with the SAME `ctx`, so a
-  // mid-stream switch keeps appending to one CallRecord instead of starting a
-  // fresh one. `finalizeOk`/`finalizeFail` fire exactly once per outer request.
-  async doStreamWithCtx(options, ctx) {
+  // The stream's failover recursion re-enters here with the SAME `ctx` and a
+  // threaded-through local cursor (`idx`/`tried`), so a mid-stream switch keeps
+  // appending to one CallRecord and bounds itself on the local `tried` count —
+  // never on shared instance state. `finalizeOk`/`finalizeFail` fire exactly
+  // once per outer request.
+  async doStreamWithCtx(options, ctx, startIdx, alreadyTried) {
     const self = this;
-    const start = this.index;
+    const providers = this.opts.providers;
+    const n = providers.length;
     let result;
     let serving;
     let servingStart;
+    let idx = startIdx;
+    let tried = alreadyTried;
     for (; ; ) {
-      serving = this.current;
+      serving = providers[idx];
       servingStart = Date.now();
       try {
         result = await serving.model.doStream(options);
@@ -248,15 +276,18 @@ var LcrFallbackModel = class {
         }
         this.opts.onError?.(error, serving.label);
         this.recordFail(ctx, serving, servingStart, error);
-        this.switchNext();
-        if (this.index === start) {
+        tried++;
+        if (tried >= n) {
           this.finalizeFail(ctx);
           throw error;
         }
+        idx = (idx + 1) % n;
       }
     }
     const servingProvider = serving;
     const servingAttemptStart = servingStart;
+    const servingIdx = idx;
+    const triedBeforeServing = tried;
     let usage;
     let streamedAny = false;
     const stream = new ReadableStream({
@@ -275,20 +306,26 @@ var LcrFallbackModel = class {
             controller.enqueue(value);
             if (value.type !== "stream-start") streamedAny = true;
           }
+          self.settleSticky(servingIdx);
           self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage);
           controller.close();
         } catch (error) {
           self.opts.onError?.(error, servingProvider.label);
           self.recordFail(ctx, servingProvider, servingAttemptStart, error);
           if (!streamedAny) {
-            self.switchNext();
-            if (self.index === start) {
+            const nextTried = triedBeforeServing + 1;
+            if (nextTried >= n) {
               self.finalizeFail(ctx);
               controller.error(error);
               return;
             }
             try {
-              const next = await self.doStreamWithCtx(options, ctx);
+              const next = await self.doStreamWithCtx(
+                options,
+                ctx,
+                (servingIdx + 1) % n,
+                nextTried
+              );
               const nextReader = next.stream.getReader();
               try {
                 for (; ; ) {
@@ -345,40 +382,6 @@ function formatCallRecord(record, opts = {}) {
   return line;
 }
-// src/sink.ts
-function createHttpSink(options) {
-  const {
-    url,
-    headers,
-    project,
-    dispatch = (task) => {
-      void task();
-    },
-    fetchImpl,
-    onError
-  } = options;
-  const doFetch = fetchImpl ?? globalThis.fetch;
-  return (record) => {
-    if (!doFetch) {
-      onError?.(new Error("ai-lcr: no fetch available for createHttpSink"));
-      return;
-    }
-    const payload = project ? { project, ...record } : record;
-    dispatch(async () => {
-      try {
-        await doFetch(url, {
-          method: "POST",
-          headers: { "content-type": "application/json", ...headers },
-          body: JSON.stringify(payload),
-          keepalive: true
-        });
-      } catch (err) {
-        onError?.(err);
-      }
-    });
-  };
-}
 // src/media.ts
 var DEFAULT_REFERENCE = {
   image: { width: 1920, height: 1080 },
@@ -822,8 +825,8 @@ function createLCR(config) {
   MEDIA_PRICING,
   cheapestRoute,
   classifyError,
+  classifyErrorKind,
   comparePrices,
-  createHttpSink,
   createKunavoMediaAdapter,
   createLCR,
   createMediaLCR,

package/dist/index.d.cts CHANGED Viewed

@@ -5,8 +5,10 @@ import { LanguageModelV3 } from '@ai-sdk/provider';
  *
  * A LanguageModelV3 that wraps an ordered, cheapest-first list of providers:
  * it serves from the first healthy one, switches to the next on a retryable
- * error (streaming-safe), and snaps back to the cheapest after an idle window.
- * It also computes per-call cost from each provider's price and fires `onCost`.
+ * error (streaming-safe), and periodically re-probes the cheapest provider
+ * (every `resetIntervalMs` after a failover — under load too, not only when
+ * idle). It also computes per-call cost from each provider's price and fires
+ * `onCost`.
  *
  * The switching loop is adapted from `ai-fallback` (MIT, © remorses) — its
  * streaming-safe fallback approach — reimplemented here so ai-lcr owns its core
@@ -28,6 +30,17 @@ interface CostEvent {
     /** Computed from the serving provider's `cost`; 0 if no price was given. */
     costUsd: number;
 }
+/**
+ * Coarse error category for a failed attempt — distinct from `errorClass`
+ * (which is the raw status/pattern). Use it to alert: `"auth"` and `"billing"`
+ * mean a config/account problem masquerading as a healthy failover, the thing
+ * you want to page on rather than silently keep burning the pricey fallback.
+ *   - "transient": rate limit / overload / 5xx — expected, self-healing.
+ *   - "auth":      401 / 403 — a misconfigured or revoked key.
+ *   - "billing":   402 / out-of-credit / quota — account needs topping up.
+ *   - "client":    a non-retryable caller error (e.g. 400 bad request).
+ */
+type ErrorKind = "transient" | "auth" | "billing" | "client";
 /** One provider attempt within a single request. */
 interface RouteAttempt {
     /** Provider label that was tried (e.g. "tokenmart"). */
@@ -38,6 +51,8 @@ interface RouteAttempt {
     latencyMs: number;
     /** Normalized failure reason when `ok` is false (e.g. "502", "rate_limit", "timeout"). */
     errorClass?: string;
+    /** Coarse category of the failure when `ok` is false. See {@link ErrorKind}. */
+    kind?: ErrorKind;
 }
 /**
  * One settled request, with its full failover chain. Emitted exactly once per
@@ -65,13 +80,12 @@ interface CallRecord {
     /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
     costUsd: number;
     /**
-     * What these same tokens would have cost at the **most expensive** configured
-     * provider for this model — the "if you never routed cheap" baseline. Savings
-     * = `baselineUsd - costUsd`. Equals `costUsd` (savings 0) when prices are
-     * missing or the priciest route is the one that served. Self-contained: no
-     * external price table needed.
+     * What the same request would have cost on the most expensive configured
+     * provider — the savings baseline (`baselineUsd - costUsd`). Set by the media
+     * router; the text router omits it (left undefined) until a per-call text
+     * baseline lands. Optional so both routers share one {@link CallRecord} shape.
      */
-    baselineUsd: number;
+    baselineUsd?: number;
 }
 /**
  * Normalize an error into a short, log-friendly class for {@link CallRecord}.
@@ -80,6 +94,13 @@ interface CallRecord {
  * Reuses the same signals as {@link isRetryableError} — no new vocabulary.
  */
 declare function classifyError(error: unknown): string;
+/**
+ * Categorize an error for alerting. Orthogonal to {@link isRetryableError}
+ * (which decides *whether* to fail over) — this decides *how alarming* the
+ * failover is. A run of `"auth"`/`"billing"` attempts means you're silently
+ * burning the pricey fallback because a key/account is broken: page on it.
+ */
+declare function classifyErrorKind(error: unknown): ErrorKind;
 /**
  * Human-readable one-liner for a {@link CallRecord}.
@@ -101,54 +122,6 @@ interface FormatOptions {
 }
 declare function formatCallRecord(record: CallRecord, opts?: FormatOptions): string;
-/**
- * Optional HTTP sink for `onCall` — ship each {@link CallRecord} as JSON to a
- * collector (e.g. a self-hosted ai-lcr-dashboard `/api/ingest`, or any endpoint
- * that accepts the CallRecord shape).
- *
- * Fully optional and dashboard-agnostic: omit it and ai-lcr stores nothing;
- * point `url` at whatever you run. Logging must never break your app, so a
- * failed POST is swallowed by default (surface it via `onError` if you want).
- *
- *   import { createLCR, createHttpSink } from "ai-lcr";
- *   import { after } from "next/server"; // serverless: don't block the response
- *
- *   const lcr = createLCR({
- *     models: { ... },
- *     onCall: createHttpSink({
- *       url: process.env.LCR_INGEST_URL + "/api/ingest",
- *       headers: { authorization: `Bearer ${process.env.LCR_INGEST_KEY}` },
- *       project: process.env.LCR_PROJECT,
- *       dispatch: after, // run after the response is sent
- *     }),
- *   });
- */
-interface HttpSinkOptions {
-    /** Where to POST each CallRecord (a collector that accepts the JSON shape). */
-    url: string;
-    /** Extra headers, e.g. `{ authorization: ` + "`Bearer ${key}`" + ` }`. */
-    headers?: Record<string, string>;
-    /** Optional tenant/project tag merged into each payload (`{ project, ...record }`). */
-    project?: string;
-    /**
-     * Wrap the dispatch so it survives a serverless function returning. On
-     * Next.js pass `after` from "next/server"; elsewhere pass a `waitUntil`-style
-     * function. Defaults to running immediately — correct for long-lived servers,
-     * but on serverless an un-awaited POST may be cut off, so pass `after`.
-     */
-    dispatch?: (task: () => void | Promise<void>) => void;
-    /** Custom fetch (tests / runtimes without a global `fetch`). */
-    fetchImpl?: typeof fetch;
-    /** Called if the POST fails. Failures are swallowed by default. */
-    onError?: (error: unknown) => void;
-}
-/**
- * Build an `onCall` handler that POSTs each {@link CallRecord} to `url`.
- * Returns a plain `(record) => void` — pass it straight to `createLCR`'s `onCall`.
- */
-declare function createHttpSink(options: HttpSinkOptions): (record: CallRecord) => void;
 /**
  * ai-lcr media routing — Least Cost Routing for image & video models.
  *
@@ -438,4 +411,4 @@ type LCRRouter = (modelName: string) => LanguageModelV3;
  */
 declare function createLCR(config: LCRConfig): LCRRouter;
-export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type FormatOptions, type HttpSinkOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, comparePrices, createHttpSink, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
+export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type ErrorKind, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, classifyErrorKind, comparePrices, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };

package/dist/index.d.ts CHANGED Viewed

@@ -5,8 +5,10 @@ import { LanguageModelV3 } from '@ai-sdk/provider';
  *
  * A LanguageModelV3 that wraps an ordered, cheapest-first list of providers:
  * it serves from the first healthy one, switches to the next on a retryable
- * error (streaming-safe), and snaps back to the cheapest after an idle window.
- * It also computes per-call cost from each provider's price and fires `onCost`.
+ * error (streaming-safe), and periodically re-probes the cheapest provider
+ * (every `resetIntervalMs` after a failover — under load too, not only when
+ * idle). It also computes per-call cost from each provider's price and fires
+ * `onCost`.
  *
  * The switching loop is adapted from `ai-fallback` (MIT, © remorses) — its
  * streaming-safe fallback approach — reimplemented here so ai-lcr owns its core
@@ -28,6 +30,17 @@ interface CostEvent {
     /** Computed from the serving provider's `cost`; 0 if no price was given. */
     costUsd: number;
 }
+/**
+ * Coarse error category for a failed attempt — distinct from `errorClass`
+ * (which is the raw status/pattern). Use it to alert: `"auth"` and `"billing"`
+ * mean a config/account problem masquerading as a healthy failover, the thing
+ * you want to page on rather than silently keep burning the pricey fallback.
+ *   - "transient": rate limit / overload / 5xx — expected, self-healing.
+ *   - "auth":      401 / 403 — a misconfigured or revoked key.
+ *   - "billing":   402 / out-of-credit / quota — account needs topping up.
+ *   - "client":    a non-retryable caller error (e.g. 400 bad request).
+ */
+type ErrorKind = "transient" | "auth" | "billing" | "client";
 /** One provider attempt within a single request. */
 interface RouteAttempt {
     /** Provider label that was tried (e.g. "tokenmart"). */
@@ -38,6 +51,8 @@ interface RouteAttempt {
     latencyMs: number;
     /** Normalized failure reason when `ok` is false (e.g. "502", "rate_limit", "timeout"). */
     errorClass?: string;
+    /** Coarse category of the failure when `ok` is false. See {@link ErrorKind}. */
+    kind?: ErrorKind;
 }
 /**
  * One settled request, with its full failover chain. Emitted exactly once per
@@ -65,13 +80,12 @@ interface CallRecord {
     /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
     costUsd: number;
     /**
-     * What these same tokens would have cost at the **most expensive** configured
-     * provider for this model — the "if you never routed cheap" baseline. Savings
-     * = `baselineUsd - costUsd`. Equals `costUsd` (savings 0) when prices are
-     * missing or the priciest route is the one that served. Self-contained: no
-     * external price table needed.
+     * What the same request would have cost on the most expensive configured
+     * provider — the savings baseline (`baselineUsd - costUsd`). Set by the media
+     * router; the text router omits it (left undefined) until a per-call text
+     * baseline lands. Optional so both routers share one {@link CallRecord} shape.
      */
-    baselineUsd: number;
+    baselineUsd?: number;
 }
 /**
  * Normalize an error into a short, log-friendly class for {@link CallRecord}.
@@ -80,6 +94,13 @@ interface CallRecord {
  * Reuses the same signals as {@link isRetryableError} — no new vocabulary.
  */
 declare function classifyError(error: unknown): string;
+/**
+ * Categorize an error for alerting. Orthogonal to {@link isRetryableError}
+ * (which decides *whether* to fail over) — this decides *how alarming* the
+ * failover is. A run of `"auth"`/`"billing"` attempts means you're silently
+ * burning the pricey fallback because a key/account is broken: page on it.
+ */
+declare function classifyErrorKind(error: unknown): ErrorKind;
 /**
  * Human-readable one-liner for a {@link CallRecord}.
@@ -101,54 +122,6 @@ interface FormatOptions {
 }
 declare function formatCallRecord(record: CallRecord, opts?: FormatOptions): string;
-/**
- * Optional HTTP sink for `onCall` — ship each {@link CallRecord} as JSON to a
- * collector (e.g. a self-hosted ai-lcr-dashboard `/api/ingest`, or any endpoint
- * that accepts the CallRecord shape).
- *
- * Fully optional and dashboard-agnostic: omit it and ai-lcr stores nothing;
- * point `url` at whatever you run. Logging must never break your app, so a
- * failed POST is swallowed by default (surface it via `onError` if you want).
- *
- *   import { createLCR, createHttpSink } from "ai-lcr";
- *   import { after } from "next/server"; // serverless: don't block the response
- *
- *   const lcr = createLCR({
- *     models: { ... },
- *     onCall: createHttpSink({
- *       url: process.env.LCR_INGEST_URL + "/api/ingest",
- *       headers: { authorization: `Bearer ${process.env.LCR_INGEST_KEY}` },
- *       project: process.env.LCR_PROJECT,
- *       dispatch: after, // run after the response is sent
- *     }),
- *   });
- */
-interface HttpSinkOptions {
-    /** Where to POST each CallRecord (a collector that accepts the JSON shape). */
-    url: string;
-    /** Extra headers, e.g. `{ authorization: ` + "`Bearer ${key}`" + ` }`. */
-    headers?: Record<string, string>;
-    /** Optional tenant/project tag merged into each payload (`{ project, ...record }`). */
-    project?: string;
-    /**
-     * Wrap the dispatch so it survives a serverless function returning. On
-     * Next.js pass `after` from "next/server"; elsewhere pass a `waitUntil`-style
-     * function. Defaults to running immediately — correct for long-lived servers,
-     * but on serverless an un-awaited POST may be cut off, so pass `after`.
-     */
-    dispatch?: (task: () => void | Promise<void>) => void;
-    /** Custom fetch (tests / runtimes without a global `fetch`). */
-    fetchImpl?: typeof fetch;
-    /** Called if the POST fails. Failures are swallowed by default. */
-    onError?: (error: unknown) => void;
-}
-/**
- * Build an `onCall` handler that POSTs each {@link CallRecord} to `url`.
- * Returns a plain `(record) => void` — pass it straight to `createLCR`'s `onCall`.
- */
-declare function createHttpSink(options: HttpSinkOptions): (record: CallRecord) => void;
 /**
  * ai-lcr media routing — Least Cost Routing for image & video models.
  *
@@ -438,4 +411,4 @@ type LCRRouter = (modelName: string) => LanguageModelV3;
  */
 declare function createLCR(config: LCRConfig): LCRRouter;
-export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type FormatOptions, type HttpSinkOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, comparePrices, createHttpSink, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
+export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type ErrorKind, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, classifyErrorKind, comparePrices, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };

package/dist/index.js CHANGED Viewed

@@ -47,6 +47,16 @@ function classifyError(error) {
   const text = (e?.message ? String(e.message) : safeStringify(error)).toLowerCase();
   return RETRYABLE_PATTERNS.find((p) => text.includes(p)) ?? "error";
 }
+var AUTH_STATUS = /* @__PURE__ */ new Set([401, 403]);
+var BILLING_PATTERNS = ["insufficient", "credit", "quota", "billing", "payment required"];
+function classifyErrorKind(error) {
+  const e = error;
+  const status = e?.statusCode ?? e?.status;
+  const text = (e?.message ? String(e.message) : safeStringify(error)).toLowerCase();
+  if (typeof status === "number" && AUTH_STATUS.has(status)) return "auth";
+  if (status === 402 || BILLING_PATTERNS.some((p) => text.includes(p))) return "billing";
+  return isRetryableError(error) ? "transient" : "client";
+}
 var callSeq = 0;
 function newCallId() {
   const c = globalThis.crypto;
@@ -63,11 +73,20 @@ var LcrFallbackModel = class {
   }
   opts;
   specificationVersion = "v3";
-  index = 0;
-  lastReset = Date.now();
+  // Cross-request *hint* for where the next request starts: after a failover we
+  // remember the provider that worked so we don't re-probe a dead cheap one on
+  // every call. This is the ONLY shared mutable state — and crucially it is read
+  // once per request (snapshotted into a local cursor) and written once on
+  // settle, never used as a per-request loop bound. The within-request iteration
+  // is fully local, so concurrent requests can't corrupt each other's routing.
+  sticky = 0;
+  // When `sticky` was last advanced (a failover). The re-probe timer measures
+  // from THIS, not from the last call — so it fires under sustained traffic too,
+  // instead of being pushed forward forever by a busy stream of requests.
+  lastFailoverAt = Date.now();
   resetIntervalMs;
   get current() {
-    return this.opts.providers[this.index];
+    return this.opts.providers[this.sticky];
   }
   get modelId() {
     return this.current.model.modelId;
@@ -78,14 +97,28 @@ var LcrFallbackModel = class {
   get supportedUrls() {
     return this.current.model.supportedUrls;
   }
-  checkReset() {
-    if (this.index !== 0 && Date.now() - this.lastReset >= this.resetIntervalMs) {
-      this.index = 0;
+  /**
+   * Index a new request should start at. If we're parked on a non-cheapest
+   * provider and it's been `resetIntervalMs` since the failover, snap back to
+   * the cheapest and re-probe it — this is what lets routing recover to the
+   * cheap source even during continuous traffic.
+   */
+  startIndex() {
+    if (this.sticky !== 0 && Date.now() - this.lastFailoverAt >= this.resetIntervalMs) {
+      this.sticky = 0;
     }
-    this.lastReset = Date.now();
+    return this.sticky;
   }
-  switchNext() {
-    this.index = (this.index + 1) % this.opts.providers.length;
+  /**
+   * A request settled on `winIndex`. Park there so the next request skips the
+   * providers we just learned are down. Stamp the failover time only when the
+   * parked provider actually CHANGES — so a steady stream of successful calls
+   * on the same fallback doesn't keep pushing the re-probe timer forward.
+   */
+  settleSticky(winIndex) {
+    if (winIndex === this.sticky) return;
+    this.sticky = winIndex;
+    this.lastFailoverAt = Date.now();
   }
   shouldRetry(error) {
     return (this.opts.shouldRetry ?? isRetryableError)(error);
@@ -99,23 +132,16 @@ var LcrFallbackModel = class {
       provider: provider.label,
       ok: false,
       latencyMs: Date.now() - attemptStart,
-      errorClass: classifyError(error)
+      errorClass: classifyError(error),
+      kind: classifyErrorKind(error)
     });
   }
-  /** Cost of one route for the given token counts; 0 if it has no price. */
-  routeCost(p, inputTokens, outputTokens) {
-    return p.cost ? inputTokens / 1e6 * p.cost.input + outputTokens / 1e6 * p.cost.output : 0;
-  }
   /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
   finalizeOk(ctx, provider, attemptStart, usage) {
     ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
     const inputTokens = usage?.inputTokens?.total ?? 0;
     const outputTokens = usage?.outputTokens?.total ?? 0;
-    const costUsd = this.routeCost(provider, inputTokens, outputTokens);
-    const baselineUsd = this.opts.providers.reduce(
-      (max, p) => Math.max(max, this.routeCost(p, inputTokens, outputTokens)),
-      costUsd
-    );
+    const costUsd = provider.cost ? inputTokens / 1e6 * provider.cost.input + outputTokens / 1e6 * provider.cost.output : 0;
     this.opts.onCost?.({
       model: this.opts.modelName,
       provider: provider.label,
@@ -133,8 +159,7 @@ var LcrFallbackModel = class {
       latencyMs: Date.now() - ctx.startedAt,
       inputTokens,
       outputTokens,
-      costUsd,
-      baselineUsd
+      costUsd
     });
   }
   /** Every provider failed: fire `onCall` with no winner. */
@@ -149,20 +174,22 @@ var LcrFallbackModel = class {
       latencyMs: Date.now() - ctx.startedAt,
       inputTokens: 0,
       outputTokens: 0,
-      costUsd: 0,
-      baselineUsd: 0
+      costUsd: 0
     });
   }
   async doGenerate(options) {
-    this.checkReset();
     const ctx = this.startCall();
-    const start = this.index;
+    const providers = this.opts.providers;
+    const n = providers.length;
+    const start = this.startIndex();
     let lastError;
-    for (; ; ) {
-      const provider = this.current;
+    for (let tried = 0; tried < n; tried++) {
+      const idx = (start + tried) % n;
+      const provider = providers[idx];
       const attemptStart = Date.now();
       try {
         const result = await provider.model.doGenerate(options);
+        this.settleSticky(idx);
         this.finalizeOk(ctx, provider, attemptStart, result.usage);
         return result;
       } catch (error) {
@@ -174,29 +201,30 @@ var LcrFallbackModel = class {
         }
         this.opts.onError?.(error, provider.label);
         this.recordFail(ctx, provider, attemptStart, error);
-        this.switchNext();
-        if (this.index === start) {
-          this.finalizeFail(ctx);
-          throw lastError;
-        }
       }
     }
+    this.finalizeFail(ctx);
+    throw lastError;
   }
   async doStream(options) {
-    this.checkReset();
-    return this.doStreamWithCtx(options, this.startCall());
+    return this.doStreamWithCtx(options, this.startCall(), this.startIndex(), 0);
   }
-  // The stream's failover recursion re-enters here with the SAME `ctx`, so a
-  // mid-stream switch keeps appending to one CallRecord instead of starting a
-  // fresh one. `finalizeOk`/`finalizeFail` fire exactly once per outer request.
-  async doStreamWithCtx(options, ctx) {
+  // The stream's failover recursion re-enters here with the SAME `ctx` and a
+  // threaded-through local cursor (`idx`/`tried`), so a mid-stream switch keeps
+  // appending to one CallRecord and bounds itself on the local `tried` count —
+  // never on shared instance state. `finalizeOk`/`finalizeFail` fire exactly
+  // once per outer request.
+  async doStreamWithCtx(options, ctx, startIdx, alreadyTried) {
     const self = this;
-    const start = this.index;
+    const providers = this.opts.providers;
+    const n = providers.length;
     let result;
     let serving;
     let servingStart;
+    let idx = startIdx;
+    let tried = alreadyTried;
     for (; ; ) {
-      serving = this.current;
+      serving = providers[idx];
       servingStart = Date.now();
       try {
         result = await serving.model.doStream(options);
@@ -209,15 +237,18 @@ var LcrFallbackModel = class {
         }
         this.opts.onError?.(error, serving.label);
         this.recordFail(ctx, serving, servingStart, error);
-        this.switchNext();
-        if (this.index === start) {
+        tried++;
+        if (tried >= n) {
           this.finalizeFail(ctx);
           throw error;
         }
+        idx = (idx + 1) % n;
       }
     }
     const servingProvider = serving;
     const servingAttemptStart = servingStart;
+    const servingIdx = idx;
+    const triedBeforeServing = tried;
     let usage;
     let streamedAny = false;
     const stream = new ReadableStream({
@@ -236,20 +267,26 @@ var LcrFallbackModel = class {
             controller.enqueue(value);
             if (value.type !== "stream-start") streamedAny = true;
           }
+          self.settleSticky(servingIdx);
           self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage);
           controller.close();
         } catch (error) {
           self.opts.onError?.(error, servingProvider.label);
           self.recordFail(ctx, servingProvider, servingAttemptStart, error);
           if (!streamedAny) {
-            self.switchNext();
-            if (self.index === start) {
+            const nextTried = triedBeforeServing + 1;
+            if (nextTried >= n) {
               self.finalizeFail(ctx);
               controller.error(error);
               return;
             }
             try {
-              const next = await self.doStreamWithCtx(options, ctx);
+              const next = await self.doStreamWithCtx(
+                options,
+                ctx,
+                (servingIdx + 1) % n,
+                nextTried
+              );
               const nextReader = next.stream.getReader();
               try {
                 for (; ; ) {
@@ -306,40 +343,6 @@ function formatCallRecord(record, opts = {}) {
   return line;
 }
-// src/sink.ts
-function createHttpSink(options) {
-  const {
-    url,
-    headers,
-    project,
-    dispatch = (task) => {
-      void task();
-    },
-    fetchImpl,
-    onError
-  } = options;
-  const doFetch = fetchImpl ?? globalThis.fetch;
-  return (record) => {
-    if (!doFetch) {
-      onError?.(new Error("ai-lcr: no fetch available for createHttpSink"));
-      return;
-    }
-    const payload = project ? { project, ...record } : record;
-    dispatch(async () => {
-      try {
-        await doFetch(url, {
-          method: "POST",
-          headers: { "content-type": "application/json", ...headers },
-          body: JSON.stringify(payload),
-          keepalive: true
-        });
-      } catch (err) {
-        onError?.(err);
-      }
-    });
-  };
-}
 // src/media.ts
 var DEFAULT_REFERENCE = {
   image: { width: 1920, height: 1080 },
@@ -782,8 +785,8 @@ export {
   MEDIA_PRICING,
   cheapestRoute,
   classifyError,
+  classifyErrorKind,
   comparePrices,
-  createHttpSink,
   createKunavoMediaAdapter,
   createLCR,
   createMediaLCR,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ai-lcr",
-  "version": "0.2.1",
+  "version": "0.2.3",
   "description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
   "keywords": [
     "ai",
@@ -39,13 +39,15 @@
   "files": [
     "dist",
     "README.md",
-    "LICENSE"
+    "LICENSE",
+    "CHANGELOG.md"
   ],
   "scripts": {
     "build": "tsup src/index.ts --format esm,cjs --dts --clean",
     "typecheck": "tsc --noEmit",
     "test": "vitest run",
-    "test:watch": "vitest"
+    "test:watch": "vitest",
+    "prepublishOnly": "npm run build && npm run typecheck && npm test"
   },
   "peerDependencies": {
     "ai": "^6.0.0"