npm - ai-lcr - Versions diffs - 0.2.0 → 0.2.1 - Mend

ai-lcr 0.2.0 → 0.2.1

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6)

package/README.md CHANGED Viewed

@@ -135,25 +135,6 @@ const lcr = createLCR({
 onCall: (record) => console.log(JSON.stringify(record)),
 ```
-Or ship each record to an HTTP collector with the built-in `createHttpSink` (fire-and-forget, never throws, dashboard-agnostic):
-```ts
-import { createLCR, createHttpSink } from "ai-lcr";
-import { after } from "next/server"; // serverless: don't block the response
-const lcr = createLCR({
-  models: { /* … */ },
-  onCall: createHttpSink({
-    url: `${process.env.LCR_INGEST_URL}/api/ingest`,
-    headers: { authorization: `Bearer ${process.env.LCR_INGEST_KEY}` },
-    project: process.env.LCR_PROJECT, // optional tag if one collector serves several apps
-    dispatch: after,                  // run after the response is sent (serverless-safe)
-  }),
-});
-```
-Point `url` at anything that accepts the `CallRecord` JSON — including the self-hostable companion dashboard, **[ai-lcr-dashboard](https://github.com/victorzhrn/ai-lcr-dashboard)** (Spend / Calls / Failover rate + a live failover feed). You run your own instance, so the data never leaves your infrastructure; a [db9](https://db9.ai) database can be provisioned in seconds if you don't want to stand one up yourself.
 ```ts
 interface CallRecord {
   id: string;                // correlation id, one per request
@@ -165,8 +146,7 @@ interface CallRecord {
   latencyMs: number;
   inputTokens: number;
   outputTokens: number;
-  costUsd: number;            // what the winner charged for these tokens
-  baselineUsd: number;        // what the priciest configured route would cost → savings = baselineUsd - costUsd
+  costUsd: number;
 }
 ```

package/dist/index.cjs CHANGED Viewed

@@ -425,30 +425,75 @@ function comparePrices(registry, ref = DEFAULT_REFERENCE) {
     };
   });
 }
+function newMediaCallId() {
+  const c = globalThis.crypto;
+  return c?.randomUUID ? c.randomUUID() : `lcr_${Date.now().toString(36)}`;
+}
 function createMediaLCR(config) {
-  const { registry, adapters, reference = DEFAULT_REFERENCE, onError, onCost } = config;
+  const { registry, adapters, reference = DEFAULT_REFERENCE, onError, onCost, onCall } = config;
   return async function generate(modelId, input) {
     const def = registry[modelId];
     if (!def) {
       throw new Error(`ai-lcr: unknown media model "${modelId}" \u2014 add it to the registry`);
     }
     const ranked = rankRoutes(def, reference);
+    const baselineUsd = ranked.length > 0 ? Math.max(...ranked.map((r) => r.refCents)) / 100 : 0;
+    const startedAt = Date.now();
+    const attempts = [];
     let lastErr;
+    const emitFail = () => onCall?.({
+      id: newMediaCallId(),
+      model: modelId,
+      attempts,
+      winner: void 0,
+      ok: false,
+      failedOver: attempts.length > 1,
+      latencyMs: Date.now() - startedAt,
+      inputTokens: 0,
+      outputTokens: 0,
+      costUsd: 0,
+      baselineUsd
+    });
     for (const route of ranked) {
       const adapter = adapters[route.provider];
       if (!adapter) continue;
+      const attemptStart = Date.now();
       try {
         const result = await adapter.run({ externalId: route.externalId, input });
         const estimated = result.costCents === void 0;
         const costCents = estimated ? route.refCents * (result.units ?? 1) : result.costCents;
+        attempts.push({ provider: route.provider, ok: true, latencyMs: Date.now() - attemptStart });
         onCost?.({ modelId, provider: route.provider, costCents, estimated });
+        onCall?.({
+          id: newMediaCallId(),
+          model: modelId,
+          attempts,
+          winner: route.provider,
+          ok: true,
+          failedOver: attempts.length > 1,
+          latencyMs: Date.now() - startedAt,
+          inputTokens: 0,
+          outputTokens: 0,
+          costUsd: costCents / 100,
+          baselineUsd
+        });
         return { outputs: result.outputs, provider: route.provider, costCents, estimated };
       } catch (err) {
         lastErr = err;
+        attempts.push({
+          provider: route.provider,
+          ok: false,
+          latencyMs: Date.now() - attemptStart,
+          errorClass: classifyError(err)
+        });
         onError?.(err, route.provider);
-        if (!isRetryableError(err)) throw err;
+        if (!isRetryableError(err)) {
+          emitFail();
+          throw err;
+        }
       }
     }
+    emitFail();
     throw lastErr instanceof Error ? lastErr : new Error(`ai-lcr: no provider could serve media model "${modelId}"`);
   };
 }

package/dist/index.d.cts CHANGED Viewed

@@ -149,6 +149,22 @@ interface HttpSinkOptions {
  */
 declare function createHttpSink(options: HttpSinkOptions): (record: CallRecord) => void;
+/**
+ * ai-lcr media routing — Least Cost Routing for image & video models.
+ *
+ * The text router (./index, ./fallback) is built on the AI SDK's
+ * `LanguageModelV3` and only handles token-billed chat/completion. Image and
+ * video providers are a different world: outputs are files (URLs), pricing
+ * comes in incompatible units (per-image, per-second, per-call, per-megapixel),
+ * and video is a long-running async job. This module is the parallel, self-
+ * contained media side — no `LanguageModelV3` dependency.
+ *
+ * The core idea is the SAME as the text LCR: keep a list of providers per
+ * model, route to the cheapest healthy one, fall back on failure, report real
+ * cost. The only new problem is making prices comparable, which we solve by
+ * normalizing every provider's price to ONE reference output (see ReferenceSpec).
+ */
 type MediaModality = "image" | "video";
 /**
  * Pricing unit a provider bills in. `cents` on MediaPricing is the price for
@@ -268,6 +284,13 @@ interface MediaLCRConfig {
     reference?: ReferenceSpec;
     onError?: (error: Error, provider: string) => void;
     onCost?: (event: MediaCostEvent) => void;
+    /**
+     * One correlated {@link CallRecord} per settled request — the full failover
+     * chain, winner, latency, and cost — mirroring the text side's `onCall`, so
+     * the same dashboard sink works for image/video. Fire-and-forget; never
+     * throws. Media records carry no token counts (inputTokens/outputTokens = 0).
+     */
+    onCall?: (record: CallRecord) => void;
 }
 interface MediaRunResult {
     outputs: MediaOutput[];
@@ -275,11 +298,6 @@ interface MediaRunResult {
     costCents: number;
     estimated: boolean;
 }
-/**
- * Build a media Least Cost Router. Returns `generate(modelId, input)` which
- * tries providers cheapest-first and falls through on a retryable error —
- * exactly the text LCR's contract, for image/video.
- */
 declare function createMediaLCR(config: MediaLCRConfig): (modelId: string, input: Record<string, unknown>) => Promise<MediaRunResult>;
 /**

package/dist/index.d.ts CHANGED Viewed

@@ -149,6 +149,22 @@ interface HttpSinkOptions {
  */
 declare function createHttpSink(options: HttpSinkOptions): (record: CallRecord) => void;
+/**
+ * ai-lcr media routing — Least Cost Routing for image & video models.
+ *
+ * The text router (./index, ./fallback) is built on the AI SDK's
+ * `LanguageModelV3` and only handles token-billed chat/completion. Image and
+ * video providers are a different world: outputs are files (URLs), pricing
+ * comes in incompatible units (per-image, per-second, per-call, per-megapixel),
+ * and video is a long-running async job. This module is the parallel, self-
+ * contained media side — no `LanguageModelV3` dependency.
+ *
+ * The core idea is the SAME as the text LCR: keep a list of providers per
+ * model, route to the cheapest healthy one, fall back on failure, report real
+ * cost. The only new problem is making prices comparable, which we solve by
+ * normalizing every provider's price to ONE reference output (see ReferenceSpec).
+ */
 type MediaModality = "image" | "video";
 /**
  * Pricing unit a provider bills in. `cents` on MediaPricing is the price for
@@ -268,6 +284,13 @@ interface MediaLCRConfig {
     reference?: ReferenceSpec;
     onError?: (error: Error, provider: string) => void;
     onCost?: (event: MediaCostEvent) => void;
+    /**
+     * One correlated {@link CallRecord} per settled request — the full failover
+     * chain, winner, latency, and cost — mirroring the text side's `onCall`, so
+     * the same dashboard sink works for image/video. Fire-and-forget; never
+     * throws. Media records carry no token counts (inputTokens/outputTokens = 0).
+     */
+    onCall?: (record: CallRecord) => void;
 }
 interface MediaRunResult {
     outputs: MediaOutput[];
@@ -275,11 +298,6 @@ interface MediaRunResult {
     costCents: number;
     estimated: boolean;
 }
-/**
- * Build a media Least Cost Router. Returns `generate(modelId, input)` which
- * tries providers cheapest-first and falls through on a retryable error —
- * exactly the text LCR's contract, for image/video.
- */
 declare function createMediaLCR(config: MediaLCRConfig): (modelId: string, input: Record<string, unknown>) => Promise<MediaRunResult>;
 /**

package/dist/index.js CHANGED Viewed

@@ -386,30 +386,75 @@ function comparePrices(registry, ref = DEFAULT_REFERENCE) {
     };
   });
 }
+function newMediaCallId() {
+  const c = globalThis.crypto;
+  return c?.randomUUID ? c.randomUUID() : `lcr_${Date.now().toString(36)}`;
+}
 function createMediaLCR(config) {
-  const { registry, adapters, reference = DEFAULT_REFERENCE, onError, onCost } = config;
+  const { registry, adapters, reference = DEFAULT_REFERENCE, onError, onCost, onCall } = config;
   return async function generate(modelId, input) {
     const def = registry[modelId];
     if (!def) {
       throw new Error(`ai-lcr: unknown media model "${modelId}" \u2014 add it to the registry`);
     }
     const ranked = rankRoutes(def, reference);
+    const baselineUsd = ranked.length > 0 ? Math.max(...ranked.map((r) => r.refCents)) / 100 : 0;
+    const startedAt = Date.now();
+    const attempts = [];
     let lastErr;
+    const emitFail = () => onCall?.({
+      id: newMediaCallId(),
+      model: modelId,
+      attempts,
+      winner: void 0,
+      ok: false,
+      failedOver: attempts.length > 1,
+      latencyMs: Date.now() - startedAt,
+      inputTokens: 0,
+      outputTokens: 0,
+      costUsd: 0,
+      baselineUsd
+    });
     for (const route of ranked) {
       const adapter = adapters[route.provider];
       if (!adapter) continue;
+      const attemptStart = Date.now();
       try {
         const result = await adapter.run({ externalId: route.externalId, input });
         const estimated = result.costCents === void 0;
         const costCents = estimated ? route.refCents * (result.units ?? 1) : result.costCents;
+        attempts.push({ provider: route.provider, ok: true, latencyMs: Date.now() - attemptStart });
         onCost?.({ modelId, provider: route.provider, costCents, estimated });
+        onCall?.({
+          id: newMediaCallId(),
+          model: modelId,
+          attempts,
+          winner: route.provider,
+          ok: true,
+          failedOver: attempts.length > 1,
+          latencyMs: Date.now() - startedAt,
+          inputTokens: 0,
+          outputTokens: 0,
+          costUsd: costCents / 100,
+          baselineUsd
+        });
         return { outputs: result.outputs, provider: route.provider, costCents, estimated };
       } catch (err) {
         lastErr = err;
+        attempts.push({
+          provider: route.provider,
+          ok: false,
+          latencyMs: Date.now() - attemptStart,
+          errorClass: classifyError(err)
+        });
         onError?.(err, route.provider);
-        if (!isRetryableError(err)) throw err;
+        if (!isRetryableError(err)) {
+          emitFail();
+          throw err;
+        }
       }
     }
+    emitFail();
     throw lastErr instanceof Error ? lastErr : new Error(`ai-lcr: no provider could serve media model "${modelId}"`);
   };
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ai-lcr",
-  "version": "0.2.0",
+  "version": "0.2.1",
   "description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
   "keywords": [
     "ai",