npm - ai-lcr - Versions diffs - 0.6.5 → 0.7.1 - Mend

ai-lcr 0.6.5 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,59 @@ All notable changes to `ai-lcr` are documented here. The format follows
 [Keep a Changelog](https://keepachangelog.com/), and the project adheres to
 [Semantic Versioning](https://semver.org/).
+## [0.7.1] — 2026-06-20
+Async media adapters now forward a caller-supplied webhook URL to the provider,
+so async video jobs can complete by **push** instead of poll-only.
+### Added
+- **`metadata.webhookUrl` is forwarded to the provider on async `submit`.**
+  `runware-media` adds it as the `videoInference` task's `webhookURL`; `fal-media`
+  appends it as the `?fal_webhook=` query param on the submit POST. The webhook is
+  a push path — the caller still polls as a fallback. Fixed task fields stay
+  un-clobberable (placed after the input spread). `metadata` was previously
+  accepted but dropped, so the documented "webhook hint" never reached the
+  provider; now it does.
+## [0.7.0] — 2026-06-20
+The text router now records the **provider-reported actual cost** when a provider
+returns one, instead of always estimating from the price table. The table becomes
+the routing input and the drift baseline (`estCostUsd`); the recorded `costUsd` is
+the real bill wherever the provider gives it.
+### Why
+A static price table can only encode one price per model, but an aggregator
+(OpenRouter) routes a single model across many sub-providers whose prices differ
+several-fold, picking one per call — so `tokens × table` is structurally unable to
+match the bill for multi-provider models (measured: `deepseek-v4-pro` reconciled at
+~57% of the real cost, while single-provider models like Gemini/Claude/GPT matched
+at 100%). The provider's own number already accounts for which sub-provider served,
+every token kind (cache read/write, reasoning), and fees — none of which a flat
+table can track.
+### Added
+- **`costUsd` prefers the provider-reported actual cost** (text path). Read from
+  OpenRouter's `providerMetadata.openrouter.usage` —
+  `costDetails.upstreamInferenceCost` (the real upstream / BYOK model spend) when
+  present, otherwise `cost` (the credit charge) — and from an OpenAI-compatible
+  provider's `estimated_cost` on the raw usage body. Requires the caller to enable
+  usage accounting on the provider (e.g. OpenRouter `usage: { include: true }`);
+  without it, behavior is unchanged.
+- **`estCostUsd` is now set on text records** (previously media-only) — the
+  price-table prediction for the same usage. `costUsd − estCostUsd` is the
+  price-table drift signal, so a dashboard's drift panel now works for text too.
+### Changed
+- When no provider cost is reported, `costUsd` still equals the price-table
+  estimate (and `estCostUsd` equals it, so no drift is flagged) — a pure fallback,
+  fully backward-compatible. The streaming path reads the reported cost from the
+  `finish` chunk's `providerMetadata`.
 ## [0.6.5] — 2026-06-16
 Bundled price table now covers the open-weights labs, not just the Western

package/dist/index.cjs CHANGED Viewed

@@ -341,6 +341,20 @@ function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
   const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
   return cached / 1e6 * (cost.input - cost.cacheRead);
 }
+function reportedCost(providerMetadata, usage) {
+  const orUsage = providerMetadata?.openrouter?.usage;
+  if (orUsage) {
+    const upstream = orUsage.costDetails?.upstreamInferenceCost;
+    if (typeof upstream === "number" && upstream > 0) return upstream;
+    if (typeof orUsage.cost === "number") return orUsage.cost;
+  }
+  const raw = usage?.raw;
+  if (raw) {
+    const est = raw["estimated_cost"] ?? raw["cost"];
+    if (typeof est === "number") return est;
+  }
+  return void 0;
+}
 function requestIdFrom(options) {
   const raw = options.providerOptions?.lcr?.requestId;
   return typeof raw === "string" && raw.length > 0 ? raw : void 0;
@@ -539,12 +553,13 @@ var LcrFallbackModel = class {
     return baseline;
   }
   /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
-  finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
+  finalizeOk(ctx, provider, attemptStart, usage, ttftMs, providerMetadata) {
     ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
     const inputTokens = usage?.inputTokens?.total ?? 0;
     const outputTokens = usage?.outputTokens?.total ?? 0;
     const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
-    const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
+    const estCostUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : void 0;
+    const costUsd = reportedCost(providerMetadata, usage) ?? estCostUsd ?? 0;
     const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
     const usageMissing = inputTokens === 0 && outputTokens === 0;
     const emptyCompletion = inputTokens > 0 && outputTokens === 0;
@@ -579,6 +594,7 @@ var LcrFallbackModel = class {
       outputTokens,
       ...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
       costUsd,
+      ...estCostUsd !== void 0 ? { estCostUsd } : {},
       ...baselineUsd !== void 0 ? { baselineUsd, baselineKind: "last-leg" } : {},
       ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
       ...ctx.requestId ? { requestId: ctx.requestId } : {},
@@ -635,7 +651,7 @@ var LcrFallbackModel = class {
         }
         this.recordProviderSuccess(idx);
         this.settleSticky(idx);
-        this.finalizeOk(ctx, provider, attemptStart, result.usage);
+        this.finalizeOk(ctx, provider, attemptStart, result.usage, void 0, result.providerMetadata);
         if (cache && cacheKey !== void 0 && ctx.settled?.cacheable) {
           this.storeCache(cacheKey, { kind: "generate", result, meta: ctx.settled.meta });
         }
@@ -767,6 +783,7 @@ var LcrFallbackModel = class {
     const servingIdx = idx;
     const servingPos = p;
     let usage;
+    let finishProviderMetadata;
     let contentStreamed = false;
     let ttftMs;
     const stream = new ReadableStream({
@@ -783,6 +800,7 @@ var LcrFallbackModel = class {
             if (done) break;
             if (value.type === "finish") {
               usage = value.usage;
+              finishProviderMetadata = value.providerMetadata;
               const out = value.usage?.outputTokens?.total ?? 0;
               const inp = value.usage?.inputTokens?.total ?? 0;
               if (inp > 0 && out === 0 && !contentStreamed && servingPos + 1 < n) {
@@ -797,7 +815,7 @@ var LcrFallbackModel = class {
           }
           self.recordProviderSuccess(servingIdx);
           self.settleSticky(servingIdx);
-          self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
+          self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs, finishProviderMetadata);
           controller.close();
         } catch (error) {
           self.emitError(error, servingProvider.label);
@@ -2041,6 +2059,7 @@ function createRunwareMediaAdapter(config) {
     // (a `getResponse` poll). Image generation stays on the synchronous `run()`.
     async submit(req) {
       const taskUUID = crypto.randomUUID();
+      const webhookUrl = typeof req.metadata?.["webhookUrl"] === "string" ? req.metadata["webhookUrl"] : void 0;
       await postTask({
         outputType: "URL",
         includeCost: true,
@@ -2048,7 +2067,8 @@ function createRunwareMediaAdapter(config) {
         taskType: "videoInference",
         taskUUID,
         model: req.externalId,
-        deliveryMethod: "async"
+        deliveryMethod: "async",
+        ...webhookUrl ? { webhookURL: webhookUrl } : {}
       });
       return { requestId: taskUUID };
     },
@@ -2116,7 +2136,9 @@ function createFalMediaAdapter(config) {
   };
   const queueBase = (externalId) => externalId.split("/").slice(0, 2).join("/");
   async function submit(req) {
-    const submitRes = await fetchImpl(`${baseUrl}/${req.externalId}`, {
+    const webhookUrl = typeof req.metadata?.["webhookUrl"] === "string" ? req.metadata["webhookUrl"] : void 0;
+    const submitUrl = webhookUrl ? `${baseUrl}/${req.externalId}?fal_webhook=${encodeURIComponent(webhookUrl)}` : `${baseUrl}/${req.externalId}`;
+    const submitRes = await fetchImpl(submitUrl, {
       method: "POST",
       headers,
       body: JSON.stringify(req.input)

package/dist/index.js CHANGED Viewed

@@ -287,6 +287,20 @@ function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
   const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
   return cached / 1e6 * (cost.input - cost.cacheRead);
 }
+function reportedCost(providerMetadata, usage) {
+  const orUsage = providerMetadata?.openrouter?.usage;
+  if (orUsage) {
+    const upstream = orUsage.costDetails?.upstreamInferenceCost;
+    if (typeof upstream === "number" && upstream > 0) return upstream;
+    if (typeof orUsage.cost === "number") return orUsage.cost;
+  }
+  const raw = usage?.raw;
+  if (raw) {
+    const est = raw["estimated_cost"] ?? raw["cost"];
+    if (typeof est === "number") return est;
+  }
+  return void 0;
+}
 function requestIdFrom(options) {
   const raw = options.providerOptions?.lcr?.requestId;
   return typeof raw === "string" && raw.length > 0 ? raw : void 0;
@@ -485,12 +499,13 @@ var LcrFallbackModel = class {
     return baseline;
   }
   /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
-  finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
+  finalizeOk(ctx, provider, attemptStart, usage, ttftMs, providerMetadata) {
     ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
     const inputTokens = usage?.inputTokens?.total ?? 0;
     const outputTokens = usage?.outputTokens?.total ?? 0;
     const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
-    const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
+    const estCostUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : void 0;
+    const costUsd = reportedCost(providerMetadata, usage) ?? estCostUsd ?? 0;
     const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
     const usageMissing = inputTokens === 0 && outputTokens === 0;
     const emptyCompletion = inputTokens > 0 && outputTokens === 0;
@@ -525,6 +540,7 @@ var LcrFallbackModel = class {
       outputTokens,
       ...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
       costUsd,
+      ...estCostUsd !== void 0 ? { estCostUsd } : {},
       ...baselineUsd !== void 0 ? { baselineUsd, baselineKind: "last-leg" } : {},
       ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
       ...ctx.requestId ? { requestId: ctx.requestId } : {},
@@ -581,7 +597,7 @@ var LcrFallbackModel = class {
         }
         this.recordProviderSuccess(idx);
         this.settleSticky(idx);
-        this.finalizeOk(ctx, provider, attemptStart, result.usage);
+        this.finalizeOk(ctx, provider, attemptStart, result.usage, void 0, result.providerMetadata);
         if (cache && cacheKey !== void 0 && ctx.settled?.cacheable) {
           this.storeCache(cacheKey, { kind: "generate", result, meta: ctx.settled.meta });
         }
@@ -713,6 +729,7 @@ var LcrFallbackModel = class {
     const servingIdx = idx;
     const servingPos = p;
     let usage;
+    let finishProviderMetadata;
     let contentStreamed = false;
     let ttftMs;
     const stream = new ReadableStream({
@@ -729,6 +746,7 @@ var LcrFallbackModel = class {
             if (done) break;
             if (value.type === "finish") {
               usage = value.usage;
+              finishProviderMetadata = value.providerMetadata;
               const out = value.usage?.outputTokens?.total ?? 0;
               const inp = value.usage?.inputTokens?.total ?? 0;
               if (inp > 0 && out === 0 && !contentStreamed && servingPos + 1 < n) {
@@ -743,7 +761,7 @@ var LcrFallbackModel = class {
           }
           self.recordProviderSuccess(servingIdx);
           self.settleSticky(servingIdx);
-          self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
+          self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs, finishProviderMetadata);
           controller.close();
         } catch (error) {
           self.emitError(error, servingProvider.label);
@@ -1987,6 +2005,7 @@ function createRunwareMediaAdapter(config) {
     // (a `getResponse` poll). Image generation stays on the synchronous `run()`.
     async submit(req) {
       const taskUUID = crypto.randomUUID();
+      const webhookUrl = typeof req.metadata?.["webhookUrl"] === "string" ? req.metadata["webhookUrl"] : void 0;
       await postTask({
         outputType: "URL",
         includeCost: true,
@@ -1994,7 +2013,8 @@ function createRunwareMediaAdapter(config) {
         taskType: "videoInference",
         taskUUID,
         model: req.externalId,
-        deliveryMethod: "async"
+        deliveryMethod: "async",
+        ...webhookUrl ? { webhookURL: webhookUrl } : {}
       });
       return { requestId: taskUUID };
     },
@@ -2062,7 +2082,9 @@ function createFalMediaAdapter(config) {
   };
   const queueBase = (externalId) => externalId.split("/").slice(0, 2).join("/");
   async function submit(req) {
-    const submitRes = await fetchImpl(`${baseUrl}/${req.externalId}`, {
+    const webhookUrl = typeof req.metadata?.["webhookUrl"] === "string" ? req.metadata["webhookUrl"] : void 0;
+    const submitUrl = webhookUrl ? `${baseUrl}/${req.externalId}?fal_webhook=${encodeURIComponent(webhookUrl)}` : `${baseUrl}/${req.externalId}`;
+    const submitRes = await fetchImpl(submitUrl, {
       method: "POST",
       headers,
       body: JSON.stringify(req.input)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ai-lcr",
-  "version": "0.6.5",
+  "version": "0.7.1",
   "description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
   "keywords": [
     "ai",