ai-lcr 0.6.5 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,59 @@ All notable changes to `ai-lcr` are documented here. The format follows
4
4
  [Keep a Changelog](https://keepachangelog.com/), and the project adheres to
5
5
  [Semantic Versioning](https://semver.org/).
6
6
 
7
+ ## [0.7.1] — 2026-06-20
8
+
9
+ Async media adapters now forward a caller-supplied webhook URL to the provider,
10
+ so async video jobs can complete by **push** instead of poll-only.
11
+
12
+ ### Added
13
+
14
+ - **`metadata.webhookUrl` is forwarded to the provider on async `submit`.**
15
+ `runware-media` adds it as the `videoInference` task's `webhookURL`; `fal-media`
16
+ appends it as the `?fal_webhook=` query param on the submit POST. The webhook is
17
+ a push path — the caller still polls as a fallback. Fixed task fields stay
18
+ un-clobberable (placed after the input spread). `metadata` was previously
19
+ accepted but dropped, so the documented "webhook hint" never reached the
20
+ provider; now it does.
21
+
22
+ ## [0.7.0] — 2026-06-20
23
+
24
+ The text router now records the **provider-reported actual cost** when a provider
25
+ returns one, instead of always estimating from the price table. The table becomes
26
+ the routing input and the drift baseline (`estCostUsd`); the recorded `costUsd` is
27
+ the real bill wherever the provider gives it.
28
+
29
+ ### Why
30
+
31
+ A static price table can only encode one price per model, but an aggregator
32
+ (OpenRouter) routes a single model across many sub-providers whose prices differ
33
+ several-fold, picking one per call — so `tokens × table` is structurally unable to
34
+ match the bill for multi-provider models (measured: `deepseek-v4-pro` reconciled at
35
+ ~57% of the real cost, while single-provider models like Gemini/Claude/GPT matched
36
+ at 100%). The provider's own number already accounts for which sub-provider served,
37
+ every token kind (cache read/write, reasoning), and fees — none of which a flat
38
+ table can track.
39
+
40
+ ### Added
41
+
42
+ - **`costUsd` prefers the provider-reported actual cost** (text path). Read from
43
+ OpenRouter's `providerMetadata.openrouter.usage` —
44
+ `costDetails.upstreamInferenceCost` (the real upstream / BYOK model spend) when
45
+ present, otherwise `cost` (the credit charge) — and from an OpenAI-compatible
46
+ provider's `estimated_cost` (or, failing that, `cost`) on the raw usage body. Requires the caller to enable
47
+ usage accounting on the provider (e.g. OpenRouter `usage: { include: true }`);
48
+ without it, behavior is unchanged.
49
+ - **`estCostUsd` is now set on text records** (previously media-only) — the
50
+ price-table prediction for the same usage. `costUsd − estCostUsd` is the
51
+ price-table drift signal, so a dashboard's drift panel now works for text too.
52
+
53
+ ### Changed
54
+
55
+ - When no provider cost is reported, `costUsd` still equals the price-table
56
+ estimate (and `estCostUsd` equals it, so no drift is flagged) — a pure fallback,
57
+ fully backward-compatible. The streaming path reads the reported cost from the
58
+ `finish` chunk's `providerMetadata`.
59
+
7
60
  ## [0.6.5] — 2026-06-16
8
61
 
9
62
  Bundled price table now covers the open-weights labs, not just the Western
package/dist/index.cjs CHANGED
@@ -341,6 +341,20 @@ function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
341
341
  const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
342
342
  return cached / 1e6 * (cost.input - cost.cacheRead);
343
343
  }
344
/**
 * Extract the provider-reported actual cost (USD) for a call, if one exists.
 *
 * Lookup order:
 *   1. `providerMetadata.openrouter.usage.costDetails.upstreamInferenceCost`,
 *      when it is a finite number greater than zero;
 *   2. `providerMetadata.openrouter.usage.cost`;
 *   3. `usage.raw.estimated_cost`, then `usage.raw.cost`.
 *
 * Only finite numbers are accepted. `NaN` / `Infinity` pass a bare
 * `typeof === "number"` check, and returning them would defeat the caller's
 * `?? estCostUsd` fallback (they are not nullish) and poison cost aggregates.
 *
 * @param {object} [providerMetadata] Provider metadata attached to the result / finish chunk.
 * @param {object} [usage] Usage object; its optional `raw` field is the provider's raw usage body.
 * @returns {number | undefined} The reported cost, or `undefined` when none is usable.
 */
function reportedCost(providerMetadata, usage) {
  const orUsage = providerMetadata?.openrouter?.usage;
  if (orUsage) {
    const upstream = orUsage.costDetails?.upstreamInferenceCost;
    // A zero upstream cost is treated as "not reported" so `cost` is used instead.
    if (Number.isFinite(upstream) && upstream > 0) return upstream;
    if (Number.isFinite(orUsage.cost)) return orUsage.cost;
  }
  const raw = usage?.raw;
  if (raw) {
    // Check each field on its own so a malformed `estimated_cost` does not
    // hide a valid `cost`.
    if (Number.isFinite(raw["estimated_cost"])) return raw["estimated_cost"];
    if (Number.isFinite(raw["cost"])) return raw["cost"];
  }
  return void 0;
}
344
358
  function requestIdFrom(options) {
345
359
  const raw = options.providerOptions?.lcr?.requestId;
346
360
  return typeof raw === "string" && raw.length > 0 ? raw : void 0;
@@ -539,12 +553,13 @@ var LcrFallbackModel = class {
539
553
  return baseline;
540
554
  }
541
555
  /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
542
- finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
556
+ finalizeOk(ctx, provider, attemptStart, usage, ttftMs, providerMetadata) {
543
557
  ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
544
558
  const inputTokens = usage?.inputTokens?.total ?? 0;
545
559
  const outputTokens = usage?.outputTokens?.total ?? 0;
546
560
  const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
547
- const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
561
+ const estCostUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : void 0;
562
+ const costUsd = reportedCost(providerMetadata, usage) ?? estCostUsd ?? 0;
548
563
  const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
549
564
  const usageMissing = inputTokens === 0 && outputTokens === 0;
550
565
  const emptyCompletion = inputTokens > 0 && outputTokens === 0;
@@ -579,6 +594,7 @@ var LcrFallbackModel = class {
579
594
  outputTokens,
580
595
  ...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
581
596
  costUsd,
597
+ ...estCostUsd !== void 0 ? { estCostUsd } : {},
582
598
  ...baselineUsd !== void 0 ? { baselineUsd, baselineKind: "last-leg" } : {},
583
599
  ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
584
600
  ...ctx.requestId ? { requestId: ctx.requestId } : {},
@@ -635,7 +651,7 @@ var LcrFallbackModel = class {
635
651
  }
636
652
  this.recordProviderSuccess(idx);
637
653
  this.settleSticky(idx);
638
- this.finalizeOk(ctx, provider, attemptStart, result.usage);
654
+ this.finalizeOk(ctx, provider, attemptStart, result.usage, void 0, result.providerMetadata);
639
655
  if (cache && cacheKey !== void 0 && ctx.settled?.cacheable) {
640
656
  this.storeCache(cacheKey, { kind: "generate", result, meta: ctx.settled.meta });
641
657
  }
@@ -767,6 +783,7 @@ var LcrFallbackModel = class {
767
783
  const servingIdx = idx;
768
784
  const servingPos = p;
769
785
  let usage;
786
+ let finishProviderMetadata;
770
787
  let contentStreamed = false;
771
788
  let ttftMs;
772
789
  const stream = new ReadableStream({
@@ -783,6 +800,7 @@ var LcrFallbackModel = class {
783
800
  if (done) break;
784
801
  if (value.type === "finish") {
785
802
  usage = value.usage;
803
+ finishProviderMetadata = value.providerMetadata;
786
804
  const out = value.usage?.outputTokens?.total ?? 0;
787
805
  const inp = value.usage?.inputTokens?.total ?? 0;
788
806
  if (inp > 0 && out === 0 && !contentStreamed && servingPos + 1 < n) {
@@ -797,7 +815,7 @@ var LcrFallbackModel = class {
797
815
  }
798
816
  self.recordProviderSuccess(servingIdx);
799
817
  self.settleSticky(servingIdx);
800
- self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
818
+ self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs, finishProviderMetadata);
801
819
  controller.close();
802
820
  } catch (error) {
803
821
  self.emitError(error, servingProvider.label);
@@ -2041,6 +2059,7 @@ function createRunwareMediaAdapter(config) {
2041
2059
  // (a `getResponse` poll). Image generation stays on the synchronous `run()`.
2042
2060
  async submit(req) {
2043
2061
  const taskUUID = crypto.randomUUID();
2062
+ const webhookUrl = typeof req.metadata?.["webhookUrl"] === "string" ? req.metadata["webhookUrl"] : void 0;
2044
2063
  await postTask({
2045
2064
  outputType: "URL",
2046
2065
  includeCost: true,
@@ -2048,7 +2067,8 @@ function createRunwareMediaAdapter(config) {
2048
2067
  taskType: "videoInference",
2049
2068
  taskUUID,
2050
2069
  model: req.externalId,
2051
- deliveryMethod: "async"
2070
+ deliveryMethod: "async",
2071
+ ...webhookUrl ? { webhookURL: webhookUrl } : {}
2052
2072
  });
2053
2073
  return { requestId: taskUUID };
2054
2074
  },
@@ -2116,7 +2136,9 @@ function createFalMediaAdapter(config) {
2116
2136
  };
2117
2137
  const queueBase = (externalId) => externalId.split("/").slice(0, 2).join("/");
2118
2138
  async function submit(req) {
2119
- const submitRes = await fetchImpl(`${baseUrl}/${req.externalId}`, {
2139
+ const webhookUrl = typeof req.metadata?.["webhookUrl"] === "string" ? req.metadata["webhookUrl"] : void 0;
2140
+ const submitUrl = webhookUrl ? `${baseUrl}/${req.externalId}?fal_webhook=${encodeURIComponent(webhookUrl)}` : `${baseUrl}/${req.externalId}`;
2141
+ const submitRes = await fetchImpl(submitUrl, {
2120
2142
  method: "POST",
2121
2143
  headers,
2122
2144
  body: JSON.stringify(req.input)
package/dist/index.js CHANGED
@@ -287,6 +287,20 @@ function cacheSavingForUsage(cost, inputTokens, cacheReadTokens) {
287
287
  const cached = Math.min(Math.max(cacheReadTokens, 0), inputTokens);
288
288
  return cached / 1e6 * (cost.input - cost.cacheRead);
289
289
  }
290
/**
 * Extract the provider-reported actual cost (USD) for a call, if one exists.
 *
 * Lookup order:
 *   1. `providerMetadata.openrouter.usage.costDetails.upstreamInferenceCost`,
 *      when it is a finite number greater than zero;
 *   2. `providerMetadata.openrouter.usage.cost`;
 *   3. `usage.raw.estimated_cost`, then `usage.raw.cost`.
 *
 * Only finite numbers are accepted. `NaN` / `Infinity` pass a bare
 * `typeof === "number"` check, and returning them would defeat the caller's
 * `?? estCostUsd` fallback (they are not nullish) and poison cost aggregates.
 *
 * @param {object} [providerMetadata] Provider metadata attached to the result / finish chunk.
 * @param {object} [usage] Usage object; its optional `raw` field is the provider's raw usage body.
 * @returns {number | undefined} The reported cost, or `undefined` when none is usable.
 */
function reportedCost(providerMetadata, usage) {
  const orUsage = providerMetadata?.openrouter?.usage;
  if (orUsage) {
    const upstream = orUsage.costDetails?.upstreamInferenceCost;
    // A zero upstream cost is treated as "not reported" so `cost` is used instead.
    if (Number.isFinite(upstream) && upstream > 0) return upstream;
    if (Number.isFinite(orUsage.cost)) return orUsage.cost;
  }
  const raw = usage?.raw;
  if (raw) {
    // Check each field on its own so a malformed `estimated_cost` does not
    // hide a valid `cost`.
    if (Number.isFinite(raw["estimated_cost"])) return raw["estimated_cost"];
    if (Number.isFinite(raw["cost"])) return raw["cost"];
  }
  return void 0;
}
290
304
  function requestIdFrom(options) {
291
305
  const raw = options.providerOptions?.lcr?.requestId;
292
306
  return typeof raw === "string" && raw.length > 0 ? raw : void 0;
@@ -485,12 +499,13 @@ var LcrFallbackModel = class {
485
499
  return baseline;
486
500
  }
487
501
  /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
488
- finalizeOk(ctx, provider, attemptStart, usage, ttftMs) {
502
+ finalizeOk(ctx, provider, attemptStart, usage, ttftMs, providerMetadata) {
489
503
  ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
490
504
  const inputTokens = usage?.inputTokens?.total ?? 0;
491
505
  const outputTokens = usage?.outputTokens?.total ?? 0;
492
506
  const cacheReadTokens = usage?.inputTokens?.cacheRead ?? 0;
493
- const costUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : 0;
507
+ const estCostUsd = provider.cost ? costForUsage(provider.cost, inputTokens, outputTokens, cacheReadTokens) : void 0;
508
+ const costUsd = reportedCost(providerMetadata, usage) ?? estCostUsd ?? 0;
494
509
  const cachedSavingUsd = provider.cost ? cacheSavingForUsage(provider.cost, inputTokens, cacheReadTokens) : 0;
495
510
  const usageMissing = inputTokens === 0 && outputTokens === 0;
496
511
  const emptyCompletion = inputTokens > 0 && outputTokens === 0;
@@ -525,6 +540,7 @@ var LcrFallbackModel = class {
525
540
  outputTokens,
526
541
  ...cacheReadTokens > 0 ? { cachedInputTokens: cacheReadTokens } : {},
527
542
  costUsd,
543
+ ...estCostUsd !== void 0 ? { estCostUsd } : {},
528
544
  ...baselineUsd !== void 0 ? { baselineUsd, baselineKind: "last-leg" } : {},
529
545
  ...cachedSavingUsd > 0 ? { cachedSavingUsd } : {},
530
546
  ...ctx.requestId ? { requestId: ctx.requestId } : {},
@@ -581,7 +597,7 @@ var LcrFallbackModel = class {
581
597
  }
582
598
  this.recordProviderSuccess(idx);
583
599
  this.settleSticky(idx);
584
- this.finalizeOk(ctx, provider, attemptStart, result.usage);
600
+ this.finalizeOk(ctx, provider, attemptStart, result.usage, void 0, result.providerMetadata);
585
601
  if (cache && cacheKey !== void 0 && ctx.settled?.cacheable) {
586
602
  this.storeCache(cacheKey, { kind: "generate", result, meta: ctx.settled.meta });
587
603
  }
@@ -713,6 +729,7 @@ var LcrFallbackModel = class {
713
729
  const servingIdx = idx;
714
730
  const servingPos = p;
715
731
  let usage;
732
+ let finishProviderMetadata;
716
733
  let contentStreamed = false;
717
734
  let ttftMs;
718
735
  const stream = new ReadableStream({
@@ -729,6 +746,7 @@ var LcrFallbackModel = class {
729
746
  if (done) break;
730
747
  if (value.type === "finish") {
731
748
  usage = value.usage;
749
+ finishProviderMetadata = value.providerMetadata;
732
750
  const out = value.usage?.outputTokens?.total ?? 0;
733
751
  const inp = value.usage?.inputTokens?.total ?? 0;
734
752
  if (inp > 0 && out === 0 && !contentStreamed && servingPos + 1 < n) {
@@ -743,7 +761,7 @@ var LcrFallbackModel = class {
743
761
  }
744
762
  self.recordProviderSuccess(servingIdx);
745
763
  self.settleSticky(servingIdx);
746
- self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs);
764
+ self.finalizeOk(ctx, servingProvider, servingAttemptStart, usage, ttftMs, finishProviderMetadata);
747
765
  controller.close();
748
766
  } catch (error) {
749
767
  self.emitError(error, servingProvider.label);
@@ -1987,6 +2005,7 @@ function createRunwareMediaAdapter(config) {
1987
2005
  // (a `getResponse` poll). Image generation stays on the synchronous `run()`.
1988
2006
  async submit(req) {
1989
2007
  const taskUUID = crypto.randomUUID();
2008
+ const webhookUrl = typeof req.metadata?.["webhookUrl"] === "string" ? req.metadata["webhookUrl"] : void 0;
1990
2009
  await postTask({
1991
2010
  outputType: "URL",
1992
2011
  includeCost: true,
@@ -1994,7 +2013,8 @@ function createRunwareMediaAdapter(config) {
1994
2013
  taskType: "videoInference",
1995
2014
  taskUUID,
1996
2015
  model: req.externalId,
1997
- deliveryMethod: "async"
2016
+ deliveryMethod: "async",
2017
+ ...webhookUrl ? { webhookURL: webhookUrl } : {}
1998
2018
  });
1999
2019
  return { requestId: taskUUID };
2000
2020
  },
@@ -2062,7 +2082,9 @@ function createFalMediaAdapter(config) {
2062
2082
  };
2063
2083
  const queueBase = (externalId) => externalId.split("/").slice(0, 2).join("/");
2064
2084
  async function submit(req) {
2065
- const submitRes = await fetchImpl(`${baseUrl}/${req.externalId}`, {
2085
+ const webhookUrl = typeof req.metadata?.["webhookUrl"] === "string" ? req.metadata["webhookUrl"] : void 0;
2086
+ const submitUrl = webhookUrl ? `${baseUrl}/${req.externalId}?fal_webhook=${encodeURIComponent(webhookUrl)}` : `${baseUrl}/${req.externalId}`;
2087
+ const submitRes = await fetchImpl(submitUrl, {
2066
2088
  method: "POST",
2067
2089
  headers,
2068
2090
  body: JSON.stringify(req.input)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-lcr",
3
- "version": "0.6.5",
3
+ "version": "0.7.1",
4
4
  "description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
5
5
  "keywords": [
6
6
  "ai",