ai-lcr 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -156,7 +156,7 @@ Any OpenAI-compatible endpoint works — and so does any AI SDK provider package
156
156
 
157
157
  - **Model vendors' own APIs (native):** route straight to [DeepSeek](https://platform.deepseek.com), [OpenAI](https://openai.com), [Anthropic](https://anthropic.com), [Google](https://ai.google.dev), [xAI](https://x.ai), etc. via their AI SDK provider packages — no markup, full native features. See [Route to a model vendor's own API](#route-to-a-model-vendors-own-api-native-providers).
158
158
  - **Text aggregators:** [OpenRouter](https://openrouter.ai) (widest coverage, list pricing) · [Kunavo](https://kunavo.com/?ref=victorimf) (**20% off** every model) · [TokenMart](https://thetokenmart.ai) (15–65% off, varies by model)
159
- - **Image / video:** [Kunavo](https://kunavo.com/?ref=victorimf) (**20% off**) · [TokenMart](https://thetokenmart.ai) · [fal.ai](https://fal.ai) · [Runware](https://runware.ai) — image routing available via `createMediaLCR` (Kunavo + Runware adapters); video on the roadmap
159
+ - **Image / video:** [Kunavo](https://kunavo.com/?ref=victorimf) (**20% off**) · [TokenMart](https://thetokenmart.ai) · [fal.ai](https://fal.ai) · [Runware](https://runware.ai) — routing via `createMediaLCR`. Image: Kunavo + Runware + fal. Video: fal (live, via its async queue API); Kunavo's Veo poll path is implemented but unverified
160
160
 
161
161
  ## Text model pricing
162
162
 
@@ -273,7 +273,8 @@ Two OpenAI-compatible providers, same probe, same day. Cells cover both families
273
273
  - [ ] Bundled price table for zero-config pricing (drop the manual `cost` numbers)
274
274
  - [ ] Provider-quirk middleware (transparently patch known per-provider request quirks, e.g. Kunavo's ignored `max_tokens`)
275
275
  - [ ] Feed probe results into routing automatically (auto-exclude a model from a provider that fails its probe)
276
- - [ ] Image & video model routing (fal.ai / Runware / Kunavo)
276
+ - [x] Image & video model routing (`createMediaLCR`) — image via Kunavo + Runware + fal; **video live via fal** (async queue API)
277
+ - [ ] Normalized cross-provider video price comparison + verified Kunavo/Runware video adapters
277
278
 
278
279
  ## Affiliate disclosure
279
280
 
package/README.zh-CN.md CHANGED
@@ -114,7 +114,7 @@ const lcr = createLCR({
114
114
 
115
115
  - **模型厂商官方 API(原生):** 通过各自的 AI SDK provider 包直连 [DeepSeek](https://platform.deepseek.com)、[OpenAI](https://openai.com)、[Anthropic](https://anthropic.com)、[Google](https://ai.google.dev)、[xAI](https://x.ai) 等——无加价,原生特性齐全。见上方「直连模型厂商官方 API(原生 provider)」一节。
116
116
  - **文本聚合器:** [OpenRouter](https://openrouter.ai)(覆盖最广,列表定价)· [Kunavo](https://kunavo.com/?ref=victorimf)(**全模型 8 折**)· [TokenMart](https://thetokenmart.ai)(按模型 85 折–35 折不等)
117
- - **图像 / 视频:** [Kunavo](https://kunavo.com/?ref=victorimf)(**8 折**)· [TokenMart](https://thetokenmart.ai) · [fal.ai](https://fal.ai) · [Runware](https://runware.ai) —— 路由功能在路线图中
117
+ - **图像 / 视频:** [Kunavo](https://kunavo.com/?ref=victorimf)(**8 折**)· [TokenMart](https://thetokenmart.ai) · [fal.ai](https://fal.ai) · [Runware](https://runware.ai) —— 通过 `createMediaLCR` 路由。图像:Kunavo + Runware + fal。视频:fal(已可用,走其异步队列 API);Kunavo 的 Veo 轮询路径已实现但未验证
118
118
 
119
119
  ## 文本模型价格
120
120
 
@@ -229,7 +229,8 @@ API_KEY=$TOKENMART_API_KEY BASE=https://api.tokenmart.ai \
229
229
  - [ ] 内置价格表,实现零配置定价(省去手填 `cost` 数字)
230
230
  - [ ] provider 怪癖中间件(透明地修补已知怪癖,如 Kunavo 被忽略的 `max_tokens`)
231
231
  - [ ] 把 probe 结果自动接入路由(探测失败的 provider×model 自动从列表剔除)
232
- - [ ] 图像与视频模型路由(fal.ai / Runware / Kunavo)
232
+ - [x] 图像与视频模型路由(`createMediaLCR`)—— 图像走 Kunavo + Runware + fal;**视频已可用,走 fal**(异步队列 API)
233
+ - [ ] 归一化的跨 provider 视频价格对比 + 验证 Kunavo/Runware 视频适配器
233
234
 
234
235
  ## 联盟(Affiliate)披露
235
236
 
package/dist/index.cjs CHANGED
@@ -21,11 +21,12 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
21
21
  var index_exports = {};
22
22
  __export(index_exports, {
23
23
  DEFAULT_REFERENCE: () => DEFAULT_REFERENCE,
24
+ FalMediaError: () => FalMediaError,
24
25
  MEDIA_PRICING: () => MEDIA_PRICING,
25
26
  cheapestRoute: () => cheapestRoute,
26
27
  classifyError: () => classifyError,
27
28
  comparePrices: () => comparePrices,
28
- createHttpSink: () => createHttpSink,
29
+ createFalMediaAdapter: () => createFalMediaAdapter,
29
30
  createKunavoMediaAdapter: () => createKunavoMediaAdapter,
30
31
  createLCR: () => createLCR,
31
32
  createMediaLCR: () => createMediaLCR,
@@ -141,20 +142,12 @@ var LcrFallbackModel = class {
141
142
  errorClass: classifyError(error)
142
143
  });
143
144
  }
144
- /** Cost of one route for the given token counts; 0 if it has no price. */
145
- routeCost(p, inputTokens, outputTokens) {
146
- return p.cost ? inputTokens / 1e6 * p.cost.input + outputTokens / 1e6 * p.cost.output : 0;
147
- }
148
145
  /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
149
146
  finalizeOk(ctx, provider, attemptStart, usage) {
150
147
  ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
151
148
  const inputTokens = usage?.inputTokens?.total ?? 0;
152
149
  const outputTokens = usage?.outputTokens?.total ?? 0;
153
- const costUsd = this.routeCost(provider, inputTokens, outputTokens);
154
- const baselineUsd = this.opts.providers.reduce(
155
- (max, p) => Math.max(max, this.routeCost(p, inputTokens, outputTokens)),
156
- costUsd
157
- );
150
+ const costUsd = provider.cost ? inputTokens / 1e6 * provider.cost.input + outputTokens / 1e6 * provider.cost.output : 0;
158
151
  this.opts.onCost?.({
159
152
  model: this.opts.modelName,
160
153
  provider: provider.label,
@@ -172,8 +165,7 @@ var LcrFallbackModel = class {
172
165
  latencyMs: Date.now() - ctx.startedAt,
173
166
  inputTokens,
174
167
  outputTokens,
175
- costUsd,
176
- baselineUsd
168
+ costUsd
177
169
  });
178
170
  }
179
171
  /** Every provider failed: fire `onCall` with no winner. */
@@ -188,8 +180,7 @@ var LcrFallbackModel = class {
188
180
  latencyMs: Date.now() - ctx.startedAt,
189
181
  inputTokens: 0,
190
182
  outputTokens: 0,
191
- costUsd: 0,
192
- baselineUsd: 0
183
+ costUsd: 0
193
184
  });
194
185
  }
195
186
  async doGenerate(options) {
@@ -345,40 +336,6 @@ function formatCallRecord(record, opts = {}) {
345
336
  return line;
346
337
  }
347
338
 
348
- // src/sink.ts
349
- function createHttpSink(options) {
350
- const {
351
- url,
352
- headers,
353
- project,
354
- dispatch = (task) => {
355
- void task();
356
- },
357
- fetchImpl,
358
- onError
359
- } = options;
360
- const doFetch = fetchImpl ?? globalThis.fetch;
361
- return (record) => {
362
- if (!doFetch) {
363
- onError?.(new Error("ai-lcr: no fetch available for createHttpSink"));
364
- return;
365
- }
366
- const payload = project ? { project, ...record } : record;
367
- dispatch(async () => {
368
- try {
369
- await doFetch(url, {
370
- method: "POST",
371
- headers: { "content-type": "application/json", ...headers },
372
- body: JSON.stringify(payload),
373
- keepalive: true
374
- });
375
- } catch (err) {
376
- onError?.(err);
377
- }
378
- });
379
- };
380
- }
381
-
382
339
  // src/media.ts
383
340
  var DEFAULT_REFERENCE = {
384
341
  image: { width: 1920, height: 1080 },
@@ -776,6 +733,108 @@ var RunwareMediaError = class extends Error {
776
733
  status;
777
734
  };
778
735
 
736
+ // src/adapters/fal-media.ts
737
+ var DEFAULT_BASE3 = "https://queue.fal.run";
738
+ function extractOutputs(raw) {
739
+ if (!raw || typeof raw !== "object") return [];
740
+ const data = raw;
741
+ const out = [];
742
+ const pushUrl = (url, type) => {
743
+ if (typeof url === "string" && url.length > 0) out.push({ url, type });
744
+ };
745
+ if (Array.isArray(data.images)) {
746
+ for (const img of data.images) pushUrl(img?.url, "image");
747
+ }
748
+ pushUrl(data.image?.url, "image");
749
+ if (Array.isArray(data.videos)) {
750
+ for (const v of data.videos) pushUrl(v?.url, "video");
751
+ }
752
+ pushUrl(data.video?.url, "video");
753
+ return out;
754
+ }
755
+ function createFalMediaAdapter(config) {
756
+ const {
757
+ apiKey,
758
+ baseUrl = DEFAULT_BASE3,
759
+ pollIntervalMs = 3e3,
760
+ pollTimeoutMs = 3e5,
761
+ fetchImpl = fetch
762
+ } = config;
763
+ const headers = {
764
+ "content-type": "application/json",
765
+ authorization: `Key ${apiKey}`
766
+ };
767
+ return {
768
+ provider: "fal",
769
+ async run(req) {
770
+ const submitRes = await fetchImpl(`${baseUrl}/${req.externalId}`, {
771
+ method: "POST",
772
+ headers,
773
+ body: JSON.stringify(req.input)
774
+ });
775
+ if (!submitRes.ok) {
776
+ throw new FalMediaError(submitRes.status, await safeText2(submitRes));
777
+ }
778
+ const submit = await submitRes.json();
779
+ const statusUrl = submit.status_url;
780
+ const responseUrl = submit.response_url;
781
+ if (!statusUrl || !responseUrl) {
782
+ throw new Error(
783
+ `ai-lcr: fal submit for "${req.externalId}" returned no status/response URL (keys: ${Object.keys(
784
+ submit
785
+ ).join(", ")})`
786
+ );
787
+ }
788
+ const deadline = Date.now() + pollTimeoutMs;
789
+ let completed = false;
790
+ while (Date.now() < deadline) {
791
+ const statusRes = await fetchImpl(statusUrl, { headers });
792
+ if (!statusRes.ok) {
793
+ throw new FalMediaError(statusRes.status, await safeText2(statusRes));
794
+ }
795
+ const status = String((await statusRes.json()).status ?? "");
796
+ if (status === "COMPLETED") {
797
+ completed = true;
798
+ break;
799
+ }
800
+ await sleep2(pollIntervalMs);
801
+ }
802
+ if (!completed) {
803
+ throw new Error(
804
+ `ai-lcr: fal job for "${req.externalId}" timed out after ${pollTimeoutMs}ms`
805
+ );
806
+ }
807
+ const resultRes = await fetchImpl(responseUrl, { headers });
808
+ if (!resultRes.ok) {
809
+ throw new FalMediaError(resultRes.status, await safeText2(resultRes));
810
+ }
811
+ const outputs = extractOutputs(await resultRes.json());
812
+ if (outputs.length === 0) {
813
+ throw new Error(`ai-lcr: fal returned no media URL for "${req.externalId}"`);
814
+ }
815
+ return { outputs, units: outputs.length };
816
+ }
817
+ };
818
+ }
819
+ var FalMediaError = class extends Error {
820
+ constructor(status, body) {
821
+ super(`fal media HTTP ${status}: ${body.slice(0, 300)}`);
822
+ this.status = status;
823
+ this.name = "FalMediaError";
824
+ }
825
+ status;
826
+ };
827
+ function sleep2(ms) {
828
+ return new Promise((r) => setTimeout(r, ms));
829
+ }
830
+ async function safeText2(res) {
831
+ try {
832
+ return await res.text();
833
+ } catch {
834
+ return "<no body>";
835
+ }
836
+ }
837
+
779
838
  // src/index.ts
780
839
  function isLanguageModel(entry) {
781
840
  return typeof entry.doGenerate === "function";
@@ -819,11 +878,12 @@ function createLCR(config) {
819
878
  // Annotate the CommonJS export names for ESM import in node:
820
879
  0 && (module.exports = {
821
880
  DEFAULT_REFERENCE,
881
+ FalMediaError,
822
882
  MEDIA_PRICING,
823
883
  cheapestRoute,
824
884
  classifyError,
825
885
  comparePrices,
826
- createHttpSink,
886
+ createFalMediaAdapter,
827
887
  createKunavoMediaAdapter,
828
888
  createLCR,
829
889
  createMediaLCR,
package/dist/index.d.cts CHANGED
@@ -65,13 +65,12 @@ interface CallRecord {
65
65
  /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
66
66
  costUsd: number;
67
67
  /**
68
- * What these same tokens would have cost at the **most expensive** configured
69
- * provider for this model — the "if you never routed cheap" baseline. Savings
70
- * = `baselineUsd - costUsd`. Equals `costUsd` (savings 0) when prices are
71
- * missing or the priciest route is the one that served. Self-contained: no
72
- * external price table needed.
68
+ * What the priciest configured route would have cost for this request, so
69
+ * `baselineUsd - costUsd` is the saving from routing cheapest-first. Set by
70
+ * the media router (`createMediaLCR`), where every route has a known price;
71
+ * omitted by the text router, which can't price a baseline per call.
73
72
  */
74
- baselineUsd: number;
73
+ baselineUsd?: number;
75
74
  }
76
75
  /**
77
76
  * Normalize an error into a short, log-friendly class for {@link CallRecord}.
@@ -101,54 +100,6 @@ interface FormatOptions {
101
100
  }
102
101
  declare function formatCallRecord(record: CallRecord, opts?: FormatOptions): string;
103
102
 
104
- /**
105
- * Optional HTTP sink for `onCall` — ship each {@link CallRecord} as JSON to a
106
- * collector (e.g. a self-hosted ai-lcr-dashboard `/api/ingest`, or any endpoint
107
- * that accepts the CallRecord shape).
108
- *
109
- * Fully optional and dashboard-agnostic: omit it and ai-lcr stores nothing;
110
- * point `url` at whatever you run. Logging must never break your app, so a
111
- * failed POST is swallowed by default (surface it via `onError` if you want).
112
- *
113
- * import { createLCR, createHttpSink } from "ai-lcr";
114
- * import { after } from "next/server"; // serverless: don't block the response
115
- *
116
- * const lcr = createLCR({
117
- * models: { ... },
118
- * onCall: createHttpSink({
119
- * url: process.env.LCR_INGEST_URL + "/api/ingest",
120
- * headers: { authorization: `Bearer ${process.env.LCR_INGEST_KEY}` },
121
- * project: process.env.LCR_PROJECT,
122
- * dispatch: after, // run after the response is sent
123
- * }),
124
- * });
125
- */
126
-
127
- interface HttpSinkOptions {
128
- /** Where to POST each CallRecord (a collector that accepts the JSON shape). */
129
- url: string;
130
- /** Extra headers, e.g. `{ authorization: "Bearer <key>" }`. */
131
- headers?: Record<string, string>;
132
- /** Optional tenant/project tag merged into each payload (`{ project, ...record }`). */
133
- project?: string;
134
- /**
135
- * Wrap the dispatch so it survives a serverless function returning. On
136
- * Next.js pass `after` from "next/server"; elsewhere pass a `waitUntil`-style
137
- * function. Defaults to running immediately — correct for long-lived servers,
138
- * but on serverless an un-awaited POST may be cut off, so pass `after`.
139
- */
140
- dispatch?: (task: () => void | Promise<void>) => void;
141
- /** Custom fetch (tests / runtimes without a global `fetch`). */
142
- fetchImpl?: typeof fetch;
143
- /** Called if the POST fails. Failures are swallowed by default. */
144
- onError?: (error: unknown) => void;
145
- }
146
- /**
147
- * Build an `onCall` handler that POSTs each {@link CallRecord} to `url`.
148
- * Returns a plain `(record) => void` — pass it straight to `createLCR`'s `onCall`.
149
- */
150
- declare function createHttpSink(options: HttpSinkOptions): (record: CallRecord) => void;
151
-
152
103
  /**
153
104
  * ai-lcr media routing — Least Cost Routing for image & video models.
154
105
  *
@@ -385,6 +336,53 @@ interface RunwareMediaConfig {
385
336
  }
386
337
  declare function createRunwareMediaAdapter(config: RunwareMediaConfig): MediaAdapter;
387
338
 
339
+ /**
340
+ * fal media adapter — image (queue) + video (queue, async poll).
341
+ *
342
+ * fal serves every model through one async queue API, so a single submit→poll→
343
+ * fetch-result path covers both image and video. That is the whole reason this
344
+ * adapter exists: it is ai-lcr's first VIDEO-capable execution path. (The
345
+ * Runware adapter is image-only; the Kunavo one's video poll loop is unverified.)
346
+ *
347
+ * Implementation note: ai-art's fal adapter uses the `@fal-ai/client` SDK, but
348
+ * ai-lcr deliberately keeps zero provider SDKs — every adapter is raw `fetch`
349
+ * with an injectable `fetchImpl` for testing (see runware-media, kunavo-media).
350
+ * So this re-implements the three queue calls against fal's REST endpoints:
351
+ *
352
+ * 1. submit POST https://queue.fal.run/{model} → { request_id, status_url, response_url }
353
+ * 2. status GET {status_url} → { status: IN_QUEUE | IN_PROGRESS | COMPLETED }
354
+ * 3. result GET {response_url} → { images:[…] } | { video:{url} } | …
355
+ *
356
+ * We follow the `status_url` / `response_url` returned by submit rather than
357
+ * rebuilding them, which sidesteps fal's sub-path quirk (a model like
358
+ * `fal-ai/flux/schnell` submits to the full path but its status/result live
359
+ * under the `fal-ai/flux` base).
360
+ *
361
+ * Auth: fal uses `Authorization: Key {FAL_KEY}` (NOT Bearer).
362
+ *
363
+ * Cost: fal's queue result does not carry a per-call price, so cost is left to
364
+ * the router's normalized estimate (costCents stays undefined; `units` is the
365
+ * output count — one image, or one clip).
366
+ */
367
+
368
+ interface FalMediaConfig {
369
+ apiKey: string;
370
+ /** Override for testing. Defaults to https://queue.fal.run. */
371
+ baseUrl?: string;
372
+ /** Video/job poll cadence (ms). Default 3000. */
373
+ pollIntervalMs?: number;
374
+ /** Max time to wait for a job before giving up (ms). Default 300000 (5m). */
375
+ pollTimeoutMs?: number;
376
+ /** Injected for testing; defaults to global fetch. */
377
+ fetchImpl?: typeof fetch;
378
+ }
379
+ declare function createFalMediaAdapter(config: FalMediaConfig): MediaAdapter;
380
+ /** Carries the HTTP status so the router's `isRetryableError` can classify it. */
381
+ declare class FalMediaError extends Error {
382
+ status: number;
383
+ constructor(status: number, body: string);
384
+ }
385
+
388
386
  /**
389
387
  * ai-lcr — Least Cost Routing for LLMs.
390
388
  *
@@ -438,4 +436,4 @@ type LCRRouter = (modelName: string) => LanguageModelV3;
438
436
  */
439
437
  declare function createLCR(config: LCRConfig): LCRRouter;
440
438
 
441
- export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type FormatOptions, type HttpSinkOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, comparePrices, createHttpSink, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
439
+ export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, FalMediaError, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, comparePrices, createFalMediaAdapter, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
package/dist/index.d.ts CHANGED
@@ -65,13 +65,12 @@ interface CallRecord {
65
65
  /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
66
66
  costUsd: number;
67
67
  /**
68
- * What these same tokens would have cost at the **most expensive** configured
69
- * provider for this model — the "if you never routed cheap" baseline. Savings
70
- * = `baselineUsd - costUsd`. Equals `costUsd` (savings 0) when prices are
71
- * missing or the priciest route is the one that served. Self-contained: no
72
- * external price table needed.
68
+ * What the priciest configured route would have cost for this request, so
69
+ * `baselineUsd - costUsd` is the saving from routing cheapest-first. Set by
70
+ * the media router (`createMediaLCR`), where every route has a known price;
71
+ * omitted by the text router, which can't price a baseline per call.
73
72
  */
74
- baselineUsd: number;
73
+ baselineUsd?: number;
75
74
  }
76
75
  /**
77
76
  * Normalize an error into a short, log-friendly class for {@link CallRecord}.
@@ -101,54 +100,6 @@ interface FormatOptions {
101
100
  }
102
101
  declare function formatCallRecord(record: CallRecord, opts?: FormatOptions): string;
103
102
 
104
- /**
105
- * Optional HTTP sink for `onCall` — ship each {@link CallRecord} as JSON to a
106
- * collector (e.g. a self-hosted ai-lcr-dashboard `/api/ingest`, or any endpoint
107
- * that accepts the CallRecord shape).
108
- *
109
- * Fully optional and dashboard-agnostic: omit it and ai-lcr stores nothing;
110
- * point `url` at whatever you run. Logging must never break your app, so a
111
- * failed POST is swallowed by default (surface it via `onError` if you want).
112
- *
113
- * import { createLCR, createHttpSink } from "ai-lcr";
114
- * import { after } from "next/server"; // serverless: don't block the response
115
- *
116
- * const lcr = createLCR({
117
- * models: { ... },
118
- * onCall: createHttpSink({
119
- * url: process.env.LCR_INGEST_URL + "/api/ingest",
120
- * headers: { authorization: `Bearer ${process.env.LCR_INGEST_KEY}` },
121
- * project: process.env.LCR_PROJECT,
122
- * dispatch: after, // run after the response is sent
123
- * }),
124
- * });
125
- */
126
-
127
- interface HttpSinkOptions {
128
- /** Where to POST each CallRecord (a collector that accepts the JSON shape). */
129
- url: string;
130
- /** Extra headers, e.g. `{ authorization: "Bearer <key>" }`. */
131
- headers?: Record<string, string>;
132
- /** Optional tenant/project tag merged into each payload (`{ project, ...record }`). */
133
- project?: string;
134
- /**
135
- * Wrap the dispatch so it survives a serverless function returning. On
136
- * Next.js pass `after` from "next/server"; elsewhere pass a `waitUntil`-style
137
- * function. Defaults to running immediately — correct for long-lived servers,
138
- * but on serverless an un-awaited POST may be cut off, so pass `after`.
139
- */
140
- dispatch?: (task: () => void | Promise<void>) => void;
141
- /** Custom fetch (tests / runtimes without a global `fetch`). */
142
- fetchImpl?: typeof fetch;
143
- /** Called if the POST fails. Failures are swallowed by default. */
144
- onError?: (error: unknown) => void;
145
- }
146
- /**
147
- * Build an `onCall` handler that POSTs each {@link CallRecord} to `url`.
148
- * Returns a plain `(record) => void` — pass it straight to `createLCR`'s `onCall`.
149
- */
150
- declare function createHttpSink(options: HttpSinkOptions): (record: CallRecord) => void;
151
-
152
103
  /**
153
104
  * ai-lcr media routing — Least Cost Routing for image & video models.
154
105
  *
@@ -385,6 +336,53 @@ interface RunwareMediaConfig {
385
336
  }
386
337
  declare function createRunwareMediaAdapter(config: RunwareMediaConfig): MediaAdapter;
387
338
 
339
+ /**
340
+ * fal media adapter — image (queue) + video (queue, async poll).
341
+ *
342
+ * fal serves every model through one async queue API, so a single submit→poll→
343
+ * fetch-result path covers both image and video. That is the whole reason this
344
+ * adapter exists: it is ai-lcr's first VIDEO-capable execution path. (The
345
+ * Runware adapter is image-only; the Kunavo one's video poll loop is unverified.)
346
+ *
347
+ * Implementation note: ai-art's fal adapter uses the `@fal-ai/client` SDK, but
348
+ * ai-lcr deliberately keeps zero provider SDKs — every adapter is raw `fetch`
349
+ * with an injectable `fetchImpl` for testing (see runware-media, kunavo-media).
350
+ * So this re-implements the three queue calls against fal's REST endpoints:
351
+ *
352
+ * 1. submit POST https://queue.fal.run/{model} → { request_id, status_url, response_url }
353
+ * 2. status GET {status_url} → { status: IN_QUEUE | IN_PROGRESS | COMPLETED }
354
+ * 3. result GET {response_url} → { images:[…] } | { video:{url} } | …
355
+ *
356
+ * We follow the `status_url` / `response_url` returned by submit rather than
357
+ * rebuilding them, which sidesteps fal's sub-path quirk (a model like
358
+ * `fal-ai/flux/schnell` submits to the full path but its status/result live
359
+ * under the `fal-ai/flux` base).
360
+ *
361
+ * Auth: fal uses `Authorization: Key {FAL_KEY}` (NOT Bearer).
362
+ *
363
+ * Cost: fal's queue result does not carry a per-call price, so cost is left to
364
+ * the router's normalized estimate (costCents stays undefined; `units` is the
365
+ * output count — one image, or one clip).
366
+ */
367
+
368
+ interface FalMediaConfig {
369
+ apiKey: string;
370
+ /** Override for testing. Defaults to https://queue.fal.run. */
371
+ baseUrl?: string;
372
+ /** Video/job poll cadence (ms). Default 3000. */
373
+ pollIntervalMs?: number;
374
+ /** Max time to wait for a job before giving up (ms). Default 300000 (5m). */
375
+ pollTimeoutMs?: number;
376
+ /** Injected for testing; defaults to global fetch. */
377
+ fetchImpl?: typeof fetch;
378
+ }
379
+ declare function createFalMediaAdapter(config: FalMediaConfig): MediaAdapter;
380
+ /** Carries the HTTP status so the router's `isRetryableError` can classify it. */
381
+ declare class FalMediaError extends Error {
382
+ status: number;
383
+ constructor(status: number, body: string);
384
+ }
385
+
388
386
  /**
389
387
  * ai-lcr — Least Cost Routing for LLMs.
390
388
  *
@@ -438,4 +436,4 @@ type LCRRouter = (modelName: string) => LanguageModelV3;
438
436
  */
439
437
  declare function createLCR(config: LCRConfig): LCRRouter;
440
438
 
441
- export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type FormatOptions, type HttpSinkOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, comparePrices, createHttpSink, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
439
+ export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, FalMediaError, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, comparePrices, createFalMediaAdapter, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
package/dist/index.js CHANGED
@@ -102,20 +102,12 @@ var LcrFallbackModel = class {
102
102
  errorClass: classifyError(error)
103
103
  });
104
104
  }
105
- /** Cost of one route for the given token counts; 0 if it has no price. */
106
- routeCost(p, inputTokens, outputTokens) {
107
- return p.cost ? inputTokens / 1e6 * p.cost.input + outputTokens / 1e6 * p.cost.output : 0;
108
- }
109
105
  /** Winner settled: record the attempt, fire `onCost` (compat) + `onCall`. */
110
106
  finalizeOk(ctx, provider, attemptStart, usage) {
111
107
  ctx.attempts.push({ provider: provider.label, ok: true, latencyMs: Date.now() - attemptStart });
112
108
  const inputTokens = usage?.inputTokens?.total ?? 0;
113
109
  const outputTokens = usage?.outputTokens?.total ?? 0;
114
- const costUsd = this.routeCost(provider, inputTokens, outputTokens);
115
- const baselineUsd = this.opts.providers.reduce(
116
- (max, p) => Math.max(max, this.routeCost(p, inputTokens, outputTokens)),
117
- costUsd
118
- );
110
+ const costUsd = provider.cost ? inputTokens / 1e6 * provider.cost.input + outputTokens / 1e6 * provider.cost.output : 0;
119
111
  this.opts.onCost?.({
120
112
  model: this.opts.modelName,
121
113
  provider: provider.label,
@@ -133,8 +125,7 @@ var LcrFallbackModel = class {
133
125
  latencyMs: Date.now() - ctx.startedAt,
134
126
  inputTokens,
135
127
  outputTokens,
136
- costUsd,
137
- baselineUsd
128
+ costUsd
138
129
  });
139
130
  }
140
131
  /** Every provider failed: fire `onCall` with no winner. */
@@ -149,8 +140,7 @@ var LcrFallbackModel = class {
149
140
  latencyMs: Date.now() - ctx.startedAt,
150
141
  inputTokens: 0,
151
142
  outputTokens: 0,
152
- costUsd: 0,
153
- baselineUsd: 0
143
+ costUsd: 0
154
144
  });
155
145
  }
156
146
  async doGenerate(options) {
@@ -306,40 +296,6 @@ function formatCallRecord(record, opts = {}) {
306
296
  return line;
307
297
  }
308
298
 
309
- // src/sink.ts
310
- function createHttpSink(options) {
311
- const {
312
- url,
313
- headers,
314
- project,
315
- dispatch = (task) => {
316
- void task();
317
- },
318
- fetchImpl,
319
- onError
320
- } = options;
321
- const doFetch = fetchImpl ?? globalThis.fetch;
322
- return (record) => {
323
- if (!doFetch) {
324
- onError?.(new Error("ai-lcr: no fetch available for createHttpSink"));
325
- return;
326
- }
327
- const payload = project ? { project, ...record } : record;
328
- dispatch(async () => {
329
- try {
330
- await doFetch(url, {
331
- method: "POST",
332
- headers: { "content-type": "application/json", ...headers },
333
- body: JSON.stringify(payload),
334
- keepalive: true
335
- });
336
- } catch (err) {
337
- onError?.(err);
338
- }
339
- });
340
- };
341
- }
342
-
343
299
  // src/media.ts
344
300
  var DEFAULT_REFERENCE = {
345
301
  image: { width: 1920, height: 1080 },
@@ -737,6 +693,108 @@ var RunwareMediaError = class extends Error {
737
693
  status;
738
694
  };
739
695
 
696
+ // src/adapters/fal-media.ts
697
+ var DEFAULT_BASE3 = "https://queue.fal.run";
698
+ function extractOutputs(raw) {
699
+ if (!raw || typeof raw !== "object") return [];
700
+ const data = raw;
701
+ const out = [];
702
+ const pushUrl = (url, type) => {
703
+ if (typeof url === "string" && url.length > 0) out.push({ url, type });
704
+ };
705
+ if (Array.isArray(data.images)) {
706
+ for (const img of data.images) pushUrl(img?.url, "image");
707
+ }
708
+ pushUrl(data.image?.url, "image");
709
+ if (Array.isArray(data.videos)) {
710
+ for (const v of data.videos) pushUrl(v?.url, "video");
711
+ }
712
+ pushUrl(data.video?.url, "video");
713
+ return out;
714
+ }
715
+ function createFalMediaAdapter(config) {
716
+ const {
717
+ apiKey,
718
+ baseUrl = DEFAULT_BASE3,
719
+ pollIntervalMs = 3e3,
720
+ pollTimeoutMs = 3e5,
721
+ fetchImpl = fetch
722
+ } = config;
723
+ const headers = {
724
+ "content-type": "application/json",
725
+ authorization: `Key ${apiKey}`
726
+ };
727
+ return {
728
+ provider: "fal",
729
+ async run(req) {
730
+ const submitRes = await fetchImpl(`${baseUrl}/${req.externalId}`, {
731
+ method: "POST",
732
+ headers,
733
+ body: JSON.stringify(req.input)
734
+ });
735
+ if (!submitRes.ok) {
736
+ throw new FalMediaError(submitRes.status, await safeText2(submitRes));
737
+ }
738
+ const submit = await submitRes.json();
739
+ const statusUrl = submit.status_url;
740
+ const responseUrl = submit.response_url;
741
+ if (!statusUrl || !responseUrl) {
742
+ throw new Error(
743
+ `ai-lcr: fal submit for "${req.externalId}" returned no status/response URL (keys: ${Object.keys(
744
+ submit
745
+ ).join(", ")})`
746
+ );
747
+ }
748
+ const deadline = Date.now() + pollTimeoutMs;
749
+ let completed = false;
750
+ while (Date.now() < deadline) {
751
+ const statusRes = await fetchImpl(statusUrl, { headers });
752
+ if (!statusRes.ok) {
753
+ throw new FalMediaError(statusRes.status, await safeText2(statusRes));
754
+ }
755
+ const status = String((await statusRes.json()).status ?? "");
756
+ if (status === "COMPLETED") {
757
+ completed = true;
758
+ break;
759
+ }
760
+ await sleep2(pollIntervalMs);
761
+ }
762
+ if (!completed) {
763
+ throw new Error(
764
+ `ai-lcr: fal job for "${req.externalId}" timed out after ${pollTimeoutMs}ms`
765
+ );
766
+ }
767
+ const resultRes = await fetchImpl(responseUrl, { headers });
768
+ if (!resultRes.ok) {
769
+ throw new FalMediaError(resultRes.status, await safeText2(resultRes));
770
+ }
771
+ const outputs = extractOutputs(await resultRes.json());
772
+ if (outputs.length === 0) {
773
+ throw new Error(`ai-lcr: fal returned no media URL for "${req.externalId}"`);
774
+ }
775
+ return { outputs, units: outputs.length };
776
+ }
777
+ };
778
+ }
779
+ var FalMediaError = class extends Error {
780
+ constructor(status, body) {
781
+ super(`fal media HTTP ${status}: ${body.slice(0, 300)}`);
782
+ this.status = status;
783
+ this.name = "FalMediaError";
784
+ }
785
+ status;
786
+ };
787
+ function sleep2(ms) {
788
+ return new Promise((r) => setTimeout(r, ms));
789
+ }
790
+ async function safeText2(res) {
791
+ try {
792
+ return await res.text();
793
+ } catch {
794
+ return "<no body>";
795
+ }
796
+ }
797
+
740
798
  // src/index.ts
741
799
  function isLanguageModel(entry) {
742
800
  return typeof entry.doGenerate === "function";
@@ -779,11 +837,12 @@ function createLCR(config) {
779
837
  }
780
838
  export {
781
839
  DEFAULT_REFERENCE,
840
+ FalMediaError,
782
841
  MEDIA_PRICING,
783
842
  cheapestRoute,
784
843
  classifyError,
785
844
  comparePrices,
786
- createHttpSink,
845
+ createFalMediaAdapter,
787
846
  createKunavoMediaAdapter,
788
847
  createLCR,
789
848
  createMediaLCR,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-lcr",
3
- "version": "0.2.1",
3
+ "version": "0.2.2",
4
4
  "description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
5
5
  "keywords": [
6
6
  "ai",