ai-lcr 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -135,25 +135,6 @@ const lcr = createLCR({
135
135
  onCall: (record) => console.log(JSON.stringify(record)),
136
136
  ```
137
137
 
138
- Or ship each record to an HTTP collector with the built-in `createHttpSink` (fire-and-forget, never throws, dashboard-agnostic):
139
-
140
- ```ts
141
- import { createLCR, createHttpSink } from "ai-lcr";
142
- import { after } from "next/server"; // serverless: don't block the response
143
-
144
- const lcr = createLCR({
145
- models: { /* … */ },
146
- onCall: createHttpSink({
147
- url: `${process.env.LCR_INGEST_URL}/api/ingest`,
148
- headers: { authorization: `Bearer ${process.env.LCR_INGEST_KEY}` },
149
- project: process.env.LCR_PROJECT, // optional tag if one collector serves several apps
150
- dispatch: after, // run after the response is sent (serverless-safe)
151
- }),
152
- });
153
- ```
154
-
155
- Point `url` at anything that accepts the `CallRecord` JSON — including the self-hostable companion dashboard, **[ai-lcr-dashboard](https://github.com/victorzhrn/ai-lcr-dashboard)** (Spend / Calls / Failover rate + a live failover feed). You run your own instance, so the data never leaves your infrastructure; a [db9](https://db9.ai) database can be provisioned in seconds if you don't want to stand one up yourself.
156
-
157
138
  ```ts
158
139
  interface CallRecord {
159
140
  id: string; // correlation id, one per request
@@ -165,8 +146,7 @@ interface CallRecord {
165
146
  latencyMs: number;
166
147
  inputTokens: number;
167
148
  outputTokens: number;
168
- costUsd: number; // what the winner charged for these tokens
169
- baselineUsd: number; // what the priciest configured route would cost → savings = baselineUsd - costUsd
149
+ costUsd: number;
170
150
  }
171
151
  ```
172
152
 
package/dist/index.cjs CHANGED
@@ -425,30 +425,75 @@ function comparePrices(registry, ref = DEFAULT_REFERENCE) {
425
425
  };
426
426
  });
427
427
  }
428
+ function newMediaCallId() {
429
+ const c = globalThis.crypto;
430
+ return c?.randomUUID ? c.randomUUID() : `lcr_${Date.now().toString(36)}`;
431
+ }
428
432
  function createMediaLCR(config) {
429
- const { registry, adapters, reference = DEFAULT_REFERENCE, onError, onCost } = config;
433
+ const { registry, adapters, reference = DEFAULT_REFERENCE, onError, onCost, onCall } = config;
430
434
  return async function generate(modelId, input) {
431
435
  const def = registry[modelId];
432
436
  if (!def) {
433
437
  throw new Error(`ai-lcr: unknown media model "${modelId}" \u2014 add it to the registry`);
434
438
  }
435
439
  const ranked = rankRoutes(def, reference);
440
+ const baselineUsd = ranked.length > 0 ? Math.max(...ranked.map((r) => r.refCents)) / 100 : 0;
441
+ const startedAt = Date.now();
442
+ const attempts = [];
436
443
  let lastErr;
444
+ const emitFail = () => onCall?.({
445
+ id: newMediaCallId(),
446
+ model: modelId,
447
+ attempts,
448
+ winner: void 0,
449
+ ok: false,
450
+ failedOver: attempts.length > 1,
451
+ latencyMs: Date.now() - startedAt,
452
+ inputTokens: 0,
453
+ outputTokens: 0,
454
+ costUsd: 0,
455
+ baselineUsd
456
+ });
437
457
  for (const route of ranked) {
438
458
  const adapter = adapters[route.provider];
439
459
  if (!adapter) continue;
460
+ const attemptStart = Date.now();
440
461
  try {
441
462
  const result = await adapter.run({ externalId: route.externalId, input });
442
463
  const estimated = result.costCents === void 0;
443
464
  const costCents = estimated ? route.refCents * (result.units ?? 1) : result.costCents;
465
+ attempts.push({ provider: route.provider, ok: true, latencyMs: Date.now() - attemptStart });
444
466
  onCost?.({ modelId, provider: route.provider, costCents, estimated });
467
+ onCall?.({
468
+ id: newMediaCallId(),
469
+ model: modelId,
470
+ attempts,
471
+ winner: route.provider,
472
+ ok: true,
473
+ failedOver: attempts.length > 1,
474
+ latencyMs: Date.now() - startedAt,
475
+ inputTokens: 0,
476
+ outputTokens: 0,
477
+ costUsd: costCents / 100,
478
+ baselineUsd
479
+ });
445
480
  return { outputs: result.outputs, provider: route.provider, costCents, estimated };
446
481
  } catch (err) {
447
482
  lastErr = err;
483
+ attempts.push({
484
+ provider: route.provider,
485
+ ok: false,
486
+ latencyMs: Date.now() - attemptStart,
487
+ errorClass: classifyError(err)
488
+ });
448
489
  onError?.(err, route.provider);
449
- if (!isRetryableError(err)) throw err;
490
+ if (!isRetryableError(err)) {
491
+ emitFail();
492
+ throw err;
493
+ }
450
494
  }
451
495
  }
496
+ emitFail();
452
497
  throw lastErr instanceof Error ? lastErr : new Error(`ai-lcr: no provider could serve media model "${modelId}"`);
453
498
  };
454
499
  }
package/dist/index.d.cts CHANGED
@@ -149,6 +149,22 @@ interface HttpSinkOptions {
149
149
  */
150
150
  declare function createHttpSink(options: HttpSinkOptions): (record: CallRecord) => void;
151
151
 
152
+ /**
153
+ * ai-lcr media routing — Least Cost Routing for image & video models.
154
+ *
155
+ * The text router (./index, ./fallback) is built on the AI SDK's
156
+ * `LanguageModelV3` and only handles token-billed chat/completion. Image and
157
+ * video providers are a different world: outputs are files (URLs), pricing
158
+ * comes in incompatible units (per-image, per-second, per-call, per-megapixel),
159
+ * and video is a long-running async job. This module is the parallel, self-
160
+ * contained media side — no `LanguageModelV3` dependency.
161
+ *
162
+ * The core idea is the SAME as the text LCR: keep a list of providers per
163
+ * model, route to the cheapest healthy one, fall back on failure, report real
164
+ * cost. The only new problem is making prices comparable, which we solve by
165
+ * normalizing every provider's price to ONE reference output (see ReferenceSpec).
166
+ */
167
+
152
168
  type MediaModality = "image" | "video";
153
169
  /**
154
170
  * Pricing unit a provider bills in. `cents` on MediaPricing is the price for
@@ -268,6 +284,13 @@ interface MediaLCRConfig {
268
284
  reference?: ReferenceSpec;
269
285
  onError?: (error: Error, provider: string) => void;
270
286
  onCost?: (event: MediaCostEvent) => void;
287
+ /**
288
+ * One correlated {@link CallRecord} per settled request — the full failover
289
+ * chain, winner, latency, and cost — mirroring the text side's `onCall`, so
290
+ * the same dashboard sink works for image/video. Fire-and-forget; never
291
+ * throws. Media records carry no token counts (inputTokens/outputTokens = 0).
292
+ */
293
+ onCall?: (record: CallRecord) => void;
271
294
  }
272
295
  interface MediaRunResult {
273
296
  outputs: MediaOutput[];
@@ -275,11 +298,6 @@ interface MediaRunResult {
275
298
  costCents: number;
276
299
  estimated: boolean;
277
300
  }
278
- /**
279
- * Build a media Least Cost Router. Returns `generate(modelId, input)` which
280
- * tries providers cheapest-first and falls through on a retryable error —
281
- * exactly the text LCR's contract, for image/video.
282
- */
283
301
  declare function createMediaLCR(config: MediaLCRConfig): (modelId: string, input: Record<string, unknown>) => Promise<MediaRunResult>;
284
302
 
285
303
  /**
package/dist/index.d.ts CHANGED
@@ -149,6 +149,22 @@ interface HttpSinkOptions {
149
149
  */
150
150
  declare function createHttpSink(options: HttpSinkOptions): (record: CallRecord) => void;
151
151
 
152
+ /**
153
+ * ai-lcr media routing — Least Cost Routing for image & video models.
154
+ *
155
+ * The text router (./index, ./fallback) is built on the AI SDK's
156
+ * `LanguageModelV3` and only handles token-billed chat/completion. Image and
157
+ * video providers are a different world: outputs are files (URLs), pricing
158
+ * comes in incompatible units (per-image, per-second, per-call, per-megapixel),
159
+ * and video is a long-running async job. This module is the parallel, self-
160
+ * contained media side — no `LanguageModelV3` dependency.
161
+ *
162
+ * The core idea is the SAME as the text LCR: keep a list of providers per
163
+ * model, route to the cheapest healthy one, fall back on failure, report real
164
+ * cost. The only new problem is making prices comparable, which we solve by
165
+ * normalizing every provider's price to ONE reference output (see ReferenceSpec).
166
+ */
167
+
152
168
  type MediaModality = "image" | "video";
153
169
  /**
154
170
  * Pricing unit a provider bills in. `cents` on MediaPricing is the price for
@@ -268,6 +284,13 @@ interface MediaLCRConfig {
268
284
  reference?: ReferenceSpec;
269
285
  onError?: (error: Error, provider: string) => void;
270
286
  onCost?: (event: MediaCostEvent) => void;
287
+ /**
288
+ * One correlated {@link CallRecord} per settled request — the full failover
289
+ * chain, winner, latency, and cost — mirroring the text side's `onCall`, so
290
+ * the same dashboard sink works for image/video. Fire-and-forget; never
291
+ * throws. Media records carry no token counts (inputTokens/outputTokens = 0).
292
+ */
293
+ onCall?: (record: CallRecord) => void;
271
294
  }
272
295
  interface MediaRunResult {
273
296
  outputs: MediaOutput[];
@@ -275,11 +298,6 @@ interface MediaRunResult {
275
298
  costCents: number;
276
299
  estimated: boolean;
277
300
  }
278
- /**
279
- * Build a media Least Cost Router. Returns `generate(modelId, input)` which
280
- * tries providers cheapest-first and falls through on a retryable error —
281
- * exactly the text LCR's contract, for image/video.
282
- */
283
301
  declare function createMediaLCR(config: MediaLCRConfig): (modelId: string, input: Record<string, unknown>) => Promise<MediaRunResult>;
284
302
 
285
303
  /**
package/dist/index.js CHANGED
@@ -386,30 +386,75 @@ function comparePrices(registry, ref = DEFAULT_REFERENCE) {
386
386
  };
387
387
  });
388
388
  }
389
+ function newMediaCallId() {
390
+ const c = globalThis.crypto;
391
+ return c?.randomUUID ? c.randomUUID() : `lcr_${Date.now().toString(36)}`;
392
+ }
389
393
  function createMediaLCR(config) {
390
- const { registry, adapters, reference = DEFAULT_REFERENCE, onError, onCost } = config;
394
+ const { registry, adapters, reference = DEFAULT_REFERENCE, onError, onCost, onCall } = config;
391
395
  return async function generate(modelId, input) {
392
396
  const def = registry[modelId];
393
397
  if (!def) {
394
398
  throw new Error(`ai-lcr: unknown media model "${modelId}" \u2014 add it to the registry`);
395
399
  }
396
400
  const ranked = rankRoutes(def, reference);
401
+ const baselineUsd = ranked.length > 0 ? Math.max(...ranked.map((r) => r.refCents)) / 100 : 0;
402
+ const startedAt = Date.now();
403
+ const attempts = [];
397
404
  let lastErr;
405
+ const emitFail = () => onCall?.({
406
+ id: newMediaCallId(),
407
+ model: modelId,
408
+ attempts,
409
+ winner: void 0,
410
+ ok: false,
411
+ failedOver: attempts.length > 1,
412
+ latencyMs: Date.now() - startedAt,
413
+ inputTokens: 0,
414
+ outputTokens: 0,
415
+ costUsd: 0,
416
+ baselineUsd
417
+ });
398
418
  for (const route of ranked) {
399
419
  const adapter = adapters[route.provider];
400
420
  if (!adapter) continue;
421
+ const attemptStart = Date.now();
401
422
  try {
402
423
  const result = await adapter.run({ externalId: route.externalId, input });
403
424
  const estimated = result.costCents === void 0;
404
425
  const costCents = estimated ? route.refCents * (result.units ?? 1) : result.costCents;
426
+ attempts.push({ provider: route.provider, ok: true, latencyMs: Date.now() - attemptStart });
405
427
  onCost?.({ modelId, provider: route.provider, costCents, estimated });
428
+ onCall?.({
429
+ id: newMediaCallId(),
430
+ model: modelId,
431
+ attempts,
432
+ winner: route.provider,
433
+ ok: true,
434
+ failedOver: attempts.length > 1,
435
+ latencyMs: Date.now() - startedAt,
436
+ inputTokens: 0,
437
+ outputTokens: 0,
438
+ costUsd: costCents / 100,
439
+ baselineUsd
440
+ });
406
441
  return { outputs: result.outputs, provider: route.provider, costCents, estimated };
407
442
  } catch (err) {
408
443
  lastErr = err;
444
+ attempts.push({
445
+ provider: route.provider,
446
+ ok: false,
447
+ latencyMs: Date.now() - attemptStart,
448
+ errorClass: classifyError(err)
449
+ });
409
450
  onError?.(err, route.provider);
410
- if (!isRetryableError(err)) throw err;
451
+ if (!isRetryableError(err)) {
452
+ emitFail();
453
+ throw err;
454
+ }
411
455
  }
412
456
  }
457
+ emitFail();
413
458
  throw lastErr instanceof Error ? lastErr : new Error(`ai-lcr: no provider could serve media model "${modelId}"`);
414
459
  };
415
460
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-lcr",
3
- "version": "0.2.0",
3
+ "version": "0.2.1",
4
4
  "description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
5
5
  "keywords": [
6
6
  "ai",