ai-lcr 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -21
- package/dist/index.cjs +47 -2
- package/dist/index.d.cts +23 -5
- package/dist/index.d.ts +23 -5
- package/dist/index.js +47 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -135,25 +135,6 @@ const lcr = createLCR({
|
|
|
135
135
|
onCall: (record) => console.log(JSON.stringify(record)),
|
|
136
136
|
```
|
|
137
137
|
|
|
138
|
-
Or ship each record to an HTTP collector with the built-in `createHttpSink` (fire-and-forget, never throws, dashboard-agnostic):
|
|
139
|
-
|
|
140
|
-
```ts
|
|
141
|
-
import { createLCR, createHttpSink } from "ai-lcr";
|
|
142
|
-
import { after } from "next/server"; // serverless: don't block the response
|
|
143
|
-
|
|
144
|
-
const lcr = createLCR({
|
|
145
|
-
models: { /* … */ },
|
|
146
|
-
onCall: createHttpSink({
|
|
147
|
-
url: `${process.env.LCR_INGEST_URL}/api/ingest`,
|
|
148
|
-
headers: { authorization: `Bearer ${process.env.LCR_INGEST_KEY}` },
|
|
149
|
-
project: process.env.LCR_PROJECT, // optional tag if one collector serves several apps
|
|
150
|
-
dispatch: after, // run after the response is sent (serverless-safe)
|
|
151
|
-
}),
|
|
152
|
-
});
|
|
153
|
-
```
|
|
154
|
-
|
|
155
|
-
Point `url` at anything that accepts the `CallRecord` JSON — including the self-hostable companion dashboard, **[ai-lcr-dashboard](https://github.com/victorzhrn/ai-lcr-dashboard)** (Spend / Calls / Failover rate + a live failover feed). You run your own instance, so the data never leaves your infrastructure; a [db9](https://db9.ai) database can be provisioned in seconds if you don't want to stand one up yourself.
|
|
156
|
-
|
|
157
138
|
```ts
|
|
158
139
|
interface CallRecord {
|
|
159
140
|
id: string; // correlation id, one per request
|
|
@@ -165,8 +146,7 @@ interface CallRecord {
|
|
|
165
146
|
latencyMs: number;
|
|
166
147
|
inputTokens: number;
|
|
167
148
|
outputTokens: number;
|
|
168
|
-
costUsd: number;
|
|
169
|
-
baselineUsd: number; // what the priciest configured route would cost → savings = baselineUsd - costUsd
|
|
149
|
+
costUsd: number;
|
|
170
150
|
}
|
|
171
151
|
```
|
|
172
152
|
|
package/dist/index.cjs
CHANGED
|
@@ -425,30 +425,75 @@ function comparePrices(registry, ref = DEFAULT_REFERENCE) {
|
|
|
425
425
|
};
|
|
426
426
|
});
|
|
427
427
|
}
|
|
428
|
+
function newMediaCallId() {
|
|
429
|
+
const c = globalThis.crypto;
|
|
430
|
+
return c?.randomUUID ? c.randomUUID() : `lcr_${Date.now().toString(36)}`;
|
|
431
|
+
}
|
|
428
432
|
function createMediaLCR(config) {
|
|
429
|
-
const { registry, adapters, reference = DEFAULT_REFERENCE, onError, onCost } = config;
|
|
433
|
+
const { registry, adapters, reference = DEFAULT_REFERENCE, onError, onCost, onCall } = config;
|
|
430
434
|
return async function generate(modelId, input) {
|
|
431
435
|
const def = registry[modelId];
|
|
432
436
|
if (!def) {
|
|
433
437
|
throw new Error(`ai-lcr: unknown media model "${modelId}" \u2014 add it to the registry`);
|
|
434
438
|
}
|
|
435
439
|
const ranked = rankRoutes(def, reference);
|
|
440
|
+
const baselineUsd = ranked.length > 0 ? Math.max(...ranked.map((r) => r.refCents)) / 100 : 0;
|
|
441
|
+
const startedAt = Date.now();
|
|
442
|
+
const attempts = [];
|
|
436
443
|
let lastErr;
|
|
444
|
+
const emitFail = () => onCall?.({
|
|
445
|
+
id: newMediaCallId(),
|
|
446
|
+
model: modelId,
|
|
447
|
+
attempts,
|
|
448
|
+
winner: void 0,
|
|
449
|
+
ok: false,
|
|
450
|
+
failedOver: attempts.length > 1,
|
|
451
|
+
latencyMs: Date.now() - startedAt,
|
|
452
|
+
inputTokens: 0,
|
|
453
|
+
outputTokens: 0,
|
|
454
|
+
costUsd: 0,
|
|
455
|
+
baselineUsd
|
|
456
|
+
});
|
|
437
457
|
for (const route of ranked) {
|
|
438
458
|
const adapter = adapters[route.provider];
|
|
439
459
|
if (!adapter) continue;
|
|
460
|
+
const attemptStart = Date.now();
|
|
440
461
|
try {
|
|
441
462
|
const result = await adapter.run({ externalId: route.externalId, input });
|
|
442
463
|
const estimated = result.costCents === void 0;
|
|
443
464
|
const costCents = estimated ? route.refCents * (result.units ?? 1) : result.costCents;
|
|
465
|
+
attempts.push({ provider: route.provider, ok: true, latencyMs: Date.now() - attemptStart });
|
|
444
466
|
onCost?.({ modelId, provider: route.provider, costCents, estimated });
|
|
467
|
+
onCall?.({
|
|
468
|
+
id: newMediaCallId(),
|
|
469
|
+
model: modelId,
|
|
470
|
+
attempts,
|
|
471
|
+
winner: route.provider,
|
|
472
|
+
ok: true,
|
|
473
|
+
failedOver: attempts.length > 1,
|
|
474
|
+
latencyMs: Date.now() - startedAt,
|
|
475
|
+
inputTokens: 0,
|
|
476
|
+
outputTokens: 0,
|
|
477
|
+
costUsd: costCents / 100,
|
|
478
|
+
baselineUsd
|
|
479
|
+
});
|
|
445
480
|
return { outputs: result.outputs, provider: route.provider, costCents, estimated };
|
|
446
481
|
} catch (err) {
|
|
447
482
|
lastErr = err;
|
|
483
|
+
attempts.push({
|
|
484
|
+
provider: route.provider,
|
|
485
|
+
ok: false,
|
|
486
|
+
latencyMs: Date.now() - attemptStart,
|
|
487
|
+
errorClass: classifyError(err)
|
|
488
|
+
});
|
|
448
489
|
onError?.(err, route.provider);
|
|
449
|
-
if (!isRetryableError(err))
|
|
490
|
+
if (!isRetryableError(err)) {
|
|
491
|
+
emitFail();
|
|
492
|
+
throw err;
|
|
493
|
+
}
|
|
450
494
|
}
|
|
451
495
|
}
|
|
496
|
+
emitFail();
|
|
452
497
|
throw lastErr instanceof Error ? lastErr : new Error(`ai-lcr: no provider could serve media model "${modelId}"`);
|
|
453
498
|
};
|
|
454
499
|
}
|
package/dist/index.d.cts
CHANGED
|
@@ -149,6 +149,22 @@ interface HttpSinkOptions {
|
|
|
149
149
|
*/
|
|
150
150
|
declare function createHttpSink(options: HttpSinkOptions): (record: CallRecord) => void;
|
|
151
151
|
|
|
152
|
+
/**
|
|
153
|
+
* ai-lcr media routing — Least Cost Routing for image & video models.
|
|
154
|
+
*
|
|
155
|
+
* The text router (./index, ./fallback) is built on the AI SDK's
|
|
156
|
+
* `LanguageModelV3` and only handles token-billed chat/completion. Image and
|
|
157
|
+
* video providers are a different world: outputs are files (URLs), pricing
|
|
158
|
+
* comes in incompatible units (per-image, per-second, per-call, per-megapixel),
|
|
159
|
+
* and video is a long-running async job. This module is the parallel, self-
|
|
160
|
+
* contained media side — no `LanguageModelV3` dependency.
|
|
161
|
+
*
|
|
162
|
+
* The core idea is the SAME as the text LCR: keep a list of providers per
|
|
163
|
+
* model, route to the cheapest healthy one, fall back on failure, report real
|
|
164
|
+
* cost. The only new problem is making prices comparable, which we solve by
|
|
165
|
+
* normalizing every provider's price to ONE reference output (see ReferenceSpec).
|
|
166
|
+
*/
|
|
167
|
+
|
|
152
168
|
type MediaModality = "image" | "video";
|
|
153
169
|
/**
|
|
154
170
|
* Pricing unit a provider bills in. `cents` on MediaPricing is the price for
|
|
@@ -268,6 +284,13 @@ interface MediaLCRConfig {
|
|
|
268
284
|
reference?: ReferenceSpec;
|
|
269
285
|
onError?: (error: Error, provider: string) => void;
|
|
270
286
|
onCost?: (event: MediaCostEvent) => void;
|
|
287
|
+
/**
|
|
288
|
+
* One correlated {@link CallRecord} per settled request — the full failover
|
|
289
|
+
* chain, winner, latency, and cost — mirroring the text side's `onCall`, so
|
|
290
|
+
* the same dashboard sink works for image/video. Fire-and-forget; never
|
|
291
|
+
* throws. Media records carry no token counts (inputTokens/outputTokens = 0).
|
|
292
|
+
*/
|
|
293
|
+
onCall?: (record: CallRecord) => void;
|
|
271
294
|
}
|
|
272
295
|
interface MediaRunResult {
|
|
273
296
|
outputs: MediaOutput[];
|
|
@@ -275,11 +298,6 @@ interface MediaRunResult {
|
|
|
275
298
|
costCents: number;
|
|
276
299
|
estimated: boolean;
|
|
277
300
|
}
|
|
278
|
-
/**
|
|
279
|
-
* Build a media Least Cost Router. Returns `generate(modelId, input)` which
|
|
280
|
-
* tries providers cheapest-first and falls through on a retryable error —
|
|
281
|
-
* exactly the text LCR's contract, for image/video.
|
|
282
|
-
*/
|
|
283
301
|
declare function createMediaLCR(config: MediaLCRConfig): (modelId: string, input: Record<string, unknown>) => Promise<MediaRunResult>;
|
|
284
302
|
|
|
285
303
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -149,6 +149,22 @@ interface HttpSinkOptions {
|
|
|
149
149
|
*/
|
|
150
150
|
declare function createHttpSink(options: HttpSinkOptions): (record: CallRecord) => void;
|
|
151
151
|
|
|
152
|
+
/**
|
|
153
|
+
* ai-lcr media routing — Least Cost Routing for image & video models.
|
|
154
|
+
*
|
|
155
|
+
* The text router (./index, ./fallback) is built on the AI SDK's
|
|
156
|
+
* `LanguageModelV3` and only handles token-billed chat/completion. Image and
|
|
157
|
+
* video providers are a different world: outputs are files (URLs), pricing
|
|
158
|
+
* comes in incompatible units (per-image, per-second, per-call, per-megapixel),
|
|
159
|
+
* and video is a long-running async job. This module is the parallel, self-
|
|
160
|
+
* contained media side — no `LanguageModelV3` dependency.
|
|
161
|
+
*
|
|
162
|
+
* The core idea is the SAME as the text LCR: keep a list of providers per
|
|
163
|
+
* model, route to the cheapest healthy one, fall back on failure, report real
|
|
164
|
+
* cost. The only new problem is making prices comparable, which we solve by
|
|
165
|
+
* normalizing every provider's price to ONE reference output (see ReferenceSpec).
|
|
166
|
+
*/
|
|
167
|
+
|
|
152
168
|
type MediaModality = "image" | "video";
|
|
153
169
|
/**
|
|
154
170
|
* Pricing unit a provider bills in. `cents` on MediaPricing is the price for
|
|
@@ -268,6 +284,13 @@ interface MediaLCRConfig {
|
|
|
268
284
|
reference?: ReferenceSpec;
|
|
269
285
|
onError?: (error: Error, provider: string) => void;
|
|
270
286
|
onCost?: (event: MediaCostEvent) => void;
|
|
287
|
+
/**
|
|
288
|
+
* One correlated {@link CallRecord} per settled request — the full failover
|
|
289
|
+
* chain, winner, latency, and cost — mirroring the text side's `onCall`, so
|
|
290
|
+
* the same dashboard sink works for image/video. Fire-and-forget; never
|
|
291
|
+
* throws. Media records carry no token counts (inputTokens/outputTokens = 0).
|
|
292
|
+
*/
|
|
293
|
+
onCall?: (record: CallRecord) => void;
|
|
271
294
|
}
|
|
272
295
|
interface MediaRunResult {
|
|
273
296
|
outputs: MediaOutput[];
|
|
@@ -275,11 +298,6 @@ interface MediaRunResult {
|
|
|
275
298
|
costCents: number;
|
|
276
299
|
estimated: boolean;
|
|
277
300
|
}
|
|
278
|
-
/**
|
|
279
|
-
* Build a media Least Cost Router. Returns `generate(modelId, input)` which
|
|
280
|
-
* tries providers cheapest-first and falls through on a retryable error —
|
|
281
|
-
* exactly the text LCR's contract, for image/video.
|
|
282
|
-
*/
|
|
283
301
|
declare function createMediaLCR(config: MediaLCRConfig): (modelId: string, input: Record<string, unknown>) => Promise<MediaRunResult>;
|
|
284
302
|
|
|
285
303
|
/**
|
package/dist/index.js
CHANGED
|
@@ -386,30 +386,75 @@ function comparePrices(registry, ref = DEFAULT_REFERENCE) {
|
|
|
386
386
|
};
|
|
387
387
|
});
|
|
388
388
|
}
|
|
389
|
+
function newMediaCallId() {
|
|
390
|
+
const c = globalThis.crypto;
|
|
391
|
+
return c?.randomUUID ? c.randomUUID() : `lcr_${Date.now().toString(36)}`;
|
|
392
|
+
}
|
|
389
393
|
function createMediaLCR(config) {
|
|
390
|
-
const { registry, adapters, reference = DEFAULT_REFERENCE, onError, onCost } = config;
|
|
394
|
+
const { registry, adapters, reference = DEFAULT_REFERENCE, onError, onCost, onCall } = config;
|
|
391
395
|
return async function generate(modelId, input) {
|
|
392
396
|
const def = registry[modelId];
|
|
393
397
|
if (!def) {
|
|
394
398
|
throw new Error(`ai-lcr: unknown media model "${modelId}" \u2014 add it to the registry`);
|
|
395
399
|
}
|
|
396
400
|
const ranked = rankRoutes(def, reference);
|
|
401
|
+
const baselineUsd = ranked.length > 0 ? Math.max(...ranked.map((r) => r.refCents)) / 100 : 0;
|
|
402
|
+
const startedAt = Date.now();
|
|
403
|
+
const attempts = [];
|
|
397
404
|
let lastErr;
|
|
405
|
+
const emitFail = () => onCall?.({
|
|
406
|
+
id: newMediaCallId(),
|
|
407
|
+
model: modelId,
|
|
408
|
+
attempts,
|
|
409
|
+
winner: void 0,
|
|
410
|
+
ok: false,
|
|
411
|
+
failedOver: attempts.length > 1,
|
|
412
|
+
latencyMs: Date.now() - startedAt,
|
|
413
|
+
inputTokens: 0,
|
|
414
|
+
outputTokens: 0,
|
|
415
|
+
costUsd: 0,
|
|
416
|
+
baselineUsd
|
|
417
|
+
});
|
|
398
418
|
for (const route of ranked) {
|
|
399
419
|
const adapter = adapters[route.provider];
|
|
400
420
|
if (!adapter) continue;
|
|
421
|
+
const attemptStart = Date.now();
|
|
401
422
|
try {
|
|
402
423
|
const result = await adapter.run({ externalId: route.externalId, input });
|
|
403
424
|
const estimated = result.costCents === void 0;
|
|
404
425
|
const costCents = estimated ? route.refCents * (result.units ?? 1) : result.costCents;
|
|
426
|
+
attempts.push({ provider: route.provider, ok: true, latencyMs: Date.now() - attemptStart });
|
|
405
427
|
onCost?.({ modelId, provider: route.provider, costCents, estimated });
|
|
428
|
+
onCall?.({
|
|
429
|
+
id: newMediaCallId(),
|
|
430
|
+
model: modelId,
|
|
431
|
+
attempts,
|
|
432
|
+
winner: route.provider,
|
|
433
|
+
ok: true,
|
|
434
|
+
failedOver: attempts.length > 1,
|
|
435
|
+
latencyMs: Date.now() - startedAt,
|
|
436
|
+
inputTokens: 0,
|
|
437
|
+
outputTokens: 0,
|
|
438
|
+
costUsd: costCents / 100,
|
|
439
|
+
baselineUsd
|
|
440
|
+
});
|
|
406
441
|
return { outputs: result.outputs, provider: route.provider, costCents, estimated };
|
|
407
442
|
} catch (err) {
|
|
408
443
|
lastErr = err;
|
|
444
|
+
attempts.push({
|
|
445
|
+
provider: route.provider,
|
|
446
|
+
ok: false,
|
|
447
|
+
latencyMs: Date.now() - attemptStart,
|
|
448
|
+
errorClass: classifyError(err)
|
|
449
|
+
});
|
|
409
450
|
onError?.(err, route.provider);
|
|
410
|
-
if (!isRetryableError(err))
|
|
451
|
+
if (!isRetryableError(err)) {
|
|
452
|
+
emitFail();
|
|
453
|
+
throw err;
|
|
454
|
+
}
|
|
411
455
|
}
|
|
412
456
|
}
|
|
457
|
+
emitFail();
|
|
413
458
|
throw lastErr instanceof Error ? lastErr : new Error(`ai-lcr: no provider could serve media model "${modelId}"`);
|
|
414
459
|
};
|
|
415
460
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ai-lcr",
|
|
3
|
-
"version": "0.2.
|
|
3
|
+
"version": "0.2.1",
|
|
4
4
|
"description": "Least Cost Routing for LLMs — route every model call to the cheapest available provider, fall back automatically, and track real cost. Built for the Vercel AI SDK.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai",
|