ai-lcr 0.2.2 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +108 -0
- package/README.md +2 -3
- package/README.zh-CN.md +2 -3
- package/dist/index.cjs +256 -132
- package/dist/index.d.cts +48 -38
- package/dist/index.d.ts +48 -38
- package/dist/index.js +255 -131
- package/package.json +5 -3
package/dist/index.d.cts
CHANGED
|
@@ -5,8 +5,10 @@ import { LanguageModelV3 } from '@ai-sdk/provider';
|
|
|
5
5
|
*
|
|
6
6
|
* A LanguageModelV3 that wraps an ordered, cheapest-first list of providers:
|
|
7
7
|
* it serves from the first healthy one, switches to the next on a retryable
|
|
8
|
-
* error (streaming-safe), and
|
|
9
|
-
*
|
|
8
|
+
* error (streaming-safe), and periodically re-probes the cheapest provider
|
|
9
|
+
* (every `resetIntervalMs` after a failover — under load too, not only when
|
|
10
|
+
* idle). It also computes per-call cost from each provider's price and fires
|
|
11
|
+
* `onCost`.
|
|
10
12
|
*
|
|
11
13
|
* The switching loop is adapted from `ai-fallback` (MIT, © remorses) — its
|
|
12
14
|
* streaming-safe fallback approach — reimplemented here so ai-lcr owns its core
|
|
@@ -28,6 +30,17 @@ interface CostEvent {
|
|
|
28
30
|
/** Computed from the serving provider's `cost`; 0 if no price was given. */
|
|
29
31
|
costUsd: number;
|
|
30
32
|
}
|
|
33
|
+
/**
|
|
34
|
+
* Coarse error category for a failed attempt — distinct from `errorClass`
|
|
35
|
+
* (which is the raw status/pattern). Use it to alert: `"auth"` and `"billing"`
|
|
36
|
+
* mean a config/account problem masquerading as a healthy failover, the thing
|
|
37
|
+
* you want to page on rather than silently keep burning the pricey fallback.
|
|
38
|
+
* - "transient": rate limit / overload / 5xx — expected, self-healing.
|
|
39
|
+
* - "auth": 401 / 403 — a misconfigured or revoked key.
|
|
40
|
+
* - "billing": 402 / out-of-credit / quota — account needs topping up.
|
|
41
|
+
* - "client": a non-retryable caller error (e.g. 400 bad request).
|
|
42
|
+
*/
|
|
43
|
+
type ErrorKind = "transient" | "auth" | "billing" | "client";
|
|
31
44
|
/** One provider attempt within a single request. */
|
|
32
45
|
interface RouteAttempt {
|
|
33
46
|
/** Provider label that was tried (e.g. "tokenmart"). */
|
|
@@ -38,6 +51,8 @@ interface RouteAttempt {
|
|
|
38
51
|
latencyMs: number;
|
|
39
52
|
/** Normalized failure reason when `ok` is false (e.g. "502", "rate_limit", "timeout"). */
|
|
40
53
|
errorClass?: string;
|
|
54
|
+
/** Coarse category of the failure when `ok` is false. See {@link ErrorKind}. */
|
|
55
|
+
kind?: ErrorKind;
|
|
41
56
|
}
|
|
42
57
|
/**
|
|
43
58
|
* One settled request, with its full failover chain. Emitted exactly once per
|
|
@@ -65,10 +80,10 @@ interface CallRecord {
|
|
|
65
80
|
/** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
|
|
66
81
|
costUsd: number;
|
|
67
82
|
/**
|
|
68
|
-
* What the
|
|
69
|
-
*
|
|
70
|
-
* the
|
|
71
|
-
*
|
|
83
|
+
* What the same request would have cost on the most expensive configured
|
|
84
|
+
* provider — the savings baseline (`baselineUsd - costUsd`). Set by the media
|
|
85
|
+
* router; the text router omits it (left undefined) until a per-call text
|
|
86
|
+
* baseline lands. Optional so both routers share one {@link CallRecord} shape.
|
|
72
87
|
*/
|
|
73
88
|
baselineUsd?: number;
|
|
74
89
|
}
|
|
@@ -79,6 +94,13 @@ interface CallRecord {
|
|
|
79
94
|
* Reuses the same signals as {@link isRetryableError} — no new vocabulary.
|
|
80
95
|
*/
|
|
81
96
|
declare function classifyError(error: unknown): string;
|
|
97
|
+
/**
|
|
98
|
+
* Categorize an error for alerting. Orthogonal to {@link isRetryableError}
|
|
99
|
+
* (which decides *whether* to fail over) — this decides *how alarming* the
|
|
100
|
+
* failover is. A run of `"auth"`/`"billing"` attempts means you're silently
|
|
101
|
+
* burning the pricey fallback because a key/account is broken: page on it.
|
|
102
|
+
*/
|
|
103
|
+
declare function classifyErrorKind(error: unknown): ErrorKind;
|
|
82
104
|
|
|
83
105
|
/**
|
|
84
106
|
* Human-readable one-liner for a {@link CallRecord}.
|
|
@@ -337,51 +359,39 @@ interface RunwareMediaConfig {
|
|
|
337
359
|
declare function createRunwareMediaAdapter(config: RunwareMediaConfig): MediaAdapter;
|
|
338
360
|
|
|
339
361
|
/**
|
|
340
|
-
* fal media adapter — image
|
|
362
|
+
* fal.ai media adapter — image generation (synchronous).
|
|
341
363
|
*
|
|
342
|
-
* fal
|
|
343
|
-
*
|
|
344
|
-
* adapter
|
|
345
|
-
*
|
|
364
|
+
* fal exposes every model at `https://fal.run/<model-id>` (the synchronous API):
|
|
365
|
+
* POST the model's inputs as a flat JSON body, get the result back in the same
|
|
366
|
+
* response. This adapter passes the caller's `input` straight through, so any
|
|
367
|
+
* fal image model and any of its parameters (prompt, image_size, num_images,
|
|
368
|
+
* image_url for i2i/edit, …) work without this adapter knowing about them — it
|
|
369
|
+
* stays generic, not tied to one model family.
|
|
346
370
|
*
|
|
347
|
-
*
|
|
348
|
-
* ai-lcr deliberately keeps zero provider SDKs — every adapter is raw `fetch`
|
|
349
|
-
* with an injectable `fetchImpl` for testing (see runware-media, kunavo-media).
|
|
350
|
-
* So this re-implements the three queue calls against fal's REST endpoints:
|
|
371
|
+
* Auth: fal uses `Authorization: Key <FAL_KEY>` (NOT a Bearer token).
|
|
351
372
|
*
|
|
352
|
-
*
|
|
353
|
-
*
|
|
354
|
-
*
|
|
373
|
+
* Errors: fal returns a proper HTTP status — 401 (bad key), 403 (insufficient
|
|
374
|
+
* balance / no permission), 422 (bad input), 429 (rate limit), 5xx. We surface
|
|
375
|
+
* the status on the thrown error so the router's `isRetryableError` can decide
|
|
376
|
+
* whether to fail over. A 403 "exhausted balance" is retryable (fail over to the
|
|
377
|
+
* next provider); a 422 bad-input is not (don't waste the fallbacks).
|
|
355
378
|
*
|
|
356
|
-
*
|
|
357
|
-
*
|
|
358
|
-
*
|
|
359
|
-
* under the `fal-ai/flux` base).
|
|
379
|
+
* Cost: the synchronous response does NOT carry a per-call price (fal billing is
|
|
380
|
+
* a separate account-level API), so `costCents` stays undefined and the router
|
|
381
|
+
* falls back to its normalized estimate — same contract as the Kunavo adapter.
|
|
360
382
|
*
|
|
361
|
-
*
|
|
362
|
-
*
|
|
363
|
-
* Cost: fal's queue result does not carry a per-call price, so cost is left to
|
|
364
|
-
* the router's normalized estimate (costCents stays undefined; `units` is the
|
|
365
|
-
* output count — one image, or one clip).
|
|
383
|
+
* Video: fal video (e.g. veo3.1) is a long-running queue job, a different code
|
|
384
|
+
* path — out of scope here, like the Runware adapter. Image inference only.
|
|
366
385
|
*/
|
|
367
386
|
|
|
368
387
|
interface FalMediaConfig {
|
|
369
388
|
apiKey: string;
|
|
370
|
-
/** Override for testing. Defaults to https://
|
|
389
|
+
/** Override for testing. Defaults to https://fal.run. */
|
|
371
390
|
baseUrl?: string;
|
|
372
|
-
/** Video/job poll cadence (ms). Default 3000. */
|
|
373
|
-
pollIntervalMs?: number;
|
|
374
|
-
/** Max time to wait for a job before giving up (ms). Default 300000 (5m). */
|
|
375
|
-
pollTimeoutMs?: number;
|
|
376
391
|
/** Injected for testing; defaults to global fetch. */
|
|
377
392
|
fetchImpl?: typeof fetch;
|
|
378
393
|
}
|
|
379
394
|
declare function createFalMediaAdapter(config: FalMediaConfig): MediaAdapter;
|
|
380
|
-
/** Carries the HTTP status so the router's `isRetryableError` can classify it. */
|
|
381
|
-
declare class FalMediaError extends Error {
|
|
382
|
-
status: number;
|
|
383
|
-
constructor(status: number, body: string);
|
|
384
|
-
}
|
|
385
395
|
|
|
386
396
|
/**
|
|
387
397
|
* ai-lcr — Least Cost Routing for LLMs.
|
|
@@ -436,4 +446,4 @@ type LCRRouter = (modelName: string) => LanguageModelV3;
|
|
|
436
446
|
*/
|
|
437
447
|
declare function createLCR(config: LCRConfig): LCRRouter;
|
|
438
448
|
|
|
439
|
-
export { type CallRecord, type CostEvent, DEFAULT_REFERENCE,
|
|
449
|
+
export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type ErrorKind, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, classifyErrorKind, comparePrices, createFalMediaAdapter, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
|
package/dist/index.d.ts
CHANGED
|
@@ -5,8 +5,10 @@ import { LanguageModelV3 } from '@ai-sdk/provider';
|
|
|
5
5
|
*
|
|
6
6
|
* A LanguageModelV3 that wraps an ordered, cheapest-first list of providers:
|
|
7
7
|
* it serves from the first healthy one, switches to the next on a retryable
|
|
8
|
-
* error (streaming-safe), and
|
|
9
|
-
*
|
|
8
|
+
* error (streaming-safe), and periodically re-probes the cheapest provider
|
|
9
|
+
* (every `resetIntervalMs` after a failover — under load too, not only when
|
|
10
|
+
* idle). It also computes per-call cost from each provider's price and fires
|
|
11
|
+
* `onCost`.
|
|
10
12
|
*
|
|
11
13
|
* The switching loop is adapted from `ai-fallback` (MIT, © remorses) — its
|
|
12
14
|
* streaming-safe fallback approach — reimplemented here so ai-lcr owns its core
|
|
@@ -28,6 +30,17 @@ interface CostEvent {
|
|
|
28
30
|
/** Computed from the serving provider's `cost`; 0 if no price was given. */
|
|
29
31
|
costUsd: number;
|
|
30
32
|
}
|
|
33
|
+
/**
|
|
34
|
+
* Coarse error category for a failed attempt — distinct from `errorClass`
|
|
35
|
+
* (which is the raw status/pattern). Use it to alert: `"auth"` and `"billing"`
|
|
36
|
+
* mean a config/account problem masquerading as a healthy failover, the thing
|
|
37
|
+
* you want to page on rather than silently keep burning the pricey fallback.
|
|
38
|
+
* - "transient": rate limit / overload / 5xx — expected, self-healing.
|
|
39
|
+
* - "auth": 401 / 403 — a misconfigured or revoked key.
|
|
40
|
+
* - "billing": 402 / out-of-credit / quota — account needs topping up.
|
|
41
|
+
* - "client": a non-retryable caller error (e.g. 400 bad request).
|
|
42
|
+
*/
|
|
43
|
+
type ErrorKind = "transient" | "auth" | "billing" | "client";
|
|
31
44
|
/** One provider attempt within a single request. */
|
|
32
45
|
interface RouteAttempt {
|
|
33
46
|
/** Provider label that was tried (e.g. "tokenmart"). */
|
|
@@ -38,6 +51,8 @@ interface RouteAttempt {
|
|
|
38
51
|
latencyMs: number;
|
|
39
52
|
/** Normalized failure reason when `ok` is false (e.g. "502", "rate_limit", "timeout"). */
|
|
40
53
|
errorClass?: string;
|
|
54
|
+
/** Coarse category of the failure when `ok` is false. See {@link ErrorKind}. */
|
|
55
|
+
kind?: ErrorKind;
|
|
41
56
|
}
|
|
42
57
|
/**
|
|
43
58
|
* One settled request, with its full failover chain. Emitted exactly once per
|
|
@@ -65,10 +80,10 @@ interface CallRecord {
|
|
|
65
80
|
/** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
|
|
66
81
|
costUsd: number;
|
|
67
82
|
/**
|
|
68
|
-
* What the
|
|
69
|
-
*
|
|
70
|
-
* the
|
|
71
|
-
*
|
|
83
|
+
* What the same request would have cost on the most expensive configured
|
|
84
|
+
* provider — the savings baseline (`baselineUsd - costUsd`). Set by the media
|
|
85
|
+
* router; the text router omits it (left undefined) until a per-call text
|
|
86
|
+
* baseline lands. Optional so both routers share one {@link CallRecord} shape.
|
|
72
87
|
*/
|
|
73
88
|
baselineUsd?: number;
|
|
74
89
|
}
|
|
@@ -79,6 +94,13 @@ interface CallRecord {
|
|
|
79
94
|
* Reuses the same signals as {@link isRetryableError} — no new vocabulary.
|
|
80
95
|
*/
|
|
81
96
|
declare function classifyError(error: unknown): string;
|
|
97
|
+
/**
|
|
98
|
+
* Categorize an error for alerting. Orthogonal to {@link isRetryableError}
|
|
99
|
+
* (which decides *whether* to fail over) — this decides *how alarming* the
|
|
100
|
+
* failover is. A run of `"auth"`/`"billing"` attempts means you're silently
|
|
101
|
+
* burning the pricey fallback because a key/account is broken: page on it.
|
|
102
|
+
*/
|
|
103
|
+
declare function classifyErrorKind(error: unknown): ErrorKind;
|
|
82
104
|
|
|
83
105
|
/**
|
|
84
106
|
* Human-readable one-liner for a {@link CallRecord}.
|
|
@@ -337,51 +359,39 @@ interface RunwareMediaConfig {
|
|
|
337
359
|
declare function createRunwareMediaAdapter(config: RunwareMediaConfig): MediaAdapter;
|
|
338
360
|
|
|
339
361
|
/**
|
|
340
|
-
* fal media adapter — image
|
|
362
|
+
* fal.ai media adapter — image generation (synchronous).
|
|
341
363
|
*
|
|
342
|
-
* fal
|
|
343
|
-
*
|
|
344
|
-
* adapter
|
|
345
|
-
*
|
|
364
|
+
* fal exposes every model at `https://fal.run/<model-id>` (the synchronous API):
|
|
365
|
+
* POST the model's inputs as a flat JSON body, get the result back in the same
|
|
366
|
+
* response. This adapter passes the caller's `input` straight through, so any
|
|
367
|
+
* fal image model and any of its parameters (prompt, image_size, num_images,
|
|
368
|
+
* image_url for i2i/edit, …) work without this adapter knowing about them — it
|
|
369
|
+
* stays generic, not tied to one model family.
|
|
346
370
|
*
|
|
347
|
-
*
|
|
348
|
-
* ai-lcr deliberately keeps zero provider SDKs — every adapter is raw `fetch`
|
|
349
|
-
* with an injectable `fetchImpl` for testing (see runware-media, kunavo-media).
|
|
350
|
-
* So this re-implements the three queue calls against fal's REST endpoints:
|
|
371
|
+
* Auth: fal uses `Authorization: Key <FAL_KEY>` (NOT a Bearer token).
|
|
351
372
|
*
|
|
352
|
-
*
|
|
353
|
-
*
|
|
354
|
-
*
|
|
373
|
+
* Errors: fal returns a proper HTTP status — 401 (bad key), 403 (insufficient
|
|
374
|
+
* balance / no permission), 422 (bad input), 429 (rate limit), 5xx. We surface
|
|
375
|
+
* the status on the thrown error so the router's `isRetryableError` can decide
|
|
376
|
+
* whether to fail over. A 403 "exhausted balance" is retryable (fail over to the
|
|
377
|
+
* next provider); a 422 bad-input is not (don't waste the fallbacks).
|
|
355
378
|
*
|
|
356
|
-
*
|
|
357
|
-
*
|
|
358
|
-
*
|
|
359
|
-
* under the `fal-ai/flux` base).
|
|
379
|
+
* Cost: the synchronous response does NOT carry a per-call price (fal billing is
|
|
380
|
+
* a separate account-level API), so `costCents` stays undefined and the router
|
|
381
|
+
* falls back to its normalized estimate — same contract as the Kunavo adapter.
|
|
360
382
|
*
|
|
361
|
-
*
|
|
362
|
-
*
|
|
363
|
-
* Cost: fal's queue result does not carry a per-call price, so cost is left to
|
|
364
|
-
* the router's normalized estimate (costCents stays undefined; `units` is the
|
|
365
|
-
* output count — one image, or one clip).
|
|
383
|
+
* Video: fal video (e.g. veo3.1) is a long-running queue job, a different code
|
|
384
|
+
* path — out of scope here, like the Runware adapter. Image inference only.
|
|
366
385
|
*/
|
|
367
386
|
|
|
368
387
|
interface FalMediaConfig {
|
|
369
388
|
apiKey: string;
|
|
370
|
-
/** Override for testing. Defaults to https://
|
|
389
|
+
/** Override for testing. Defaults to https://fal.run. */
|
|
371
390
|
baseUrl?: string;
|
|
372
|
-
/** Video/job poll cadence (ms). Default 3000. */
|
|
373
|
-
pollIntervalMs?: number;
|
|
374
|
-
/** Max time to wait for a job before giving up (ms). Default 300000 (5m). */
|
|
375
|
-
pollTimeoutMs?: number;
|
|
376
391
|
/** Injected for testing; defaults to global fetch. */
|
|
377
392
|
fetchImpl?: typeof fetch;
|
|
378
393
|
}
|
|
379
394
|
declare function createFalMediaAdapter(config: FalMediaConfig): MediaAdapter;
|
|
380
|
-
/** Carries the HTTP status so the router's `isRetryableError` can classify it. */
|
|
381
|
-
declare class FalMediaError extends Error {
|
|
382
|
-
status: number;
|
|
383
|
-
constructor(status: number, body: string);
|
|
384
|
-
}
|
|
385
395
|
|
|
386
396
|
/**
|
|
387
397
|
* ai-lcr — Least Cost Routing for LLMs.
|
|
@@ -436,4 +446,4 @@ type LCRRouter = (modelName: string) => LanguageModelV3;
|
|
|
436
446
|
*/
|
|
437
447
|
declare function createLCR(config: LCRConfig): LCRRouter;
|
|
438
448
|
|
|
439
|
-
export { type CallRecord, type CostEvent, DEFAULT_REFERENCE,
|
|
449
|
+
export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type ErrorKind, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, classifyErrorKind, comparePrices, createFalMediaAdapter, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
|