ai-lcr 0.2.2 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -5,8 +5,10 @@ import { LanguageModelV3 } from '@ai-sdk/provider';
5
5
  *
6
6
  * A LanguageModelV3 that wraps an ordered, cheapest-first list of providers:
7
7
  * it serves from the first healthy one, switches to the next on a retryable
8
- * error (streaming-safe), and snaps back to the cheapest after an idle window.
9
- * It also computes per-call cost from each provider's price and fires `onCost`.
8
+ * error (streaming-safe), and periodically re-probes the cheapest provider
9
+ * (every `resetIntervalMs` after a failover under load too, not only when
10
+ * idle). It also computes per-call cost from each provider's price and fires
11
+ * `onCost`.
10
12
  *
11
13
  * The switching loop is adapted from `ai-fallback` (MIT, © remorses) — its
12
14
  * streaming-safe fallback approach — reimplemented here so ai-lcr owns its core
@@ -28,6 +30,17 @@ interface CostEvent {
28
30
  /** Computed from the serving provider's `cost`; 0 if no price was given. */
29
31
  costUsd: number;
30
32
  }
33
+ /**
34
+ * Coarse error category for a failed attempt — distinct from `errorClass`
35
+ * (which is the raw status/pattern). Use it to alert: `"auth"` and `"billing"`
36
+ * mean a config/account problem masquerading as a healthy failover, the thing
37
+ * you want to page on rather than silently keep burning the pricey fallback.
38
+ * - "transient": rate limit / overload / 5xx — expected, self-healing.
39
+ * - "auth": 401 / 403 — a misconfigured or revoked key.
40
+ * - "billing": 402 / out-of-credit / quota — account needs topping up.
41
+ * - "client": a non-retryable caller error (e.g. 400 bad request).
42
+ */
43
+ type ErrorKind = "transient" | "auth" | "billing" | "client";
31
44
  /** One provider attempt within a single request. */
32
45
  interface RouteAttempt {
33
46
  /** Provider label that was tried (e.g. "tokenmart"). */
@@ -38,6 +51,8 @@ interface RouteAttempt {
38
51
  latencyMs: number;
39
52
  /** Normalized failure reason when `ok` is false (e.g. "502", "rate_limit", "timeout"). */
40
53
  errorClass?: string;
54
+ /** Coarse category of the failure when `ok` is false. See {@link ErrorKind}. */
55
+ kind?: ErrorKind;
41
56
  }
42
57
  /**
43
58
  * One settled request, with its full failover chain. Emitted exactly once per
@@ -65,10 +80,10 @@ interface CallRecord {
65
80
  /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
66
81
  costUsd: number;
67
82
  /**
68
- * What the priciest configured route would have cost for this request, so
69
- * `baselineUsd - costUsd` is the saving from routing cheapest-first. Set by
70
- * the media router (`createMediaLCR`), where every route has a known price;
71
- * omitted by the text router, which can't price a baseline per call.
83
+ * What the same request would have cost on the most expensive configured
84
+ * provider — the savings baseline (`baselineUsd - costUsd`). Set by the media
85
+ * router; the text router omits it (left undefined) until a per-call text
86
+ * baseline lands. Optional so both routers share one {@link CallRecord} shape.
72
87
  */
73
88
  baselineUsd?: number;
74
89
  }
@@ -79,6 +94,13 @@ interface CallRecord {
79
94
  * Reuses the same signals as {@link isRetryableError} — no new vocabulary.
80
95
  */
81
96
  declare function classifyError(error: unknown): string;
97
+ /**
98
+ * Categorize an error for alerting. Orthogonal to {@link isRetryableError}
99
+ * (which decides *whether* to fail over) — this decides *how alarming* the
100
+ * failover is. A run of `"auth"`/`"billing"` attempts means you're silently
101
+ * burning the pricey fallback because a key/account is broken: page on it.
102
+ */
103
+ declare function classifyErrorKind(error: unknown): ErrorKind;
82
104
 
83
105
  /**
84
106
  * Human-readable one-liner for a {@link CallRecord}.
@@ -337,51 +359,39 @@ interface RunwareMediaConfig {
337
359
  declare function createRunwareMediaAdapter(config: RunwareMediaConfig): MediaAdapter;
338
360
 
339
361
  /**
340
- * fal media adapter — image (queue) + video (queue, async poll).
362
+ * fal.ai media adapter — image generation (synchronous).
341
363
  *
342
- * fal serves every model through one async queue API, so a single submit→poll→
343
- * fetch-result path covers both image and video. That is the whole reason this
344
- * adapter exists: it is ai-lcr's first VIDEO-capable execution path. (The
345
- * Runware adapter is image-only; the Kunavo one's video poll loop is unverified.)
364
+ * fal exposes every model at `https://fal.run/<model-id>` (the synchronous API):
365
+ * POST the model's inputs as a flat JSON body, get the result back in the same
366
+ * response. This adapter passes the caller's `input` straight through, so any
367
+ * fal image model and any of its parameters (prompt, image_size, num_images,
368
+ * image_url for i2i/edit, …) work without this adapter knowing about them — it
369
+ * stays generic, not tied to one model family.
346
370
  *
347
- * Implementation note: ai-art's fal adapter uses the `@fal-ai/client` SDK, but
348
- * ai-lcr deliberately keeps zero provider SDKs — every adapter is raw `fetch`
349
- * with an injectable `fetchImpl` for testing (see runware-media, kunavo-media).
350
- * So this re-implements the three queue calls against fal's REST endpoints:
371
+ * Auth: fal uses `Authorization: Key <FAL_KEY>` (NOT a Bearer token).
351
372
  *
352
- * 1. submit POST https://queue.fal.run/{model} → { request_id, status_url, response_url }
353
- * 2. status GET {status_url} → { status: IN_QUEUE | IN_PROGRESS | COMPLETED }
354
- * 3. result GET {response_url} → { images:[…] } | { video:{url} } |
373
+ * Errors: fal returns a proper HTTP status — 401 (bad key), 403 (insufficient
374
+ * balance / no permission), 422 (bad input), 429 (rate limit), 5xx. We surface
375
+ * the status on the thrown error so the router's `isRetryableError` can decide
376
+ * whether to fail over. A 403 "exhausted balance" is retryable (fall over to the
377
+ * next provider); a 422 bad-input is not (don't waste the fallbacks).
355
378
  *
356
- * We follow the `status_url` / `response_url` returned by submit rather than
357
- * rebuilding them, which sidesteps fal's sub-path quirk (a model like
358
- * `fal-ai/flux/schnell` submits to the full path but its status/result live
359
- * under the `fal-ai/flux` base).
379
+ * Cost: the synchronous response does NOT carry a per-call price (fal billing is
380
+ * a separate account-level API), so `costCents` stays undefined and the router
381
+ * falls back to its normalized estimate — same contract as the Kunavo adapter.
360
382
  *
361
- * Auth: fal uses `Authorization: Key {FAL_KEY}` (NOT Bearer).
362
- *
363
- * Cost: fal's queue result does not carry a per-call price, so cost is left to
364
- * the router's normalized estimate (costCents stays undefined; `units` is the
365
- * output count — one image, or one clip).
383
+ * Video: fal video (e.g. veo3.1) is a long-running queue job, a different code
384
+ * path — out of scope here, like the Runware adapter. Image inference only.
366
385
  */
367
386
 
368
387
  interface FalMediaConfig {
369
388
  apiKey: string;
370
- /** Override for testing. Defaults to https://queue.fal.run. */
389
+ /** Override for testing. Defaults to https://fal.run. */
371
390
  baseUrl?: string;
372
- /** Video/job poll cadence (ms). Default 3000. */
373
- pollIntervalMs?: number;
374
- /** Max time to wait for a job before giving up (ms). Default 300000 (5m). */
375
- pollTimeoutMs?: number;
376
391
  /** Injected for testing; defaults to global fetch. */
377
392
  fetchImpl?: typeof fetch;
378
393
  }
379
394
  declare function createFalMediaAdapter(config: FalMediaConfig): MediaAdapter;
380
- /** Carries the HTTP status so the router's `isRetryableError` can classify it. */
381
- declare class FalMediaError extends Error {
382
- status: number;
383
- constructor(status: number, body: string);
384
- }
385
395
 
386
396
  /**
387
397
  * ai-lcr — Least Cost Routing for LLMs.
@@ -436,4 +446,4 @@ type LCRRouter = (modelName: string) => LanguageModelV3;
436
446
  */
437
447
  declare function createLCR(config: LCRConfig): LCRRouter;
438
448
 
439
- export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, FalMediaError, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, comparePrices, createFalMediaAdapter, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
449
+ export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type ErrorKind, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, classifyErrorKind, comparePrices, createFalMediaAdapter, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
package/dist/index.d.ts CHANGED
@@ -5,8 +5,10 @@ import { LanguageModelV3 } from '@ai-sdk/provider';
5
5
  *
6
6
  * A LanguageModelV3 that wraps an ordered, cheapest-first list of providers:
7
7
  * it serves from the first healthy one, switches to the next on a retryable
8
- * error (streaming-safe), and snaps back to the cheapest after an idle window.
9
- * It also computes per-call cost from each provider's price and fires `onCost`.
8
+ * error (streaming-safe), and periodically re-probes the cheapest provider
9
+ * (every `resetIntervalMs` after a failover under load too, not only when
10
+ * idle). It also computes per-call cost from each provider's price and fires
11
+ * `onCost`.
10
12
  *
11
13
  * The switching loop is adapted from `ai-fallback` (MIT, © remorses) — its
12
14
  * streaming-safe fallback approach — reimplemented here so ai-lcr owns its core
@@ -28,6 +30,17 @@ interface CostEvent {
28
30
  /** Computed from the serving provider's `cost`; 0 if no price was given. */
29
31
  costUsd: number;
30
32
  }
33
+ /**
34
+ * Coarse error category for a failed attempt — distinct from `errorClass`
35
+ * (which is the raw status/pattern). Use it to alert: `"auth"` and `"billing"`
36
+ * mean a config/account problem masquerading as a healthy failover, the thing
37
+ * you want to page on rather than silently keep burning the pricey fallback.
38
+ * - "transient": rate limit / overload / 5xx — expected, self-healing.
39
+ * - "auth": 401 / 403 — a misconfigured or revoked key.
40
+ * - "billing": 402 / out-of-credit / quota — account needs topping up.
41
+ * - "client": a non-retryable caller error (e.g. 400 bad request).
42
+ */
43
+ type ErrorKind = "transient" | "auth" | "billing" | "client";
31
44
  /** One provider attempt within a single request. */
32
45
  interface RouteAttempt {
33
46
  /** Provider label that was tried (e.g. "tokenmart"). */
@@ -38,6 +51,8 @@ interface RouteAttempt {
38
51
  latencyMs: number;
39
52
  /** Normalized failure reason when `ok` is false (e.g. "502", "rate_limit", "timeout"). */
40
53
  errorClass?: string;
54
+ /** Coarse category of the failure when `ok` is false. See {@link ErrorKind}. */
55
+ kind?: ErrorKind;
41
56
  }
42
57
  /**
43
58
  * One settled request, with its full failover chain. Emitted exactly once per
@@ -65,10 +80,10 @@ interface CallRecord {
65
80
  /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
66
81
  costUsd: number;
67
82
  /**
68
- * What the priciest configured route would have cost for this request, so
69
- * `baselineUsd - costUsd` is the saving from routing cheapest-first. Set by
70
- * the media router (`createMediaLCR`), where every route has a known price;
71
- * omitted by the text router, which can't price a baseline per call.
83
+ * What the same request would have cost on the most expensive configured
84
+ * provider — the savings baseline (`baselineUsd - costUsd`). Set by the media
85
+ * router; the text router omits it (left undefined) until a per-call text
86
+ * baseline lands. Optional so both routers share one {@link CallRecord} shape.
72
87
  */
73
88
  baselineUsd?: number;
74
89
  }
@@ -79,6 +94,13 @@ interface CallRecord {
79
94
  * Reuses the same signals as {@link isRetryableError} — no new vocabulary.
80
95
  */
81
96
  declare function classifyError(error: unknown): string;
97
+ /**
98
+ * Categorize an error for alerting. Orthogonal to {@link isRetryableError}
99
+ * (which decides *whether* to fail over) — this decides *how alarming* the
100
+ * failover is. A run of `"auth"`/`"billing"` attempts means you're silently
101
+ * burning the pricey fallback because a key/account is broken: page on it.
102
+ */
103
+ declare function classifyErrorKind(error: unknown): ErrorKind;
82
104
 
83
105
  /**
84
106
  * Human-readable one-liner for a {@link CallRecord}.
@@ -337,51 +359,39 @@ interface RunwareMediaConfig {
337
359
  declare function createRunwareMediaAdapter(config: RunwareMediaConfig): MediaAdapter;
338
360
 
339
361
  /**
340
- * fal media adapter — image (queue) + video (queue, async poll).
362
+ * fal.ai media adapter — image generation (synchronous).
341
363
  *
342
- * fal serves every model through one async queue API, so a single submit→poll→
343
- * fetch-result path covers both image and video. That is the whole reason this
344
- * adapter exists: it is ai-lcr's first VIDEO-capable execution path. (The
345
- * Runware adapter is image-only; the Kunavo one's video poll loop is unverified.)
364
+ * fal exposes every model at `https://fal.run/<model-id>` (the synchronous API):
365
+ * POST the model's inputs as a flat JSON body, get the result back in the same
366
+ * response. This adapter passes the caller's `input` straight through, so any
367
+ * fal image model and any of its parameters (prompt, image_size, num_images,
368
+ * image_url for i2i/edit, …) work without this adapter knowing about them — it
369
+ * stays generic, not tied to one model family.
346
370
  *
347
- * Implementation note: ai-art's fal adapter uses the `@fal-ai/client` SDK, but
348
- * ai-lcr deliberately keeps zero provider SDKs — every adapter is raw `fetch`
349
- * with an injectable `fetchImpl` for testing (see runware-media, kunavo-media).
350
- * So this re-implements the three queue calls against fal's REST endpoints:
371
+ * Auth: fal uses `Authorization: Key <FAL_KEY>` (NOT a Bearer token).
351
372
  *
352
- * 1. submit POST https://queue.fal.run/{model} → { request_id, status_url, response_url }
353
- * 2. status GET {status_url} → { status: IN_QUEUE | IN_PROGRESS | COMPLETED }
354
- * 3. result GET {response_url} → { images:[…] } | { video:{url} } |
373
+ * Errors: fal returns a proper HTTP status — 401 (bad key), 403 (insufficient
374
+ * balance / no permission), 422 (bad input), 429 (rate limit), 5xx. We surface
375
+ * the status on the thrown error so the router's `isRetryableError` can decide
376
+ * whether to fail over. A 403 "exhausted balance" is retryable (fall over to the
377
+ * next provider); a 422 bad-input is not (don't waste the fallbacks).
355
378
  *
356
- * We follow the `status_url` / `response_url` returned by submit rather than
357
- * rebuilding them, which sidesteps fal's sub-path quirk (a model like
358
- * `fal-ai/flux/schnell` submits to the full path but its status/result live
359
- * under the `fal-ai/flux` base).
379
+ * Cost: the synchronous response does NOT carry a per-call price (fal billing is
380
+ * a separate account-level API), so `costCents` stays undefined and the router
381
+ * falls back to its normalized estimate — same contract as the Kunavo adapter.
360
382
  *
361
- * Auth: fal uses `Authorization: Key {FAL_KEY}` (NOT Bearer).
362
- *
363
- * Cost: fal's queue result does not carry a per-call price, so cost is left to
364
- * the router's normalized estimate (costCents stays undefined; `units` is the
365
- * output count — one image, or one clip).
383
+ * Video: fal video (e.g. veo3.1) is a long-running queue job, a different code
384
+ * path — out of scope here, like the Runware adapter. Image inference only.
366
385
  */
367
386
 
368
387
  interface FalMediaConfig {
369
388
  apiKey: string;
370
- /** Override for testing. Defaults to https://queue.fal.run. */
389
+ /** Override for testing. Defaults to https://fal.run. */
371
390
  baseUrl?: string;
372
- /** Video/job poll cadence (ms). Default 3000. */
373
- pollIntervalMs?: number;
374
- /** Max time to wait for a job before giving up (ms). Default 300000 (5m). */
375
- pollTimeoutMs?: number;
376
391
  /** Injected for testing; defaults to global fetch. */
377
392
  fetchImpl?: typeof fetch;
378
393
  }
379
394
  declare function createFalMediaAdapter(config: FalMediaConfig): MediaAdapter;
380
- /** Carries the HTTP status so the router's `isRetryableError` can classify it. */
381
- declare class FalMediaError extends Error {
382
- status: number;
383
- constructor(status: number, body: string);
384
- }
385
395
 
386
396
  /**
387
397
  * ai-lcr — Least Cost Routing for LLMs.
@@ -436,4 +446,4 @@ type LCRRouter = (modelName: string) => LanguageModelV3;
436
446
  */
437
447
  declare function createLCR(config: LCRConfig): LCRRouter;
438
448
 
439
- export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, FalMediaError, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, comparePrices, createFalMediaAdapter, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
449
+ export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type ErrorKind, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, classifyErrorKind, comparePrices, createFalMediaAdapter, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };