npm - ai-lcr - Versions diffs - 0.2.2 → 0.2.5 - Mend

ai-lcr 0.2.2 → 0.2.5

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (8)

package/dist/index.d.cts CHANGED Viewed

@@ -5,8 +5,10 @@ import { LanguageModelV3 } from '@ai-sdk/provider';
  *
  * A LanguageModelV3 that wraps an ordered, cheapest-first list of providers:
  * it serves from the first healthy one, switches to the next on a retryable
- * error (streaming-safe), and snaps back to the cheapest after an idle window.
- * It also computes per-call cost from each provider's price and fires `onCost`.
+ * error (streaming-safe), and periodically re-probes the cheapest provider
+ * (every `resetIntervalMs` after a failover — under load too, not only when
+ * idle). It also computes per-call cost from each provider's price and fires
+ * `onCost`.
  *
  * The switching loop is adapted from `ai-fallback` (MIT, © remorses) — its
  * streaming-safe fallback approach — reimplemented here so ai-lcr owns its core
@@ -28,6 +30,17 @@ interface CostEvent {
     /** Computed from the serving provider's `cost`; 0 if no price was given. */
     costUsd: number;
 }
+/**
+ * Coarse error category for a failed attempt — distinct from `errorClass`
+ * (which is the raw status/pattern). Use it to alert: `"auth"` and `"billing"`
+ * mean a config/account problem masquerading as a healthy failover, the thing
+ * you want to page on rather than silently keep burning the pricey fallback.
+ *   - "transient": rate limit / overload / 5xx — expected, self-healing.
+ *   - "auth":      401 / 403 — a misconfigured or revoked key.
+ *   - "billing":   402 / out-of-credit / quota — account needs topping up.
+ *   - "client":    a non-retryable caller error (e.g. 400 bad request).
+ */
+type ErrorKind = "transient" | "auth" | "billing" | "client";
 /** One provider attempt within a single request. */
 interface RouteAttempt {
     /** Provider label that was tried (e.g. "tokenmart"). */
@@ -38,6 +51,8 @@ interface RouteAttempt {
     latencyMs: number;
     /** Normalized failure reason when `ok` is false (e.g. "502", "rate_limit", "timeout"). */
     errorClass?: string;
+    /** Coarse category of the failure when `ok` is false. See {@link ErrorKind}. */
+    kind?: ErrorKind;
 }
 /**
  * One settled request, with its full failover chain. Emitted exactly once per
@@ -65,10 +80,10 @@ interface CallRecord {
     /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
     costUsd: number;
     /**
-     * What the priciest configured route would have cost for this request, so
-     * `baselineUsd - costUsd` is the saving from routing cheapest-first. Set by
-     * the media router (`createMediaLCR`), where every route has a known price;
-     * omitted by the text router, which can't price a baseline per call.
+     * What the same request would have cost on the most expensive configured
+     * provider — the savings baseline (`baselineUsd - costUsd`). Set by the media
+     * router; the text router omits it (left undefined) until a per-call text
+     * baseline lands. Optional so both routers share one {@link CallRecord} shape.
      */
     baselineUsd?: number;
 }
@@ -79,6 +94,13 @@ interface CallRecord {
  * Reuses the same signals as {@link isRetryableError} — no new vocabulary.
  */
 declare function classifyError(error: unknown): string;
+/**
+ * Categorize an error for alerting. Orthogonal to {@link isRetryableError}
+ * (which decides *whether* to fail over) — this decides *how alarming* the
+ * failover is. A run of `"auth"`/`"billing"` attempts means you're silently
+ * burning the pricey fallback because a key/account is broken: page on it.
+ */
+declare function classifyErrorKind(error: unknown): ErrorKind;
 /**
  * Human-readable one-liner for a {@link CallRecord}.
@@ -337,51 +359,39 @@ interface RunwareMediaConfig {
 declare function createRunwareMediaAdapter(config: RunwareMediaConfig): MediaAdapter;
 /**
- * fal media adapter — image (queue) + video (queue, async poll).
+ * fal.ai media adapter — image generation (synchronous).
  *
- * fal serves every model through one async queue API, so a single submit→poll→
- * fetch-result path covers both image and video. That is the whole reason this
- * adapter exists: it is ai-lcr's first VIDEO-capable execution path. (The
- * Runware adapter is image-only; the Kunavo one's video poll loop is unverified.)
+ * fal exposes every model at `https://fal.run/<model-id>` (the synchronous API):
+ * POST the model's inputs as a flat JSON body, get the result back in the same
+ * response. This adapter passes the caller's `input` straight through, so any
+ * fal image model and any of its parameters (prompt, image_size, num_images,
+ * image_url for i2i/edit, …) work without this adapter knowing about them — it
+ * stays generic, not tied to one model family.
  *
- * Implementation note: ai-art's fal adapter uses the `@fal-ai/client` SDK, but
- * ai-lcr deliberately keeps zero provider SDKs — every adapter is raw `fetch`
- * with an injectable `fetchImpl` for testing (see runware-media, kunavo-media).
- * So this re-implements the three queue calls against fal's REST endpoints:
+ * Auth: fal uses `Authorization: Key <FAL_KEY>` (NOT a Bearer token).
  *
- *   1. submit  POST https://queue.fal.run/{model}        → { request_id, status_url, response_url }
- *   2. status  GET  {status_url}                         → { status: IN_QUEUE | IN_PROGRESS | COMPLETED }
- *   3. result  GET  {response_url}                        → { images:[…] } | { video:{url} } | …
+ * Errors: fal returns a proper HTTP status — 401 (bad key), 403 (insufficient
+ * balance / no permission), 422 (bad input), 429 (rate limit), 5xx. We surface
+ * the status on the thrown error so the router's `isRetryableError` can decide
+ * whether to fail over. A 403 "exhausted balance" is retryable (fall over to the
+ * next provider); a 422 bad-input is not (don't waste the fallbacks).
  *
- * We follow the `status_url` / `response_url` returned by submit rather than
- * rebuilding them, which sidesteps fal's sub-path quirk (a model like
- * `fal-ai/flux/schnell` submits to the full path but its status/result live
- * under the `fal-ai/flux` base).
+ * Cost: the synchronous response does NOT carry a per-call price (fal billing is
+ * a separate account-level API), so `costCents` stays undefined and the router
+ * falls back to its normalized estimate — same contract as the Kunavo adapter.
  *
- * Auth: fal uses `Authorization: Key {FAL_KEY}` (NOT Bearer).
- *
- * Cost: fal's queue result does not carry a per-call price, so cost is left to
- * the router's normalized estimate (costCents stays undefined; `units` is the
- * output count — one image, or one clip).
+ * Video: fal video (e.g. veo3.1) is a long-running queue job, a different code
+ * path — out of scope here, like the Runware adapter. Image inference only.
  */
 interface FalMediaConfig {
     apiKey: string;
-    /** Override for testing. Defaults to https://queue.fal.run. */
+    /** Override for testing. Defaults to https://fal.run. */
     baseUrl?: string;
-    /** Video/job poll cadence (ms). Default 3000. */
-    pollIntervalMs?: number;
-    /** Max time to wait for a job before giving up (ms). Default 300000 (5m). */
-    pollTimeoutMs?: number;
     /** Injected for testing; defaults to global fetch. */
     fetchImpl?: typeof fetch;
 }
 declare function createFalMediaAdapter(config: FalMediaConfig): MediaAdapter;
-/** Carries the HTTP status so the router's `isRetryableError` can classify it. */
-declare class FalMediaError extends Error {
-    status: number;
-    constructor(status: number, body: string);
-}
 /**
  * ai-lcr — Least Cost Routing for LLMs.
@@ -436,4 +446,4 @@ type LCRRouter = (modelName: string) => LanguageModelV3;
  */
 declare function createLCR(config: LCRConfig): LCRRouter;
-export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, FalMediaError, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, comparePrices, createFalMediaAdapter, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
+export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type ErrorKind, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, classifyErrorKind, comparePrices, createFalMediaAdapter, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };

package/dist/index.d.ts CHANGED Viewed

@@ -5,8 +5,10 @@ import { LanguageModelV3 } from '@ai-sdk/provider';
  *
  * A LanguageModelV3 that wraps an ordered, cheapest-first list of providers:
  * it serves from the first healthy one, switches to the next on a retryable
- * error (streaming-safe), and snaps back to the cheapest after an idle window.
- * It also computes per-call cost from each provider's price and fires `onCost`.
+ * error (streaming-safe), and periodically re-probes the cheapest provider
+ * (every `resetIntervalMs` after a failover — under load too, not only when
+ * idle). It also computes per-call cost from each provider's price and fires
+ * `onCost`.
  *
  * The switching loop is adapted from `ai-fallback` (MIT, © remorses) — its
  * streaming-safe fallback approach — reimplemented here so ai-lcr owns its core
@@ -28,6 +30,17 @@ interface CostEvent {
     /** Computed from the serving provider's `cost`; 0 if no price was given. */
     costUsd: number;
 }
+/**
+ * Coarse error category for a failed attempt — distinct from `errorClass`
+ * (which is the raw status/pattern). Use it to alert: `"auth"` and `"billing"`
+ * mean a config/account problem masquerading as a healthy failover, the thing
+ * you want to page on rather than silently keep burning the pricey fallback.
+ *   - "transient": rate limit / overload / 5xx — expected, self-healing.
+ *   - "auth":      401 / 403 — a misconfigured or revoked key.
+ *   - "billing":   402 / out-of-credit / quota — account needs topping up.
+ *   - "client":    a non-retryable caller error (e.g. 400 bad request).
+ */
+type ErrorKind = "transient" | "auth" | "billing" | "client";
 /** One provider attempt within a single request. */
 interface RouteAttempt {
     /** Provider label that was tried (e.g. "tokenmart"). */
@@ -38,6 +51,8 @@ interface RouteAttempt {
     latencyMs: number;
     /** Normalized failure reason when `ok` is false (e.g. "502", "rate_limit", "timeout"). */
     errorClass?: string;
+    /** Coarse category of the failure when `ok` is false. See {@link ErrorKind}. */
+    kind?: ErrorKind;
 }
 /**
  * One settled request, with its full failover chain. Emitted exactly once per
@@ -65,10 +80,10 @@ interface CallRecord {
     /** Computed from the winner's `cost`; 0 if no price was given or the call failed. */
     costUsd: number;
     /**
-     * What the priciest configured route would have cost for this request, so
-     * `baselineUsd - costUsd` is the saving from routing cheapest-first. Set by
-     * the media router (`createMediaLCR`), where every route has a known price;
-     * omitted by the text router, which can't price a baseline per call.
+     * What the same request would have cost on the most expensive configured
+     * provider — the savings baseline (`baselineUsd - costUsd`). Set by the media
+     * router; the text router omits it (left undefined) until a per-call text
+     * baseline lands. Optional so both routers share one {@link CallRecord} shape.
      */
     baselineUsd?: number;
 }
@@ -79,6 +94,13 @@ interface CallRecord {
  * Reuses the same signals as {@link isRetryableError} — no new vocabulary.
  */
 declare function classifyError(error: unknown): string;
+/**
+ * Categorize an error for alerting. Orthogonal to {@link isRetryableError}
+ * (which decides *whether* to fail over) — this decides *how alarming* the
+ * failover is. A run of `"auth"`/`"billing"` attempts means you're silently
+ * burning the pricey fallback because a key/account is broken: page on it.
+ */
+declare function classifyErrorKind(error: unknown): ErrorKind;
 /**
  * Human-readable one-liner for a {@link CallRecord}.
@@ -337,51 +359,39 @@ interface RunwareMediaConfig {
 declare function createRunwareMediaAdapter(config: RunwareMediaConfig): MediaAdapter;
 /**
- * fal media adapter — image (queue) + video (queue, async poll).
+ * fal.ai media adapter — image generation (synchronous).
  *
- * fal serves every model through one async queue API, so a single submit→poll→
- * fetch-result path covers both image and video. That is the whole reason this
- * adapter exists: it is ai-lcr's first VIDEO-capable execution path. (The
- * Runware adapter is image-only; the Kunavo one's video poll loop is unverified.)
+ * fal exposes every model at `https://fal.run/<model-id>` (the synchronous API):
+ * POST the model's inputs as a flat JSON body, get the result back in the same
+ * response. This adapter passes the caller's `input` straight through, so any
+ * fal image model and any of its parameters (prompt, image_size, num_images,
+ * image_url for i2i/edit, …) work without this adapter knowing about them — it
+ * stays generic, not tied to one model family.
  *
- * Implementation note: ai-art's fal adapter uses the `@fal-ai/client` SDK, but
- * ai-lcr deliberately keeps zero provider SDKs — every adapter is raw `fetch`
- * with an injectable `fetchImpl` for testing (see runware-media, kunavo-media).
- * So this re-implements the three queue calls against fal's REST endpoints:
+ * Auth: fal uses `Authorization: Key <FAL_KEY>` (NOT a Bearer token).
  *
- *   1. submit  POST https://queue.fal.run/{model}        → { request_id, status_url, response_url }
- *   2. status  GET  {status_url}                         → { status: IN_QUEUE | IN_PROGRESS | COMPLETED }
- *   3. result  GET  {response_url}                        → { images:[…] } | { video:{url} } | …
+ * Errors: fal returns a proper HTTP status — 401 (bad key), 403 (insufficient
+ * balance / no permission), 422 (bad input), 429 (rate limit), 5xx. We surface
+ * the status on the thrown error so the router's `isRetryableError` can decide
+ * whether to fail over. A 403 "exhausted balance" is retryable (fall over to the
+ * next provider); a 422 bad-input is not (don't waste the fallbacks).
  *
- * We follow the `status_url` / `response_url` returned by submit rather than
- * rebuilding them, which sidesteps fal's sub-path quirk (a model like
- * `fal-ai/flux/schnell` submits to the full path but its status/result live
- * under the `fal-ai/flux` base).
+ * Cost: the synchronous response does NOT carry a per-call price (fal billing is
+ * a separate account-level API), so `costCents` stays undefined and the router
+ * falls back to its normalized estimate — same contract as the Kunavo adapter.
  *
- * Auth: fal uses `Authorization: Key {FAL_KEY}` (NOT Bearer).
- *
- * Cost: fal's queue result does not carry a per-call price, so cost is left to
- * the router's normalized estimate (costCents stays undefined; `units` is the
- * output count — one image, or one clip).
+ * Video: fal video (e.g. veo3.1) is a long-running queue job, a different code
+ * path — out of scope here, like the Runware adapter. Image inference only.
  */
 interface FalMediaConfig {
     apiKey: string;
-    /** Override for testing. Defaults to https://queue.fal.run. */
+    /** Override for testing. Defaults to https://fal.run. */
     baseUrl?: string;
-    /** Video/job poll cadence (ms). Default 3000. */
-    pollIntervalMs?: number;
-    /** Max time to wait for a job before giving up (ms). Default 300000 (5m). */
-    pollTimeoutMs?: number;
     /** Injected for testing; defaults to global fetch. */
     fetchImpl?: typeof fetch;
 }
 declare function createFalMediaAdapter(config: FalMediaConfig): MediaAdapter;
-/** Carries the HTTP status so the router's `isRetryableError` can classify it. */
-declare class FalMediaError extends Error {
-    status: number;
-    constructor(status: number, body: string);
-}
 /**
  * ai-lcr — Least Cost Routing for LLMs.
@@ -436,4 +446,4 @@ type LCRRouter = (modelName: string) => LanguageModelV3;
  */
 declare function createLCR(config: LCRConfig): LCRRouter;
-export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, FalMediaError, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, comparePrices, createFalMediaAdapter, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };
+export { type CallRecord, type CostEvent, DEFAULT_REFERENCE, type ErrorKind, type FormatOptions, type LCRConfig, type LCRRouter, MEDIA_PRICING, type MediaAdapter, type MediaCostEvent, type MediaGenerateRequest, type MediaGenerateResult, type MediaLCRConfig, type MediaModality, type MediaModelDef, type MediaOutput, type MediaPricing, type MediaRegistry, type MediaRoute, type MediaRunResult, type MediaUnit, type PriceComparisonRow, type ProviderCost, type ProviderEntry, type RankedRoute, type ReferenceSpec, type RouteAttempt, cheapestRoute, classifyError, classifyErrorKind, comparePrices, createFalMediaAdapter, createKunavoMediaAdapter, createLCR, createMediaLCR, createRunwareMediaAdapter, formatCallRecord, normalizedCents, rankRoutes, referenceMegapixels };