npm - @eidentic/model - Versions diffs - 0.1.0 - Mend

@eidentic/model 0.1.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/index.d.cts ADDED Viewed

@@ -0,0 +1,309 @@
+import { LanguageModel, generateText, embed } from 'ai';
+import { ModelPort, ModelRequest, ModelResponse, ModelStreamPart, EmbeddingPort, PriceTable } from '@eidentic/types';
+type ModelResolver = (modelStr?: string) => LanguageModel | Promise<LanguageModel>;
+type GenParams = Parameters<typeof generateText>[0];
+/**
+ * Keys of the AI SDK's `generateText` parameters that are generation settings (sampling,
+ * limits, provider passthrough) — as opposed to prompt/tools/telemetry/internal options.
+ * Listing a name that the installed AI SDK does not accept is a **compile error** (`Pick`
+ * requires the key to exist), so this list can never silently drift out of sync.
+ */
+type GenerationSettingKey = "temperature" | "maxOutputTokens" | "topP" | "topK" | "presencePenalty" | "frequencyPenalty" | "stopSequences" | "seed" | "maxRetries" | "providerOptions" | "headers";
+/**
+ * Generation settings forwarded to every model call, derived directly from the AI SDK's
+ * `generateText` signature via `Pick`. Names and types always match the installed SDK — a
+ * renamed or removed setting becomes a compile error here rather than being silently ignored
+ * at runtime. All keys are optional (they are optional on the SDK type).
+ *
+ * @example new AIModel(anthropic("claude-sonnet-4-5"), { temperature: 0.2, maxOutputTokens: 1024 })
+ */
+type AIModelOptions = Pick<GenParams, GenerationSettingKey>;
+/**
+ * An Eidentic `ModelPort` backed by Vercel AI SDK v6.
+ * Pass a concrete AI SDK `LanguageModel` (e.g. `anthropic("claude-...")`) or a resolver
+ * that turns the request's `model` string into a `LanguageModel`. The optional second
+ * argument sets generation defaults (temperature, maxOutputTokens, …) applied to every call.
+ */
+declare class AIModel implements ModelPort {
+    private readonly resolve;
+    private readonly settings;
+    /** The model's own identifier, sourced from the AI SDK LanguageModel when a static model is passed; may be `undefined` otherwise (presumably when a resolver is supplied — confirm). */
+    readonly modelId: string | undefined;
+    constructor(model: LanguageModel | ModelResolver, options?: AIModelOptions);
+    complete(request: ModelRequest): Promise<ModelResponse>;
+    stream(request: ModelRequest): AsyncIterable<ModelStreamPart>;
+}
+/** The embedding-model type accepted by AI SDK v6 `embed`, minus the bare-string branch. */
+type AIEmbeddingModel = Exclude<Parameters<typeof embed>[0]["model"], string>;
+/**
+ * Provider-agnostic hosted embedder over AI SDK v6. Bring your own provider + key + model:
+ *   const embedder = await AIEmbedder.create(openai.embedding("text-embedding-3-small"));
+ * Works with any `@ai-sdk/*` embedding model (OpenAI, Cohere, Google, Mistral, ...).
+ * A first-class peer to the local `@eidentic/transformers` embedder; pick whichever fits.
+ */
+declare class AIEmbedder implements EmbeddingPort {
+    private readonly model;
+    readonly dim: number;
+    private constructor();
+    /** Construct an embedder, probing the model once to discover its output dimension. */
+    static create(model: AIEmbeddingModel): Promise<AIEmbedder>;
+    embed(text: string): Promise<number[]>;
+    /**
+     * Batch embedding via AI SDK v6 `embedMany({ model, values })` → `{ embeddings: number[][] }`.
+     * Embeds all texts in a single provider call (fewer round-trips on the ingest hot path).
+     * Each returned vector is validated to have length === `this.dim`.
+     */
+    embedBatch(texts: string[]): Promise<number[][]>;
+}
+declare const pricesUpdatedAt = "2026-06-08T00:00:00.000Z";
+declare const defaultPrices: PriceTable;
+declare const LITELLM_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
+/**
+ * Map a raw LiteLLM model_prices_and_context_window.json object to a PriceTable.
+ * Pure function — usable in tests without any network calls.
+ */
+declare function mapLiteLLM(raw: Record<string, unknown>): PriceTable;
+/**
+ * Fetch the latest prices live from LiteLLM (opt-in — schedule it yourself;
+ * the library never auto-fetches at runtime).
+ */
+declare function fetchLatestPrices(opts?: {
+    fetchImpl?: typeof fetch;
+    url?: string;
+}): Promise<PriceTable>;
+/**
+ * Options for `createOllamaModel`.
+ */
+interface OllamaModelOptions {
+    /**
+     * Base URL of the Ollama server (defaults to `http://localhost:11434/api`).
+     * Override when Ollama runs on a non-default port or a remote host.
+     */
+    baseURL?: string;
+    /**
+     * Injectable provider factory for testing. When provided, the `ollama-ai-provider`
+     * peer dependency is NOT resolved — the factory is called directly. In production
+     * code, leave this unset.
+     * @internal
+     */
+    _factory?: OllamaProviderFactory;
+}
+/**
+ * Minimal type describing what we need from `ollama-ai-provider`.
+ * @internal
+ */
+interface OllamaProvider {
+    (modelId: string): LanguageModel;
+}
+/** @internal */
+interface OllamaProviderFactory {
+    createOllama(opts?: {
+        baseURL?: string;
+    }): OllamaProvider;
+}
+/**
+ * Create a Vercel AI SDK `LanguageModel` backed by a locally-running Ollama instance.
+ *
+ * `ollama-ai-provider` is an **optional peer dependency** — install it separately:
+ * ```sh
+ * npm install ollama-ai-provider
+ * # or
+ * pnpm add ollama-ai-provider
+ * ```
+ *
+ * **Usage:**
+ * ```ts
+ * import { AIModel, createOllamaModel } from "@eidentic/model";
+ *
+ * // Default: connects to http://localhost:11434/api
+ * const model = new AIModel(createOllamaModel("llama3.2"));
+ *
+ * // Multimodal (vision-capable) model:
+ * const visionModel = new AIModel(createOllamaModel("llava"));
+ *
+ * // Custom server URL:
+ * const remoteModel = new AIModel(
+ *   createOllamaModel("mistral", { baseURL: "http://192.168.1.10:11434/api" }),
+ * );
+ * ```
+ *
+ * No API key required — works entirely offline.
+ *
+ * @param modelId - Ollama model identifier, e.g. `"llama3.2"`, `"mistral"`, `"llava"`.
+ * @param opts    - Optional configuration (baseURL + optional test factory).
+ * @returns A Vercel AI SDK `LanguageModel` that routes calls to the local Ollama server.
+ * @throws Error if `ollama-ai-provider` is not installed and no `_factory` is provided.
+ */
+declare function createOllamaModel(modelId: string, opts?: OllamaModelOptions): LanguageModel;
+interface WithFallbackOptions {
+    /**
+     * Custom predicate to decide whether an error should trigger fallback.
+     * Called with the thrown error. Return `true` to try the next model.
+     * Default: always fall back (except for AbortError — see below).
+     */
+    shouldFallback?: (err: unknown) => boolean;
+    /**
+     * Called each time a fallback transition happens.
+     * Useful for alerting/metrics — e.g. log to your observability stack.
+     */
+    onFallback?: (err: unknown, fromIndex: number, toIndex: number) => void;
+}
+/**
+ * Wraps a primary `ModelPort` with one or more fallback models.
+ *
+ * When `complete()` or `stream()` fails (e.g. network error, provider 5xx/429) and the
+ * error passes `shouldFallback` (default: always), the next model in the chain is tried.
+ *
+ * **Stream caveat**: fallback is only attempted when the failed stream produced
+ * **zero** text deltas. If any delta was already yielded to the caller, the
+ * output would be corrupted by a mid-stream provider switch, so the error is
+ * re-thrown instead.
+ *
+ * **AbortError**: never triggers a fallback — the caller's cancellation intent
+ * is always respected.
+ *
+ * **Cost-optimization recipe**: pair a cheaper, faster tier as the primary with
+ * a slower but more-capable tier as fallback. Under normal conditions you pay
+ * the cheap rate; spikes or outages automatically route to the reliable tier
+ * without changing any call sites. Cost reductions of 55-65 % have been reported
+ * for production systems that apply this pattern.
+ *
+ * @example
+ * const model = withFallback(cheap, [premium], {
+ *   onFallback: (err, from, to) => console.warn(`model[${from}] failed, trying [${to}]`, err),
+ * });
+ * // Drop into any AgentConfig.model unchanged — it is still a ModelPort.
+ */
+declare function withFallback(primary: ModelPort, fallbacks: ModelPort[], opts?: WithFallbackOptions): ModelPort;
+/**
+ * A threshold entry for `byTokenEstimate`.
+ * The selector routes to `tier` when the estimated token count is `<= upTo`.
+ * Entries are evaluated in ascending `upTo` order; the first match wins.
+ */
+interface TokenThreshold {
+    upTo: number;
+    tier: string;
+}
+/**
+ * Returns a `routeModel` selector that maps requests to tier names based on
+ * estimated prompt token count. Thresholds are evaluated in ascending `upTo`
+ * order; the first whose `upTo >= estimatedTokens` wins. Falls back to
+ * `fallbackTier` when no threshold matches.
+ *
+ * @example
+ * const sel = byTokenEstimate(
+ *   [{ upTo: 4_000, tier: "small" }, { upTo: 32_000, tier: "medium" }],
+ *   "large",
+ * );
+ * const model = routeModel(sel, { small: cheapModel, medium: midModel, large: bigModel });
+ */
+declare function byTokenEstimate(thresholds: TokenThreshold[], fallbackTier: string): (req: ModelRequest) => string;
+/**
+ * Routes each request to one of a named set of `ModelPort` implementations
+ * according to a caller-supplied `selector` function.
+ *
+ * The selector receives the full `ModelRequest` and returns a tier name.
+ * Use `byTokenEstimate` as a drop-in selector for token-count-based routing,
+ * or write your own (e.g. inspect a custom tag on the messages array).
+ *
+ * An unknown tier name throws immediately with a clear diagnostic message
+ * rather than silently failing or falling through.
+ *
+ * **Cost-optimization recipe**: assign a cheap fast model to the "small" tier
+ * and a powerful model only to the "large" tier. Short requests (most of them
+ * in practice) pay the cheap rate; only requests that genuinely need the
+ * bigger model incur the premium cost. This alone yields 40-60 % spend
+ * reductions in production workloads.
+ *
+ * @example
+ * const model = routeModel(
+ *   byTokenEstimate([{ upTo: 4_000, tier: "small" }], "large"),
+ *   { small: cheapModel, large: premiumModel },
+ * );
+ * agent.run({ model, ... }); // works as a plain ModelPort
+ */
+declare function routeModel(selector: (req: ModelRequest) => string, tiers: Record<string, ModelPort>): ModelPort;
+/** Pluggable backing store for `cachedModel`. Implement this to share the
+ *  cache across workers / survive restarts. */
+interface CacheStore {
+    get(key: string): Promise<ModelResponse | undefined> | ModelResponse | undefined;
+    set(key: string, value: ModelResponse, ttlMs?: number): Promise<void> | void;
+}
+interface CachedModelOptions {
+    /** How long a cached response is valid. Default: 5 minutes. */
+    ttlMs?: number;
+    /** Maximum number of entries in the in-process LRU. Default: 500. */
+    maxEntries?: number;
+    /**
+     * Custom key derivation function. Receives the full request; return a
+     * string that is unique for semantically-distinct requests.
+     * Default: stable-JSON hash of messages + tools + model + outputSchema.
+     */
+    keyFn?: (req: ModelRequest) => string;
+    /**
+     * External cache store (Redis, Memcached, …). When provided, the LRU is
+     * used as a local write-through layer and the store is consulted on misses.
+     */
+    store?: CacheStore;
+    /**
+     * Decide whether a successful result should be cached.
+     * Default: cache all non-error results (always `true` on the happy path).
+     *
+     * **Tool-call responses are cached** at the model layer — tool definitions
+     * are schemas only and carry no side-effects from the model's perspective.
+     * This is desirable for replay scenarios. Override here if you need
+     * per-request exclusions.
+     */
+    shouldCache?: (req: ModelRequest, result: ModelResponse) => boolean;
+    /** Called on every cache hit (key, cached response). Use for metrics. */
+    onCacheHit?: (key: string, result: ModelResponse) => void;
+    /**
+     * Clock used for TTL bookkeeping. Defaults to `Date.now`.
+     * Inject a fake clock in tests for deterministic expiry behaviour.
+     */
+    clock?: () => number;
+}
+/** Runtime statistics exposed by `cachedModel`. */
+interface CacheStats {
+    /** Number of requests served from cache. */
+    hits: number;
+    /** Number of requests that required a live model call. */
+    misses: number;
+    /** Current number of entries in the in-process LRU. */
+    size: number;
+}
+/**
+ * Wraps a `ModelPort` with an exact-match response cache for `complete()`.
+ *
+ * - **Streaming calls are never cached** and always pass through to the
+ *   underlying model.
+ * - The default key is a stable JSON hash of messages + tools + model +
+ *   outputSchema. Provide `keyFn` to override.
+ * - In-memory LRU (default 500 entries, 5-minute TTL) with optional
+ *   external `store` for cross-process sharing.
+ * - Inspect runtime behaviour via the returned `stats()` method.
+ *
+ * **Cost-optimization recipe**: enable caching for deterministic agent
+ * sub-tasks — system prompts, classification prompts, or any prompt that
+ * recurs verbatim within a session. Even a 20 % cache-hit rate on a
+ * high-volume workload reduces spend by roughly 20 % with zero changes elsewhere.
+ * Savings of 55-65 % have been reported when caching is combined with
+ * `withFallback` + `routeModel`.
+ *
+ * @example
+ * const model = cachedModel(baseModel, { ttlMs: 10 * 60_000, maxEntries: 1000 });
+ * const { hits, misses, size } = model.stats();
+ */
+declare function cachedModel(model: ModelPort, opts?: CachedModelOptions): ModelPort & {
+    stats(): CacheStats;
+};
+export { AIEmbedder, AIModel, type AIModelOptions, type CacheStats, type CacheStore, type CachedModelOptions, LITELLM_URL, type ModelResolver, type OllamaModelOptions, type TokenThreshold, type WithFallbackOptions, byTokenEstimate, cachedModel, createOllamaModel, defaultPrices, fetchLatestPrices, mapLiteLLM, pricesUpdatedAt, routeModel, withFallback };

package/dist/index.d.ts ADDED Viewed

@@ -0,0 +1,309 @@
+import { LanguageModel, generateText, embed } from 'ai';
+import { ModelPort, ModelRequest, ModelResponse, ModelStreamPart, EmbeddingPort, PriceTable } from '@eidentic/types';
+type ModelResolver = (modelStr?: string) => LanguageModel | Promise<LanguageModel>;
+type GenParams = Parameters<typeof generateText>[0];
+/**
+ * Keys of the AI SDK's `generateText` parameters that are generation settings (sampling,
+ * limits, provider passthrough) — as opposed to prompt/tools/telemetry/internal options.
+ * Listing a name that the installed AI SDK does not accept is a **compile error** (`Pick`
+ * requires the key to exist), so this list can never silently drift out of sync.
+ */
+type GenerationSettingKey = "temperature" | "maxOutputTokens" | "topP" | "topK" | "presencePenalty" | "frequencyPenalty" | "stopSequences" | "seed" | "maxRetries" | "providerOptions" | "headers";
+/**
+ * Generation settings forwarded to every model call, derived directly from the AI SDK's
+ * `generateText` signature via `Pick`. Names and types always match the installed SDK — a
+ * renamed or removed setting becomes a compile error here rather than being silently ignored
+ * at runtime. All keys are optional (they are optional on the SDK type).
+ *
+ * @example new AIModel(anthropic("claude-sonnet-4-5"), { temperature: 0.2, maxOutputTokens: 1024 })
+ */
+type AIModelOptions = Pick<GenParams, GenerationSettingKey>;
+/**
+ * An Eidentic `ModelPort` backed by Vercel AI SDK v6.
+ * Pass a concrete AI SDK `LanguageModel` (e.g. `anthropic("claude-...")`) or a resolver
+ * that turns the request's `model` string into a `LanguageModel`. The optional second
+ * argument sets generation defaults (temperature, maxOutputTokens, …) applied to every call.
+ */
+declare class AIModel implements ModelPort {
+    private readonly resolve;
+    private readonly settings;
+    /** The model's own identifier, sourced from the AI SDK LanguageModel when a static model is passed; may be `undefined` otherwise (presumably when a resolver is supplied — confirm). */
+    readonly modelId: string | undefined;
+    constructor(model: LanguageModel | ModelResolver, options?: AIModelOptions);
+    complete(request: ModelRequest): Promise<ModelResponse>;
+    stream(request: ModelRequest): AsyncIterable<ModelStreamPart>;
+}
+/** The embedding-model type accepted by AI SDK v6 `embed`, minus the bare-string branch. */
+type AIEmbeddingModel = Exclude<Parameters<typeof embed>[0]["model"], string>;
+/**
+ * Provider-agnostic hosted embedder over AI SDK v6. Bring your own provider + key + model:
+ *   const embedder = await AIEmbedder.create(openai.embedding("text-embedding-3-small"));
+ * Works with any `@ai-sdk/*` embedding model (OpenAI, Cohere, Google, Mistral, ...).
+ * A first-class peer to the local `@eidentic/transformers` embedder; pick whichever fits.
+ */
+declare class AIEmbedder implements EmbeddingPort {
+    private readonly model;
+    readonly dim: number;
+    private constructor();
+    /** Construct an embedder, probing the model once to discover its output dimension. */
+    static create(model: AIEmbeddingModel): Promise<AIEmbedder>;
+    embed(text: string): Promise<number[]>;
+    /**
+     * Batch embedding via AI SDK v6 `embedMany({ model, values })` → `{ embeddings: number[][] }`.
+     * Embeds all texts in a single provider call (fewer round-trips on the ingest hot path).
+     * Each returned vector is validated to have length === `this.dim`.
+     */
+    embedBatch(texts: string[]): Promise<number[][]>;
+}
+declare const pricesUpdatedAt = "2026-06-08T00:00:00.000Z";
+declare const defaultPrices: PriceTable;
+declare const LITELLM_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json";
+/**
+ * Map a raw LiteLLM model_prices_and_context_window.json object to a PriceTable.
+ * Pure function — usable in tests without any network calls.
+ */
+declare function mapLiteLLM(raw: Record<string, unknown>): PriceTable;
+/**
+ * Fetch the latest prices live from LiteLLM (opt-in — schedule it yourself;
+ * the library never auto-fetches at runtime).
+ */
+declare function fetchLatestPrices(opts?: {
+    fetchImpl?: typeof fetch;
+    url?: string;
+}): Promise<PriceTable>;
+/**
+ * Options for `createOllamaModel`.
+ */
+interface OllamaModelOptions {
+    /**
+     * Base URL of the Ollama server (defaults to `http://localhost:11434/api`).
+     * Override when Ollama runs on a non-default port or a remote host.
+     */
+    baseURL?: string;
+    /**
+     * Injectable provider factory for testing. When provided, the `ollama-ai-provider`
+     * peer dependency is NOT resolved — the factory is called directly. In production
+     * code, leave this unset.
+     * @internal
+     */
+    _factory?: OllamaProviderFactory;
+}
+/**
+ * Minimal type describing what we need from `ollama-ai-provider`.
+ * @internal
+ */
+interface OllamaProvider {
+    (modelId: string): LanguageModel;
+}
+/** @internal */
+interface OllamaProviderFactory {
+    createOllama(opts?: {
+        baseURL?: string;
+    }): OllamaProvider;
+}
+/**
+ * Create a Vercel AI SDK `LanguageModel` backed by a locally-running Ollama instance.
+ *
+ * `ollama-ai-provider` is an **optional peer dependency** — install it separately:
+ * ```sh
+ * npm install ollama-ai-provider
+ * # or
+ * pnpm add ollama-ai-provider
+ * ```
+ *
+ * **Usage:**
+ * ```ts
+ * import { AIModel, createOllamaModel } from "@eidentic/model";
+ *
+ * // Default: connects to http://localhost:11434/api
+ * const model = new AIModel(createOllamaModel("llama3.2"));
+ *
+ * // Multimodal (vision-capable) model:
+ * const visionModel = new AIModel(createOllamaModel("llava"));
+ *
+ * // Custom server URL:
+ * const remoteModel = new AIModel(
+ *   createOllamaModel("mistral", { baseURL: "http://192.168.1.10:11434/api" }),
+ * );
+ * ```
+ *
+ * No API key required — works entirely offline.
+ *
+ * @param modelId - Ollama model identifier, e.g. `"llama3.2"`, `"mistral"`, `"llava"`.
+ * @param opts    - Optional configuration (baseURL + optional test factory).
+ * @returns A Vercel AI SDK `LanguageModel` that routes calls to the local Ollama server.
+ * @throws Error if `ollama-ai-provider` is not installed and no `_factory` is provided.
+ */
+declare function createOllamaModel(modelId: string, opts?: OllamaModelOptions): LanguageModel;
+interface WithFallbackOptions {
+    /**
+     * Custom predicate to decide whether an error should trigger fallback.
+     * Called with the thrown error. Return `true` to try the next model.
+     * Default: always fall back (except for AbortError — see below).
+     */
+    shouldFallback?: (err: unknown) => boolean;
+    /**
+     * Called each time a fallback transition happens.
+     * Useful for alerting/metrics — e.g. log to your observability stack.
+     */
+    onFallback?: (err: unknown, fromIndex: number, toIndex: number) => void;
+}
+/**
+ * Wraps a primary `ModelPort` with one or more fallback models.
+ *
+ * When `complete()` or `stream()` fails (e.g. network error, provider 5xx/429) and the
+ * error passes `shouldFallback` (default: always), the next model in the chain is tried.
+ *
+ * **Stream caveat**: fallback is only attempted when the failed stream produced
+ * **zero** text deltas. If any delta was already yielded to the caller, the
+ * output would be corrupted by a mid-stream provider switch, so the error is
+ * re-thrown instead.
+ *
+ * **AbortError**: never triggers a fallback — the caller's cancellation intent
+ * is always respected.
+ *
+ * **Cost-optimization recipe**: pair a cheaper, faster tier as the primary with
+ * a slower but more-capable tier as fallback. Under normal conditions you pay
+ * the cheap rate; spikes or outages automatically route to the reliable tier
+ * without changing any call sites. Cost reductions of 55-65 % have been reported
+ * for production systems that apply this pattern.
+ *
+ * @example
+ * const model = withFallback(cheap, [premium], {
+ *   onFallback: (err, from, to) => console.warn(`model[${from}] failed, trying [${to}]`, err),
+ * });
+ * // Drop into any AgentConfig.model unchanged — it is still a ModelPort.
+ */
+declare function withFallback(primary: ModelPort, fallbacks: ModelPort[], opts?: WithFallbackOptions): ModelPort;
+/**
+ * A threshold entry for `byTokenEstimate`.
+ * The selector routes to `tier` when the estimated token count is `<= upTo`.
+ * Entries are evaluated in ascending `upTo` order; the first match wins.
+ */
+interface TokenThreshold {
+    upTo: number;
+    tier: string;
+}
+/**
+ * Returns a `routeModel` selector that maps requests to tier names based on
+ * estimated prompt token count. Thresholds are evaluated in ascending `upTo`
+ * order; the first whose `upTo >= estimatedTokens` wins. Falls back to
+ * `fallbackTier` when no threshold matches.
+ *
+ * @example
+ * const sel = byTokenEstimate(
+ *   [{ upTo: 4_000, tier: "small" }, { upTo: 32_000, tier: "medium" }],
+ *   "large",
+ * );
+ * const model = routeModel(sel, { small: cheapModel, medium: midModel, large: bigModel });
+ */
+declare function byTokenEstimate(thresholds: TokenThreshold[], fallbackTier: string): (req: ModelRequest) => string;
+/**
+ * Routes each request to one of a named set of `ModelPort` implementations
+ * according to a caller-supplied `selector` function.
+ *
+ * The selector receives the full `ModelRequest` and returns a tier name.
+ * Use `byTokenEstimate` as a drop-in selector for token-count-based routing,
+ * or write your own (e.g. inspect a custom tag on the messages array).
+ *
+ * An unknown tier name throws immediately with a clear diagnostic message
+ * rather than silently failing or falling through.
+ *
+ * **Cost-optimization recipe**: assign a cheap fast model to the "small" tier
+ * and a powerful model only to the "large" tier. Short requests (most of them
+ * in practice) pay the cheap rate; only requests that genuinely need the
+ * bigger model incur the premium cost. This alone yields 40-60 % spend
+ * reductions in production workloads.
+ *
+ * @example
+ * const model = routeModel(
+ *   byTokenEstimate([{ upTo: 4_000, tier: "small" }], "large"),
+ *   { small: cheapModel, large: premiumModel },
+ * );
+ * agent.run({ model, ... }); // works as a plain ModelPort
+ */
+declare function routeModel(selector: (req: ModelRequest) => string, tiers: Record<string, ModelPort>): ModelPort;
+/** Pluggable backing store for `cachedModel`. Implement this to share the
+ *  cache across workers / survive restarts. */
+interface CacheStore {
+    get(key: string): Promise<ModelResponse | undefined> | ModelResponse | undefined;
+    set(key: string, value: ModelResponse, ttlMs?: number): Promise<void> | void;
+}
+interface CachedModelOptions {
+    /** How long a cached response is valid. Default: 5 minutes. */
+    ttlMs?: number;
+    /** Maximum number of entries in the in-process LRU. Default: 500. */
+    maxEntries?: number;
+    /**
+     * Custom key derivation function. Receives the full request; return a
+     * string that is unique for semantically-distinct requests.
+     * Default: stable-JSON hash of messages + tools + model + outputSchema.
+     */
+    keyFn?: (req: ModelRequest) => string;
+    /**
+     * External cache store (Redis, Memcached, …). When provided, the LRU is
+     * used as a local write-through layer and the store is consulted on misses.
+     */
+    store?: CacheStore;
+    /**
+     * Decide whether a successful result should be cached.
+     * Default: cache all non-error results (always `true` on the happy path).
+     *
+     * **Tool-call responses are cached** at the model layer — tool definitions
+     * are schemas only and carry no side-effects from the model's perspective.
+     * This is desirable for replay scenarios. Override here if you need
+     * per-request exclusions.
+     */
+    shouldCache?: (req: ModelRequest, result: ModelResponse) => boolean;
+    /** Called on every cache hit (key, cached response). Use for metrics. */
+    onCacheHit?: (key: string, result: ModelResponse) => void;
+    /**
+     * Clock used for TTL bookkeeping. Defaults to `Date.now`.
+     * Inject a fake clock in tests for deterministic expiry behaviour.
+     */
+    clock?: () => number;
+}
+/** Runtime statistics exposed by `cachedModel`. */
+interface CacheStats {
+    /** Number of requests served from cache. */
+    hits: number;
+    /** Number of requests that required a live model call. */
+    misses: number;
+    /** Current number of entries in the in-process LRU. */
+    size: number;
+}
+/**
+ * Wraps a `ModelPort` with an exact-match response cache for `complete()`.
+ *
+ * - **Streaming calls are never cached** and always pass through to the
+ *   underlying model.
+ * - The default key is a stable JSON hash of messages + tools + model +
+ *   outputSchema. Provide `keyFn` to override.
+ * - In-memory LRU (default 500 entries, 5-minute TTL) with optional
+ *   external `store` for cross-process sharing.
+ * - Inspect runtime behaviour via the returned `stats()` method.
+ *
+ * **Cost-optimization recipe**: enable caching for deterministic agent
+ * sub-tasks — system prompts, classification prompts, or any prompt that
+ * recurs verbatim within a session. Even a 20 % cache-hit rate on a
+ * high-volume workload reduces spend by roughly 20 % with zero changes elsewhere.
+ * Savings of 55-65 % have been reported when caching is combined with
+ * `withFallback` + `routeModel`.
+ *
+ * @example
+ * const model = cachedModel(baseModel, { ttlMs: 10 * 60_000, maxEntries: 1000 });
+ * const { hits, misses, size } = model.stats();
+ */
+declare function cachedModel(model: ModelPort, opts?: CachedModelOptions): ModelPort & {
+    stats(): CacheStats;
+};
+export { AIEmbedder, AIModel, type AIModelOptions, type CacheStats, type CacheStore, type CachedModelOptions, LITELLM_URL, type ModelResolver, type OllamaModelOptions, type TokenThreshold, type WithFallbackOptions, byTokenEstimate, cachedModel, createOllamaModel, defaultPrices, fetchLatestPrices, mapLiteLLM, pricesUpdatedAt, routeModel, withFallback };