npm - @kreuzberg/liter-llm-node - Versions diffs - 1.5.1 → 1.6.0 - Mend

@kreuzberg/liter-llm-node 1.5.1 → 1.6.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (7)

package/README.md +84 -121
package/index.d.ts +742 -561
package/liter-llm-node.darwin-arm64.node +0 -0
package/liter-llm-node.linux-arm64-gnu.node +0 -0
package/liter-llm-node.linux-x64-gnu.node +0 -0
package/liter-llm-node.win32-x64-msvc.node +0 -0
package/package.json +7 -7

package/index.d.ts CHANGED

@@ -1,155 +1,139 @@
-// This file is auto-generated by alef — DO NOT EDIT.
-// alef:hash:fe64a8a06beeb01b5344005fa07dcdfdf3e244d772ace0f89dbba84b0541ca2d
-// To regenerate: alef generate
-// To verify freshness: alef verify --exit-code
+/* auto-generated by NAPI-RS */
 /* eslint-disable */
-export type JsonValue = string | number | boolean | null | JsonValue[] | { [key: string]: JsonValue };
-/**
- * Return all provider configs from the registry.
- *
- * Useful for tooling, documentation generation, or runtime enumeration.
- */
-export declare function allProviders(): Array<ProviderConfig>;
-/**
- * Calculate the estimated cost of a completion given a model name and token
- * counts.
- *
- * Returns `None` if the model is not present in the embedded pricing registry.
- * Returns `Some(cost_usd)` otherwise, where the value is in US dollars.
- *
- * When an exact model name match is not found, progressively shorter prefixes
- * are tried by stripping from the last `-` or `.` separator.  For example,
- * `gpt-4-0613` will match `gpt-4` if no `gpt-4-0613` entry exists.
- */
-export declare function completionCost(model: string, promptTokens: number, completionTokens: number): number | null;
 /**
- * Calculate the estimated cost of a completion, accounting for cached
- * (cache-hit) prompt tokens billed at the provider's discounted rate.
- *
- * `cached_tokens` is the count of prompt tokens served from the provider's
- * prompt cache. It must be `<= prompt_tokens` (cached tokens are a subset of
- * the prompt). The non-cached portion is billed at `input_cost_per_token`
- * and the cached portion at `cache_read_input_token_cost` when the model
- * has cache pricing; otherwise the entire prompt is billed at the regular
- * input rate.
+ * This type implements JavaScript's async iterable protocol.
+ * It can be used with `for await...of` loops.
  *
- * Returns `None` if the model is not present in the embedded pricing
- * registry, mirroring [`completion_cost`].
+ * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Iteration_protocols#the_async_iterator_and_async_iterable_protocols
  */
-export declare function completionCostWithCache(model: string, promptTokens: number, cachedTokens: number, completionTokens: number): number | null;
+export declare class ChatStreamIterator {}
 /**
- * Return the set of complex provider names.
- *
- * Complex providers require custom auth/routing logic beyond simple bearer
- * tokens (e.g. AWS Bedrock SigV4, Vertex AI OAuth2).
+ * Default client implementation backed by `reqwest`.
  *
- * The returned reference points into the static registry — no allocation.
- */
-export declare function complexProviderNames(): Array<string>;
-/**
- * Count tokens for a full [`ChatCompletionRequest`].
+ * Sends requests to 140+ LLM providers with automatic provider detection
+ * and per-request routing. The provider is resolved at construction time
+ * from `model_hint` (or defaults to OpenAI), but individual requests can
+ * override the provider via model name prefix (e.g. `"anthropic/claude-3-5-sonnet"`
+ * routes to Anthropic regardless of construction-time setting).
  *
- * Sums tokens across all message text contents plus a per-message overhead
- * of ~4 tokens (for role, separators, and formatting metadata). Tool
- * definitions and multimodal content parts (images, audio, documents) are
- * not counted — only textual content contributes to the token total.
- * @throws Returns [`LiterLlmError::BadRequest`] if the tokenizer cannot be loaded or
- * if tokenization fails for any message.
- */
-export declare function countRequestTokens(model: string, req?: ChatCompletionRequest | undefined | null): number;
-/**
- * Count tokens in a text string using the tokenizer for the given model.
+ * When the model prefix does not match any known provider, the construction-time
+ * provider is used as the fallback. This enables seamless migration between
+ * providers by changing only the model name.
  *
- * The tokenizer is resolved from the model name prefix (e.g. `"gpt-4o"` maps
- * to the `Xenova/gpt-4o` HuggingFace tokenizer). Tokenizers are cached after
- * first load.
- * @throws Returns [`LiterLlmError::BadRequest`] if the tokenizer cannot be loaded
- * (e.g. network failure on first use) or if tokenization itself fails.
+ * The provider is stored behind an `Arc` so it can be shared cheaply into
+ * async closures and streaming tasks. Pre-computed auth headers and extra
+ * headers are cached at construction to avoid redundant encoding on every request.
  */
-export declare function countTokens(model: string, text: string): number;
+export declare class DefaultClient {
+  chat(req: ChatCompletionRequest): Promise<ChatCompletionResponse>;
+  chatStream(req: ChatCompletionRequest): Promise<ChatStreamIterator>;
+  embed(req: EmbeddingRequest): Promise<EmbeddingResponse>;
+  listModels(): Promise<ModelsListResponse>;
+  imageGenerate(req: CreateImageRequest): Promise<ImagesResponse>;
+  speech(req: CreateSpeechRequest): Promise<Buffer>;
+  transcribe(req: CreateTranscriptionRequest): Promise<TranscriptionResponse>;
+  moderate(req: ModerationRequest): Promise<ModerationResponse>;
+  rerank(req: RerankRequest): Promise<RerankResponse>;
+  search(req: SearchRequest): Promise<SearchResponse>;
+  ocr(req: OcrRequest): Promise<OcrResponse>;
+  createFile(req: CreateFileRequest): Promise<FileObject>;
+  retrieveFile(fileId: string): Promise<FileObject>;
+  deleteFile(fileId: string): Promise<DeleteResponse>;
+  listFiles(query?: FileListQuery | undefined | null): Promise<FileListResponse>;
+  fileContent(fileId: string): Promise<Buffer>;
+  createBatch(req: CreateBatchRequest): Promise<BatchObject>;
+  retrieveBatch(batchId: string): Promise<BatchObject>;
+  listBatches(query?: BatchListQuery | undefined | null): Promise<BatchListResponse>;
+  cancelBatch(batchId: string): Promise<BatchObject>;
+  fetchBatchForPolling(batchId: string): Promise<BatchObject>;
+  /**
+   * Poll a batch until it reaches a terminal status (Completed, Failed, Expired, Cancelled).
+   *
+   * Uses exponential backoff with configurable initial interval, maximum interval, and backoff multiplier.
+   * Optionally supports a timeout that aborts polling if exceeded.
+   *
+   * # Errors
+   *
+   * Returns `BatchWaitError.Failed` if the batch reaches a failure terminal status.
+   * Returns `BatchWaitError.Timeout` if the configured timeout is exceeded.
+   * Returns `BatchWaitError.Client` for underlying client errors.
+   *
+   * # Example
+   */
+  waitForBatch(batchId: string, config: WaitForBatchConfig): Promise<BatchObject>;
+  createResponse(req: CreateResponseRequest): Promise<ResponseObject>;
+  retrieveResponse(responseId: string): Promise<ResponseObject>;
+  cancelResponse(responseId: string): Promise<ResponseObject>;
+}
+export type JsDefaultClient = DefaultClient;
-/**
- * Create a new LLM client with simple scalar configuration.
- *
- * This is the primary binding entry-point. All parameters except `api_key`
- * are optional — omitting them uses the same defaults as
- * [`ClientConfigBuilder`].
- * @throws Returns [`LiterLlmError`] if the underlying HTTP client cannot be
- * constructed, or if the resolved provider configuration is invalid.
- */
-export declare function createClient(apiKey: string, baseUrl?: string | undefined | null, timeoutSecs?: number | undefined | null, maxRetries?: number | undefined | null, modelHint?: string | undefined | null): DefaultClient;
+export declare class JsLiterLlmErrorInfo {
+  statusCode: number;
+  isTransient: boolean;
+  errorType: string;
+  /** HTTP status code for this error (0 means no associated status). */
+  statusCode(): number;
+  /** Returns `true` if the error is transient and a retry may succeed. */
+  isTransient(): boolean;
+  /** Machine-readable error category string for matching and logging. */
+  errorType(): string;
+}
 /**
- * Create a new LLM client from a JSON string.
+ * The value broadcast from a singleflight leader to all followers.
  *
- * The JSON object accepts the same fields as `liter-llm.toml` (snake_case).
- * @throws Returns [`LiterLlmError::BadRequest`] if `json` is not valid JSON or
- * contains unknown fields.
+ * `Arc<LiterLlmError>` is used because `LiterLlmError` is not `Clone` and
+ * broadcast channels require `T: Clone`.  The `Arc` adds only a reference-count
+ * bump per follower, which is negligible under the burst loads this layer targets.
  */
-export declare function createClientFromJson(json: string): DefaultClient;
+export declare class SingleflightResult {}
+export type JsSingleflightResult = SingleflightResult;
 /**
- * Install the `ring` crypto provider as the rustls process default, idempotently.
- *
- * rustls 0.23+ removed the implicit default provider. This function installs
- * `ring` once per process. Subsequent calls are no-ops. Calling it from a
- * downstream Rust app that has already installed `aws-lc-rs` is safe — the
- * `Err` from `install_default()` is silently ignored.
- *
- * Called automatically by every internal `reqwest::Client` constructor
- * (auth providers, default HTTP client). Bindings and downstream consumers
- * reach those constructors transitively, so no manual init is required.
- *
- * WASM builds are exempt — the WASM target uses the browser/Node.js fetch
- * API instead of rustls, so no crypto provider is needed.
+ * Return all provider configs from the registry.
  *
- * Windows builds use native-tls (SChannel) via reqwest, so rustls is not
- * present and no crypto provider installation is needed.
+ * Useful for tooling, documentation generation, or runtime enumeration.
+ * Returns the public `ProviderConfig` slice (without capability flags).
+ * To query capability flags for a specific provider use `capabilities`.
  */
-export declare function ensureCryptoProvider(): void;
+export declare function allProviders(): Array<ProviderConfig>;
 /** Assistant's response to a user message. */
 export interface AssistantMessage {
   /** The assistant's text response. Absent if tool calls are returned instead. */
-  readonly content?: string
+  content?: string;
   /** Optional name for the assistant. */
-  readonly name?: string
+  name?: string;
   /** Tool calls the model wants to execute, if any. */
-  readonly toolCalls?: Array<ToolCall>
+  toolCalls?: Array<JsToolCall>;
   /** Refusal reason, if the model declined to respond per safety policies. */
-  readonly refusal?: string
+  refusal?: string;
   /** Deprecated legacy function_call field; retained for API compatibility. */
-  readonly functionCall?: FunctionCall
+  functionCall?: JsFunctionCall;
 }
 /** Audio content part for speech-capable models. */
 export interface AudioContent {
   /** Base64-encoded audio data. */
-  readonly data?: string
+  data?: string;
   /** Audio format (e.g., "wav", "mp3", "ogg"). */
-  readonly format?: string
+  format?: string;
 }
 /** Auth configuration block. */
 export interface AuthConfig {
   /** Auth scheme classification. */
-  readonly authType: AuthType
+  type: JsAuthType;
   /**
    * Name of the environment variable that holds the API key (e.g. `"OPENAI_API_KEY"`).
    * Holds the variable name, never the secret value.
    */
-  readonly envVar?: string
+  envVar?: string;
 }
 /** How the API key is sent in the HTTP request. */
-export declare enum AuthHeaderFormat {
+export declare const enum AuthHeaderFormat {
   /** Bearer token: `Authorization: Bearer <key>` */
   Bearer = "Bearer",
   /** Custom header: e.g., `X-Api-Key: <key>` */
@@ -159,7 +143,7 @@ export declare enum AuthHeaderFormat {
 }
 /** Auth scheme used by a provider. */
-export declare enum AuthType {
+export declare const enum AuthType {
   /** Standard `Authorization: Bearer <key>` header. */
   Bearer = "bearer",
   /** `x-api-key: <key>` header (also handles `"header"` and `"x-api-key"` aliases). */
@@ -173,69 +157,69 @@ export declare enum AuthType {
 /** Query parameters for listing batches. */
 export interface BatchListQuery {
   /** Maximum number of results to return. Defaults to 20. */
-  readonly limit?: number
+  limit?: number;
   /** Pagination cursor: return results after this batch ID. */
-  readonly after?: string
+  after?: string;
 }
 /** Response from listing batches. */
 export interface BatchListResponse {
   /** Object type (always `"list"`). */
-  readonly object?: string
+  object?: string;
   /** List of batch objects. */
-  readonly data?: Array<BatchObject>
+  data?: Array<BatchObject>;
   /** Whether more results are available. */
-  readonly hasMore?: boolean
+  hasMore?: boolean;
   /** First batch ID in the result set (for pagination). */
-  readonly firstId?: string
+  firstId?: string;
   /** Last batch ID in the result set (for pagination). */
-  readonly lastId?: string
+  lastId?: string;
 }
 /** A batch job object. */
 export interface BatchObject {
   /** Unique batch ID. */
-  readonly id?: string
+  id?: string;
   /** Object type (always `"batch"`). */
-  readonly object?: string
+  object?: string;
   /** API endpoint (e.g., `"/v1/chat/completions"`). */
-  readonly endpoint?: string
+  endpoint?: string;
   /** ID of the input file. */
-  readonly inputFileId?: string
+  inputFileId?: string;
   /** Completion window (e.g., `"24h"`). */
-  readonly completionWindow?: string
+  completionWindow?: string;
   /** Current job status. */
-  readonly status?: BatchStatus
+  status?: JsBatchStatus;
   /** ID of the output file (present when completed). */
-  readonly outputFileId?: string
+  outputFileId?: string;
   /** ID of the error file (present if some requests failed). */
-  readonly errorFileId?: string
+  errorFileId?: string;
   /** Unix timestamp of batch creation. */
-  readonly createdAt?: number
+  createdAt?: number;
   /** Unix timestamp of completion (if completed). */
-  readonly completedAt?: number
+  completedAt?: number;
   /** Unix timestamp of failure (if failed). */
-  readonly failedAt?: number
+  failedAt?: number;
   /** Unix timestamp of expiration (if expired). */
-  readonly expiredAt?: number
+  expiredAt?: number;
   /** Request processing counts. */
-  readonly requestCounts?: BatchRequestCounts
+  requestCounts?: JsBatchRequestCounts;
   /** Metadata attached to the batch. */
-  readonly metadata?: JsonValue
+  metadata?: any;
 }
 /** Request processing counts for a batch. */
 export interface BatchRequestCounts {
   /** Total requests in the batch. */
-  readonly total?: number
+  total?: number;
   /** Completed requests. */
-  readonly completed?: number
+  completed?: number;
   /** Failed requests. */
-  readonly failed?: number
+  failed?: number;
 }
 /** Status of a batch job. */
-export declare enum BatchStatus {
+export declare const enum BatchStatus {
   /** Validating the input file. */
   Validating = "validating",
   /** Job failed. */
@@ -257,368 +241,494 @@ export declare enum BatchStatus {
 /** Configuration for budget enforcement. */
 export interface BudgetConfig {
   /** Maximum total spend across all models, in USD.  `None` means unlimited. */
-  readonly globalLimit?: number
+  globalLimit?: number;
   /**
    * Per-model spending limits in USD.  Models not listed here are only
    * constrained by `global_limit`.
    */
-  readonly modelLimits?: Record<string, number>
+  modelLimits?: Record<string, number>;
   /** Whether to reject requests or merely warn when a limit is exceeded. */
-  readonly enforcement?: Enforcement
+  enforcement?: JsEnforcement;
 }
+export declare function budgetConfigDefault(): BudgetConfig;
 /** Storage backend for the response cache. */
-export type CacheBackend =
-  | { type: 'memory' }
-  | { type: 'open_dal'; scheme: string; config: Record<string, string> }
+export interface CacheBackend {
+  type: string;
+  scheme?: string;
+  config?: Record<string, string>;
+}
 /** Configuration for the response cache. */
 export interface CacheConfig {
   /** Maximum number of cached entries. */
-  readonly maxEntries?: number
+  maxEntries?: number;
   /** Time-to-live for each cached entry. */
-  readonly ttl?: number
+  ttl?: number;
   /** Storage backend to use. */
-  readonly backend?: CacheBackend
+  backend?: JsCacheBackend;
 }
+export declare function cacheConfigDefault(): CacheConfig;
+/**
+ * Return the capability flags for a named provider.
+ *
+ * Performs an O(n) linear scan over the embedded registry (142 entries).
+ * Returns an owned value so that bindings can box/copy it across the FFI
+ * boundary without dealing with lifetimes. `ProviderCapabilities` is `Copy`,
+ * so this is a cheap memcpy of seven `bool` fields.
+ *
+ * For unknown `provider_name` values the function returns an all-`false`
+ * sentinel so callers never need to handle `Option`.
+ */
+export declare function capabilities(providerName: string): ProviderCapabilities;
 /** A streamed chunk of a chat completion response. */
 export interface ChatCompletionChunk {
   /** Unique identifier for this stream. */
-  readonly id?: string
+  id?: string;
   /**
    * Always `"chat.completion.chunk"` from OpenAI-compatible APIs.  Stored
    * as a plain `String` so non-standard provider values do not fail parsing.
    */
-  readonly object?: string
+  object?: string;
   /** Unix timestamp of chunk creation. */
-  readonly created?: number
+  created?: number;
   /** Model used to generate the chunk. */
-  readonly model?: string
+  model?: string;
   /** Streaming choices (delta updates). */
-  readonly choices?: Array<StreamChoice>
+  choices?: Array<JsStreamChoice>;
   /** Token usage (typically only in the final chunk). */
-  readonly usage?: Usage
+  usage?: Usage;
   /** Fingerprint of the system configuration (OpenAI-specific). */
-  readonly systemFingerprint?: string
+  systemFingerprint?: string;
   /** Service tier used (OpenAI-specific). */
-  readonly serviceTier?: string
+  serviceTier?: string;
 }
 /** Chat completion request (compatible with OpenAI and similar APIs). */
 export interface ChatCompletionRequest {
   /** Model ID (e.g., `"gpt-4o-mini"`, `"claude-3-5-sonnet"`). */
-  readonly model?: string
+  model?: string;
   /** Conversation history from oldest to newest. */
-  readonly messages?: Array<Message>
+  messages?: Array<JsMessage>;
   /** Sampling temperature in `[0.0, 2.0]`. Higher increases randomness. Defaults to 1.0. */
-  readonly temperature?: number
+  temperature?: number;
   /** Nucleus sampling parameter in `[0.0, 1.0]`. Lower is more focused. */
-  readonly topP?: number
+  topP?: number;
   /** Number of chat completions to generate. Defaults to 1. */
-  readonly n?: number
+  n?: number;
   /**
    * Whether to stream the response.
    *
    * Managed by the client layer — do not set directly.
    */
-  readonly stream?: boolean
+  stream?: boolean;
   /** Stop sequence(s) that halt token generation. */
-  readonly stop?: StopSequence
+  stop?: JsStopSequence;
   /** Max output tokens. Different from max_completion_tokens in some providers. */
-  readonly maxTokens?: number
+  maxTokens?: number;
   /** Presence penalty in `[-2.0, 2.0]`. Positive discourages repeated topics. */
-  readonly presencePenalty?: number
+  presencePenalty?: number;
   /** Frequency penalty in `[-2.0, 2.0]`. Positive discourages repeated tokens. */
-  readonly frequencyPenalty?: number
+  frequencyPenalty?: number;
   /**
    * Token bias map.  Uses `BTreeMap` (sorted keys) for deterministic
    * serialization order — important when hashing or signing requests.
    */
-  readonly logitBias?: Record<string, number>
+  logitBias?: Record<string, number>;
   /** User identifier for request tracking and abuse detection. */
-  readonly user?: string
+  user?: string;
   /** Tools the model can invoke. */
-  readonly tools?: Array<ChatCompletionTool>
+  tools?: Array<ChatCompletionTool>;
   /** Tool usage mode (auto, required, none, or specific tool). */
-  readonly toolChoice?: ToolChoice
+  toolChoice?: JsToolChoice;
   /** Whether the model can call multiple tools in parallel. Defaults to true. */
-  readonly parallelToolCalls?: boolean
+  parallelToolCalls?: boolean;
   /** Output format constraint (text, JSON, JSON schema). */
-  readonly responseFormat?: ResponseFormat
+  responseFormat?: JsResponseFormat;
   /** Streaming options (e.g., include_usage). */
-  readonly streamOptions?: StreamOptions
+  streamOptions?: JsStreamOptions;
   /** Random seed for reproducible outputs. Provider support varies. */
-  readonly seed?: number
+  seed?: number;
   /** Reasoning effort level (low, medium, high) for extended-thinking models. */
-  readonly reasoningEffort?: ReasoningEffort
+  reasoningEffort?: JsReasoningEffort;
   /**
    * Provider-specific extra parameters merged into the request body.
    * Use for guardrails, safety settings, grounding config, etc.
    */
-  readonly extraBody?: JsonValue
+  extraBody?: any;
 }
 /** Chat completion response from the API. */
 export interface ChatCompletionResponse {
   /** Unique identifier for this response. */
-  readonly id?: string
+  id?: string;
   /**
    * Always `"chat.completion"` from OpenAI-compatible APIs.  Stored as a
    * plain `String` so non-standard provider values do not break deserialization.
    */
-  readonly object?: string
+  object?: string;
   /** Unix timestamp of response creation. */
-  readonly created?: number
+  created?: number;
   /** Model used to generate the response. */
-  readonly model?: string
+  model?: string;
   /** List of completion choices. */
-  readonly choices?: Array<Choice>
+  choices?: Array<JsChoice>;
   /** Token usage statistics. */
-  readonly usage?: Usage
+  usage?: Usage;
   /** Fingerprint of the system configuration (OpenAI-specific). */
-  readonly systemFingerprint?: string
+  systemFingerprint?: string;
   /** Service tier used (OpenAI-specific). */
-  readonly serviceTier?: string
+  serviceTier?: string;
 }
 /** A tool the model can invoke (currently, all tools are functions). */
 export interface ChatCompletionTool {
   /** Tool type (always "function" in OpenAI spec). */
-  readonly toolType: ToolType
+  type: JsToolType;
   /** Function definition with name, description, and JSON schema parameters. */
-  readonly function: FunctionDefinition
+  function: JsFunctionDefinition;
 }
+export declare function chatStream(
+  engine: DefaultClient,
+  model: string,
+): Promise<ChatStreamIterator>;
+/**
+ * Assert that `current_len + incoming` does not exceed `limit`.
+ *
+ * Call this before appending `incoming` bytes to any buffer that must
+ * stay below `limit`.  Returns `Err(LiterLlmError.Streaming)` on overflow
+ * and emits a `tracing.warn!` with context.
+ *
+ * # Example
+ */
+export declare function checkBound(
+  context: string,
+  currentLen: number,
+  incoming: number,
+  limit: number,
+): void;
 /** A single completion choice. */
 export interface Choice {
   /** Index of this choice in the choices array. */
-  readonly index?: number
+  index?: number;
   /** The assistant's message response. */
-  readonly message?: AssistantMessage
+  message?: AssistantMessage;
   /** Why the model stopped generating (stop, length, tool_calls, content_filter, etc.). */
-  readonly finishReason?: FinishReason
+  finishReason?: JsFinishReason;
+}
+/** Observable state of a circuit breaker. */
+export declare const enum CircuitState {
+  /** Requests flow through normally. */
+  Closed = "Closed",
+  /** All requests are rejected; the circuit is waiting for the backoff to elapse. */
+  Open = "Open",
+  /** One probe request is allowed through to test service health. */
+  HalfOpen = "HalfOpen",
 }
+/**
+ * Remove all guardrails from the global registry.
+ *
+ * Primarily useful in tests to reset state between test cases.
+ *
+ * # Panics
+ *
+ * Panics if the global registry lock is poisoned.
+ */
+export declare function clear(): void;
+/**
+ * Calculate the estimated cost of a completion given a model name and token
+ * counts.
+ *
+ * Returns `None` if the model is not present in the embedded pricing registry.
+ * Returns `Some(cost_usd)` otherwise, where the value is in US dollars.
+ *
+ * When an exact model name match is not found, progressively shorter prefixes
+ * are tried by stripping from the last `-` or `.` separator.  For example,
+ * `gpt-4-0613` will match `gpt-4` if no `gpt-4-0613` entry exists.
+ *
+ * # Example
+ */
+export declare function completionCost(
+  model: string,
+  promptTokens: number,
+  completionTokens: number,
+): number | null;
+/**
+ * Calculate the estimated cost of a completion, accounting for cached
+ * (cache-hit) prompt tokens billed at the provider's discounted rate.
+ *
+ * `cached_tokens` is the count of prompt tokens served from the provider's
+ * prompt cache. It must be `<= prompt_tokens` (cached tokens are a subset of
+ * the prompt). The non-cached portion is billed at `input_cost_per_token`
+ * and the cached portion at `cache_read_input_token_cost` when the model
+ * has cache pricing; otherwise the entire prompt is billed at the regular
+ * input rate.
+ *
+ * Returns `None` if the model is not present in the embedded pricing
+ * registry, mirroring `completion_cost`.
+ */
+export declare function completionCostWithCache(
+  model: string,
+  promptTokens: number,
+  cachedTokens: number,
+  completionTokens: number,
+): number | null;
+/**
+ * Return the set of complex provider names.
+ *
+ * Complex providers require custom auth/routing logic beyond simple bearer
+ * tokens (e.g. AWS Bedrock SigV4, Vertex AI OAuth2).
+ *
+ * The returned reference points into the static registry — no allocation.
+ */
+export declare function complexProviderNames(): Array<string>;
 /** A single content part in a user message — text, image, document, or audio. */
-export type ContentPart =
-  | { type: 'text'; text: string }
-  | { type: 'image_url'; imageUrl: ImageUrl }
-  | { type: 'document'; document: DocumentContent }
-  | { type: 'input_audio'; inputAudio: AudioContent }
+export interface ContentPart {
+  type: string;
+  text?: string;
+  imageUrl?: ImageUrl;
+  document?: DocumentContent;
+  inputAudio?: AudioContent;
+}
+/**
+ * Count tokens for a full `ChatCompletionRequest`.
+ *
+ * Sums tokens across all message text contents plus a per-message overhead
+ * of ~4 tokens (for role, separators, and formatting metadata). Tool
+ * definitions and multimodal content parts (images, audio, documents) are
+ * not counted — only textual content contributes to the token total.
+ *
+ * # Errors
+ *
+ * Returns `LiterLlmError.BadRequest` if the tokenizer cannot be loaded or
+ * if tokenization fails for any message.
+ */
+export declare function countRequestTokens(
+  model: string,
+  req?: ChatCompletionRequest | undefined | null,
+): number;
+/**
+ * Count tokens in a text string using the tokenizer for the given model.
+ *
+ * The tokenizer is resolved from the model name prefix (e.g. `"gpt-4o"` maps
+ * to the `Xenova/gpt-4o` HuggingFace tokenizer). Tokenizers are cached after
+ * first load.
+ *
+ * # Errors
+ *
+ * Returns `LiterLlmError.BadRequest` if the tokenizer cannot be loaded
+ * (e.g. network failure on first use) or if tokenization itself fails.
+ */
+export declare function countTokens(model: string, text: string): number;
 /** Request to create a batch job. */
 export interface CreateBatchRequest {
   /** ID of the uploaded input file (JSONL format). */
-  readonly inputFileId?: string
+  inputFileId?: string;
   /** API endpoint (e.g., `"/v1/chat/completions"`). */
-  readonly endpoint?: string
+  endpoint?: string;
   /** Completion window (e.g., `"24h"`). */
-  readonly completionWindow?: string
+  completionWindow?: string;
   /** Optional metadata to attach to the batch. */
-  readonly metadata?: JsonValue
+  metadata?: any;
 }
+/**
+ * Create a new LLM client with simple scalar configuration.
+ *
+ * This is the primary binding entry-point. All parameters except `api_key`
+ * are optional — omitting them uses the same defaults as
+ * `ClientConfigBuilder`.
+ *
+ * # Errors
+ *
+ * Returns `LiterLlmError` if the underlying HTTP client cannot be
+ * constructed, or if the resolved provider configuration is invalid.
+ */
+export declare function createClient(
+  apiKey: string,
+  baseUrl?: string | undefined | null,
+  timeoutSecs?: number | undefined | null,
+  maxRetries?: number | undefined | null,
+  modelHint?: string | undefined | null,
+): DefaultClient;
+/**
+ * Create a new LLM client from a JSON string.
+ *
+ * The JSON object accepts the same fields as `liter-llm.toml` (snake_case).
+ *
+ * # Errors
+ *
+ * Returns `LiterLlmError.BadRequest` if `json` is not valid JSON or
+ * contains unknown fields.
+ */
+export declare function createClientFromJson(json: string): DefaultClient;
 /** Request to upload a file. */
 export interface CreateFileRequest {
   /** Base64-encoded file data. */
-  readonly file?: string
+  file?: string;
   /** Purpose for the file. */
-  readonly purpose?: FilePurpose
+  purpose?: JsFilePurpose;
   /** Optional filename to associate with the upload. */
-  readonly filename?: string
+  filename?: string;
 }
 /** Request to create images from a text prompt. */
 export interface CreateImageRequest {
   /** Text description of the image to generate. */
-  readonly prompt?: string
+  prompt?: string;
   /** Model ID (e.g., `"dall-e-3"`). Optional; API may use default if unset. */
-  readonly model?: string
+  model?: string;
   /** Number of images to generate. Defaults to 1. */
-  readonly n?: number
+  n?: number;
   /** Image size (e.g., `"1024x1024"`, `"1792x1024"`). */
-  readonly size?: string
+  size?: string;
   /** Image quality: `"standard"` or `"hd"`. */
-  readonly quality?: string
+  quality?: string;
   /** Style: `"natural"` or `"vivid"` (DALL-E 3 only). */
-  readonly style?: string
+  style?: string;
   /** Response format: `"url"` or `"b64_json"`. */
-  readonly responseFormat?: string
+  responseFormat?: string;
   /** User identifier for request tracking. */
-  readonly user?: string
+  user?: string;
 }
 /** Request to create a structured response. */
 export interface CreateResponseRequest {
   /** Model ID. */
-  readonly model?: string
+  model?: string;
   /** Input data to process (e.g., a document to extract from). */
-  readonly input?: JsonValue
+  input?: any;
   /** Instructions for processing the input. */
-  readonly instructions?: string
+  instructions?: string;
   /** Available tools the model can use. */
-  readonly tools?: Array<ResponseTool>
+  tools?: Array<JsResponseTool>;
   /** Sampling temperature in `[0.0, 2.0]`. Defaults to 1.0. */
-  readonly temperature?: number
+  temperature?: number;
   /** Maximum output tokens. */
-  readonly maxOutputTokens?: number
+  maxOutputTokens?: number;
   /** Optional metadata. */
-  readonly metadata?: JsonValue
+  metadata?: any;
 }
 /** Request to generate speech audio from text. */
 export interface CreateSpeechRequest {
   /** Model ID (e.g., `"tts-1"`, `"tts-1-hd"`). */
-  readonly model?: string
+  model?: string;
   /** Text to synthesize into speech. */
-  readonly input?: string
+  input?: string;
   /** Voice name (e.g., `"alloy"`, `"echo"`, `"fable"`, `"onyx"`, `"nova"`, `"shimmer"`). */
-  readonly voice?: string
+  voice?: string;
   /** Audio format (e.g., `"mp3"`, `"opus"`, `"aac"`, `"flac"`, `"wav"`, `"pcm"`). */
-  readonly responseFormat?: string
+  responseFormat?: string;
   /** Playback speed in `[0.25, 4.0]`. Defaults to 1.0. */
-  readonly speed?: number
+  speed?: number;
 }
 /** Request to transcribe audio into text. */
 export interface CreateTranscriptionRequest {
   /** Model ID (e.g., `"whisper-1"`). */
-  readonly model?: string
+  model?: string;
   /** Base64-encoded audio file data. */
-  readonly file?: string
+  file?: string;
   /** Language ISO-639-1 code (e.g., `"en"`, `"fr"`, `"de"`). Optional; model auto-detects. */
-  readonly language?: string
+  language?: string;
   /** Optional text to guide the model (improves accuracy for domain-specific terms). */
-  readonly prompt?: string
+  prompt?: string;
   /** Output format (e.g., `"json"`, `"text"`, `"vtt"`, `"srt"`, `"verbose_json"`). */
-  readonly responseFormat?: string
+  responseFormat?: string;
   /** Sampling temperature in `[0.0, 1.0]`. Higher increases variability. Defaults to 0. */
-  readonly temperature?: number
+  temperature?: number;
 }
 /** Configuration for registering a custom LLM provider at runtime. */
 export interface CustomProviderConfig {
   /** Unique name for this provider (e.g., "my-provider"). */
-  readonly name: string
+  name: string;
   /** Base URL for the provider's API (e.g., "https://api.my-provider.com/v1"). */
-  readonly baseUrl: string
+  baseUrl: string;
   /** Authentication header format. */
-  readonly authHeader: AuthHeaderFormat
+  authHeader: JsAuthHeaderFormat;
   /** Model name prefixes that route to this provider (e.g., `["my-"]`). */
-  readonly modelPrefixes: Array<string>
-}
-/**
- * Default client implementation backed by `reqwest`.
- *
- * Sends requests to 140+ LLM providers with automatic provider detection
- * and per-request routing. The provider is resolved at construction time
- * from `model_hint` (or defaults to OpenAI), but individual requests can
- * override the provider via model name prefix (e.g. `"anthropic/claude-3-5-sonnet"`
- * routes to Anthropic regardless of construction-time setting).
- *
- * When the model prefix does not match any known provider, the construction-time
- * provider is used as the fallback. This enables seamless migration between
- * providers by changing only the model name.
- *
- * The provider is stored behind an [`Arc`] so it can be shared cheaply into
- * async closures and streaming tasks. Pre-computed auth headers and extra
- * headers are cached at construction to avoid redundant encoding on every request.
- */
-export declare class DefaultClient {
-  chat(req?: ChatCompletionRequest | undefined | null): Promise<ChatCompletionResponse>
-  chatStream(req?: ChatCompletionRequest | undefined | null): Promise<AsyncGenerator<ChatCompletionChunk, void, undefined>>
-  embed(req?: EmbeddingRequest | undefined | null): Promise<EmbeddingResponse>
-  listModels(): Promise<ModelsListResponse>
-  imageGenerate(req?: CreateImageRequest | undefined | null): Promise<ImagesResponse>
-  speech(req?: CreateSpeechRequest | undefined | null): Promise<Uint8Array>
-  transcribe(req?: CreateTranscriptionRequest | undefined | null): Promise<TranscriptionResponse>
-  moderate(req?: ModerationRequest | undefined | null): Promise<ModerationResponse>
-  rerank(req?: RerankRequest | undefined | null): Promise<RerankResponse>
-  search(req?: SearchRequest | undefined | null): Promise<SearchResponse>
-  ocr(req?: OcrRequest | undefined | null): Promise<OcrResponse>
-  createFile(req?: CreateFileRequest | undefined | null): Promise<FileObject>
-  retrieveFile(fileId: string): Promise<FileObject>
-  deleteFile(fileId: string): Promise<DeleteResponse>
-  listFiles(query?: FileListQuery | undefined | null): Promise<FileListResponse>
-  fileContent(fileId: string): Promise<Uint8Array>
-  createBatch(req?: CreateBatchRequest | undefined | null): Promise<BatchObject>
-  retrieveBatch(batchId: string): Promise<BatchObject>
-  listBatches(query?: BatchListQuery | undefined | null): Promise<BatchListResponse>
-  cancelBatch(batchId: string): Promise<BatchObject>
-  createResponse(req?: CreateResponseRequest | undefined | null): Promise<ResponseObject>
-  retrieveResponse(responseId: string): Promise<ResponseObject>
-  cancelResponse(responseId: string): Promise<ResponseObject>
+  modelPrefixes: Array<string>;
 }
 /** Response from a delete operation. */
 export interface DeleteResponse {
   /** ID of the deleted resource. */
-  readonly id?: string
+  id?: string;
   /** Object type. */
-  readonly object?: string
+  object?: string;
   /** Confirmation that the resource was deleted. */
-  readonly deleted?: boolean
+  deleted?: boolean;
 }
 /** Developer message (system-like message for Claude models). */
 export interface DeveloperMessage {
   /** Developer-specific instructions or context. */
-  readonly content?: string
+  content?: string;
   /** Optional name for the developer message source. */
-  readonly name?: string
+  name?: string;
 }
 /** PDF/document content part for vision-capable models. */
 export interface DocumentContent {
   /** Base64-encoded document data or URL. */
-  readonly data?: string
+  data?: string;
   /** MIME type (e.g., "application/pdf", "text/csv"). */
-  readonly mediaType?: string
+  mediaType?: string;
 }
 /** The format in which the embedding vectors are returned. */
-export declare enum EmbeddingFormat {
+export declare const enum EmbeddingFormat {
   /** 32-bit floating-point numbers (default). */
   Float = "float",
   /** Base64-encoded string representation of the floats. */
   Base64 = "base64",
 }
-/** Text or texts to embed. */
-export declare enum EmbeddingInput {
-  /** Single text string. */
-  Single = "Single",
-  /** Multiple text strings (batch embedding). */
-  Multiple = "Multiple",
-}
 /** A single embedding vector. */
 export interface EmbeddingObject {
   /**
    * Always `"embedding"` from OpenAI-compatible APIs.  Stored as a plain
    * `String` so non-standard provider values do not break deserialization.
    */
-  readonly object: string
+  object: string;
   /** The embedding vector. */
-  readonly embedding: Array<number>
+  embedding: Array<number>;
   /** Index in the batch (corresponds to input order). */
-  readonly index: number
+  index: number;
 }
 /** Embedding request. */
 export interface EmbeddingRequest {
   /** Model ID (e.g., `"text-embedding-3-small"`). */
-  readonly model?: string
+  model?: string;
   /** Text or texts to embed. */
-  readonly input?: EmbeddingInput
+  input?: JsEmbeddingInput;
   /** Output format: float (native) or base64. */
-  readonly encodingFormat?: EmbeddingFormat
+  encodingFormat?: JsEmbeddingFormat;
   /** Requested embedding dimensions (if supported by the model). */
-  readonly dimensions?: number
+  dimensions?: number;
   /** User identifier for request tracking. */
-  readonly user?: string
+  user?: string;
 }
 /** Embedding response. */
@@ -627,69 +737,89 @@ export interface EmbeddingResponse {
    * Always `"list"` from OpenAI-compatible APIs.  Stored as a plain
    * `String` so non-standard provider values do not break deserialization.
    */
-  readonly object: string
+  object: string;
   /** List of embeddings. */
-  readonly data: Array<EmbeddingObject>
+  data: Array<JsEmbeddingObject>;
   /** Model used to generate embeddings. */
-  readonly model: string
+  model: string;
   /** Token usage (input tokens only; embeddings have zero output tokens). */
-  readonly usage?: Usage
+  usage?: Usage;
 }
 /** How budget limits are enforced. */
-export declare enum Enforcement {
+export declare const enum Enforcement {
   /**
    * Reject requests that would exceed the budget with
-   * [`LiterLlmError::BudgetExceeded`].
+   * `LiterLlmError.BudgetExceeded`.
    */
   Hard = "Hard",
   /**
-   * Allow requests through but emit a `tracing::warn!` when the budget is
+   * Allow requests through but emit a warning (`tracing::warn!`) when the budget is
    * exceeded.
    */
   Soft = "Soft",
 }
+/**
+ * Install the `ring` crypto provider as the rustls process default, idempotently.
+ *
+ * rustls 0.23+ removed the implicit default provider. This function installs
+ * `ring` once per process. Subsequent calls are no-ops. Calling it from a
+ * downstream Rust app that has already installed `aws-lc-rs` is safe — the
+ * `Err` from `install_default()` is silently ignored.
+ *
+ * Called automatically by every internal `reqwest.Client` constructor
+ * (auth providers, default HTTP client). Bindings and downstream consumers
+ * reach those constructors transitively, so no manual init is required.
+ *
+ * WASM builds are exempt — the WASM target uses the browser/Node.js fetch
+ * API instead of rustls, so no crypto provider is needed.
+ *
+ * Windows builds use native-tls (SChannel) via reqwest, so rustls is not
+ * present and no crypto provider installation is needed.
+ */
+export declare function ensureCryptoProvider(): void;
 /** Query parameters for listing files. */
 export interface FileListQuery {
   /** Filter by file purpose (e.g., `"batch"`, `"fine-tune"`). */
-  readonly purpose?: string
+  purpose?: string;
   /** Maximum number of results to return. Defaults to 20. */
-  readonly limit?: number
+  limit?: number;
   /** Pagination cursor: return results after this file ID. */
-  readonly after?: string
+  after?: string;
 }
 /** Response from listing files. */
 export interface FileListResponse {
   /** Object type (always `"list"`). */
-  readonly object?: string
+  object?: string;
   /** List of file objects. */
-  readonly data?: Array<FileObject>
+  data?: Array<FileObject>;
   /** Whether more results are available. */
-  readonly hasMore?: boolean
+  hasMore?: boolean;
 }
 /** An uploaded file object. */
 export interface FileObject {
   /** Unique file ID. */
-  readonly id?: string
+  id?: string;
   /** Object type (always `"file"`). */
-  readonly object?: string
+  object?: string;
   /** File size in bytes. */
-  readonly bytes?: number
+  bytes?: number;
   /** Unix timestamp of file creation. */
-  readonly createdAt?: number
+  createdAt?: number;
   /** Filename. */
-  readonly filename?: string
+  filename?: string;
   /** File purpose. */
-  readonly purpose?: string
+  purpose?: string;
   /** Processing status (e.g., `"uploaded"`, `"processed"`). */
-  readonly status?: string
+  status?: string;
 }
 /** Purpose of an uploaded file. */
-export declare enum FilePurpose {
+export declare const enum FilePurpose {
   /** File for use with Assistants API. */
   Assistants = "assistants",
   /** File for batch processing. */
@@ -701,7 +831,7 @@ export declare enum FilePurpose {
 }
 /** Why a choice stopped generating tokens. */
-export declare enum FinishReason {
+export declare const enum FinishReason {
   Stop = "stop",
   Length = "length",
   ToolCalls = "tool_calls",
@@ -723,41 +853,49 @@ export declare enum FinishReason {
 /** Function call details. */
 export interface FunctionCall {
   /** Function name. */
-  readonly name: string
-  /** Arguments as a JSON string (parse with serde_json::from_str). */
-  readonly arguments: string
+  name: string;
+  /** Arguments as a JSON string (parse with `JSON.parse`). */
+  arguments: string;
 }
 /** Function definition exposed to the model. */
 export interface FunctionDefinition {
   /** Name of the function. Required and must be alphanumeric + underscores. */
-  readonly name: string
+  name: string;
   /** Human-readable description explaining what the function does. */
-  readonly description?: string
+  description?: string;
   /** JSON Schema defining the function's parameters. */
-  readonly parameters?: JsonValue
+  parameters?: any;
   /** If true, enforce strict JSON schema validation for arguments. */
-  readonly strict?: boolean
+  strict?: boolean;
 }
 /** Deprecated legacy function-role message body. */
 export interface FunctionMessage {
-  readonly content?: string
-  readonly name?: string
+  content?: string;
+  name?: string;
+}
+/** The result of a single health probe. */
+export declare const enum HealthStatus {
+  /** The probe succeeded; the upstream is reachable. */
+  Healthy = "Healthy",
+  /** The probe failed; the upstream may be down. */
+  Unhealthy = "Unhealthy",
 }
 /** A single generated image, returned as either a URL or base64 data. */
 export interface Image {
   /** Image URL (if response_format was "url"). */
-  readonly url?: string
+  url?: string;
   /** Base64-encoded image data (if response_format was "b64_json"). */
-  readonly b64Json?: string
+  b64Json?: string;
   /** The final prompt used to generate the image (DALL-E 3). */
-  readonly revisedPrompt?: string
+  revisedPrompt?: string;
 }
 /** Image detail level controlling token cost and processing. */
-export declare enum ImageDetail {
+export declare const enum ImageDetail {
   /** Low detail: scales image to 512x512, uses fewer tokens. */
   Low = "low",
   /** High detail: processes up to 2x2 grid of tiles, higher token cost. */
@@ -769,53 +907,65 @@ export declare enum ImageDetail {
 /** Response containing generated images. */
 export interface ImagesResponse {
   /** Unix timestamp of image creation. */
-  readonly created?: number
+  created?: number;
   /** List of generated images. */
-  readonly data?: Array<Image>
+  data?: Array<JsImage>;
 }
 /** An image URL reference with optional detail level for processing. */
 export interface ImageUrl {
   /** URL of the image (data URI or HTTP/HTTPS URL). */
-  readonly url?: string
+  url?: string;
   /** Detail level: low (512x512), high (2x2 tiles), or auto (model-selected). */
-  readonly detail?: ImageDetail
+  detail?: JsImageDetail;
+}
+/** An intent prototype: `(intent_name, prototype_embedding, target_model_id)`. */
+export interface IntentPrototype {
+  /** Human-readable name for the intent (used in logs/metrics). */
+  name: string;
+  /** Pre-computed embedding vector for this intent. */
+  embedding: Array<number>;
+  /** Model to route to when this intent is detected. */
+  model: string;
 }
 /** JSON Schema specification for constrained output. */
 export interface JsonSchemaFormat {
   /** Name of the schema (must be unique in the request). */
-  readonly name?: string
+  name?: string;
   /** Description of what the schema represents. */
-  readonly description?: string
+  description?: string;
   /** JSON Schema object defining the output structure. */
-  readonly schema?: JsonValue
+  schema?: any;
   /** If true, enforce strict schema validation. */
-  readonly strict?: boolean
+  strict?: boolean;
 }
 /** A chat message in a conversation. */
-export type Message =
-  | { role: 'system'; 0: SystemMessage }
-  | { role: 'user'; 0: UserMessage }
-  | { role: 'assistant'; 0: AssistantMessage }
-  | { role: 'tool'; 0: ToolMessage }
-  | { role: 'developer'; 0: DeveloperMessage }
-  | { role: 'function'; 0: FunctionMessage }
+export interface Message {
+  role: string;
+  system?: SystemMessage;
+  user?: UserMessage;
+  assistant?: AssistantMessage;
+  tool?: ToolMessage;
+  developer?: DeveloperMessage;
+  function?: FunctionMessage;
+}
 /** A model available from the API. */
 export interface ModelObject {
   /** Model ID (e.g., `"gpt-4o"`, `"claude-3-5-sonnet"`). */
-  readonly id?: string
+  id?: string;
   /**
    * Always `"model"` from OpenAI-compatible APIs.  Stored as a plain
    * `String` so non-standard provider values do not break deserialization.
    */
-  readonly object?: string
+  object?: string;
   /** Unix timestamp of model creation (or release date). */
-  readonly created?: number
+  created?: number;
   /** Organization or entity that owns the model. */
-  readonly ownedBy?: string
+  ownedBy?: string;
 }
 /** Response listing available models. */
@@ -824,444 +974,482 @@ export interface ModelsListResponse {
    * Always `"list"` from OpenAI-compatible APIs.  Stored as a plain
    * `String` so non-standard provider values do not break deserialization.
    */
-  readonly object?: string
+  object?: string;
   /** List of available models. */
-  readonly data?: Array<ModelObject>
+  data?: Array<JsModelObject>;
 }
 /** Boolean flags for each moderation category. */
 export interface ModerationCategories {
   /** Sexual content. */
-  readonly sexual?: boolean
+  sexual?: boolean;
   /** Hate speech. */
-  readonly hate?: boolean
+  hate?: boolean;
   /** Harassment. */
-  readonly harassment?: boolean
+  harassment?: boolean;
   /** Self-harm content. */
-  readonly selfHarm?: boolean
+  "self-harm"?: boolean;
   /** Sexual content involving minors. */
-  readonly sexualMinors?: boolean
+  "sexual/minors"?: boolean;
   /** Hate speech that threatens violence. */
-  readonly hateThreatening?: boolean
+  "hate/threatening"?: boolean;
   /** Graphic violence. */
-  readonly violenceGraphic?: boolean
+  "violence/graphic"?: boolean;
   /** Intent to self-harm. */
-  readonly selfHarmIntent?: boolean
+  "self-harm/intent"?: boolean;
   /** Instructions for self-harm. */
-  readonly selfHarmInstructions?: boolean
+  "self-harm/instructions"?: boolean;
   /** Harassment that threatens violence. */
-  readonly harassmentThreatening?: boolean
+  "harassment/threatening"?: boolean;
   /** Non-graphic violence. */
-  readonly violence?: boolean
+  violence?: boolean;
 }
 /** Confidence scores for each moderation category. */
 export interface ModerationCategoryScores {
   /** Sexual content score. */
-  readonly sexual?: number
+  sexual?: number;
   /** Hate speech score. */
-  readonly hate?: number
+  hate?: number;
   /** Harassment score. */
-  readonly harassment?: number
+  harassment?: number;
   /** Self-harm content score. */
-  readonly selfHarm?: number
+  "self-harm"?: number;
   /** Sexual content involving minors score. */
-  readonly sexualMinors?: number
+  "sexual/minors"?: number;
   /** Hate speech that threatens violence score. */
-  readonly hateThreatening?: number
+  "hate/threatening"?: number;
   /** Graphic violence score. */
-  readonly violenceGraphic?: number
+  "violence/graphic"?: number;
   /** Intent to self-harm score. */
-  readonly selfHarmIntent?: number
+  "self-harm/intent"?: number;
   /** Instructions for self-harm score. */
-  readonly selfHarmInstructions?: number
+  "self-harm/instructions"?: number;
   /** Harassment that threatens violence score. */
-  readonly harassmentThreatening?: number
+  "harassment/threatening"?: number;
   /** Non-graphic violence score. */
-  readonly violence?: number
-}
-/** Input to the moderation endpoint — a single string or multiple strings. */
-export declare enum ModerationInput {
-  /** Single text string. */
-  Single = "Single",
-  /** Multiple text strings (batch moderation). */
-  Multiple = "Multiple",
+  violence?: number;
 }
 /** Request to classify content for policy violations. */
 export interface ModerationRequest {
   /** Text or texts to check. */
-  readonly input?: ModerationInput
+  input?: JsModerationInput;
   /** Model ID (e.g., `"text-moderation-latest"`). Optional; API uses default if unset. */
-  readonly model?: string
+  model?: string;
 }
 /** Response from the moderation endpoint. */
 export interface ModerationResponse {
   /** Unique identifier for this moderation request. */
-  readonly id: string
+  id: string;
   /** Model used for classification. */
-  readonly model: string
+  model: string;
   /** Results for each input string. */
-  readonly results: Array<ModerationResult>
+  results: Array<JsModerationResult>;
 }
 /** A single moderation classification result. */
 export interface ModerationResult {
   /** True if any category was flagged. */
-  readonly flagged: boolean
+  flagged: boolean;
   /** Boolean flags for each moderation category. */
-  readonly categories: ModerationCategories
+  categories: JsModerationCategories;
   /** Confidence scores for each category. */
-  readonly categoryScores: ModerationCategoryScores
+  categoryScores: JsModerationCategoryScores;
 }
 /** Document input for OCR — either a URL or inline base64 data. */
-export type OcrDocument =
-  | { type: 'document_url'; url: string }
-  | { type: 'base64'; data: string; mediaType: string }
+export interface OcrDocument {
+  type: string;
+  url?: string;
+  data?: string;
+  mediaType?: string;
+}
 /** An image extracted from an OCR page. */
 export interface OcrImage {
   /** Unique image identifier within the document. */
-  readonly id: string
+  id: string;
   /** Base64-encoded image data (if `include_image_base64` was true). */
-  readonly imageBase64?: string
+  imageBase64?: string;
 }
 /** A single page of OCR output. */
 export interface OcrPage {
   /** Page index (0-based). */
-  readonly index: number
+  index: number;
   /** Extracted page content as Markdown. */
-  readonly markdown: string
+  markdown: string;
   /** Embedded images extracted from the page (if `include_image_base64` was true). */
-  readonly images?: Array<OcrImage>
+  images?: Array<JsOcrImage>;
   /** Page dimensions in pixels, if available. */
-  readonly dimensions?: PageDimensions
+  dimensions?: JsPageDimensions;
 }
 /** An OCR request. */
 export interface OcrRequest {
   /** The model/provider to use (e.g. `"mistral/mistral-ocr-latest"`). */
-  readonly model?: string
+  model?: string;
   /** The document to process (URL or base64). */
-  readonly document?: OcrDocument
+  document?: JsOcrDocument;
   /** Specific pages to process (1-indexed). `None` means all pages. */
-  readonly pages?: Array<number>
+  pages?: Array<number>;
   /** Whether to include base64-encoded images of each processed page. */
-  readonly includeImageBase64?: boolean
+  includeImageBase64?: boolean;
 }
 /** An OCR response. */
 export interface OcrResponse {
   /** Extracted pages in order. */
-  readonly pages: Array<OcrPage>
+  pages: Array<JsOcrPage>;
   /** Model/provider used for OCR. */
-  readonly model: string
+  model: string;
   /** Token usage, if reported by the provider. */
-  readonly usage?: Usage
+  usage?: Usage;
 }
 /** Page dimensions in pixels. */
 export interface PageDimensions {
   /** Width in pixels. */
-  readonly width: number
+  width: number;
   /** Height in pixels. */
-  readonly height: number
+  height: number;
 }
 /**
  * Breakdown of tokens used in the prompt portion of a request.
  *
- * `cached_tokens` is included in `Usage::prompt_tokens` — it is *not* an
+ * `cached_tokens` is included in `Usage.prompt_tokens` — it is *not* an
  * additional charge on top of the prompt token count. When pricing supports
  * a `cache_read_input_token_cost`, the cached portion is billed at the
  * discounted rate and the remainder at the regular input rate.
  */
 export interface PromptTokensDetails {
   /** Cached tokens present in the prompt. Defaults to 0 when absent. */
-  readonly cachedTokens?: number
+  cachedTokens?: number;
   /** Audio input tokens present in the prompt. Defaults to 0 when absent. */
-  readonly audioTokens?: number
+  audioTokens?: number;
+}
+/**
+ * Static capability flags for a provider.
+ *
+ * Each flag indicates whether the provider's models *generally* support that
+ * feature.  For providers that aggregate many underlying models (e.g. Bedrock,
+ * OpenRouter, vLLM) the flags reflect the superset of available model
+ * capabilities — a flag being `true` means at least one model supports the
+ * feature, not every model.
+ *
+ * All flags default to `false` so that newly added providers are safe.
+ *
+ * Access via the crate-level `capabilities` function.
+ */
+export interface ProviderCapabilities {
+  /** The provider accepts image input in chat messages. */
+  vision?: boolean;
+  /** The provider supports extended-thinking / reasoning tokens. */
+  reasoning?: boolean;
+  /** The provider supports JSON-mode or `response_format` structured output. */
+  structuredOutput?: boolean;
+  /** The provider supports tool / function calling. */
+  functionCalling?: boolean;
+  /** The provider accepts audio as input. */
+  audioIn?: boolean;
+  /** The provider can generate audio / TTS output. */
+  audioOut?: boolean;
+  /** The provider accepts video as input. */
+  videoIn?: boolean;
 }
-/** Static configuration for a single provider entry in providers.json. */
+/**
+ * Static configuration for a single provider entry in providers.json.
+ *
+ * This struct deliberately does not include capability flags or streaming
+ * format, which are accessed via the `capabilities` function.  Keeping
+ * these fields separate preserves backward compatibility with all generated
+ * binding code that constructs `ProviderConfig` using struct literal syntax.
+ */
 export interface ProviderConfig {
   /** Provider identifier (matches the entry key in providers.json). */
-  readonly name: string
+  name: string;
   /** Human-readable provider name shown in UIs. */
-  readonly displayName?: string
+  displayName?: string;
   /** Base URL used as the default for this provider's HTTP client. */
-  readonly baseUrl?: string
+  baseUrl?: string;
   /** Authentication scheme metadata (auth type + env var holding the key). */
-  readonly auth?: AuthConfig
+  auth?: JsAuthConfig;
   /** Supported endpoint kinds (e.g. `chat`, `embeddings`). */
-  readonly endpoints?: Array<string>
+  endpoints?: Array<string>;
   /** Model-name prefixes claimed by this provider (e.g. `["gpt-", "o1-"]`). */
-  readonly modelPrefixes?: Array<string>
+  modelPrefixes?: Array<string>;
   /**
    * Parameter key renaming for this provider.
    *
    * Each entry maps an OpenAI-spec field name (e.g. `"max_completion_tokens"`)
    * to the name this provider expects (e.g. `"max_tokens"`).  Applied
-   * automatically by [`ConfigDrivenProvider::transform_request`].
+   * automatically by `ConfigDrivenProvider.transform_request`.
    */
-  readonly paramMappings?: Record<string, string>
+  paramMappings?: Record<string, string>;
 }
 /** Configuration for per-model rate limits. */
 export interface RateLimitConfig {
   /** Maximum requests per window.  `None` means unlimited. */
-  readonly rpm?: number
+  rpm?: number;
   /** Maximum tokens per window.  `None` means unlimited. */
-  readonly tpm?: number
+  tpm?: number;
   /** Fixed window duration (defaults to 60 s). */
-  readonly window?: number
+  window?: number;
 }
+export declare function rateLimitConfigDefault(): RateLimitConfig;
 /** Controls how much reasoning effort the model should use. */
-export declare enum ReasoningEffort {
+export declare const enum ReasoningEffort {
   Low = "low",
   Medium = "medium",
   High = "high",
 }
-/** A document to be reranked — either a plain string or an object with a text field. */
-export declare enum RerankDocument {
-  /** Plain text document content. */
-  Text = "Text",
-  /** Document with explicit text field (may include metadata). */
-  Object = "Object",
-}
+/**
+ * Register a custom provider in the global runtime registry.
+ *
+ * The provider will be checked **before** all built-in providers during model
+ * detection. If a provider with the same `name` already exists it is replaced.
+ *
+ * # Errors
+ *
+ * Returns an error if the config is invalid (empty name, empty base_url, or
+ * no model prefixes).
+ */
+export declare function registerCustomProvider(config: CustomProviderConfig): void;
 /** Request to rerank documents by relevance to a query. */
 export interface RerankRequest {
   /** Model ID (e.g., `"cohere/rerank-english-v3.0"`). */
-  readonly model?: string
+  model?: string;
   /** The search query. */
-  readonly query?: string
+  query?: string;
   /** Documents to rerank. */
-  readonly documents?: Array<RerankDocument>
+  documents?: Array<JsRerankDocument>;
   /** Return only the top N results. Optional. */
-  readonly topN?: number
+  topN?: number;
   /** Include the document content in results. Defaults to false. */
-  readonly returnDocuments?: boolean
+  returnDocuments?: boolean;
 }
 /** Response from the rerank endpoint. */
 export interface RerankResponse {
   /** Unique identifier for this rerank request. */
-  readonly id?: string
+  id?: string;
   /** Reranked documents in order of relevance. */
-  readonly results: Array<RerankResult>
+  results: Array<JsRerankResult>;
   /** Optional metadata about the reranking operation. */
-  readonly meta?: JsonValue
+  meta?: any;
 }
 /** A single reranked document with its relevance score. */
 export interface RerankResult {
   /** Original document index in the input list. */
-  readonly index: number
+  index: number;
   /** Relevance score in `[0, 1]`. Higher indicates more relevant. */
-  readonly relevanceScore: number
+  relevanceScore: number;
   /** Original document content (if `return_documents` was true). */
-  readonly document?: RerankResultDocument
+  document?: JsRerankResultDocument;
 }
 /** The text content of a reranked document, returned when `return_documents` is true. */
 export interface RerankResultDocument {
   /** Document text. */
-  readonly text: string
+  text: string;
 }
 /** Response format constraint. */
-export type ResponseFormat =
-  | { type: 'text' }
-  | { type: 'json_object' }
-  | { type: 'json_schema'; jsonSchema: JsonSchemaFormat }
+export interface ResponseFormat {
+  type: string;
+  jsonSchema?: JsonSchemaFormat;
+}
 /** Response from a structured response request. */
 export interface ResponseObject {
   /** Unique response ID. */
-  readonly id?: string
+  id?: string;
   /** Object type (e.g., `"response"`). */
-  readonly object?: string
+  object?: string;
   /** Unix timestamp of response creation. */
-  readonly createdAt?: number
+  createdAt?: number;
   /** Model used to generate the response. */
-  readonly model?: string
+  model?: string;
   /** Status (e.g., `"succeeded"`, `"failed"`). */
-  readonly status?: string
+  status?: string;
   /** Output items from the response. */
-  readonly output?: Array<ResponseOutputItem>
+  output?: Array<JsResponseOutputItem>;
   /** Token usage. */
-  readonly usage?: ResponseUsage
+  usage?: JsResponseUsage;
   /** Error details (if status is "failed"). */
-  readonly error?: JsonValue
+  error?: any;
 }
 /** A single output item from the response. */
 export interface ResponseOutputItem {
   /** Output type (e.g., `"text"`, `"object"`, `"error"`). */
-  readonly itemType?: string
+  type?: string;
   /** Output content (flattened into the object). */
-  readonly content?: JsonValue
+  content?: any;
 }
 /** A tool available for the response request. */
 export interface ResponseTool {
   /** Tool type (e.g., "extractor", "search"). */
-  readonly toolType?: string
+  type?: string;
   /** Tool configuration (flattened into the object). */
-  readonly config?: JsonValue
+  config?: any;
 }
 /** Token usage for a response. */
 export interface ResponseUsage {
   /** Input tokens used. */
-  readonly inputTokens?: number
+  inputTokens?: number;
   /** Output tokens used. */
-  readonly outputTokens?: number
+  outputTokens?: number;
   /** Total tokens used. */
-  readonly totalTokens?: number
+  totalTokens?: number;
 }
 /** A search request. */
 export interface SearchRequest {
   /** The model/provider to use (e.g. `"brave/web-search"`, `"tavily/search"`). */
-  readonly model?: string
+  model?: string;
   /** The search query string. */
-  readonly query?: string
+  query?: string;
   /** Maximum number of results to return. */
-  readonly maxResults?: number
+  maxResults?: number;
   /** Domain filter — restrict results to specific domains. */
-  readonly searchDomainFilter?: Array<string>
+  searchDomainFilter?: Array<string>;
   /** Country code for localized results (ISO 3166-1 alpha-2, e.g., `"US"`, `"FR"`). */
-  readonly country?: string
+  country?: string;
 }
 /** A search response. */
 export interface SearchResponse {
   /** List of search results. */
-  readonly results: Array<SearchResult>
+  results: Array<JsSearchResult>;
   /** Model/provider that performed the search. */
-  readonly model: string
+  model: string;
 }
 /** An individual search result. */
 export interface SearchResult {
   /** Result title. */
-  readonly title: string
+  title: string;
   /** Result URL. */
-  readonly url: string
+  url: string;
   /** Text snippet or excerpt from the page. */
-  readonly snippet: string
+  snippet: string;
   /** Publication or last-updated date, if available. */
-  readonly date?: string
+  date?: string;
 }
 /** Name of the specific function to invoke. */
 export interface SpecificFunction {
   /** Function name. */
-  readonly name?: string
+  name?: string;
 }
 /** Directive to call a specific tool. */
 export interface SpecificToolChoice {
   /** Tool type (always "function"). */
-  readonly choiceType?: ToolType
+  type?: JsToolType;
   /** The specific function to invoke. */
-  readonly function?: SpecificFunction
-}
-/** Stop sequence(s) that cause the model to stop generating. */
-export declare enum StopSequence {
-  /** Single stop sequence. */
-  Single = "Single",
-  /** Multiple stop sequences. */
-  Multiple = "Multiple",
+  function?: JsSpecificFunction;
 }
 /** A streaming choice with incremental delta. */
 export interface StreamChoice {
   /** Index of this choice in the choices array. */
-  readonly index?: number
+  index?: number;
   /** Incremental update to the message (content, tool calls, etc.). */
-  readonly delta?: StreamDelta
+  delta?: JsStreamDelta;
   /** Why the stream ended (present only in final chunk). */
-  readonly finishReason?: FinishReason
+  finishReason?: JsFinishReason;
 }
 /** Incremental delta in a stream chunk. */
 export interface StreamDelta {
   /** Role (typically present only in the first chunk). */
-  readonly role?: string
+  role?: string;
   /** Partial content chunk (e.g., a few words of the response). */
-  readonly content?: string
+  content?: string;
   /** Partial tool calls being streamed. */
-  readonly toolCalls?: Array<StreamToolCall>
+  toolCalls?: Array<JsStreamToolCall>;
   /** Deprecated legacy function_call delta; retained for API compatibility. */
-  readonly functionCall?: StreamFunctionCall
+  functionCall?: JsStreamFunctionCall;
   /** Partial refusal message. */
-  readonly refusal?: string
+  refusal?: string;
+}
+/**
+ * The streaming wire format a provider uses for its response stream.
+ *
+ * Most providers use standard Server-Sent Events (SSE).  AWS Bedrock uses
+ * a proprietary binary EventStream framing.
+ *
+ * Deserialized from the `streaming_format` JSON field via `serde`.
+ */
+export declare const enum StreamFormat {
+  /** Standard Server-Sent Events (text/event-stream). */
+  Sse = "sse",
+  /** AWS EventStream binary framing (application/vnd.amazon.eventstream). */
+  AwsEventStream = "aws_event_stream",
 }
 /** Partial function call details in a stream. */
 export interface StreamFunctionCall {
   /** Function name (typically in the first chunk). */
-  readonly name?: string
+  name?: string;
   /** Partial JSON arguments chunk. */
-  readonly arguments?: string
+  arguments?: string;
 }
 /** Options for streaming responses. */
 export interface StreamOptions {
   /** If true, include token usage in the final stream chunk. */
-  readonly includeUsage?: boolean
+  includeUsage?: boolean;
 }
 /** A streaming tool call being built incrementally. */
 export interface StreamToolCall {
   /** Index of this tool call in the tool_calls array. */
-  readonly index?: number
+  index?: number;
   /** Tool call ID (typically in the first chunk for this call). */
-  readonly id?: string
+  id?: string;
   /** Tool type (typically "function"). */
-  readonly callType?: ToolType
+  type?: JsToolType;
   /** Partial function name and arguments. */
-  readonly function?: StreamFunctionCall
+  function?: JsStreamFunctionCall;
 }
 /** System message guiding model behavior for the entire conversation. */
 export interface SystemMessage {
   /** Instructions or context that apply throughout the conversation. */
-  readonly content?: string
+  content?: string;
   /** Optional name for the system message source. */
-  readonly name?: string
+  name?: string;
 }
 /** A tool call the model wants to execute. */
 export interface ToolCall {
   /** Unique ID for this call, used to reference in tool result messages. */
-  readonly id: string
+  id: string;
   /** Tool type (always "function"). */
-  readonly callType: ToolType
+  type: JsToolType;
   /** Function name and arguments. */
-  readonly function: FunctionCall
-}
-/** Tool usage mode or a specific tool to call. */
-export declare enum ToolChoice {
-  /** Predefined mode: auto, required, or none. */
-  Mode = "Mode",
-  /** Force a specific tool to be called. */
-  Specific = "Specific",
+  function: JsFunctionCall;
 }
 /** Tool choice mode. */
-export declare enum ToolChoiceMode {
+export declare const enum ToolChoiceMode {
   /** Model may or may not call tools; default behavior. */
   Auto = "auto",
   /** Model must call at least one tool. */
@@ -1273,11 +1461,11 @@ export declare enum ToolChoiceMode {
 /** Tool execution result returned to the model. */
 export interface ToolMessage {
   /** Result of the tool execution. */
-  readonly content?: string
+  content?: string;
   /** ID of the tool call this result responds to. */
-  readonly toolCallId?: string
+  toolCallId?: string;
   /** Optional tool/function name. */
-  readonly name?: string
+  name?: string;
 }
 /**
@@ -1287,92 +1475,85 @@ export interface ToolMessage {
  * that constraint at the type level and rejects any other value on
  * deserialization.
  */
-export declare enum ToolType {
+export declare const enum ToolType {
   Function = "function",
 }
 /** Response from a transcription request. */
 export interface TranscriptionResponse {
   /** The transcribed text. */
-  readonly text?: string
+  text?: string;
   /** Detected language (ISO-639-1 code). */
-  readonly language?: string
+  language?: string;
   /** Total audio duration in seconds. */
-  readonly duration?: number
+  duration?: number;
   /** Detailed segment-level transcription (if response_format is "verbose_json"). */
-  readonly segments?: Array<TranscriptionSegment>
+  segments?: Array<JsTranscriptionSegment>;
 }
 /** A segment of transcribed audio with timing information. */
 export interface TranscriptionSegment {
   /** Segment index (0-based). */
-  readonly id?: number
+  id?: number;
   /** Start time in seconds. */
-  readonly start?: number
+  start?: number;
   /** End time in seconds. */
-  readonly end?: number
+  end?: number;
   /** Transcribed text for this segment. */
-  readonly text?: string
+  text?: string;
 }
+/**
+ * Remove a previously registered custom provider by name.
+ *
+ * Returns `true` if a provider with the given name was found and removed,
+ * `false` if no such provider existed.
+ *
+ * # Errors
+ *
+ * Returns an error only if the internal lock is poisoned.
+ */
+export declare function unregisterCustomProvider(name: string): boolean;
 /** Token-usage accounting returned by the provider on each completion / embedding call. */
 export interface Usage {
   /** Prompt tokens used. Defaults to 0 when absent (some providers omit this). */
-  readonly promptTokens?: number
+  promptTokens?: number;
   /** Completion tokens used. Defaults to 0 when absent (e.g. embedding responses). */
-  readonly completionTokens?: number
+  completionTokens?: number;
   /** Total tokens used. Defaults to 0 when absent (some providers omit this). */
-  readonly totalTokens?: number
+  totalTokens?: number;
   /**
    * Breakdown of tokens used in the prompt, including cached tokens served
    * at the provider's discounted cache-read rate. Absent when the provider
    * does not return prompt-token details.
    */
-  readonly promptTokensDetails?: PromptTokensDetails
-}
-/** User message content as either plain text or a list of multimodal parts. */
-export declare enum UserContent {
-  /** Plain text content. */
-  Text = "Text",
-  /** Array of content parts (text, images, documents, audio). */
-  Parts = "Parts",
+  promptTokensDetails?: JsPromptTokensDetails;
 }
 /** User message in the conversation. */
 export interface UserMessage {
   /** Message content as plain text or array of content parts (text, images, documents, audio). */
-  readonly content?: UserContent
+  content?: JsUserContent;
   /** Optional name for the user. */
-  readonly name?: string
+  name?: string;
 }
 /**
- * Register a custom provider in the global runtime registry.
+ * Configuration for polling a batch until terminal status.
  *
- * The provider will be checked **before** all built-in providers during model
- * detection. If a provider with the same `name` already exists it is replaced.
- * @throws Returns an error if the config is invalid (empty name, empty base_url, or
- * no model prefixes).
+ * All time values are in seconds as `f64` so the struct bridges across FFI
+ * boundaries without requiring a `Duration` shim.
  */
-export declare function registerCustomProvider(config: CustomProviderConfig): void;
-/**
- * Remove a previously registered custom provider by name.
- *
- * Returns `true` if a provider with the given name was found and removed,
- * `false` if no such provider existed.
- * @throws Returns an error only if the internal lock is poisoned.
- */
-export declare function unregisterCustomProvider(name: string): boolean;
-export declare class ChatStreamIterator {
-  next(value?: undefined): Promise<IteratorResult<ChatCompletionChunk, void>>
-  [Symbol.asyncIterator](): AsyncGenerator<ChatCompletionChunk, void, undefined>
-}
-export declare class LiterLlmErrorInfo {
-  statusCode(): number
-  isTransient(): boolean
-  errorType(): string
-}
+export interface WaitForBatchConfig {
+  /** Initial interval between polls, in seconds. */
+  initialIntervalSecs?: number;
+  /** Maximum interval between polls (backoff plateau), in seconds. */
+  maxIntervalSecs?: number;
+  /** Exponential backoff multiplier (e.g., 1.5 increases delay by 50% each poll). */
+  backoffMultiplier?: number;
+  /** Optional timeout in seconds — polling fails if this duration is exceeded. */
+  timeoutSecs?: number;
+}
+export declare function waitForBatchConfigDefault(): WaitForBatchConfig;