@kreuzberg/liter-llm-node 1.6.0 → 1.6.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/index.d.ts CHANGED
@@ -1,139 +1,186 @@
1
- /* auto-generated by NAPI-RS */
1
+ // This file is auto-generated by alef — DO NOT EDIT.
2
+ // alef:hash:9ebda2bedb27bc07d8a4cfcbfadd55fd159de7f875fcb63308eaee6e1c3a895d
3
+ // To regenerate: alef generate
4
+ // To verify freshness: alef verify --exit-code
2
5
  /* eslint-disable */
6
+
7
+ export type JsonValue = string | number | boolean | null | JsonValue[] | { [key: string]: JsonValue };
8
+
3
9
  /**
4
- * This type implements JavaScript's async iterable protocol.
5
- * It can be used with `for await...of` loops.
10
+ * Return all provider configs from the registry.
6
11
  *
7
- * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Iteration_protocols#the_async_iterator_and_async_iterable_protocols
12
+ * Useful for tooling, documentation generation, or runtime enumeration.
13
+ * Returns the public [`ProviderConfig`] slice (without capability flags).
14
+ * To query capability flags for a specific provider use [`capabilities`].
8
15
  */
9
- export declare class ChatStreamIterator {}
16
+ export declare function allProviders(): Array<ProviderConfig>;
10
17
 
11
18
  /**
12
- * Default client implementation backed by `reqwest`.
19
+ * Return the capability flags for a named provider.
13
20
  *
14
- * Sends requests to 140+ LLM providers with automatic provider detection
15
- * and per-request routing. The provider is resolved at construction time
16
- * from `model_hint` (or defaults to OpenAI), but individual requests can
17
- * override the provider via model name prefix (e.g. `"anthropic/claude-3-5-sonnet"`
18
- * routes to Anthropic regardless of construction-time setting).
21
+ * Performs an O(n) linear scan over the embedded registry (143 entries).
22
+ * Returns an owned value so that bindings can box/copy it across the FFI
23
+ * boundary without dealing with lifetimes. `ProviderCapabilities` is `Copy`,
24
+ * so this is a cheap memcpy of seven `bool` fields.
19
25
  *
20
- * When the model prefix does not match any known provider, the construction-time
21
- * provider is used as the fallback. This enables seamless migration between
22
- * providers by changing only the model name.
26
+ * For unknown `provider_name` values the function returns an all-`false`
27
+ * sentinel so callers never need to handle `Option`.
28
+ */
29
+ export declare function capabilities(providerName: string): ProviderCapabilities;
30
+
31
+ /**
32
+ * Assert that `current_len + incoming` does not exceed `limit`.
23
33
  *
24
- * The provider is stored behind an `Arc` so it can be shared cheaply into
25
- * async closures and streaming tasks. Pre-computed auth headers and extra
26
- * headers are cached at construction to avoid redundant encoding on every request.
34
+ * Call this before appending `incoming` bytes to any buffer that must
35
+ * stay below `limit`. Returns `Err(LiterLlmError::Streaming)` on overflow
36
+ * and emits a `tracing::warn!` with context.
27
37
  */
28
- export declare class DefaultClient {
29
- chat(req: ChatCompletionRequest): Promise<ChatCompletionResponse>;
30
- chatStream(req: ChatCompletionRequest): Promise<ChatStreamIterator>;
31
- embed(req: EmbeddingRequest): Promise<EmbeddingResponse>;
32
- listModels(): Promise<ModelsListResponse>;
33
- imageGenerate(req: CreateImageRequest): Promise<ImagesResponse>;
34
- speech(req: CreateSpeechRequest): Promise<Buffer>;
35
- transcribe(req: CreateTranscriptionRequest): Promise<TranscriptionResponse>;
36
- moderate(req: ModerationRequest): Promise<ModerationResponse>;
37
- rerank(req: RerankRequest): Promise<RerankResponse>;
38
- search(req: SearchRequest): Promise<SearchResponse>;
39
- ocr(req: OcrRequest): Promise<OcrResponse>;
40
- createFile(req: CreateFileRequest): Promise<FileObject>;
41
- retrieveFile(fileId: string): Promise<FileObject>;
42
- deleteFile(fileId: string): Promise<DeleteResponse>;
43
- listFiles(query?: FileListQuery | undefined | null): Promise<FileListResponse>;
44
- fileContent(fileId: string): Promise<Buffer>;
45
- createBatch(req: CreateBatchRequest): Promise<BatchObject>;
46
- retrieveBatch(batchId: string): Promise<BatchObject>;
47
- listBatches(query?: BatchListQuery | undefined | null): Promise<BatchListResponse>;
48
- cancelBatch(batchId: string): Promise<BatchObject>;
49
- fetchBatchForPolling(batchId: string): Promise<BatchObject>;
50
- /**
51
- * Poll a batch until it reaches a terminal status (Completed, Failed, Expired, Cancelled).
52
- *
53
- * Uses exponential backoff with configurable initial interval, maximum interval, and backoff multiplier.
54
- * Optionally supports a timeout that aborts polling if exceeded.
55
- *
56
- * # Errors
57
- *
58
- * Returns `BatchWaitError.Failed` if the batch reaches a failure terminal status.
59
- * Returns `BatchWaitError.Timeout` if the configured timeout is exceeded.
60
- * Returns `BatchWaitError.Client` for underlying client errors.
61
- *
62
- * # Example
63
- */
64
- waitForBatch(batchId: string, config: WaitForBatchConfig): Promise<BatchObject>;
65
- createResponse(req: CreateResponseRequest): Promise<ResponseObject>;
66
- retrieveResponse(responseId: string): Promise<ResponseObject>;
67
- cancelResponse(responseId: string): Promise<ResponseObject>;
68
- }
69
- export type JsDefaultClient = DefaultClient;
38
+ export declare function checkBound(context: string, currentLen: number, incoming: number, limit: number): void;
70
39
 
71
- export declare class JsLiterLlmErrorInfo {
72
- statusCode: number;
73
- isTransient: boolean;
74
- errorType: string;
75
- /** HTTP status code for this error (0 means no associated status). */
76
- statusCode(): number;
77
- /** Returns `true` if the error is transient and a retry may succeed. */
78
- isTransient(): boolean;
79
- /** Machine-readable error category string for matching and logging. */
80
- errorType(): string;
81
- }
40
+ /**
41
+ * Remove all guardrails from the global registry.
42
+ *
43
+ * Primarily useful in tests to reset state between test cases.
44
+ */
45
+ export declare function clear(): void;
82
46
 
83
47
  /**
84
- * The value broadcast from a singleflight leader to all followers.
48
+ * Calculate the estimated cost of a completion given a model name and token
49
+ * counts.
85
50
  *
86
- * `Arc<LiterLlmError>` is used because `LiterLlmError` is not `Clone` and
87
- * broadcast channels require `T: Clone`. The `Arc` adds only a reference-count
88
- * bump per follower, which is negligible under the burst loads this layer targets.
51
+ * Returns `None` if the model is not present in the embedded pricing registry.
52
+ * Returns `Some(cost_usd)` otherwise, where the value is in US dollars.
53
+ *
54
+ * When an exact model name match is not found, progressively shorter prefixes
55
+ * are tried by stripping from the last `-` or `.` separator. For example,
56
+ * `gpt-4-0613` will match `gpt-4` if no `gpt-4-0613` entry exists.
89
57
  */
90
- export declare class SingleflightResult {}
91
- export type JsSingleflightResult = SingleflightResult;
58
+ export declare function completionCost(model: string, promptTokens: number, completionTokens: number): number | null;
92
59
 
93
60
  /**
94
- * Return all provider configs from the registry.
61
+ * Calculate the estimated cost of a completion, accounting for cached
62
+ * (cache-hit) prompt tokens billed at the provider's discounted rate.
95
63
  *
96
- * Useful for tooling, documentation generation, or runtime enumeration.
97
- * Returns the public `ProviderConfig` slice (without capability flags).
98
- * To query capability flags for a specific provider use `capabilities`.
64
+ * `cached_tokens` is the count of prompt tokens served from the provider's
65
+ * prompt cache. It must be `<= prompt_tokens` (cached tokens are a subset of
66
+ * the prompt). The non-cached portion is billed at `input_cost_per_token`
67
+ * and the cached portion at `cache_read_input_token_cost` when the model
68
+ * has cache pricing; otherwise the entire prompt is billed at the regular
69
+ * input rate.
70
+ *
71
+ * Returns `None` if the model is not present in the embedded pricing
72
+ * registry, mirroring [`completion_cost`].
99
73
  */
100
- export declare function allProviders(): Array<ProviderConfig>;
74
+ export declare function completionCostWithCache(model: string, promptTokens: number, cachedTokens: number, completionTokens: number): number | null;
75
+
76
+ /**
77
+ * Return the set of complex provider names.
78
+ *
79
+ * Complex providers require custom auth/routing logic beyond simple bearer
80
+ * tokens (e.g. AWS Bedrock SigV4, Vertex AI OAuth2).
81
+ *
82
+ * The returned reference points into the static registry — no allocation.
83
+ */
84
+ export declare function complexProviderNames(): Array<string>;
85
+
86
+ /**
87
+ * Count tokens for a full [`ChatCompletionRequest`].
88
+ *
89
+ * Sums tokens across all message text contents plus a per-message overhead
90
+ * of ~4 tokens (for role, separators, and formatting metadata). Tool
91
+ * definitions and multimodal content parts (images, audio, documents) are
92
+ * not counted — only textual content contributes to the token total.
93
+ * @throws Returns [`LiterLlmError::BadRequest`] if the tokenizer cannot be loaded or
94
+ * if tokenization fails for any message.
95
+ */
96
+ export declare function countRequestTokens(model: string, req?: ChatCompletionRequest | undefined | null): number;
97
+
98
+ /**
99
+ * Count tokens in a text string using the tokenizer for the given model.
100
+ *
101
+ * The tokenizer is resolved from the model name prefix (e.g. `"gpt-4o"` maps
102
+ * to the `Xenova/gpt-4o` HuggingFace tokenizer). Tokenizers are cached after
103
+ * first load.
104
+ * @throws Returns [`LiterLlmError::BadRequest`] if the tokenizer cannot be loaded
105
+ * (e.g. network failure on first use) or if tokenization itself fails.
106
+ */
107
+ export declare function countTokens(model: string, text: string): number;
108
+
109
+ /**
110
+ * Create a new LLM client with simple scalar configuration.
111
+ *
112
+ * This is the primary binding entry-point. All parameters except `api_key`
113
+ * are optional — omitting them uses the same defaults as
114
+ * [`ClientConfigBuilder`].
115
+ * @throws Returns [`LiterLlmError`] if the underlying HTTP client cannot be
116
+ * constructed, or if the resolved provider configuration is invalid.
117
+ */
118
+ export declare function createClient(apiKey: string, baseUrl?: string | undefined | null, timeoutSecs?: number | undefined | null, maxRetries?: number | undefined | null, modelHint?: string | undefined | null): DefaultClient;
119
+
120
+ /**
121
+ * Create a new LLM client from a JSON string.
122
+ *
123
+ * The JSON object accepts the same fields as `liter-llm.toml` (snake_case).
124
+ * @throws Returns [`LiterLlmError::BadRequest`] if `json` is not valid JSON or
125
+ * contains unknown fields.
126
+ */
127
+ export declare function createClientFromJson(json: string): DefaultClient;
128
+
129
+ /**
130
+ * Install the `ring` crypto provider as the rustls process default, idempotently.
131
+ *
132
+ * rustls 0.23+ removed the implicit default provider. This function installs
133
+ * `ring` once per process. Subsequent calls are no-ops. Calling it from a
134
+ * downstream Rust app that has already installed `aws-lc-rs` is safe — the
135
+ * `Err` from `install_default()` is silently ignored.
136
+ *
137
+ * Called automatically by every internal `reqwest::Client` constructor
138
+ * (auth providers, default HTTP client). Bindings and downstream consumers
139
+ * reach those constructors transitively, so no manual init is required.
140
+ *
141
+ * WASM builds are exempt — the WASM target uses the browser/Node.js fetch
142
+ * API instead of rustls, so no crypto provider is needed.
143
+ *
144
+ * Windows builds use native-tls (SChannel) via reqwest, so rustls is not
145
+ * present and no crypto provider installation is needed.
146
+ */
147
+ export declare function ensureCryptoProvider(): void;
101
148
 
102
149
  /** Assistant's response to a user message. */
103
150
  export interface AssistantMessage {
104
151
  /** The assistant's text response. Absent if tool calls are returned instead. */
105
- content?: string;
152
+ readonly content?: string
106
153
  /** Optional name for the assistant. */
107
- name?: string;
154
+ readonly name?: string
108
155
  /** Tool calls the model wants to execute, if any. */
109
- toolCalls?: Array<JsToolCall>;
156
+ readonly toolCalls?: Array<ToolCall>
110
157
  /** Refusal reason, if the model declined to respond per safety policies. */
111
- refusal?: string;
158
+ readonly refusal?: string
112
159
  /** Deprecated legacy function_call field; retained for API compatibility. */
113
- functionCall?: JsFunctionCall;
160
+ readonly functionCall?: FunctionCall
114
161
  }
115
162
 
116
163
  /** Audio content part for speech-capable models. */
117
164
  export interface AudioContent {
118
165
  /** Base64-encoded audio data. */
119
- data?: string;
166
+ readonly data?: string
120
167
  /** Audio format (e.g., "wav", "mp3", "ogg"). */
121
- format?: string;
168
+ readonly format?: string
122
169
  }
123
170
 
124
171
  /** Auth configuration block. */
125
172
  export interface AuthConfig {
126
173
  /** Auth scheme classification. */
127
- type: JsAuthType;
174
+ readonly authType: AuthType
128
175
  /**
129
176
  * Name of the environment variable that holds the API key (e.g. `"OPENAI_API_KEY"`).
130
177
  * Holds the variable name, never the secret value.
131
178
  */
132
- envVar?: string;
179
+ readonly envVar?: string
133
180
  }
134
181
 
135
182
  /** How the API key is sent in the HTTP request. */
136
- export declare const enum AuthHeaderFormat {
183
+ export declare enum AuthHeaderFormat {
137
184
  /** Bearer token: `Authorization: Bearer <key>` */
138
185
  Bearer = "Bearer",
139
186
  /** Custom header: e.g., `X-Api-Key: <key>` */
@@ -143,7 +190,7 @@ export declare const enum AuthHeaderFormat {
143
190
  }
144
191
 
145
192
  /** Auth scheme used by a provider. */
146
- export declare const enum AuthType {
193
+ export declare enum AuthType {
147
194
  /** Standard `Authorization: Bearer <key>` header. */
148
195
  Bearer = "bearer",
149
196
  /** `x-api-key: <key>` header (also handles `"header"` and `"x-api-key"` aliases). */
@@ -157,69 +204,69 @@ export declare const enum AuthType {
157
204
  /** Query parameters for listing batches. */
158
205
  export interface BatchListQuery {
159
206
  /** Maximum number of results to return. Defaults to 20. */
160
- limit?: number;
207
+ readonly limit?: number
161
208
  /** Pagination cursor: return results after this batch ID. */
162
- after?: string;
209
+ readonly after?: string
163
210
  }
164
211
 
165
212
  /** Response from listing batches. */
166
213
  export interface BatchListResponse {
167
214
  /** Object type (always `"list"`). */
168
- object?: string;
215
+ readonly object?: string
169
216
  /** List of batch objects. */
170
- data?: Array<BatchObject>;
217
+ readonly data?: Array<BatchObject>
171
218
  /** Whether more results are available. */
172
- hasMore?: boolean;
219
+ readonly hasMore?: boolean
173
220
  /** First batch ID in the result set (for pagination). */
174
- firstId?: string;
221
+ readonly firstId?: string
175
222
  /** Last batch ID in the result set (for pagination). */
176
- lastId?: string;
223
+ readonly lastId?: string
177
224
  }
178
225
 
179
226
  /** A batch job object. */
180
227
  export interface BatchObject {
181
228
  /** Unique batch ID. */
182
- id?: string;
229
+ readonly id?: string
183
230
  /** Object type (always `"batch"`). */
184
- object?: string;
231
+ readonly object?: string
185
232
  /** API endpoint (e.g., `"/v1/chat/completions"`). */
186
- endpoint?: string;
233
+ readonly endpoint?: string
187
234
  /** ID of the input file. */
188
- inputFileId?: string;
235
+ readonly inputFileId?: string
189
236
  /** Completion window (e.g., `"24h"`). */
190
- completionWindow?: string;
237
+ readonly completionWindow?: string
191
238
  /** Current job status. */
192
- status?: JsBatchStatus;
239
+ readonly status?: BatchStatus
193
240
  /** ID of the output file (present when completed). */
194
- outputFileId?: string;
241
+ readonly outputFileId?: string
195
242
  /** ID of the error file (present if some requests failed). */
196
- errorFileId?: string;
243
+ readonly errorFileId?: string
197
244
  /** Unix timestamp of batch creation. */
198
- createdAt?: number;
245
+ readonly createdAt?: number
199
246
  /** Unix timestamp of completion (if completed). */
200
- completedAt?: number;
247
+ readonly completedAt?: number
201
248
  /** Unix timestamp of failure (if failed). */
202
- failedAt?: number;
249
+ readonly failedAt?: number
203
250
  /** Unix timestamp of expiration (if expired). */
204
- expiredAt?: number;
251
+ readonly expiredAt?: number
205
252
  /** Request processing counts. */
206
- requestCounts?: JsBatchRequestCounts;
253
+ readonly requestCounts?: BatchRequestCounts
207
254
  /** Metadata attached to the batch. */
208
- metadata?: any;
255
+ readonly metadata?: JsonValue
209
256
  }
210
257
 
211
258
  /** Request processing counts for a batch. */
212
259
  export interface BatchRequestCounts {
213
260
  /** Total requests in the batch. */
214
- total?: number;
261
+ readonly total?: number
215
262
  /** Completed requests. */
216
- completed?: number;
263
+ readonly completed?: number
217
264
  /** Failed requests. */
218
- failed?: number;
265
+ readonly failed?: number
219
266
  }
220
267
 
221
268
  /** Status of a batch job. */
222
- export declare const enum BatchStatus {
269
+ export declare enum BatchStatus {
223
270
  /** Validating the input file. */
224
271
  Validating = "validating",
225
272
  /** Job failed. */
@@ -241,191 +288,172 @@ export declare const enum BatchStatus {
241
288
  /** Configuration for budget enforcement. */
242
289
  export interface BudgetConfig {
243
290
  /** Maximum total spend across all models, in USD. `None` means unlimited. */
244
- globalLimit?: number;
291
+ readonly globalLimit?: number
245
292
  /**
246
293
  * Per-model spending limits in USD. Models not listed here are only
247
294
  * constrained by `global_limit`.
248
295
  */
249
- modelLimits?: Record<string, number>;
296
+ readonly modelLimits?: Record<string, number>
250
297
  /** Whether to reject requests or merely warn when a limit is exceeded. */
251
- enforcement?: JsEnforcement;
298
+ readonly enforcement?: Enforcement
252
299
  }
253
300
 
254
- export declare function budgetConfigDefault(): BudgetConfig;
255
-
256
301
  /** Storage backend for the response cache. */
257
- export interface CacheBackend {
258
- type: string;
259
- scheme?: string;
260
- config?: Record<string, string>;
261
- }
302
+ export type CacheBackend =
303
+ | { type: 'memory' }
304
+ | { type: 'open_dal'; scheme: string; config: Record<string, string> }
262
305
 
263
306
  /** Configuration for the response cache. */
264
307
  export interface CacheConfig {
265
308
  /** Maximum number of cached entries. */
266
- maxEntries?: number;
309
+ readonly maxEntries?: number
267
310
  /** Time-to-live for each cached entry. */
268
- ttl?: number;
311
+ readonly ttl?: number
269
312
  /** Storage backend to use. */
270
- backend?: JsCacheBackend;
313
+ readonly backend?: CacheBackend
271
314
  }
272
315
 
273
- export declare function cacheConfigDefault(): CacheConfig;
274
-
275
- /**
276
- * Return the capability flags for a named provider.
277
- *
278
- * Performs an O(n) linear scan over the embedded registry (142 entries).
279
- * Returns an owned value so that bindings can box/copy it across the FFI
280
- * boundary without dealing with lifetimes. `ProviderCapabilities` is `Copy`,
281
- * so this is a cheap memcpy of seven `bool` fields.
282
- *
283
- * For unknown `provider_name` values the function returns an all-`false`
284
- * sentinel so callers never need to handle `Option`.
285
- */
286
- export declare function capabilities(providerName: string): ProviderCapabilities;
287
-
288
316
  /** A streamed chunk of a chat completion response. */
289
317
  export interface ChatCompletionChunk {
290
318
  /** Unique identifier for this stream. */
291
- id?: string;
319
+ readonly id?: string
292
320
  /**
293
321
  * Always `"chat.completion.chunk"` from OpenAI-compatible APIs. Stored
294
322
  * as a plain `String` so non-standard provider values do not fail parsing.
295
323
  */
296
- object?: string;
324
+ readonly object?: string
297
325
  /** Unix timestamp of chunk creation. */
298
- created?: number;
326
+ readonly created?: number
299
327
  /** Model used to generate the chunk. */
300
- model?: string;
328
+ readonly model?: string
301
329
  /** Streaming choices (delta updates). */
302
- choices?: Array<JsStreamChoice>;
330
+ readonly choices?: Array<StreamChoice>
303
331
  /** Token usage (typically only in the final chunk). */
304
- usage?: Usage;
332
+ readonly usage?: Usage
305
333
  /** Fingerprint of the system configuration (OpenAI-specific). */
306
- systemFingerprint?: string;
334
+ readonly systemFingerprint?: string
307
335
  /** Service tier used (OpenAI-specific). */
308
- serviceTier?: string;
336
+ readonly serviceTier?: string
309
337
  }
310
338
 
311
339
  /** Chat completion request (compatible with OpenAI and similar APIs). */
312
340
  export interface ChatCompletionRequest {
313
341
  /** Model ID (e.g., `"gpt-4o-mini"`, `"claude-3-5-sonnet"`). */
314
- model?: string;
342
+ readonly model?: string
315
343
  /** Conversation history from oldest to newest. */
316
- messages?: Array<JsMessage>;
344
+ readonly messages?: Array<Message>
317
345
  /** Sampling temperature in `[0.0, 2.0]`. Higher increases randomness. Defaults to 1.0. */
318
- temperature?: number;
346
+ readonly temperature?: number
319
347
  /** Nucleus sampling parameter in `[0.0, 1.0]`. Lower is more focused. */
320
- topP?: number;
348
+ readonly topP?: number
321
349
  /** Number of chat completions to generate. Defaults to 1. */
322
- n?: number;
350
+ readonly n?: number
323
351
  /**
324
352
  * Whether to stream the response.
325
353
  *
326
354
  * Managed by the client layer — do not set directly.
327
355
  */
328
- stream?: boolean;
356
+ readonly stream?: boolean
329
357
  /** Stop sequence(s) that halt token generation. */
330
- stop?: JsStopSequence;
358
+ readonly stop?: StopSequence
331
359
  /** Max output tokens. Different from max_completion_tokens in some providers. */
332
- maxTokens?: number;
360
+ readonly maxTokens?: number
333
361
  /** Presence penalty in `[-2.0, 2.0]`. Positive discourages repeated topics. */
334
- presencePenalty?: number;
362
+ readonly presencePenalty?: number
335
363
  /** Frequency penalty in `[-2.0, 2.0]`. Positive discourages repeated tokens. */
336
- frequencyPenalty?: number;
364
+ readonly frequencyPenalty?: number
337
365
  /**
338
366
  * Token bias map. Uses `BTreeMap` (sorted keys) for deterministic
339
367
  * serialization order — important when hashing or signing requests.
340
368
  */
341
- logitBias?: Record<string, number>;
369
+ readonly logitBias?: Record<string, number>
342
370
  /** User identifier for request tracking and abuse detection. */
343
- user?: string;
371
+ readonly user?: string
344
372
  /** Tools the model can invoke. */
345
- tools?: Array<ChatCompletionTool>;
373
+ readonly tools?: Array<ChatCompletionTool>
346
374
  /** Tool usage mode (auto, required, none, or specific tool). */
347
- toolChoice?: JsToolChoice;
375
+ readonly toolChoice?: ToolChoice
348
376
  /** Whether the model can call multiple tools in parallel. Defaults to true. */
349
- parallelToolCalls?: boolean;
377
+ readonly parallelToolCalls?: boolean
350
378
  /** Output format constraint (text, JSON, JSON schema). */
351
- responseFormat?: JsResponseFormat;
379
+ readonly responseFormat?: ResponseFormat
352
380
  /** Streaming options (e.g., include_usage). */
353
- streamOptions?: JsStreamOptions;
381
+ readonly streamOptions?: StreamOptions
354
382
  /** Random seed for reproducible outputs. Provider support varies. */
355
- seed?: number;
383
+ readonly seed?: number
356
384
  /** Reasoning effort level (low, medium, high) for extended-thinking models. */
357
- reasoningEffort?: JsReasoningEffort;
385
+ readonly reasoningEffort?: ReasoningEffort
358
386
  /**
359
387
  * Provider-specific extra parameters merged into the request body.
360
388
  * Use for guardrails, safety settings, grounding config, etc.
361
389
  */
362
- extraBody?: any;
390
+ readonly extraBody?: JsonValue
363
391
  }
364
392
 
365
393
  /** Chat completion response from the API. */
366
394
  export interface ChatCompletionResponse {
367
395
  /** Unique identifier for this response. */
368
- id?: string;
396
+ readonly id?: string
369
397
  /**
370
398
  * Always `"chat.completion"` from OpenAI-compatible APIs. Stored as a
371
399
  * plain `String` so non-standard provider values do not break deserialization.
372
400
  */
373
- object?: string;
401
+ readonly object?: string
374
402
  /** Unix timestamp of response creation. */
375
- created?: number;
403
+ readonly created?: number
376
404
  /** Model used to generate the response. */
377
- model?: string;
405
+ readonly model?: string
378
406
  /** List of completion choices. */
379
- choices?: Array<JsChoice>;
407
+ readonly choices?: Array<Choice>
380
408
  /** Token usage statistics. */
381
- usage?: Usage;
409
+ readonly usage?: Usage
382
410
  /** Fingerprint of the system configuration (OpenAI-specific). */
383
- systemFingerprint?: string;
411
+ readonly systemFingerprint?: string
384
412
  /** Service tier used (OpenAI-specific). */
385
- serviceTier?: string;
413
+ readonly serviceTier?: string
386
414
  }
387
415
 
388
416
  /** A tool the model can invoke (currently, all tools are functions). */
389
417
  export interface ChatCompletionTool {
390
418
  /** Tool type (always "function" in OpenAI spec). */
391
- type: JsToolType;
419
+ readonly toolType: ToolType
392
420
  /** Function definition with name, description, and JSON schema parameters. */
393
- function: JsFunctionDefinition;
421
+ readonly function: FunctionDefinition
394
422
  }
395
423
 
396
- export declare function chatStream(
397
- engine: DefaultClient,
398
- model: string,
399
- ): Promise<ChatStreamIterator>;
400
-
401
- /**
402
- * Assert that `current_len + incoming` does not exceed `limit`.
403
- *
404
- * Call this before appending `incoming` bytes to any buffer that must
405
- * stay below `limit`. Returns `Err(LiterLlmError.Streaming)` on overflow
406
- * and emits a `tracing.warn!` with context.
407
- *
408
- * # Example
409
- */
410
- export declare function checkBound(
411
- context: string,
412
- currentLen: number,
413
- incoming: number,
414
- limit: number,
415
- ): void;
416
-
417
424
  /** A single completion choice. */
418
425
  export interface Choice {
419
426
  /** Index of this choice in the choices array. */
420
- index?: number;
427
+ readonly index?: number
421
428
  /** The assistant's message response. */
422
- message?: AssistantMessage;
429
+ readonly message?: AssistantMessage
423
430
  /** Why the model stopped generating (stop, length, tool_calls, content_filter, etc.). */
424
- finishReason?: JsFinishReason;
431
+ readonly finishReason?: FinishReason
432
+ }
433
+
434
+ /**
435
+ * A per-chunk transformation in the [`StreamPipeline`].
436
+ *
437
+ * Each middleware receives a typed chunk and returns `Ok(Some(chunk))`
438
+ * to pass it through (optionally modified), `Ok(None)` to drop the chunk,
439
+ * or `Err(e)` to propagate a stream error.
440
+ *
441
+ * The trait is object-safe so implementations can be stored in a
442
+ * `Vec<Box<dyn ChunkMiddleware>>` inside [`StreamPipeline`].
443
+ */
444
+ export interface ChunkMiddleware {
445
+ /**
446
+ * Process a single chunk.
447
+ *
448
+ * - `Ok(Some(chunk))` — emit (possibly transformed) chunk.
449
+ * - `Ok(None)` — drop this chunk silently.
450
+ * - `Err(e)` — propagate as a stream error.
451
+ */
452
+ process(chunk?: ChatCompletionChunk | undefined | null): string
425
453
  }
426
454
 
427
455
  /** Observable state of a circuit breaker. */
428
- export declare const enum CircuitState {
456
+ export declare enum CircuitState {
429
457
  /** Requests flow through normally. */
430
458
  Closed = "Closed",
431
459
  /** All requests are rejected; the circuit is waiting for the backoff to elapse. */
@@ -434,301 +462,236 @@ export declare const enum CircuitState {
434
462
  HalfOpen = "HalfOpen",
435
463
  }
436
464
 
437
- /**
438
- * Remove all guardrails from the global registry.
439
- *
440
- * Primarily useful in tests to reset state between test cases.
441
- *
442
- * # Panics
443
- *
444
- * Panics if the global registry lock is poisoned.
445
- */
446
- export declare function clear(): void;
447
-
448
- /**
449
- * Calculate the estimated cost of a completion given a model name and token
450
- * counts.
451
- *
452
- * Returns `None` if the model is not present in the embedded pricing registry.
453
- * Returns `Some(cost_usd)` otherwise, where the value is in US dollars.
454
- *
455
- * When an exact model name match is not found, progressively shorter prefixes
456
- * are tried by stripping from the last `-` or `.` separator. For example,
457
- * `gpt-4-0613` will match `gpt-4` if no `gpt-4-0613` entry exists.
458
- *
459
- * # Example
460
- */
461
- export declare function completionCost(
462
- model: string,
463
- promptTokens: number,
464
- completionTokens: number,
465
- ): number | null;
466
-
467
- /**
468
- * Calculate the estimated cost of a completion, accounting for cached
469
- * (cache-hit) prompt tokens billed at the provider's discounted rate.
470
- *
471
- * `cached_tokens` is the count of prompt tokens served from the provider's
472
- * prompt cache. It must be `<= prompt_tokens` (cached tokens are a subset of
473
- * the prompt). The non-cached portion is billed at `input_cost_per_token`
474
- * and the cached portion at `cache_read_input_token_cost` when the model
475
- * has cache pricing; otherwise the entire prompt is billed at the regular
476
- * input rate.
477
- *
478
- * Returns `None` if the model is not present in the embedded pricing
479
- * registry, mirroring `completion_cost`.
480
- */
481
- export declare function completionCostWithCache(
482
- model: string,
483
- promptTokens: number,
484
- cachedTokens: number,
485
- completionTokens: number,
486
- ): number | null;
487
-
488
- /**
489
- * Return the set of complex provider names.
490
- *
491
- * Complex providers require custom auth/routing logic beyond simple bearer
492
- * tokens (e.g. AWS Bedrock SigV4, Vertex AI OAuth2).
493
- *
494
- * The returned reference points into the static registry — no allocation.
495
- */
496
- export declare function complexProviderNames(): Array<string>;
497
-
498
465
  /** A single content part in a user message — text, image, document, or audio. */
499
- export interface ContentPart {
500
- type: string;
501
- text?: string;
502
- imageUrl?: ImageUrl;
503
- document?: DocumentContent;
504
- inputAudio?: AudioContent;
505
- }
506
-
507
- /**
508
- * Count tokens for a full `ChatCompletionRequest`.
509
- *
510
- * Sums tokens across all message text contents plus a per-message overhead
511
- * of ~4 tokens (for role, separators, and formatting metadata). Tool
512
- * definitions and multimodal content parts (images, audio, documents) are
513
- * not counted — only textual content contributes to the token total.
514
- *
515
- * # Errors
516
- *
517
- * Returns `LiterLlmError.BadRequest` if the tokenizer cannot be loaded or
518
- * if tokenization fails for any message.
519
- */
520
- export declare function countRequestTokens(
521
- model: string,
522
- req?: ChatCompletionRequest | undefined | null,
523
- ): number;
524
-
525
- /**
526
- * Count tokens in a text string using the tokenizer for the given model.
527
- *
528
- * The tokenizer is resolved from the model name prefix (e.g. `"gpt-4o"` maps
529
- * to the `Xenova/gpt-4o` HuggingFace tokenizer). Tokenizers are cached after
530
- * first load.
531
- *
532
- * # Errors
533
- *
534
- * Returns `LiterLlmError.BadRequest` if the tokenizer cannot be loaded
535
- * (e.g. network failure on first use) or if tokenization itself fails.
536
- */
537
- export declare function countTokens(model: string, text: string): number;
466
+ export type ContentPart =
467
+ | { type: 'text'; text: string }
468
+ | { type: 'image_url'; imageUrl: ImageUrl }
469
+ | { type: 'document'; document: DocumentContent }
470
+ | { type: 'input_audio'; inputAudio: AudioContent }
538
471
 
539
472
  /** Request to create a batch job. */
540
473
  export interface CreateBatchRequest {
541
474
  /** ID of the uploaded input file (JSONL format). */
542
- inputFileId?: string;
475
+ readonly inputFileId?: string
543
476
  /** API endpoint (e.g., `"/v1/chat/completions"`). */
544
- endpoint?: string;
477
+ readonly endpoint?: string
545
478
  /** Completion window (e.g., `"24h"`). */
546
- completionWindow?: string;
479
+ readonly completionWindow?: string
547
480
  /** Optional metadata to attach to the batch. */
548
- metadata?: any;
481
+ readonly metadata?: JsonValue
549
482
  }
550
483
 
551
- /**
552
- * Create a new LLM client with simple scalar configuration.
553
- *
554
- * This is the primary binding entry-point. All parameters except `api_key`
555
- * are optional — omitting them uses the same defaults as
556
- * `ClientConfigBuilder`.
557
- *
558
- * # Errors
559
- *
560
- * Returns `LiterLlmError` if the underlying HTTP client cannot be
561
- * constructed, or if the resolved provider configuration is invalid.
562
- */
563
- export declare function createClient(
564
- apiKey: string,
565
- baseUrl?: string | undefined | null,
566
- timeoutSecs?: number | undefined | null,
567
- maxRetries?: number | undefined | null,
568
- modelHint?: string | undefined | null,
569
- ): DefaultClient;
570
-
571
- /**
572
- * Create a new LLM client from a JSON string.
573
- *
574
- * The JSON object accepts the same fields as `liter-llm.toml` (snake_case).
575
- *
576
- * # Errors
577
- *
578
- * Returns `LiterLlmError.BadRequest` if `json` is not valid JSON or
579
- * contains unknown fields.
580
- */
581
- export declare function createClientFromJson(json: string): DefaultClient;
582
-
583
484
  /** Request to upload a file. */
584
485
  export interface CreateFileRequest {
585
486
  /** Base64-encoded file data. */
586
- file?: string;
487
+ readonly file?: string
587
488
  /** Purpose for the file. */
588
- purpose?: JsFilePurpose;
489
+ readonly purpose?: FilePurpose
589
490
  /** Optional filename to associate with the upload. */
590
- filename?: string;
491
+ readonly filename?: string
591
492
  }
592
493
 
593
494
  /** Request to create images from a text prompt. */
594
495
  export interface CreateImageRequest {
595
496
  /** Text description of the image to generate. */
596
- prompt?: string;
497
+ readonly prompt?: string
597
498
  /** Model ID (e.g., `"dall-e-3"`). Optional; API may use default if unset. */
598
- model?: string;
499
+ readonly model?: string
599
500
  /** Number of images to generate. Defaults to 1. */
600
- n?: number;
501
+ readonly n?: number
601
502
  /** Image size (e.g., `"1024x1024"`, `"1792x1024"`). */
602
- size?: string;
503
+ readonly size?: string
603
504
  /** Image quality: `"standard"` or `"hd"`. */
604
- quality?: string;
505
+ readonly quality?: string
605
506
  /** Style: `"natural"` or `"vivid"` (DALL-E 3 only). */
606
- style?: string;
507
+ readonly style?: string
607
508
  /** Response format: `"url"` or `"b64_json"`. */
608
- responseFormat?: string;
509
+ readonly responseFormat?: string
609
510
  /** User identifier for request tracking. */
610
- user?: string;
511
+ readonly user?: string
611
512
  }
612
513
 
613
514
  /** Request to create a structured response. */
614
515
  export interface CreateResponseRequest {
615
516
  /** Model ID. */
616
- model?: string;
517
+ readonly model?: string
617
518
  /** Input data to process (e.g., a document to extract from). */
618
- input?: any;
519
+ readonly input?: JsonValue
619
520
  /** Instructions for processing the input. */
620
- instructions?: string;
521
+ readonly instructions?: string
621
522
  /** Available tools the model can use. */
622
- tools?: Array<JsResponseTool>;
523
+ readonly tools?: Array<ResponseTool>
623
524
  /** Sampling temperature in `[0.0, 2.0]`. Defaults to 1.0. */
624
- temperature?: number;
525
+ readonly temperature?: number
625
526
  /** Maximum output tokens. */
626
- maxOutputTokens?: number;
527
+ readonly maxOutputTokens?: number
627
528
  /** Optional metadata. */
628
- metadata?: any;
529
+ readonly metadata?: JsonValue
629
530
  }
630
531
 
631
532
  /** Request to generate speech audio from text. */
632
533
  export interface CreateSpeechRequest {
633
534
  /** Model ID (e.g., `"tts-1"`, `"tts-1-hd"`). */
634
- model?: string;
535
+ readonly model?: string
635
536
  /** Text to synthesize into speech. */
636
- input?: string;
537
+ readonly input?: string
637
538
  /** Voice name (e.g., `"alloy"`, `"echo"`, `"fable"`, `"onyx"`, `"nova"`, `"shimmer"`). */
638
- voice?: string;
539
+ readonly voice?: string
639
540
  /** Audio format (e.g., `"mp3"`, `"opus"`, `"aac"`, `"flac"`, `"wav"`, `"pcm"`). */
640
- responseFormat?: string;
541
+ readonly responseFormat?: string
641
542
  /** Playback speed in `[0.25, 4.0]`. Defaults to 1.0. */
642
- speed?: number;
543
+ readonly speed?: number
643
544
  }
644
545
 
645
546
  /** Request to transcribe audio into text. */
646
547
  export interface CreateTranscriptionRequest {
647
548
  /** Model ID (e.g., `"whisper-1"`). */
648
- model?: string;
549
+ readonly model?: string
649
550
  /** Base64-encoded audio file data. */
650
- file?: string;
551
+ readonly file?: string
651
552
  /** Language ISO-639-1 code (e.g., `"en"`, `"fr"`, `"de"`). Optional; model auto-detects. */
652
- language?: string;
553
+ readonly language?: string
653
554
  /** Optional text to guide the model (improves accuracy for domain-specific terms). */
654
- prompt?: string;
555
+ readonly prompt?: string
655
556
  /** Output format (e.g., `"json"`, `"text"`, `"vtt"`, `"srt"`, `"verbose_json"`). */
656
- responseFormat?: string;
557
+ readonly responseFormat?: string
657
558
  /** Sampling temperature in `[0.0, 1.0]`. Higher increases variability. Defaults to 0. */
658
- temperature?: number;
559
+ readonly temperature?: number
659
560
  }
660
561
 
661
562
  /** Configuration for registering a custom LLM provider at runtime. */
662
563
  export interface CustomProviderConfig {
663
564
  /** Unique name for this provider (e.g., "my-provider"). */
664
- name: string;
565
+ readonly name: string
665
566
  /** Base URL for the provider's API (e.g., "https://api.my-provider.com/v1"). */
666
- baseUrl: string;
567
+ readonly baseUrl: string
667
568
  /** Authentication header format. */
668
- authHeader: JsAuthHeaderFormat;
569
+ readonly authHeader: AuthHeaderFormat
669
570
  /** Model name prefixes that route to this provider (e.g., `["my-"]`). */
670
- modelPrefixes: Array<string>;
571
+ readonly modelPrefixes: Array<string>
572
+ }
573
+
574
+ /**
575
+ * Default client implementation backed by `reqwest`.
576
+ *
577
+ * Sends requests to 143 LLM providers with automatic provider detection
578
+ * and per-request routing. The provider is resolved at construction time
579
+ * from `model_hint` (or defaults to OpenAI), but individual requests can
580
+ * override the provider via model name prefix (e.g. `"anthropic/claude-3-5-sonnet"`
581
+ * routes to Anthropic regardless of construction-time setting).
582
+ *
583
+ * When the model prefix does not match any known provider, the construction-time
584
+ * provider is used as the fallback. This enables seamless migration between
585
+ * providers by changing only the model name.
586
+ *
587
+ * The provider is stored behind an [`Arc`] so it can be shared cheaply into
588
+ * async closures and streaming tasks. Pre-computed auth headers and extra
589
+ * headers are cached at construction to avoid redundant encoding on every request.
590
+ */
591
+ export declare class DefaultClient {
592
+ chat(req?: ChatCompletionRequest | undefined | null): Promise<ChatCompletionResponse>
593
+ chatStream(req?: ChatCompletionRequest | undefined | null): Promise<AsyncGenerator<ChatCompletionChunk, void, undefined>>
594
+ embed(req?: EmbeddingRequest | undefined | null): Promise<EmbeddingResponse>
595
+ listModels(): Promise<ModelsListResponse>
596
+ imageGenerate(req?: CreateImageRequest | undefined | null): Promise<ImagesResponse>
597
+ speech(req?: CreateSpeechRequest | undefined | null): Promise<Uint8Array>
598
+ transcribe(req?: CreateTranscriptionRequest | undefined | null): Promise<TranscriptionResponse>
599
+ moderate(req?: ModerationRequest | undefined | null): Promise<ModerationResponse>
600
+ rerank(req?: RerankRequest | undefined | null): Promise<RerankResponse>
601
+ search(req?: SearchRequest | undefined | null): Promise<SearchResponse>
602
+ ocr(req?: OcrRequest | undefined | null): Promise<OcrResponse>
603
+ createFile(req?: CreateFileRequest | undefined | null): Promise<FileObject>
604
+ retrieveFile(fileId: string): Promise<FileObject>
605
+ deleteFile(fileId: string): Promise<DeleteResponse>
606
+ listFiles(query?: FileListQuery | undefined | null): Promise<FileListResponse>
607
+ fileContent(fileId: string): Promise<Uint8Array>
608
+ createBatch(req?: CreateBatchRequest | undefined | null): Promise<BatchObject>
609
+ retrieveBatch(batchId: string): Promise<BatchObject>
610
+ listBatches(query?: BatchListQuery | undefined | null): Promise<BatchListResponse>
611
+ cancelBatch(batchId: string): Promise<BatchObject>
612
+ fetchBatchForPolling(batchId: string): Promise<BatchObject>
613
+ /**
614
+ * Poll a batch until it reaches a terminal status (Completed, Failed, Expired, Cancelled).
615
+ *
616
+ * Uses exponential backoff with configurable initial interval, maximum interval, and backoff multiplier.
617
+ * Optionally supports a timeout that aborts polling if exceeded.
618
+ * @throws Returns `BatchWaitError::Failed` if the batch reaches a failure terminal status.
619
+ * Returns `BatchWaitError::Timeout` if the configured timeout is exceeded.
620
+ * Returns `BatchWaitError::Client` for underlying client errors.
621
+ */
622
+ waitForBatch(batchId: string, config?: WaitForBatchConfig | undefined | null): Promise<BatchObject>
623
+ createResponse(req?: CreateResponseRequest | undefined | null): Promise<ResponseObject>
624
+ retrieveResponse(responseId: string): Promise<ResponseObject>
625
+ cancelResponse(responseId: string): Promise<ResponseObject>
671
626
  }
672
627
 
673
628
  /** Response from a delete operation. */
674
629
  export interface DeleteResponse {
675
630
  /** ID of the deleted resource. */
676
- id?: string;
631
+ readonly id?: string
677
632
  /** Object type. */
678
- object?: string;
633
+ readonly object?: string
679
634
  /** Confirmation that the resource was deleted. */
680
- deleted?: boolean;
635
+ readonly deleted?: boolean
681
636
  }
682
637
 
683
638
  /** Developer message (system-like message for Claude models). */
684
639
  export interface DeveloperMessage {
685
640
  /** Developer-specific instructions or context. */
686
- content?: string;
641
+ readonly content?: string
687
642
  /** Optional name for the developer message source. */
688
- name?: string;
643
+ readonly name?: string
689
644
  }
690
645
 
691
646
  /** PDF/document content part for vision-capable models. */
692
647
  export interface DocumentContent {
693
648
  /** Base64-encoded document data or URL. */
694
- data?: string;
649
+ readonly data?: string
695
650
  /** MIME type (e.g., "application/pdf", "text/csv"). */
696
- mediaType?: string;
651
+ readonly mediaType?: string
697
652
  }
698
653
 
699
654
  /** The format in which the embedding vectors are returned. */
700
- export declare const enum EmbeddingFormat {
655
+ export declare enum EmbeddingFormat {
701
656
  /** 32-bit floating-point numbers (default). */
702
657
  Float = "float",
703
658
  /** Base64-encoded string representation of the floats. */
704
659
  Base64 = "base64",
705
660
  }
706
661
 
662
+ /** Text or texts to embed. */
663
+ export declare enum EmbeddingInput {
664
+ /** Single text string. */
665
+ Single = "Single",
666
+ /** Multiple text strings (batch embedding). */
667
+ Multiple = "Multiple",
668
+ }
669
+
707
670
  /** A single embedding vector. */
708
671
  export interface EmbeddingObject {
709
672
  /**
710
673
  * Always `"embedding"` from OpenAI-compatible APIs. Stored as a plain
711
674
  * `String` so non-standard provider values do not break deserialization.
712
675
  */
713
- object: string;
676
+ readonly object: string
714
677
  /** The embedding vector. */
715
- embedding: Array<number>;
678
+ readonly embedding: Array<number>
716
679
  /** Index in the batch (corresponds to input order). */
717
- index: number;
680
+ readonly index: number
718
681
  }
719
682
 
720
683
  /** Embedding request. */
721
684
  export interface EmbeddingRequest {
722
685
  /** Model ID (e.g., `"text-embedding-3-small"`). */
723
- model?: string;
686
+ readonly model?: string
724
687
  /** Text or texts to embed. */
725
- input?: JsEmbeddingInput;
688
+ readonly input?: EmbeddingInput
726
689
  /** Output format: float (native) or base64. */
727
- encodingFormat?: JsEmbeddingFormat;
690
+ readonly encodingFormat?: EmbeddingFormat
728
691
  /** Requested embedding dimensions (if supported by the model). */
729
- dimensions?: number;
692
+ readonly dimensions?: number
730
693
  /** User identifier for request tracking. */
731
- user?: string;
694
+ readonly user?: string
732
695
  }
733
696
 
734
697
  /** Embedding response. */
@@ -737,89 +700,69 @@ export interface EmbeddingResponse {
737
700
  * Always `"list"` from OpenAI-compatible APIs. Stored as a plain
738
701
  * `String` so non-standard provider values do not break deserialization.
739
702
  */
740
- object: string;
703
+ readonly object: string
741
704
  /** List of embeddings. */
742
- data: Array<JsEmbeddingObject>;
705
+ readonly data: Array<EmbeddingObject>
743
706
  /** Model used to generate embeddings. */
744
- model: string;
707
+ readonly model: string
745
708
  /** Token usage (input tokens only; embeddings have zero output tokens). */
746
- usage?: Usage;
709
+ readonly usage?: Usage
747
710
  }
748
711
 
749
712
  /** How budget limits are enforced. */
750
- export declare const enum Enforcement {
713
+ export declare enum Enforcement {
751
714
  /**
752
715
  * Reject requests that would exceed the budget with
753
- * `LiterLlmError.BudgetExceeded`.
716
+ * [`LiterLlmError::BudgetExceeded`].
754
717
  */
755
718
  Hard = "Hard",
756
719
  /**
757
- * Allow requests through but emit a `tracing.warn!` when the budget is
720
+ * Allow requests through but emit a `tracing::warn!` when the budget is
758
721
  * exceeded.
759
722
  */
760
723
  Soft = "Soft",
761
724
  }
762
725
 
763
- /**
764
- * Install the `ring` crypto provider as the rustls process default, idempotently.
765
- *
766
- * rustls 0.23+ removed the implicit default provider. This function installs
767
- * `ring` once per process. Subsequent calls are no-ops. Calling it from a
768
- * downstream Rust app that has already installed `aws-lc-rs` is safe — the
769
- * `Err` from `install_default()` is silently ignored.
770
- *
771
- * Called automatically by every internal `reqwest.Client` constructor
772
- * (auth providers, default HTTP client). Bindings and downstream consumers
773
- * reach those constructors transitively, so no manual init is required.
774
- *
775
- * WASM builds are exempt — the WASM target uses the browser/Node.js fetch
776
- * API instead of rustls, so no crypto provider is needed.
777
- *
778
- * Windows builds use native-tls (SChannel) via reqwest, so rustls is not
779
- * present and no crypto provider installation is needed.
780
- */
781
- export declare function ensureCryptoProvider(): void;
782
-
783
726
  /** Query parameters for listing files. */
784
727
  export interface FileListQuery {
785
728
  /** Filter by file purpose (e.g., `"batch"`, `"fine-tune"`). */
786
- purpose?: string;
729
+ readonly purpose?: string
787
730
  /** Maximum number of results to return. Defaults to 20. */
788
- limit?: number;
731
+ readonly limit?: number
789
732
  /** Pagination cursor: return results after this file ID. */
790
- after?: string;
733
+ readonly after?: string
791
734
  }
792
735
 
793
736
  /** Response from listing files. */
794
737
  export interface FileListResponse {
795
738
  /** Object type (always `"list"`). */
796
- object?: string;
739
+ readonly object?: string
797
740
  /** List of file objects. */
798
- data?: Array<FileObject>;
741
+ readonly data?: Array<FileObject>
799
742
  /** Whether more results are available. */
800
- hasMore?: boolean;
743
+ readonly hasMore?: boolean
801
744
  }
802
745
 
803
746
  /** An uploaded file object. */
804
747
  export interface FileObject {
805
748
  /** Unique file ID. */
806
- id?: string;
749
+ readonly id?: string
807
750
  /** Object type (always `"file"`). */
808
- object?: string;
751
+ readonly object?: string
809
752
  /** File size in bytes. */
810
- bytes?: number;
753
+ readonly bytes?: number
811
754
  /** Unix timestamp of file creation. */
812
- createdAt?: number;
755
+ readonly createdAt?: number
813
756
  /** Filename. */
814
- filename?: string;
757
+ readonly filename?: string
815
758
  /** File purpose. */
816
- purpose?: string;
759
+ readonly purpose?: string
817
760
  /** Processing status (e.g., `"uploaded"`, `"processed"`). */
818
- status?: string;
761
+ readonly status?: string
819
762
  }
820
763
 
821
764
  /** Purpose of an uploaded file. */
822
- export declare const enum FilePurpose {
765
+ export declare enum FilePurpose {
823
766
  /** File for use with Assistants API. */
824
767
  Assistants = "assistants",
825
768
  /** File for batch processing. */
@@ -831,7 +774,7 @@ export declare const enum FilePurpose {
831
774
  }
832
775
 
833
776
  /** Why a choice stopped generating tokens. */
834
- export declare const enum FinishReason {
777
+ export declare enum FinishReason {
835
778
  Stop = "stop",
836
779
  Length = "length",
837
780
  ToolCalls = "tool_calls",
@@ -853,31 +796,48 @@ export declare const enum FinishReason {
853
796
  /** Function call details. */
854
797
  export interface FunctionCall {
855
798
  /** Function name. */
856
- name: string;
857
- /** Arguments as a JSON string (parse with serde_json.from_str). */
858
- arguments: string;
799
+ readonly name: string
800
+ /** Arguments as a JSON string (parse with serde_json::from_str). */
801
+ readonly arguments: string
859
802
  }
860
803
 
861
804
  /** Function definition exposed to the model. */
862
805
  export interface FunctionDefinition {
863
806
  /** Name of the function. Required and must be alphanumeric + underscores. */
864
- name: string;
807
+ readonly name: string
865
808
  /** Human-readable description explaining what the function does. */
866
- description?: string;
809
+ readonly description?: string
867
810
  /** JSON Schema defining the function's parameters. */
868
- parameters?: any;
811
+ readonly parameters?: JsonValue
869
812
  /** If true, enforce strict JSON schema validation for arguments. */
870
- strict?: boolean;
813
+ readonly strict?: boolean
871
814
  }
872
815
 
873
816
  /** Deprecated legacy function-role message body. */
874
817
  export interface FunctionMessage {
875
- content?: string;
876
- name?: string;
818
+ readonly content?: string
819
+ readonly name?: string
820
+ }
821
+
822
+ /**
823
+ * Abstraction over a health probe strategy.
824
+ *
825
+ * Implementors issue a lightweight probe against `upstream` (typically a
826
+ * provider base URL or named identifier) and report [`HealthStatus`].
827
+ */
828
+ export interface HealthChecker {
829
+ /**
830
+ * Probe `upstream` and return its current [`HealthStatus`].
831
+ *
832
+ * The parameter is taken by value (`String`) so that implementations can
833
+ * move it into the returned future without a clone, making the
834
+ * `'static + Send` bound on the future trivially satisfiable.
835
+ */
836
+ check(upstream: string): Promise<string>
877
837
  }
878
838
 
879
839
  /** The result of a single health probe. */
880
- export declare const enum HealthStatus {
840
+ export declare enum HealthStatus {
881
841
  /** The probe succeeded; the upstream is reachable. */
882
842
  Healthy = "Healthy",
883
843
  /** The probe failed; the upstream may be down. */
@@ -887,15 +847,15 @@ export declare const enum HealthStatus {
887
847
  /** A single generated image, returned as either a URL or base64 data. */
888
848
  export interface Image {
889
849
  /** Image URL (if response_format was "url"). */
890
- url?: string;
850
+ readonly url?: string
891
851
  /** Base64-encoded image data (if response_format was "b64_json"). */
892
- b64Json?: string;
852
+ readonly b64Json?: string
893
853
  /** The final prompt used to generate the image (DALL-E 3). */
894
- revisedPrompt?: string;
854
+ readonly revisedPrompt?: string
895
855
  }
896
856
 
897
857
  /** Image detail level controlling token cost and processing. */
898
- export declare const enum ImageDetail {
858
+ export declare enum ImageDetail {
899
859
  /** Low detail: scales image to 512x512, uses fewer tokens. */
900
860
  Low = "low",
901
861
  /** High detail: processes up to 2x2 grid of tiles, higher token cost. */
@@ -907,65 +867,63 @@ export declare const enum ImageDetail {
907
867
  /** Response containing generated images. */
908
868
  export interface ImagesResponse {
909
869
  /** Unix timestamp of image creation. */
910
- created?: number;
870
+ readonly created?: number
911
871
  /** List of generated images. */
912
- data?: Array<JsImage>;
872
+ readonly data?: Array<Image>
913
873
  }
914
874
 
915
875
  /** An image URL reference with optional detail level for processing. */
916
876
  export interface ImageUrl {
917
877
  /** URL of the image (data URI or HTTP/HTTPS URL). */
918
- url?: string;
878
+ readonly url?: string
919
879
  /** Detail level: low (512x512), high (2x2 tiles), or auto (model-selected). */
920
- detail?: JsImageDetail;
880
+ readonly detail?: ImageDetail
921
881
  }
922
882
 
923
883
  /** An intent prototype: `(intent_name, prototype_embedding, target_model_id)`. */
924
884
  export interface IntentPrototype {
925
885
  /** Human-readable name for the intent (used in logs/metrics). */
926
- name: string;
886
+ readonly name: string
927
887
  /** Pre-computed embedding vector for this intent. */
928
- embedding: Array<number>;
888
+ readonly embedding: Array<number>
929
889
  /** Model to route to when this intent is detected. */
930
- model: string;
890
+ readonly model: string
931
891
  }
932
892
 
933
893
  /** JSON Schema specification for constrained output. */
934
894
  export interface JsonSchemaFormat {
935
895
  /** Name of the schema (must be unique in the request). */
936
- name?: string;
896
+ readonly name?: string
937
897
  /** Description of what the schema represents. */
938
- description?: string;
898
+ readonly description?: string
939
899
  /** JSON Schema object defining the output structure. */
940
- schema?: any;
900
+ readonly schema?: JsonValue
941
901
  /** If true, enforce strict schema validation. */
942
- strict?: boolean;
902
+ readonly strict?: boolean
943
903
  }
944
904
 
945
905
  /** A chat message in a conversation. */
946
- export interface Message {
947
- role: string;
948
- system?: SystemMessage;
949
- user?: UserMessage;
950
- assistant?: AssistantMessage;
951
- tool?: ToolMessage;
952
- developer?: DeveloperMessage;
953
- function?: FunctionMessage;
954
- }
906
+ export type Message =
907
+ | { role: 'system'; 0: SystemMessage }
908
+ | { role: 'user'; 0: UserMessage }
909
+ | { role: 'assistant'; 0: AssistantMessage }
910
+ | { role: 'tool'; 0: ToolMessage }
911
+ | { role: 'developer'; 0: DeveloperMessage }
912
+ | { role: 'function'; 0: FunctionMessage }
955
913
 
956
914
  /** A model available from the API. */
957
915
  export interface ModelObject {
958
916
  /** Model ID (e.g., `"gpt-4o"`, `"claude-3-5-sonnet"`). */
959
- id?: string;
917
+ readonly id?: string
960
918
  /**
961
919
  * Always `"model"` from OpenAI-compatible APIs. Stored as a plain
962
920
  * `String` so non-standard provider values do not break deserialization.
963
921
  */
964
- object?: string;
922
+ readonly object?: string
965
923
  /** Unix timestamp of model creation (or release date). */
966
- created?: number;
924
+ readonly created?: number
967
925
  /** Organization or entity that owns the model. */
968
- ownedBy?: string;
926
+ readonly ownedBy?: string
969
927
  }
970
928
 
971
929
  /** Response listing available models. */
@@ -974,162 +932,167 @@ export interface ModelsListResponse {
974
932
  * Always `"list"` from OpenAI-compatible APIs. Stored as a plain
975
933
  * `String` so non-standard provider values do not break deserialization.
976
934
  */
977
- object?: string;
935
+ readonly object?: string
978
936
  /** List of available models. */
979
- data?: Array<JsModelObject>;
937
+ readonly data?: Array<ModelObject>
980
938
  }
981
939
 
982
940
  /** Boolean flags for each moderation category. */
983
941
  export interface ModerationCategories {
984
942
  /** Sexual content. */
985
- sexual?: boolean;
943
+ readonly sexual?: boolean
986
944
  /** Hate speech. */
987
- hate?: boolean;
945
+ readonly hate?: boolean
988
946
  /** Harassment. */
989
- harassment?: boolean;
947
+ readonly harassment?: boolean
990
948
  /** Self-harm content. */
991
- "self-harm"?: boolean;
949
+ readonly selfHarm?: boolean
992
950
  /** Sexual content involving minors. */
993
- "sexual/minors"?: boolean;
951
+ readonly sexualMinors?: boolean
994
952
  /** Hate speech that threatens violence. */
995
- "hate/threatening"?: boolean;
953
+ readonly hateThreatening?: boolean
996
954
  /** Graphic violence. */
997
- "violence/graphic"?: boolean;
955
+ readonly violenceGraphic?: boolean
998
956
  /** Intent to self-harm. */
999
- "self-harm/intent"?: boolean;
957
+ readonly selfHarmIntent?: boolean
1000
958
  /** Instructions for self-harm. */
1001
- "self-harm/instructions"?: boolean;
959
+ readonly selfHarmInstructions?: boolean
1002
960
  /** Harassment that threatens violence. */
1003
- "harassment/threatening"?: boolean;
961
+ readonly harassmentThreatening?: boolean
1004
962
  /** Non-graphic violence. */
1005
- violence?: boolean;
963
+ readonly violence?: boolean
1006
964
  }
1007
965
 
1008
966
  /** Confidence scores for each moderation category. */
1009
967
  export interface ModerationCategoryScores {
1010
968
  /** Sexual content score. */
1011
- sexual?: number;
969
+ readonly sexual?: number
1012
970
  /** Hate speech score. */
1013
- hate?: number;
971
+ readonly hate?: number
1014
972
  /** Harassment score. */
1015
- harassment?: number;
973
+ readonly harassment?: number
1016
974
  /** Self-harm content score. */
1017
- "self-harm"?: number;
975
+ readonly selfHarm?: number
1018
976
  /** Sexual content involving minors score. */
1019
- "sexual/minors"?: number;
977
+ readonly sexualMinors?: number
1020
978
  /** Hate speech that threatens violence score. */
1021
- "hate/threatening"?: number;
979
+ readonly hateThreatening?: number
1022
980
  /** Graphic violence score. */
1023
- "violence/graphic"?: number;
981
+ readonly violenceGraphic?: number
1024
982
  /** Intent to self-harm score. */
1025
- "self-harm/intent"?: number;
983
+ readonly selfHarmIntent?: number
1026
984
  /** Instructions for self-harm score. */
1027
- "self-harm/instructions"?: number;
985
+ readonly selfHarmInstructions?: number
1028
986
  /** Harassment that threatens violence score. */
1029
- "harassment/threatening"?: number;
987
+ readonly harassmentThreatening?: number
1030
988
  /** Non-graphic violence score. */
1031
- violence?: number;
989
+ readonly violence?: number
990
+ }
991
+
992
+ /** Input to the moderation endpoint — a single string or multiple strings. */
993
+ export declare enum ModerationInput {
994
+ /** Single text string. */
995
+ Single = "Single",
996
+ /** Multiple text strings (batch moderation). */
997
+ Multiple = "Multiple",
1032
998
  }
1033
999
 
1034
1000
  /** Request to classify content for policy violations. */
1035
1001
  export interface ModerationRequest {
1036
1002
  /** Text or texts to check. */
1037
- input?: JsModerationInput;
1003
+ readonly input?: ModerationInput
1038
1004
  /** Model ID (e.g., `"text-moderation-latest"`). Optional; API uses default if unset. */
1039
- model?: string;
1005
+ readonly model?: string
1040
1006
  }
1041
1007
 
1042
1008
  /** Response from the moderation endpoint. */
1043
1009
  export interface ModerationResponse {
1044
1010
  /** Unique identifier for this moderation request. */
1045
- id: string;
1011
+ readonly id: string
1046
1012
  /** Model used for classification. */
1047
- model: string;
1013
+ readonly model: string
1048
1014
  /** Results for each input string. */
1049
- results: Array<JsModerationResult>;
1015
+ readonly results: Array<ModerationResult>
1050
1016
  }
1051
1017
 
1052
1018
  /** A single moderation classification result. */
1053
1019
  export interface ModerationResult {
1054
1020
  /** True if any category was flagged. */
1055
- flagged: boolean;
1021
+ readonly flagged: boolean
1056
1022
  /** Boolean flags for each moderation category. */
1057
- categories: JsModerationCategories;
1023
+ readonly categories: ModerationCategories
1058
1024
  /** Confidence scores for each category. */
1059
- categoryScores: JsModerationCategoryScores;
1025
+ readonly categoryScores: ModerationCategoryScores
1060
1026
  }
1061
1027
 
1062
1028
  /** Document input for OCR — either a URL or inline base64 data. */
1063
- export interface OcrDocument {
1064
- type: string;
1065
- url?: string;
1066
- data?: string;
1067
- mediaType?: string;
1068
- }
1029
+ export type OcrDocument =
1030
+ | { type: 'document_url'; url: string }
1031
+ | { type: 'base64'; data: string; mediaType: string }
1069
1032
 
1070
1033
  /** An image extracted from an OCR page. */
1071
1034
  export interface OcrImage {
1072
1035
  /** Unique image identifier within the document. */
1073
- id: string;
1036
+ readonly id: string
1074
1037
  /** Base64-encoded image data (if `include_image_base64` was true). */
1075
- imageBase64?: string;
1038
+ readonly imageBase64?: string
1076
1039
  }
1077
1040
 
1078
1041
  /** A single page of OCR output. */
1079
1042
  export interface OcrPage {
1080
1043
  /** Page index (0-based). */
1081
- index: number;
1044
+ readonly index: number
1082
1045
  /** Extracted page content as Markdown. */
1083
- markdown: string;
1046
+ readonly markdown: string
1084
1047
  /** Embedded images extracted from the page (if `include_image_base64` was true). */
1085
- images?: Array<JsOcrImage>;
1048
+ readonly images?: Array<OcrImage>
1086
1049
  /** Page dimensions in pixels, if available. */
1087
- dimensions?: JsPageDimensions;
1050
+ readonly dimensions?: PageDimensions
1088
1051
  }
1089
1052
 
1090
1053
  /** An OCR request. */
1091
1054
  export interface OcrRequest {
1092
1055
  /** The model/provider to use (e.g. `"mistral/mistral-ocr-latest"`). */
1093
- model?: string;
1056
+ readonly model?: string
1094
1057
  /** The document to process (URL or base64). */
1095
- document?: JsOcrDocument;
1058
+ readonly document?: OcrDocument
1096
1059
  /** Specific pages to process (1-indexed). `None` means all pages. */
1097
- pages?: Array<number>;
1060
+ readonly pages?: Array<number>
1098
1061
  /** Whether to include base64-encoded images of each processed page. */
1099
- includeImageBase64?: boolean;
1062
+ readonly includeImageBase64?: boolean
1100
1063
  }
1101
1064
 
1102
1065
  /** An OCR response. */
1103
1066
  export interface OcrResponse {
1104
1067
  /** Extracted pages in order. */
1105
- pages: Array<JsOcrPage>;
1068
+ readonly pages: Array<OcrPage>
1106
1069
  /** Model/provider used for OCR. */
1107
- model: string;
1070
+ readonly model: string
1108
1071
  /** Token usage, if reported by the provider. */
1109
- usage?: Usage;
1072
+ readonly usage?: Usage
1110
1073
  }
1111
1074
 
1112
1075
  /** Page dimensions in pixels. */
1113
1076
  export interface PageDimensions {
1114
1077
  /** Width in pixels. */
1115
- width: number;
1078
+ readonly width: number
1116
1079
  /** Height in pixels. */
1117
- height: number;
1080
+ readonly height: number
1118
1081
  }
1119
1082
 
1120
1083
  /**
1121
1084
  * Breakdown of tokens used in the prompt portion of a request.
1122
1085
  *
1123
- * `cached_tokens` is included in `Usage.prompt_tokens` — it is *not* an
1086
+ * `cached_tokens` is included in `Usage::prompt_tokens` — it is *not* an
1124
1087
  * additional charge on top of the prompt token count. When pricing supports
1125
1088
  * a `cache_read_input_token_cost`, the cached portion is billed at the
1126
1089
  * discounted rate and the remainder at the regular input rate.
1127
1090
  */
1128
1091
  export interface PromptTokensDetails {
1129
1092
  /** Cached tokens present in the prompt. Defaults to 0 when absent. */
1130
- cachedTokens?: number;
1093
+ readonly cachedTokens?: number
1131
1094
  /** Audio input tokens present in the prompt. Defaults to 0 when absent. */
1132
- audioTokens?: number;
1095
+ readonly audioTokens?: number
1133
1096
  }
1134
1097
 
1135
1098
  /**
@@ -1143,250 +1106,273 @@ export interface PromptTokensDetails {
1143
1106
  *
1144
1107
  * All flags default to `false` so that newly added providers are safe.
1145
1108
  *
1146
- * Access via the crate-level `capabilities` function:
1109
+ * Access via the crate-level [`capabilities`] function:
1110
+ *
1111
+ * ```rust
1112
+ * use liter_llm::capabilities;
1113
+ *
1114
+ * let caps = capabilities("openai");
1115
+ * assert!(caps.function_calling);
1116
+ * assert!(caps.vision);
1117
+ *
1118
+ * // Unknown providers return a default-all-false reference.
1119
+ * let unknown = capabilities("my-private-model");
1120
+ * assert!(!unknown.function_calling);
1121
+ * ```
1147
1122
  */
1148
1123
  export interface ProviderCapabilities {
1149
1124
  /** The provider accepts image input in chat messages. */
1150
- vision?: boolean;
1125
+ readonly vision?: boolean
1151
1126
  /** The provider supports extended-thinking / reasoning tokens. */
1152
- reasoning?: boolean;
1127
+ readonly reasoning?: boolean
1153
1128
  /** The provider supports JSON-mode or `response_format` structured output. */
1154
- structuredOutput?: boolean;
1129
+ readonly structuredOutput?: boolean
1155
1130
  /** The provider supports tool / function calling. */
1156
- functionCalling?: boolean;
1131
+ readonly functionCalling?: boolean
1157
1132
  /** The provider accepts audio as input. */
1158
- audioIn?: boolean;
1133
+ readonly audioIn?: boolean
1159
1134
  /** The provider can generate audio / TTS output. */
1160
- audioOut?: boolean;
1135
+ readonly audioOut?: boolean
1161
1136
  /** The provider accepts video as input. */
1162
- videoIn?: boolean;
1137
+ readonly videoIn?: boolean
1163
1138
  }
1164
1139
 
1165
1140
  /**
1166
1141
  * Static configuration for a single provider entry in providers.json.
1167
1142
  *
1168
1143
  * This struct deliberately does not include capability flags or streaming
1169
- * format, which are accessed via the `capabilities` function. Keeping
1144
+ * format, which are accessed via the [`capabilities`] function. Keeping
1170
1145
  * these fields separate preserves backward compatibility with all generated
1171
1146
  * binding code that constructs `ProviderConfig` using struct literal syntax.
1172
1147
  */
1173
1148
  export interface ProviderConfig {
1174
1149
  /** Provider identifier (matches the entry key in providers.json). */
1175
- name: string;
1150
+ readonly name: string
1176
1151
  /** Human-readable provider name shown in UIs. */
1177
- displayName?: string;
1152
+ readonly displayName?: string
1178
1153
  /** Base URL used as the default for this provider's HTTP client. */
1179
- baseUrl?: string;
1154
+ readonly baseUrl?: string
1180
1155
  /** Authentication scheme metadata (auth type + env var holding the key). */
1181
- auth?: JsAuthConfig;
1156
+ readonly auth?: AuthConfig
1182
1157
  /** Supported endpoint kinds (e.g. `chat`, `embeddings`). */
1183
- endpoints?: Array<string>;
1158
+ readonly endpoints?: Array<string>
1184
1159
  /** Model-name prefixes claimed by this provider (e.g. `["gpt-", "o1-"]`). */
1185
- modelPrefixes?: Array<string>;
1160
+ readonly modelPrefixes?: Array<string>
1186
1161
  /**
1187
1162
  * Parameter key renaming for this provider.
1188
1163
  *
1189
1164
  * Each entry maps an OpenAI-spec field name (e.g. `"max_completion_tokens"`)
1190
1165
  * to the name this provider expects (e.g. `"max_tokens"`). Applied
1191
- * automatically by `ConfigDrivenProvider.transform_request`.
1166
+ * automatically by [`ConfigDrivenProvider::transform_request`].
1192
1167
  */
1193
- paramMappings?: Record<string, string>;
1168
+ readonly paramMappings?: Record<string, string>
1194
1169
  }
1195
1170
 
1196
1171
  /** Configuration for per-model rate limits. */
1197
1172
  export interface RateLimitConfig {
1198
1173
  /** Maximum requests per window. `None` means unlimited. */
1199
- rpm?: number;
1174
+ readonly rpm?: number
1200
1175
  /** Maximum tokens per window. `None` means unlimited. */
1201
- tpm?: number;
1176
+ readonly tpm?: number
1202
1177
  /** Fixed window duration (defaults to 60 s). */
1203
- window?: number;
1178
+ readonly window?: number
1204
1179
  }
1205
1180
 
1206
- export declare function rateLimitConfigDefault(): RateLimitConfig;
1207
-
1208
1181
  /** Controls how much reasoning effort the model should use. */
1209
- export declare const enum ReasoningEffort {
1182
+ export declare enum ReasoningEffort {
1210
1183
  Low = "low",
1211
1184
  Medium = "medium",
1212
1185
  High = "high",
1213
1186
  }
1214
1187
 
1215
- /**
1216
- * Register a custom provider in the global runtime registry.
1217
- *
1218
- * The provider will be checked **before** all built-in providers during model
1219
- * detection. If a provider with the same `name` already exists it is replaced.
1220
- *
1221
- * # Errors
1222
- *
1223
- * Returns an error if the config is invalid (empty name, empty base_url, or
1224
- * no model prefixes).
1225
- */
1226
- export declare function registerCustomProvider(config: CustomProviderConfig): void;
1188
+ /** A document to be reranked — either a plain string or an object with a text field. */
1189
+ export declare enum RerankDocument {
1190
+ /** Plain text document content. */
1191
+ Text = "Text",
1192
+ /** Document with explicit text field (may include metadata). */
1193
+ Object = "Object",
1194
+ }
1227
1195
 
1228
1196
  /** Request to rerank documents by relevance to a query. */
1229
1197
  export interface RerankRequest {
1230
1198
  /** Model ID (e.g., `"cohere/rerank-english-v3.0"`). */
1231
- model?: string;
1199
+ readonly model?: string
1232
1200
  /** The search query. */
1233
- query?: string;
1201
+ readonly query?: string
1234
1202
  /** Documents to rerank. */
1235
- documents?: Array<JsRerankDocument>;
1203
+ readonly documents?: Array<RerankDocument>
1236
1204
  /** Return only the top N results. Optional. */
1237
- topN?: number;
1205
+ readonly topN?: number
1238
1206
  /** Include the document content in results. Defaults to false. */
1239
- returnDocuments?: boolean;
1207
+ readonly returnDocuments?: boolean
1240
1208
  }
1241
1209
 
1242
1210
  /** Response from the rerank endpoint. */
1243
1211
  export interface RerankResponse {
1244
1212
  /** Unique identifier for this rerank request. */
1245
- id?: string;
1213
+ readonly id?: string
1246
1214
  /** Reranked documents in order of relevance. */
1247
- results: Array<JsRerankResult>;
1215
+ readonly results: Array<RerankResult>
1248
1216
  /** Optional metadata about the reranking operation. */
1249
- meta?: any;
1217
+ readonly meta?: JsonValue
1250
1218
  }
1251
1219
 
1252
1220
  /** A single reranked document with its relevance score. */
1253
1221
  export interface RerankResult {
1254
1222
  /** Original document index in the input list. */
1255
- index: number;
1223
+ readonly index: number
1256
1224
  /** Relevance score in `[0, 1]`. Higher indicates more relevant. */
1257
- relevanceScore: number;
1225
+ readonly relevanceScore: number
1258
1226
  /** Original document content (if `return_documents` was true). */
1259
- document?: JsRerankResultDocument;
1227
+ readonly document?: RerankResultDocument
1260
1228
  }
1261
1229
 
1262
1230
  /** The text content of a reranked document, returned when `return_documents` is true. */
1263
1231
  export interface RerankResultDocument {
1264
1232
  /** Document text. */
1265
- text: string;
1233
+ readonly text: string
1266
1234
  }
1267
1235
 
1268
1236
  /** Response format constraint. */
1269
- export interface ResponseFormat {
1270
- type: string;
1271
- jsonSchema?: JsonSchemaFormat;
1272
- }
1237
+ export type ResponseFormat =
1238
+ | { type: 'text' }
1239
+ | { type: 'json_object' }
1240
+ | { type: 'json_schema'; jsonSchema: JsonSchemaFormat }
1273
1241
 
1274
1242
  /** Response from a structured response request. */
1275
1243
  export interface ResponseObject {
1276
1244
  /** Unique response ID. */
1277
- id?: string;
1245
+ readonly id?: string
1278
1246
  /** Object type (e.g., `"response"`). */
1279
- object?: string;
1247
+ readonly object?: string
1280
1248
  /** Unix timestamp of response creation. */
1281
- createdAt?: number;
1249
+ readonly createdAt?: number
1282
1250
  /** Model used to generate the response. */
1283
- model?: string;
1251
+ readonly model?: string
1284
1252
  /** Status (e.g., `"succeeded"`, `"failed"`). */
1285
- status?: string;
1253
+ readonly status?: string
1286
1254
  /** Output items from the response. */
1287
- output?: Array<JsResponseOutputItem>;
1255
+ readonly output?: Array<ResponseOutputItem>
1288
1256
  /** Token usage. */
1289
- usage?: JsResponseUsage;
1257
+ readonly usage?: ResponseUsage
1290
1258
  /** Error details (if status is "failed"). */
1291
- error?: any;
1259
+ readonly error?: JsonValue
1292
1260
  }
1293
1261
 
1294
1262
  /** A single output item from the response. */
1295
1263
  export interface ResponseOutputItem {
1296
1264
  /** Output type (e.g., `"text"`, `"object"`, `"error"`). */
1297
- type?: string;
1265
+ readonly itemType?: string
1298
1266
  /** Output content (flattened into the object). */
1299
- content?: any;
1267
+ readonly content?: JsonValue
1300
1268
  }
1301
1269
 
1302
1270
  /** A tool available for the response request. */
1303
1271
  export interface ResponseTool {
1304
1272
  /** Tool type (e.g., "extractor", "search"). */
1305
- type?: string;
1273
+ readonly toolType?: string
1306
1274
  /** Tool configuration (flattened into the object). */
1307
- config?: any;
1275
+ readonly config?: JsonValue
1308
1276
  }
1309
1277
 
1310
1278
  /** Token usage for a response. */
1311
1279
  export interface ResponseUsage {
1312
1280
  /** Input tokens used. */
1313
- inputTokens?: number;
1281
+ readonly inputTokens?: number
1314
1282
  /** Output tokens used. */
1315
- outputTokens?: number;
1283
+ readonly outputTokens?: number
1316
1284
  /** Total tokens used. */
1317
- totalTokens?: number;
1285
+ readonly totalTokens?: number
1318
1286
  }
1319
1287
 
1320
1288
  /** A search request. */
1321
1289
  export interface SearchRequest {
1322
1290
  /** The model/provider to use (e.g. `"brave/web-search"`, `"tavily/search"`). */
1323
- model?: string;
1291
+ readonly model?: string
1324
1292
  /** The search query string. */
1325
- query?: string;
1293
+ readonly query?: string
1326
1294
  /** Maximum number of results to return. */
1327
- maxResults?: number;
1295
+ readonly maxResults?: number
1328
1296
  /** Domain filter — restrict results to specific domains. */
1329
- searchDomainFilter?: Array<string>;
1297
+ readonly searchDomainFilter?: Array<string>
1330
1298
  /** Country code for localized results (ISO 3166-1 alpha-2, e.g., `"US"`, `"FR"`). */
1331
- country?: string;
1299
+ readonly country?: string
1332
1300
  }
1333
1301
 
1334
1302
  /** A search response. */
1335
1303
  export interface SearchResponse {
1336
1304
  /** List of search results. */
1337
- results: Array<JsSearchResult>;
1305
+ readonly results: Array<SearchResult>
1338
1306
  /** Model/provider that performed the search. */
1339
- model: string;
1307
+ readonly model: string
1340
1308
  }
1341
1309
 
1342
1310
  /** An individual search result. */
1343
1311
  export interface SearchResult {
1344
1312
  /** Result title. */
1345
- title: string;
1313
+ readonly title: string
1346
1314
  /** Result URL. */
1347
- url: string;
1315
+ readonly url: string
1348
1316
  /** Text snippet or excerpt from the page. */
1349
- snippet: string;
1317
+ readonly snippet: string
1350
1318
  /** Publication or last-updated date, if available. */
1351
- date?: string;
1319
+ readonly date?: string
1320
+ }
1321
+
1322
+ /**
1323
+ * The value broadcast from a singleflight leader to all followers.
1324
+ *
1325
+ * `Arc<LiterLlmError>` is used because `LiterLlmError` is not `Clone` and
1326
+ * broadcast channels require `T: Clone`. The `Arc` adds only a reference-count
1327
+ * bump per follower, which is negligible under the burst loads this layer targets.
1328
+ */
1329
+ export declare class SingleflightResult {
1352
1330
  }
1353
1331
 
1354
1332
  /** Name of the specific function to invoke. */
1355
1333
  export interface SpecificFunction {
1356
1334
  /** Function name. */
1357
- name?: string;
1335
+ readonly name?: string
1358
1336
  }
1359
1337
 
1360
1338
  /** Directive to call a specific tool. */
1361
1339
  export interface SpecificToolChoice {
1362
1340
  /** Tool type (always "function"). */
1363
- type?: JsToolType;
1341
+ readonly choiceType?: ToolType
1364
1342
  /** The specific function to invoke. */
1365
- function?: JsSpecificFunction;
1343
+ readonly function?: SpecificFunction
1344
+ }
1345
+
1346
+ /** Stop sequence(s) that cause the model to stop generating. */
1347
+ export declare enum StopSequence {
1348
+ /** Single stop sequence. */
1349
+ Single = "Single",
1350
+ /** Multiple stop sequences. */
1351
+ Multiple = "Multiple",
1366
1352
  }
1367
1353
 
1368
1354
  /** A streaming choice with incremental delta. */
1369
1355
  export interface StreamChoice {
1370
1356
  /** Index of this choice in the choices array. */
1371
- index?: number;
1357
+ readonly index?: number
1372
1358
  /** Incremental update to the message (content, tool calls, etc.). */
1373
- delta?: JsStreamDelta;
1359
+ readonly delta?: StreamDelta
1374
1360
  /** Why the stream ended (present only in final chunk). */
1375
- finishReason?: JsFinishReason;
1361
+ readonly finishReason?: FinishReason
1376
1362
  }
1377
1363
 
1378
1364
  /** Incremental delta in a stream chunk. */
1379
1365
  export interface StreamDelta {
1380
1366
  /** Role (typically present only in the first chunk). */
1381
- role?: string;
1367
+ readonly role?: string
1382
1368
  /** Partial content chunk (e.g., a few words of the response). */
1383
- content?: string;
1369
+ readonly content?: string
1384
1370
  /** Partial tool calls being streamed. */
1385
- toolCalls?: Array<JsStreamToolCall>;
1371
+ readonly toolCalls?: Array<StreamToolCall>
1386
1372
  /** Deprecated legacy function_call delta; retained for API compatibility. */
1387
- functionCall?: JsStreamFunctionCall;
1373
+ readonly functionCall?: StreamFunctionCall
1388
1374
  /** Partial refusal message. */
1389
- refusal?: string;
1375
+ readonly refusal?: string
1390
1376
  }
1391
1377
 
1392
1378
  /**
@@ -1395,9 +1381,9 @@ export interface StreamDelta {
1395
1381
  * Most providers use standard Server-Sent Events (SSE). AWS Bedrock uses
1396
1382
  * a proprietary binary EventStream framing.
1397
1383
  *
1398
- * Deserialized from the `streaming_format` JSON field via `serde`.
1384
+ * Deserialized from the `streaming_format` JSON field via [`serde`].
1399
1385
  */
1400
- export declare const enum StreamFormat {
1386
+ export declare enum StreamFormat {
1401
1387
  /** Standard Server-Sent Events (text/event-stream). */
1402
1388
  Sse = "sse",
1403
1389
  /** AWS EventStream binary framing (application/vnd.amazon.eventstream). */
@@ -1407,49 +1393,57 @@ export declare const enum StreamFormat {
1407
1393
  /** Partial function call details in a stream. */
1408
1394
  export interface StreamFunctionCall {
1409
1395
  /** Function name (typically in the first chunk). */
1410
- name?: string;
1396
+ readonly name?: string
1411
1397
  /** Partial JSON arguments chunk. */
1412
- arguments?: string;
1398
+ readonly arguments?: string
1413
1399
  }
1414
1400
 
1415
1401
  /** Options for streaming responses. */
1416
1402
  export interface StreamOptions {
1417
1403
  /** If true, include token usage in the final stream chunk. */
1418
- includeUsage?: boolean;
1404
+ readonly includeUsage?: boolean
1419
1405
  }
1420
1406
 
1421
1407
  /** A streaming tool call being built incrementally. */
1422
1408
  export interface StreamToolCall {
1423
1409
  /** Index of this tool call in the tool_calls array. */
1424
- index?: number;
1410
+ readonly index?: number
1425
1411
  /** Tool call ID (typically in the first chunk for this call). */
1426
- id?: string;
1412
+ readonly id?: string
1427
1413
  /** Tool type (typically "function"). */
1428
- type?: JsToolType;
1414
+ readonly callType?: ToolType
1429
1415
  /** Partial function name and arguments. */
1430
- function?: JsStreamFunctionCall;
1416
+ readonly function?: StreamFunctionCall
1431
1417
  }
1432
1418
 
1433
1419
  /** System message guiding model behavior for the entire conversation. */
1434
1420
  export interface SystemMessage {
1435
1421
  /** Instructions or context that apply throughout the conversation. */
1436
- content?: string;
1422
+ readonly content?: string
1437
1423
  /** Optional name for the system message source. */
1438
- name?: string;
1424
+ readonly name?: string
1439
1425
  }
1440
1426
 
1441
1427
  /** A tool call the model wants to execute. */
1442
1428
  export interface ToolCall {
1443
1429
  /** Unique ID for this call, used to reference in tool result messages. */
1444
- id: string;
1430
+ readonly id: string
1445
1431
  /** Tool type (always "function"). */
1446
- type: JsToolType;
1432
+ readonly callType: ToolType
1447
1433
  /** Function name and arguments. */
1448
- function: JsFunctionCall;
1434
+ readonly function: FunctionCall
1435
+ }
1436
+
1437
+ /** Tool usage mode or a specific tool to call. */
1438
+ export declare enum ToolChoice {
1439
+ /** Predefined mode: auto, required, or none. */
1440
+ Mode = "Mode",
1441
+ /** Force a specific tool to be called. */
1442
+ Specific = "Specific",
1449
1443
  }
1450
1444
 
1451
1445
  /** Tool choice mode. */
1452
- export declare const enum ToolChoiceMode {
1446
+ export declare enum ToolChoiceMode {
1453
1447
  /** Model may or may not call tools; default behavior. */
1454
1448
  Auto = "auto",
1455
1449
  /** Model must call at least one tool. */
@@ -1461,11 +1455,11 @@ export declare const enum ToolChoiceMode {
1461
1455
  /** Tool execution result returned to the model. */
1462
1456
  export interface ToolMessage {
1463
1457
  /** Result of the tool execution. */
1464
- content?: string;
1458
+ readonly content?: string
1465
1459
  /** ID of the tool call this result responds to. */
1466
- toolCallId?: string;
1460
+ readonly toolCallId?: string
1467
1461
  /** Optional tool/function name. */
1468
- name?: string;
1462
+ readonly name?: string
1469
1463
  }
1470
1464
 
1471
1465
  /**
@@ -1475,68 +1469,64 @@ export interface ToolMessage {
1475
1469
  * that constraint at the type level and rejects any other value on
1476
1470
  * deserialization.
1477
1471
  */
1478
- export declare const enum ToolType {
1472
+ export declare enum ToolType {
1479
1473
  Function = "function",
1480
1474
  }
1481
1475
 
1482
1476
  /** Response from a transcription request. */
1483
1477
  export interface TranscriptionResponse {
1484
1478
  /** The transcribed text. */
1485
- text?: string;
1479
+ readonly text?: string
1486
1480
  /** Detected language (ISO-639-1 code). */
1487
- language?: string;
1481
+ readonly language?: string
1488
1482
  /** Total audio duration in seconds. */
1489
- duration?: number;
1483
+ readonly duration?: number
1490
1484
  /** Detailed segment-level transcription (if response_format is "verbose_json"). */
1491
- segments?: Array<JsTranscriptionSegment>;
1485
+ readonly segments?: Array<TranscriptionSegment>
1492
1486
  }
1493
1487
 
1494
1488
  /** A segment of transcribed audio with timing information. */
1495
1489
  export interface TranscriptionSegment {
1496
1490
  /** Segment index (0-based). */
1497
- id?: number;
1491
+ readonly id?: number
1498
1492
  /** Start time in seconds. */
1499
- start?: number;
1493
+ readonly start?: number
1500
1494
  /** End time in seconds. */
1501
- end?: number;
1495
+ readonly end?: number
1502
1496
  /** Transcribed text for this segment. */
1503
- text?: string;
1497
+ readonly text?: string
1504
1498
  }
1505
1499
 
1506
- /**
1507
- * Remove a previously registered custom provider by name.
1508
- *
1509
- * Returns `true` if a provider with the given name was found and removed,
1510
- * `false` if no such provider existed.
1511
- *
1512
- * # Errors
1513
- *
1514
- * Returns an error only if the internal lock is poisoned.
1515
- */
1516
- export declare function unregisterCustomProvider(name: string): boolean;
1517
-
1518
1500
  /** Token-usage accounting returned by the provider on each completion / embedding call. */
1519
1501
  export interface Usage {
1520
1502
  /** Prompt tokens used. Defaults to 0 when absent (some providers omit this). */
1521
- promptTokens?: number;
1503
+ readonly promptTokens?: number
1522
1504
  /** Completion tokens used. Defaults to 0 when absent (e.g. embedding responses). */
1523
- completionTokens?: number;
1505
+ readonly completionTokens?: number
1524
1506
  /** Total tokens used. Defaults to 0 when absent (some providers omit this). */
1525
- totalTokens?: number;
1507
+ readonly totalTokens?: number
1526
1508
  /**
1527
1509
  * Breakdown of tokens used in the prompt, including cached tokens served
1528
1510
  * at the provider's discounted cache-read rate. Absent when the provider
1529
1511
  * does not return prompt-token details.
1530
1512
  */
1531
- promptTokensDetails?: JsPromptTokensDetails;
1513
+ readonly promptTokensDetails?: PromptTokensDetails
1514
+ }
1515
+
1516
+ /** User message content as either plain text or a list of multimodal parts. */
1517
+ export declare enum UserContent {
1518
+ /** Plain text content. */
1519
+ Text = "Text",
1520
+ /** Array of content parts (text, images, documents, audio). */
1521
+ Parts = "Parts",
1532
1522
  }
1533
1523
 
1534
1524
  /** User message in the conversation. */
1535
1525
  export interface UserMessage {
1536
1526
  /** Message content as plain text or array of content parts (text, images, documents, audio). */
1537
- content?: JsUserContent;
1527
+ readonly content?: UserContent
1538
1528
  /** Optional name for the user. */
1539
- name?: string;
1529
+ readonly name?: string
1540
1530
  }
1541
1531
 
1542
1532
  /**
@@ -1547,13 +1537,41 @@ export interface UserMessage {
1547
1537
  */
1548
1538
  export interface WaitForBatchConfig {
1549
1539
  /** Initial interval between polls, in seconds. */
1550
- initialIntervalSecs?: number;
1540
+ readonly initialIntervalSecs?: number
1551
1541
  /** Maximum interval between polls (backoff plateau), in seconds. */
1552
- maxIntervalSecs?: number;
1542
+ readonly maxIntervalSecs?: number
1553
1543
  /** Exponential backoff multiplier (e.g., 1.5 increases delay by 50% each poll). */
1554
- backoffMultiplier?: number;
1544
+ readonly backoffMultiplier?: number
1555
1545
  /** Optional timeout in seconds — polling fails if this duration is exceeded. */
1556
- timeoutSecs?: number;
1546
+ readonly timeoutSecs?: number
1557
1547
  }
1558
1548
 
1559
- export declare function waitForBatchConfigDefault(): WaitForBatchConfig;
1549
+ /**
1550
+ * Register a custom provider in the global runtime registry.
1551
+ *
1552
+ * The provider will be checked **before** all built-in providers during model
1553
+ * detection. If a provider with the same `name` already exists it is replaced.
1554
+ * @throws Returns an error if the config is invalid (empty name, empty base_url, or
1555
+ * no model prefixes).
1556
+ */
1557
+ export declare function registerCustomProvider(config: CustomProviderConfig): void;
1558
+
1559
+ /**
1560
+ * Remove a previously registered custom provider by name.
1561
+ *
1562
+ * Returns `true` if a provider with the given name was found and removed,
1563
+ * `false` if no such provider existed.
1564
+ * @throws Returns an error only if the internal lock is poisoned.
1565
+ */
1566
+ export declare function unregisterCustomProvider(name: string): boolean;
1567
+
1568
+ export declare class ChatStreamIterator {
1569
+ next(value?: undefined): Promise<IteratorResult<ChatCompletionChunk, void>>
1570
+ [Symbol.asyncIterator](): AsyncGenerator<ChatCompletionChunk, void, undefined>
1571
+ }
1572
+
1573
+ export declare class LiterLlmErrorInfo {
1574
+ statusCode(): number
1575
+ isTransient(): boolean
1576
+ errorType(): string
1577
+ }