@kreuzberg/liter-llm-node 1.6.0 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.d.ts CHANGED
@@ -1,139 +1,215 @@
1
- /* auto-generated by NAPI-RS */
1
+ // This file is auto-generated by alef — DO NOT EDIT.
2
+ // alef:hash:797e09398ae0b95dd0e3de94d7374eedafcd20d08532c7cf378cbcd09e3083a7
3
+ // To regenerate: alef generate
4
+ // To verify freshness: alef verify --exit-code
2
5
  /* eslint-disable */
6
+
7
+ export type JsonValue =
8
+ | string
9
+ | number
10
+ | boolean
11
+ | null
12
+ | JsonValue[]
13
+ | { [key: string]: JsonValue };
14
+
3
15
  /**
4
- * This type implements JavaScript's async iterable protocol.
5
- * It can be used with `for await...of` loops.
16
+ * Return all provider configs from the registry.
6
17
  *
7
- * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Iteration_protocols#the_async_iterator_and_async_iterable_protocols
18
+ * Useful for tooling, documentation generation, or runtime enumeration.
19
+ * Returns the public [`ProviderConfig`] slice (without capability flags).
20
+ * To query capability flags for a specific provider use [`capabilities`].
8
21
  */
9
- export declare class ChatStreamIterator {}
22
+ export declare function allProviders(): Array<ProviderConfig>;
10
23
 
11
24
  /**
12
- * Default client implementation backed by `reqwest`.
25
+ * Return the capability flags for a named provider.
13
26
  *
14
- * Sends requests to 140+ LLM providers with automatic provider detection
15
- * and per-request routing. The provider is resolved at construction time
16
- * from `model_hint` (or defaults to OpenAI), but individual requests can
17
- * override the provider via model name prefix (e.g. `"anthropic/claude-3-5-sonnet"`
18
- * routes to Anthropic regardless of construction-time setting).
27
+ * Performs an O(n) linear scan over the embedded registry (143 entries).
28
+ * Returns an owned value so that bindings can box/copy it across the FFI
29
+ * boundary without dealing with lifetimes. `ProviderCapabilities` is `Copy`,
30
+ * so this is a cheap memcpy of seven `bool` fields.
19
31
  *
20
- * When the model prefix does not match any known provider, the construction-time
21
- * provider is used as the fallback. This enables seamless migration between
22
- * providers by changing only the model name.
32
+ * For unknown `provider_name` values the function returns an all-`false`
33
+ * sentinel so callers never need to handle `Option`.
34
+ */
35
+ export declare function capabilities(providerName: string): ProviderCapabilities;
36
+
37
+ /**
38
+ * Assert that `current_len + incoming` does not exceed `limit`.
23
39
  *
24
- * The provider is stored behind an `Arc` so it can be shared cheaply into
25
- * async closures and streaming tasks. Pre-computed auth headers and extra
26
- * headers are cached at construction to avoid redundant encoding on every request.
40
+ * Call this before appending `incoming` bytes to any buffer that must
41
+ * stay below `limit`. Returns `Err(LiterLlmError::Streaming)` on overflow
42
+ * and emits a `tracing::warn!` with context.
27
43
  */
28
- export declare class DefaultClient {
29
- chat(req: ChatCompletionRequest): Promise<ChatCompletionResponse>;
30
- chatStream(req: ChatCompletionRequest): Promise<ChatStreamIterator>;
31
- embed(req: EmbeddingRequest): Promise<EmbeddingResponse>;
32
- listModels(): Promise<ModelsListResponse>;
33
- imageGenerate(req: CreateImageRequest): Promise<ImagesResponse>;
34
- speech(req: CreateSpeechRequest): Promise<Buffer>;
35
- transcribe(req: CreateTranscriptionRequest): Promise<TranscriptionResponse>;
36
- moderate(req: ModerationRequest): Promise<ModerationResponse>;
37
- rerank(req: RerankRequest): Promise<RerankResponse>;
38
- search(req: SearchRequest): Promise<SearchResponse>;
39
- ocr(req: OcrRequest): Promise<OcrResponse>;
40
- createFile(req: CreateFileRequest): Promise<FileObject>;
41
- retrieveFile(fileId: string): Promise<FileObject>;
42
- deleteFile(fileId: string): Promise<DeleteResponse>;
43
- listFiles(query?: FileListQuery | undefined | null): Promise<FileListResponse>;
44
- fileContent(fileId: string): Promise<Buffer>;
45
- createBatch(req: CreateBatchRequest): Promise<BatchObject>;
46
- retrieveBatch(batchId: string): Promise<BatchObject>;
47
- listBatches(query?: BatchListQuery | undefined | null): Promise<BatchListResponse>;
48
- cancelBatch(batchId: string): Promise<BatchObject>;
49
- fetchBatchForPolling(batchId: string): Promise<BatchObject>;
50
- /**
51
- * Poll a batch until it reaches a terminal status (Completed, Failed, Expired, Cancelled).
52
- *
53
- * Uses exponential backoff with configurable initial interval, maximum interval, and backoff multiplier.
54
- * Optionally supports a timeout that aborts polling if exceeded.
55
- *
56
- * # Errors
57
- *
58
- * Returns `BatchWaitError.Failed` if the batch reaches a failure terminal status.
59
- * Returns `BatchWaitError.Timeout` if the configured timeout is exceeded.
60
- * Returns `BatchWaitError.Client` for underlying client errors.
61
- *
62
- * # Example
63
- */
64
- waitForBatch(batchId: string, config: WaitForBatchConfig): Promise<BatchObject>;
65
- createResponse(req: CreateResponseRequest): Promise<ResponseObject>;
66
- retrieveResponse(responseId: string): Promise<ResponseObject>;
67
- cancelResponse(responseId: string): Promise<ResponseObject>;
68
- }
69
- export type JsDefaultClient = DefaultClient;
44
+ export declare function checkBound(
45
+ context: string,
46
+ currentLen: number,
47
+ incoming: number,
48
+ limit: number,
49
+ ): void;
70
50
 
71
- export declare class JsLiterLlmErrorInfo {
72
- statusCode: number;
73
- isTransient: boolean;
74
- errorType: string;
75
- /** HTTP status code for this error (0 means no associated status). */
76
- statusCode(): number;
77
- /** Returns `true` if the error is transient and a retry may succeed. */
78
- isTransient(): boolean;
79
- /** Machine-readable error category string for matching and logging. */
80
- errorType(): string;
81
- }
51
+ /**
52
+ * Remove all guardrails from the global registry.
53
+ *
54
+ * Primarily useful in tests to reset state between test cases.
55
+ */
56
+ export declare function clear(): void;
82
57
 
83
58
  /**
84
- * The value broadcast from a singleflight leader to all followers.
59
+ * Calculate the estimated cost of a completion given a model name and token
60
+ * counts.
85
61
  *
86
- * `Arc<LiterLlmError>` is used because `LiterLlmError` is not `Clone` and
87
- * broadcast channels require `T: Clone`. The `Arc` adds only a reference-count
88
- * bump per follower, which is negligible under the burst loads this layer targets.
62
+ * Returns `None` if the model is not present in the embedded pricing registry.
63
+ * Returns `Some(cost_usd)` otherwise, where the value is in US dollars.
64
+ *
65
+ * When an exact model name match is not found, progressively shorter prefixes
66
+ * are tried by stripping from the last `-` or `.` separator. For example,
67
+ * `gpt-4-0613` will match `gpt-4` if no `gpt-4-0613` entry exists.
89
68
  */
90
- export declare class SingleflightResult {}
91
- export type JsSingleflightResult = SingleflightResult;
69
+ export declare function completionCost(
70
+ model: string,
71
+ promptTokens: number,
72
+ completionTokens: number,
73
+ ): number | null;
92
74
 
93
75
  /**
94
- * Return all provider configs from the registry.
76
+ * Calculate the estimated cost of a completion, accounting for cached
77
+ * (cache-hit) prompt tokens billed at the provider's discounted rate.
95
78
  *
96
- * Useful for tooling, documentation generation, or runtime enumeration.
97
- * Returns the public `ProviderConfig` slice (without capability flags).
98
- * To query capability flags for a specific provider use `capabilities`.
79
+ * `cached_tokens` is the count of prompt tokens served from the provider's
80
+ * prompt cache. It must be `<= prompt_tokens` (cached tokens are a subset of
81
+ * the prompt). The non-cached portion is billed at `input_cost_per_token`
82
+ * and the cached portion at `cache_read_input_token_cost` when the model
83
+ * has cache pricing; otherwise the entire prompt is billed at the regular
84
+ * input rate.
85
+ *
86
+ * Returns `None` if the model is not present in the embedded pricing
87
+ * registry, mirroring [`completion_cost`].
99
88
  */
100
- export declare function allProviders(): Array<ProviderConfig>;
89
+ export declare function completionCostWithCache(
90
+ model: string,
91
+ promptTokens: number,
92
+ cachedTokens: number,
93
+ completionTokens: number,
94
+ ): number | null;
95
+
96
+ /**
97
+ * Return the set of complex provider names.
98
+ *
99
+ * Complex providers require custom auth/routing logic beyond simple bearer
100
+ * tokens (e.g. AWS Bedrock SigV4, Vertex AI OAuth2).
101
+ *
102
+ * The returned reference points into the static registry — no allocation.
103
+ */
104
+ export declare function complexProviderNames(): Array<string>;
105
+
106
+ /**
107
+ * Count tokens for a full [`ChatCompletionRequest`].
108
+ *
109
+ * Sums tokens across all message text contents plus a per-message overhead
110
+ * of ~4 tokens (for role, separators, and formatting metadata). Tool
111
+ * definitions and multimodal content parts (images, audio, documents) are
112
+ * not counted — only textual content contributes to the token total.
113
+ * @throws Returns [`LiterLlmError::BadRequest`] if the tokenizer cannot be loaded or
114
+ * if tokenization fails for any message.
115
+ */
116
+ export declare function countRequestTokens(
117
+ model: string,
118
+ req?: ChatCompletionRequest | undefined | null,
119
+ ): number;
120
+
121
+ /**
122
+ * Count tokens in a text string using the tokenizer for the given model.
123
+ *
124
+ * The tokenizer is resolved from the model name prefix (e.g. `"gpt-4o"` maps
125
+ * to the `Xenova/gpt-4o` HuggingFace tokenizer). Tokenizers are cached after
126
+ * first load.
127
+ * @throws Returns [`LiterLlmError::BadRequest`] if the tokenizer cannot be loaded
128
+ * (e.g. network failure on first use) or if tokenization itself fails.
129
+ */
130
+ export declare function countTokens(model: string, text: string): number;
131
+
132
+ /**
133
+ * Create a new LLM client with simple scalar configuration.
134
+ *
135
+ * This is the primary binding entry-point. All parameters except `api_key`
136
+ * are optional — omitting them uses the same defaults as
137
+ * [`ClientConfigBuilder`].
138
+ * @throws Returns [`LiterLlmError`] if the underlying HTTP client cannot be
139
+ * constructed, or if the resolved provider configuration is invalid.
140
+ */
141
+ export declare function createClient(
142
+ apiKey: string,
143
+ baseUrl?: string | undefined | null,
144
+ timeoutSecs?: number | undefined | null,
145
+ maxRetries?: number | undefined | null,
146
+ modelHint?: string | undefined | null,
147
+ ): DefaultClient;
148
+
149
+ /**
150
+ * Create a new LLM client from a JSON string.
151
+ *
152
+ * The JSON object accepts the same fields as `liter-llm.toml` (snake_case).
153
+ * @throws Returns [`LiterLlmError::BadRequest`] if `json` is not valid JSON or
154
+ * contains unknown fields.
155
+ */
156
+ export declare function createClientFromJson(json: string): DefaultClient;
157
+
158
+ /**
159
+ * Install the `ring` crypto provider as the rustls process default, idempotently.
160
+ *
161
+ * rustls 0.23+ removed the implicit default provider. This function installs
162
+ * `ring` once per process. Subsequent calls are no-ops. Calling it from a
163
+ * downstream Rust app that has already installed `aws-lc-rs` is safe — the
164
+ * `Err` from `install_default()` is silently ignored.
165
+ *
166
+ * Called automatically by every internal `reqwest::Client` constructor
167
+ * (auth providers, default HTTP client). Bindings and downstream consumers
168
+ * reach those constructors transitively, so no manual init is required.
169
+ *
170
+ * WASM builds are exempt — the WASM target uses the browser/Node.js fetch
171
+ * API instead of rustls, so no crypto provider is needed.
172
+ *
173
+ * Windows builds use native-tls (SChannel) via reqwest, so rustls is not
174
+ * present and no crypto provider installation is needed.
175
+ */
176
+ export declare function ensureCryptoProvider(): void;
101
177
 
102
178
  /** Assistant's response to a user message. */
103
179
  export interface AssistantMessage {
104
180
  /** The assistant's text response. Absent if tool calls are returned instead. */
105
- content?: string;
181
+ readonly content?: string;
106
182
  /** Optional name for the assistant. */
107
- name?: string;
183
+ readonly name?: string;
108
184
  /** Tool calls the model wants to execute, if any. */
109
- toolCalls?: Array<JsToolCall>;
185
+ readonly toolCalls?: Array<ToolCall>;
110
186
  /** Refusal reason, if the model declined to respond per safety policies. */
111
- refusal?: string;
187
+ readonly refusal?: string;
112
188
  /** Deprecated legacy function_call field; retained for API compatibility. */
113
- functionCall?: JsFunctionCall;
189
+ readonly functionCall?: FunctionCall;
114
190
  }
115
191
 
116
192
  /** Audio content part for speech-capable models. */
117
193
  export interface AudioContent {
118
194
  /** Base64-encoded audio data. */
119
- data?: string;
195
+ readonly data?: string;
120
196
  /** Audio format (e.g., "wav", "mp3", "ogg"). */
121
- format?: string;
197
+ readonly format?: string;
122
198
  }
123
199
 
124
200
  /** Auth configuration block. */
125
201
  export interface AuthConfig {
126
202
  /** Auth scheme classification. */
127
- type: JsAuthType;
203
+ readonly authType: AuthType;
128
204
  /**
129
205
  * Name of the environment variable that holds the API key (e.g. `"OPENAI_API_KEY"`).
130
206
  * Holds the variable name, never the secret value.
131
207
  */
132
- envVar?: string;
208
+ readonly envVar?: string;
133
209
  }
134
210
 
135
211
  /** How the API key is sent in the HTTP request. */
136
- export declare const enum AuthHeaderFormat {
212
+ export declare enum AuthHeaderFormat {
137
213
  /** Bearer token: `Authorization: Bearer <key>` */
138
214
  Bearer = "Bearer",
139
215
  /** Custom header: e.g., `X-Api-Key: <key>` */
@@ -143,7 +219,7 @@ export declare const enum AuthHeaderFormat {
143
219
  }
144
220
 
145
221
  /** Auth scheme used by a provider. */
146
- export declare const enum AuthType {
222
+ export declare enum AuthType {
147
223
  /** Standard `Authorization: Bearer <key>` header. */
148
224
  Bearer = "bearer",
149
225
  /** `x-api-key: <key>` header (also handles `"header"` and `"x-api-key"` aliases). */
@@ -157,69 +233,69 @@ export declare const enum AuthType {
157
233
  /** Query parameters for listing batches. */
158
234
  export interface BatchListQuery {
159
235
  /** Maximum number of results to return. Defaults to 20. */
160
- limit?: number;
236
+ readonly limit?: number;
161
237
  /** Pagination cursor: return results after this batch ID. */
162
- after?: string;
238
+ readonly after?: string;
163
239
  }
164
240
 
165
241
  /** Response from listing batches. */
166
242
  export interface BatchListResponse {
167
243
  /** Object type (always `"list"`). */
168
- object?: string;
244
+ readonly object?: string;
169
245
  /** List of batch objects. */
170
- data?: Array<BatchObject>;
246
+ readonly data?: Array<BatchObject>;
171
247
  /** Whether more results are available. */
172
- hasMore?: boolean;
248
+ readonly hasMore?: boolean;
173
249
  /** First batch ID in the result set (for pagination). */
174
- firstId?: string;
250
+ readonly firstId?: string;
175
251
  /** Last batch ID in the result set (for pagination). */
176
- lastId?: string;
252
+ readonly lastId?: string;
177
253
  }
178
254
 
179
255
  /** A batch job object. */
180
256
  export interface BatchObject {
181
257
  /** Unique batch ID. */
182
- id?: string;
258
+ readonly id?: string;
183
259
  /** Object type (always `"batch"`). */
184
- object?: string;
260
+ readonly object?: string;
185
261
  /** API endpoint (e.g., `"/v1/chat/completions"`). */
186
- endpoint?: string;
262
+ readonly endpoint?: string;
187
263
  /** ID of the input file. */
188
- inputFileId?: string;
264
+ readonly inputFileId?: string;
189
265
  /** Completion window (e.g., `"24h"`). */
190
- completionWindow?: string;
266
+ readonly completionWindow?: string;
191
267
  /** Current job status. */
192
- status?: JsBatchStatus;
268
+ readonly status?: BatchStatus;
193
269
  /** ID of the output file (present when completed). */
194
- outputFileId?: string;
270
+ readonly outputFileId?: string;
195
271
  /** ID of the error file (present if some requests failed). */
196
- errorFileId?: string;
272
+ readonly errorFileId?: string;
197
273
  /** Unix timestamp of batch creation. */
198
- createdAt?: number;
274
+ readonly createdAt?: number;
199
275
  /** Unix timestamp of completion (if completed). */
200
- completedAt?: number;
276
+ readonly completedAt?: number;
201
277
  /** Unix timestamp of failure (if failed). */
202
- failedAt?: number;
278
+ readonly failedAt?: number;
203
279
  /** Unix timestamp of expiration (if expired). */
204
- expiredAt?: number;
280
+ readonly expiredAt?: number;
205
281
  /** Request processing counts. */
206
- requestCounts?: JsBatchRequestCounts;
282
+ readonly requestCounts?: BatchRequestCounts;
207
283
  /** Metadata attached to the batch. */
208
- metadata?: any;
284
+ readonly metadata?: JsonValue;
209
285
  }
210
286
 
211
287
  /** Request processing counts for a batch. */
212
288
  export interface BatchRequestCounts {
213
289
  /** Total requests in the batch. */
214
- total?: number;
290
+ readonly total?: number;
215
291
  /** Completed requests. */
216
- completed?: number;
292
+ readonly completed?: number;
217
293
  /** Failed requests. */
218
- failed?: number;
294
+ readonly failed?: number;
219
295
  }
220
296
 
221
297
  /** Status of a batch job. */
222
- export declare const enum BatchStatus {
298
+ export declare enum BatchStatus {
223
299
  /** Validating the input file. */
224
300
  Validating = "validating",
225
301
  /** Job failed. */
@@ -241,191 +317,172 @@ export declare const enum BatchStatus {
241
317
  /** Configuration for budget enforcement. */
242
318
  export interface BudgetConfig {
243
319
  /** Maximum total spend across all models, in USD. `None` means unlimited. */
244
- globalLimit?: number;
320
+ readonly globalLimit?: number;
245
321
  /**
246
322
  * Per-model spending limits in USD. Models not listed here are only
247
323
  * constrained by `global_limit`.
248
324
  */
249
- modelLimits?: Record<string, number>;
325
+ readonly modelLimits?: Record<string, number>;
250
326
  /** Whether to reject requests or merely warn when a limit is exceeded. */
251
- enforcement?: JsEnforcement;
327
+ readonly enforcement?: Enforcement;
252
328
  }
253
329
 
254
- export declare function budgetConfigDefault(): BudgetConfig;
255
-
256
330
  /** Storage backend for the response cache. */
257
- export interface CacheBackend {
258
- type: string;
259
- scheme?: string;
260
- config?: Record<string, string>;
261
- }
331
+ export type CacheBackend =
332
+ | { type: "memory" }
333
+ | { type: "open_dal"; scheme: string; config: Record<string, string> };
262
334
 
263
335
  /** Configuration for the response cache. */
264
336
  export interface CacheConfig {
265
337
  /** Maximum number of cached entries. */
266
- maxEntries?: number;
338
+ readonly maxEntries?: number;
267
339
  /** Time-to-live for each cached entry. */
268
- ttl?: number;
340
+ readonly ttl?: number;
269
341
  /** Storage backend to use. */
270
- backend?: JsCacheBackend;
342
+ readonly backend?: CacheBackend;
271
343
  }
272
344
 
273
- export declare function cacheConfigDefault(): CacheConfig;
274
-
275
- /**
276
- * Return the capability flags for a named provider.
277
- *
278
- * Performs an O(n) linear scan over the embedded registry (142 entries).
279
- * Returns an owned value so that bindings can box/copy it across the FFI
280
- * boundary without dealing with lifetimes. `ProviderCapabilities` is `Copy`,
281
- * so this is a cheap memcpy of seven `bool` fields.
282
- *
283
- * For unknown `provider_name` values the function returns an all-`false`
284
- * sentinel so callers never need to handle `Option`.
285
- */
286
- export declare function capabilities(providerName: string): ProviderCapabilities;
287
-
288
345
  /** A streamed chunk of a chat completion response. */
289
346
  export interface ChatCompletionChunk {
290
347
  /** Unique identifier for this stream. */
291
- id?: string;
348
+ readonly id?: string;
292
349
  /**
293
350
  * Always `"chat.completion.chunk"` from OpenAI-compatible APIs. Stored
294
351
  * as a plain `String` so non-standard provider values do not fail parsing.
295
352
  */
296
- object?: string;
353
+ readonly object?: string;
297
354
  /** Unix timestamp of chunk creation. */
298
- created?: number;
355
+ readonly created?: number;
299
356
  /** Model used to generate the chunk. */
300
- model?: string;
357
+ readonly model?: string;
301
358
  /** Streaming choices (delta updates). */
302
- choices?: Array<JsStreamChoice>;
359
+ readonly choices?: Array<StreamChoice>;
303
360
  /** Token usage (typically only in the final chunk). */
304
- usage?: Usage;
361
+ readonly usage?: Usage;
305
362
  /** Fingerprint of the system configuration (OpenAI-specific). */
306
- systemFingerprint?: string;
363
+ readonly systemFingerprint?: string;
307
364
  /** Service tier used (OpenAI-specific). */
308
- serviceTier?: string;
365
+ readonly serviceTier?: string;
309
366
  }
310
367
 
311
368
  /** Chat completion request (compatible with OpenAI and similar APIs). */
312
369
  export interface ChatCompletionRequest {
313
370
  /** Model ID (e.g., `"gpt-4o-mini"`, `"claude-3-5-sonnet"`). */
314
- model?: string;
371
+ readonly model?: string;
315
372
  /** Conversation history from oldest to newest. */
316
- messages?: Array<JsMessage>;
373
+ readonly messages?: Array<Message>;
317
374
  /** Sampling temperature in `[0.0, 2.0]`. Higher increases randomness. Defaults to 1.0. */
318
- temperature?: number;
375
+ readonly temperature?: number;
319
376
  /** Nucleus sampling parameter in `[0.0, 1.0]`. Lower is more focused. */
320
- topP?: number;
377
+ readonly topP?: number;
321
378
  /** Number of chat completions to generate. Defaults to 1. */
322
- n?: number;
379
+ readonly n?: number;
323
380
  /**
324
381
  * Whether to stream the response.
325
382
  *
326
383
  * Managed by the client layer — do not set directly.
327
384
  */
328
- stream?: boolean;
385
+ readonly stream?: boolean;
329
386
  /** Stop sequence(s) that halt token generation. */
330
- stop?: JsStopSequence;
387
+ readonly stop?: StopSequence;
331
388
  /** Max output tokens. Different from max_completion_tokens in some providers. */
332
- maxTokens?: number;
389
+ readonly maxTokens?: number;
333
390
  /** Presence penalty in `[-2.0, 2.0]`. Positive discourages repeated topics. */
334
- presencePenalty?: number;
391
+ readonly presencePenalty?: number;
335
392
  /** Frequency penalty in `[-2.0, 2.0]`. Positive discourages repeated tokens. */
336
- frequencyPenalty?: number;
393
+ readonly frequencyPenalty?: number;
337
394
  /**
338
395
  * Token bias map. Uses `BTreeMap` (sorted keys) for deterministic
339
396
  * serialization order — important when hashing or signing requests.
340
397
  */
341
- logitBias?: Record<string, number>;
398
+ readonly logitBias?: Record<string, number>;
342
399
  /** User identifier for request tracking and abuse detection. */
343
- user?: string;
400
+ readonly user?: string;
344
401
  /** Tools the model can invoke. */
345
- tools?: Array<ChatCompletionTool>;
402
+ readonly tools?: Array<ChatCompletionTool>;
346
403
  /** Tool usage mode (auto, required, none, or specific tool). */
347
- toolChoice?: JsToolChoice;
404
+ readonly toolChoice?: ToolChoice;
348
405
  /** Whether the model can call multiple tools in parallel. Defaults to true. */
349
- parallelToolCalls?: boolean;
406
+ readonly parallelToolCalls?: boolean;
350
407
  /** Output format constraint (text, JSON, JSON schema). */
351
- responseFormat?: JsResponseFormat;
408
+ readonly responseFormat?: ResponseFormat;
352
409
  /** Streaming options (e.g., include_usage). */
353
- streamOptions?: JsStreamOptions;
410
+ readonly streamOptions?: StreamOptions;
354
411
  /** Random seed for reproducible outputs. Provider support varies. */
355
- seed?: number;
412
+ readonly seed?: number;
356
413
  /** Reasoning effort level (low, medium, high) for extended-thinking models. */
357
- reasoningEffort?: JsReasoningEffort;
414
+ readonly reasoningEffort?: ReasoningEffort;
358
415
  /**
359
416
  * Provider-specific extra parameters merged into the request body.
360
417
  * Use for guardrails, safety settings, grounding config, etc.
361
418
  */
362
- extraBody?: any;
419
+ readonly extraBody?: JsonValue;
363
420
  }
364
421
 
365
422
  /** Chat completion response from the API. */
366
423
  export interface ChatCompletionResponse {
367
424
  /** Unique identifier for this response. */
368
- id?: string;
425
+ readonly id?: string;
369
426
  /**
370
427
  * Always `"chat.completion"` from OpenAI-compatible APIs. Stored as a
371
428
  * plain `String` so non-standard provider values do not break deserialization.
372
429
  */
373
- object?: string;
430
+ readonly object?: string;
374
431
  /** Unix timestamp of response creation. */
375
- created?: number;
432
+ readonly created?: number;
376
433
  /** Model used to generate the response. */
377
- model?: string;
434
+ readonly model?: string;
378
435
  /** List of completion choices. */
379
- choices?: Array<JsChoice>;
436
+ readonly choices?: Array<Choice>;
380
437
  /** Token usage statistics. */
381
- usage?: Usage;
438
+ readonly usage?: Usage;
382
439
  /** Fingerprint of the system configuration (OpenAI-specific). */
383
- systemFingerprint?: string;
440
+ readonly systemFingerprint?: string;
384
441
  /** Service tier used (OpenAI-specific). */
385
- serviceTier?: string;
442
+ readonly serviceTier?: string;
386
443
  }
387
444
 
388
445
  /** A tool the model can invoke (currently, all tools are functions). */
389
446
  export interface ChatCompletionTool {
390
447
  /** Tool type (always "function" in OpenAI spec). */
391
- type: JsToolType;
448
+ readonly toolType: ToolType;
392
449
  /** Function definition with name, description, and JSON schema parameters. */
393
- function: JsFunctionDefinition;
450
+ readonly function: FunctionDefinition;
394
451
  }
395
452
 
396
- export declare function chatStream(
397
- engine: DefaultClient,
398
- model: string,
399
- ): Promise<ChatStreamIterator>;
400
-
401
- /**
402
- * Assert that `current_len + incoming` does not exceed `limit`.
403
- *
404
- * Call this before appending `incoming` bytes to any buffer that must
405
- * stay below `limit`. Returns `Err(LiterLlmError.Streaming)` on overflow
406
- * and emits a `tracing.warn!` with context.
407
- *
408
- * # Example
409
- */
410
- export declare function checkBound(
411
- context: string,
412
- currentLen: number,
413
- incoming: number,
414
- limit: number,
415
- ): void;
416
-
417
453
  /** A single completion choice. */
418
454
  export interface Choice {
419
455
  /** Index of this choice in the choices array. */
420
- index?: number;
456
+ readonly index?: number;
421
457
  /** The assistant's message response. */
422
- message?: AssistantMessage;
458
+ readonly message?: AssistantMessage;
423
459
  /** Why the model stopped generating (stop, length, tool_calls, content_filter, etc.). */
424
- finishReason?: JsFinishReason;
460
+ readonly finishReason?: FinishReason;
461
+ }
462
+
463
+ /**
464
+ * A per-chunk transformation in the [`StreamPipeline`].
465
+ *
466
+ * Each middleware receives a typed chunk and returns `Ok(Some(chunk))`
467
+ * to pass it through (optionally modified), `Ok(None)` to drop the chunk,
468
+ * or `Err(e)` to propagate a stream error.
469
+ *
470
+ * The trait is object-safe so implementations can be stored in a
471
+ * `Vec<Box<dyn ChunkMiddleware>>` inside [`StreamPipeline`].
472
+ */
473
+ export interface ChunkMiddleware {
474
+ /**
475
+ * Process a single chunk.
476
+ *
477
+ * - `Ok(Some(chunk))` — emit (possibly transformed) chunk.
478
+ * - `Ok(None)` — drop this chunk silently.
479
+ * - `Err(e)` — propagate as a stream error.
480
+ */
481
+ process(chunk?: ChatCompletionChunk | undefined | null): string;
425
482
  }
426
483
 
427
484
  /** Observable state of a circuit breaker. */
428
- export declare const enum CircuitState {
485
+ export declare enum CircuitState {
429
486
  /** Requests flow through normally. */
430
487
  Closed = "Closed",
431
488
  /** All requests are rejected; the circuit is waiting for the backoff to elapse. */
@@ -434,301 +491,241 @@ export declare const enum CircuitState {
434
491
  HalfOpen = "HalfOpen",
435
492
  }
436
493
 
437
- /**
438
- * Remove all guardrails from the global registry.
439
- *
440
- * Primarily useful in tests to reset state between test cases.
441
- *
442
- * # Panics
443
- *
444
- * Panics if the global registry lock is poisoned.
445
- */
446
- export declare function clear(): void;
447
-
448
- /**
449
- * Calculate the estimated cost of a completion given a model name and token
450
- * counts.
451
- *
452
- * Returns `None` if the model is not present in the embedded pricing registry.
453
- * Returns `Some(cost_usd)` otherwise, where the value is in US dollars.
454
- *
455
- * When an exact model name match is not found, progressively shorter prefixes
456
- * are tried by stripping from the last `-` or `.` separator. For example,
457
- * `gpt-4-0613` will match `gpt-4` if no `gpt-4-0613` entry exists.
458
- *
459
- * # Example
460
- */
461
- export declare function completionCost(
462
- model: string,
463
- promptTokens: number,
464
- completionTokens: number,
465
- ): number | null;
466
-
467
- /**
468
- * Calculate the estimated cost of a completion, accounting for cached
469
- * (cache-hit) prompt tokens billed at the provider's discounted rate.
470
- *
471
- * `cached_tokens` is the count of prompt tokens served from the provider's
472
- * prompt cache. It must be `<= prompt_tokens` (cached tokens are a subset of
473
- * the prompt). The non-cached portion is billed at `input_cost_per_token`
474
- * and the cached portion at `cache_read_input_token_cost` when the model
475
- * has cache pricing; otherwise the entire prompt is billed at the regular
476
- * input rate.
477
- *
478
- * Returns `None` if the model is not present in the embedded pricing
479
- * registry, mirroring `completion_cost`.
480
- */
481
- export declare function completionCostWithCache(
482
- model: string,
483
- promptTokens: number,
484
- cachedTokens: number,
485
- completionTokens: number,
486
- ): number | null;
487
-
488
- /**
489
- * Return the set of complex provider names.
490
- *
491
- * Complex providers require custom auth/routing logic beyond simple bearer
492
- * tokens (e.g. AWS Bedrock SigV4, Vertex AI OAuth2).
493
- *
494
- * The returned reference points into the static registry — no allocation.
495
- */
496
- export declare function complexProviderNames(): Array<string>;
497
-
498
494
  /** A single content part in a user message — text, image, document, or audio. */
499
- export interface ContentPart {
500
- type: string;
501
- text?: string;
502
- imageUrl?: ImageUrl;
503
- document?: DocumentContent;
504
- inputAudio?: AudioContent;
505
- }
506
-
507
- /**
508
- * Count tokens for a full `ChatCompletionRequest`.
509
- *
510
- * Sums tokens across all message text contents plus a per-message overhead
511
- * of ~4 tokens (for role, separators, and formatting metadata). Tool
512
- * definitions and multimodal content parts (images, audio, documents) are
513
- * not counted — only textual content contributes to the token total.
514
- *
515
- * # Errors
516
- *
517
- * Returns `LiterLlmError.BadRequest` if the tokenizer cannot be loaded or
518
- * if tokenization fails for any message.
519
- */
520
- export declare function countRequestTokens(
521
- model: string,
522
- req?: ChatCompletionRequest | undefined | null,
523
- ): number;
524
-
525
- /**
526
- * Count tokens in a text string using the tokenizer for the given model.
527
- *
528
- * The tokenizer is resolved from the model name prefix (e.g. `"gpt-4o"` maps
529
- * to the `Xenova/gpt-4o` HuggingFace tokenizer). Tokenizers are cached after
530
- * first load.
531
- *
532
- * # Errors
533
- *
534
- * Returns `LiterLlmError.BadRequest` if the tokenizer cannot be loaded
535
- * (e.g. network failure on first use) or if tokenization itself fails.
536
- */
537
- export declare function countTokens(model: string, text: string): number;
495
+ export type ContentPart =
496
+ | { type: "text"; text: string }
497
+ | { type: "image_url"; imageUrl: ImageUrl }
498
+ | { type: "document"; document: DocumentContent }
499
+ | { type: "input_audio"; inputAudio: AudioContent };
538
500
 
539
501
  /** Request to create a batch job. */
540
502
  export interface CreateBatchRequest {
541
503
  /** ID of the uploaded input file (JSONL format). */
542
- inputFileId?: string;
504
+ readonly inputFileId?: string;
543
505
  /** API endpoint (e.g., `"/v1/chat/completions"`). */
544
- endpoint?: string;
506
+ readonly endpoint?: string;
545
507
  /** Completion window (e.g., `"24h"`). */
546
- completionWindow?: string;
508
+ readonly completionWindow?: string;
547
509
  /** Optional metadata to attach to the batch. */
548
- metadata?: any;
510
+ readonly metadata?: JsonValue;
549
511
  }
550
512
 
551
- /**
552
- * Create a new LLM client with simple scalar configuration.
553
- *
554
- * This is the primary binding entry-point. All parameters except `api_key`
555
- * are optional — omitting them uses the same defaults as
556
- * `ClientConfigBuilder`.
557
- *
558
- * # Errors
559
- *
560
- * Returns `LiterLlmError` if the underlying HTTP client cannot be
561
- * constructed, or if the resolved provider configuration is invalid.
562
- */
563
- export declare function createClient(
564
- apiKey: string,
565
- baseUrl?: string | undefined | null,
566
- timeoutSecs?: number | undefined | null,
567
- maxRetries?: number | undefined | null,
568
- modelHint?: string | undefined | null,
569
- ): DefaultClient;
570
-
571
- /**
572
- * Create a new LLM client from a JSON string.
573
- *
574
- * The JSON object accepts the same fields as `liter-llm.toml` (snake_case).
575
- *
576
- * # Errors
577
- *
578
- * Returns `LiterLlmError.BadRequest` if `json` is not valid JSON or
579
- * contains unknown fields.
580
- */
581
- export declare function createClientFromJson(json: string): DefaultClient;
582
-
583
513
  /** Request to upload a file. */
584
514
  export interface CreateFileRequest {
585
515
  /** Base64-encoded file data. */
586
- file?: string;
516
+ readonly file?: string;
587
517
  /** Purpose for the file. */
588
- purpose?: JsFilePurpose;
518
+ readonly purpose?: FilePurpose;
589
519
  /** Optional filename to associate with the upload. */
590
- filename?: string;
520
+ readonly filename?: string;
591
521
  }
592
522
 
593
523
  /** Request to create images from a text prompt. */
594
524
  export interface CreateImageRequest {
595
525
  /** Text description of the image to generate. */
596
- prompt?: string;
526
+ readonly prompt?: string;
597
527
  /** Model ID (e.g., `"dall-e-3"`). Optional; API may use default if unset. */
598
- model?: string;
528
+ readonly model?: string;
599
529
  /** Number of images to generate. Defaults to 1. */
600
- n?: number;
530
+ readonly n?: number;
601
531
  /** Image size (e.g., `"1024x1024"`, `"1792x1024"`). */
602
- size?: string;
532
+ readonly size?: string;
603
533
  /** Image quality: `"standard"` or `"hd"`. */
604
- quality?: string;
534
+ readonly quality?: string;
605
535
  /** Style: `"natural"` or `"vivid"` (DALL-E 3 only). */
606
- style?: string;
536
+ readonly style?: string;
607
537
  /** Response format: `"url"` or `"b64_json"`. */
608
- responseFormat?: string;
538
+ readonly responseFormat?: string;
609
539
  /** User identifier for request tracking. */
610
- user?: string;
540
+ readonly user?: string;
611
541
  }
612
542
 
613
543
  /** Request to create a structured response. */
614
544
  export interface CreateResponseRequest {
615
545
  /** Model ID. */
616
- model?: string;
546
+ readonly model?: string;
617
547
  /** Input data to process (e.g., a document to extract from). */
618
- input?: any;
548
+ readonly input?: JsonValue;
619
549
  /** Instructions for processing the input. */
620
- instructions?: string;
550
+ readonly instructions?: string;
621
551
  /** Available tools the model can use. */
622
- tools?: Array<JsResponseTool>;
552
+ readonly tools?: Array<ResponseTool>;
623
553
  /** Sampling temperature in `[0.0, 2.0]`. Defaults to 1.0. */
624
- temperature?: number;
554
+ readonly temperature?: number;
625
555
  /** Maximum output tokens. */
626
- maxOutputTokens?: number;
556
+ readonly maxOutputTokens?: number;
627
557
  /** Optional metadata. */
628
- metadata?: any;
558
+ readonly metadata?: JsonValue;
629
559
  }
630
560
 
631
561
  /** Request to generate speech audio from text. */
632
562
  export interface CreateSpeechRequest {
633
563
  /** Model ID (e.g., `"tts-1"`, `"tts-1-hd"`). */
634
- model?: string;
564
+ readonly model?: string;
635
565
  /** Text to synthesize into speech. */
636
- input?: string;
566
+ readonly input?: string;
637
567
  /** Voice name (e.g., `"alloy"`, `"echo"`, `"fable"`, `"onyx"`, `"nova"`, `"shimmer"`). */
638
- voice?: string;
568
+ readonly voice?: string;
639
569
  /** Audio format (e.g., `"mp3"`, `"opus"`, `"aac"`, `"flac"`, `"wav"`, `"pcm"`). */
640
- responseFormat?: string;
570
+ readonly responseFormat?: string;
641
571
  /** Playback speed in `[0.25, 4.0]`. Defaults to 1.0. */
642
- speed?: number;
572
+ readonly speed?: number;
643
573
  }
644
574
 
645
575
  /** Request to transcribe audio into text. */
646
576
  export interface CreateTranscriptionRequest {
647
577
  /** Model ID (e.g., `"whisper-1"`). */
648
- model?: string;
578
+ readonly model?: string;
649
579
  /** Base64-encoded audio file data. */
650
- file?: string;
580
+ readonly file?: string;
651
581
  /** Language ISO-639-1 code (e.g., `"en"`, `"fr"`, `"de"`). Optional; model auto-detects. */
652
- language?: string;
582
+ readonly language?: string;
653
583
  /** Optional text to guide the model (improves accuracy for domain-specific terms). */
654
- prompt?: string;
584
+ readonly prompt?: string;
655
585
  /** Output format (e.g., `"json"`, `"text"`, `"vtt"`, `"srt"`, `"verbose_json"`). */
656
- responseFormat?: string;
586
+ readonly responseFormat?: string;
657
587
  /** Sampling temperature in `[0.0, 1.0]`. Higher increases variability. Defaults to 0. */
658
- temperature?: number;
588
+ readonly temperature?: number;
659
589
  }
660
590
 
661
591
  /** Configuration for registering a custom LLM provider at runtime. */
662
592
  export interface CustomProviderConfig {
663
593
  /** Unique name for this provider (e.g., "my-provider"). */
664
- name: string;
594
+ readonly name: string;
665
595
  /** Base URL for the provider's API (e.g., "https://api.my-provider.com/v1"). */
666
- baseUrl: string;
596
+ readonly baseUrl: string;
667
597
  /** Authentication header format. */
668
- authHeader: JsAuthHeaderFormat;
598
+ readonly authHeader: AuthHeaderFormat;
669
599
  /** Model name prefixes that route to this provider (e.g., `["my-"]`). */
670
- modelPrefixes: Array<string>;
600
+ readonly modelPrefixes: Array<string>;
601
+ }
602
+
603
+ /**
604
+ * Default client implementation backed by `reqwest`.
605
+ *
606
+ * Sends requests to 143 LLM providers with automatic provider detection
607
+ * and per-request routing. The provider is resolved at construction time
608
+ * from `model_hint` (or defaults to OpenAI), but individual requests can
609
+ * override the provider via model name prefix (e.g. `"anthropic/claude-3-5-sonnet"`
610
+ * routes to Anthropic regardless of construction-time setting).
611
+ *
612
+ * When the model prefix does not match any known provider, the construction-time
613
+ * provider is used as the fallback. This enables seamless migration between
614
+ * providers by changing only the model name.
615
+ *
616
+ * The provider is stored behind an [`Arc`] so it can be shared cheaply into
617
+ * async closures and streaming tasks. Pre-computed auth headers and extra
618
+ * headers are cached at construction to avoid redundant encoding on every request.
619
+ */
620
+ export declare class DefaultClient {
621
+ chat(req?: ChatCompletionRequest | undefined | null): Promise<ChatCompletionResponse>;
622
+ chatStream(
623
+ req?: ChatCompletionRequest | undefined | null,
624
+ ): Promise<AsyncGenerator<ChatCompletionChunk, void, undefined>>;
625
+ embed(req?: EmbeddingRequest | undefined | null): Promise<EmbeddingResponse>;
626
+ listModels(): Promise<ModelsListResponse>;
627
+ imageGenerate(req?: CreateImageRequest | undefined | null): Promise<ImagesResponse>;
628
+ speech(req?: CreateSpeechRequest | undefined | null): Promise<Uint8Array>;
629
+ transcribe(req?: CreateTranscriptionRequest | undefined | null): Promise<TranscriptionResponse>;
630
+ moderate(req?: ModerationRequest | undefined | null): Promise<ModerationResponse>;
631
+ rerank(req?: RerankRequest | undefined | null): Promise<RerankResponse>;
632
+ search(req?: SearchRequest | undefined | null): Promise<SearchResponse>;
633
+ ocr(req?: OcrRequest | undefined | null): Promise<OcrResponse>;
634
+ createFile(req?: CreateFileRequest | undefined | null): Promise<FileObject>;
635
+ retrieveFile(fileId: string): Promise<FileObject>;
636
+ deleteFile(fileId: string): Promise<DeleteResponse>;
637
+ listFiles(query?: FileListQuery | undefined | null): Promise<FileListResponse>;
638
+ fileContent(fileId: string): Promise<Uint8Array>;
639
+ createBatch(req?: CreateBatchRequest | undefined | null): Promise<BatchObject>;
640
+ retrieveBatch(batchId: string): Promise<BatchObject>;
641
+ listBatches(query?: BatchListQuery | undefined | null): Promise<BatchListResponse>;
642
+ cancelBatch(batchId: string): Promise<BatchObject>;
643
+ fetchBatchForPolling(batchId: string): Promise<BatchObject>;
644
+ /**
645
+ * Poll a batch until it reaches a terminal status (Completed, Failed, Expired, Cancelled).
646
+ *
647
+ * Uses exponential backoff with configurable initial interval, maximum interval, and backoff multiplier.
648
+ * Optionally supports a timeout that aborts polling if exceeded.
649
+ * @throws `BatchWaitError::Failed` if the batch reaches a failure terminal status,
650
+ * `BatchWaitError::Timeout` if the configured timeout is exceeded, or
651
+ * `BatchWaitError::Client` for underlying client errors.
652
+ */
653
+ waitForBatch(
654
+ batchId: string,
655
+ config?: WaitForBatchConfig | undefined | null,
656
+ ): Promise<BatchObject>;
657
+ createResponse(req?: CreateResponseRequest | undefined | null): Promise<ResponseObject>;
658
+ retrieveResponse(responseId: string): Promise<ResponseObject>;
659
+ cancelResponse(responseId: string): Promise<ResponseObject>;
671
660
  }
672
661
 
673
662
  /** Response from a delete operation. */
674
663
  export interface DeleteResponse {
675
664
  /** ID of the deleted resource. */
676
- id?: string;
665
+ readonly id?: string;
677
666
  /** Object type. */
678
- object?: string;
667
+ readonly object?: string;
679
668
  /** Confirmation that the resource was deleted. */
680
- deleted?: boolean;
669
+ readonly deleted?: boolean;
681
670
  }
682
671
 
683
672
  /** Developer message (system-like message for Claude models). */
684
673
  export interface DeveloperMessage {
685
674
  /** Developer-specific instructions or context. */
686
- content?: string;
675
+ readonly content?: string;
687
676
  /** Optional name for the developer message source. */
688
- name?: string;
677
+ readonly name?: string;
689
678
  }
690
679
 
691
680
  /** PDF/document content part for vision-capable models. */
692
681
  export interface DocumentContent {
693
682
  /** Base64-encoded document data or URL. */
694
- data?: string;
683
+ readonly data?: string;
695
684
  /** MIME type (e.g., "application/pdf", "text/csv"). */
696
- mediaType?: string;
685
+ readonly mediaType?: string;
697
686
  }
698
687
 
699
688
  /** The format in which the embedding vectors are returned. */
700
- export declare const enum EmbeddingFormat {
689
+ export declare enum EmbeddingFormat {
701
690
  /** 32-bit floating-point numbers (default). */
702
691
  Float = "float",
703
692
  /** Base64-encoded string representation of the floats. */
704
693
  Base64 = "base64",
705
694
  }
706
695
 
696
+ /** Text or texts to embed. */
697
+ export declare enum EmbeddingInput {
698
+ /** Single text string. */
699
+ Single = "Single",
700
+ /** Multiple text strings (batch embedding). */
701
+ Multiple = "Multiple",
702
+ }
703
+
707
704
  /** A single embedding vector. */
708
705
  export interface EmbeddingObject {
709
706
  /**
710
707
  * Always `"embedding"` from OpenAI-compatible APIs. Stored as a plain
711
708
  * `String` so non-standard provider values do not break deserialization.
712
709
  */
713
- object: string;
710
+ readonly object: string;
714
711
  /** The embedding vector. */
715
- embedding: Array<number>;
712
+ readonly embedding: Array<number>;
716
713
  /** Index in the batch (corresponds to input order). */
717
- index: number;
714
+ readonly index: number;
718
715
  }
719
716
 
720
717
  /** Embedding request. */
721
718
  export interface EmbeddingRequest {
722
719
  /** Model ID (e.g., `"text-embedding-3-small"`). */
723
- model?: string;
720
+ readonly model?: string;
724
721
  /** Text or texts to embed. */
725
- input?: JsEmbeddingInput;
722
+ readonly input?: EmbeddingInput;
726
723
  /** Output format: float (native) or base64. */
727
- encodingFormat?: JsEmbeddingFormat;
724
+ readonly encodingFormat?: EmbeddingFormat;
728
725
  /** Requested embedding dimensions (if supported by the model). */
729
- dimensions?: number;
726
+ readonly dimensions?: number;
730
727
  /** User identifier for request tracking. */
731
- user?: string;
728
+ readonly user?: string;
732
729
  }
733
730
 
734
731
  /** Embedding response. */
@@ -737,89 +734,69 @@ export interface EmbeddingResponse {
737
734
  * Always `"list"` from OpenAI-compatible APIs. Stored as a plain
738
735
  * `String` so non-standard provider values do not break deserialization.
739
736
  */
740
- object: string;
737
+ readonly object: string;
741
738
  /** List of embeddings. */
742
- data: Array<JsEmbeddingObject>;
739
+ readonly data: Array<EmbeddingObject>;
743
740
  /** Model used to generate embeddings. */
744
- model: string;
741
+ readonly model: string;
745
742
  /** Token usage (input tokens only; embeddings have zero output tokens). */
746
- usage?: Usage;
743
+ readonly usage?: Usage;
747
744
  }
748
745
 
749
746
  /** How budget limits are enforced. */
750
- export declare const enum Enforcement {
747
+ export declare enum Enforcement {
751
748
  /**
752
749
  * Reject requests that would exceed the budget with
753
- * `LiterLlmError.BudgetExceeded`.
750
+ * [`LiterLlmError::BudgetExceeded`].
754
751
  */
755
752
  Hard = "Hard",
756
753
  /**
757
- * Allow requests through but emit a `tracing.warn!` when the budget is
754
+ * Allow requests through but emit a `tracing::warn!` when the budget is
758
755
  * exceeded.
759
756
  */
760
757
  Soft = "Soft",
761
758
  }
762
759
 
763
- /**
764
- * Install the `ring` crypto provider as the rustls process default, idempotently.
765
- *
766
- * rustls 0.23+ removed the implicit default provider. This function installs
767
- * `ring` once per process. Subsequent calls are no-ops. Calling it from a
768
- * downstream Rust app that has already installed `aws-lc-rs` is safe — the
769
- * `Err` from `install_default()` is silently ignored.
770
- *
771
- * Called automatically by every internal `reqwest.Client` constructor
772
- * (auth providers, default HTTP client). Bindings and downstream consumers
773
- * reach those constructors transitively, so no manual init is required.
774
- *
775
- * WASM builds are exempt — the WASM target uses the browser/Node.js fetch
776
- * API instead of rustls, so no crypto provider is needed.
777
- *
778
- * Windows builds use native-tls (SChannel) via reqwest, so rustls is not
779
- * present and no crypto provider installation is needed.
780
- */
781
- export declare function ensureCryptoProvider(): void;
782
-
783
760
  /** Query parameters for listing files. */
784
761
  export interface FileListQuery {
785
762
  /** Filter by file purpose (e.g., `"batch"`, `"fine-tune"`). */
786
- purpose?: string;
763
+ readonly purpose?: string;
787
764
  /** Maximum number of results to return. Defaults to 20. */
788
- limit?: number;
765
+ readonly limit?: number;
789
766
  /** Pagination cursor: return results after this file ID. */
790
- after?: string;
767
+ readonly after?: string;
791
768
  }
792
769
 
793
770
  /** Response from listing files. */
794
771
  export interface FileListResponse {
795
772
  /** Object type (always `"list"`). */
796
- object?: string;
773
+ readonly object?: string;
797
774
  /** List of file objects. */
798
- data?: Array<FileObject>;
775
+ readonly data?: Array<FileObject>;
799
776
  /** Whether more results are available. */
800
- hasMore?: boolean;
777
+ readonly hasMore?: boolean;
801
778
  }
802
779
 
803
780
  /** An uploaded file object. */
804
781
  export interface FileObject {
805
782
  /** Unique file ID. */
806
- id?: string;
783
+ readonly id?: string;
807
784
  /** Object type (always `"file"`). */
808
- object?: string;
785
+ readonly object?: string;
809
786
  /** File size in bytes. */
810
- bytes?: number;
787
+ readonly bytes?: number;
811
788
  /** Unix timestamp of file creation. */
812
- createdAt?: number;
789
+ readonly createdAt?: number;
813
790
  /** Filename. */
814
- filename?: string;
791
+ readonly filename?: string;
815
792
  /** File purpose. */
816
- purpose?: string;
793
+ readonly purpose?: string;
817
794
  /** Processing status (e.g., `"uploaded"`, `"processed"`). */
818
- status?: string;
795
+ readonly status?: string;
819
796
  }
820
797
 
821
798
  /** Purpose of an uploaded file. */
822
- export declare const enum FilePurpose {
799
+ export declare enum FilePurpose {
823
800
  /** File for use with Assistants API. */
824
801
  Assistants = "assistants",
825
802
  /** File for batch processing. */
@@ -831,7 +808,7 @@ export declare const enum FilePurpose {
831
808
  }
832
809
 
833
810
  /** Why a choice stopped generating tokens. */
834
- export declare const enum FinishReason {
811
+ export declare enum FinishReason {
835
812
  Stop = "stop",
836
813
  Length = "length",
837
814
  ToolCalls = "tool_calls",
@@ -853,31 +830,48 @@ export declare const enum FinishReason {
853
830
  /** Function call details. */
854
831
  export interface FunctionCall {
855
832
  /** Function name. */
856
- name: string;
857
- /** Arguments as a JSON string (parse with serde_json.from_str). */
858
- arguments: string;
833
+ readonly name: string;
834
+ /** Arguments as a JSON string (parse with serde_json::from_str). */
835
+ readonly arguments: string;
859
836
  }
860
837
 
861
838
  /** Function definition exposed to the model. */
862
839
  export interface FunctionDefinition {
863
840
  /** Name of the function. Required and must be alphanumeric + underscores. */
864
- name: string;
841
+ readonly name: string;
865
842
  /** Human-readable description explaining what the function does. */
866
- description?: string;
843
+ readonly description?: string;
867
844
  /** JSON Schema defining the function's parameters. */
868
- parameters?: any;
845
+ readonly parameters?: JsonValue;
869
846
  /** If true, enforce strict JSON schema validation for arguments. */
870
- strict?: boolean;
847
+ readonly strict?: boolean;
871
848
  }
872
849
 
873
850
  /** Deprecated legacy function-role message body. */
874
851
  export interface FunctionMessage {
875
- content?: string;
876
- name?: string;
852
+ readonly content?: string;
853
+ readonly name?: string;
854
+ }
855
+
856
+ /**
857
+ * Abstraction over a health probe strategy.
858
+ *
859
+ * Implementors issue a lightweight probe against `upstream` (typically a
860
+ * provider base URL or named identifier) and report [`HealthStatus`].
861
+ */
862
+ export interface HealthChecker {
863
+ /**
864
+ * Probe `upstream` and return its current [`HealthStatus`].
865
+ *
866
+ * The parameter is taken by value (`String`) so that implementations can
867
+ * move it into the returned future without a clone, making the
868
+ * `'static + Send` bound on the future trivially satisfiable.
869
+ */
870
+ check(upstream: string): Promise<string>;
877
871
  }
878
872
 
879
873
  /** The result of a single health probe. */
880
- export declare const enum HealthStatus {
874
+ export declare enum HealthStatus {
881
875
  /** The probe succeeded; the upstream is reachable. */
882
876
  Healthy = "Healthy",
883
877
  /** The probe failed; the upstream may be down. */
@@ -887,15 +881,15 @@ export declare const enum HealthStatus {
887
881
  /** A single generated image, returned as either a URL or base64 data. */
888
882
  export interface Image {
889
883
  /** Image URL (if response_format was "url"). */
890
- url?: string;
884
+ readonly url?: string;
891
885
  /** Base64-encoded image data (if response_format was "b64_json"). */
892
- b64Json?: string;
886
+ readonly b64Json?: string;
893
887
  /** The final prompt used to generate the image (DALL-E 3). */
894
- revisedPrompt?: string;
888
+ readonly revisedPrompt?: string;
895
889
  }
896
890
 
897
891
  /** Image detail level controlling token cost and processing. */
898
- export declare const enum ImageDetail {
892
+ export declare enum ImageDetail {
899
893
  /** Low detail: scales image to 512x512, uses fewer tokens. */
900
894
  Low = "low",
901
895
  /** High detail: processes up to 2x2 grid of tiles, higher token cost. */
@@ -907,65 +901,63 @@ export declare const enum ImageDetail {
907
901
  /** Response containing generated images. */
908
902
  export interface ImagesResponse {
909
903
  /** Unix timestamp of image creation. */
910
- created?: number;
904
+ readonly created?: number;
911
905
  /** List of generated images. */
912
- data?: Array<JsImage>;
906
+ readonly data?: Array<Image>;
913
907
  }
914
908
 
915
909
  /** An image URL reference with optional detail level for processing. */
916
910
  export interface ImageUrl {
917
911
  /** URL of the image (data URI or HTTP/HTTPS URL). */
918
- url?: string;
912
+ readonly url?: string;
919
913
  /** Detail level: low (512x512), high (2x2 tiles), or auto (model-selected). */
920
- detail?: JsImageDetail;
914
+ readonly detail?: ImageDetail;
921
915
  }
922
916
 
923
917
  /** An intent prototype: `(intent_name, prototype_embedding, target_model_id)`. */
924
918
  export interface IntentPrototype {
925
919
  /** Human-readable name for the intent (used in logs/metrics). */
926
- name: string;
920
+ readonly name: string;
927
921
  /** Pre-computed embedding vector for this intent. */
928
- embedding: Array<number>;
922
+ readonly embedding: Array<number>;
929
923
  /** Model to route to when this intent is detected. */
930
- model: string;
924
+ readonly model: string;
931
925
  }
932
926
 
933
927
  /** JSON Schema specification for constrained output. */
934
928
  export interface JsonSchemaFormat {
935
929
  /** Name of the schema (must be unique in the request). */
936
- name?: string;
930
+ readonly name?: string;
937
931
  /** Description of what the schema represents. */
938
- description?: string;
932
+ readonly description?: string;
939
933
  /** JSON Schema object defining the output structure. */
940
- schema?: any;
934
+ readonly schema?: JsonValue;
941
935
  /** If true, enforce strict schema validation. */
942
- strict?: boolean;
936
+ readonly strict?: boolean;
943
937
  }
944
938
 
945
939
  /** A chat message in a conversation. */
946
- export interface Message {
947
- role: string;
948
- system?: SystemMessage;
949
- user?: UserMessage;
950
- assistant?: AssistantMessage;
951
- tool?: ToolMessage;
952
- developer?: DeveloperMessage;
953
- function?: FunctionMessage;
954
- }
940
+ export type Message =
941
+ | { role: "system"; 0: SystemMessage }
942
+ | { role: "user"; 0: UserMessage }
943
+ | { role: "assistant"; 0: AssistantMessage }
944
+ | { role: "tool"; 0: ToolMessage }
945
+ | { role: "developer"; 0: DeveloperMessage }
946
+ | { role: "function"; 0: FunctionMessage };
955
947
 
956
948
  /** A model available from the API. */
957
949
  export interface ModelObject {
958
950
  /** Model ID (e.g., `"gpt-4o"`, `"claude-3-5-sonnet"`). */
959
- id?: string;
951
+ readonly id?: string;
960
952
  /**
961
953
  * Always `"model"` from OpenAI-compatible APIs. Stored as a plain
962
954
  * `String` so non-standard provider values do not break deserialization.
963
955
  */
964
- object?: string;
956
+ readonly object?: string;
965
957
  /** Unix timestamp of model creation (or release date). */
966
- created?: number;
958
+ readonly created?: number;
967
959
  /** Organization or entity that owns the model. */
968
- ownedBy?: string;
960
+ readonly ownedBy?: string;
969
961
  }
970
962
 
971
963
  /** Response listing available models. */
@@ -974,162 +966,167 @@ export interface ModelsListResponse {
974
966
  * Always `"list"` from OpenAI-compatible APIs. Stored as a plain
975
967
  * `String` so non-standard provider values do not break deserialization.
976
968
  */
977
- object?: string;
969
+ readonly object?: string;
978
970
  /** List of available models. */
979
- data?: Array<JsModelObject>;
971
+ readonly data?: Array<ModelObject>;
980
972
  }
981
973
 
982
974
  /** Boolean flags for each moderation category. */
983
975
  export interface ModerationCategories {
984
976
  /** Sexual content. */
985
- sexual?: boolean;
977
+ readonly sexual?: boolean;
986
978
  /** Hate speech. */
987
- hate?: boolean;
979
+ readonly hate?: boolean;
988
980
  /** Harassment. */
989
- harassment?: boolean;
981
+ readonly harassment?: boolean;
990
982
  /** Self-harm content. */
991
- "self-harm"?: boolean;
983
+ readonly selfHarm?: boolean;
992
984
  /** Sexual content involving minors. */
993
- "sexual/minors"?: boolean;
985
+ readonly sexualMinors?: boolean;
994
986
  /** Hate speech that threatens violence. */
995
- "hate/threatening"?: boolean;
987
+ readonly hateThreatening?: boolean;
996
988
  /** Graphic violence. */
997
- "violence/graphic"?: boolean;
989
+ readonly violenceGraphic?: boolean;
998
990
  /** Intent to self-harm. */
999
- "self-harm/intent"?: boolean;
991
+ readonly selfHarmIntent?: boolean;
1000
992
  /** Instructions for self-harm. */
1001
- "self-harm/instructions"?: boolean;
993
+ readonly selfHarmInstructions?: boolean;
1002
994
  /** Harassment that threatens violence. */
1003
- "harassment/threatening"?: boolean;
995
+ readonly harassmentThreatening?: boolean;
1004
996
  /** Non-graphic violence. */
1005
- violence?: boolean;
997
+ readonly violence?: boolean;
1006
998
  }
1007
999
 
1008
1000
  /** Confidence scores for each moderation category. */
1009
1001
  export interface ModerationCategoryScores {
1010
1002
  /** Sexual content score. */
1011
- sexual?: number;
1003
+ readonly sexual?: number;
1012
1004
  /** Hate speech score. */
1013
- hate?: number;
1005
+ readonly hate?: number;
1014
1006
  /** Harassment score. */
1015
- harassment?: number;
1007
+ readonly harassment?: number;
1016
1008
  /** Self-harm content score. */
1017
- "self-harm"?: number;
1009
+ readonly selfHarm?: number;
1018
1010
  /** Sexual content involving minors score. */
1019
- "sexual/minors"?: number;
1011
+ readonly sexualMinors?: number;
1020
1012
  /** Hate speech that threatens violence score. */
1021
- "hate/threatening"?: number;
1013
+ readonly hateThreatening?: number;
1022
1014
  /** Graphic violence score. */
1023
- "violence/graphic"?: number;
1015
+ readonly violenceGraphic?: number;
1024
1016
  /** Intent to self-harm score. */
1025
- "self-harm/intent"?: number;
1017
+ readonly selfHarmIntent?: number;
1026
1018
  /** Instructions for self-harm score. */
1027
- "self-harm/instructions"?: number;
1019
+ readonly selfHarmInstructions?: number;
1028
1020
  /** Harassment that threatens violence score. */
1029
- "harassment/threatening"?: number;
1021
+ readonly harassmentThreatening?: number;
1030
1022
  /** Non-graphic violence score. */
1031
- violence?: number;
1023
+ readonly violence?: number;
1024
+ }
1025
+
1026
+ /** Input to the moderation endpoint — a single string or multiple strings. */
1027
+ export declare enum ModerationInput {
1028
+ /** Single text string. */
1029
+ Single = "Single",
1030
+ /** Multiple text strings (batch moderation). */
1031
+ Multiple = "Multiple",
1032
1032
  }
1033
1033
 
1034
1034
  /** Request to classify content for policy violations. */
1035
1035
  export interface ModerationRequest {
1036
1036
  /** Text or texts to check. */
1037
- input?: JsModerationInput;
1037
+ readonly input?: ModerationInput;
1038
1038
  /** Model ID (e.g., `"text-moderation-latest"`). Optional; API uses default if unset. */
1039
- model?: string;
1039
+ readonly model?: string;
1040
1040
  }
1041
1041
 
1042
1042
  /** Response from the moderation endpoint. */
1043
1043
  export interface ModerationResponse {
1044
1044
  /** Unique identifier for this moderation request. */
1045
- id: string;
1045
+ readonly id: string;
1046
1046
  /** Model used for classification. */
1047
- model: string;
1047
+ readonly model: string;
1048
1048
  /** Results for each input string. */
1049
- results: Array<JsModerationResult>;
1049
+ readonly results: Array<ModerationResult>;
1050
1050
  }
1051
1051
 
1052
1052
  /** A single moderation classification result. */
1053
1053
  export interface ModerationResult {
1054
1054
  /** True if any category was flagged. */
1055
- flagged: boolean;
1055
+ readonly flagged: boolean;
1056
1056
  /** Boolean flags for each moderation category. */
1057
- categories: JsModerationCategories;
1057
+ readonly categories: ModerationCategories;
1058
1058
  /** Confidence scores for each category. */
1059
- categoryScores: JsModerationCategoryScores;
1059
+ readonly categoryScores: ModerationCategoryScores;
1060
1060
  }
1061
1061
 
1062
1062
  /** Document input for OCR — either a URL or inline base64 data. */
1063
- export interface OcrDocument {
1064
- type: string;
1065
- url?: string;
1066
- data?: string;
1067
- mediaType?: string;
1068
- }
1063
+ export type OcrDocument =
1064
+ | { type: "document_url"; url: string }
1065
+ | { type: "base64"; data: string; mediaType: string };
1069
1066
 
1070
1067
  /** An image extracted from an OCR page. */
1071
1068
  export interface OcrImage {
1072
1069
  /** Unique image identifier within the document. */
1073
- id: string;
1070
+ readonly id: string;
1074
1071
  /** Base64-encoded image data (if `include_image_base64` was true). */
1075
- imageBase64?: string;
1072
+ readonly imageBase64?: string;
1076
1073
  }
1077
1074
 
1078
1075
  /** A single page of OCR output. */
1079
1076
  export interface OcrPage {
1080
1077
  /** Page index (0-based). */
1081
- index: number;
1078
+ readonly index: number;
1082
1079
  /** Extracted page content as Markdown. */
1083
- markdown: string;
1080
+ readonly markdown: string;
1084
1081
  /** Embedded images extracted from the page (if `include_image_base64` was true). */
1085
- images?: Array<JsOcrImage>;
1082
+ readonly images?: Array<OcrImage>;
1086
1083
  /** Page dimensions in pixels, if available. */
1087
- dimensions?: JsPageDimensions;
1084
+ readonly dimensions?: PageDimensions;
1088
1085
  }
1089
1086
 
1090
1087
  /** An OCR request. */
1091
1088
  export interface OcrRequest {
1092
1089
  /** The model/provider to use (e.g. `"mistral/mistral-ocr-latest"`). */
1093
- model?: string;
1090
+ readonly model?: string;
1094
1091
  /** The document to process (URL or base64). */
1095
- document?: JsOcrDocument;
1092
+ readonly document?: OcrDocument;
1096
1093
  /** Specific pages to process (1-indexed). `None` means all pages. */
1097
- pages?: Array<number>;
1094
+ readonly pages?: Array<number>;
1098
1095
  /** Whether to include base64-encoded images of each processed page. */
1099
- includeImageBase64?: boolean;
1096
+ readonly includeImageBase64?: boolean;
1100
1097
  }
1101
1098
 
1102
1099
  /** An OCR response. */
1103
1100
  export interface OcrResponse {
1104
1101
  /** Extracted pages in order. */
1105
- pages: Array<JsOcrPage>;
1102
+ readonly pages: Array<OcrPage>;
1106
1103
  /** Model/provider used for OCR. */
1107
- model: string;
1104
+ readonly model: string;
1108
1105
  /** Token usage, if reported by the provider. */
1109
- usage?: Usage;
1106
+ readonly usage?: Usage;
1110
1107
  }
1111
1108
 
1112
1109
  /** Page dimensions in pixels. */
1113
1110
  export interface PageDimensions {
1114
1111
  /** Width in pixels. */
1115
- width: number;
1112
+ readonly width: number;
1116
1113
  /** Height in pixels. */
1117
- height: number;
1114
+ readonly height: number;
1118
1115
  }
1119
1116
 
1120
1117
  /**
1121
1118
  * Breakdown of tokens used in the prompt portion of a request.
1122
1119
  *
1123
- * `cached_tokens` is included in `Usage.prompt_tokens` — it is *not* an
1120
+ * `cached_tokens` is included in `Usage::prompt_tokens` — it is *not* an
1124
1121
  * additional charge on top of the prompt token count. When pricing supports
1125
1122
  * a `cache_read_input_token_cost`, the cached portion is billed at the
1126
1123
  * discounted rate and the remainder at the regular input rate.
1127
1124
  */
1128
1125
  export interface PromptTokensDetails {
1129
1126
  /** Cached tokens present in the prompt. Defaults to 0 when absent. */
1130
- cachedTokens?: number;
1127
+ readonly cachedTokens?: number;
1131
1128
  /** Audio input tokens present in the prompt. Defaults to 0 when absent. */
1132
- audioTokens?: number;
1129
+ readonly audioTokens?: number;
1133
1130
  }
1134
1131
 
1135
1132
  /**
@@ -1143,250 +1140,272 @@ export interface PromptTokensDetails {
1143
1140
  *
1144
1141
  * All flags default to `false` so that newly added providers are safe.
1145
1142
  *
1146
- * Access via the crate-level `capabilities` function:
1143
+ * Access via the crate-level [`capabilities`] function:
1144
+ *
1145
+ * ```rust
1146
+ * use liter_llm::capabilities;
1147
+ *
1148
+ * let caps = capabilities("openai");
1149
+ * assert!(caps.function_calling);
1150
+ * assert!(caps.vision);
1151
+ *
1152
+ * // Unknown providers return a default-all-false reference.
1153
+ * let unknown = capabilities("my-private-model");
1154
+ * assert!(!unknown.function_calling);
1155
+ * ```
1147
1156
  */
1148
1157
  export interface ProviderCapabilities {
1149
1158
  /** The provider accepts image input in chat messages. */
1150
- vision?: boolean;
1159
+ readonly vision?: boolean;
1151
1160
  /** The provider supports extended-thinking / reasoning tokens. */
1152
- reasoning?: boolean;
1161
+ readonly reasoning?: boolean;
1153
1162
  /** The provider supports JSON-mode or `response_format` structured output. */
1154
- structuredOutput?: boolean;
1163
+ readonly structuredOutput?: boolean;
1155
1164
  /** The provider supports tool / function calling. */
1156
- functionCalling?: boolean;
1165
+ readonly functionCalling?: boolean;
1157
1166
  /** The provider accepts audio as input. */
1158
- audioIn?: boolean;
1167
+ readonly audioIn?: boolean;
1159
1168
  /** The provider can generate audio / TTS output. */
1160
- audioOut?: boolean;
1169
+ readonly audioOut?: boolean;
1161
1170
  /** The provider accepts video as input. */
1162
- videoIn?: boolean;
1171
+ readonly videoIn?: boolean;
1163
1172
  }
1164
1173
 
1165
1174
  /**
1166
1175
  * Static configuration for a single provider entry in providers.json.
1167
1176
  *
1168
1177
  * This struct deliberately does not include capability flags or streaming
1169
- * format, which are accessed via the `capabilities` function. Keeping
1178
+ * format, which are accessed via the [`capabilities`] function. Keeping
1170
1179
  * these fields separate preserves backward compatibility with all generated
1171
1180
  * binding code that constructs `ProviderConfig` using struct literal syntax.
1172
1181
  */
1173
1182
  export interface ProviderConfig {
1174
1183
  /** Provider identifier (matches the entry key in providers.json). */
1175
- name: string;
1184
+ readonly name: string;
1176
1185
  /** Human-readable provider name shown in UIs. */
1177
- displayName?: string;
1186
+ readonly displayName?: string;
1178
1187
  /** Base URL used as the default for this provider's HTTP client. */
1179
- baseUrl?: string;
1188
+ readonly baseUrl?: string;
1180
1189
  /** Authentication scheme metadata (auth type + env var holding the key). */
1181
- auth?: JsAuthConfig;
1190
+ readonly auth?: AuthConfig;
1182
1191
  /** Supported endpoint kinds (e.g. `chat`, `embeddings`). */
1183
- endpoints?: Array<string>;
1192
+ readonly endpoints?: Array<string>;
1184
1193
  /** Model-name prefixes claimed by this provider (e.g. `["gpt-", "o1-"]`). */
1185
- modelPrefixes?: Array<string>;
1194
+ readonly modelPrefixes?: Array<string>;
1186
1195
  /**
1187
1196
  * Parameter key renaming for this provider.
1188
1197
  *
1189
1198
  * Each entry maps an OpenAI-spec field name (e.g. `"max_completion_tokens"`)
1190
1199
  * to the name this provider expects (e.g. `"max_tokens"`). Applied
1191
- * automatically by `ConfigDrivenProvider.transform_request`.
1200
+ * automatically by [`ConfigDrivenProvider::transform_request`].
1192
1201
  */
1193
- paramMappings?: Record<string, string>;
1202
+ readonly paramMappings?: Record<string, string>;
1194
1203
  }
1195
1204
 
1196
1205
  /** Configuration for per-model rate limits. */
1197
1206
  export interface RateLimitConfig {
1198
1207
  /** Maximum requests per window. `None` means unlimited. */
1199
- rpm?: number;
1208
+ readonly rpm?: number;
1200
1209
  /** Maximum tokens per window. `None` means unlimited. */
1201
- tpm?: number;
1210
+ readonly tpm?: number;
1202
1211
  /** Fixed window duration (defaults to 60 s). */
1203
- window?: number;
1212
+ readonly window?: number;
1204
1213
  }
1205
1214
 
1206
- export declare function rateLimitConfigDefault(): RateLimitConfig;
1207
-
1208
1215
  /** Controls how much reasoning effort the model should use. */
1209
- export declare const enum ReasoningEffort {
1216
+ export declare enum ReasoningEffort {
1210
1217
  Low = "low",
1211
1218
  Medium = "medium",
1212
1219
  High = "high",
1213
1220
  }
1214
1221
 
1215
- /**
1216
- * Register a custom provider in the global runtime registry.
1217
- *
1218
- * The provider will be checked **before** all built-in providers during model
1219
- * detection. If a provider with the same `name` already exists it is replaced.
1220
- *
1221
- * # Errors
1222
- *
1223
- * Returns an error if the config is invalid (empty name, empty base_url, or
1224
- * no model prefixes).
1225
- */
1226
- export declare function registerCustomProvider(config: CustomProviderConfig): void;
1222
+ /** A document to be reranked — either a plain string or an object with a text field. */
1223
+ export declare enum RerankDocument {
1224
+ /** Plain text document content. */
1225
+ Text = "Text",
1226
+ /** Document with explicit text field (may include metadata). */
1227
+ Object = "Object",
1228
+ }
1227
1229
 
1228
1230
  /** Request to rerank documents by relevance to a query. */
1229
1231
  export interface RerankRequest {
1230
1232
  /** Model ID (e.g., `"cohere/rerank-english-v3.0"`). */
1231
- model?: string;
1233
+ readonly model?: string;
1232
1234
  /** The search query. */
1233
- query?: string;
1235
+ readonly query?: string;
1234
1236
  /** Documents to rerank. */
1235
- documents?: Array<JsRerankDocument>;
1237
+ readonly documents?: Array<RerankDocument>;
1236
1238
  /** Return only the top N results. Optional. */
1237
- topN?: number;
1239
+ readonly topN?: number;
1238
1240
  /** Include the document content in results. Defaults to false. */
1239
- returnDocuments?: boolean;
1241
+ readonly returnDocuments?: boolean;
1240
1242
  }
1241
1243
 
1242
1244
  /** Response from the rerank endpoint. */
1243
1245
  export interface RerankResponse {
1244
1246
  /** Unique identifier for this rerank request. */
1245
- id?: string;
1247
+ readonly id?: string;
1246
1248
  /** Reranked documents in order of relevance. */
1247
- results: Array<JsRerankResult>;
1249
+ readonly results: Array<RerankResult>;
1248
1250
  /** Optional metadata about the reranking operation. */
1249
- meta?: any;
1251
+ readonly meta?: JsonValue;
1250
1252
  }
1251
1253
 
1252
1254
  /** A single reranked document with its relevance score. */
1253
1255
  export interface RerankResult {
1254
1256
  /** Original document index in the input list. */
1255
- index: number;
1257
+ readonly index: number;
1256
1258
  /** Relevance score in `[0, 1]`. Higher indicates more relevant. */
1257
- relevanceScore: number;
1259
+ readonly relevanceScore: number;
1258
1260
  /** Original document content (if `return_documents` was true). */
1259
- document?: JsRerankResultDocument;
1261
+ readonly document?: RerankResultDocument;
1260
1262
  }
1261
1263
 
1262
1264
  /** The text content of a reranked document, returned when `return_documents` is true. */
1263
1265
  export interface RerankResultDocument {
1264
1266
  /** Document text. */
1265
- text: string;
1267
+ readonly text: string;
1266
1268
  }
1267
1269
 
1268
1270
  /** Response format constraint. */
1269
- export interface ResponseFormat {
1270
- type: string;
1271
- jsonSchema?: JsonSchemaFormat;
1272
- }
1271
+ export type ResponseFormat =
1272
+ | { type: "text" }
1273
+ | { type: "json_object" }
1274
+ | { type: "json_schema"; jsonSchema: JsonSchemaFormat };
1273
1275
 
1274
1276
  /** Response from a structured response request. */
1275
1277
  export interface ResponseObject {
1276
1278
  /** Unique response ID. */
1277
- id?: string;
1279
+ readonly id?: string;
1278
1280
  /** Object type (e.g., `"response"`). */
1279
- object?: string;
1281
+ readonly object?: string;
1280
1282
  /** Unix timestamp of response creation. */
1281
- createdAt?: number;
1283
+ readonly createdAt?: number;
1282
1284
  /** Model used to generate the response. */
1283
- model?: string;
1285
+ readonly model?: string;
1284
1286
  /** Status (e.g., `"succeeded"`, `"failed"`). */
1285
- status?: string;
1287
+ readonly status?: string;
1286
1288
  /** Output items from the response. */
1287
- output?: Array<JsResponseOutputItem>;
1289
+ readonly output?: Array<ResponseOutputItem>;
1288
1290
  /** Token usage. */
1289
- usage?: JsResponseUsage;
1291
+ readonly usage?: ResponseUsage;
1290
1292
  /** Error details (if status is "failed"). */
1291
- error?: any;
1293
+ readonly error?: JsonValue;
1292
1294
  }
1293
1295
 
1294
1296
  /** A single output item from the response. */
1295
1297
  export interface ResponseOutputItem {
1296
1298
  /** Output type (e.g., `"text"`, `"object"`, `"error"`). */
1297
- type?: string;
1299
+ readonly itemType?: string;
1298
1300
  /** Output content (flattened into the object). */
1299
- content?: any;
1301
+ readonly content?: JsonValue;
1300
1302
  }
1301
1303
 
1302
1304
  /** A tool available for the response request. */
1303
1305
  export interface ResponseTool {
1304
1306
  /** Tool type (e.g., "extractor", "search"). */
1305
- type?: string;
1307
+ readonly toolType?: string;
1306
1308
  /** Tool configuration (flattened into the object). */
1307
- config?: any;
1309
+ readonly config?: JsonValue;
1308
1310
  }
1309
1311
 
1310
1312
  /** Token usage for a response. */
1311
1313
  export interface ResponseUsage {
1312
1314
  /** Input tokens used. */
1313
- inputTokens?: number;
1315
+ readonly inputTokens?: number;
1314
1316
  /** Output tokens used. */
1315
- outputTokens?: number;
1317
+ readonly outputTokens?: number;
1316
1318
  /** Total tokens used. */
1317
- totalTokens?: number;
1319
+ readonly totalTokens?: number;
1318
1320
  }
1319
1321
 
1320
1322
  /** A search request. */
1321
1323
  export interface SearchRequest {
1322
1324
  /** The model/provider to use (e.g. `"brave/web-search"`, `"tavily/search"`). */
1323
- model?: string;
1325
+ readonly model?: string;
1324
1326
  /** The search query string. */
1325
- query?: string;
1327
+ readonly query?: string;
1326
1328
  /** Maximum number of results to return. */
1327
- maxResults?: number;
1329
+ readonly maxResults?: number;
1328
1330
  /** Domain filter — restrict results to specific domains. */
1329
- searchDomainFilter?: Array<string>;
1331
+ readonly searchDomainFilter?: Array<string>;
1330
1332
  /** Country code for localized results (ISO 3166-1 alpha-2, e.g., `"US"`, `"FR"`). */
1331
- country?: string;
1333
+ readonly country?: string;
1332
1334
  }
1333
1335
 
1334
1336
  /** A search response. */
1335
1337
  export interface SearchResponse {
1336
1338
  /** List of search results. */
1337
- results: Array<JsSearchResult>;
1339
+ readonly results: Array<SearchResult>;
1338
1340
  /** Model/provider that performed the search. */
1339
- model: string;
1341
+ readonly model: string;
1340
1342
  }
1341
1343
 
1342
1344
  /** An individual search result. */
1343
1345
  export interface SearchResult {
1344
1346
  /** Result title. */
1345
- title: string;
1347
+ readonly title: string;
1346
1348
  /** Result URL. */
1347
- url: string;
1349
+ readonly url: string;
1348
1350
  /** Text snippet or excerpt from the page. */
1349
- snippet: string;
1351
+ readonly snippet: string;
1350
1352
  /** Publication or last-updated date, if available. */
1351
- date?: string;
1353
+ readonly date?: string;
1352
1354
  }
1353
1355
 
1356
+ /**
1357
+ * The value broadcast from a singleflight leader to all followers.
1358
+ *
1359
+ * `Arc<LiterLlmError>` is used because `LiterLlmError` is not `Clone` and
1360
+ * broadcast channels require `T: Clone`. The `Arc` adds only a reference-count
1361
+ * bump per follower, which is negligible under the burst loads this layer targets.
1362
+ */
1363
+ export declare class SingleflightResult {}
1364
+
1354
1365
  /** Name of the specific function to invoke. */
1355
1366
  export interface SpecificFunction {
1356
1367
  /** Function name. */
1357
- name?: string;
1368
+ readonly name?: string;
1358
1369
  }
1359
1370
 
1360
1371
  /** Directive to call a specific tool. */
1361
1372
  export interface SpecificToolChoice {
1362
1373
  /** Tool type (always "function"). */
1363
- type?: JsToolType;
1374
+ readonly choiceType?: ToolType;
1364
1375
  /** The specific function to invoke. */
1365
- function?: JsSpecificFunction;
1376
+ readonly function?: SpecificFunction;
1377
+ }
1378
+
1379
+ /** Stop sequence(s) that cause the model to stop generating. */
1380
+ export declare enum StopSequence {
1381
+ /** Single stop sequence. */
1382
+ Single = "Single",
1383
+ /** Multiple stop sequences. */
1384
+ Multiple = "Multiple",
1366
1385
  }
1367
1386
 
1368
1387
  /** A streaming choice with incremental delta. */
1369
1388
  export interface StreamChoice {
1370
1389
  /** Index of this choice in the choices array. */
1371
- index?: number;
1390
+ readonly index?: number;
1372
1391
  /** Incremental update to the message (content, tool calls, etc.). */
1373
- delta?: JsStreamDelta;
1392
+ readonly delta?: StreamDelta;
1374
1393
  /** Why the stream ended (present only in final chunk). */
1375
- finishReason?: JsFinishReason;
1394
+ readonly finishReason?: FinishReason;
1376
1395
  }
1377
1396
 
1378
1397
  /** Incremental delta in a stream chunk. */
1379
1398
  export interface StreamDelta {
1380
1399
  /** Role (typically present only in the first chunk). */
1381
- role?: string;
1400
+ readonly role?: string;
1382
1401
  /** Partial content chunk (e.g., a few words of the response). */
1383
- content?: string;
1402
+ readonly content?: string;
1384
1403
  /** Partial tool calls being streamed. */
1385
- toolCalls?: Array<JsStreamToolCall>;
1404
+ readonly toolCalls?: Array<StreamToolCall>;
1386
1405
  /** Deprecated legacy function_call delta; retained for API compatibility. */
1387
- functionCall?: JsStreamFunctionCall;
1406
+ readonly functionCall?: StreamFunctionCall;
1388
1407
  /** Partial refusal message. */
1389
- refusal?: string;
1408
+ readonly refusal?: string;
1390
1409
  }
1391
1410
 
1392
1411
  /**
@@ -1395,9 +1414,9 @@ export interface StreamDelta {
1395
1414
  * Most providers use standard Server-Sent Events (SSE). AWS Bedrock uses
1396
1415
  * a proprietary binary EventStream framing.
1397
1416
  *
1398
- * Deserialized from the `streaming_format` JSON field via `serde`.
1417
+ * Deserialized from the `streaming_format` JSON field via [`serde`].
1399
1418
  */
1400
- export declare const enum StreamFormat {
1419
+ export declare enum StreamFormat {
1401
1420
  /** Standard Server-Sent Events (text/event-stream). */
1402
1421
  Sse = "sse",
1403
1422
  /** AWS EventStream binary framing (application/vnd.amazon.eventstream). */
@@ -1407,49 +1426,57 @@ export declare const enum StreamFormat {
1407
1426
  /** Partial function call details in a stream. */
1408
1427
  export interface StreamFunctionCall {
1409
1428
  /** Function name (typically in the first chunk). */
1410
- name?: string;
1429
+ readonly name?: string;
1411
1430
  /** Partial JSON arguments chunk. */
1412
- arguments?: string;
1431
+ readonly arguments?: string;
1413
1432
  }
1414
1433
 
1415
1434
  /** Options for streaming responses. */
1416
1435
  export interface StreamOptions {
1417
1436
  /** If true, include token usage in the final stream chunk. */
1418
- includeUsage?: boolean;
1437
+ readonly includeUsage?: boolean;
1419
1438
  }
1420
1439
 
1421
1440
  /** A streaming tool call being built incrementally. */
1422
1441
  export interface StreamToolCall {
1423
1442
  /** Index of this tool call in the tool_calls array. */
1424
- index?: number;
1443
+ readonly index?: number;
1425
1444
  /** Tool call ID (typically in the first chunk for this call). */
1426
- id?: string;
1445
+ readonly id?: string;
1427
1446
  /** Tool type (typically "function"). */
1428
- type?: JsToolType;
1447
+ readonly callType?: ToolType;
1429
1448
  /** Partial function name and arguments. */
1430
- function?: JsStreamFunctionCall;
1449
+ readonly function?: StreamFunctionCall;
1431
1450
  }
1432
1451
 
1433
1452
  /** System message guiding model behavior for the entire conversation. */
1434
1453
  export interface SystemMessage {
1435
1454
  /** Instructions or context that apply throughout the conversation. */
1436
- content?: string;
1455
+ readonly content?: string;
1437
1456
  /** Optional name for the system message source. */
1438
- name?: string;
1457
+ readonly name?: string;
1439
1458
  }
1440
1459
 
1441
1460
  /** A tool call the model wants to execute. */
1442
1461
  export interface ToolCall {
1443
1462
  /** Unique ID for this call, used to reference in tool result messages. */
1444
- id: string;
1463
+ readonly id: string;
1445
1464
  /** Tool type (always "function"). */
1446
- type: JsToolType;
1465
+ readonly callType: ToolType;
1447
1466
  /** Function name and arguments. */
1448
- function: JsFunctionCall;
1467
+ readonly function: FunctionCall;
1468
+ }
1469
+
1470
+ /** Tool usage mode or a specific tool to call. */
1471
+ export declare enum ToolChoice {
1472
+ /** Predefined mode: auto, required, or none. */
1473
+ Mode = "Mode",
1474
+ /** Force a specific tool to be called. */
1475
+ Specific = "Specific",
1449
1476
  }
1450
1477
 
1451
1478
  /** Tool choice mode. */
1452
- export declare const enum ToolChoiceMode {
1479
+ export declare enum ToolChoiceMode {
1453
1480
  /** Model may or may not call tools; default behavior. */
1454
1481
  Auto = "auto",
1455
1482
  /** Model must call at least one tool. */
@@ -1461,11 +1488,11 @@ export declare const enum ToolChoiceMode {
1461
1488
  /** Tool execution result returned to the model. */
1462
1489
  export interface ToolMessage {
1463
1490
  /** Result of the tool execution. */
1464
- content?: string;
1491
+ readonly content?: string;
1465
1492
  /** ID of the tool call this result responds to. */
1466
- toolCallId?: string;
1493
+ readonly toolCallId?: string;
1467
1494
  /** Optional tool/function name. */
1468
- name?: string;
1495
+ readonly name?: string;
1469
1496
  }
1470
1497
 
1471
1498
  /**
@@ -1475,68 +1502,64 @@ export interface ToolMessage {
1475
1502
  * that constraint at the type level and rejects any other value on
1476
1503
  * deserialization.
1477
1504
  */
1478
- export declare const enum ToolType {
1505
+ export declare enum ToolType {
1479
1506
  Function = "function",
1480
1507
  }
1481
1508
 
1482
1509
  /** Response from a transcription request. */
1483
1510
  export interface TranscriptionResponse {
1484
1511
  /** The transcribed text. */
1485
- text?: string;
1512
+ readonly text?: string;
1486
1513
  /** Detected language (ISO-639-1 code). */
1487
- language?: string;
1514
+ readonly language?: string;
1488
1515
  /** Total audio duration in seconds. */
1489
- duration?: number;
1516
+ readonly duration?: number;
1490
1517
  /** Detailed segment-level transcription (if response_format is "verbose_json"). */
1491
- segments?: Array<JsTranscriptionSegment>;
1518
+ readonly segments?: Array<TranscriptionSegment>;
1492
1519
  }
1493
1520
 
1494
1521
  /** A segment of transcribed audio with timing information. */
1495
1522
  export interface TranscriptionSegment {
1496
1523
  /** Segment index (0-based). */
1497
- id?: number;
1524
+ readonly id?: number;
1498
1525
  /** Start time in seconds. */
1499
- start?: number;
1526
+ readonly start?: number;
1500
1527
  /** End time in seconds. */
1501
- end?: number;
1528
+ readonly end?: number;
1502
1529
  /** Transcribed text for this segment. */
1503
- text?: string;
1530
+ readonly text?: string;
1504
1531
  }
1505
1532
 
1506
- /**
1507
- * Remove a previously registered custom provider by name.
1508
- *
1509
- * Returns `true` if a provider with the given name was found and removed,
1510
- * `false` if no such provider existed.
1511
- *
1512
- * # Errors
1513
- *
1514
- * Returns an error only if the internal lock is poisoned.
1515
- */
1516
- export declare function unregisterCustomProvider(name: string): boolean;
1517
-
1518
1533
  /** Token-usage accounting returned by the provider on each completion / embedding call. */
1519
1534
  export interface Usage {
1520
1535
  /** Prompt tokens used. Defaults to 0 when absent (some providers omit this). */
1521
- promptTokens?: number;
1536
+ readonly promptTokens?: number;
1522
1537
  /** Completion tokens used. Defaults to 0 when absent (e.g. embedding responses). */
1523
- completionTokens?: number;
1538
+ readonly completionTokens?: number;
1524
1539
  /** Total tokens used. Defaults to 0 when absent (some providers omit this). */
1525
- totalTokens?: number;
1540
+ readonly totalTokens?: number;
1526
1541
  /**
1527
1542
  * Breakdown of tokens used in the prompt, including cached tokens served
1528
1543
  * at the provider's discounted cache-read rate. Absent when the provider
1529
1544
  * does not return prompt-token details.
1530
1545
  */
1531
- promptTokensDetails?: JsPromptTokensDetails;
1546
+ readonly promptTokensDetails?: PromptTokensDetails;
1547
+ }
1548
+
1549
+ /** User message content as either plain text or a list of multimodal parts. */
1550
+ export declare enum UserContent {
1551
+ /** Plain text content. */
1552
+ Text = "Text",
1553
+ /** Array of content parts (text, images, documents, audio). */
1554
+ Parts = "Parts",
1532
1555
  }
1533
1556
 
1534
1557
  /** User message in the conversation. */
1535
1558
  export interface UserMessage {
1536
1559
  /** Message content as plain text or array of content parts (text, images, documents, audio). */
1537
- content?: JsUserContent;
1560
+ readonly content?: UserContent;
1538
1561
  /** Optional name for the user. */
1539
- name?: string;
1562
+ readonly name?: string;
1540
1563
  }
1541
1564
 
1542
1565
  /**
@@ -1547,13 +1570,41 @@ export interface UserMessage {
1547
1570
  */
1548
1571
  export interface WaitForBatchConfig {
1549
1572
  /** Initial interval between polls, in seconds. */
1550
- initialIntervalSecs?: number;
1573
+ readonly initialIntervalSecs?: number;
1551
1574
  /** Maximum interval between polls (backoff plateau), in seconds. */
1552
- maxIntervalSecs?: number;
1575
+ readonly maxIntervalSecs?: number;
1553
1576
  /** Exponential backoff multiplier (e.g., 1.5 increases delay by 50% each poll). */
1554
- backoffMultiplier?: number;
1577
+ readonly backoffMultiplier?: number;
1555
1578
  /** Optional timeout in seconds — polling fails if this duration is exceeded. */
1556
- timeoutSecs?: number;
1579
+ readonly timeoutSecs?: number;
1580
+ }
1581
+
1582
+ /**
1583
+ * Register a custom provider in the global runtime registry.
1584
+ *
1585
+ * The provider will be checked **before** all built-in providers during model
1586
+ * detection. If a provider with the same `name` already exists it is replaced.
1587
+ * @throws Returns an error if the config is invalid (empty name, empty base_url, or
1588
+ * no model prefixes).
1589
+ */
1590
+ export declare function registerCustomProvider(config: CustomProviderConfig): void;
1591
+
1592
+ /**
1593
+ * Remove a previously registered custom provider by name.
1594
+ *
1595
+ * Returns `true` if a provider with the given name was found and removed,
1596
+ * `false` if no such provider existed.
1597
+ * @throws Returns an error only if the internal lock is poisoned.
1598
+ */
1599
+ export declare function unregisterCustomProvider(name: string): boolean;
1600
+
1601
+ export declare class ChatStreamIterator {
1602
+ next(value?: undefined): Promise<IteratorResult<ChatCompletionChunk, void>>;
1603
+ [Symbol.asyncIterator](): AsyncGenerator<ChatCompletionChunk, void, undefined>;
1557
1604
  }
1558
1605
 
1559
- export declare function waitForBatchConfigDefault(): WaitForBatchConfig;
1606
+ export declare class LiterLlmErrorInfo {
1607
+ statusCode(): number;
1608
+ isTransient(): boolean;
1609
+ errorType(): string;
1610
+ }