@kreuzberg/liter-llm-node 1.5.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.d.ts CHANGED
@@ -1,155 +1,139 @@
1
- // This file is auto-generated by alef — DO NOT EDIT.
2
- // alef:hash:fe64a8a06beeb01b5344005fa07dcdfdf3e244d772ace0f89dbba84b0541ca2d
3
- // To regenerate: alef generate
4
- // To verify freshness: alef verify --exit-code
1
+ /* auto-generated by NAPI-RS */
5
2
  /* eslint-disable */
6
-
7
- export type JsonValue = string | number | boolean | null | JsonValue[] | { [key: string]: JsonValue };
8
-
9
- /**
10
- * Return all provider configs from the registry.
11
- *
12
- * Useful for tooling, documentation generation, or runtime enumeration.
13
- */
14
- export declare function allProviders(): Array<ProviderConfig>;
15
-
16
- /**
17
- * Calculate the estimated cost of a completion given a model name and token
18
- * counts.
19
- *
20
- * Returns `None` if the model is not present in the embedded pricing registry.
21
- * Returns `Some(cost_usd)` otherwise, where the value is in US dollars.
22
- *
23
- * When an exact model name match is not found, progressively shorter prefixes
24
- * are tried by stripping from the last `-` or `.` separator. For example,
25
- * `gpt-4-0613` will match `gpt-4` if no `gpt-4-0613` entry exists.
26
- */
27
- export declare function completionCost(model: string, promptTokens: number, completionTokens: number): number | null;
28
-
29
3
  /**
30
- * Calculate the estimated cost of a completion, accounting for cached
31
- * (cache-hit) prompt tokens billed at the provider's discounted rate.
32
- *
33
- * `cached_tokens` is the count of prompt tokens served from the provider's
34
- * prompt cache. It must be `<= prompt_tokens` (cached tokens are a subset of
35
- * the prompt). The non-cached portion is billed at `input_cost_per_token`
36
- * and the cached portion at `cache_read_input_token_cost` when the model
37
- * has cache pricing; otherwise the entire prompt is billed at the regular
38
- * input rate.
4
+ * This type implements JavaScript's async iterable protocol.
5
+ * It can be used with `for await...of` loops.
39
6
  *
40
- * Returns `None` if the model is not present in the embedded pricing
41
- * registry, mirroring [`completion_cost`].
7
+ * @see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Iteration_protocols#the_async_iterator_and_async_iterable_protocols
42
8
  */
43
- export declare function completionCostWithCache(model: string, promptTokens: number, cachedTokens: number, completionTokens: number): number | null;
9
+ export declare class ChatStreamIterator {}
44
10
 
45
11
  /**
46
- * Return the set of complex provider names.
47
- *
48
- * Complex providers require custom auth/routing logic beyond simple bearer
49
- * tokens (e.g. AWS Bedrock SigV4, Vertex AI OAuth2).
12
+ * Default client implementation backed by `reqwest`.
50
13
  *
51
- * The returned reference points into the static registry no allocation.
52
- */
53
- export declare function complexProviderNames(): Array<string>;
54
-
55
- /**
56
- * Count tokens for a full [`ChatCompletionRequest`].
14
+ * Sends requests to 140+ LLM providers with automatic provider detection
15
+ * and per-request routing. The provider is resolved at construction time
16
+ * from `model_hint` (or defaults to OpenAI), but individual requests can
17
+ * override the provider via model name prefix (e.g. `"anthropic/claude-3-5-sonnet"`
18
+ * routes to Anthropic regardless of construction-time setting).
57
19
  *
58
- * Sums tokens across all message text contents plus a per-message overhead
59
- * of ~4 tokens (for role, separators, and formatting metadata). Tool
60
- * definitions and multimodal content parts (images, audio, documents) are
61
- * not counted — only textual content contributes to the token total.
62
- * @throws Returns [`LiterLlmError::BadRequest`] if the tokenizer cannot be loaded or
63
- * if tokenization fails for any message.
64
- */
65
- export declare function countRequestTokens(model: string, req?: ChatCompletionRequest | undefined | null): number;
66
-
67
- /**
68
- * Count tokens in a text string using the tokenizer for the given model.
20
+ * When the model prefix does not match any known provider, the construction-time
21
+ * provider is used as the fallback. This enables seamless migration between
22
+ * providers by changing only the model name.
69
23
  *
70
- * The tokenizer is resolved from the model name prefix (e.g. `"gpt-4o"` maps
71
- * to the `Xenova/gpt-4o` HuggingFace tokenizer). Tokenizers are cached after
72
- * first load.
73
- * @throws Returns [`LiterLlmError::BadRequest`] if the tokenizer cannot be loaded
74
- * (e.g. network failure on first use) or if tokenization itself fails.
24
+ * The provider is stored behind an `Arc` so it can be shared cheaply into
25
+ * async closures and streaming tasks. Pre-computed auth headers and extra
26
+ * headers are cached at construction to avoid redundant encoding on every request.
75
27
  */
76
- export declare function countTokens(model: string, text: string): number;
28
+ export declare class DefaultClient {
29
+ chat(req: ChatCompletionRequest): Promise<ChatCompletionResponse>;
30
+ chatStream(req: ChatCompletionRequest): Promise<ChatStreamIterator>;
31
+ embed(req: EmbeddingRequest): Promise<EmbeddingResponse>;
32
+ listModels(): Promise<ModelsListResponse>;
33
+ imageGenerate(req: CreateImageRequest): Promise<ImagesResponse>;
34
+ speech(req: CreateSpeechRequest): Promise<Buffer>;
35
+ transcribe(req: CreateTranscriptionRequest): Promise<TranscriptionResponse>;
36
+ moderate(req: ModerationRequest): Promise<ModerationResponse>;
37
+ rerank(req: RerankRequest): Promise<RerankResponse>;
38
+ search(req: SearchRequest): Promise<SearchResponse>;
39
+ ocr(req: OcrRequest): Promise<OcrResponse>;
40
+ createFile(req: CreateFileRequest): Promise<FileObject>;
41
+ retrieveFile(fileId: string): Promise<FileObject>;
42
+ deleteFile(fileId: string): Promise<DeleteResponse>;
43
+ listFiles(query?: FileListQuery | undefined | null): Promise<FileListResponse>;
44
+ fileContent(fileId: string): Promise<Buffer>;
45
+ createBatch(req: CreateBatchRequest): Promise<BatchObject>;
46
+ retrieveBatch(batchId: string): Promise<BatchObject>;
47
+ listBatches(query?: BatchListQuery | undefined | null): Promise<BatchListResponse>;
48
+ cancelBatch(batchId: string): Promise<BatchObject>;
49
+ fetchBatchForPolling(batchId: string): Promise<BatchObject>;
50
+ /**
51
+ * Poll a batch until it reaches a terminal status (Completed, Failed, Expired, Cancelled).
52
+ *
53
+ * Uses exponential backoff with configurable initial interval, maximum interval, and backoff multiplier.
54
+ * Optionally supports a timeout that aborts polling if exceeded.
55
+ *
56
+ * # Errors
57
+ *
58
+ * Returns `BatchWaitError.Failed` if the batch reaches a failure terminal status.
59
+ * Returns `BatchWaitError.Timeout` if the configured timeout is exceeded.
60
+ * Returns `BatchWaitError.Client` for underlying client errors.
61
+ *
62
+ * # Example
63
+ */
64
+ waitForBatch(batchId: string, config: WaitForBatchConfig): Promise<BatchObject>;
65
+ createResponse(req: CreateResponseRequest): Promise<ResponseObject>;
66
+ retrieveResponse(responseId: string): Promise<ResponseObject>;
67
+ cancelResponse(responseId: string): Promise<ResponseObject>;
68
+ }
69
+ export type JsDefaultClient = DefaultClient;
77
70
 
78
- /**
79
- * Create a new LLM client with simple scalar configuration.
80
- *
81
- * This is the primary binding entry-point. All parameters except `api_key`
82
- * are optional omitting them uses the same defaults as
83
- * [`ClientConfigBuilder`].
84
- * @throws Returns [`LiterLlmError`] if the underlying HTTP client cannot be
85
- * constructed, or if the resolved provider configuration is invalid.
86
- */
87
- export declare function createClient(apiKey: string, baseUrl?: string | undefined | null, timeoutSecs?: number | undefined | null, maxRetries?: number | undefined | null, modelHint?: string | undefined | null): DefaultClient;
71
+ export declare class JsLiterLlmErrorInfo {
72
+ statusCode: number;
73
+ isTransient: boolean;
74
+ errorType: string;
75
+ /** HTTP status code for this error (0 means no associated status). */
76
+ statusCode(): number;
77
+ /** Returns `true` if the error is transient and a retry may succeed. */
78
+ isTransient(): boolean;
79
+ /** Machine-readable error category string for matching and logging. */
80
+ errorType(): string;
81
+ }
88
82
 
89
83
  /**
90
- * Create a new LLM client from a JSON string.
84
+ * The value broadcast from a singleflight leader to all followers.
91
85
  *
92
- * The JSON object accepts the same fields as `liter-llm.toml` (snake_case).
93
- * @throws Returns [`LiterLlmError::BadRequest`] if `json` is not valid JSON or
94
- * contains unknown fields.
86
+ * `Arc<LiterLlmError>` is used because `LiterLlmError` is not `Clone` and
87
+ * broadcast channels require `T: Clone`. The `Arc` adds only a reference-count
88
+ * bump per follower, which is negligible under the burst loads this layer targets.
95
89
  */
96
- export declare function createClientFromJson(json: string): DefaultClient;
90
+ export declare class SingleflightResult {}
91
+ export type JsSingleflightResult = SingleflightResult;
97
92
 
98
93
  /**
99
- * Install the `ring` crypto provider as the rustls process default, idempotently.
100
- *
101
- * rustls 0.23+ removed the implicit default provider. This function installs
102
- * `ring` once per process. Subsequent calls are no-ops. Calling it from a
103
- * downstream Rust app that has already installed `aws-lc-rs` is safe — the
104
- * `Err` from `install_default()` is silently ignored.
105
- *
106
- * Called automatically by every internal `reqwest::Client` constructor
107
- * (auth providers, default HTTP client). Bindings and downstream consumers
108
- * reach those constructors transitively, so no manual init is required.
109
- *
110
- * WASM builds are exempt — the WASM target uses the browser/Node.js fetch
111
- * API instead of rustls, so no crypto provider is needed.
94
+ * Return all provider configs from the registry.
112
95
  *
113
- * Windows builds use native-tls (SChannel) via reqwest, so rustls is not
114
- * present and no crypto provider installation is needed.
96
+ * Useful for tooling, documentation generation, or runtime enumeration.
97
+ * Returns the public `ProviderConfig` slice (without capability flags).
98
+ * To query capability flags for a specific provider use `capabilities`.
115
99
  */
116
- export declare function ensureCryptoProvider(): void;
100
+ export declare function allProviders(): Array<ProviderConfig>;
117
101
 
118
102
  /** Assistant's response to a user message. */
119
103
  export interface AssistantMessage {
120
104
  /** The assistant's text response. Absent if tool calls are returned instead. */
121
- readonly content?: string
105
+ content?: string;
122
106
  /** Optional name for the assistant. */
123
- readonly name?: string
107
+ name?: string;
124
108
  /** Tool calls the model wants to execute, if any. */
125
- readonly toolCalls?: Array<ToolCall>
109
+ toolCalls?: Array<JsToolCall>;
126
110
  /** Refusal reason, if the model declined to respond per safety policies. */
127
- readonly refusal?: string
111
+ refusal?: string;
128
112
  /** Deprecated legacy function_call field; retained for API compatibility. */
129
- readonly functionCall?: FunctionCall
113
+ functionCall?: JsFunctionCall;
130
114
  }
131
115
 
132
116
  /** Audio content part for speech-capable models. */
133
117
  export interface AudioContent {
134
118
  /** Base64-encoded audio data. */
135
- readonly data?: string
119
+ data?: string;
136
120
  /** Audio format (e.g., "wav", "mp3", "ogg"). */
137
- readonly format?: string
121
+ format?: string;
138
122
  }
139
123
 
140
124
  /** Auth configuration block. */
141
125
  export interface AuthConfig {
142
126
  /** Auth scheme classification. */
143
- readonly authType: AuthType
127
+ type: JsAuthType;
144
128
  /**
145
129
  * Name of the environment variable that holds the API key (e.g. `"OPENAI_API_KEY"`).
146
130
  * Holds the variable name, never the secret value.
147
131
  */
148
- readonly envVar?: string
132
+ envVar?: string;
149
133
  }
150
134
 
151
135
  /** How the API key is sent in the HTTP request. */
152
- export declare enum AuthHeaderFormat {
136
+ export declare const enum AuthHeaderFormat {
153
137
  /** Bearer token: `Authorization: Bearer <key>` */
154
138
  Bearer = "Bearer",
155
139
  /** Custom header: e.g., `X-Api-Key: <key>` */
@@ -159,7 +143,7 @@ export declare enum AuthHeaderFormat {
159
143
  }
160
144
 
161
145
  /** Auth scheme used by a provider. */
162
- export declare enum AuthType {
146
+ export declare const enum AuthType {
163
147
  /** Standard `Authorization: Bearer <key>` header. */
164
148
  Bearer = "bearer",
165
149
  /** `x-api-key: <key>` header (also handles `"header"` and `"x-api-key"` aliases). */
@@ -173,69 +157,69 @@ export declare enum AuthType {
173
157
  /** Query parameters for listing batches. */
174
158
  export interface BatchListQuery {
175
159
  /** Maximum number of results to return. Defaults to 20. */
176
- readonly limit?: number
160
+ limit?: number;
177
161
  /** Pagination cursor: return results after this batch ID. */
178
- readonly after?: string
162
+ after?: string;
179
163
  }
180
164
 
181
165
  /** Response from listing batches. */
182
166
  export interface BatchListResponse {
183
167
  /** Object type (always `"list"`). */
184
- readonly object?: string
168
+ object?: string;
185
169
  /** List of batch objects. */
186
- readonly data?: Array<BatchObject>
170
+ data?: Array<BatchObject>;
187
171
  /** Whether more results are available. */
188
- readonly hasMore?: boolean
172
+ hasMore?: boolean;
189
173
  /** First batch ID in the result set (for pagination). */
190
- readonly firstId?: string
174
+ firstId?: string;
191
175
  /** Last batch ID in the result set (for pagination). */
192
- readonly lastId?: string
176
+ lastId?: string;
193
177
  }
194
178
 
195
179
  /** A batch job object. */
196
180
  export interface BatchObject {
197
181
  /** Unique batch ID. */
198
- readonly id?: string
182
+ id?: string;
199
183
  /** Object type (always `"batch"`). */
200
- readonly object?: string
184
+ object?: string;
201
185
  /** API endpoint (e.g., `"/v1/chat/completions"`). */
202
- readonly endpoint?: string
186
+ endpoint?: string;
203
187
  /** ID of the input file. */
204
- readonly inputFileId?: string
188
+ inputFileId?: string;
205
189
  /** Completion window (e.g., `"24h"`). */
206
- readonly completionWindow?: string
190
+ completionWindow?: string;
207
191
  /** Current job status. */
208
- readonly status?: BatchStatus
192
+ status?: JsBatchStatus;
209
193
  /** ID of the output file (present when completed). */
210
- readonly outputFileId?: string
194
+ outputFileId?: string;
211
195
  /** ID of the error file (present if some requests failed). */
212
- readonly errorFileId?: string
196
+ errorFileId?: string;
213
197
  /** Unix timestamp of batch creation. */
214
- readonly createdAt?: number
198
+ createdAt?: number;
215
199
  /** Unix timestamp of completion (if completed). */
216
- readonly completedAt?: number
200
+ completedAt?: number;
217
201
  /** Unix timestamp of failure (if failed). */
218
- readonly failedAt?: number
202
+ failedAt?: number;
219
203
  /** Unix timestamp of expiration (if expired). */
220
- readonly expiredAt?: number
204
+ expiredAt?: number;
221
205
  /** Request processing counts. */
222
- readonly requestCounts?: BatchRequestCounts
206
+ requestCounts?: JsBatchRequestCounts;
223
207
  /** Metadata attached to the batch. */
224
- readonly metadata?: JsonValue
208
+ metadata?: any;
225
209
  }
226
210
 
227
211
  /** Request processing counts for a batch. */
228
212
  export interface BatchRequestCounts {
229
213
  /** Total requests in the batch. */
230
- readonly total?: number
214
+ total?: number;
231
215
  /** Completed requests. */
232
- readonly completed?: number
216
+ completed?: number;
233
217
  /** Failed requests. */
234
- readonly failed?: number
218
+ failed?: number;
235
219
  }
236
220
 
237
221
  /** Status of a batch job. */
238
- export declare enum BatchStatus {
222
+ export declare const enum BatchStatus {
239
223
  /** Validating the input file. */
240
224
  Validating = "validating",
241
225
  /** Job failed. */
@@ -257,368 +241,494 @@ export declare enum BatchStatus {
257
241
  /** Configuration for budget enforcement. */
258
242
  export interface BudgetConfig {
259
243
  /** Maximum total spend across all models, in USD. `None` means unlimited. */
260
- readonly globalLimit?: number
244
+ globalLimit?: number;
261
245
  /**
262
246
  * Per-model spending limits in USD. Models not listed here are only
263
247
  * constrained by `global_limit`.
264
248
  */
265
- readonly modelLimits?: Record<string, number>
249
+ modelLimits?: Record<string, number>;
266
250
  /** Whether to reject requests or merely warn when a limit is exceeded. */
267
- readonly enforcement?: Enforcement
251
+ enforcement?: JsEnforcement;
268
252
  }
269
253
 
254
+ export declare function budgetConfigDefault(): BudgetConfig;
255
+
270
256
  /** Storage backend for the response cache. */
271
- export type CacheBackend =
272
- | { type: 'memory' }
273
- | { type: 'open_dal'; scheme: string; config: Record<string, string> }
257
+ export interface CacheBackend {
258
+ type: string;
259
+ scheme?: string;
260
+ config?: Record<string, string>;
261
+ }
274
262
 
275
263
  /** Configuration for the response cache. */
276
264
  export interface CacheConfig {
277
265
  /** Maximum number of cached entries. */
278
- readonly maxEntries?: number
266
+ maxEntries?: number;
279
267
  /** Time-to-live for each cached entry. */
280
- readonly ttl?: number
268
+ ttl?: number;
281
269
  /** Storage backend to use. */
282
- readonly backend?: CacheBackend
270
+ backend?: JsCacheBackend;
283
271
  }
284
272
 
273
+ export declare function cacheConfigDefault(): CacheConfig;
274
+
275
+ /**
276
+ * Return the capability flags for a named provider.
277
+ *
278
+ * Performs an O(n) linear scan over the embedded registry (142 entries).
279
+ * Returns an owned value so that bindings can box/copy it across the FFI
280
+ * boundary without dealing with lifetimes. `ProviderCapabilities` is `Copy`,
281
+ * so this is a cheap memcpy of seven `bool` fields.
282
+ *
283
+ * For unknown `provider_name` values the function returns an all-`false`
284
+ * sentinel so callers never need to handle `Option`.
285
+ */
286
+ export declare function capabilities(providerName: string): ProviderCapabilities;
287
+
285
288
  /** A streamed chunk of a chat completion response. */
286
289
  export interface ChatCompletionChunk {
287
290
  /** Unique identifier for this stream. */
288
- readonly id?: string
291
+ id?: string;
289
292
  /**
290
293
  * Always `"chat.completion.chunk"` from OpenAI-compatible APIs. Stored
291
294
  * as a plain `String` so non-standard provider values do not fail parsing.
292
295
  */
293
- readonly object?: string
296
+ object?: string;
294
297
  /** Unix timestamp of chunk creation. */
295
- readonly created?: number
298
+ created?: number;
296
299
  /** Model used to generate the chunk. */
297
- readonly model?: string
300
+ model?: string;
298
301
  /** Streaming choices (delta updates). */
299
- readonly choices?: Array<StreamChoice>
302
+ choices?: Array<JsStreamChoice>;
300
303
  /** Token usage (typically only in the final chunk). */
301
- readonly usage?: Usage
304
+ usage?: Usage;
302
305
  /** Fingerprint of the system configuration (OpenAI-specific). */
303
- readonly systemFingerprint?: string
306
+ systemFingerprint?: string;
304
307
  /** Service tier used (OpenAI-specific). */
305
- readonly serviceTier?: string
308
+ serviceTier?: string;
306
309
  }
307
310
 
308
311
  /** Chat completion request (compatible with OpenAI and similar APIs). */
309
312
  export interface ChatCompletionRequest {
310
313
  /** Model ID (e.g., `"gpt-4o-mini"`, `"claude-3-5-sonnet"`). */
311
- readonly model?: string
314
+ model?: string;
312
315
  /** Conversation history from oldest to newest. */
313
- readonly messages?: Array<Message>
316
+ messages?: Array<JsMessage>;
314
317
  /** Sampling temperature in `[0.0, 2.0]`. Higher increases randomness. Defaults to 1.0. */
315
- readonly temperature?: number
318
+ temperature?: number;
316
319
  /** Nucleus sampling parameter in `[0.0, 1.0]`. Lower is more focused. */
317
- readonly topP?: number
320
+ topP?: number;
318
321
  /** Number of chat completions to generate. Defaults to 1. */
319
- readonly n?: number
322
+ n?: number;
320
323
  /**
321
324
  * Whether to stream the response.
322
325
  *
323
326
  * Managed by the client layer — do not set directly.
324
327
  */
325
- readonly stream?: boolean
328
+ stream?: boolean;
326
329
  /** Stop sequence(s) that halt token generation. */
327
- readonly stop?: StopSequence
330
+ stop?: JsStopSequence;
328
331
  /** Max output tokens. Different from max_completion_tokens in some providers. */
329
- readonly maxTokens?: number
332
+ maxTokens?: number;
330
333
  /** Presence penalty in `[-2.0, 2.0]`. Positive discourages repeated topics. */
331
- readonly presencePenalty?: number
334
+ presencePenalty?: number;
332
335
  /** Frequency penalty in `[-2.0, 2.0]`. Positive discourages repeated tokens. */
333
- readonly frequencyPenalty?: number
336
+ frequencyPenalty?: number;
334
337
  /**
335
338
  * Token bias map. Uses `BTreeMap` (sorted keys) for deterministic
336
339
  * serialization order — important when hashing or signing requests.
337
340
  */
338
- readonly logitBias?: Record<string, number>
341
+ logitBias?: Record<string, number>;
339
342
  /** User identifier for request tracking and abuse detection. */
340
- readonly user?: string
343
+ user?: string;
341
344
  /** Tools the model can invoke. */
342
- readonly tools?: Array<ChatCompletionTool>
345
+ tools?: Array<ChatCompletionTool>;
343
346
  /** Tool usage mode (auto, required, none, or specific tool). */
344
- readonly toolChoice?: ToolChoice
347
+ toolChoice?: JsToolChoice;
345
348
  /** Whether the model can call multiple tools in parallel. Defaults to true. */
346
- readonly parallelToolCalls?: boolean
349
+ parallelToolCalls?: boolean;
347
350
  /** Output format constraint (text, JSON, JSON schema). */
348
- readonly responseFormat?: ResponseFormat
351
+ responseFormat?: JsResponseFormat;
349
352
  /** Streaming options (e.g., include_usage). */
350
- readonly streamOptions?: StreamOptions
353
+ streamOptions?: JsStreamOptions;
351
354
  /** Random seed for reproducible outputs. Provider support varies. */
352
- readonly seed?: number
355
+ seed?: number;
353
356
  /** Reasoning effort level (low, medium, high) for extended-thinking models. */
354
- readonly reasoningEffort?: ReasoningEffort
357
+ reasoningEffort?: JsReasoningEffort;
355
358
  /**
356
359
  * Provider-specific extra parameters merged into the request body.
357
360
  * Use for guardrails, safety settings, grounding config, etc.
358
361
  */
359
- readonly extraBody?: JsonValue
362
+ extraBody?: any;
360
363
  }
361
364
 
362
365
  /** Chat completion response from the API. */
363
366
  export interface ChatCompletionResponse {
364
367
  /** Unique identifier for this response. */
365
- readonly id?: string
368
+ id?: string;
366
369
  /**
367
370
  * Always `"chat.completion"` from OpenAI-compatible APIs. Stored as a
368
371
  * plain `String` so non-standard provider values do not break deserialization.
369
372
  */
370
- readonly object?: string
373
+ object?: string;
371
374
  /** Unix timestamp of response creation. */
372
- readonly created?: number
375
+ created?: number;
373
376
  /** Model used to generate the response. */
374
- readonly model?: string
377
+ model?: string;
375
378
  /** List of completion choices. */
376
- readonly choices?: Array<Choice>
379
+ choices?: Array<JsChoice>;
377
380
  /** Token usage statistics. */
378
- readonly usage?: Usage
381
+ usage?: Usage;
379
382
  /** Fingerprint of the system configuration (OpenAI-specific). */
380
- readonly systemFingerprint?: string
383
+ systemFingerprint?: string;
381
384
  /** Service tier used (OpenAI-specific). */
382
- readonly serviceTier?: string
385
+ serviceTier?: string;
383
386
  }
384
387
 
385
388
  /** A tool the model can invoke (currently, all tools are functions). */
386
389
  export interface ChatCompletionTool {
387
390
  /** Tool type (always "function" in OpenAI spec). */
388
- readonly toolType: ToolType
391
+ type: JsToolType;
389
392
  /** Function definition with name, description, and JSON schema parameters. */
390
- readonly function: FunctionDefinition
393
+ function: JsFunctionDefinition;
391
394
  }
392
395
 
396
+ export declare function chatStream(
397
+ engine: DefaultClient,
398
+ model: string,
399
+ ): Promise<ChatStreamIterator>;
400
+
401
+ /**
402
+ * Assert that `current_len + incoming` does not exceed `limit`.
403
+ *
404
+ * Call this before appending `incoming` bytes to any buffer that must
405
+ * stay below `limit`. Returns `Err(LiterLlmError.Streaming)` on overflow
406
+ * and emits a `tracing.warn!` with context.
407
+ *
408
+ * # Example
409
+ */
410
+ export declare function checkBound(
411
+ context: string,
412
+ currentLen: number,
413
+ incoming: number,
414
+ limit: number,
415
+ ): void;
416
+
393
417
  /** A single completion choice. */
394
418
  export interface Choice {
395
419
  /** Index of this choice in the choices array. */
396
- readonly index?: number
420
+ index?: number;
397
421
  /** The assistant's message response. */
398
- readonly message?: AssistantMessage
422
+ message?: AssistantMessage;
399
423
  /** Why the model stopped generating (stop, length, tool_calls, content_filter, etc.). */
400
- readonly finishReason?: FinishReason
424
+ finishReason?: JsFinishReason;
425
+ }
426
+
427
+ /** Observable state of a circuit breaker. */
428
+ export declare const enum CircuitState {
429
+ /** Requests flow through normally. */
430
+ Closed = "Closed",
431
+ /** All requests are rejected; the circuit is waiting for the backoff to elapse. */
432
+ Open = "Open",
433
+ /** One probe request is allowed through to test service health. */
434
+ HalfOpen = "HalfOpen",
401
435
  }
402
436
 
437
+ /**
438
+ * Remove all guardrails from the global registry.
439
+ *
440
+ * Primarily useful in tests to reset state between test cases.
441
+ *
442
+ * # Panics
443
+ *
444
+ * Panics if the global registry lock is poisoned.
445
+ */
446
+ export declare function clear(): void;
447
+
448
+ /**
449
+ * Calculate the estimated cost of a completion given a model name and token
450
+ * counts.
451
+ *
452
+ * Returns `None` if the model is not present in the embedded pricing registry.
453
+ * Returns `Some(cost_usd)` otherwise, where the value is in US dollars.
454
+ *
455
+ * When an exact model name match is not found, progressively shorter prefixes
456
+ * are tried by stripping from the last `-` or `.` separator. For example,
457
+ * `gpt-4-0613` will match `gpt-4` if no `gpt-4-0613` entry exists.
458
+ *
459
+ * # Example
460
+ */
461
+ export declare function completionCost(
462
+ model: string,
463
+ promptTokens: number,
464
+ completionTokens: number,
465
+ ): number | null;
466
+
467
+ /**
468
+ * Calculate the estimated cost of a completion, accounting for cached
469
+ * (cache-hit) prompt tokens billed at the provider's discounted rate.
470
+ *
471
+ * `cached_tokens` is the count of prompt tokens served from the provider's
472
+ * prompt cache. It must be `<= prompt_tokens` (cached tokens are a subset of
473
+ * the prompt). The non-cached portion is billed at `input_cost_per_token`
474
+ * and the cached portion at `cache_read_input_token_cost` when the model
475
+ * has cache pricing; otherwise the entire prompt is billed at the regular
476
+ * input rate.
477
+ *
478
+ * Returns `None` if the model is not present in the embedded pricing
479
+ * registry, mirroring `completion_cost`.
480
+ */
481
+ export declare function completionCostWithCache(
482
+ model: string,
483
+ promptTokens: number,
484
+ cachedTokens: number,
485
+ completionTokens: number,
486
+ ): number | null;
487
+
488
+ /**
489
+ * Return the set of complex provider names.
490
+ *
491
+ * Complex providers require custom auth/routing logic beyond simple bearer
492
+ * tokens (e.g. AWS Bedrock SigV4, Vertex AI OAuth2).
493
+ *
494
+ * The returned reference points into the static registry — no allocation.
495
+ */
496
+ export declare function complexProviderNames(): Array<string>;
497
+
403
498
  /** A single content part in a user message — text, image, document, or audio. */
404
- export type ContentPart =
405
- | { type: 'text'; text: string }
406
- | { type: 'image_url'; imageUrl: ImageUrl }
407
- | { type: 'document'; document: DocumentContent }
408
- | { type: 'input_audio'; inputAudio: AudioContent }
499
+ export interface ContentPart {
500
+ type: string;
501
+ text?: string;
502
+ imageUrl?: ImageUrl;
503
+ document?: DocumentContent;
504
+ inputAudio?: AudioContent;
505
+ }
506
+
507
+ /**
508
+ * Count tokens for a full `ChatCompletionRequest`.
509
+ *
510
+ * Sums tokens across all message text contents plus a per-message overhead
511
+ * of ~4 tokens (for role, separators, and formatting metadata). Tool
512
+ * definitions and multimodal content parts (images, audio, documents) are
513
+ * not counted — only textual content contributes to the token total.
514
+ *
515
+ * # Errors
516
+ *
517
+ * Returns `LiterLlmError.BadRequest` if the tokenizer cannot be loaded or
518
+ * if tokenization fails for any message.
519
+ */
520
+ export declare function countRequestTokens(
521
+ model: string,
522
+ req?: ChatCompletionRequest | undefined | null,
523
+ ): number;
524
+
525
+ /**
526
+ * Count tokens in a text string using the tokenizer for the given model.
527
+ *
528
+ * The tokenizer is resolved from the model name prefix (e.g. `"gpt-4o"` maps
529
+ * to the `Xenova/gpt-4o` HuggingFace tokenizer). Tokenizers are cached after
530
+ * first load.
531
+ *
532
+ * # Errors
533
+ *
534
+ * Returns `LiterLlmError.BadRequest` if the tokenizer cannot be loaded
535
+ * (e.g. network failure on first use) or if tokenization itself fails.
536
+ */
537
+ export declare function countTokens(model: string, text: string): number;
409
538
 
410
539
  /** Request to create a batch job. */
411
540
  export interface CreateBatchRequest {
412
541
  /** ID of the uploaded input file (JSONL format). */
413
- readonly inputFileId?: string
542
+ inputFileId?: string;
414
543
  /** API endpoint (e.g., `"/v1/chat/completions"`). */
415
- readonly endpoint?: string
544
+ endpoint?: string;
416
545
  /** Completion window (e.g., `"24h"`). */
417
- readonly completionWindow?: string
546
+ completionWindow?: string;
418
547
  /** Optional metadata to attach to the batch. */
419
- readonly metadata?: JsonValue
548
+ metadata?: any;
420
549
  }
421
550
 
551
+ /**
552
+ * Create a new LLM client with simple scalar configuration.
553
+ *
554
+ * This is the primary binding entry-point. All parameters except `api_key`
555
+ * are optional — omitting them uses the same defaults as
556
+ * `ClientConfigBuilder`.
557
+ *
558
+ * # Errors
559
+ *
560
+ * Returns `LiterLlmError` if the underlying HTTP client cannot be
561
+ * constructed, or if the resolved provider configuration is invalid.
562
+ */
563
+ export declare function createClient(
564
+ apiKey: string,
565
+ baseUrl?: string | undefined | null,
566
+ timeoutSecs?: number | undefined | null,
567
+ maxRetries?: number | undefined | null,
568
+ modelHint?: string | undefined | null,
569
+ ): DefaultClient;
570
+
571
+ /**
572
+ * Create a new LLM client from a JSON string.
573
+ *
574
+ * The JSON object accepts the same fields as `liter-llm.toml` (snake_case).
575
+ *
576
+ * # Errors
577
+ *
578
+ * Returns `LiterLlmError.BadRequest` if `json` is not valid JSON or
579
+ * contains unknown fields.
580
+ */
581
+ export declare function createClientFromJson(json: string): DefaultClient;
582
+
422
583
  /** Request to upload a file. */
423
584
  export interface CreateFileRequest {
424
585
  /** Base64-encoded file data. */
425
- readonly file?: string
586
+ file?: string;
426
587
  /** Purpose for the file. */
427
- readonly purpose?: FilePurpose
588
+ purpose?: JsFilePurpose;
428
589
  /** Optional filename to associate with the upload. */
429
- readonly filename?: string
590
+ filename?: string;
430
591
  }
431
592
 
432
593
  /** Request to create images from a text prompt. */
433
594
  export interface CreateImageRequest {
434
595
  /** Text description of the image to generate. */
435
- readonly prompt?: string
596
+ prompt?: string;
436
597
  /** Model ID (e.g., `"dall-e-3"`). Optional; API may use default if unset. */
437
- readonly model?: string
598
+ model?: string;
438
599
  /** Number of images to generate. Defaults to 1. */
439
- readonly n?: number
600
+ n?: number;
440
601
  /** Image size (e.g., `"1024x1024"`, `"1792x1024"`). */
441
- readonly size?: string
602
+ size?: string;
442
603
  /** Image quality: `"standard"` or `"hd"`. */
443
- readonly quality?: string
604
+ quality?: string;
444
605
  /** Style: `"natural"` or `"vivid"` (DALL-E 3 only). */
445
- readonly style?: string
606
+ style?: string;
446
607
  /** Response format: `"url"` or `"b64_json"`. */
447
- readonly responseFormat?: string
608
+ responseFormat?: string;
448
609
  /** User identifier for request tracking. */
449
- readonly user?: string
610
+ user?: string;
450
611
  }
451
612
 
452
613
  /** Request to create a structured response. */
453
614
  export interface CreateResponseRequest {
454
615
  /** Model ID. */
455
- readonly model?: string
616
+ model?: string;
456
617
  /** Input data to process (e.g., a document to extract from). */
457
- readonly input?: JsonValue
618
+ input?: any;
458
619
  /** Instructions for processing the input. */
459
- readonly instructions?: string
620
+ instructions?: string;
460
621
  /** Available tools the model can use. */
461
- readonly tools?: Array<ResponseTool>
622
+ tools?: Array<JsResponseTool>;
462
623
  /** Sampling temperature in `[0.0, 2.0]`. Defaults to 1.0. */
463
- readonly temperature?: number
624
+ temperature?: number;
464
625
  /** Maximum output tokens. */
465
- readonly maxOutputTokens?: number
626
+ maxOutputTokens?: number;
466
627
  /** Optional metadata. */
467
- readonly metadata?: JsonValue
628
+ metadata?: any;
468
629
  }
469
630
 
470
631
  /** Request to generate speech audio from text. */
471
632
  export interface CreateSpeechRequest {
472
633
  /** Model ID (e.g., `"tts-1"`, `"tts-1-hd"`). */
473
- readonly model?: string
634
+ model?: string;
474
635
  /** Text to synthesize into speech. */
475
- readonly input?: string
636
+ input?: string;
476
637
  /** Voice name (e.g., `"alloy"`, `"echo"`, `"fable"`, `"onyx"`, `"nova"`, `"shimmer"`). */
477
- readonly voice?: string
638
+ voice?: string;
478
639
  /** Audio format (e.g., `"mp3"`, `"opus"`, `"aac"`, `"flac"`, `"wav"`, `"pcm"`). */
479
- readonly responseFormat?: string
640
+ responseFormat?: string;
480
641
  /** Playback speed in `[0.25, 4.0]`. Defaults to 1.0. */
481
- readonly speed?: number
642
+ speed?: number;
482
643
  }
483
644
 
484
645
  /** Request to transcribe audio into text. */
485
646
  export interface CreateTranscriptionRequest {
486
647
  /** Model ID (e.g., `"whisper-1"`). */
487
- readonly model?: string
648
+ model?: string;
488
649
  /** Base64-encoded audio file data. */
489
- readonly file?: string
650
+ file?: string;
490
651
  /** Language ISO-639-1 code (e.g., `"en"`, `"fr"`, `"de"`). Optional; model auto-detects. */
491
- readonly language?: string
652
+ language?: string;
492
653
  /** Optional text to guide the model (improves accuracy for domain-specific terms). */
493
- readonly prompt?: string
654
+ prompt?: string;
494
655
  /** Output format (e.g., `"json"`, `"text"`, `"vtt"`, `"srt"`, `"verbose_json"`). */
495
- readonly responseFormat?: string
656
+ responseFormat?: string;
496
657
  /** Sampling temperature in `[0.0, 1.0]`. Higher increases variability. Defaults to 0. */
497
- readonly temperature?: number
658
+ temperature?: number;
498
659
  }
499
660
 
500
661
  /** Configuration for registering a custom LLM provider at runtime. */
501
662
  export interface CustomProviderConfig {
502
663
  /** Unique name for this provider (e.g., "my-provider"). */
503
- readonly name: string
664
+ name: string;
504
665
  /** Base URL for the provider's API (e.g., "https://api.my-provider.com/v1"). */
505
- readonly baseUrl: string
666
+ baseUrl: string;
506
667
  /** Authentication header format. */
507
- readonly authHeader: AuthHeaderFormat
668
+ authHeader: JsAuthHeaderFormat;
508
669
  /** Model name prefixes that route to this provider (e.g., `["my-"]`). */
509
- readonly modelPrefixes: Array<string>
510
- }
511
-
512
- /**
513
- * Default client implementation backed by `reqwest`.
514
- *
515
- * Sends requests to 140+ LLM providers with automatic provider detection
516
- * and per-request routing. The provider is resolved at construction time
517
- * from `model_hint` (or defaults to OpenAI), but individual requests can
518
- * override the provider via model name prefix (e.g. `"anthropic/claude-3-5-sonnet"`
519
- * routes to Anthropic regardless of construction-time setting).
520
- *
521
- * When the model prefix does not match any known provider, the construction-time
522
- * provider is used as the fallback. This enables seamless migration between
523
- * providers by changing only the model name.
524
- *
525
- * The provider is stored behind an [`Arc`] so it can be shared cheaply into
526
- * async closures and streaming tasks. Pre-computed auth headers and extra
527
- * headers are cached at construction to avoid redundant encoding on every request.
528
- */
529
- export declare class DefaultClient {
530
- chat(req?: ChatCompletionRequest | undefined | null): Promise<ChatCompletionResponse>
531
- chatStream(req?: ChatCompletionRequest | undefined | null): Promise<AsyncGenerator<ChatCompletionChunk, void, undefined>>
532
- embed(req?: EmbeddingRequest | undefined | null): Promise<EmbeddingResponse>
533
- listModels(): Promise<ModelsListResponse>
534
- imageGenerate(req?: CreateImageRequest | undefined | null): Promise<ImagesResponse>
535
- speech(req?: CreateSpeechRequest | undefined | null): Promise<Uint8Array>
536
- transcribe(req?: CreateTranscriptionRequest | undefined | null): Promise<TranscriptionResponse>
537
- moderate(req?: ModerationRequest | undefined | null): Promise<ModerationResponse>
538
- rerank(req?: RerankRequest | undefined | null): Promise<RerankResponse>
539
- search(req?: SearchRequest | undefined | null): Promise<SearchResponse>
540
- ocr(req?: OcrRequest | undefined | null): Promise<OcrResponse>
541
- createFile(req?: CreateFileRequest | undefined | null): Promise<FileObject>
542
- retrieveFile(fileId: string): Promise<FileObject>
543
- deleteFile(fileId: string): Promise<DeleteResponse>
544
- listFiles(query?: FileListQuery | undefined | null): Promise<FileListResponse>
545
- fileContent(fileId: string): Promise<Uint8Array>
546
- createBatch(req?: CreateBatchRequest | undefined | null): Promise<BatchObject>
547
- retrieveBatch(batchId: string): Promise<BatchObject>
548
- listBatches(query?: BatchListQuery | undefined | null): Promise<BatchListResponse>
549
- cancelBatch(batchId: string): Promise<BatchObject>
550
- createResponse(req?: CreateResponseRequest | undefined | null): Promise<ResponseObject>
551
- retrieveResponse(responseId: string): Promise<ResponseObject>
552
- cancelResponse(responseId: string): Promise<ResponseObject>
670
+ modelPrefixes: Array<string>;
553
671
  }
554
672
 
555
673
  /** Response from a delete operation. */
556
674
  export interface DeleteResponse {
557
675
  /** ID of the deleted resource. */
558
- readonly id?: string
676
+ id?: string;
559
677
  /** Object type. */
560
- readonly object?: string
678
+ object?: string;
561
679
  /** Confirmation that the resource was deleted. */
562
- readonly deleted?: boolean
680
+ deleted?: boolean;
563
681
  }
564
682
 
565
683
  /** Developer message (system-like message for Claude models). */
566
684
  export interface DeveloperMessage {
567
685
  /** Developer-specific instructions or context. */
568
- readonly content?: string
686
+ content?: string;
569
687
  /** Optional name for the developer message source. */
570
- readonly name?: string
688
+ name?: string;
571
689
  }
572
690
 
573
691
  /** PDF/document content part for vision-capable models. */
574
692
  export interface DocumentContent {
575
693
  /** Base64-encoded document data or URL. */
576
- readonly data?: string
694
+ data?: string;
577
695
  /** MIME type (e.g., "application/pdf", "text/csv"). */
578
- readonly mediaType?: string
696
+ mediaType?: string;
579
697
  }
580
698
 
581
699
  /** The format in which the embedding vectors are returned. */
582
- export declare enum EmbeddingFormat {
700
+ export declare const enum EmbeddingFormat {
583
701
  /** 32-bit floating-point numbers (default). */
584
702
  Float = "float",
585
703
  /** Base64-encoded string representation of the floats. */
586
704
  Base64 = "base64",
587
705
  }
588
706
 
589
- /** Text or texts to embed. */
590
- export declare enum EmbeddingInput {
591
- /** Single text string. */
592
- Single = "Single",
593
- /** Multiple text strings (batch embedding). */
594
- Multiple = "Multiple",
595
- }
596
-
597
707
  /** A single embedding vector. */
598
708
  export interface EmbeddingObject {
599
709
  /**
600
710
  * Always `"embedding"` from OpenAI-compatible APIs. Stored as a plain
601
711
  * `String` so non-standard provider values do not break deserialization.
602
712
  */
603
- readonly object: string
713
+ object: string;
604
714
  /** The embedding vector. */
605
- readonly embedding: Array<number>
715
+ embedding: Array<number>;
606
716
  /** Index in the batch (corresponds to input order). */
607
- readonly index: number
717
+ index: number;
608
718
  }
609
719
 
610
720
  /** Embedding request. */
611
721
  export interface EmbeddingRequest {
612
722
  /** Model ID (e.g., `"text-embedding-3-small"`). */
613
- readonly model?: string
723
+ model?: string;
614
724
  /** Text or texts to embed. */
615
- readonly input?: EmbeddingInput
725
+ input?: JsEmbeddingInput;
616
726
  /** Output format: float (native) or base64. */
617
- readonly encodingFormat?: EmbeddingFormat
727
+ encodingFormat?: JsEmbeddingFormat;
618
728
  /** Requested embedding dimensions (if supported by the model). */
619
- readonly dimensions?: number
729
+ dimensions?: number;
620
730
  /** User identifier for request tracking. */
621
- readonly user?: string
731
+ user?: string;
622
732
  }
623
733
 
624
734
  /** Embedding response. */
@@ -627,69 +737,89 @@ export interface EmbeddingResponse {
627
737
  * Always `"list"` from OpenAI-compatible APIs. Stored as a plain
628
738
  * `String` so non-standard provider values do not break deserialization.
629
739
  */
630
- readonly object: string
740
+ object: string;
631
741
  /** List of embeddings. */
632
- readonly data: Array<EmbeddingObject>
742
+ data: Array<JsEmbeddingObject>;
633
743
  /** Model used to generate embeddings. */
634
- readonly model: string
744
+ model: string;
635
745
  /** Token usage (input tokens only; embeddings have zero output tokens). */
636
- readonly usage?: Usage
746
+ usage?: Usage;
637
747
  }
638
748
 
639
749
  /** How budget limits are enforced. */
640
- export declare enum Enforcement {
750
+ export declare const enum Enforcement {
641
751
  /**
642
752
  * Reject requests that would exceed the budget with
643
- * [`LiterLlmError::BudgetExceeded`].
753
+ * `LiterLlmError.BudgetExceeded`.
644
754
  */
645
755
  Hard = "Hard",
646
756
  /**
647
- * Allow requests through but emit a `tracing::warn!` when the budget is
757
+ * Allow requests through but emit a `tracing.warn!` when the budget is
648
758
  * exceeded.
649
759
  */
650
760
  Soft = "Soft",
651
761
  }
652
762
 
763
+ /**
764
+ * Install the `ring` crypto provider as the rustls process default, idempotently.
765
+ *
766
+ * rustls 0.23+ removed the implicit default provider. This function installs
767
+ * `ring` once per process. Subsequent calls are no-ops. Calling it from a
768
+ * downstream Rust app that has already installed `aws-lc-rs` is safe — the
769
+ * `Err` from `install_default()` is silently ignored.
770
+ *
771
+ * Called automatically by every internal `reqwest.Client` constructor
772
+ * (auth providers, default HTTP client). Bindings and downstream consumers
773
+ * reach those constructors transitively, so no manual init is required.
774
+ *
775
+ * WASM builds are exempt — the WASM target uses the browser/Node.js fetch
776
+ * API instead of rustls, so no crypto provider is needed.
777
+ *
778
+ * Windows builds use native-tls (SChannel) via reqwest, so rustls is not
779
+ * present and no crypto provider installation is needed.
780
+ */
781
+ export declare function ensureCryptoProvider(): void;
782
+
653
783
  /** Query parameters for listing files. */
654
784
  export interface FileListQuery {
655
785
  /** Filter by file purpose (e.g., `"batch"`, `"fine-tune"`). */
656
- readonly purpose?: string
786
+ purpose?: string;
657
787
  /** Maximum number of results to return. Defaults to 20. */
658
- readonly limit?: number
788
+ limit?: number;
659
789
  /** Pagination cursor: return results after this file ID. */
660
- readonly after?: string
790
+ after?: string;
661
791
  }
662
792
 
663
793
  /** Response from listing files. */
664
794
  export interface FileListResponse {
665
795
  /** Object type (always `"list"`). */
666
- readonly object?: string
796
+ object?: string;
667
797
  /** List of file objects. */
668
- readonly data?: Array<FileObject>
798
+ data?: Array<FileObject>;
669
799
  /** Whether more results are available. */
670
- readonly hasMore?: boolean
800
+ hasMore?: boolean;
671
801
  }
672
802
 
673
803
  /** An uploaded file object. */
674
804
  export interface FileObject {
675
805
  /** Unique file ID. */
676
- readonly id?: string
806
+ id?: string;
677
807
  /** Object type (always `"file"`). */
678
- readonly object?: string
808
+ object?: string;
679
809
  /** File size in bytes. */
680
- readonly bytes?: number
810
+ bytes?: number;
681
811
  /** Unix timestamp of file creation. */
682
- readonly createdAt?: number
812
+ createdAt?: number;
683
813
  /** Filename. */
684
- readonly filename?: string
814
+ filename?: string;
685
815
  /** File purpose. */
686
- readonly purpose?: string
816
+ purpose?: string;
687
817
  /** Processing status (e.g., `"uploaded"`, `"processed"`). */
688
- readonly status?: string
818
+ status?: string;
689
819
  }
690
820
 
691
821
  /** Purpose of an uploaded file. */
692
- export declare enum FilePurpose {
822
+ export declare const enum FilePurpose {
693
823
  /** File for use with Assistants API. */
694
824
  Assistants = "assistants",
695
825
  /** File for batch processing. */
@@ -701,7 +831,7 @@ export declare enum FilePurpose {
701
831
  }
702
832
 
703
833
  /** Why a choice stopped generating tokens. */
704
- export declare enum FinishReason {
834
+ export declare const enum FinishReason {
705
835
  Stop = "stop",
706
836
  Length = "length",
707
837
  ToolCalls = "tool_calls",
@@ -723,41 +853,49 @@ export declare enum FinishReason {
723
853
  /** Function call details. */
724
854
  export interface FunctionCall {
725
855
  /** Function name. */
726
- readonly name: string
727
- /** Arguments as a JSON string (parse with serde_json::from_str). */
728
- readonly arguments: string
856
+ name: string;
857
+ /** Arguments as a JSON string (parse with serde_json.from_str). */
858
+ arguments: string;
729
859
  }
730
860
 
731
861
  /** Function definition exposed to the model. */
732
862
  export interface FunctionDefinition {
733
863
  /** Name of the function. Required and must be alphanumeric + underscores. */
734
- readonly name: string
864
+ name: string;
735
865
  /** Human-readable description explaining what the function does. */
736
- readonly description?: string
866
+ description?: string;
737
867
  /** JSON Schema defining the function's parameters. */
738
- readonly parameters?: JsonValue
868
+ parameters?: any;
739
869
  /** If true, enforce strict JSON schema validation for arguments. */
740
- readonly strict?: boolean
870
+ strict?: boolean;
741
871
  }
742
872
 
743
873
  /** Deprecated legacy function-role message body. */
744
874
  export interface FunctionMessage {
745
- readonly content?: string
746
- readonly name?: string
875
+ content?: string;
876
+ name?: string;
877
+ }
878
+
879
+ /** The result of a single health probe. */
880
+ export declare const enum HealthStatus {
881
+ /** The probe succeeded; the upstream is reachable. */
882
+ Healthy = "Healthy",
883
+ /** The probe failed; the upstream may be down. */
884
+ Unhealthy = "Unhealthy",
747
885
  }
748
886
 
749
887
  /** A single generated image, returned as either a URL or base64 data. */
750
888
  export interface Image {
751
889
  /** Image URL (if response_format was "url"). */
752
- readonly url?: string
890
+ url?: string;
753
891
  /** Base64-encoded image data (if response_format was "b64_json"). */
754
- readonly b64Json?: string
892
+ b64Json?: string;
755
893
  /** The final prompt used to generate the image (DALL-E 3). */
756
- readonly revisedPrompt?: string
894
+ revisedPrompt?: string;
757
895
  }
758
896
 
759
897
  /** Image detail level controlling token cost and processing. */
760
- export declare enum ImageDetail {
898
+ export declare const enum ImageDetail {
761
899
  /** Low detail: scales image to 512x512, uses fewer tokens. */
762
900
  Low = "low",
763
901
  /** High detail: processes up to 2x2 grid of tiles, higher token cost. */
@@ -769,53 +907,65 @@ export declare enum ImageDetail {
769
907
  /** Response containing generated images. */
770
908
  export interface ImagesResponse {
771
909
  /** Unix timestamp of image creation. */
772
- readonly created?: number
910
+ created?: number;
773
911
  /** List of generated images. */
774
- readonly data?: Array<Image>
912
+ data?: Array<JsImage>;
775
913
  }
776
914
 
777
915
  /** An image URL reference with optional detail level for processing. */
778
916
  export interface ImageUrl {
779
917
  /** URL of the image (data URI or HTTP/HTTPS URL). */
780
- readonly url?: string
918
+ url?: string;
781
919
  /** Detail level: low (512x512), high (2x2 tiles), or auto (model-selected). */
782
- readonly detail?: ImageDetail
920
+ detail?: JsImageDetail;
921
+ }
922
+
923
+ /** An intent prototype: `(intent_name, prototype_embedding, target_model_id)`. */
924
+ export interface IntentPrototype {
925
+ /** Human-readable name for the intent (used in logs/metrics). */
926
+ name: string;
927
+ /** Pre-computed embedding vector for this intent. */
928
+ embedding: Array<number>;
929
+ /** Model to route to when this intent is detected. */
930
+ model: string;
783
931
  }
784
932
 
785
933
  /** JSON Schema specification for constrained output. */
786
934
  export interface JsonSchemaFormat {
787
935
  /** Name of the schema (must be unique in the request). */
788
- readonly name?: string
936
+ name?: string;
789
937
  /** Description of what the schema represents. */
790
- readonly description?: string
938
+ description?: string;
791
939
  /** JSON Schema object defining the output structure. */
792
- readonly schema?: JsonValue
940
+ schema?: any;
793
941
  /** If true, enforce strict schema validation. */
794
- readonly strict?: boolean
942
+ strict?: boolean;
795
943
  }
796
944
 
797
945
  /** A chat message in a conversation. */
798
- export type Message =
799
- | { role: 'system'; 0: SystemMessage }
800
- | { role: 'user'; 0: UserMessage }
801
- | { role: 'assistant'; 0: AssistantMessage }
802
- | { role: 'tool'; 0: ToolMessage }
803
- | { role: 'developer'; 0: DeveloperMessage }
804
- | { role: 'function'; 0: FunctionMessage }
946
+ export interface Message {
947
+ role: string;
948
+ system?: SystemMessage;
949
+ user?: UserMessage;
950
+ assistant?: AssistantMessage;
951
+ tool?: ToolMessage;
952
+ developer?: DeveloperMessage;
953
+ function?: FunctionMessage;
954
+ }
805
955
 
806
956
  /** A model available from the API. */
807
957
  export interface ModelObject {
808
958
  /** Model ID (e.g., `"gpt-4o"`, `"claude-3-5-sonnet"`). */
809
- readonly id?: string
959
+ id?: string;
810
960
  /**
811
961
  * Always `"model"` from OpenAI-compatible APIs. Stored as a plain
812
962
  * `String` so non-standard provider values do not break deserialization.
813
963
  */
814
- readonly object?: string
964
+ object?: string;
815
965
  /** Unix timestamp of model creation (or release date). */
816
- readonly created?: number
966
+ created?: number;
817
967
  /** Organization or entity that owns the model. */
818
- readonly ownedBy?: string
968
+ ownedBy?: string;
819
969
  }
820
970
 
821
971
  /** Response listing available models. */
@@ -824,444 +974,482 @@ export interface ModelsListResponse {
824
974
  * Always `"list"` from OpenAI-compatible APIs. Stored as a plain
825
975
  * `String` so non-standard provider values do not break deserialization.
826
976
  */
827
- readonly object?: string
977
+ object?: string;
828
978
  /** List of available models. */
829
- readonly data?: Array<ModelObject>
979
+ data?: Array<JsModelObject>;
830
980
  }
831
981
 
832
982
  /** Boolean flags for each moderation category. */
833
983
  export interface ModerationCategories {
834
984
  /** Sexual content. */
835
- readonly sexual?: boolean
985
+ sexual?: boolean;
836
986
  /** Hate speech. */
837
- readonly hate?: boolean
987
+ hate?: boolean;
838
988
  /** Harassment. */
839
- readonly harassment?: boolean
989
+ harassment?: boolean;
840
990
  /** Self-harm content. */
841
- readonly selfHarm?: boolean
991
+ "self-harm"?: boolean;
842
992
  /** Sexual content involving minors. */
843
- readonly sexualMinors?: boolean
993
+ "sexual/minors"?: boolean;
844
994
  /** Hate speech that threatens violence. */
845
- readonly hateThreatening?: boolean
995
+ "hate/threatening"?: boolean;
846
996
  /** Graphic violence. */
847
- readonly violenceGraphic?: boolean
997
+ "violence/graphic"?: boolean;
848
998
  /** Intent to self-harm. */
849
- readonly selfHarmIntent?: boolean
999
+ "self-harm/intent"?: boolean;
850
1000
  /** Instructions for self-harm. */
851
- readonly selfHarmInstructions?: boolean
1001
+ "self-harm/instructions"?: boolean;
852
1002
  /** Harassment that threatens violence. */
853
- readonly harassmentThreatening?: boolean
1003
+ "harassment/threatening"?: boolean;
854
1004
  /** Non-graphic violence. */
855
- readonly violence?: boolean
1005
+ violence?: boolean;
856
1006
  }
857
1007
 
858
1008
  /** Confidence scores for each moderation category. */
859
1009
  export interface ModerationCategoryScores {
860
1010
  /** Sexual content score. */
861
- readonly sexual?: number
1011
+ sexual?: number;
862
1012
  /** Hate speech score. */
863
- readonly hate?: number
1013
+ hate?: number;
864
1014
  /** Harassment score. */
865
- readonly harassment?: number
1015
+ harassment?: number;
866
1016
  /** Self-harm content score. */
867
- readonly selfHarm?: number
1017
+ "self-harm"?: number;
868
1018
  /** Sexual content involving minors score. */
869
- readonly sexualMinors?: number
1019
+ "sexual/minors"?: number;
870
1020
  /** Hate speech that threatens violence score. */
871
- readonly hateThreatening?: number
1021
+ "hate/threatening"?: number;
872
1022
  /** Graphic violence score. */
873
- readonly violenceGraphic?: number
1023
+ "violence/graphic"?: number;
874
1024
  /** Intent to self-harm score. */
875
- readonly selfHarmIntent?: number
1025
+ "self-harm/intent"?: number;
876
1026
  /** Instructions for self-harm score. */
877
- readonly selfHarmInstructions?: number
1027
+ "self-harm/instructions"?: number;
878
1028
  /** Harassment that threatens violence score. */
879
- readonly harassmentThreatening?: number
1029
+ "harassment/threatening"?: number;
880
1030
  /** Non-graphic violence score. */
881
- readonly violence?: number
882
- }
883
-
884
- /** Input to the moderation endpoint — a single string or multiple strings. */
885
- export declare enum ModerationInput {
886
- /** Single text string. */
887
- Single = "Single",
888
- /** Multiple text strings (batch moderation). */
889
- Multiple = "Multiple",
1031
+ violence?: number;
890
1032
  }
891
1033
 
892
1034
  /** Request to classify content for policy violations. */
893
1035
  export interface ModerationRequest {
894
1036
  /** Text or texts to check. */
895
- readonly input?: ModerationInput
1037
+ input?: JsModerationInput;
896
1038
  /** Model ID (e.g., `"text-moderation-latest"`). Optional; API uses default if unset. */
897
- readonly model?: string
1039
+ model?: string;
898
1040
  }
899
1041
 
900
1042
  /** Response from the moderation endpoint. */
901
1043
  export interface ModerationResponse {
902
1044
  /** Unique identifier for this moderation request. */
903
- readonly id: string
1045
+ id: string;
904
1046
  /** Model used for classification. */
905
- readonly model: string
1047
+ model: string;
906
1048
  /** Results for each input string. */
907
- readonly results: Array<ModerationResult>
1049
+ results: Array<JsModerationResult>;
908
1050
  }
909
1051
 
910
1052
  /** A single moderation classification result. */
911
1053
  export interface ModerationResult {
912
1054
  /** True if any category was flagged. */
913
- readonly flagged: boolean
1055
+ flagged: boolean;
914
1056
  /** Boolean flags for each moderation category. */
915
- readonly categories: ModerationCategories
1057
+ categories: JsModerationCategories;
916
1058
  /** Confidence scores for each category. */
917
- readonly categoryScores: ModerationCategoryScores
1059
+ categoryScores: JsModerationCategoryScores;
918
1060
  }
919
1061
 
920
1062
  /** Document input for OCR — either a URL or inline base64 data. */
921
- export type OcrDocument =
922
- | { type: 'document_url'; url: string }
923
- | { type: 'base64'; data: string; mediaType: string }
1063
+ export interface OcrDocument {
1064
+ type: string;
1065
+ url?: string;
1066
+ data?: string;
1067
+ mediaType?: string;
1068
+ }
924
1069
 
925
1070
  /** An image extracted from an OCR page. */
926
1071
  export interface OcrImage {
927
1072
  /** Unique image identifier within the document. */
928
- readonly id: string
1073
+ id: string;
929
1074
  /** Base64-encoded image data (if `include_image_base64` was true). */
930
- readonly imageBase64?: string
1075
+ imageBase64?: string;
931
1076
  }
932
1077
 
933
1078
  /** A single page of OCR output. */
934
1079
  export interface OcrPage {
935
1080
  /** Page index (0-based). */
936
- readonly index: number
1081
+ index: number;
937
1082
  /** Extracted page content as Markdown. */
938
- readonly markdown: string
1083
+ markdown: string;
939
1084
  /** Embedded images extracted from the page (if `include_image_base64` was true). */
940
- readonly images?: Array<OcrImage>
1085
+ images?: Array<JsOcrImage>;
941
1086
  /** Page dimensions in pixels, if available. */
942
- readonly dimensions?: PageDimensions
1087
+ dimensions?: JsPageDimensions;
943
1088
  }
944
1089
 
945
1090
  /** An OCR request. */
946
1091
  export interface OcrRequest {
947
1092
  /** The model/provider to use (e.g. `"mistral/mistral-ocr-latest"`). */
948
- readonly model?: string
1093
+ model?: string;
949
1094
  /** The document to process (URL or base64). */
950
- readonly document?: OcrDocument
1095
+ document?: JsOcrDocument;
951
1096
  /** Specific pages to process (1-indexed). `None` means all pages. */
952
- readonly pages?: Array<number>
1097
+ pages?: Array<number>;
953
1098
  /** Whether to include base64-encoded images of each processed page. */
954
- readonly includeImageBase64?: boolean
1099
+ includeImageBase64?: boolean;
955
1100
  }
956
1101
 
957
1102
  /** An OCR response. */
958
1103
  export interface OcrResponse {
959
1104
  /** Extracted pages in order. */
960
- readonly pages: Array<OcrPage>
1105
+ pages: Array<JsOcrPage>;
961
1106
  /** Model/provider used for OCR. */
962
- readonly model: string
1107
+ model: string;
963
1108
  /** Token usage, if reported by the provider. */
964
- readonly usage?: Usage
1109
+ usage?: Usage;
965
1110
  }
966
1111
 
967
1112
  /** Page dimensions in pixels. */
968
1113
  export interface PageDimensions {
969
1114
  /** Width in pixels. */
970
- readonly width: number
1115
+ width: number;
971
1116
  /** Height in pixels. */
972
- readonly height: number
1117
+ height: number;
973
1118
  }
974
1119
 
975
1120
  /**
976
1121
  * Breakdown of tokens used in the prompt portion of a request.
977
1122
  *
978
- * `cached_tokens` is included in `Usage::prompt_tokens` — it is *not* an
1123
+ * `cached_tokens` is included in `Usage.prompt_tokens` — it is *not* an
979
1124
  * additional charge on top of the prompt token count. When pricing supports
980
1125
  * a `cache_read_input_token_cost`, the cached portion is billed at the
981
1126
  * discounted rate and the remainder at the regular input rate.
982
1127
  */
983
1128
  export interface PromptTokensDetails {
984
1129
  /** Cached tokens present in the prompt. Defaults to 0 when absent. */
985
- readonly cachedTokens?: number
1130
+ cachedTokens?: number;
986
1131
  /** Audio input tokens present in the prompt. Defaults to 0 when absent. */
987
- readonly audioTokens?: number
1132
+ audioTokens?: number;
1133
+ }
1134
+
1135
+ /**
1136
+ * Static capability flags for a provider.
1137
+ *
1138
+ * Each flag indicates whether the provider's models *generally* support that
1139
+ * feature. For providers that aggregate many underlying models (e.g. Bedrock,
1140
+ * OpenRouter, vLLM) the flags reflect the superset of available model
1141
+ * capabilities — a flag being `true` means at least one model supports the
1142
+ * feature, not every model.
1143
+ *
1144
+ * All flags default to `false` so that newly added providers are safe.
1145
+ *
1146
+ * Access via the crate-level `capabilities` function:
1147
+ */
1148
+ export interface ProviderCapabilities {
1149
+ /** The provider accepts image input in chat messages. */
1150
+ vision?: boolean;
1151
+ /** The provider supports extended-thinking / reasoning tokens. */
1152
+ reasoning?: boolean;
1153
+ /** The provider supports JSON-mode or `response_format` structured output. */
1154
+ structuredOutput?: boolean;
1155
+ /** The provider supports tool / function calling. */
1156
+ functionCalling?: boolean;
1157
+ /** The provider accepts audio as input. */
1158
+ audioIn?: boolean;
1159
+ /** The provider can generate audio / TTS output. */
1160
+ audioOut?: boolean;
1161
+ /** The provider accepts video as input. */
1162
+ videoIn?: boolean;
988
1163
  }
989
1164
 
990
- /** Static configuration for a single provider entry in providers.json. */
1165
+ /**
1166
+ * Static configuration for a single provider entry in providers.json.
1167
+ *
1168
+ * This struct deliberately does not include capability flags or streaming
1169
+ * format, which are accessed via the `capabilities` function. Keeping
1170
+ * these fields separate preserves backward compatibility with all generated
1171
+ * binding code that constructs `ProviderConfig` using struct literal syntax.
1172
+ */
991
1173
  export interface ProviderConfig {
992
1174
  /** Provider identifier (matches the entry key in providers.json). */
993
- readonly name: string
1175
+ name: string;
994
1176
  /** Human-readable provider name shown in UIs. */
995
- readonly displayName?: string
1177
+ displayName?: string;
996
1178
  /** Base URL used as the default for this provider's HTTP client. */
997
- readonly baseUrl?: string
1179
+ baseUrl?: string;
998
1180
  /** Authentication scheme metadata (auth type + env var holding the key). */
999
- readonly auth?: AuthConfig
1181
+ auth?: JsAuthConfig;
1000
1182
  /** Supported endpoint kinds (e.g. `chat`, `embeddings`). */
1001
- readonly endpoints?: Array<string>
1183
+ endpoints?: Array<string>;
1002
1184
  /** Model-name prefixes claimed by this provider (e.g. `["gpt-", "o1-"]`). */
1003
- readonly modelPrefixes?: Array<string>
1185
+ modelPrefixes?: Array<string>;
1004
1186
  /**
1005
1187
  * Parameter key renaming for this provider.
1006
1188
  *
1007
1189
  * Each entry maps an OpenAI-spec field name (e.g. `"max_completion_tokens"`)
1008
1190
  * to the name this provider expects (e.g. `"max_tokens"`). Applied
1009
- * automatically by [`ConfigDrivenProvider::transform_request`].
1191
+ * automatically by `ConfigDrivenProvider.transform_request`.
1010
1192
  */
1011
- readonly paramMappings?: Record<string, string>
1193
+ paramMappings?: Record<string, string>;
1012
1194
  }
1013
1195
 
1014
1196
  /** Configuration for per-model rate limits. */
1015
1197
  export interface RateLimitConfig {
1016
1198
  /** Maximum requests per window. `None` means unlimited. */
1017
- readonly rpm?: number
1199
+ rpm?: number;
1018
1200
  /** Maximum tokens per window. `None` means unlimited. */
1019
- readonly tpm?: number
1201
+ tpm?: number;
1020
1202
  /** Fixed window duration (defaults to 60 s). */
1021
- readonly window?: number
1203
+ window?: number;
1022
1204
  }
1023
1205
 
1206
+ export declare function rateLimitConfigDefault(): RateLimitConfig;
1207
+
1024
1208
  /** Controls how much reasoning effort the model should use. */
1025
- export declare enum ReasoningEffort {
1209
+ export declare const enum ReasoningEffort {
1026
1210
  Low = "low",
1027
1211
  Medium = "medium",
1028
1212
  High = "high",
1029
1213
  }
1030
1214
 
1031
- /** A document to be reranked — either a plain string or an object with a text field. */
1032
- export declare enum RerankDocument {
1033
- /** Plain text document content. */
1034
- Text = "Text",
1035
- /** Document with explicit text field (may include metadata). */
1036
- Object = "Object",
1037
- }
1215
+ /**
1216
+ * Register a custom provider in the global runtime registry.
1217
+ *
1218
+ * The provider will be checked **before** all built-in providers during model
1219
+ * detection. If a provider with the same `name` already exists it is replaced.
1220
+ *
1221
+ * # Errors
1222
+ *
1223
+ * Returns an error if the config is invalid (empty name, empty base_url, or
1224
+ * no model prefixes).
1225
+ */
1226
+ export declare function registerCustomProvider(config: CustomProviderConfig): void;
1038
1227
 
1039
1228
  /** Request to rerank documents by relevance to a query. */
1040
1229
  export interface RerankRequest {
1041
1230
  /** Model ID (e.g., `"cohere/rerank-english-v3.0"`). */
1042
- readonly model?: string
1231
+ model?: string;
1043
1232
  /** The search query. */
1044
- readonly query?: string
1233
+ query?: string;
1045
1234
  /** Documents to rerank. */
1046
- readonly documents?: Array<RerankDocument>
1235
+ documents?: Array<JsRerankDocument>;
1047
1236
  /** Return only the top N results. Optional. */
1048
- readonly topN?: number
1237
+ topN?: number;
1049
1238
  /** Include the document content in results. Defaults to false. */
1050
- readonly returnDocuments?: boolean
1239
+ returnDocuments?: boolean;
1051
1240
  }
1052
1241
 
1053
1242
  /** Response from the rerank endpoint. */
1054
1243
  export interface RerankResponse {
1055
1244
  /** Unique identifier for this rerank request. */
1056
- readonly id?: string
1245
+ id?: string;
1057
1246
  /** Reranked documents in order of relevance. */
1058
- readonly results: Array<RerankResult>
1247
+ results: Array<JsRerankResult>;
1059
1248
  /** Optional metadata about the reranking operation. */
1060
- readonly meta?: JsonValue
1249
+ meta?: any;
1061
1250
  }
1062
1251
 
1063
1252
  /** A single reranked document with its relevance score. */
1064
1253
  export interface RerankResult {
1065
1254
  /** Original document index in the input list. */
1066
- readonly index: number
1255
+ index: number;
1067
1256
  /** Relevance score in `[0, 1]`. Higher indicates more relevant. */
1068
- readonly relevanceScore: number
1257
+ relevanceScore: number;
1069
1258
  /** Original document content (if `return_documents` was true). */
1070
- readonly document?: RerankResultDocument
1259
+ document?: JsRerankResultDocument;
1071
1260
  }
1072
1261
 
1073
1262
  /** The text content of a reranked document, returned when `return_documents` is true. */
1074
1263
  export interface RerankResultDocument {
1075
1264
  /** Document text. */
1076
- readonly text: string
1265
+ text: string;
1077
1266
  }
1078
1267
 
1079
1268
  /** Response format constraint. */
1080
- export type ResponseFormat =
1081
- | { type: 'text' }
1082
- | { type: 'json_object' }
1083
- | { type: 'json_schema'; jsonSchema: JsonSchemaFormat }
1269
+ export interface ResponseFormat {
1270
+ type: string;
1271
+ jsonSchema?: JsonSchemaFormat;
1272
+ }
1084
1273
 
1085
1274
  /** Response from a structured response request. */
1086
1275
  export interface ResponseObject {
1087
1276
  /** Unique response ID. */
1088
- readonly id?: string
1277
+ id?: string;
1089
1278
  /** Object type (e.g., `"response"`). */
1090
- readonly object?: string
1279
+ object?: string;
1091
1280
  /** Unix timestamp of response creation. */
1092
- readonly createdAt?: number
1281
+ createdAt?: number;
1093
1282
  /** Model used to generate the response. */
1094
- readonly model?: string
1283
+ model?: string;
1095
1284
  /** Status (e.g., `"succeeded"`, `"failed"`). */
1096
- readonly status?: string
1285
+ status?: string;
1097
1286
  /** Output items from the response. */
1098
- readonly output?: Array<ResponseOutputItem>
1287
+ output?: Array<JsResponseOutputItem>;
1099
1288
  /** Token usage. */
1100
- readonly usage?: ResponseUsage
1289
+ usage?: JsResponseUsage;
1101
1290
  /** Error details (if status is "failed"). */
1102
- readonly error?: JsonValue
1291
+ error?: any;
1103
1292
  }
1104
1293
 
1105
1294
  /** A single output item from the response. */
1106
1295
  export interface ResponseOutputItem {
1107
1296
  /** Output type (e.g., `"text"`, `"object"`, `"error"`). */
1108
- readonly itemType?: string
1297
+ type?: string;
1109
1298
  /** Output content (flattened into the object). */
1110
- readonly content?: JsonValue
1299
+ content?: any;
1111
1300
  }
1112
1301
 
1113
1302
  /** A tool available for the response request. */
1114
1303
  export interface ResponseTool {
1115
1304
  /** Tool type (e.g., "extractor", "search"). */
1116
- readonly toolType?: string
1305
+ type?: string;
1117
1306
  /** Tool configuration (flattened into the object). */
1118
- readonly config?: JsonValue
1307
+ config?: any;
1119
1308
  }
1120
1309
 
1121
1310
  /** Token usage for a response. */
1122
1311
  export interface ResponseUsage {
1123
1312
  /** Input tokens used. */
1124
- readonly inputTokens?: number
1313
+ inputTokens?: number;
1125
1314
  /** Output tokens used. */
1126
- readonly outputTokens?: number
1315
+ outputTokens?: number;
1127
1316
  /** Total tokens used. */
1128
- readonly totalTokens?: number
1317
+ totalTokens?: number;
1129
1318
  }
1130
1319
 
1131
1320
  /** A search request. */
1132
1321
  export interface SearchRequest {
1133
1322
  /** The model/provider to use (e.g. `"brave/web-search"`, `"tavily/search"`). */
1134
- readonly model?: string
1323
+ model?: string;
1135
1324
  /** The search query string. */
1136
- readonly query?: string
1325
+ query?: string;
1137
1326
  /** Maximum number of results to return. */
1138
- readonly maxResults?: number
1327
+ maxResults?: number;
1139
1328
  /** Domain filter — restrict results to specific domains. */
1140
- readonly searchDomainFilter?: Array<string>
1329
+ searchDomainFilter?: Array<string>;
1141
1330
  /** Country code for localized results (ISO 3166-1 alpha-2, e.g., `"US"`, `"FR"`). */
1142
- readonly country?: string
1331
+ country?: string;
1143
1332
  }
1144
1333
 
1145
1334
  /** A search response. */
1146
1335
  export interface SearchResponse {
1147
1336
  /** List of search results. */
1148
- readonly results: Array<SearchResult>
1337
+ results: Array<JsSearchResult>;
1149
1338
  /** Model/provider that performed the search. */
1150
- readonly model: string
1339
+ model: string;
1151
1340
  }
1152
1341
 
1153
1342
  /** An individual search result. */
1154
1343
  export interface SearchResult {
1155
1344
  /** Result title. */
1156
- readonly title: string
1345
+ title: string;
1157
1346
  /** Result URL. */
1158
- readonly url: string
1347
+ url: string;
1159
1348
  /** Text snippet or excerpt from the page. */
1160
- readonly snippet: string
1349
+ snippet: string;
1161
1350
  /** Publication or last-updated date, if available. */
1162
- readonly date?: string
1351
+ date?: string;
1163
1352
  }
1164
1353
 
1165
1354
  /** Name of the specific function to invoke. */
1166
1355
  export interface SpecificFunction {
1167
1356
  /** Function name. */
1168
- readonly name?: string
1357
+ name?: string;
1169
1358
  }
1170
1359
 
1171
1360
  /** Directive to call a specific tool. */
1172
1361
  export interface SpecificToolChoice {
1173
1362
  /** Tool type (always "function"). */
1174
- readonly choiceType?: ToolType
1363
+ type?: JsToolType;
1175
1364
  /** The specific function to invoke. */
1176
- readonly function?: SpecificFunction
1177
- }
1178
-
1179
- /** Stop sequence(s) that cause the model to stop generating. */
1180
- export declare enum StopSequence {
1181
- /** Single stop sequence. */
1182
- Single = "Single",
1183
- /** Multiple stop sequences. */
1184
- Multiple = "Multiple",
1365
+ function?: JsSpecificFunction;
1185
1366
  }
1186
1367
 
1187
1368
  /** A streaming choice with incremental delta. */
1188
1369
  export interface StreamChoice {
1189
1370
  /** Index of this choice in the choices array. */
1190
- readonly index?: number
1371
+ index?: number;
1191
1372
  /** Incremental update to the message (content, tool calls, etc.). */
1192
- readonly delta?: StreamDelta
1373
+ delta?: JsStreamDelta;
1193
1374
  /** Why the stream ended (present only in final chunk). */
1194
- readonly finishReason?: FinishReason
1375
+ finishReason?: JsFinishReason;
1195
1376
  }
1196
1377
 
1197
1378
  /** Incremental delta in a stream chunk. */
1198
1379
  export interface StreamDelta {
1199
1380
  /** Role (typically present only in the first chunk). */
1200
- readonly role?: string
1381
+ role?: string;
1201
1382
  /** Partial content chunk (e.g., a few words of the response). */
1202
- readonly content?: string
1383
+ content?: string;
1203
1384
  /** Partial tool calls being streamed. */
1204
- readonly toolCalls?: Array<StreamToolCall>
1385
+ toolCalls?: Array<JsStreamToolCall>;
1205
1386
  /** Deprecated legacy function_call delta; retained for API compatibility. */
1206
- readonly functionCall?: StreamFunctionCall
1387
+ functionCall?: JsStreamFunctionCall;
1207
1388
  /** Partial refusal message. */
1208
- readonly refusal?: string
1389
+ refusal?: string;
1390
+ }
1391
+
1392
+ /**
1393
+ * The streaming wire format a provider uses for its response stream.
1394
+ *
1395
+ * Most providers use standard Server-Sent Events (SSE). AWS Bedrock uses
1396
+ * a proprietary binary EventStream framing.
1397
+ *
1398
+ * Deserialized from the `streaming_format` JSON field via `serde`.
1399
+ */
1400
+ export declare const enum StreamFormat {
1401
+ /** Standard Server-Sent Events (text/event-stream). */
1402
+ Sse = "sse",
1403
+ /** AWS EventStream binary framing (application/vnd.amazon.eventstream). */
1404
+ AwsEventStream = "aws_event_stream",
1209
1405
  }
1210
1406
 
1211
1407
  /** Partial function call details in a stream. */
1212
1408
  export interface StreamFunctionCall {
1213
1409
  /** Function name (typically in the first chunk). */
1214
- readonly name?: string
1410
+ name?: string;
1215
1411
  /** Partial JSON arguments chunk. */
1216
- readonly arguments?: string
1412
+ arguments?: string;
1217
1413
  }
1218
1414
 
1219
1415
  /** Options for streaming responses. */
1220
1416
  export interface StreamOptions {
1221
1417
  /** If true, include token usage in the final stream chunk. */
1222
- readonly includeUsage?: boolean
1418
+ includeUsage?: boolean;
1223
1419
  }
1224
1420
 
1225
1421
  /** A streaming tool call being built incrementally. */
1226
1422
  export interface StreamToolCall {
1227
1423
  /** Index of this tool call in the tool_calls array. */
1228
- readonly index?: number
1424
+ index?: number;
1229
1425
  /** Tool call ID (typically in the first chunk for this call). */
1230
- readonly id?: string
1426
+ id?: string;
1231
1427
  /** Tool type (typically "function"). */
1232
- readonly callType?: ToolType
1428
+ type?: JsToolType;
1233
1429
  /** Partial function name and arguments. */
1234
- readonly function?: StreamFunctionCall
1430
+ function?: JsStreamFunctionCall;
1235
1431
  }
1236
1432
 
1237
1433
  /** System message guiding model behavior for the entire conversation. */
1238
1434
  export interface SystemMessage {
1239
1435
  /** Instructions or context that apply throughout the conversation. */
1240
- readonly content?: string
1436
+ content?: string;
1241
1437
  /** Optional name for the system message source. */
1242
- readonly name?: string
1438
+ name?: string;
1243
1439
  }
1244
1440
 
1245
1441
  /** A tool call the model wants to execute. */
1246
1442
  export interface ToolCall {
1247
1443
  /** Unique ID for this call, used to reference in tool result messages. */
1248
- readonly id: string
1444
+ id: string;
1249
1445
  /** Tool type (always "function"). */
1250
- readonly callType: ToolType
1446
+ type: JsToolType;
1251
1447
  /** Function name and arguments. */
1252
- readonly function: FunctionCall
1253
- }
1254
-
1255
- /** Tool usage mode or a specific tool to call. */
1256
- export declare enum ToolChoice {
1257
- /** Predefined mode: auto, required, or none. */
1258
- Mode = "Mode",
1259
- /** Force a specific tool to be called. */
1260
- Specific = "Specific",
1448
+ function: JsFunctionCall;
1261
1449
  }
1262
1450
 
1263
1451
  /** Tool choice mode. */
1264
- export declare enum ToolChoiceMode {
1452
+ export declare const enum ToolChoiceMode {
1265
1453
  /** Model may or may not call tools; default behavior. */
1266
1454
  Auto = "auto",
1267
1455
  /** Model must call at least one tool. */
@@ -1273,11 +1461,11 @@ export declare enum ToolChoiceMode {
1273
1461
  /** Tool execution result returned to the model. */
1274
1462
  export interface ToolMessage {
1275
1463
  /** Result of the tool execution. */
1276
- readonly content?: string
1464
+ content?: string;
1277
1465
  /** ID of the tool call this result responds to. */
1278
- readonly toolCallId?: string
1466
+ toolCallId?: string;
1279
1467
  /** Optional tool/function name. */
1280
- readonly name?: string
1468
+ name?: string;
1281
1469
  }
1282
1470
 
1283
1471
  /**
@@ -1287,92 +1475,85 @@ export interface ToolMessage {
1287
1475
  * that constraint at the type level and rejects any other value on
1288
1476
  * deserialization.
1289
1477
  */
1290
- export declare enum ToolType {
1478
+ export declare const enum ToolType {
1291
1479
  Function = "function",
1292
1480
  }
1293
1481
 
1294
1482
  /** Response from a transcription request. */
1295
1483
  export interface TranscriptionResponse {
1296
1484
  /** The transcribed text. */
1297
- readonly text?: string
1485
+ text?: string;
1298
1486
  /** Detected language (ISO-639-1 code). */
1299
- readonly language?: string
1487
+ language?: string;
1300
1488
  /** Total audio duration in seconds. */
1301
- readonly duration?: number
1489
+ duration?: number;
1302
1490
  /** Detailed segment-level transcription (if response_format is "verbose_json"). */
1303
- readonly segments?: Array<TranscriptionSegment>
1491
+ segments?: Array<JsTranscriptionSegment>;
1304
1492
  }
1305
1493
 
1306
1494
  /** A segment of transcribed audio with timing information. */
1307
1495
  export interface TranscriptionSegment {
1308
1496
  /** Segment index (0-based). */
1309
- readonly id?: number
1497
+ id?: number;
1310
1498
  /** Start time in seconds. */
1311
- readonly start?: number
1499
+ start?: number;
1312
1500
  /** End time in seconds. */
1313
- readonly end?: number
1501
+ end?: number;
1314
1502
  /** Transcribed text for this segment. */
1315
- readonly text?: string
1503
+ text?: string;
1316
1504
  }
1317
1505
 
1506
+ /**
1507
+ * Remove a previously registered custom provider by name.
1508
+ *
1509
+ * Returns `true` if a provider with the given name was found and removed,
1510
+ * `false` if no such provider existed.
1511
+ *
1512
+ * # Errors
1513
+ *
1514
+ * Returns an error only if the internal lock is poisoned.
1515
+ */
1516
+ export declare function unregisterCustomProvider(name: string): boolean;
1517
+
1318
1518
  /** Token-usage accounting returned by the provider on each completion / embedding call. */
1319
1519
  export interface Usage {
1320
1520
  /** Prompt tokens used. Defaults to 0 when absent (some providers omit this). */
1321
- readonly promptTokens?: number
1521
+ promptTokens?: number;
1322
1522
  /** Completion tokens used. Defaults to 0 when absent (e.g. embedding responses). */
1323
- readonly completionTokens?: number
1523
+ completionTokens?: number;
1324
1524
  /** Total tokens used. Defaults to 0 when absent (some providers omit this). */
1325
- readonly totalTokens?: number
1525
+ totalTokens?: number;
1326
1526
  /**
1327
1527
  * Breakdown of tokens used in the prompt, including cached tokens served
1328
1528
  * at the provider's discounted cache-read rate. Absent when the provider
1329
1529
  * does not return prompt-token details.
1330
1530
  */
1331
- readonly promptTokensDetails?: PromptTokensDetails
1332
- }
1333
-
1334
- /** User message content as either plain text or a list of multimodal parts. */
1335
- export declare enum UserContent {
1336
- /** Plain text content. */
1337
- Text = "Text",
1338
- /** Array of content parts (text, images, documents, audio). */
1339
- Parts = "Parts",
1531
+ promptTokensDetails?: JsPromptTokensDetails;
1340
1532
  }
1341
1533
 
1342
1534
  /** User message in the conversation. */
1343
1535
  export interface UserMessage {
1344
1536
  /** Message content as plain text or array of content parts (text, images, documents, audio). */
1345
- readonly content?: UserContent
1537
+ content?: JsUserContent;
1346
1538
  /** Optional name for the user. */
1347
- readonly name?: string
1539
+ name?: string;
1348
1540
  }
1349
1541
 
1350
1542
  /**
1351
- * Register a custom provider in the global runtime registry.
1543
+ * Configuration for polling a batch until terminal status.
1352
1544
  *
1353
- * The provider will be checked **before** all built-in providers during model
1354
- * detection. If a provider with the same `name` already exists it is replaced.
1355
- * @throws Returns an error if the config is invalid (empty name, empty base_url, or
1356
- * no model prefixes).
1545
+ * All time values are in seconds as `f64` so the struct bridges across FFI
1546
+ * boundaries without requiring a `Duration` shim.
1357
1547
  */
1358
- export declare function registerCustomProvider(config: CustomProviderConfig): void;
1359
-
1360
- /**
1361
- * Remove a previously registered custom provider by name.
1362
- *
1363
- * Returns `true` if a provider with the given name was found and removed,
1364
- * `false` if no such provider existed.
1365
- * @throws Returns an error only if the internal lock is poisoned.
1366
- */
1367
- export declare function unregisterCustomProvider(name: string): boolean;
1368
-
1369
- export declare class ChatStreamIterator {
1370
- next(value?: undefined): Promise<IteratorResult<ChatCompletionChunk, void>>
1371
- [Symbol.asyncIterator](): AsyncGenerator<ChatCompletionChunk, void, undefined>
1372
- }
1373
-
1374
- export declare class LiterLlmErrorInfo {
1375
- statusCode(): number
1376
- isTransient(): boolean
1377
- errorType(): string
1378
- }
1548
+ export interface WaitForBatchConfig {
1549
+ /** Initial interval between polls, in seconds. */
1550
+ initialIntervalSecs?: number;
1551
+ /** Maximum interval between polls (backoff plateau), in seconds. */
1552
+ maxIntervalSecs?: number;
1553
+ /** Exponential backoff multiplier (e.g., 1.5 increases delay by 50% each poll). */
1554
+ backoffMultiplier?: number;
1555
+ /** Optional timeout in seconds; polling fails if this duration is exceeded. */
1556
+ timeoutSecs?: number;
1557
+ }
1558
+
1559
+ export declare function waitForBatchConfigDefault(): WaitForBatchConfig;