@kreuzberg/liter-llm-node 1.6.0 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -16
- package/index.d.ts +718 -667
- package/index.js +45 -16
- package/liter-llm-node.darwin-arm64.node +0 -0
- package/liter-llm-node.linux-arm64-gnu.node +0 -0
- package/liter-llm-node.linux-x64-gnu.node +0 -0
- package/liter-llm-node.win32-x64-msvc.node +0 -0
- package/package.json +27 -17
package/index.d.ts
CHANGED
|
@@ -1,139 +1,215 @@
|
|
|
1
|
-
|
|
1
|
+
// This file is auto-generated by alef — DO NOT EDIT.
|
|
2
|
+
// alef:hash:797e09398ae0b95dd0e3de94d7374eedafcd20d08532c7cf378cbcd09e3083a7
|
|
3
|
+
// To regenerate: alef generate
|
|
4
|
+
// To verify freshness: alef verify --exit-code
|
|
2
5
|
/* eslint-disable */
|
|
6
|
+
|
|
7
|
+
export type JsonValue =
|
|
8
|
+
| string
|
|
9
|
+
| number
|
|
10
|
+
| boolean
|
|
11
|
+
| null
|
|
12
|
+
| JsonValue[]
|
|
13
|
+
| { [key: string]: JsonValue };
|
|
14
|
+
|
|
3
15
|
/**
|
|
4
|
-
*
|
|
5
|
-
* It can be used with `for await...of` loops.
|
|
16
|
+
* Return all provider configs from the registry.
|
|
6
17
|
*
|
|
7
|
-
*
|
|
18
|
+
* Useful for tooling, documentation generation, or runtime enumeration.
|
|
19
|
+
* Returns the public [`ProviderConfig`] slice (without capability flags).
|
|
20
|
+
* To query capability flags for a specific provider use [`capabilities`].
|
|
8
21
|
*/
|
|
9
|
-
export declare
|
|
22
|
+
export declare function allProviders(): Array<ProviderConfig>;
|
|
10
23
|
|
|
11
24
|
/**
|
|
12
|
-
*
|
|
25
|
+
* Return the capability flags for a named provider.
|
|
13
26
|
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
*
|
|
18
|
-
* routes to Anthropic regardless of construction-time setting).
|
|
27
|
+
* Performs an O(n) linear scan over the embedded registry (143 entries).
|
|
28
|
+
* Returns an owned value so that bindings can box/copy it across the FFI
|
|
29
|
+
* boundary without dealing with lifetimes. `ProviderCapabilities` is `Copy`,
|
|
30
|
+
* so this is a cheap memcpy of seven `bool` fields.
|
|
19
31
|
*
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
|
|
32
|
+
* For unknown `provider_name` values the function returns an all-`false`
|
|
33
|
+
* sentinel so callers never need to handle `Option`.
|
|
34
|
+
*/
|
|
35
|
+
export declare function capabilities(providerName: string): ProviderCapabilities;
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Assert that `current_len + incoming` does not exceed `limit`.
|
|
23
39
|
*
|
|
24
|
-
*
|
|
25
|
-
*
|
|
26
|
-
*
|
|
40
|
+
* Call this before appending `incoming` bytes to any buffer that must
|
|
41
|
+
* stay below `limit`. Returns `Err(LiterLlmError::Streaming)` on overflow
|
|
42
|
+
* and emits a `tracing::warn!` with context.
|
|
27
43
|
*/
|
|
28
|
-
export declare
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
speech(req: CreateSpeechRequest): Promise<Buffer>;
|
|
35
|
-
transcribe(req: CreateTranscriptionRequest): Promise<TranscriptionResponse>;
|
|
36
|
-
moderate(req: ModerationRequest): Promise<ModerationResponse>;
|
|
37
|
-
rerank(req: RerankRequest): Promise<RerankResponse>;
|
|
38
|
-
search(req: SearchRequest): Promise<SearchResponse>;
|
|
39
|
-
ocr(req: OcrRequest): Promise<OcrResponse>;
|
|
40
|
-
createFile(req: CreateFileRequest): Promise<FileObject>;
|
|
41
|
-
retrieveFile(fileId: string): Promise<FileObject>;
|
|
42
|
-
deleteFile(fileId: string): Promise<DeleteResponse>;
|
|
43
|
-
listFiles(query?: FileListQuery | undefined | null): Promise<FileListResponse>;
|
|
44
|
-
fileContent(fileId: string): Promise<Buffer>;
|
|
45
|
-
createBatch(req: CreateBatchRequest): Promise<BatchObject>;
|
|
46
|
-
retrieveBatch(batchId: string): Promise<BatchObject>;
|
|
47
|
-
listBatches(query?: BatchListQuery | undefined | null): Promise<BatchListResponse>;
|
|
48
|
-
cancelBatch(batchId: string): Promise<BatchObject>;
|
|
49
|
-
fetchBatchForPolling(batchId: string): Promise<BatchObject>;
|
|
50
|
-
/**
|
|
51
|
-
* Poll a batch until it reaches a terminal status (Completed, Failed, Expired, Cancelled).
|
|
52
|
-
*
|
|
53
|
-
* Uses exponential backoff with configurable initial interval, maximum interval, and backoff multiplier.
|
|
54
|
-
* Optionally supports a timeout that aborts polling if exceeded.
|
|
55
|
-
*
|
|
56
|
-
* # Errors
|
|
57
|
-
*
|
|
58
|
-
* Returns `BatchWaitError.Failed` if the batch reaches a failure terminal status.
|
|
59
|
-
* Returns `BatchWaitError.Timeout` if the configured timeout is exceeded.
|
|
60
|
-
* Returns `BatchWaitError.Client` for underlying client errors.
|
|
61
|
-
*
|
|
62
|
-
* # Example
|
|
63
|
-
*/
|
|
64
|
-
waitForBatch(batchId: string, config: WaitForBatchConfig): Promise<BatchObject>;
|
|
65
|
-
createResponse(req: CreateResponseRequest): Promise<ResponseObject>;
|
|
66
|
-
retrieveResponse(responseId: string): Promise<ResponseObject>;
|
|
67
|
-
cancelResponse(responseId: string): Promise<ResponseObject>;
|
|
68
|
-
}
|
|
69
|
-
export type JsDefaultClient = DefaultClient;
|
|
44
|
+
export declare function checkBound(
|
|
45
|
+
context: string,
|
|
46
|
+
currentLen: number,
|
|
47
|
+
incoming: number,
|
|
48
|
+
limit: number,
|
|
49
|
+
): void;
|
|
70
50
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
/** Returns `true` if the error is transient and a retry may succeed. */
|
|
78
|
-
isTransient(): boolean;
|
|
79
|
-
/** Machine-readable error category string for matching and logging. */
|
|
80
|
-
errorType(): string;
|
|
81
|
-
}
|
|
51
|
+
/**
|
|
52
|
+
* Remove all guardrails from the global registry.
|
|
53
|
+
*
|
|
54
|
+
* Primarily useful in tests to reset state between test cases.
|
|
55
|
+
*/
|
|
56
|
+
export declare function clear(): void;
|
|
82
57
|
|
|
83
58
|
/**
|
|
84
|
-
*
|
|
59
|
+
* Calculate the estimated cost of a completion given a model name and token
|
|
60
|
+
* counts.
|
|
85
61
|
*
|
|
86
|
-
* `
|
|
87
|
-
*
|
|
88
|
-
*
|
|
62
|
+
* Returns `None` if the model is not present in the embedded pricing registry.
|
|
63
|
+
* Returns `Some(cost_usd)` otherwise, where the value is in US dollars.
|
|
64
|
+
*
|
|
65
|
+
* When an exact model name match is not found, progressively shorter prefixes
|
|
66
|
+
* are tried by stripping from the last `-` or `.` separator. For example,
|
|
67
|
+
* `gpt-4-0613` will match `gpt-4` if no `gpt-4-0613` entry exists.
|
|
89
68
|
*/
|
|
90
|
-
export declare
|
|
91
|
-
|
|
69
|
+
export declare function completionCost(
|
|
70
|
+
model: string,
|
|
71
|
+
promptTokens: number,
|
|
72
|
+
completionTokens: number,
|
|
73
|
+
): number | null;
|
|
92
74
|
|
|
93
75
|
/**
|
|
94
|
-
*
|
|
76
|
+
* Calculate the estimated cost of a completion, accounting for cached
|
|
77
|
+
* (cache-hit) prompt tokens billed at the provider's discounted rate.
|
|
95
78
|
*
|
|
96
|
-
*
|
|
97
|
-
*
|
|
98
|
-
*
|
|
79
|
+
* `cached_tokens` is the count of prompt tokens served from the provider's
|
|
80
|
+
* prompt cache. It must be `<= prompt_tokens` (cached tokens are a subset of
|
|
81
|
+
* the prompt). The non-cached portion is billed at `input_cost_per_token`
|
|
82
|
+
* and the cached portion at `cache_read_input_token_cost` when the model
|
|
83
|
+
* has cache pricing; otherwise the entire prompt is billed at the regular
|
|
84
|
+
* input rate.
|
|
85
|
+
*
|
|
86
|
+
* Returns `None` if the model is not present in the embedded pricing
|
|
87
|
+
* registry, mirroring [`completion_cost`].
|
|
99
88
|
*/
|
|
100
|
-
export declare function
|
|
89
|
+
export declare function completionCostWithCache(
|
|
90
|
+
model: string,
|
|
91
|
+
promptTokens: number,
|
|
92
|
+
cachedTokens: number,
|
|
93
|
+
completionTokens: number,
|
|
94
|
+
): number | null;
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Return the set of complex provider names.
|
|
98
|
+
*
|
|
99
|
+
* Complex providers require custom auth/routing logic beyond simple bearer
|
|
100
|
+
* tokens (e.g. AWS Bedrock SigV4, Vertex AI OAuth2).
|
|
101
|
+
*
|
|
102
|
+
* The returned reference points into the static registry — no allocation.
|
|
103
|
+
*/
|
|
104
|
+
export declare function complexProviderNames(): Array<string>;
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Count tokens for a full [`ChatCompletionRequest`].
|
|
108
|
+
*
|
|
109
|
+
* Sums tokens across all message text contents plus a per-message overhead
|
|
110
|
+
* of ~4 tokens (for role, separators, and formatting metadata). Tool
|
|
111
|
+
* definitions and multimodal content parts (images, audio, documents) are
|
|
112
|
+
* not counted — only textual content contributes to the token total.
|
|
113
|
+
* @throws Returns [`LiterLlmError::BadRequest`] if the tokenizer cannot be loaded or
|
|
114
|
+
* if tokenization fails for any message.
|
|
115
|
+
*/
|
|
116
|
+
export declare function countRequestTokens(
|
|
117
|
+
model: string,
|
|
118
|
+
req?: ChatCompletionRequest | undefined | null,
|
|
119
|
+
): number;
|
|
120
|
+
|
|
121
|
+
/**
|
|
122
|
+
* Count tokens in a text string using the tokenizer for the given model.
|
|
123
|
+
*
|
|
124
|
+
* The tokenizer is resolved from the model name prefix (e.g. `"gpt-4o"` maps
|
|
125
|
+
* to the `Xenova/gpt-4o` HuggingFace tokenizer). Tokenizers are cached after
|
|
126
|
+
* first load.
|
|
127
|
+
* @throws Returns [`LiterLlmError::BadRequest`] if the tokenizer cannot be loaded
|
|
128
|
+
* (e.g. network failure on first use) or if tokenization itself fails.
|
|
129
|
+
*/
|
|
130
|
+
export declare function countTokens(model: string, text: string): number;
|
|
131
|
+
|
|
132
|
+
/**
|
|
133
|
+
* Create a new LLM client with simple scalar configuration.
|
|
134
|
+
*
|
|
135
|
+
* This is the primary binding entry-point. All parameters except `api_key`
|
|
136
|
+
* are optional — omitting them uses the same defaults as
|
|
137
|
+
* [`ClientConfigBuilder`].
|
|
138
|
+
* @throws Returns [`LiterLlmError`] if the underlying HTTP client cannot be
|
|
139
|
+
* constructed, or if the resolved provider configuration is invalid.
|
|
140
|
+
*/
|
|
141
|
+
export declare function createClient(
|
|
142
|
+
apiKey: string,
|
|
143
|
+
baseUrl?: string | undefined | null,
|
|
144
|
+
timeoutSecs?: number | undefined | null,
|
|
145
|
+
maxRetries?: number | undefined | null,
|
|
146
|
+
modelHint?: string | undefined | null,
|
|
147
|
+
): DefaultClient;
|
|
148
|
+
|
|
149
|
+
/**
|
|
150
|
+
* Create a new LLM client from a JSON string.
|
|
151
|
+
*
|
|
152
|
+
* The JSON object accepts the same fields as `liter-llm.toml` (snake_case).
|
|
153
|
+
* @throws Returns [`LiterLlmError::BadRequest`] if `json` is not valid JSON or
|
|
154
|
+
* contains unknown fields.
|
|
155
|
+
*/
|
|
156
|
+
export declare function createClientFromJson(json: string): DefaultClient;
|
|
157
|
+
|
|
158
|
+
/**
|
|
159
|
+
* Install the `ring` crypto provider as the rustls process default, idempotently.
|
|
160
|
+
*
|
|
161
|
+
* rustls 0.23+ removed the implicit default provider. This function installs
|
|
162
|
+
* `ring` once per process. Subsequent calls are no-ops. Calling it from a
|
|
163
|
+
* downstream Rust app that has already installed `aws-lc-rs` is safe — the
|
|
164
|
+
* `Err` from `install_default()` is silently ignored.
|
|
165
|
+
*
|
|
166
|
+
* Called automatically by every internal `reqwest::Client` constructor
|
|
167
|
+
* (auth providers, default HTTP client). Bindings and downstream consumers
|
|
168
|
+
* reach those constructors transitively, so no manual init is required.
|
|
169
|
+
*
|
|
170
|
+
* WASM builds are exempt — the WASM target uses the browser/Node.js fetch
|
|
171
|
+
* API instead of rustls, so no crypto provider is needed.
|
|
172
|
+
*
|
|
173
|
+
* Windows builds use native-tls (SChannel) via reqwest, so rustls is not
|
|
174
|
+
* present and no crypto provider installation is needed.
|
|
175
|
+
*/
|
|
176
|
+
export declare function ensureCryptoProvider(): void;
|
|
101
177
|
|
|
102
178
|
/** Assistant's response to a user message. */
|
|
103
179
|
export interface AssistantMessage {
|
|
104
180
|
/** The assistant's text response. Absent if tool calls are returned instead. */
|
|
105
|
-
content?: string;
|
|
181
|
+
readonly content?: string;
|
|
106
182
|
/** Optional name for the assistant. */
|
|
107
|
-
name?: string;
|
|
183
|
+
readonly name?: string;
|
|
108
184
|
/** Tool calls the model wants to execute, if any. */
|
|
109
|
-
toolCalls?: Array<
|
|
185
|
+
readonly toolCalls?: Array<ToolCall>;
|
|
110
186
|
/** Refusal reason, if the model declined to respond per safety policies. */
|
|
111
|
-
refusal?: string;
|
|
187
|
+
readonly refusal?: string;
|
|
112
188
|
/** Deprecated legacy function_call field; retained for API compatibility. */
|
|
113
|
-
functionCall?:
|
|
189
|
+
readonly functionCall?: FunctionCall;
|
|
114
190
|
}
|
|
115
191
|
|
|
116
192
|
/** Audio content part for speech-capable models. */
|
|
117
193
|
export interface AudioContent {
|
|
118
194
|
/** Base64-encoded audio data. */
|
|
119
|
-
data?: string;
|
|
195
|
+
readonly data?: string;
|
|
120
196
|
/** Audio format (e.g., "wav", "mp3", "ogg"). */
|
|
121
|
-
format?: string;
|
|
197
|
+
readonly format?: string;
|
|
122
198
|
}
|
|
123
199
|
|
|
124
200
|
/** Auth configuration block. */
|
|
125
201
|
export interface AuthConfig {
|
|
126
202
|
/** Auth scheme classification. */
|
|
127
|
-
|
|
203
|
+
readonly authType: AuthType;
|
|
128
204
|
/**
|
|
129
205
|
* Name of the environment variable that holds the API key (e.g. `"OPENAI_API_KEY"`).
|
|
130
206
|
* Holds the variable name, never the secret value.
|
|
131
207
|
*/
|
|
132
|
-
envVar?: string;
|
|
208
|
+
readonly envVar?: string;
|
|
133
209
|
}
|
|
134
210
|
|
|
135
211
|
/** How the API key is sent in the HTTP request. */
|
|
136
|
-
export declare
|
|
212
|
+
export declare enum AuthHeaderFormat {
|
|
137
213
|
/** Bearer token: `Authorization: Bearer <key>` */
|
|
138
214
|
Bearer = "Bearer",
|
|
139
215
|
/** Custom header: e.g., `X-Api-Key: <key>` */
|
|
@@ -143,7 +219,7 @@ export declare const enum AuthHeaderFormat {
|
|
|
143
219
|
}
|
|
144
220
|
|
|
145
221
|
/** Auth scheme used by a provider. */
|
|
146
|
-
export declare
|
|
222
|
+
export declare enum AuthType {
|
|
147
223
|
/** Standard `Authorization: Bearer <key>` header. */
|
|
148
224
|
Bearer = "bearer",
|
|
149
225
|
/** `x-api-key: <key>` header (also handles `"header"` and `"x-api-key"` aliases). */
|
|
@@ -157,69 +233,69 @@ export declare const enum AuthType {
|
|
|
157
233
|
/** Query parameters for listing batches. */
|
|
158
234
|
export interface BatchListQuery {
|
|
159
235
|
/** Maximum number of results to return. Defaults to 20. */
|
|
160
|
-
limit?: number;
|
|
236
|
+
readonly limit?: number;
|
|
161
237
|
/** Pagination cursor: return results after this batch ID. */
|
|
162
|
-
after?: string;
|
|
238
|
+
readonly after?: string;
|
|
163
239
|
}
|
|
164
240
|
|
|
165
241
|
/** Response from listing batches. */
|
|
166
242
|
export interface BatchListResponse {
|
|
167
243
|
/** Object type (always `"list"`). */
|
|
168
|
-
object?: string;
|
|
244
|
+
readonly object?: string;
|
|
169
245
|
/** List of batch objects. */
|
|
170
|
-
data?: Array<BatchObject>;
|
|
246
|
+
readonly data?: Array<BatchObject>;
|
|
171
247
|
/** Whether more results are available. */
|
|
172
|
-
hasMore?: boolean;
|
|
248
|
+
readonly hasMore?: boolean;
|
|
173
249
|
/** First batch ID in the result set (for pagination). */
|
|
174
|
-
firstId?: string;
|
|
250
|
+
readonly firstId?: string;
|
|
175
251
|
/** Last batch ID in the result set (for pagination). */
|
|
176
|
-
lastId?: string;
|
|
252
|
+
readonly lastId?: string;
|
|
177
253
|
}
|
|
178
254
|
|
|
179
255
|
/** A batch job object. */
|
|
180
256
|
export interface BatchObject {
|
|
181
257
|
/** Unique batch ID. */
|
|
182
|
-
id?: string;
|
|
258
|
+
readonly id?: string;
|
|
183
259
|
/** Object type (always `"batch"`). */
|
|
184
|
-
object?: string;
|
|
260
|
+
readonly object?: string;
|
|
185
261
|
/** API endpoint (e.g., `"/v1/chat/completions"`). */
|
|
186
|
-
endpoint?: string;
|
|
262
|
+
readonly endpoint?: string;
|
|
187
263
|
/** ID of the input file. */
|
|
188
|
-
inputFileId?: string;
|
|
264
|
+
readonly inputFileId?: string;
|
|
189
265
|
/** Completion window (e.g., `"24h"`). */
|
|
190
|
-
completionWindow?: string;
|
|
266
|
+
readonly completionWindow?: string;
|
|
191
267
|
/** Current job status. */
|
|
192
|
-
status?:
|
|
268
|
+
readonly status?: BatchStatus;
|
|
193
269
|
/** ID of the output file (present when completed). */
|
|
194
|
-
outputFileId?: string;
|
|
270
|
+
readonly outputFileId?: string;
|
|
195
271
|
/** ID of the error file (present if some requests failed). */
|
|
196
|
-
errorFileId?: string;
|
|
272
|
+
readonly errorFileId?: string;
|
|
197
273
|
/** Unix timestamp of batch creation. */
|
|
198
|
-
createdAt?: number;
|
|
274
|
+
readonly createdAt?: number;
|
|
199
275
|
/** Unix timestamp of completion (if completed). */
|
|
200
|
-
completedAt?: number;
|
|
276
|
+
readonly completedAt?: number;
|
|
201
277
|
/** Unix timestamp of failure (if failed). */
|
|
202
|
-
failedAt?: number;
|
|
278
|
+
readonly failedAt?: number;
|
|
203
279
|
/** Unix timestamp of expiration (if expired). */
|
|
204
|
-
expiredAt?: number;
|
|
280
|
+
readonly expiredAt?: number;
|
|
205
281
|
/** Request processing counts. */
|
|
206
|
-
requestCounts?:
|
|
282
|
+
readonly requestCounts?: BatchRequestCounts;
|
|
207
283
|
/** Metadata attached to the batch. */
|
|
208
|
-
metadata?:
|
|
284
|
+
readonly metadata?: JsonValue;
|
|
209
285
|
}
|
|
210
286
|
|
|
211
287
|
/** Request processing counts for a batch. */
|
|
212
288
|
export interface BatchRequestCounts {
|
|
213
289
|
/** Total requests in the batch. */
|
|
214
|
-
total?: number;
|
|
290
|
+
readonly total?: number;
|
|
215
291
|
/** Completed requests. */
|
|
216
|
-
completed?: number;
|
|
292
|
+
readonly completed?: number;
|
|
217
293
|
/** Failed requests. */
|
|
218
|
-
failed?: number;
|
|
294
|
+
readonly failed?: number;
|
|
219
295
|
}
|
|
220
296
|
|
|
221
297
|
/** Status of a batch job. */
|
|
222
|
-
export declare
|
|
298
|
+
export declare enum BatchStatus {
|
|
223
299
|
/** Validating the input file. */
|
|
224
300
|
Validating = "validating",
|
|
225
301
|
/** Job failed. */
|
|
@@ -241,191 +317,172 @@ export declare const enum BatchStatus {
|
|
|
241
317
|
/** Configuration for budget enforcement. */
|
|
242
318
|
export interface BudgetConfig {
|
|
243
319
|
/** Maximum total spend across all models, in USD. `None` means unlimited. */
|
|
244
|
-
globalLimit?: number;
|
|
320
|
+
readonly globalLimit?: number;
|
|
245
321
|
/**
|
|
246
322
|
* Per-model spending limits in USD. Models not listed here are only
|
|
247
323
|
* constrained by `global_limit`.
|
|
248
324
|
*/
|
|
249
|
-
modelLimits?: Record<string, number>;
|
|
325
|
+
readonly modelLimits?: Record<string, number>;
|
|
250
326
|
/** Whether to reject requests or merely warn when a limit is exceeded. */
|
|
251
|
-
enforcement?:
|
|
327
|
+
readonly enforcement?: Enforcement;
|
|
252
328
|
}
|
|
253
329
|
|
|
254
|
-
export declare function budgetConfigDefault(): BudgetConfig;
|
|
255
|
-
|
|
256
330
|
/** Storage backend for the response cache. */
|
|
257
|
-
export
|
|
258
|
-
type:
|
|
259
|
-
scheme
|
|
260
|
-
config?: Record<string, string>;
|
|
261
|
-
}
|
|
331
|
+
export type CacheBackend =
|
|
332
|
+
| { type: "memory" }
|
|
333
|
+
| { type: "open_dal"; scheme: string; config: Record<string, string> };
|
|
262
334
|
|
|
263
335
|
/** Configuration for the response cache. */
|
|
264
336
|
export interface CacheConfig {
|
|
265
337
|
/** Maximum number of cached entries. */
|
|
266
|
-
maxEntries?: number;
|
|
338
|
+
readonly maxEntries?: number;
|
|
267
339
|
/** Time-to-live for each cached entry. */
|
|
268
|
-
ttl?: number;
|
|
340
|
+
readonly ttl?: number;
|
|
269
341
|
/** Storage backend to use. */
|
|
270
|
-
backend?:
|
|
342
|
+
readonly backend?: CacheBackend;
|
|
271
343
|
}
|
|
272
344
|
|
|
273
|
-
export declare function cacheConfigDefault(): CacheConfig;
|
|
274
|
-
|
|
275
|
-
/**
|
|
276
|
-
* Return the capability flags for a named provider.
|
|
277
|
-
*
|
|
278
|
-
* Performs an O(n) linear scan over the embedded registry (142 entries).
|
|
279
|
-
* Returns an owned value so that bindings can box/copy it across the FFI
|
|
280
|
-
* boundary without dealing with lifetimes. `ProviderCapabilities` is `Copy`,
|
|
281
|
-
* so this is a cheap memcpy of seven `bool` fields.
|
|
282
|
-
*
|
|
283
|
-
* For unknown `provider_name` values the function returns an all-`false`
|
|
284
|
-
* sentinel so callers never need to handle `Option`.
|
|
285
|
-
*/
|
|
286
|
-
export declare function capabilities(providerName: string): ProviderCapabilities;
|
|
287
|
-
|
|
288
345
|
/** A streamed chunk of a chat completion response. */
|
|
289
346
|
export interface ChatCompletionChunk {
|
|
290
347
|
/** Unique identifier for this stream. */
|
|
291
|
-
id?: string;
|
|
348
|
+
readonly id?: string;
|
|
292
349
|
/**
|
|
293
350
|
* Always `"chat.completion.chunk"` from OpenAI-compatible APIs. Stored
|
|
294
351
|
* as a plain `String` so non-standard provider values do not fail parsing.
|
|
295
352
|
*/
|
|
296
|
-
object?: string;
|
|
353
|
+
readonly object?: string;
|
|
297
354
|
/** Unix timestamp of chunk creation. */
|
|
298
|
-
created?: number;
|
|
355
|
+
readonly created?: number;
|
|
299
356
|
/** Model used to generate the chunk. */
|
|
300
|
-
model?: string;
|
|
357
|
+
readonly model?: string;
|
|
301
358
|
/** Streaming choices (delta updates). */
|
|
302
|
-
choices?: Array<
|
|
359
|
+
readonly choices?: Array<StreamChoice>;
|
|
303
360
|
/** Token usage (typically only in the final chunk). */
|
|
304
|
-
usage?: Usage;
|
|
361
|
+
readonly usage?: Usage;
|
|
305
362
|
/** Fingerprint of the system configuration (OpenAI-specific). */
|
|
306
|
-
systemFingerprint?: string;
|
|
363
|
+
readonly systemFingerprint?: string;
|
|
307
364
|
/** Service tier used (OpenAI-specific). */
|
|
308
|
-
serviceTier?: string;
|
|
365
|
+
readonly serviceTier?: string;
|
|
309
366
|
}
|
|
310
367
|
|
|
311
368
|
/** Chat completion request (compatible with OpenAI and similar APIs). */
|
|
312
369
|
export interface ChatCompletionRequest {
|
|
313
370
|
/** Model ID (e.g., `"gpt-4o-mini"`, `"claude-3-5-sonnet"`). */
|
|
314
|
-
model?: string;
|
|
371
|
+
readonly model?: string;
|
|
315
372
|
/** Conversation history from oldest to newest. */
|
|
316
|
-
messages?: Array<
|
|
373
|
+
readonly messages?: Array<Message>;
|
|
317
374
|
/** Sampling temperature in `[0.0, 2.0]`. Higher increases randomness. Defaults to 1.0. */
|
|
318
|
-
temperature?: number;
|
|
375
|
+
readonly temperature?: number;
|
|
319
376
|
/** Nucleus sampling parameter in `[0.0, 1.0]`. Lower is more focused. */
|
|
320
|
-
topP?: number;
|
|
377
|
+
readonly topP?: number;
|
|
321
378
|
/** Number of chat completions to generate. Defaults to 1. */
|
|
322
|
-
n?: number;
|
|
379
|
+
readonly n?: number;
|
|
323
380
|
/**
|
|
324
381
|
* Whether to stream the response.
|
|
325
382
|
*
|
|
326
383
|
* Managed by the client layer — do not set directly.
|
|
327
384
|
*/
|
|
328
|
-
stream?: boolean;
|
|
385
|
+
readonly stream?: boolean;
|
|
329
386
|
/** Stop sequence(s) that halt token generation. */
|
|
330
|
-
stop?:
|
|
387
|
+
readonly stop?: StopSequence;
|
|
331
388
|
/** Max output tokens. Different from max_completion_tokens in some providers. */
|
|
332
|
-
maxTokens?: number;
|
|
389
|
+
readonly maxTokens?: number;
|
|
333
390
|
/** Presence penalty in `[-2.0, 2.0]`. Positive discourages repeated topics. */
|
|
334
|
-
presencePenalty?: number;
|
|
391
|
+
readonly presencePenalty?: number;
|
|
335
392
|
/** Frequency penalty in `[-2.0, 2.0]`. Positive discourages repeated tokens. */
|
|
336
|
-
frequencyPenalty?: number;
|
|
393
|
+
readonly frequencyPenalty?: number;
|
|
337
394
|
/**
|
|
338
395
|
* Token bias map. Uses `BTreeMap` (sorted keys) for deterministic
|
|
339
396
|
* serialization order — important when hashing or signing requests.
|
|
340
397
|
*/
|
|
341
|
-
logitBias?: Record<string, number>;
|
|
398
|
+
readonly logitBias?: Record<string, number>;
|
|
342
399
|
/** User identifier for request tracking and abuse detection. */
|
|
343
|
-
user?: string;
|
|
400
|
+
readonly user?: string;
|
|
344
401
|
/** Tools the model can invoke. */
|
|
345
|
-
tools?: Array<ChatCompletionTool>;
|
|
402
|
+
readonly tools?: Array<ChatCompletionTool>;
|
|
346
403
|
/** Tool usage mode (auto, required, none, or specific tool). */
|
|
347
|
-
toolChoice?:
|
|
404
|
+
readonly toolChoice?: ToolChoice;
|
|
348
405
|
/** Whether the model can call multiple tools in parallel. Defaults to true. */
|
|
349
|
-
parallelToolCalls?: boolean;
|
|
406
|
+
readonly parallelToolCalls?: boolean;
|
|
350
407
|
/** Output format constraint (text, JSON, JSON schema). */
|
|
351
|
-
responseFormat?:
|
|
408
|
+
readonly responseFormat?: ResponseFormat;
|
|
352
409
|
/** Streaming options (e.g., include_usage). */
|
|
353
|
-
streamOptions?:
|
|
410
|
+
readonly streamOptions?: StreamOptions;
|
|
354
411
|
/** Random seed for reproducible outputs. Provider support varies. */
|
|
355
|
-
seed?: number;
|
|
412
|
+
readonly seed?: number;
|
|
356
413
|
/** Reasoning effort level (low, medium, high) for extended-thinking models. */
|
|
357
|
-
reasoningEffort?:
|
|
414
|
+
readonly reasoningEffort?: ReasoningEffort;
|
|
358
415
|
/**
|
|
359
416
|
* Provider-specific extra parameters merged into the request body.
|
|
360
417
|
* Use for guardrails, safety settings, grounding config, etc.
|
|
361
418
|
*/
|
|
362
|
-
extraBody?:
|
|
419
|
+
readonly extraBody?: JsonValue;
|
|
363
420
|
}
|
|
364
421
|
|
|
365
422
|
/** Chat completion response from the API. */
|
|
366
423
|
export interface ChatCompletionResponse {
|
|
367
424
|
/** Unique identifier for this response. */
|
|
368
|
-
id?: string;
|
|
425
|
+
readonly id?: string;
|
|
369
426
|
/**
|
|
370
427
|
* Always `"chat.completion"` from OpenAI-compatible APIs. Stored as a
|
|
371
428
|
* plain `String` so non-standard provider values do not break deserialization.
|
|
372
429
|
*/
|
|
373
|
-
object?: string;
|
|
430
|
+
readonly object?: string;
|
|
374
431
|
/** Unix timestamp of response creation. */
|
|
375
|
-
created?: number;
|
|
432
|
+
readonly created?: number;
|
|
376
433
|
/** Model used to generate the response. */
|
|
377
|
-
model?: string;
|
|
434
|
+
readonly model?: string;
|
|
378
435
|
/** List of completion choices. */
|
|
379
|
-
choices?: Array<
|
|
436
|
+
readonly choices?: Array<Choice>;
|
|
380
437
|
/** Token usage statistics. */
|
|
381
|
-
usage?: Usage;
|
|
438
|
+
readonly usage?: Usage;
|
|
382
439
|
/** Fingerprint of the system configuration (OpenAI-specific). */
|
|
383
|
-
systemFingerprint?: string;
|
|
440
|
+
readonly systemFingerprint?: string;
|
|
384
441
|
/** Service tier used (OpenAI-specific). */
|
|
385
|
-
serviceTier?: string;
|
|
442
|
+
readonly serviceTier?: string;
|
|
386
443
|
}
|
|
387
444
|
|
|
388
445
|
/** A tool the model can invoke (currently, all tools are functions). */
|
|
389
446
|
export interface ChatCompletionTool {
|
|
390
447
|
/** Tool type (always "function" in OpenAI spec). */
|
|
391
|
-
|
|
448
|
+
readonly toolType: ToolType;
|
|
392
449
|
/** Function definition with name, description, and JSON schema parameters. */
|
|
393
|
-
function:
|
|
450
|
+
readonly function: FunctionDefinition;
|
|
394
451
|
}
|
|
395
452
|
|
|
396
|
-
export declare function chatStream(
|
|
397
|
-
engine: DefaultClient,
|
|
398
|
-
model: string,
|
|
399
|
-
): Promise<ChatStreamIterator>;
|
|
400
|
-
|
|
401
|
-
/**
|
|
402
|
-
* Assert that `current_len + incoming` does not exceed `limit`.
|
|
403
|
-
*
|
|
404
|
-
* Call this before appending `incoming` bytes to any buffer that must
|
|
405
|
-
* stay below `limit`. Returns `Err(LiterLlmError.Streaming)` on overflow
|
|
406
|
-
* and emits a `tracing.warn!` with context.
|
|
407
|
-
*
|
|
408
|
-
* # Example
|
|
409
|
-
*/
|
|
410
|
-
export declare function checkBound(
|
|
411
|
-
context: string,
|
|
412
|
-
currentLen: number,
|
|
413
|
-
incoming: number,
|
|
414
|
-
limit: number,
|
|
415
|
-
): void;
|
|
416
|
-
|
|
417
453
|
/** A single completion choice. */
|
|
418
454
|
export interface Choice {
|
|
419
455
|
/** Index of this choice in the choices array. */
|
|
420
|
-
index?: number;
|
|
456
|
+
readonly index?: number;
|
|
421
457
|
/** The assistant's message response. */
|
|
422
|
-
message?: AssistantMessage;
|
|
458
|
+
readonly message?: AssistantMessage;
|
|
423
459
|
/** Why the model stopped generating (stop, length, tool_calls, content_filter, etc.). */
|
|
424
|
-
finishReason?:
|
|
460
|
+
readonly finishReason?: FinishReason;
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
/**
|
|
464
|
+
* A per-chunk transformation in the [`StreamPipeline`].
|
|
465
|
+
*
|
|
466
|
+
* Each middleware receives a typed chunk and returns `Ok(Some(chunk))`
|
|
467
|
+
* to pass it through (optionally modified), `Ok(None)` to drop the chunk,
|
|
468
|
+
* or `Err(e)` to propagate a stream error.
|
|
469
|
+
*
|
|
470
|
+
* The trait is object-safe so implementations can be stored in a
|
|
471
|
+
* `Vec<Box<dyn ChunkMiddleware>>` inside [`StreamPipeline`].
|
|
472
|
+
*/
|
|
473
|
+
export interface ChunkMiddleware {
|
|
474
|
+
/**
|
|
475
|
+
* Process a single chunk.
|
|
476
|
+
*
|
|
477
|
+
* - `Ok(Some(chunk))` — emit (possibly transformed) chunk.
|
|
478
|
+
* - `Ok(None)` — drop this chunk silently.
|
|
479
|
+
* - `Err(e)` — propagate as a stream error.
|
|
480
|
+
*/
|
|
481
|
+
process(chunk?: ChatCompletionChunk | undefined | null): string;
|
|
425
482
|
}
|
|
426
483
|
|
|
427
484
|
/** Observable state of a circuit breaker. */
|
|
428
|
-
export declare
|
|
485
|
+
export declare enum CircuitState {
|
|
429
486
|
/** Requests flow through normally. */
|
|
430
487
|
Closed = "Closed",
|
|
431
488
|
/** All requests are rejected; the circuit is waiting for the backoff to elapse. */
|
|
@@ -434,301 +491,241 @@ export declare const enum CircuitState {
|
|
|
434
491
|
HalfOpen = "HalfOpen",
|
|
435
492
|
}
|
|
436
493
|
|
|
437
|
-
/**
|
|
438
|
-
* Remove all guardrails from the global registry.
|
|
439
|
-
*
|
|
440
|
-
* Primarily useful in tests to reset state between test cases.
|
|
441
|
-
*
|
|
442
|
-
* # Panics
|
|
443
|
-
*
|
|
444
|
-
* Panics if the global registry lock is poisoned.
|
|
445
|
-
*/
|
|
446
|
-
export declare function clear(): void;
|
|
447
|
-
|
|
448
|
-
/**
|
|
449
|
-
* Calculate the estimated cost of a completion given a model name and token
|
|
450
|
-
* counts.
|
|
451
|
-
*
|
|
452
|
-
* Returns `None` if the model is not present in the embedded pricing registry.
|
|
453
|
-
* Returns `Some(cost_usd)` otherwise, where the value is in US dollars.
|
|
454
|
-
*
|
|
455
|
-
* When an exact model name match is not found, progressively shorter prefixes
|
|
456
|
-
* are tried by stripping from the last `-` or `.` separator. For example,
|
|
457
|
-
* `gpt-4-0613` will match `gpt-4` if no `gpt-4-0613` entry exists.
|
|
458
|
-
*
|
|
459
|
-
* # Example
|
|
460
|
-
*/
|
|
461
|
-
export declare function completionCost(
|
|
462
|
-
model: string,
|
|
463
|
-
promptTokens: number,
|
|
464
|
-
completionTokens: number,
|
|
465
|
-
): number | null;
|
|
466
|
-
|
|
467
|
-
/**
|
|
468
|
-
* Calculate the estimated cost of a completion, accounting for cached
|
|
469
|
-
* (cache-hit) prompt tokens billed at the provider's discounted rate.
|
|
470
|
-
*
|
|
471
|
-
* `cached_tokens` is the count of prompt tokens served from the provider's
|
|
472
|
-
* prompt cache. It must be `<= prompt_tokens` (cached tokens are a subset of
|
|
473
|
-
* the prompt). The non-cached portion is billed at `input_cost_per_token`
|
|
474
|
-
* and the cached portion at `cache_read_input_token_cost` when the model
|
|
475
|
-
* has cache pricing; otherwise the entire prompt is billed at the regular
|
|
476
|
-
* input rate.
|
|
477
|
-
*
|
|
478
|
-
* Returns `None` if the model is not present in the embedded pricing
|
|
479
|
-
* registry, mirroring `completion_cost`.
|
|
480
|
-
*/
|
|
481
|
-
export declare function completionCostWithCache(
|
|
482
|
-
model: string,
|
|
483
|
-
promptTokens: number,
|
|
484
|
-
cachedTokens: number,
|
|
485
|
-
completionTokens: number,
|
|
486
|
-
): number | null;
|
|
487
|
-
|
|
488
|
-
/**
|
|
489
|
-
* Return the set of complex provider names.
|
|
490
|
-
*
|
|
491
|
-
* Complex providers require custom auth/routing logic beyond simple bearer
|
|
492
|
-
* tokens (e.g. AWS Bedrock SigV4, Vertex AI OAuth2).
|
|
493
|
-
*
|
|
494
|
-
* The returned reference points into the static registry — no allocation.
|
|
495
|
-
*/
|
|
496
|
-
export declare function complexProviderNames(): Array<string>;
|
|
497
|
-
|
|
498
494
|
/** A single content part in a user message — text, image, document, or audio. */
|
|
499
|
-
export
|
|
500
|
-
type: string
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
inputAudio?: AudioContent;
|
|
505
|
-
}
|
|
506
|
-
|
|
507
|
-
/**
|
|
508
|
-
* Count tokens for a full `ChatCompletionRequest`.
|
|
509
|
-
*
|
|
510
|
-
* Sums tokens across all message text contents plus a per-message overhead
|
|
511
|
-
* of ~4 tokens (for role, separators, and formatting metadata). Tool
|
|
512
|
-
* definitions and multimodal content parts (images, audio, documents) are
|
|
513
|
-
* not counted — only textual content contributes to the token total.
|
|
514
|
-
*
|
|
515
|
-
* # Errors
|
|
516
|
-
*
|
|
517
|
-
* Returns `LiterLlmError.BadRequest` if the tokenizer cannot be loaded or
|
|
518
|
-
* if tokenization fails for any message.
|
|
519
|
-
*/
|
|
520
|
-
export declare function countRequestTokens(
|
|
521
|
-
model: string,
|
|
522
|
-
req?: ChatCompletionRequest | undefined | null,
|
|
523
|
-
): number;
|
|
524
|
-
|
|
525
|
-
/**
|
|
526
|
-
* Count tokens in a text string using the tokenizer for the given model.
|
|
527
|
-
*
|
|
528
|
-
* The tokenizer is resolved from the model name prefix (e.g. `"gpt-4o"` maps
|
|
529
|
-
* to the `Xenova/gpt-4o` HuggingFace tokenizer). Tokenizers are cached after
|
|
530
|
-
* first load.
|
|
531
|
-
*
|
|
532
|
-
* # Errors
|
|
533
|
-
*
|
|
534
|
-
* Returns `LiterLlmError.BadRequest` if the tokenizer cannot be loaded
|
|
535
|
-
* (e.g. network failure on first use) or if tokenization itself fails.
|
|
536
|
-
*/
|
|
537
|
-
export declare function countTokens(model: string, text: string): number;
|
|
495
|
+
export type ContentPart =
|
|
496
|
+
| { type: "text"; text: string }
|
|
497
|
+
| { type: "image_url"; imageUrl: ImageUrl }
|
|
498
|
+
| { type: "document"; document: DocumentContent }
|
|
499
|
+
| { type: "input_audio"; inputAudio: AudioContent };
|
|
538
500
|
|
|
539
501
|
/** Request to create a batch job. */
|
|
540
502
|
export interface CreateBatchRequest {
|
|
541
503
|
/** ID of the uploaded input file (JSONL format). */
|
|
542
|
-
inputFileId?: string;
|
|
504
|
+
readonly inputFileId?: string;
|
|
543
505
|
/** API endpoint (e.g., `"/v1/chat/completions"`). */
|
|
544
|
-
endpoint?: string;
|
|
506
|
+
readonly endpoint?: string;
|
|
545
507
|
/** Completion window (e.g., `"24h"`). */
|
|
546
|
-
completionWindow?: string;
|
|
508
|
+
readonly completionWindow?: string;
|
|
547
509
|
/** Optional metadata to attach to the batch. */
|
|
548
|
-
metadata?:
|
|
510
|
+
readonly metadata?: JsonValue;
|
|
549
511
|
}
|
|
550
512
|
|
|
551
|
-
/**
|
|
552
|
-
* Create a new LLM client with simple scalar configuration.
|
|
553
|
-
*
|
|
554
|
-
* This is the primary binding entry-point. All parameters except `api_key`
|
|
555
|
-
* are optional — omitting them uses the same defaults as
|
|
556
|
-
* `ClientConfigBuilder`.
|
|
557
|
-
*
|
|
558
|
-
* # Errors
|
|
559
|
-
*
|
|
560
|
-
* Returns `LiterLlmError` if the underlying HTTP client cannot be
|
|
561
|
-
* constructed, or if the resolved provider configuration is invalid.
|
|
562
|
-
*/
|
|
563
|
-
export declare function createClient(
|
|
564
|
-
apiKey: string,
|
|
565
|
-
baseUrl?: string | undefined | null,
|
|
566
|
-
timeoutSecs?: number | undefined | null,
|
|
567
|
-
maxRetries?: number | undefined | null,
|
|
568
|
-
modelHint?: string | undefined | null,
|
|
569
|
-
): DefaultClient;
|
|
570
|
-
|
|
571
|
-
/**
|
|
572
|
-
* Create a new LLM client from a JSON string.
|
|
573
|
-
*
|
|
574
|
-
* The JSON object accepts the same fields as `liter-llm.toml` (snake_case).
|
|
575
|
-
*
|
|
576
|
-
* # Errors
|
|
577
|
-
*
|
|
578
|
-
* Returns `LiterLlmError.BadRequest` if `json` is not valid JSON or
|
|
579
|
-
* contains unknown fields.
|
|
580
|
-
*/
|
|
581
|
-
export declare function createClientFromJson(json: string): DefaultClient;
|
|
582
|
-
|
|
583
513
|
/** Request to upload a file. */
|
|
584
514
|
export interface CreateFileRequest {
|
|
585
515
|
/** Base64-encoded file data. */
|
|
586
|
-
file?: string;
|
|
516
|
+
readonly file?: string;
|
|
587
517
|
/** Purpose for the file. */
|
|
588
|
-
purpose?:
|
|
518
|
+
readonly purpose?: FilePurpose;
|
|
589
519
|
/** Optional filename to associate with the upload. */
|
|
590
|
-
filename?: string;
|
|
520
|
+
readonly filename?: string;
|
|
591
521
|
}
|
|
592
522
|
|
|
593
523
|
/** Request to create images from a text prompt. */
|
|
594
524
|
export interface CreateImageRequest {
|
|
595
525
|
/** Text description of the image to generate. */
|
|
596
|
-
prompt?: string;
|
|
526
|
+
readonly prompt?: string;
|
|
597
527
|
/** Model ID (e.g., `"dall-e-3"`). Optional; API may use default if unset. */
|
|
598
|
-
model?: string;
|
|
528
|
+
readonly model?: string;
|
|
599
529
|
/** Number of images to generate. Defaults to 1. */
|
|
600
|
-
n?: number;
|
|
530
|
+
readonly n?: number;
|
|
601
531
|
/** Image size (e.g., `"1024x1024"`, `"1792x1024"`). */
|
|
602
|
-
size?: string;
|
|
532
|
+
readonly size?: string;
|
|
603
533
|
/** Image quality: `"standard"` or `"hd"`. */
|
|
604
|
-
quality?: string;
|
|
534
|
+
readonly quality?: string;
|
|
605
535
|
/** Style: `"natural"` or `"vivid"` (DALL-E 3 only). */
|
|
606
|
-
style?: string;
|
|
536
|
+
readonly style?: string;
|
|
607
537
|
/** Response format: `"url"` or `"b64_json"`. */
|
|
608
|
-
responseFormat?: string;
|
|
538
|
+
readonly responseFormat?: string;
|
|
609
539
|
/** User identifier for request tracking. */
|
|
610
|
-
user?: string;
|
|
540
|
+
readonly user?: string;
|
|
611
541
|
}
|
|
612
542
|
|
|
613
543
|
/** Request to create a structured response. */
|
|
614
544
|
export interface CreateResponseRequest {
|
|
615
545
|
/** Model ID. */
|
|
616
|
-
model?: string;
|
|
546
|
+
readonly model?: string;
|
|
617
547
|
/** Input data to process (e.g., a document to extract from). */
|
|
618
|
-
input?:
|
|
548
|
+
readonly input?: JsonValue;
|
|
619
549
|
/** Instructions for processing the input. */
|
|
620
|
-
instructions?: string;
|
|
550
|
+
readonly instructions?: string;
|
|
621
551
|
/** Available tools the model can use. */
|
|
622
|
-
tools?: Array<
|
|
552
|
+
readonly tools?: Array<ResponseTool>;
|
|
623
553
|
/** Sampling temperature in `[0.0, 2.0]`. Defaults to 1.0. */
|
|
624
|
-
temperature?: number;
|
|
554
|
+
readonly temperature?: number;
|
|
625
555
|
/** Maximum output tokens. */
|
|
626
|
-
maxOutputTokens?: number;
|
|
556
|
+
readonly maxOutputTokens?: number;
|
|
627
557
|
/** Optional metadata. */
|
|
628
|
-
metadata?:
|
|
558
|
+
readonly metadata?: JsonValue;
|
|
629
559
|
}
|
|
630
560
|
|
|
631
561
|
/** Request to generate speech audio from text. */
|
|
632
562
|
export interface CreateSpeechRequest {
|
|
633
563
|
/** Model ID (e.g., `"tts-1"`, `"tts-1-hd"`). */
|
|
634
|
-
model?: string;
|
|
564
|
+
readonly model?: string;
|
|
635
565
|
/** Text to synthesize into speech. */
|
|
636
|
-
input?: string;
|
|
566
|
+
readonly input?: string;
|
|
637
567
|
/** Voice name (e.g., `"alloy"`, `"echo"`, `"fable"`, `"onyx"`, `"nova"`, `"shimmer"`). */
|
|
638
|
-
voice?: string;
|
|
568
|
+
readonly voice?: string;
|
|
639
569
|
/** Audio format (e.g., `"mp3"`, `"opus"`, `"aac"`, `"flac"`, `"wav"`, `"pcm"`). */
|
|
640
|
-
responseFormat?: string;
|
|
570
|
+
readonly responseFormat?: string;
|
|
641
571
|
/** Playback speed in `[0.25, 4.0]`. Defaults to 1.0. */
|
|
642
|
-
speed?: number;
|
|
572
|
+
readonly speed?: number;
|
|
643
573
|
}
|
|
644
574
|
|
|
645
575
|
/** Request to transcribe audio into text. */
|
|
646
576
|
export interface CreateTranscriptionRequest {
|
|
647
577
|
/** Model ID (e.g., `"whisper-1"`). */
|
|
648
|
-
model?: string;
|
|
578
|
+
readonly model?: string;
|
|
649
579
|
/** Base64-encoded audio file data. */
|
|
650
|
-
file?: string;
|
|
580
|
+
readonly file?: string;
|
|
651
581
|
/** Language ISO-639-1 code (e.g., `"en"`, `"fr"`, `"de"`). Optional; model auto-detects. */
|
|
652
|
-
language?: string;
|
|
582
|
+
readonly language?: string;
|
|
653
583
|
/** Optional text to guide the model (improves accuracy for domain-specific terms). */
|
|
654
|
-
prompt?: string;
|
|
584
|
+
readonly prompt?: string;
|
|
655
585
|
/** Output format (e.g., `"json"`, `"text"`, `"vtt"`, `"srt"`, `"verbose_json"`). */
|
|
656
|
-
responseFormat?: string;
|
|
586
|
+
readonly responseFormat?: string;
|
|
657
587
|
/** Sampling temperature in `[0.0, 1.0]`. Higher increases variability. Defaults to 0. */
|
|
658
|
-
temperature?: number;
|
|
588
|
+
readonly temperature?: number;
|
|
659
589
|
}
|
|
660
590
|
|
|
661
591
|
/** Configuration for registering a custom LLM provider at runtime. */
|
|
662
592
|
export interface CustomProviderConfig {
|
|
663
593
|
/** Unique name for this provider (e.g., "my-provider"). */
|
|
664
|
-
name: string;
|
|
594
|
+
readonly name: string;
|
|
665
595
|
/** Base URL for the provider's API (e.g., "https://api.my-provider.com/v1"). */
|
|
666
|
-
baseUrl: string;
|
|
596
|
+
readonly baseUrl: string;
|
|
667
597
|
/** Authentication header format. */
|
|
668
|
-
authHeader:
|
|
598
|
+
readonly authHeader: AuthHeaderFormat;
|
|
669
599
|
/** Model name prefixes that route to this provider (e.g., `["my-"]`). */
|
|
670
|
-
modelPrefixes: Array<string>;
|
|
600
|
+
readonly modelPrefixes: Array<string>;
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
/**
|
|
604
|
+
* Default client implementation backed by `reqwest`.
|
|
605
|
+
*
|
|
606
|
+
* Sends requests to 143 LLM providers with automatic provider detection
|
|
607
|
+
* and per-request routing. The provider is resolved at construction time
|
|
608
|
+
* from `model_hint` (or defaults to OpenAI), but individual requests can
|
|
609
|
+
* override the provider via model name prefix (e.g. `"anthropic/claude-3-5-sonnet"`
|
|
610
|
+
* routes to Anthropic regardless of construction-time setting).
|
|
611
|
+
*
|
|
612
|
+
* When the model prefix does not match any known provider, the construction-time
|
|
613
|
+
* provider is used as the fallback. This enables seamless migration between
|
|
614
|
+
* providers by changing only the model name.
|
|
615
|
+
*
|
|
616
|
+
* The provider is stored behind an [`Arc`] so it can be shared cheaply into
|
|
617
|
+
* async closures and streaming tasks. Pre-computed auth headers and extra
|
|
618
|
+
* headers are cached at construction to avoid redundant encoding on every request.
|
|
619
|
+
*/
|
|
620
|
+
export declare class DefaultClient {
|
|
621
|
+
chat(req?: ChatCompletionRequest | undefined | null): Promise<ChatCompletionResponse>;
|
|
622
|
+
chatStream(
|
|
623
|
+
req?: ChatCompletionRequest | undefined | null,
|
|
624
|
+
): Promise<AsyncGenerator<ChatCompletionChunk, void, undefined>>;
|
|
625
|
+
embed(req?: EmbeddingRequest | undefined | null): Promise<EmbeddingResponse>;
|
|
626
|
+
listModels(): Promise<ModelsListResponse>;
|
|
627
|
+
imageGenerate(req?: CreateImageRequest | undefined | null): Promise<ImagesResponse>;
|
|
628
|
+
speech(req?: CreateSpeechRequest | undefined | null): Promise<Uint8Array>;
|
|
629
|
+
transcribe(req?: CreateTranscriptionRequest | undefined | null): Promise<TranscriptionResponse>;
|
|
630
|
+
moderate(req?: ModerationRequest | undefined | null): Promise<ModerationResponse>;
|
|
631
|
+
rerank(req?: RerankRequest | undefined | null): Promise<RerankResponse>;
|
|
632
|
+
search(req?: SearchRequest | undefined | null): Promise<SearchResponse>;
|
|
633
|
+
ocr(req?: OcrRequest | undefined | null): Promise<OcrResponse>;
|
|
634
|
+
createFile(req?: CreateFileRequest | undefined | null): Promise<FileObject>;
|
|
635
|
+
retrieveFile(fileId: string): Promise<FileObject>;
|
|
636
|
+
deleteFile(fileId: string): Promise<DeleteResponse>;
|
|
637
|
+
listFiles(query?: FileListQuery | undefined | null): Promise<FileListResponse>;
|
|
638
|
+
fileContent(fileId: string): Promise<Uint8Array>;
|
|
639
|
+
createBatch(req?: CreateBatchRequest | undefined | null): Promise<BatchObject>;
|
|
640
|
+
retrieveBatch(batchId: string): Promise<BatchObject>;
|
|
641
|
+
listBatches(query?: BatchListQuery | undefined | null): Promise<BatchListResponse>;
|
|
642
|
+
cancelBatch(batchId: string): Promise<BatchObject>;
|
|
643
|
+
fetchBatchForPolling(batchId: string): Promise<BatchObject>;
|
|
644
|
+
/**
|
|
645
|
+
* Poll a batch until it reaches a terminal status (Completed, Failed, Expired, Cancelled).
|
|
646
|
+
*
|
|
647
|
+
* Uses exponential backoff with configurable initial interval, maximum interval, and backoff multiplier.
|
|
648
|
+
* Optionally supports a timeout that aborts polling if exceeded.
|
|
649
|
+
* @throws `BatchWaitError::Failed` if the batch reaches a failure terminal status,
|
|
650
|
+
* `BatchWaitError::Timeout` if the configured timeout is exceeded, or
|
|
651
|
+
* `BatchWaitError::Client` for underlying client errors.
|
|
652
|
+
*/
|
|
653
|
+
waitForBatch(
|
|
654
|
+
batchId: string,
|
|
655
|
+
config?: WaitForBatchConfig | undefined | null,
|
|
656
|
+
): Promise<BatchObject>;
|
|
657
|
+
createResponse(req?: CreateResponseRequest | undefined | null): Promise<ResponseObject>;
|
|
658
|
+
retrieveResponse(responseId: string): Promise<ResponseObject>;
|
|
659
|
+
cancelResponse(responseId: string): Promise<ResponseObject>;
|
|
671
660
|
}
|
|
672
661
|
|
|
673
662
|
/** Response from a delete operation. */
|
|
674
663
|
export interface DeleteResponse {
|
|
675
664
|
/** ID of the deleted resource. */
|
|
676
|
-
id?: string;
|
|
665
|
+
readonly id?: string;
|
|
677
666
|
/** Object type. */
|
|
678
|
-
object?: string;
|
|
667
|
+
readonly object?: string;
|
|
679
668
|
/** Confirmation that the resource was deleted. */
|
|
680
|
-
deleted?: boolean;
|
|
669
|
+
readonly deleted?: boolean;
|
|
681
670
|
}
|
|
682
671
|
|
|
683
672
|
/** Developer message (system-like message for Claude models). */
|
|
684
673
|
export interface DeveloperMessage {
|
|
685
674
|
/** Developer-specific instructions or context. */
|
|
686
|
-
content?: string;
|
|
675
|
+
readonly content?: string;
|
|
687
676
|
/** Optional name for the developer message source. */
|
|
688
|
-
name?: string;
|
|
677
|
+
readonly name?: string;
|
|
689
678
|
}
|
|
690
679
|
|
|
691
680
|
/** PDF/document content part for vision-capable models. */
|
|
692
681
|
export interface DocumentContent {
|
|
693
682
|
/** Base64-encoded document data or URL. */
|
|
694
|
-
data?: string;
|
|
683
|
+
readonly data?: string;
|
|
695
684
|
/** MIME type (e.g., "application/pdf", "text/csv"). */
|
|
696
|
-
mediaType?: string;
|
|
685
|
+
readonly mediaType?: string;
|
|
697
686
|
}
|
|
698
687
|
|
|
699
688
|
/** The format in which the embedding vectors are returned. */
|
|
700
|
-
export declare
|
|
689
|
+
export declare enum EmbeddingFormat {
|
|
701
690
|
/** 32-bit floating-point numbers (default). */
|
|
702
691
|
Float = "float",
|
|
703
692
|
/** Base64-encoded string representation of the floats. */
|
|
704
693
|
Base64 = "base64",
|
|
705
694
|
}
|
|
706
695
|
|
|
696
|
+
/** Text or texts to embed. */
|
|
697
|
+
export declare enum EmbeddingInput {
|
|
698
|
+
/** Single text string. */
|
|
699
|
+
Single = "Single",
|
|
700
|
+
/** Multiple text strings (batch embedding). */
|
|
701
|
+
Multiple = "Multiple",
|
|
702
|
+
}
|
|
703
|
+
|
|
707
704
|
/** A single embedding vector. */
|
|
708
705
|
export interface EmbeddingObject {
|
|
709
706
|
/**
|
|
710
707
|
* Always `"embedding"` from OpenAI-compatible APIs. Stored as a plain
|
|
711
708
|
* `String` so non-standard provider values do not break deserialization.
|
|
712
709
|
*/
|
|
713
|
-
object: string;
|
|
710
|
+
readonly object: string;
|
|
714
711
|
/** The embedding vector. */
|
|
715
|
-
embedding: Array<number>;
|
|
712
|
+
readonly embedding: Array<number>;
|
|
716
713
|
/** Index in the batch (corresponds to input order). */
|
|
717
|
-
index: number;
|
|
714
|
+
readonly index: number;
|
|
718
715
|
}
|
|
719
716
|
|
|
720
717
|
/** Embedding request. */
|
|
721
718
|
export interface EmbeddingRequest {
|
|
722
719
|
/** Model ID (e.g., `"text-embedding-3-small"`). */
|
|
723
|
-
model?: string;
|
|
720
|
+
readonly model?: string;
|
|
724
721
|
/** Text or texts to embed. */
|
|
725
|
-
input?:
|
|
722
|
+
readonly input?: EmbeddingInput;
|
|
726
723
|
/** Output format: float (native) or base64. */
|
|
727
|
-
encodingFormat?:
|
|
724
|
+
readonly encodingFormat?: EmbeddingFormat;
|
|
728
725
|
/** Requested embedding dimensions (if supported by the model). */
|
|
729
|
-
dimensions?: number;
|
|
726
|
+
readonly dimensions?: number;
|
|
730
727
|
/** User identifier for request tracking. */
|
|
731
|
-
user?: string;
|
|
728
|
+
readonly user?: string;
|
|
732
729
|
}
|
|
733
730
|
|
|
734
731
|
/** Embedding response. */
|
|
@@ -737,89 +734,69 @@ export interface EmbeddingResponse {
|
|
|
737
734
|
* Always `"list"` from OpenAI-compatible APIs. Stored as a plain
|
|
738
735
|
* `String` so non-standard provider values do not break deserialization.
|
|
739
736
|
*/
|
|
740
|
-
object: string;
|
|
737
|
+
readonly object: string;
|
|
741
738
|
/** List of embeddings. */
|
|
742
|
-
data: Array<
|
|
739
|
+
readonly data: Array<EmbeddingObject>;
|
|
743
740
|
/** Model used to generate embeddings. */
|
|
744
|
-
model: string;
|
|
741
|
+
readonly model: string;
|
|
745
742
|
/** Token usage (input tokens only; embeddings have zero output tokens). */
|
|
746
|
-
usage?: Usage;
|
|
743
|
+
readonly usage?: Usage;
|
|
747
744
|
}
|
|
748
745
|
|
|
749
746
|
/** How budget limits are enforced. */
|
|
750
|
-
export declare
|
|
747
|
+
export declare enum Enforcement {
|
|
751
748
|
/**
|
|
752
749
|
* Reject requests that would exceed the budget with
|
|
753
|
-
* `LiterLlmError.
|
|
750
|
+
* [`LiterLlmError::BudgetExceeded`].
|
|
754
751
|
*/
|
|
755
752
|
Hard = "Hard",
|
|
756
753
|
/**
|
|
757
|
-
* Allow requests through but emit a `tracing
|
|
754
|
+
* Allow requests through but emit a `tracing::warn!` when the budget is
|
|
758
755
|
* exceeded.
|
|
759
756
|
*/
|
|
760
757
|
Soft = "Soft",
|
|
761
758
|
}
|
|
762
759
|
|
|
763
|
-
/**
|
|
764
|
-
* Install the `ring` crypto provider as the rustls process default, idempotently.
|
|
765
|
-
*
|
|
766
|
-
* rustls 0.23+ removed the implicit default provider. This function installs
|
|
767
|
-
* `ring` once per process. Subsequent calls are no-ops. Calling it from a
|
|
768
|
-
* downstream Rust app that has already installed `aws-lc-rs` is safe — the
|
|
769
|
-
* `Err` from `install_default()` is silently ignored.
|
|
770
|
-
*
|
|
771
|
-
* Called automatically by every internal `reqwest.Client` constructor
|
|
772
|
-
* (auth providers, default HTTP client). Bindings and downstream consumers
|
|
773
|
-
* reach those constructors transitively, so no manual init is required.
|
|
774
|
-
*
|
|
775
|
-
* WASM builds are exempt — the WASM target uses the browser/Node.js fetch
|
|
776
|
-
* API instead of rustls, so no crypto provider is needed.
|
|
777
|
-
*
|
|
778
|
-
* Windows builds use native-tls (SChannel) via reqwest, so rustls is not
|
|
779
|
-
* present and no crypto provider installation is needed.
|
|
780
|
-
*/
|
|
781
|
-
export declare function ensureCryptoProvider(): void;
|
|
782
|
-
|
|
783
760
|
/** Query parameters for listing files. */
|
|
784
761
|
export interface FileListQuery {
|
|
785
762
|
/** Filter by file purpose (e.g., `"batch"`, `"fine-tune"`). */
|
|
786
|
-
purpose?: string;
|
|
763
|
+
readonly purpose?: string;
|
|
787
764
|
/** Maximum number of results to return. Defaults to 20. */
|
|
788
|
-
limit?: number;
|
|
765
|
+
readonly limit?: number;
|
|
789
766
|
/** Pagination cursor: return results after this file ID. */
|
|
790
|
-
after?: string;
|
|
767
|
+
readonly after?: string;
|
|
791
768
|
}
|
|
792
769
|
|
|
793
770
|
/** Response from listing files. */
|
|
794
771
|
export interface FileListResponse {
|
|
795
772
|
/** Object type (always `"list"`). */
|
|
796
|
-
object?: string;
|
|
773
|
+
readonly object?: string;
|
|
797
774
|
/** List of file objects. */
|
|
798
|
-
data?: Array<FileObject>;
|
|
775
|
+
readonly data?: Array<FileObject>;
|
|
799
776
|
/** Whether more results are available. */
|
|
800
|
-
hasMore?: boolean;
|
|
777
|
+
readonly hasMore?: boolean;
|
|
801
778
|
}
|
|
802
779
|
|
|
803
780
|
/** An uploaded file object. */
|
|
804
781
|
export interface FileObject {
|
|
805
782
|
/** Unique file ID. */
|
|
806
|
-
id?: string;
|
|
783
|
+
readonly id?: string;
|
|
807
784
|
/** Object type (always `"file"`). */
|
|
808
|
-
object?: string;
|
|
785
|
+
readonly object?: string;
|
|
809
786
|
/** File size in bytes. */
|
|
810
|
-
bytes?: number;
|
|
787
|
+
readonly bytes?: number;
|
|
811
788
|
/** Unix timestamp of file creation. */
|
|
812
|
-
createdAt?: number;
|
|
789
|
+
readonly createdAt?: number;
|
|
813
790
|
/** Filename. */
|
|
814
|
-
filename?: string;
|
|
791
|
+
readonly filename?: string;
|
|
815
792
|
/** File purpose. */
|
|
816
|
-
purpose?: string;
|
|
793
|
+
readonly purpose?: string;
|
|
817
794
|
/** Processing status (e.g., `"uploaded"`, `"processed"`). */
|
|
818
|
-
status?: string;
|
|
795
|
+
readonly status?: string;
|
|
819
796
|
}
|
|
820
797
|
|
|
821
798
|
/** Purpose of an uploaded file. */
|
|
822
|
-
export declare
|
|
799
|
+
export declare enum FilePurpose {
|
|
823
800
|
/** File for use with Assistants API. */
|
|
824
801
|
Assistants = "assistants",
|
|
825
802
|
/** File for batch processing. */
|
|
@@ -831,7 +808,7 @@ export declare const enum FilePurpose {
|
|
|
831
808
|
}
|
|
832
809
|
|
|
833
810
|
/** Why a choice stopped generating tokens. */
|
|
834
|
-
export declare
|
|
811
|
+
export declare enum FinishReason {
|
|
835
812
|
Stop = "stop",
|
|
836
813
|
Length = "length",
|
|
837
814
|
ToolCalls = "tool_calls",
|
|
@@ -853,31 +830,48 @@ export declare const enum FinishReason {
|
|
|
853
830
|
/** Function call details. */
|
|
854
831
|
export interface FunctionCall {
|
|
855
832
|
/** Function name. */
|
|
856
|
-
name: string;
|
|
857
|
-
/** Arguments as a JSON string (parse with serde_json
|
|
858
|
-
arguments: string;
|
|
833
|
+
readonly name: string;
|
|
834
|
+
/** Arguments as a JSON string (parse with `JSON.parse`). */
|
|
835
|
+
readonly arguments: string;
|
|
859
836
|
}
|
|
860
837
|
|
|
861
838
|
/** Function definition exposed to the model. */
|
|
862
839
|
export interface FunctionDefinition {
|
|
863
840
|
/** Name of the function. Required and must be alphanumeric + underscores. */
|
|
864
|
-
name: string;
|
|
841
|
+
readonly name: string;
|
|
865
842
|
/** Human-readable description explaining what the function does. */
|
|
866
|
-
description?: string;
|
|
843
|
+
readonly description?: string;
|
|
867
844
|
/** JSON Schema defining the function's parameters. */
|
|
868
|
-
parameters?:
|
|
845
|
+
readonly parameters?: JsonValue;
|
|
869
846
|
/** If true, enforce strict JSON schema validation for arguments. */
|
|
870
|
-
strict?: boolean;
|
|
847
|
+
readonly strict?: boolean;
|
|
871
848
|
}
|
|
872
849
|
|
|
873
850
|
/** Deprecated legacy function-role message body. */
|
|
874
851
|
export interface FunctionMessage {
|
|
875
|
-
content?: string;
|
|
876
|
-
name?: string;
|
|
852
|
+
readonly content?: string;
|
|
853
|
+
readonly name?: string;
|
|
854
|
+
}
|
|
855
|
+
|
|
856
|
+
/**
|
|
857
|
+
* Abstraction over a health probe strategy.
|
|
858
|
+
*
|
|
859
|
+
* Implementors issue a lightweight probe against `upstream` (typically a
|
|
860
|
+
* provider base URL or named identifier) and report [`HealthStatus`].
|
|
861
|
+
*/
|
|
862
|
+
export interface HealthChecker {
|
|
863
|
+
/**
|
|
864
|
+
* Probe `upstream` and return its current [`HealthStatus`].
|
|
865
|
+
*
|
|
866
|
+
* The parameter is taken by value (`String`) so that implementations can
|
|
867
|
+
* move it into the returned future without a clone, making the
|
|
868
|
+
* `'static + Send` bound on the future trivially satisfiable.
|
|
869
|
+
*/
|
|
870
|
+
check(upstream: string): Promise<string>;
|
|
877
871
|
}
|
|
878
872
|
|
|
879
873
|
/** The result of a single health probe. */
|
|
880
|
-
export declare
|
|
874
|
+
export declare enum HealthStatus {
|
|
881
875
|
/** The probe succeeded; the upstream is reachable. */
|
|
882
876
|
Healthy = "Healthy",
|
|
883
877
|
/** The probe failed; the upstream may be down. */
|
|
@@ -887,15 +881,15 @@ export declare const enum HealthStatus {
|
|
|
887
881
|
/** A single generated image, returned as either a URL or base64 data. */
|
|
888
882
|
export interface Image {
|
|
889
883
|
/** Image URL (if response_format was "url"). */
|
|
890
|
-
url?: string;
|
|
884
|
+
readonly url?: string;
|
|
891
885
|
/** Base64-encoded image data (if response_format was "b64_json"). */
|
|
892
|
-
b64Json?: string;
|
|
886
|
+
readonly b64Json?: string;
|
|
893
887
|
/** The final prompt used to generate the image (DALL-E 3). */
|
|
894
|
-
revisedPrompt?: string;
|
|
888
|
+
readonly revisedPrompt?: string;
|
|
895
889
|
}
|
|
896
890
|
|
|
897
891
|
/** Image detail level controlling token cost and processing. */
|
|
898
|
-
export declare
|
|
892
|
+
export declare enum ImageDetail {
|
|
899
893
|
/** Low detail: scales image to 512x512, uses fewer tokens. */
|
|
900
894
|
Low = "low",
|
|
901
895
|
/** High detail: processes up to 2x2 grid of tiles, higher token cost. */
|
|
@@ -907,65 +901,63 @@ export declare const enum ImageDetail {
|
|
|
907
901
|
/** Response containing generated images. */
|
|
908
902
|
export interface ImagesResponse {
|
|
909
903
|
/** Unix timestamp of image creation. */
|
|
910
|
-
created?: number;
|
|
904
|
+
readonly created?: number;
|
|
911
905
|
/** List of generated images. */
|
|
912
|
-
data?: Array<
|
|
906
|
+
readonly data?: Array<Image>;
|
|
913
907
|
}
|
|
914
908
|
|
|
915
909
|
/** An image URL reference with optional detail level for processing. */
|
|
916
910
|
export interface ImageUrl {
|
|
917
911
|
/** URL of the image (data URI or HTTP/HTTPS URL). */
|
|
918
|
-
url?: string;
|
|
912
|
+
readonly url?: string;
|
|
919
913
|
/** Detail level: low (512x512), high (2x2 tiles), or auto (model-selected). */
|
|
920
|
-
detail?:
|
|
914
|
+
readonly detail?: ImageDetail;
|
|
921
915
|
}
|
|
922
916
|
|
|
923
917
|
/** An intent prototype: `(intent_name, prototype_embedding, target_model_id)`. */
|
|
924
918
|
export interface IntentPrototype {
|
|
925
919
|
/** Human-readable name for the intent (used in logs/metrics). */
|
|
926
|
-
name: string;
|
|
920
|
+
readonly name: string;
|
|
927
921
|
/** Pre-computed embedding vector for this intent. */
|
|
928
|
-
embedding: Array<number>;
|
|
922
|
+
readonly embedding: Array<number>;
|
|
929
923
|
/** Model to route to when this intent is detected. */
|
|
930
|
-
model: string;
|
|
924
|
+
readonly model: string;
|
|
931
925
|
}
|
|
932
926
|
|
|
933
927
|
/** JSON Schema specification for constrained output. */
|
|
934
928
|
export interface JsonSchemaFormat {
|
|
935
929
|
/** Name of the schema (must be unique in the request). */
|
|
936
|
-
name?: string;
|
|
930
|
+
readonly name?: string;
|
|
937
931
|
/** Description of what the schema represents. */
|
|
938
|
-
description?: string;
|
|
932
|
+
readonly description?: string;
|
|
939
933
|
/** JSON Schema object defining the output structure. */
|
|
940
|
-
schema?:
|
|
934
|
+
readonly schema?: JsonValue;
|
|
941
935
|
/** If true, enforce strict schema validation. */
|
|
942
|
-
strict?: boolean;
|
|
936
|
+
readonly strict?: boolean;
|
|
943
937
|
}
|
|
944
938
|
|
|
945
939
|
/** A chat message in a conversation. */
|
|
946
|
-
export
|
|
947
|
-
role:
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
function?: FunctionMessage;
|
|
954
|
-
}
|
|
940
|
+
export type Message =
|
|
941
|
+
| { role: "system"; 0: SystemMessage }
|
|
942
|
+
| { role: "user"; 0: UserMessage }
|
|
943
|
+
| { role: "assistant"; 0: AssistantMessage }
|
|
944
|
+
| { role: "tool"; 0: ToolMessage }
|
|
945
|
+
| { role: "developer"; 0: DeveloperMessage }
|
|
946
|
+
| { role: "function"; 0: FunctionMessage };
|
|
955
947
|
|
|
956
948
|
/** A model available from the API. */
|
|
957
949
|
export interface ModelObject {
|
|
958
950
|
/** Model ID (e.g., `"gpt-4o"`, `"claude-3-5-sonnet"`). */
|
|
959
|
-
id?: string;
|
|
951
|
+
readonly id?: string;
|
|
960
952
|
/**
|
|
961
953
|
* Always `"model"` from OpenAI-compatible APIs. Stored as a plain
|
|
962
954
|
* `String` so non-standard provider values do not break deserialization.
|
|
963
955
|
*/
|
|
964
|
-
object?: string;
|
|
956
|
+
readonly object?: string;
|
|
965
957
|
/** Unix timestamp of model creation (or release date). */
|
|
966
|
-
created?: number;
|
|
958
|
+
readonly created?: number;
|
|
967
959
|
/** Organization or entity that owns the model. */
|
|
968
|
-
ownedBy?: string;
|
|
960
|
+
readonly ownedBy?: string;
|
|
969
961
|
}
|
|
970
962
|
|
|
971
963
|
/** Response listing available models. */
|
|
@@ -974,162 +966,167 @@ export interface ModelsListResponse {
|
|
|
974
966
|
* Always `"list"` from OpenAI-compatible APIs. Stored as a plain
|
|
975
967
|
* `String` so non-standard provider values do not break deserialization.
|
|
976
968
|
*/
|
|
977
|
-
object?: string;
|
|
969
|
+
readonly object?: string;
|
|
978
970
|
/** List of available models. */
|
|
979
|
-
data?: Array<
|
|
971
|
+
readonly data?: Array<ModelObject>;
|
|
980
972
|
}
|
|
981
973
|
|
|
982
974
|
/** Boolean flags for each moderation category. */
|
|
983
975
|
export interface ModerationCategories {
|
|
984
976
|
/** Sexual content. */
|
|
985
|
-
sexual?: boolean;
|
|
977
|
+
readonly sexual?: boolean;
|
|
986
978
|
/** Hate speech. */
|
|
987
|
-
hate?: boolean;
|
|
979
|
+
readonly hate?: boolean;
|
|
988
980
|
/** Harassment. */
|
|
989
|
-
harassment?: boolean;
|
|
981
|
+
readonly harassment?: boolean;
|
|
990
982
|
/** Self-harm content. */
|
|
991
|
-
|
|
983
|
+
readonly selfHarm?: boolean;
|
|
992
984
|
/** Sexual content involving minors. */
|
|
993
|
-
|
|
985
|
+
readonly sexualMinors?: boolean;
|
|
994
986
|
/** Hate speech that threatens violence. */
|
|
995
|
-
|
|
987
|
+
readonly hateThreatening?: boolean;
|
|
996
988
|
/** Graphic violence. */
|
|
997
|
-
|
|
989
|
+
readonly violenceGraphic?: boolean;
|
|
998
990
|
/** Intent to self-harm. */
|
|
999
|
-
|
|
991
|
+
readonly selfHarmIntent?: boolean;
|
|
1000
992
|
/** Instructions for self-harm. */
|
|
1001
|
-
|
|
993
|
+
readonly selfHarmInstructions?: boolean;
|
|
1002
994
|
/** Harassment that threatens violence. */
|
|
1003
|
-
|
|
995
|
+
readonly harassmentThreatening?: boolean;
|
|
1004
996
|
/** Non-graphic violence. */
|
|
1005
|
-
violence?: boolean;
|
|
997
|
+
readonly violence?: boolean;
|
|
1006
998
|
}
|
|
1007
999
|
|
|
1008
1000
|
/** Confidence scores for each moderation category. */
|
|
1009
1001
|
export interface ModerationCategoryScores {
|
|
1010
1002
|
/** Sexual content score. */
|
|
1011
|
-
sexual?: number;
|
|
1003
|
+
readonly sexual?: number;
|
|
1012
1004
|
/** Hate speech score. */
|
|
1013
|
-
hate?: number;
|
|
1005
|
+
readonly hate?: number;
|
|
1014
1006
|
/** Harassment score. */
|
|
1015
|
-
harassment?: number;
|
|
1007
|
+
readonly harassment?: number;
|
|
1016
1008
|
/** Self-harm content score. */
|
|
1017
|
-
|
|
1009
|
+
readonly selfHarm?: number;
|
|
1018
1010
|
/** Sexual content involving minors score. */
|
|
1019
|
-
|
|
1011
|
+
readonly sexualMinors?: number;
|
|
1020
1012
|
/** Hate speech that threatens violence score. */
|
|
1021
|
-
|
|
1013
|
+
readonly hateThreatening?: number;
|
|
1022
1014
|
/** Graphic violence score. */
|
|
1023
|
-
|
|
1015
|
+
readonly violenceGraphic?: number;
|
|
1024
1016
|
/** Intent to self-harm score. */
|
|
1025
|
-
|
|
1017
|
+
readonly selfHarmIntent?: number;
|
|
1026
1018
|
/** Instructions for self-harm score. */
|
|
1027
|
-
|
|
1019
|
+
readonly selfHarmInstructions?: number;
|
|
1028
1020
|
/** Harassment that threatens violence score. */
|
|
1029
|
-
|
|
1021
|
+
readonly harassmentThreatening?: number;
|
|
1030
1022
|
/** Non-graphic violence score. */
|
|
1031
|
-
violence?: number;
|
|
1023
|
+
readonly violence?: number;
|
|
1024
|
+
}
|
|
1025
|
+
|
|
1026
|
+
/** Input to the moderation endpoint — a single string or multiple strings. */
|
|
1027
|
+
export declare enum ModerationInput {
|
|
1028
|
+
/** Single text string. */
|
|
1029
|
+
Single = "Single",
|
|
1030
|
+
/** Multiple text strings (batch moderation). */
|
|
1031
|
+
Multiple = "Multiple",
|
|
1032
1032
|
}
|
|
1033
1033
|
|
|
1034
1034
|
/** Request to classify content for policy violations. */
|
|
1035
1035
|
export interface ModerationRequest {
|
|
1036
1036
|
/** Text or texts to check. */
|
|
1037
|
-
input?:
|
|
1037
|
+
readonly input?: ModerationInput;
|
|
1038
1038
|
/** Model ID (e.g., `"text-moderation-latest"`). Optional; API uses default if unset. */
|
|
1039
|
-
model?: string;
|
|
1039
|
+
readonly model?: string;
|
|
1040
1040
|
}
|
|
1041
1041
|
|
|
1042
1042
|
/** Response from the moderation endpoint. */
|
|
1043
1043
|
export interface ModerationResponse {
|
|
1044
1044
|
/** Unique identifier for this moderation request. */
|
|
1045
|
-
id: string;
|
|
1045
|
+
readonly id: string;
|
|
1046
1046
|
/** Model used for classification. */
|
|
1047
|
-
model: string;
|
|
1047
|
+
readonly model: string;
|
|
1048
1048
|
/** Results for each input string. */
|
|
1049
|
-
results: Array<
|
|
1049
|
+
readonly results: Array<ModerationResult>;
|
|
1050
1050
|
}
|
|
1051
1051
|
|
|
1052
1052
|
/** A single moderation classification result. */
|
|
1053
1053
|
export interface ModerationResult {
|
|
1054
1054
|
/** True if any category was flagged. */
|
|
1055
|
-
flagged: boolean;
|
|
1055
|
+
readonly flagged: boolean;
|
|
1056
1056
|
/** Boolean flags for each moderation category. */
|
|
1057
|
-
categories:
|
|
1057
|
+
readonly categories: ModerationCategories;
|
|
1058
1058
|
/** Confidence scores for each category. */
|
|
1059
|
-
categoryScores:
|
|
1059
|
+
readonly categoryScores: ModerationCategoryScores;
|
|
1060
1060
|
}
|
|
1061
1061
|
|
|
1062
1062
|
/** Document input for OCR — either a URL or inline base64 data. */
|
|
1063
|
-
export
|
|
1064
|
-
type: string
|
|
1065
|
-
|
|
1066
|
-
data?: string;
|
|
1067
|
-
mediaType?: string;
|
|
1068
|
-
}
|
|
1063
|
+
export type OcrDocument =
|
|
1064
|
+
| { type: "document_url"; url: string }
|
|
1065
|
+
| { type: "base64"; data: string; mediaType: string };
|
|
1069
1066
|
|
|
1070
1067
|
/** An image extracted from an OCR page. */
|
|
1071
1068
|
export interface OcrImage {
|
|
1072
1069
|
/** Unique image identifier within the document. */
|
|
1073
|
-
id: string;
|
|
1070
|
+
readonly id: string;
|
|
1074
1071
|
/** Base64-encoded image data (if `include_image_base64` was true). */
|
|
1075
|
-
imageBase64?: string;
|
|
1072
|
+
readonly imageBase64?: string;
|
|
1076
1073
|
}
|
|
1077
1074
|
|
|
1078
1075
|
/** A single page of OCR output. */
|
|
1079
1076
|
export interface OcrPage {
|
|
1080
1077
|
/** Page index (0-based). */
|
|
1081
|
-
index: number;
|
|
1078
|
+
readonly index: number;
|
|
1082
1079
|
/** Extracted page content as Markdown. */
|
|
1083
|
-
markdown: string;
|
|
1080
|
+
readonly markdown: string;
|
|
1084
1081
|
/** Embedded images extracted from the page (if `include_image_base64` was true). */
|
|
1085
|
-
images?: Array<
|
|
1082
|
+
readonly images?: Array<OcrImage>;
|
|
1086
1083
|
/** Page dimensions in pixels, if available. */
|
|
1087
|
-
dimensions?:
|
|
1084
|
+
readonly dimensions?: PageDimensions;
|
|
1088
1085
|
}
|
|
1089
1086
|
|
|
1090
1087
|
/** An OCR request. */
|
|
1091
1088
|
export interface OcrRequest {
|
|
1092
1089
|
/** The model/provider to use (e.g. `"mistral/mistral-ocr-latest"`). */
|
|
1093
|
-
model?: string;
|
|
1090
|
+
readonly model?: string;
|
|
1094
1091
|
/** The document to process (URL or base64). */
|
|
1095
|
-
document?:
|
|
1092
|
+
readonly document?: OcrDocument;
|
|
1096
1093
|
/** Specific pages to process (1-indexed). `None` means all pages. */
|
|
1097
|
-
pages?: Array<number>;
|
|
1094
|
+
readonly pages?: Array<number>;
|
|
1098
1095
|
/** Whether to include base64-encoded images of each processed page. */
|
|
1099
|
-
includeImageBase64?: boolean;
|
|
1096
|
+
readonly includeImageBase64?: boolean;
|
|
1100
1097
|
}
|
|
1101
1098
|
|
|
1102
1099
|
/** An OCR response. */
|
|
1103
1100
|
export interface OcrResponse {
|
|
1104
1101
|
/** Extracted pages in order. */
|
|
1105
|
-
pages: Array<
|
|
1102
|
+
readonly pages: Array<OcrPage>;
|
|
1106
1103
|
/** Model/provider used for OCR. */
|
|
1107
|
-
model: string;
|
|
1104
|
+
readonly model: string;
|
|
1108
1105
|
/** Token usage, if reported by the provider. */
|
|
1109
|
-
usage?: Usage;
|
|
1106
|
+
readonly usage?: Usage;
|
|
1110
1107
|
}
|
|
1111
1108
|
|
|
1112
1109
|
/** Page dimensions in pixels. */
|
|
1113
1110
|
export interface PageDimensions {
|
|
1114
1111
|
/** Width in pixels. */
|
|
1115
|
-
width: number;
|
|
1112
|
+
readonly width: number;
|
|
1116
1113
|
/** Height in pixels. */
|
|
1117
|
-
height: number;
|
|
1114
|
+
readonly height: number;
|
|
1118
1115
|
}
|
|
1119
1116
|
|
|
1120
1117
|
/**
|
|
1121
1118
|
* Breakdown of tokens used in the prompt portion of a request.
|
|
1122
1119
|
*
|
|
1123
|
-
* `cached_tokens` is included in `Usage
|
|
1120
|
+
* `cached_tokens` is included in `Usage::prompt_tokens` — it is *not* an
|
|
1124
1121
|
* additional charge on top of the prompt token count. When pricing supports
|
|
1125
1122
|
* a `cache_read_input_token_cost`, the cached portion is billed at the
|
|
1126
1123
|
* discounted rate and the remainder at the regular input rate.
|
|
1127
1124
|
*/
|
|
1128
1125
|
export interface PromptTokensDetails {
|
|
1129
1126
|
/** Cached tokens present in the prompt. Defaults to 0 when absent. */
|
|
1130
|
-
cachedTokens?: number;
|
|
1127
|
+
readonly cachedTokens?: number;
|
|
1131
1128
|
/** Audio input tokens present in the prompt. Defaults to 0 when absent. */
|
|
1132
|
-
audioTokens?: number;
|
|
1129
|
+
readonly audioTokens?: number;
|
|
1133
1130
|
}
|
|
1134
1131
|
|
|
1135
1132
|
/**
|
|
@@ -1143,250 +1140,272 @@ export interface PromptTokensDetails {
|
|
|
1143
1140
|
*
|
|
1144
1141
|
* All flags default to `false` so that newly added providers are safe.
|
|
1145
1142
|
*
|
|
1146
|
-
* Access via the crate-level `capabilities` function:
|
|
1143
|
+
* Access via the crate-level [`capabilities`] function:
|
|
1144
|
+
*
|
|
1145
|
+
* ```rust
|
|
1146
|
+
* use liter_llm::capabilities;
|
|
1147
|
+
*
|
|
1148
|
+
* let caps = capabilities("openai");
|
|
1149
|
+
* assert!(caps.function_calling);
|
|
1150
|
+
* assert!(caps.vision);
|
|
1151
|
+
*
|
|
1152
|
+
* // Unknown providers return a default-all-false reference.
|
|
1153
|
+
* let unknown = capabilities("my-private-model");
|
|
1154
|
+
* assert!(!unknown.function_calling);
|
|
1155
|
+
* ```
|
|
1147
1156
|
*/
|
|
1148
1157
|
export interface ProviderCapabilities {
|
|
1149
1158
|
/** The provider accepts image input in chat messages. */
|
|
1150
|
-
vision?: boolean;
|
|
1159
|
+
readonly vision?: boolean;
|
|
1151
1160
|
/** The provider supports extended-thinking / reasoning tokens. */
|
|
1152
|
-
reasoning?: boolean;
|
|
1161
|
+
readonly reasoning?: boolean;
|
|
1153
1162
|
/** The provider supports JSON-mode or `response_format` structured output. */
|
|
1154
|
-
structuredOutput?: boolean;
|
|
1163
|
+
readonly structuredOutput?: boolean;
|
|
1155
1164
|
/** The provider supports tool / function calling. */
|
|
1156
|
-
functionCalling?: boolean;
|
|
1165
|
+
readonly functionCalling?: boolean;
|
|
1157
1166
|
/** The provider accepts audio as input. */
|
|
1158
|
-
audioIn?: boolean;
|
|
1167
|
+
readonly audioIn?: boolean;
|
|
1159
1168
|
/** The provider can generate audio / TTS output. */
|
|
1160
|
-
audioOut?: boolean;
|
|
1169
|
+
readonly audioOut?: boolean;
|
|
1161
1170
|
/** The provider accepts video as input. */
|
|
1162
|
-
videoIn?: boolean;
|
|
1171
|
+
readonly videoIn?: boolean;
|
|
1163
1172
|
}
|
|
1164
1173
|
|
|
1165
1174
|
/**
|
|
1166
1175
|
* Static configuration for a single provider entry in providers.json.
|
|
1167
1176
|
*
|
|
1168
1177
|
* This struct deliberately does not include capability flags or streaming
|
|
1169
|
-
* format, which are accessed via the `capabilities` function. Keeping
|
|
1178
|
+
* format, which are accessed via the [`capabilities`] function. Keeping
|
|
1170
1179
|
* these fields separate preserves backward compatibility with all generated
|
|
1171
1180
|
* binding code that constructs `ProviderConfig` using struct literal syntax.
|
|
1172
1181
|
*/
|
|
1173
1182
|
export interface ProviderConfig {
|
|
1174
1183
|
/** Provider identifier (matches the entry key in providers.json). */
|
|
1175
|
-
name: string;
|
|
1184
|
+
readonly name: string;
|
|
1176
1185
|
/** Human-readable provider name shown in UIs. */
|
|
1177
|
-
displayName?: string;
|
|
1186
|
+
readonly displayName?: string;
|
|
1178
1187
|
/** Base URL used as the default for this provider's HTTP client. */
|
|
1179
|
-
baseUrl?: string;
|
|
1188
|
+
readonly baseUrl?: string;
|
|
1180
1189
|
/** Authentication scheme metadata (auth type + env var holding the key). */
|
|
1181
|
-
auth?:
|
|
1190
|
+
readonly auth?: AuthConfig;
|
|
1182
1191
|
/** Supported endpoint kinds (e.g. `chat`, `embeddings`). */
|
|
1183
|
-
endpoints?: Array<string>;
|
|
1192
|
+
readonly endpoints?: Array<string>;
|
|
1184
1193
|
/** Model-name prefixes claimed by this provider (e.g. `["gpt-", "o1-"]`). */
|
|
1185
|
-
modelPrefixes?: Array<string>;
|
|
1194
|
+
readonly modelPrefixes?: Array<string>;
|
|
1186
1195
|
/**
|
|
1187
1196
|
* Parameter key renaming for this provider.
|
|
1188
1197
|
*
|
|
1189
1198
|
* Each entry maps an OpenAI-spec field name (e.g. `"max_completion_tokens"`)
|
|
1190
1199
|
* to the name this provider expects (e.g. `"max_tokens"`). Applied
|
|
1191
|
-
* automatically by `ConfigDrivenProvider.
|
|
1200
|
+
* automatically by [`ConfigDrivenProvider::transform_request`].
|
|
1192
1201
|
*/
|
|
1193
|
-
paramMappings?: Record<string, string>;
|
|
1202
|
+
readonly paramMappings?: Record<string, string>;
|
|
1194
1203
|
}
|
|
1195
1204
|
|
|
1196
1205
|
/** Configuration for per-model rate limits. */
|
|
1197
1206
|
export interface RateLimitConfig {
|
|
1198
1207
|
/** Maximum requests per window. `None` means unlimited. */
|
|
1199
|
-
rpm?: number;
|
|
1208
|
+
readonly rpm?: number;
|
|
1200
1209
|
/** Maximum tokens per window. `None` means unlimited. */
|
|
1201
|
-
tpm?: number;
|
|
1210
|
+
readonly tpm?: number;
|
|
1202
1211
|
/** Fixed window duration (defaults to 60 s). */
|
|
1203
|
-
window?: number;
|
|
1212
|
+
readonly window?: number;
|
|
1204
1213
|
}
|
|
1205
1214
|
|
|
1206
|
-
export declare function rateLimitConfigDefault(): RateLimitConfig;
|
|
1207
|
-
|
|
1208
1215
|
/** Controls how much reasoning effort the model should use. */
|
|
1209
|
-
export declare
|
|
1216
|
+
export declare enum ReasoningEffort {
|
|
1210
1217
|
Low = "low",
|
|
1211
1218
|
Medium = "medium",
|
|
1212
1219
|
High = "high",
|
|
1213
1220
|
}
|
|
1214
1221
|
|
|
1215
|
-
/**
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
*
|
|
1223
|
-
* Returns an error if the config is invalid (empty name, empty base_url, or
|
|
1224
|
-
* no model prefixes).
|
|
1225
|
-
*/
|
|
1226
|
-
export declare function registerCustomProvider(config: CustomProviderConfig): void;
|
|
1222
|
+
/** A document to be reranked — either a plain string or an object with a text field. */
|
|
1223
|
+
export declare enum RerankDocument {
|
|
1224
|
+
/** Plain text document content. */
|
|
1225
|
+
Text = "Text",
|
|
1226
|
+
/** Document with explicit text field (may include metadata). */
|
|
1227
|
+
Object = "Object",
|
|
1228
|
+
}
|
|
1227
1229
|
|
|
1228
1230
|
/** Request to rerank documents by relevance to a query. */
|
|
1229
1231
|
export interface RerankRequest {
|
|
1230
1232
|
/** Model ID (e.g., `"cohere/rerank-english-v3.0"`). */
|
|
1231
|
-
model?: string;
|
|
1233
|
+
readonly model?: string;
|
|
1232
1234
|
/** The search query. */
|
|
1233
|
-
query?: string;
|
|
1235
|
+
readonly query?: string;
|
|
1234
1236
|
/** Documents to rerank. */
|
|
1235
|
-
documents?: Array<
|
|
1237
|
+
readonly documents?: Array<RerankDocument>;
|
|
1236
1238
|
/** Return only the top N results. Optional. */
|
|
1237
|
-
topN?: number;
|
|
1239
|
+
readonly topN?: number;
|
|
1238
1240
|
/** Include the document content in results. Defaults to false. */
|
|
1239
|
-
returnDocuments?: boolean;
|
|
1241
|
+
readonly returnDocuments?: boolean;
|
|
1240
1242
|
}
|
|
1241
1243
|
|
|
1242
1244
|
/** Response from the rerank endpoint. */
|
|
1243
1245
|
export interface RerankResponse {
|
|
1244
1246
|
/** Unique identifier for this rerank request. */
|
|
1245
|
-
id?: string;
|
|
1247
|
+
readonly id?: string;
|
|
1246
1248
|
/** Reranked documents in order of relevance. */
|
|
1247
|
-
results: Array<
|
|
1249
|
+
readonly results: Array<RerankResult>;
|
|
1248
1250
|
/** Optional metadata about the reranking operation. */
|
|
1249
|
-
meta?:
|
|
1251
|
+
readonly meta?: JsonValue;
|
|
1250
1252
|
}
|
|
1251
1253
|
|
|
1252
1254
|
/** A single reranked document with its relevance score. */
|
|
1253
1255
|
export interface RerankResult {
|
|
1254
1256
|
/** Original document index in the input list. */
|
|
1255
|
-
index: number;
|
|
1257
|
+
readonly index: number;
|
|
1256
1258
|
/** Relevance score in `[0, 1]`. Higher indicates more relevant. */
|
|
1257
|
-
relevanceScore: number;
|
|
1259
|
+
readonly relevanceScore: number;
|
|
1258
1260
|
/** Original document content (if `return_documents` was true). */
|
|
1259
|
-
document?:
|
|
1261
|
+
readonly document?: RerankResultDocument;
|
|
1260
1262
|
}
|
|
1261
1263
|
|
|
1262
1264
|
/** The text content of a reranked document, returned when `return_documents` is true. */
|
|
1263
1265
|
export interface RerankResultDocument {
|
|
1264
1266
|
/** Document text. */
|
|
1265
|
-
text: string;
|
|
1267
|
+
readonly text: string;
|
|
1266
1268
|
}
|
|
1267
1269
|
|
|
1268
1270
|
/** Response format constraint. */
|
|
1269
|
-
export
|
|
1270
|
-
type:
|
|
1271
|
-
|
|
1272
|
-
}
|
|
1271
|
+
export type ResponseFormat =
|
|
1272
|
+
| { type: "text" }
|
|
1273
|
+
| { type: "json_object" }
|
|
1274
|
+
| { type: "json_schema"; jsonSchema: JsonSchemaFormat };
|
|
1273
1275
|
|
|
1274
1276
|
/** Response from a structured response request. */
|
|
1275
1277
|
export interface ResponseObject {
|
|
1276
1278
|
/** Unique response ID. */
|
|
1277
|
-
id?: string;
|
|
1279
|
+
readonly id?: string;
|
|
1278
1280
|
/** Object type (e.g., `"response"`). */
|
|
1279
|
-
object?: string;
|
|
1281
|
+
readonly object?: string;
|
|
1280
1282
|
/** Unix timestamp of response creation. */
|
|
1281
|
-
createdAt?: number;
|
|
1283
|
+
readonly createdAt?: number;
|
|
1282
1284
|
/** Model used to generate the response. */
|
|
1283
|
-
model?: string;
|
|
1285
|
+
readonly model?: string;
|
|
1284
1286
|
/** Status (e.g., `"succeeded"`, `"failed"`). */
|
|
1285
|
-
status?: string;
|
|
1287
|
+
readonly status?: string;
|
|
1286
1288
|
/** Output items from the response. */
|
|
1287
|
-
output?: Array<
|
|
1289
|
+
readonly output?: Array<ResponseOutputItem>;
|
|
1288
1290
|
/** Token usage. */
|
|
1289
|
-
usage?:
|
|
1291
|
+
readonly usage?: ResponseUsage;
|
|
1290
1292
|
/** Error details (if status is "failed"). */
|
|
1291
|
-
error?:
|
|
1293
|
+
readonly error?: JsonValue;
|
|
1292
1294
|
}
|
|
1293
1295
|
|
|
1294
1296
|
/** A single output item from the response. */
|
|
1295
1297
|
export interface ResponseOutputItem {
|
|
1296
1298
|
/** Output type (e.g., `"text"`, `"object"`, `"error"`). */
|
|
1297
|
-
|
|
1299
|
+
readonly itemType?: string;
|
|
1298
1300
|
/** Output content (flattened into the object). */
|
|
1299
|
-
content?:
|
|
1301
|
+
readonly content?: JsonValue;
|
|
1300
1302
|
}
|
|
1301
1303
|
|
|
1302
1304
|
/** A tool available for the response request. */
|
|
1303
1305
|
export interface ResponseTool {
|
|
1304
1306
|
/** Tool type (e.g., "extractor", "search"). */
|
|
1305
|
-
|
|
1307
|
+
readonly toolType?: string;
|
|
1306
1308
|
/** Tool configuration (flattened into the object). */
|
|
1307
|
-
config?:
|
|
1309
|
+
readonly config?: JsonValue;
|
|
1308
1310
|
}
|
|
1309
1311
|
|
|
1310
1312
|
/** Token usage for a response. */
|
|
1311
1313
|
export interface ResponseUsage {
|
|
1312
1314
|
/** Input tokens used. */
|
|
1313
|
-
inputTokens?: number;
|
|
1315
|
+
readonly inputTokens?: number;
|
|
1314
1316
|
/** Output tokens used. */
|
|
1315
|
-
outputTokens?: number;
|
|
1317
|
+
readonly outputTokens?: number;
|
|
1316
1318
|
/** Total tokens used. */
|
|
1317
|
-
totalTokens?: number;
|
|
1319
|
+
readonly totalTokens?: number;
|
|
1318
1320
|
}
|
|
1319
1321
|
|
|
1320
1322
|
/** A search request. */
|
|
1321
1323
|
export interface SearchRequest {
|
|
1322
1324
|
/** The model/provider to use (e.g. `"brave/web-search"`, `"tavily/search"`). */
|
|
1323
|
-
model?: string;
|
|
1325
|
+
readonly model?: string;
|
|
1324
1326
|
/** The search query string. */
|
|
1325
|
-
query?: string;
|
|
1327
|
+
readonly query?: string;
|
|
1326
1328
|
/** Maximum number of results to return. */
|
|
1327
|
-
maxResults?: number;
|
|
1329
|
+
readonly maxResults?: number;
|
|
1328
1330
|
/** Domain filter — restrict results to specific domains. */
|
|
1329
|
-
searchDomainFilter?: Array<string>;
|
|
1331
|
+
readonly searchDomainFilter?: Array<string>;
|
|
1330
1332
|
/** Country code for localized results (ISO 3166-1 alpha-2, e.g., `"US"`, `"FR"`). */
|
|
1331
|
-
country?: string;
|
|
1333
|
+
readonly country?: string;
|
|
1332
1334
|
}
|
|
1333
1335
|
|
|
1334
1336
|
/** A search response. */
|
|
1335
1337
|
export interface SearchResponse {
|
|
1336
1338
|
/** List of search results. */
|
|
1337
|
-
results: Array<
|
|
1339
|
+
readonly results: Array<SearchResult>;
|
|
1338
1340
|
/** Model/provider that performed the search. */
|
|
1339
|
-
model: string;
|
|
1341
|
+
readonly model: string;
|
|
1340
1342
|
}
|
|
1341
1343
|
|
|
1342
1344
|
/** An individual search result. */
|
|
1343
1345
|
export interface SearchResult {
|
|
1344
1346
|
/** Result title. */
|
|
1345
|
-
title: string;
|
|
1347
|
+
readonly title: string;
|
|
1346
1348
|
/** Result URL. */
|
|
1347
|
-
url: string;
|
|
1349
|
+
readonly url: string;
|
|
1348
1350
|
/** Text snippet or excerpt from the page. */
|
|
1349
|
-
snippet: string;
|
|
1351
|
+
readonly snippet: string;
|
|
1350
1352
|
/** Publication or last-updated date, if available. */
|
|
1351
|
-
date?: string;
|
|
1353
|
+
readonly date?: string;
|
|
1352
1354
|
}
|
|
1353
1355
|
|
|
1356
|
+
/**
|
|
1357
|
+
* The value broadcast from a singleflight leader to all followers.
|
|
1358
|
+
*
|
|
1359
|
+
* `Arc<LiterLlmError>` is used because `LiterLlmError` is not `Clone` and
|
|
1360
|
+
* broadcast channels require `T: Clone`. The `Arc` adds only a reference-count
|
|
1361
|
+
* bump per follower, which is negligible under the burst loads this layer targets.
|
|
1362
|
+
*/
|
|
1363
|
+
export declare class SingleflightResult {}
|
|
1364
|
+
|
|
1354
1365
|
/** Name of the specific function to invoke. */
|
|
1355
1366
|
export interface SpecificFunction {
|
|
1356
1367
|
/** Function name. */
|
|
1357
|
-
name?: string;
|
|
1368
|
+
readonly name?: string;
|
|
1358
1369
|
}
|
|
1359
1370
|
|
|
1360
1371
|
/** Directive to call a specific tool. */
|
|
1361
1372
|
export interface SpecificToolChoice {
|
|
1362
1373
|
/** Tool type (always "function"). */
|
|
1363
|
-
|
|
1374
|
+
readonly choiceType?: ToolType;
|
|
1364
1375
|
/** The specific function to invoke. */
|
|
1365
|
-
function?:
|
|
1376
|
+
readonly function?: SpecificFunction;
|
|
1377
|
+
}
|
|
1378
|
+
|
|
1379
|
+
/** Stop sequence(s) that cause the model to stop generating. */
|
|
1380
|
+
export declare enum StopSequence {
|
|
1381
|
+
/** Single stop sequence. */
|
|
1382
|
+
Single = "Single",
|
|
1383
|
+
/** Multiple stop sequences. */
|
|
1384
|
+
Multiple = "Multiple",
|
|
1366
1385
|
}
|
|
1367
1386
|
|
|
1368
1387
|
/** A streaming choice with incremental delta. */
|
|
1369
1388
|
export interface StreamChoice {
|
|
1370
1389
|
/** Index of this choice in the choices array. */
|
|
1371
|
-
index?: number;
|
|
1390
|
+
readonly index?: number;
|
|
1372
1391
|
/** Incremental update to the message (content, tool calls, etc.). */
|
|
1373
|
-
delta?:
|
|
1392
|
+
readonly delta?: StreamDelta;
|
|
1374
1393
|
/** Why the stream ended (present only in final chunk). */
|
|
1375
|
-
finishReason?:
|
|
1394
|
+
readonly finishReason?: FinishReason;
|
|
1376
1395
|
}
|
|
1377
1396
|
|
|
1378
1397
|
/** Incremental delta in a stream chunk. */
|
|
1379
1398
|
export interface StreamDelta {
|
|
1380
1399
|
/** Role (typically present only in the first chunk). */
|
|
1381
|
-
role?: string;
|
|
1400
|
+
readonly role?: string;
|
|
1382
1401
|
/** Partial content chunk (e.g., a few words of the response). */
|
|
1383
|
-
content?: string;
|
|
1402
|
+
readonly content?: string;
|
|
1384
1403
|
/** Partial tool calls being streamed. */
|
|
1385
|
-
toolCalls?: Array<
|
|
1404
|
+
readonly toolCalls?: Array<StreamToolCall>;
|
|
1386
1405
|
/** Deprecated legacy function_call delta; retained for API compatibility. */
|
|
1387
|
-
functionCall?:
|
|
1406
|
+
readonly functionCall?: StreamFunctionCall;
|
|
1388
1407
|
/** Partial refusal message. */
|
|
1389
|
-
refusal?: string;
|
|
1408
|
+
readonly refusal?: string;
|
|
1390
1409
|
}
|
|
1391
1410
|
|
|
1392
1411
|
/**
|
|
@@ -1395,9 +1414,9 @@ export interface StreamDelta {
|
|
|
1395
1414
|
* Most providers use standard Server-Sent Events (SSE). AWS Bedrock uses
|
|
1396
1415
|
* a proprietary binary EventStream framing.
|
|
1397
1416
|
*
|
|
1398
|
-
* Deserialized from the `streaming_format` JSON field via `serde
|
|
1417
|
+
* Deserialized from the `streaming_format` JSON field via [`serde`].
|
|
1399
1418
|
*/
|
|
1400
|
-
export declare
|
|
1419
|
+
export declare enum StreamFormat {
|
|
1401
1420
|
/** Standard Server-Sent Events (text/event-stream). */
|
|
1402
1421
|
Sse = "sse",
|
|
1403
1422
|
/** AWS EventStream binary framing (application/vnd.amazon.eventstream). */
|
|
@@ -1407,49 +1426,57 @@ export declare const enum StreamFormat {
|
|
|
1407
1426
|
/** Partial function call details in a stream. */
|
|
1408
1427
|
export interface StreamFunctionCall {
|
|
1409
1428
|
/** Function name (typically in the first chunk). */
|
|
1410
|
-
name?: string;
|
|
1429
|
+
readonly name?: string;
|
|
1411
1430
|
/** Partial JSON arguments chunk. */
|
|
1412
|
-
arguments?: string;
|
|
1431
|
+
readonly arguments?: string;
|
|
1413
1432
|
}
|
|
1414
1433
|
|
|
1415
1434
|
/** Options for streaming responses. */
|
|
1416
1435
|
export interface StreamOptions {
|
|
1417
1436
|
/** If true, include token usage in the final stream chunk. */
|
|
1418
|
-
includeUsage?: boolean;
|
|
1437
|
+
readonly includeUsage?: boolean;
|
|
1419
1438
|
}
|
|
1420
1439
|
|
|
1421
1440
|
/** A streaming tool call being built incrementally. */
|
|
1422
1441
|
export interface StreamToolCall {
|
|
1423
1442
|
/** Index of this tool call in the tool_calls array. */
|
|
1424
|
-
index?: number;
|
|
1443
|
+
readonly index?: number;
|
|
1425
1444
|
/** Tool call ID (typically in the first chunk for this call). */
|
|
1426
|
-
id?: string;
|
|
1445
|
+
readonly id?: string;
|
|
1427
1446
|
/** Tool type (typically "function"). */
|
|
1428
|
-
|
|
1447
|
+
readonly callType?: ToolType;
|
|
1429
1448
|
/** Partial function name and arguments. */
|
|
1430
|
-
function?:
|
|
1449
|
+
readonly function?: StreamFunctionCall;
|
|
1431
1450
|
}
|
|
1432
1451
|
|
|
1433
1452
|
/** System message guiding model behavior for the entire conversation. */
|
|
1434
1453
|
export interface SystemMessage {
|
|
1435
1454
|
/** Instructions or context that apply throughout the conversation. */
|
|
1436
|
-
content?: string;
|
|
1455
|
+
readonly content?: string;
|
|
1437
1456
|
/** Optional name for the system message source. */
|
|
1438
|
-
name?: string;
|
|
1457
|
+
readonly name?: string;
|
|
1439
1458
|
}
|
|
1440
1459
|
|
|
1441
1460
|
/** A tool call the model wants to execute. */
|
|
1442
1461
|
export interface ToolCall {
|
|
1443
1462
|
/** Unique ID for this call, used to reference in tool result messages. */
|
|
1444
|
-
id: string;
|
|
1463
|
+
readonly id: string;
|
|
1445
1464
|
/** Tool type (always "function"). */
|
|
1446
|
-
|
|
1465
|
+
readonly callType: ToolType;
|
|
1447
1466
|
/** Function name and arguments. */
|
|
1448
|
-
function:
|
|
1467
|
+
readonly function: FunctionCall;
|
|
1468
|
+
}
|
|
1469
|
+
|
|
1470
|
+
/** Tool usage mode or a specific tool to call. */
|
|
1471
|
+
export declare enum ToolChoice {
|
|
1472
|
+
/** Predefined mode: auto, required, or none. */
|
|
1473
|
+
Mode = "Mode",
|
|
1474
|
+
/** Force a specific tool to be called. */
|
|
1475
|
+
Specific = "Specific",
|
|
1449
1476
|
}
|
|
1450
1477
|
|
|
1451
1478
|
/** Tool choice mode. */
|
|
1452
|
-
export declare
|
|
1479
|
+
export declare enum ToolChoiceMode {
|
|
1453
1480
|
/** Model may or may not call tools; default behavior. */
|
|
1454
1481
|
Auto = "auto",
|
|
1455
1482
|
/** Model must call at least one tool. */
|
|
@@ -1461,11 +1488,11 @@ export declare const enum ToolChoiceMode {
|
|
|
1461
1488
|
/** Tool execution result returned to the model. */
|
|
1462
1489
|
export interface ToolMessage {
|
|
1463
1490
|
/** Result of the tool execution. */
|
|
1464
|
-
content?: string;
|
|
1491
|
+
readonly content?: string;
|
|
1465
1492
|
/** ID of the tool call this result responds to. */
|
|
1466
|
-
toolCallId?: string;
|
|
1493
|
+
readonly toolCallId?: string;
|
|
1467
1494
|
/** Optional tool/function name. */
|
|
1468
|
-
name?: string;
|
|
1495
|
+
readonly name?: string;
|
|
1469
1496
|
}
|
|
1470
1497
|
|
|
1471
1498
|
/**
|
|
@@ -1475,68 +1502,64 @@ export interface ToolMessage {
|
|
|
1475
1502
|
* that constraint at the type level and rejects any other value on
|
|
1476
1503
|
* deserialization.
|
|
1477
1504
|
*/
|
|
1478
|
-
export declare
|
|
1505
|
+
export declare enum ToolType {
|
|
1479
1506
|
Function = "function",
|
|
1480
1507
|
}
|
|
1481
1508
|
|
|
1482
1509
|
/** Response from a transcription request. */
|
|
1483
1510
|
export interface TranscriptionResponse {
|
|
1484
1511
|
/** The transcribed text. */
|
|
1485
|
-
text?: string;
|
|
1512
|
+
readonly text?: string;
|
|
1486
1513
|
/** Detected language (ISO-639-1 code). */
|
|
1487
|
-
language?: string;
|
|
1514
|
+
readonly language?: string;
|
|
1488
1515
|
/** Total audio duration in seconds. */
|
|
1489
|
-
duration?: number;
|
|
1516
|
+
readonly duration?: number;
|
|
1490
1517
|
/** Detailed segment-level transcription (if response_format is "verbose_json"). */
|
|
1491
|
-
segments?: Array<
|
|
1518
|
+
readonly segments?: Array<TranscriptionSegment>;
|
|
1492
1519
|
}
|
|
1493
1520
|
|
|
1494
1521
|
/** A segment of transcribed audio with timing information. */
|
|
1495
1522
|
export interface TranscriptionSegment {
|
|
1496
1523
|
/** Segment index (0-based). */
|
|
1497
|
-
id?: number;
|
|
1524
|
+
readonly id?: number;
|
|
1498
1525
|
/** Start time in seconds. */
|
|
1499
|
-
start?: number;
|
|
1526
|
+
readonly start?: number;
|
|
1500
1527
|
/** End time in seconds. */
|
|
1501
|
-
end?: number;
|
|
1528
|
+
readonly end?: number;
|
|
1502
1529
|
/** Transcribed text for this segment. */
|
|
1503
|
-
text?: string;
|
|
1530
|
+
readonly text?: string;
|
|
1504
1531
|
}
|
|
1505
1532
|
|
|
1506
|
-
/**
|
|
1507
|
-
* Remove a previously registered custom provider by name.
|
|
1508
|
-
*
|
|
1509
|
-
* Returns `true` if a provider with the given name was found and removed,
|
|
1510
|
-
* `false` if no such provider existed.
|
|
1511
|
-
*
|
|
1512
|
-
* # Errors
|
|
1513
|
-
*
|
|
1514
|
-
* Returns an error only if the internal lock is poisoned.
|
|
1515
|
-
*/
|
|
1516
|
-
export declare function unregisterCustomProvider(name: string): boolean;
|
|
1517
|
-
|
|
1518
1533
|
/** Token-usage accounting returned by the provider on each completion / embedding call. */
|
|
1519
1534
|
export interface Usage {
|
|
1520
1535
|
/** Prompt tokens used. Defaults to 0 when absent (some providers omit this). */
|
|
1521
|
-
promptTokens?: number;
|
|
1536
|
+
readonly promptTokens?: number;
|
|
1522
1537
|
/** Completion tokens used. Defaults to 0 when absent (e.g. embedding responses). */
|
|
1523
|
-
completionTokens?: number;
|
|
1538
|
+
readonly completionTokens?: number;
|
|
1524
1539
|
/** Total tokens used. Defaults to 0 when absent (some providers omit this). */
|
|
1525
|
-
totalTokens?: number;
|
|
1540
|
+
readonly totalTokens?: number;
|
|
1526
1541
|
/**
|
|
1527
1542
|
* Breakdown of tokens used in the prompt, including cached tokens served
|
|
1528
1543
|
* at the provider's discounted cache-read rate. Absent when the provider
|
|
1529
1544
|
* does not return prompt-token details.
|
|
1530
1545
|
*/
|
|
1531
|
-
promptTokensDetails?:
|
|
1546
|
+
readonly promptTokensDetails?: PromptTokensDetails;
|
|
1547
|
+
}
|
|
1548
|
+
|
|
1549
|
+
/** User message content as either plain text or a list of multimodal parts. */
|
|
1550
|
+
export declare enum UserContent {
|
|
1551
|
+
/** Plain text content. */
|
|
1552
|
+
Text = "Text",
|
|
1553
|
+
/** Array of content parts (text, images, documents, audio). */
|
|
1554
|
+
Parts = "Parts",
|
|
1532
1555
|
}
|
|
1533
1556
|
|
|
1534
1557
|
/** User message in the conversation. */
|
|
1535
1558
|
export interface UserMessage {
|
|
1536
1559
|
/** Message content as plain text or array of content parts (text, images, documents, audio). */
|
|
1537
|
-
content?:
|
|
1560
|
+
readonly content?: UserContent;
|
|
1538
1561
|
/** Optional name for the user. */
|
|
1539
|
-
name?: string;
|
|
1562
|
+
readonly name?: string;
|
|
1540
1563
|
}
|
|
1541
1564
|
|
|
1542
1565
|
/**
|
|
@@ -1547,13 +1570,41 @@ export interface UserMessage {
|
|
|
1547
1570
|
*/
|
|
1548
1571
|
export interface WaitForBatchConfig {
|
|
1549
1572
|
/** Initial interval between polls, in seconds. */
|
|
1550
|
-
initialIntervalSecs?: number;
|
|
1573
|
+
readonly initialIntervalSecs?: number;
|
|
1551
1574
|
/** Maximum interval between polls (backoff plateau), in seconds. */
|
|
1552
|
-
maxIntervalSecs?: number;
|
|
1575
|
+
readonly maxIntervalSecs?: number;
|
|
1553
1576
|
/** Exponential backoff multiplier (e.g., 1.5 increases delay by 50% each poll). */
|
|
1554
|
-
backoffMultiplier?: number;
|
|
1577
|
+
readonly backoffMultiplier?: number;
|
|
1555
1578
|
/** Optional timeout in seconds — polling fails if this duration is exceeded. */
|
|
1556
|
-
timeoutSecs?: number;
|
|
1579
|
+
readonly timeoutSecs?: number;
|
|
1580
|
+
}
|
|
1581
|
+
|
|
1582
|
+
/**
|
|
1583
|
+
* Register a custom provider in the global runtime registry.
|
|
1584
|
+
*
|
|
1585
|
+
* The provider will be checked **before** all built-in providers during model
|
|
1586
|
+
* detection. If a provider with the same `name` already exists it is replaced.
|
|
1587
|
+
* @throws Returns an error if the config is invalid (empty name, empty base_url, or
|
|
1588
|
+
* no model prefixes).
|
|
1589
|
+
*/
|
|
1590
|
+
export declare function registerCustomProvider(config: CustomProviderConfig): void;
|
|
1591
|
+
|
|
1592
|
+
/**
|
|
1593
|
+
* Remove a previously registered custom provider by name.
|
|
1594
|
+
*
|
|
1595
|
+
* Returns `true` if a provider with the given name was found and removed,
|
|
1596
|
+
* `false` if no such provider existed.
|
|
1597
|
+
* @throws Returns an error only if the internal lock is poisoned.
|
|
1598
|
+
*/
|
|
1599
|
+
export declare function unregisterCustomProvider(name: string): boolean;
|
|
1600
|
+
|
|
1601
|
+
export declare class ChatStreamIterator {
|
|
1602
|
+
next(value?: undefined): Promise<IteratorResult<ChatCompletionChunk, void>>;
|
|
1603
|
+
[Symbol.asyncIterator](): AsyncGenerator<ChatCompletionChunk, void, undefined>;
|
|
1557
1604
|
}
|
|
1558
1605
|
|
|
1559
|
-
export declare
|
|
1606
|
+
export declare class LiterLlmErrorInfo {
|
|
1607
|
+
statusCode(): number;
|
|
1608
|
+
isTransient(): boolean;
|
|
1609
|
+
errorType(): string;
|
|
1610
|
+
}
|