@happyvertical/ai 0.74.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT.md +33 -0
- package/LICENSE +7 -0
- package/README.md +384 -0
- package/dist/chunks/anthropic-BRwbhwIl.js +463 -0
- package/dist/chunks/anthropic-BRwbhwIl.js.map +1 -0
- package/dist/chunks/bedrock-Cf1xUerN.js +808 -0
- package/dist/chunks/bedrock-Cf1xUerN.js.map +1 -0
- package/dist/chunks/bifrost-3mXtQsTj.js +233 -0
- package/dist/chunks/bifrost-3mXtQsTj.js.map +1 -0
- package/dist/chunks/claude-cli-BrHRfkry.js +603 -0
- package/dist/chunks/claude-cli-BrHRfkry.js.map +1 -0
- package/dist/chunks/gateway-admin-C4GFPbZF.js +359 -0
- package/dist/chunks/gateway-admin-C4GFPbZF.js.map +1 -0
- package/dist/chunks/gemini-BfpHXDIQ.js +662 -0
- package/dist/chunks/gemini-BfpHXDIQ.js.map +1 -0
- package/dist/chunks/huggingface-280qv9iv.js +366 -0
- package/dist/chunks/huggingface-280qv9iv.js.map +1 -0
- package/dist/chunks/index-BT4thAvS.js +934 -0
- package/dist/chunks/index-BT4thAvS.js.map +1 -0
- package/dist/chunks/litellm-DhPKa_Jz.js +220 -0
- package/dist/chunks/litellm-DhPKa_Jz.js.map +1 -0
- package/dist/chunks/ollama-Di1ldur0.js +851 -0
- package/dist/chunks/ollama-Di1ldur0.js.map +1 -0
- package/dist/chunks/openai-5snI2diE.js +749 -0
- package/dist/chunks/openai-5snI2diE.js.map +1 -0
- package/dist/chunks/qwen-tts-DgPgdXxG.js +365 -0
- package/dist/chunks/qwen-tts-DgPgdXxG.js.map +1 -0
- package/dist/chunks/usage-DMWiJ2oB.js +21 -0
- package/dist/chunks/usage-DMWiJ2oB.js.map +1 -0
- package/dist/cli/claude-context.d.ts +3 -0
- package/dist/cli/claude-context.d.ts.map +1 -0
- package/dist/cli/claude-context.js +21 -0
- package/dist/cli/claude-context.js.map +1 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +21 -0
- package/dist/index.js.map +1 -0
- package/dist/node/factory.d.ts +27 -0
- package/dist/node/factory.d.ts.map +1 -0
- package/dist/shared/client.d.ts +410 -0
- package/dist/shared/client.d.ts.map +1 -0
- package/dist/shared/factory.d.ts +83 -0
- package/dist/shared/factory.d.ts.map +1 -0
- package/dist/shared/message.d.ts +71 -0
- package/dist/shared/message.d.ts.map +1 -0
- package/dist/shared/providers/anthropic.d.ts +82 -0
- package/dist/shared/providers/anthropic.d.ts.map +1 -0
- package/dist/shared/providers/bedrock.d.ts +49 -0
- package/dist/shared/providers/bedrock.d.ts.map +1 -0
- package/dist/shared/providers/bifrost.d.ts +25 -0
- package/dist/shared/providers/bifrost.d.ts.map +1 -0
- package/dist/shared/providers/claude-cli.d.ts +139 -0
- package/dist/shared/providers/claude-cli.d.ts.map +1 -0
- package/dist/shared/providers/gateway-admin.d.ts +35 -0
- package/dist/shared/providers/gateway-admin.d.ts.map +1 -0
- package/dist/shared/providers/gemini.d.ts +116 -0
- package/dist/shared/providers/gemini.d.ts.map +1 -0
- package/dist/shared/providers/huggingface.d.ts +33 -0
- package/dist/shared/providers/huggingface.d.ts.map +1 -0
- package/dist/shared/providers/litellm.d.ts +25 -0
- package/dist/shared/providers/litellm.d.ts.map +1 -0
- package/dist/shared/providers/ollama.d.ts +47 -0
- package/dist/shared/providers/ollama.d.ts.map +1 -0
- package/dist/shared/providers/openai.d.ts +272 -0
- package/dist/shared/providers/openai.d.ts.map +1 -0
- package/dist/shared/providers/qwen-tts.d.ts +85 -0
- package/dist/shared/providers/qwen-tts.d.ts.map +1 -0
- package/dist/shared/providers/usage.d.ts +14 -0
- package/dist/shared/providers/usage.d.ts.map +1 -0
- package/dist/shared/rate-limit.d.ts +13 -0
- package/dist/shared/rate-limit.d.ts.map +1 -0
- package/dist/shared/thread.d.ts +104 -0
- package/dist/shared/thread.d.ts.map +1 -0
- package/dist/shared/types.d.ts +1779 -0
- package/dist/shared/types.d.ts.map +1 -0
- package/metadata.json +35 -0
- package/package.json +62 -0
|
@@ -0,0 +1,1779 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Core types and interfaces for the AI library
|
|
3
|
+
*/
|
|
4
|
+
/**
|
|
5
|
+
* Thinking level options for Gemini 3 models.
|
|
6
|
+
* Controls the amount of internal reasoning the model performs
|
|
7
|
+
*/
|
|
8
|
+
export type GeminiThinkingLevel = 'minimal' | 'low' | 'medium' | 'high';
|
|
9
|
+
/**
|
|
10
|
+
* Supported AI provider types
|
|
11
|
+
*/
|
|
12
|
+
export declare const AI_PROVIDER_TYPES: readonly ["openai", "litellm", "bifrost", "ollama", "gemini", "anthropic", "huggingface", "bedrock", "claude-cli", "qwen3-tts"];
|
|
13
|
+
/**
|
|
14
|
+
* Supported AI provider type union
|
|
15
|
+
*/
|
|
16
|
+
export type AIProviderType = (typeof AI_PROVIDER_TYPES)[number];
|
|
17
|
+
/**
|
|
18
|
+
* Text content part for multimodal messages
|
|
19
|
+
*/
|
|
20
|
+
export interface TextContentPart {
|
|
21
|
+
type: 'text';
|
|
22
|
+
text: string;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Image content part for vision-capable models
|
|
26
|
+
*/
|
|
27
|
+
export interface ImageContentPart {
|
|
28
|
+
type: 'image_url';
|
|
29
|
+
image_url: {
|
|
30
|
+
/** Image URL (http/https) or base64 data URL */
|
|
31
|
+
url: string;
|
|
32
|
+
/** Image detail level for processing */
|
|
33
|
+
detail?: 'auto' | 'low' | 'high';
|
|
34
|
+
};
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Union type for all content parts in multimodal messages
|
|
38
|
+
*/
|
|
39
|
+
export type ContentPart = TextContentPart | ImageContentPart;
|
|
40
|
+
/**
|
|
41
|
+
* Extract text content from a message content field.
|
|
42
|
+
*
|
|
43
|
+
* Handles both simple string content and multimodal content arrays,
|
|
44
|
+
* extracting only the text parts and concatenating them.
|
|
45
|
+
*
|
|
46
|
+
* @param content - The message content (string or ContentPart array)
|
|
47
|
+
* @returns The extracted text content
|
|
48
|
+
*/
|
|
49
|
+
export declare function extractTextContent(content: string | ContentPart[]): string;
|
|
50
|
+
/**
|
|
51
|
+
* AI message structure for chat interactions
|
|
52
|
+
*
|
|
53
|
+
* Supports both simple string content and multimodal content arrays
|
|
54
|
+
* for vision-capable models.
|
|
55
|
+
*
|
|
56
|
+
* @example Simple text message
|
|
57
|
+
* ```typescript
|
|
58
|
+
* const message: AIMessage = {
|
|
59
|
+
* role: 'user',
|
|
60
|
+
* content: 'Hello, how are you?'
|
|
61
|
+
* };
|
|
62
|
+
* ```
|
|
63
|
+
*
|
|
64
|
+
* @example Multimodal message with image
|
|
65
|
+
* ```typescript
|
|
66
|
+
* const message: AIMessage = {
|
|
67
|
+
* role: 'user',
|
|
68
|
+
* content: [
|
|
69
|
+
* { type: 'text', text: 'What is in this image?' },
|
|
70
|
+
* { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } }
|
|
71
|
+
* ]
|
|
72
|
+
* };
|
|
73
|
+
* ```
|
|
74
|
+
*/
|
|
75
|
+
export interface AIMessage {
|
|
76
|
+
/**
|
|
77
|
+
* Role of the message sender
|
|
78
|
+
*/
|
|
79
|
+
role: 'system' | 'user' | 'assistant' | 'function' | 'tool';
|
|
80
|
+
/**
|
|
81
|
+
* Content of the message.
|
|
82
|
+
*
|
|
83
|
+
* Can be a simple string for text-only messages, or an array of content parts
|
|
84
|
+
* for multimodal messages (e.g., text + images for vision models).
|
|
85
|
+
*/
|
|
86
|
+
content: string | ContentPart[];
|
|
87
|
+
/**
|
|
88
|
+
* Optional name for the message sender
|
|
89
|
+
*/
|
|
90
|
+
name?: string;
|
|
91
|
+
/**
|
|
92
|
+
* Optional tool calls
|
|
93
|
+
*/
|
|
94
|
+
tool_calls?: Array<{
|
|
95
|
+
id: string;
|
|
96
|
+
type: 'function';
|
|
97
|
+
function: {
|
|
98
|
+
name: string;
|
|
99
|
+
arguments: string;
|
|
100
|
+
};
|
|
101
|
+
}>;
|
|
102
|
+
}
|
|
103
|
+
/**
|
|
104
|
+
* Options for chat completion requests
|
|
105
|
+
*/
|
|
106
|
+
export interface ChatOptions {
|
|
107
|
+
/**
|
|
108
|
+
* Model to use for completion
|
|
109
|
+
*/
|
|
110
|
+
model?: string;
|
|
111
|
+
/**
|
|
112
|
+
* Maximum number of tokens to generate
|
|
113
|
+
*/
|
|
114
|
+
maxTokens?: number;
|
|
115
|
+
/**
|
|
116
|
+
* Sampling temperature (0-2)
|
|
117
|
+
*/
|
|
118
|
+
temperature?: number;
|
|
119
|
+
/**
|
|
120
|
+
* Top-p sampling parameter
|
|
121
|
+
*/
|
|
122
|
+
topP?: number;
|
|
123
|
+
/**
|
|
124
|
+
* Number of completions to generate
|
|
125
|
+
*/
|
|
126
|
+
n?: number;
|
|
127
|
+
/**
|
|
128
|
+
* Sequences that stop generation
|
|
129
|
+
*/
|
|
130
|
+
stop?: string | string[];
|
|
131
|
+
/**
|
|
132
|
+
* Whether to stream the response
|
|
133
|
+
*/
|
|
134
|
+
stream?: boolean;
|
|
135
|
+
/**
|
|
136
|
+
* Penalty for frequency of tokens
|
|
137
|
+
*/
|
|
138
|
+
frequencyPenalty?: number;
|
|
139
|
+
/**
|
|
140
|
+
* Penalty for presence of tokens
|
|
141
|
+
*/
|
|
142
|
+
presencePenalty?: number;
|
|
143
|
+
/**
|
|
144
|
+
* User identifier for monitoring
|
|
145
|
+
*/
|
|
146
|
+
user?: string;
|
|
147
|
+
/**
|
|
148
|
+
* Available tools/functions
|
|
149
|
+
*/
|
|
150
|
+
tools?: AITool[];
|
|
151
|
+
/**
|
|
152
|
+
* Tool choice behavior
|
|
153
|
+
*/
|
|
154
|
+
toolChoice?: 'auto' | 'none' | {
|
|
155
|
+
type: 'function';
|
|
156
|
+
function: {
|
|
157
|
+
name: string;
|
|
158
|
+
};
|
|
159
|
+
};
|
|
160
|
+
/**
|
|
161
|
+
* Response format specification
|
|
162
|
+
*/
|
|
163
|
+
responseFormat?: {
|
|
164
|
+
type: 'text' | 'json_object';
|
|
165
|
+
};
|
|
166
|
+
/**
|
|
167
|
+
* Random seed for deterministic results
|
|
168
|
+
*/
|
|
169
|
+
seed?: number;
|
|
170
|
+
/**
|
|
171
|
+
* Callback for streaming responses
|
|
172
|
+
*/
|
|
173
|
+
onProgress?: (chunk: string) => void;
|
|
174
|
+
/**
|
|
175
|
+
* Thinking level for providers that expose reasoning controls.
|
|
176
|
+
* Gemini 3 models use named levels:
|
|
177
|
+
* - 'minimal': No thinking for most queries (Gemini 3 Flash only)
|
|
178
|
+
* - 'low': Minimizes latency and cost, good for simple tasks
|
|
179
|
+
* - 'medium': Balanced thinking for most tasks (Gemini 3 Flash only)
|
|
180
|
+
* - 'high': Maximizes reasoning depth (default for Gemini 3)
|
|
181
|
+
*
|
|
182
|
+
* Ollama also accepts `false` to explicitly disable visible/internal thinking
|
|
183
|
+
* for models that support it.
|
|
184
|
+
*/
|
|
185
|
+
thinkingLevel?: GeminiThinkingLevel | false;
|
|
186
|
+
/**
|
|
187
|
+
* Whether to include the model's internal thoughts in the response.
|
|
188
|
+
* Only applicable for Gemini 3 models with thinking enabled
|
|
189
|
+
*/
|
|
190
|
+
includeThoughts?: boolean;
|
|
191
|
+
/**
|
|
192
|
+
* Custom tags to attach to the usage event for this call.
|
|
193
|
+
* Merged over any global `usageTags` from provider options.
|
|
194
|
+
*/
|
|
195
|
+
usageTags?: Record<string, string>;
|
|
196
|
+
}
|
|
197
|
+
/**
|
|
198
|
+
* Options for text completion requests (non-chat models)
|
|
199
|
+
*/
|
|
200
|
+
export interface CompletionOptions {
|
|
201
|
+
/**
|
|
202
|
+
* Model to use for completion
|
|
203
|
+
*/
|
|
204
|
+
model?: string;
|
|
205
|
+
/**
|
|
206
|
+
* Maximum number of tokens to generate
|
|
207
|
+
*/
|
|
208
|
+
maxTokens?: number;
|
|
209
|
+
/**
|
|
210
|
+
* Sampling temperature
|
|
211
|
+
*/
|
|
212
|
+
temperature?: number;
|
|
213
|
+
/**
|
|
214
|
+
* Top-p sampling parameter
|
|
215
|
+
*/
|
|
216
|
+
topP?: number;
|
|
217
|
+
/**
|
|
218
|
+
* Number of completions to generate
|
|
219
|
+
*/
|
|
220
|
+
n?: number;
|
|
221
|
+
/**
|
|
222
|
+
* Sequences that stop generation
|
|
223
|
+
*/
|
|
224
|
+
stop?: string | string[];
|
|
225
|
+
/**
|
|
226
|
+
* Whether to stream the response
|
|
227
|
+
*/
|
|
228
|
+
stream?: boolean;
|
|
229
|
+
/**
|
|
230
|
+
* Callback for streaming responses
|
|
231
|
+
*/
|
|
232
|
+
onProgress?: (chunk: string) => void;
|
|
233
|
+
/**
|
|
234
|
+
* Custom tags to attach to the usage event for this call.
|
|
235
|
+
* Merged over any global `usageTags` from provider options.
|
|
236
|
+
*/
|
|
237
|
+
usageTags?: Record<string, string>;
|
|
238
|
+
}
|
|
239
|
+
/**
|
|
240
|
+
* Options for embedding generation
|
|
241
|
+
*/
|
|
242
|
+
export interface EmbeddingOptions {
|
|
243
|
+
/**
|
|
244
|
+
* Model to use for embeddings
|
|
245
|
+
*/
|
|
246
|
+
model?: string;
|
|
247
|
+
/**
|
|
248
|
+
* User identifier for monitoring
|
|
249
|
+
*/
|
|
250
|
+
user?: string;
|
|
251
|
+
/**
|
|
252
|
+
* Encoding format for embeddings
|
|
253
|
+
*/
|
|
254
|
+
encodingFormat?: 'float' | 'base64';
|
|
255
|
+
/**
|
|
256
|
+
* Number of dimensions for the embedding
|
|
257
|
+
*/
|
|
258
|
+
dimensions?: number;
|
|
259
|
+
/**
|
|
260
|
+
* Custom tags to attach to the usage event for this call.
|
|
261
|
+
* Merged over any global `usageTags` from provider options.
|
|
262
|
+
*/
|
|
263
|
+
usageTags?: Record<string, string>;
|
|
264
|
+
}
|
|
265
|
+
/**
|
|
266
|
+
* Options for image embedding generation
|
|
267
|
+
*/
|
|
268
|
+
export interface ImageEmbeddingOptions {
|
|
269
|
+
/**
|
|
270
|
+
* Model to use for image embeddings
|
|
271
|
+
* - Gemini: 'multimodalembedding@001' or similar
|
|
272
|
+
* - OpenAI: Uses describe-then-embed with text-embedding-3-small
|
|
273
|
+
*/
|
|
274
|
+
model?: string;
|
|
275
|
+
/**
|
|
276
|
+
* Number of dimensions for the embedding output
|
|
277
|
+
*/
|
|
278
|
+
dimensions?: number;
|
|
279
|
+
/**
|
|
280
|
+
* User identifier for monitoring
|
|
281
|
+
*/
|
|
282
|
+
user?: string;
|
|
283
|
+
}
|
|
284
|
+
/**
|
|
285
|
+
* Options for image description generation
|
|
286
|
+
*/
|
|
287
|
+
export interface ImageDescriptionOptions {
|
|
288
|
+
/**
|
|
289
|
+
* Model to use for image description
|
|
290
|
+
* - OpenAI: defaults to 'gpt-4o'
|
|
291
|
+
* - Gemini: defaults to 'gemini-2.5-flash'
|
|
292
|
+
*/
|
|
293
|
+
model?: string;
|
|
294
|
+
/**
|
|
295
|
+
* Maximum tokens for the description
|
|
296
|
+
*/
|
|
297
|
+
maxTokens?: number;
|
|
298
|
+
/**
|
|
299
|
+
* Detail level for image processing (OpenAI-specific)
|
|
300
|
+
*/
|
|
301
|
+
detail?: 'auto' | 'low' | 'high';
|
|
302
|
+
}
|
|
303
|
+
/**
|
|
304
|
+
* Options for image generation
|
|
305
|
+
*/
|
|
306
|
+
export interface ImageGenerationOptions {
|
|
307
|
+
/**
|
|
308
|
+
* Model to use for image generation
|
|
309
|
+
* - OpenAI: 'dall-e-3' (default), 'dall-e-2'
|
|
310
|
+
* - Gemini: 'imagen-3.0-generate-002' (default)
|
|
311
|
+
*/
|
|
312
|
+
model?: string;
|
|
313
|
+
/**
|
|
314
|
+
* Input image for image-to-image workflows
|
|
315
|
+
* Can be a URL (http/https), base64 data URL, or Buffer
|
|
316
|
+
*/
|
|
317
|
+
imageInput?: string | Buffer;
|
|
318
|
+
/**
|
|
319
|
+
* Aspect ratio for the generated image
|
|
320
|
+
* e.g., "16:9", "1:1", "4:3", "3:4", "9:16"
|
|
321
|
+
*/
|
|
322
|
+
aspectRatio?: string;
|
|
323
|
+
/**
|
|
324
|
+
* Output format for the generated image
|
|
325
|
+
* - 'buffer': Returns raw image bytes (default)
|
|
326
|
+
* - 'base64': Returns base64-encoded string
|
|
327
|
+
* - 'url': Returns temporary URL (provider-dependent, may expire)
|
|
328
|
+
*/
|
|
329
|
+
outputFormat?: 'buffer' | 'base64' | 'url';
|
|
330
|
+
/**
|
|
331
|
+
* Number of images to generate (provider-dependent)
|
|
332
|
+
* - DALL-E 3: Only 1 supported
|
|
333
|
+
* - Imagen 3: 1-4 supported
|
|
334
|
+
*/
|
|
335
|
+
n?: number;
|
|
336
|
+
/**
|
|
337
|
+
* Image style (OpenAI DALL-E 3 specific)
|
|
338
|
+
*/
|
|
339
|
+
style?: 'vivid' | 'natural';
|
|
340
|
+
/**
|
|
341
|
+
* Quality setting
|
|
342
|
+
* - OpenAI: 'standard' | 'hd'
|
|
343
|
+
*/
|
|
344
|
+
quality?: string;
|
|
345
|
+
/**
|
|
346
|
+
* Size specification (for providers that use fixed sizes)
|
|
347
|
+
* - OpenAI DALL-E 3: '1024x1024' | '1792x1024' | '1024x1792'
|
|
348
|
+
*/
|
|
349
|
+
size?: string;
|
|
350
|
+
}
|
|
351
|
+
/**
|
|
352
|
+
* Response from image generation
|
|
353
|
+
*/
|
|
354
|
+
export interface ImageGenerationResponse {
|
|
355
|
+
/**
|
|
356
|
+
* Generated image(s) - format depends on outputFormat option
|
|
357
|
+
*/
|
|
358
|
+
images: Array<{
|
|
359
|
+
/**
|
|
360
|
+
* Image data - Buffer for 'buffer' format, string for 'base64' or 'url'
|
|
361
|
+
*/
|
|
362
|
+
data: Buffer | string;
|
|
363
|
+
/**
|
|
364
|
+
* MIME type of the image (e.g., 'image/png', 'image/jpeg')
|
|
365
|
+
*/
|
|
366
|
+
mimeType: string;
|
|
367
|
+
/**
|
|
368
|
+
* Revised prompt (if provider modified the original)
|
|
369
|
+
*/
|
|
370
|
+
revisedPrompt?: string;
|
|
371
|
+
}>;
|
|
372
|
+
/**
|
|
373
|
+
* Model used for generation
|
|
374
|
+
*/
|
|
375
|
+
model?: string;
|
|
376
|
+
}
|
|
377
|
+
/**
|
|
378
|
+
* Options for simple message requests (convenience method).
|
|
379
|
+
* This provides a simpler interface than chat() for single-turn interactions
|
|
380
|
+
*/
|
|
381
|
+
export interface MessageOptions {
|
|
382
|
+
/**
|
|
383
|
+
* Model to use for completion
|
|
384
|
+
*/
|
|
385
|
+
model?: string;
|
|
386
|
+
/**
|
|
387
|
+
* Role of the message sender (default: 'user')
|
|
388
|
+
*/
|
|
389
|
+
role?: 'user' | 'assistant' | 'system';
|
|
390
|
+
/**
|
|
391
|
+
* Conversation history (previous messages)
|
|
392
|
+
*/
|
|
393
|
+
history?: AIMessage[];
|
|
394
|
+
/**
|
|
395
|
+
* Maximum number of tokens to generate
|
|
396
|
+
*/
|
|
397
|
+
maxTokens?: number;
|
|
398
|
+
/**
|
|
399
|
+
* Sampling temperature (0-2)
|
|
400
|
+
*/
|
|
401
|
+
temperature?: number;
|
|
402
|
+
/**
|
|
403
|
+
* Top-p sampling parameter
|
|
404
|
+
*/
|
|
405
|
+
topP?: number;
|
|
406
|
+
/**
|
|
407
|
+
* Sequences that stop generation
|
|
408
|
+
*/
|
|
409
|
+
stop?: string | string[];
|
|
410
|
+
/**
|
|
411
|
+
* Whether to stream the response
|
|
412
|
+
*/
|
|
413
|
+
stream?: boolean;
|
|
414
|
+
/**
|
|
415
|
+
* Penalty for frequency of tokens
|
|
416
|
+
*/
|
|
417
|
+
frequencyPenalty?: number;
|
|
418
|
+
/**
|
|
419
|
+
* Penalty for presence of tokens
|
|
420
|
+
*/
|
|
421
|
+
presencePenalty?: number;
|
|
422
|
+
/**
|
|
423
|
+
* Response format specification
|
|
424
|
+
*/
|
|
425
|
+
responseFormat?: {
|
|
426
|
+
type: 'text' | 'json_object';
|
|
427
|
+
};
|
|
428
|
+
/**
|
|
429
|
+
* Random seed for deterministic results
|
|
430
|
+
*/
|
|
431
|
+
seed?: number;
|
|
432
|
+
/**
|
|
433
|
+
* Available tools/functions
|
|
434
|
+
*/
|
|
435
|
+
tools?: AITool[];
|
|
436
|
+
/**
|
|
437
|
+
* Tool choice behavior
|
|
438
|
+
*/
|
|
439
|
+
toolChoice?: 'auto' | 'none' | {
|
|
440
|
+
type: 'function';
|
|
441
|
+
function: {
|
|
442
|
+
name: string;
|
|
443
|
+
};
|
|
444
|
+
};
|
|
445
|
+
/**
|
|
446
|
+
* Callback for streaming responses
|
|
447
|
+
*/
|
|
448
|
+
onProgress?: (chunk: string) => void;
|
|
449
|
+
/**
|
|
450
|
+
* Custom tags to attach to the usage event for this call.
|
|
451
|
+
* Merged over any global `usageTags` from provider options.
|
|
452
|
+
*/
|
|
453
|
+
usageTags?: Record<string, string>;
|
|
454
|
+
}
|
|
455
|
+
/**
|
|
456
|
+
* Tool/function definition for AI models
|
|
457
|
+
*/
|
|
458
|
+
export interface AITool {
|
|
459
|
+
/**
|
|
460
|
+
* Type of tool
|
|
461
|
+
*/
|
|
462
|
+
type: 'function';
|
|
463
|
+
/**
|
|
464
|
+
* Function definition
|
|
465
|
+
*/
|
|
466
|
+
function: {
|
|
467
|
+
/**
|
|
468
|
+
* Function name
|
|
469
|
+
*/
|
|
470
|
+
name: string;
|
|
471
|
+
/**
|
|
472
|
+
* Function description
|
|
473
|
+
*/
|
|
474
|
+
description?: string;
|
|
475
|
+
/**
|
|
476
|
+
* JSON schema for function parameters
|
|
477
|
+
*/
|
|
478
|
+
parameters?: Record<string, any>;
|
|
479
|
+
};
|
|
480
|
+
}
|
|
481
|
+
/**
|
|
482
|
+
* Model information structure
|
|
483
|
+
*/
|
|
484
|
+
export interface AIModel {
|
|
485
|
+
/**
|
|
486
|
+
* Model identifier
|
|
487
|
+
*/
|
|
488
|
+
id: string;
|
|
489
|
+
/**
|
|
490
|
+
* Human-readable model name
|
|
491
|
+
*/
|
|
492
|
+
name: string;
|
|
493
|
+
/**
|
|
494
|
+
* Model description
|
|
495
|
+
*/
|
|
496
|
+
description?: string;
|
|
497
|
+
/**
|
|
498
|
+
* Maximum context length in tokens
|
|
499
|
+
*/
|
|
500
|
+
contextLength: number;
|
|
501
|
+
/**
|
|
502
|
+
* Supported capabilities
|
|
503
|
+
*/
|
|
504
|
+
capabilities: string[];
|
|
505
|
+
/**
|
|
506
|
+
* Whether the model supports function calling
|
|
507
|
+
*/
|
|
508
|
+
supportsFunctions: boolean;
|
|
509
|
+
/**
|
|
510
|
+
* Whether the model supports vision/multimodal input
|
|
511
|
+
*/
|
|
512
|
+
supportsVision: boolean;
|
|
513
|
+
/**
|
|
514
|
+
* Cost per 1,000 input tokens (if available)
|
|
515
|
+
*/
|
|
516
|
+
inputCostPer1k?: number;
|
|
517
|
+
/**
|
|
518
|
+
* Cost per 1,000 output tokens (if available)
|
|
519
|
+
*/
|
|
520
|
+
outputCostPer1k?: number;
|
|
521
|
+
}
|
|
522
|
+
/**
|
|
523
|
+
* Budget configuration for AI gateway admin operations.
|
|
524
|
+
*
|
|
525
|
+
* Providers translate this to their native field names:
|
|
526
|
+
* - Bifrost: `budget.max_limit` / `budget.reset_duration`
|
|
527
|
+
* - LiteLLM: `max_budget` / `budget_duration`
|
|
528
|
+
*/
|
|
529
|
+
export interface AIAdminBudget {
|
|
530
|
+
/**
|
|
531
|
+
* Maximum spend in USD.
|
|
532
|
+
*/
|
|
533
|
+
maxLimit?: number;
|
|
534
|
+
/**
|
|
535
|
+
* Reset duration such as `1h`, `1d`, `30d`, or `1M`.
|
|
536
|
+
*/
|
|
537
|
+
resetDuration?: string;
|
|
538
|
+
/**
|
|
539
|
+
* Bifrost only: reset at calendar boundaries for day/week/month/year periods.
|
|
540
|
+
*/
|
|
541
|
+
calendarAligned?: boolean;
|
|
542
|
+
}
|
|
543
|
+
/**
|
|
544
|
+
* Rate-limit configuration for AI gateway admin operations.
|
|
545
|
+
*/
|
|
546
|
+
export interface AIAdminRateLimit {
|
|
547
|
+
/**
|
|
548
|
+
* Provider-agnostic token limit.
|
|
549
|
+
*
|
|
550
|
+
* Bifrost maps this to `token_max_limit`; LiteLLM maps it to `tpm_limit`.
|
|
551
|
+
*/
|
|
552
|
+
tokenMaxLimit?: number;
|
|
553
|
+
/**
|
|
554
|
+
* Bifrost token reset duration such as `1h`.
|
|
555
|
+
*/
|
|
556
|
+
tokenResetDuration?: string;
|
|
557
|
+
/**
|
|
558
|
+
* Provider-agnostic request limit.
|
|
559
|
+
*
|
|
560
|
+
* Bifrost maps this to `request_max_limit`; LiteLLM maps it to `rpm_limit`.
|
|
561
|
+
*/
|
|
562
|
+
requestMaxLimit?: number;
|
|
563
|
+
/**
|
|
564
|
+
* Bifrost request reset duration such as `1m`.
|
|
565
|
+
*/
|
|
566
|
+
requestResetDuration?: string;
|
|
567
|
+
/**
|
|
568
|
+
* LiteLLM tokens-per-minute limit. Overrides `tokenMaxLimit` for LiteLLM.
|
|
569
|
+
*/
|
|
570
|
+
tpmLimit?: number;
|
|
571
|
+
/**
|
|
572
|
+
* LiteLLM requests-per-minute limit. Overrides `requestMaxLimit` for LiteLLM.
|
|
573
|
+
*/
|
|
574
|
+
rpmLimit?: number;
|
|
575
|
+
}
|
|
576
|
+
/**
|
|
577
|
+
* Bifrost virtual-key routing configuration.
|
|
578
|
+
*/
|
|
579
|
+
export interface AIAdminProviderConfig {
|
|
580
|
+
/**
|
|
581
|
+
* Provider identifier such as `openai` or `anthropic`.
|
|
582
|
+
*/
|
|
583
|
+
provider: string;
|
|
584
|
+
/**
|
|
585
|
+
* Routing weight for this provider.
|
|
586
|
+
*/
|
|
587
|
+
weight?: number;
|
|
588
|
+
/**
|
|
589
|
+
* Models this virtual key may use for the provider.
|
|
590
|
+
*/
|
|
591
|
+
allowedModels?: string[];
|
|
592
|
+
/**
|
|
593
|
+
* Bifrost provider key IDs that this virtual key may use.
|
|
594
|
+
*/
|
|
595
|
+
keyIds?: string[];
|
|
596
|
+
}
|
|
597
|
+
/**
|
|
598
|
+
* Options for creating a gateway-scoped project.
|
|
599
|
+
*
|
|
600
|
+
* In Bifrost, projects are implemented as governance teams, optionally attached
|
|
601
|
+
* to a customer via `tenantId`. In LiteLLM, projects are implemented as teams.
|
|
602
|
+
*/
|
|
603
|
+
export interface CreateAIProjectOptions {
|
|
604
|
+
/**
|
|
605
|
+
* Stable project ID. LiteLLM requires one; if omitted, a slug is derived from
|
|
606
|
+
* the tenant and project name. Bifrost generates its own team ID.
|
|
607
|
+
*/
|
|
608
|
+
id?: string;
|
|
609
|
+
/**
|
|
610
|
+
* Human-readable project name.
|
|
611
|
+
*/
|
|
612
|
+
name: string;
|
|
613
|
+
/**
|
|
614
|
+
* Tenant/customer identifier to attach the project to where supported.
|
|
615
|
+
*/
|
|
616
|
+
tenantId?: string;
|
|
617
|
+
/**
|
|
618
|
+
* Human-readable description. Stored in metadata for providers that support it.
|
|
619
|
+
*/
|
|
620
|
+
description?: string;
|
|
621
|
+
/**
|
|
622
|
+
* Models the project may access.
|
|
623
|
+
*/
|
|
624
|
+
models?: string[];
|
|
625
|
+
/**
|
|
626
|
+
* Shared project budget.
|
|
627
|
+
*/
|
|
628
|
+
budget?: AIAdminBudget;
|
|
629
|
+
/**
|
|
630
|
+
* Shared project rate limits.
|
|
631
|
+
*/
|
|
632
|
+
rateLimit?: AIAdminRateLimit;
|
|
633
|
+
/**
|
|
634
|
+
* Provider-specific metadata.
|
|
635
|
+
*/
|
|
636
|
+
metadata?: Record<string, unknown>;
|
|
637
|
+
/**
|
|
638
|
+
* Whether the project should be blocked on creation where supported.
|
|
639
|
+
*/
|
|
640
|
+
isBlocked?: boolean;
|
|
641
|
+
/**
|
|
642
|
+
* Provider-specific request body overrides.
|
|
643
|
+
*/
|
|
644
|
+
raw?: Record<string, unknown>;
|
|
645
|
+
}
|
|
646
|
+
/**
|
|
647
|
+
* Gateway project descriptor returned by admin providers.
|
|
648
|
+
*/
|
|
649
|
+
export interface AIAdminProject {
|
|
650
|
+
/**
|
|
651
|
+
* Provider project ID.
|
|
652
|
+
*/
|
|
653
|
+
id: string;
|
|
654
|
+
/**
|
|
655
|
+
* Human-readable project name.
|
|
656
|
+
*/
|
|
657
|
+
name: string;
|
|
658
|
+
/**
|
|
659
|
+
* Tenant/customer identifier where available.
|
|
660
|
+
*/
|
|
661
|
+
tenantId?: string;
|
|
662
|
+
/**
|
|
663
|
+
* Provider budget ID where available.
|
|
664
|
+
*/
|
|
665
|
+
budgetId?: string;
|
|
666
|
+
/**
|
|
667
|
+
* Admin provider that created this project.
|
|
668
|
+
*/
|
|
669
|
+
provider: string;
|
|
670
|
+
/**
|
|
671
|
+
* Raw provider response.
|
|
672
|
+
*/
|
|
673
|
+
raw?: unknown;
|
|
674
|
+
}
|
|
675
|
+
/**
|
|
676
|
+
* Options for creating a gateway virtual key.
|
|
677
|
+
*/
|
|
678
|
+
export interface CreateAIVirtualKeyOptions {
|
|
679
|
+
/**
|
|
680
|
+
* Human-readable key name or alias.
|
|
681
|
+
*/
|
|
682
|
+
name: string;
|
|
683
|
+
/**
|
|
684
|
+
* Human-readable key description.
|
|
685
|
+
*/
|
|
686
|
+
description?: string;
|
|
687
|
+
/**
|
|
688
|
+
* Project/team ID to attach the key to.
|
|
689
|
+
*/
|
|
690
|
+
projectId?: string;
|
|
691
|
+
/**
|
|
692
|
+
* Tenant/customer ID to attach the key to when no project is supplied, or to
|
|
693
|
+
* record in LiteLLM metadata.
|
|
694
|
+
*/
|
|
695
|
+
tenantId?: string;
|
|
696
|
+
/**
|
|
697
|
+
* Optional end-user ID associated with the key.
|
|
698
|
+
*/
|
|
699
|
+
userId?: string;
|
|
700
|
+
/**
|
|
701
|
+
* Models this key may access.
|
|
702
|
+
*/
|
|
703
|
+
models?: string[];
|
|
704
|
+
/**
|
|
705
|
+
* Bifrost provider routing configuration.
|
|
706
|
+
*/
|
|
707
|
+
providerConfigs?: AIAdminProviderConfig[];
|
|
708
|
+
/**
|
|
709
|
+
* Key-level budget.
|
|
710
|
+
*/
|
|
711
|
+
budget?: AIAdminBudget;
|
|
712
|
+
/**
|
|
713
|
+
* Key-level rate limits.
|
|
714
|
+
*/
|
|
715
|
+
rateLimit?: AIAdminRateLimit;
|
|
716
|
+
/**
|
|
717
|
+
* Key duration such as `30d`, `1h`, or `permanent` where supported.
|
|
718
|
+
*/
|
|
719
|
+
duration?: string;
|
|
720
|
+
/**
|
|
721
|
+
* Provider-specific metadata.
|
|
722
|
+
*/
|
|
723
|
+
metadata?: Record<string, unknown>;
|
|
724
|
+
/**
|
|
725
|
+
* Bifrost provider API key IDs this virtual key may use. Use `["*"]` to allow
|
|
726
|
+
* all configured provider keys.
|
|
727
|
+
*/
|
|
728
|
+
keyIds?: string[];
|
|
729
|
+
/**
|
|
730
|
+
* Whether the key should be active on creation.
|
|
731
|
+
*/
|
|
732
|
+
isActive?: boolean;
|
|
733
|
+
/**
|
|
734
|
+
* LiteLLM model aliases for this key.
|
|
735
|
+
*/
|
|
736
|
+
aliases?: Record<string, string>;
|
|
737
|
+
/**
|
|
738
|
+
* LiteLLM key-specific config.
|
|
739
|
+
*/
|
|
740
|
+
config?: Record<string, unknown>;
|
|
741
|
+
/**
|
|
742
|
+
* LiteLLM key-specific permissions.
|
|
743
|
+
*/
|
|
744
|
+
permissions?: Record<string, unknown>;
|
|
745
|
+
/**
|
|
746
|
+
* Provider-specific request body overrides.
|
|
747
|
+
*/
|
|
748
|
+
raw?: Record<string, unknown>;
|
|
749
|
+
}
|
|
750
|
+
/**
|
|
751
|
+
* Gateway virtual key descriptor returned by admin providers.
|
|
752
|
+
*/
|
|
753
|
+
export interface AIVirtualKey {
|
|
754
|
+
/**
|
|
755
|
+
* Provider key ID, when returned separately from the key value.
|
|
756
|
+
*/
|
|
757
|
+
id?: string;
|
|
758
|
+
/**
|
|
759
|
+
* Human-readable key name or alias.
|
|
760
|
+
*/
|
|
761
|
+
name?: string;
|
|
762
|
+
/**
|
|
763
|
+
* Newly generated key value. Some provider list/detail responses may only
|
|
764
|
+
* expose a masked value.
|
|
765
|
+
*/
|
|
766
|
+
key?: string;
|
|
767
|
+
/**
|
|
768
|
+
* Masked key value or key name, when provided.
|
|
769
|
+
*/
|
|
770
|
+
maskedKey?: string;
|
|
771
|
+
/**
|
|
772
|
+
* Attached project/team ID.
|
|
773
|
+
*/
|
|
774
|
+
projectId?: string;
|
|
775
|
+
/**
|
|
776
|
+
* Attached tenant/customer ID.
|
|
777
|
+
*/
|
|
778
|
+
tenantId?: string;
|
|
779
|
+
/**
|
|
780
|
+
* Expiration timestamp where supported.
|
|
781
|
+
*/
|
|
782
|
+
expiresAt?: string;
|
|
783
|
+
/**
|
|
784
|
+
* Admin provider that created this key.
|
|
785
|
+
*/
|
|
786
|
+
provider: string;
|
|
787
|
+
/**
|
|
788
|
+
* Raw provider response.
|
|
789
|
+
*/
|
|
790
|
+
raw?: unknown;
|
|
791
|
+
}
|
|
792
|
+
/**
|
|
793
|
+
* Admin operations exposed by gateway providers that support provisioning.
|
|
794
|
+
*/
|
|
795
|
+
export interface AIAdminInterface {
|
|
796
|
+
/**
|
|
797
|
+
* Create a project/team for a tenant.
|
|
798
|
+
*/
|
|
799
|
+
createProject(options: CreateAIProjectOptions): Promise<AIAdminProject>;
|
|
800
|
+
/**
|
|
801
|
+
* Create a virtual key, optionally attached to a project or tenant.
|
|
802
|
+
*/
|
|
803
|
+
createVirtualKey(options: CreateAIVirtualKeyOptions): Promise<AIVirtualKey>;
|
|
804
|
+
}
|
|
805
|
+
/**
|
|
806
|
+
* AI provider capabilities
|
|
807
|
+
*/
|
|
808
|
+
export interface AICapabilities {
|
|
809
|
+
/**
|
|
810
|
+
* Whether the provider supports chat completions
|
|
811
|
+
*/
|
|
812
|
+
chat: boolean;
|
|
813
|
+
/**
|
|
814
|
+
* Whether the provider supports text completions
|
|
815
|
+
*/
|
|
816
|
+
completion: boolean;
|
|
817
|
+
/**
|
|
818
|
+
* Whether the provider supports embeddings
|
|
819
|
+
*/
|
|
820
|
+
embeddings: boolean;
|
|
821
|
+
/**
|
|
822
|
+
* Whether the provider supports streaming
|
|
823
|
+
*/
|
|
824
|
+
streaming: boolean;
|
|
825
|
+
/**
|
|
826
|
+
* Whether the provider supports function calling
|
|
827
|
+
*/
|
|
828
|
+
functions: boolean;
|
|
829
|
+
/**
|
|
830
|
+
* Whether the provider supports vision/multimodal
|
|
831
|
+
*/
|
|
832
|
+
vision: boolean;
|
|
833
|
+
/**
|
|
834
|
+
* Whether the provider supports fine-tuning
|
|
835
|
+
*/
|
|
836
|
+
fineTuning: boolean;
|
|
837
|
+
/**
|
|
838
|
+
* Whether the provider supports image embeddings
|
|
839
|
+
*/
|
|
840
|
+
imageEmbeddings: boolean;
|
|
841
|
+
/**
|
|
842
|
+
* Whether the provider supports image generation
|
|
843
|
+
*/
|
|
844
|
+
imageGeneration: boolean;
|
|
845
|
+
/**
|
|
846
|
+
* Whether the provider supports text-to-speech synthesis
|
|
847
|
+
*/
|
|
848
|
+
tts: boolean;
|
|
849
|
+
/**
|
|
850
|
+
* Whether the provider supports voice cloning from samples
|
|
851
|
+
*/
|
|
852
|
+
voiceCloning: boolean;
|
|
853
|
+
/**
|
|
854
|
+
* Whether the provider supports voice design via description
|
|
855
|
+
*/
|
|
856
|
+
voiceDesign: boolean;
|
|
857
|
+
/**
|
|
858
|
+
* Maximum context length supported
|
|
859
|
+
*/
|
|
860
|
+
maxContextLength: number;
|
|
861
|
+
/**
|
|
862
|
+
* Supported operations
|
|
863
|
+
*/
|
|
864
|
+
supportedOperations: string[];
|
|
865
|
+
}
|
|
866
|
+
/** Token accounting reported for a request. */
export interface TokenUsage {
  /** Count of prompt tokens. */
  promptTokens: number;
  /** Count of completion tokens. */
  completionTokens: number;
  /** Total tokens used. */
  totalTokens: number;
}
|
|
883
|
+
/**
 * Payload handed to the `onUsage` callback once an API call has finished.
 * Carries token usage, timing and context so callers can do tracking and analytics.
 *
 * @example
 * ```typescript
 * const ai = await getAI({
 *   type: 'openai',
 *   apiKey: '...',
 *   onUsage: (event) => {
 *     console.log(`[${event.provider}/${event.model}] ${event.operation}: ${event.usage?.totalTokens} tokens in ${event.duration}ms`);
 *   },
 * });
 * ```
 */
export interface UsageEvent {
  /** Name of the provider that served the request, e.g. 'openai', 'anthropic', 'gemini'. */
  provider: string;
  /** Name of the model that was used, e.g. 'gpt-4o', 'claude-3-5-sonnet-20241022'. */
  model: string;
  /** Kind of operation this usage belongs to. */
  operation:
    | 'chat'
    | 'complete'
    | 'message'
    | 'embed'
    | 'embedImage'
    | 'describeImage'
    | 'generateImage'
    | 'stream';
  /** Token breakdown; absent when the provider does not supply one. */
  usage?: TokenUsage;
  /** Wall-clock time the API call took, in milliseconds. */
  duration: number;
  /** Moment at which the call completed. */
  timestamp: Date;
  /** Merged custom tags: global `usageTags` combined with per-call `usageTags`. */
  tags?: Record<string, string>;
}
|
|
914
|
+
/** Shape of a response returned by an AI provider. */
export interface AIResponse {
  /** The generated content. */
  content: string;
  /** Token usage details. */
  usage?: TokenUsage;
  /** Model that produced the generation. */
  model?: string;
  /** Why the generation finished. */
  finishReason?: 'stop' | 'length' | 'tool_calls' | 'content_filter';
  /** Tool calls issued by the model. */
  toolCalls?: Array<{
    id: string;
    type: 'function';
    function: {
      name: string;
      arguments: string;
    };
  }>;
}
|
|
946
|
+
/** Shape of a response returned by an embedding request. */
export interface EmbeddingResponse {
  /** The generated embedding vectors. */
  embeddings: number[][];
  /** Token usage details. */
  usage?: TokenUsage;
  /** Model that produced the embeddings. */
  model?: string;
}
|
|
963
|
+
/**
 * The core contract that every AI provider has to implement.
 */
export interface AIInterface {
  /** Admin surface, optional; offered by gateway providers that support provisioning. */
  admin?: AIAdminInterface;

  /**
   * Produce a chat completion for a sequence of messages.
   *
   * @param messages - The conversation (system, user, assistant and tool roles)
   * @param options - Chat settings such as model, temperature, tools, etc.
   * @returns Promise for the model's response, including content and usage info
   * @throws {AIError} If the request fails
   * @throws {AuthenticationError} If the credentials are invalid
   * @throws {RateLimitError} If the provider's rate limit is exceeded
   */
  chat(messages: AIMessage[], options?: ChatOptions): Promise<AIResponse>;

  /**
   * Produce a text completion for a prompt string (the non-chat interface).
   *
   * @param prompt - Text prompt to be completed
   * @param options - Completion settings such as model, temperature, etc.
   * @returns Promise for the model's response
   * @throws {AIError} If the request fails
   */
  complete(prompt: string, options?: CompletionOptions): Promise<AIResponse>;

  /**
   * Convenience wrapper around chat() for single-turn interactions.
   *
   * Takes a text string plus optional configuration and resolves to only the
   * response content as a string. Multi-turn conversations remain possible by
   * passing earlier turns through the `history` option.
   *
   * @param text - Message text to send
   * @param options - Configuration such as history, model, etc.
   * @returns Promise for the response content string
   *
   * @example
   * ```typescript
   * // Simple single-turn usage
   * const response = await ai.message('Hello, how are you?');
   *
   * // With options
   * const response = await ai.message('Analyze this data', {
   *   model: 'gpt-4o',
   *   responseFormat: { type: 'json_object' },
   *   maxTokens: 1000
   * });
   *
   * // With conversation history
   * const response = await ai.message('What did I ask before?', {
   *   history: [
   *     { role: 'user', content: 'Hello' },
   *     { role: 'assistant', content: 'Hi there!' }
   *   ]
   * });
   * ```
   */
  message(text: string, options?: MessageOptions): Promise<string>;

  /**
   * Produce vector embeddings for one or several text inputs.
   *
   * @param text - One string, or an array of strings, to embed
   * @param options - Embedding settings such as model and dimensions
   * @returns Promise for the embedding vectors and usage info
   * @throws {AIError} If this provider does not support embeddings or the request fails
   */
  embed(text: string | string[], options?: EmbeddingOptions): Promise<EmbeddingResponse>;

  /**
   * Produce embeddings for an image.
   *
   * The implementation differs per provider:
   * - Gemini: native multimodal embeddings
   * - OpenAI: describe-then-embed pattern (describeImage → embed)
   * - Others: throws NOT_IMPLEMENTED
   *
   * @param image - The image, given as URL, base64 data URL, or Buffer
   * @param options - Optional image-embedding configuration
   * @returns Promise for the embeddings response
   * @throws {AIError} If embeddings are not supported or the request fails
   *
   * @example
   * ```typescript
   * // From URL
   * const embedding = await ai.embedImage('https://example.com/image.jpg');
   *
   * // From Buffer
   * const buffer = fs.readFileSync('image.png');
   * const embedding = await ai.embedImage(buffer);
   *
   * // With options
   * const embedding = await ai.embedImage(imageUrl, { dimensions: 768 });
   * ```
   */
  embedImage(image: string | Buffer, options?: ImageEmbeddingOptions): Promise<EmbeddingResponse>;

  /**
   * Produce a text description of an image.
   *
   * @param image - The image, given as URL, base64 data URL, or Buffer
   * @param prompt - Optional custom prompt for the description
   * @param options - Optional configuration
   * @returns Promise for the description string
   * @throws {AIError} If vision is not supported or the request fails
   *
   * @example
   * ```typescript
   * // Default description for search indexing
   * const description = await ai.describeImage('https://example.com/image.jpg');
   *
   * // Custom prompt
   * const description = await ai.describeImage(imageBuffer, 'What product is shown?');
   *
   * // With options
   * const description = await ai.describeImage(imageUrl, undefined, {
   *   model: 'gpt-4o',
   *   maxTokens: 500,
   *   detail: 'high'
   * });
   * ```
   */
  describeImage(image: string | Buffer, prompt?: string, options?: ImageDescriptionOptions): Promise<string>;

  /**
   * Produce an image from a text prompt.
   *
   * @param prompt - Text describing the image to generate
   * @param options - Optional image-generation configuration
   * @returns Promise for the generated image(s)
   * @throws {AIError} If image generation is not supported or the request fails
   *
   * @example
   * ```typescript
   * // Basic generation (returns Buffer by default)
   * const result = await ai.generateImage('A sunset over mountains');
   * fs.writeFileSync('image.png', result.images[0].data);
   *
   * // With options
   * const result = await ai.generateImage('A cat wearing a hat', {
   *   outputFormat: 'base64',
   *   size: '1024x1024',
   *   style: 'vivid'
   * });
   * ```
   */
  generateImage(prompt: string, options?: ImageGenerationOptions): Promise<ImageGenerationResponse>;

  /**
   * Stream a chat completion, yielding text chunks as they arrive.
   *
   * @param messages - The conversation messages
   * @param options - Chat settings such as model, temperature, etc.
   * @returns Async iterable that yields string chunks
   * @throws {AIError} If the request fails
   */
  stream(messages: AIMessage[], options?: ChatOptions): AsyncIterable<string>;

  /**
   * Estimate or calculate how many tokens a text string contains.
   *
   * @param text - Text to tokenize
   * @returns Promise for the token count
   */
  countTokens(text: string): Promise<number>;

  /**
   * List the models this provider makes available.
   *
   * @returns Promise for an array of model descriptors
   */
  getModels(): Promise<AIModel[]>;

  /**
   * Query which capabilities this provider supports (chat, embeddings, vision, TTS, etc.).
   *
   * @returns Promise for a capabilities descriptor
   */
  getCapabilities(): Promise<AICapabilities>;

  /**
   * Synthesize speech from text.
   *
   * @param text - Text to turn into speech
   * @param options - Optional TTS synthesis configuration
   * @returns Promise for the audio data with metadata
   * @throws {AIError} If TTS is not supported or the request fails
   *
   * @example
   * ```typescript
   * // Basic synthesis
   * const result = await ai.synthesizeSpeech('Hello, world!');
   * fs.writeFileSync('speech.wav', result.audio);
   *
   * // With options
   * const result = await ai.synthesizeSpeech('News broadcast text', {
   *   voice: 'news-anchor-1',
   *   speed: 1.1,
   *   includeWordTimings: true
   * });
   * console.log(`Duration: ${result.duration}s`);
   * ```
   */
  synthesizeSpeech(text: string, options?: TTSOptions): Promise<TTSResponse>;

  /**
   * Stream speech synthesis for real-time playback.
   *
   * @param text - Text to turn into speech
   * @param options - Optional TTS synthesis configuration
   * @returns AsyncIterable of audio chunks
   * @throws {AIError} If TTS streaming is not supported or the request fails
   *
   * @example
   * ```typescript
   * const chunks: Buffer[] = [];
   * for await (const chunk of ai.streamSpeech('Long text...')) {
   *   chunks.push(chunk);
   *   // Or stream directly to audio output
   * }
   * ```
   */
  streamSpeech(text: string, options?: TTSOptions): AsyncIterable<Buffer>;

  /**
   * Clone a voice from an audio sample.
   *
   * Builds a new voice profile out of a 3+ second audio sample. The resulting
   * voice can then be used in later synthesizeSpeech calls.
   *
   * @param options - Voice cloning configuration, including the audio sample
   * @returns Promise for the cloned voice profile
   * @throws {AIError} If voice cloning is not supported or the request fails
   *
   * @example
   * ```typescript
   * const sample = fs.readFileSync('voice-sample.wav');
   * const voice = await ai.cloneVoice({
   *   sampleAudio: sample,
   *   name: 'News Anchor Voice',
   *   language: 'en-US'
   * });
   *
   * // Use the cloned voice
   * const speech = await ai.synthesizeSpeech('Breaking news...', {
   *   voice: voice.id
   * });
   * ```
   */
  cloneVoice(options: VoiceCloneOptions): Promise<Voice>;

  /**
   * Design a voice from a natural language description.
   *
   * Builds a new voice profile out of a text description of the desired voice.
   * The resulting voice can then be used in later synthesizeSpeech calls.
   *
   * @param options - Voice design configuration, including the description
   * @returns Promise for the designed voice profile
   * @throws {AIError} If voice design is not supported or the request fails
   *
   * @example
   * ```typescript
   * const voice = await ai.designVoice({
   *   description: 'warm female voice, slight British accent, professional news anchor',
   *   language: 'en-US',
   *   gender: 'female'
   * });
   *
   * // Use the designed voice
   * const speech = await ai.synthesizeSpeech('Good evening...', {
   *   voice: voice.id
   * });
   * ```
   */
  designVoice(options: VoiceDesignOptions): Promise<Voice>;

  /**
   * List the voices available for TTS synthesis.
   *
   * @param options - Optional filters applied to the voice list
   * @returns Promise for an array of available voices
   * @throws {AIError} If TTS is not supported or the request fails
   *
   * @example
   * ```typescript
   * // List all voices
   * const voices = await ai.getVoices();
   *
   * // Filter by language
   * const englishVoices = await ai.getVoices({ language: 'en' });
   *
   * // Include cloned voices
   * const allVoices = await ai.getVoices({ includeCloned: true });
   * ```
   */
  getVoices(options?: VoiceListOptions): Promise<Voice[]>;
}
|
|
1254
|
+
/**
 * Rate-limit configuration shared by the AI providers.
 *
 * The pacing wrapper only becomes active once at least one pacing field
 * (`enabled`, `key`, `cooldownMs`, `initialDelayMs`, `maxAttempts`) is set.
 *
 * `qwen3-tts` additionally reads `requestsPerMinute` and `maxConcurrent` from
 * this object for its local token bucket limiter.
 */
export interface AIRateLimitOptions {
  /** Turn on shared in-process request pacing for this client. */
  enabled?: boolean;
  /**
   * Shared budget key that coordinates pacing across multiple clients.
   * When left out, a provider-scoped key is derived from the configured credentials.
   */
  key?: string;
  /** Minimum delay, in milliseconds, between successful calls that share the same key. */
  cooldownMs?: number;
  /**
   * Fallback delay, in milliseconds, before a rate-limited call is retried when
   * the provider returns no `Retry-After` hint.
   */
  initialDelayMs?: number;
  /** Maximum attempts for retryable rate-limit failures; the first call counts. */
  maxAttempts?: number;
  /** Qwen3-TTS only: maximum requests per minute for its local token bucket. */
  requestsPerMinute?: number;
  /** Qwen3-TTS only: maximum concurrent requests its local limiter allows. */
  maxConcurrent?: number;
}
|
|
1295
|
+
/** Configuration options common to all providers. */
export interface BaseAIOptions {
  /** API timeout, in milliseconds. */
  timeout?: number;
  /** Maximum number of retries. */
  maxRetries?: number;
  /** Custom headers. */
  headers?: Record<string, string>;
  /** Model to use by default. */
  defaultModel?: string;
  /**
   * Callback invoked with usage details after every API call.
   * Useful for tracking token consumption, costs and performance across providers.
   *
   * Errors thrown from inside the callback are silently caught and do not
   * affect the result of the API call.
   *
   * @param event - Usage event carrying provider, model, operation, tokens and timing
   */
  onUsage?: (event: UsageEvent) => void;
  /**
   * Global tags added to every usage event.
   * Per-call `usageTags` on `ChatOptions` / `EmbeddingOptions` / etc.
   * are merged on top of these.
   */
  usageTags?: Record<string, string>;
  /** Optional shared pacing / retry configuration. */
  rateLimit?: AIRateLimitOptions;
}
|
|
1336
|
+
/** Options for the OpenAI provider. */
export interface OpenAIOptions extends BaseAIOptions {
  /** Provider discriminator; optional on this interface. */
  type?: 'openai';
  apiKey?: string;
  baseUrl?: string;
  organization?: string;
}
|
|
1345
|
+
/**
 * LiteLLM provider options
 *
 * LiteLLM exposes an OpenAI-compatible API surface and requires a custom
 * base URL such as `https://llm.happyvertical.com/v1`.
 *
 * NOTE(review): `baseUrl` is declared optional here, so the "required" base URL
 * is not enforced by this type — presumably it can be resolved elsewhere
 * (for example from the environment); confirm against the provider code.
 */
export interface LiteLLMOptions extends BaseAIOptions {
  /** Provider discriminator. */
  type: 'litellm';
  apiKey?: string;
  /** Base URL of the LiteLLM endpoint, e.g. `https://llm.happyvertical.com/v1`. */
  baseUrl?: string;
  organization?: string;
  /**
   * Credential for admin routes.
   * NOTE(review): semantics are not visible in this declaration — confirm.
   */
  adminApiKey?: string;
  /**
   * Admin API root.
   * NOTE(review): `BifrostOptions` documents `adminBaseUrl` / `adminUrl` as
   * aliases of each other; presumably the same holds here — confirm.
   */
  adminBaseUrl?: string;
  /**
   * Admin API root; see the note on `adminBaseUrl` (presumed alias — confirm).
   */
  adminUrl?: string;
  /**
   * Additional headers, presumably sent with admin requests — confirm.
   */
  adminHeaders?: Record<string, string>;
}
|
|
1361
|
+
/**
 * Bifrost provider options.
 *
 * Bifrost exposes OpenAI-compatible inference through endpoints such as
 * `/openai` and `/v1`, plus governance admin endpoints at `/api/governance/*`.
 */
export interface BifrostOptions extends BaseAIOptions {
  /** Provider discriminator. */
  type: 'bifrost';
  apiKey?: string;
  baseUrl?: string;
  organization?: string;
  /**
   * Optional virtual key for admin routes. Bifrost OSS admin APIs typically use
   * username/password Basic auth instead; use `adminUser` / `adminPassword`
   * when governance auth is enabled without enterprise bearer-token support.
   */
  adminApiKey?: string;
  /**
   * Admin API root. Alias: `adminUrl`.
   */
  adminBaseUrl?: string;
  /**
   * Admin API root. Kept as a friendly alias for env vars such as
   * `BIFROST_ADMIN_URL`.
   */
  adminUrl?: string;
  /**
   * Bifrost admin username for HTTP Basic auth.
   */
  adminUser?: string;
  /**
   * Bifrost admin username for HTTP Basic auth, under an alternative property
   * name; it duplicates `adminUser`.
   * NOTE(review): which of the two takes precedence when both are set is not
   * visible in this declaration — confirm against the provider code.
   */
  adminUsername?: string;
  /**
   * Bifrost admin password for HTTP Basic auth.
   */
  adminPassword?: string;
  /**
   * Additional headers, presumably sent with admin requests — confirm.
   */
  adminHeaders?: Record<string, string>;
}
|
|
1401
|
+
/**
 * Options for the Ollama provider.
 *
 * By default Ollama talks to the local host at `http://localhost:11434`; it can
 * also target remote hosts such as `https://ollama.com/api` when combined with
 * an API key.
 */
export interface OllamaOptions extends BaseAIOptions {
  /** Provider discriminator. */
  type: 'ollama';
  apiKey?: string;
  baseUrl?: string;
  /** Default keep-alive duration applied to model requests, e.g. `5m` or `0`. */
  keepAlive?: string | number;
}
|
|
1417
|
+
/** Options for the Gemini provider. */
export interface GeminiOptions extends BaseAIOptions {
  /** Provider discriminator. */
  type: 'gemini';
  apiKey?: string;
  baseUrl?: string;
  projectId?: string;
  location?: string;
  /**
   * Thinking level for Gemini 3 models (gemini-3-flash-preview, gemini-3-pro).
   * Sets how deep the internal reasoning goes:
   * - 'minimal': no thinking for most queries (Gemini 3 Flash only)
   * - 'low': minimizes latency and cost, suited to simple tasks
   * - 'medium': balanced thinking for most tasks (Gemini 3 Flash only)
   * - 'high': maximizes reasoning depth (the Gemini 3 default)
   *
   * Note: only effective with Gemini 3 models. Gemini 2.5 uses thinkingBudget instead.
   */
  thinkingLevel?: GeminiThinkingLevel;
}
|
|
1438
|
+
/** Options for the Anthropic provider. */
export interface AnthropicOptions extends BaseAIOptions {
  /** Provider discriminator. */
  type: 'anthropic';
  apiKey?: string;
  baseUrl?: string;
  anthropicVersion?: string;
}
|
|
1447
|
+
/** Options for the Hugging Face provider. */
export interface HuggingFaceOptions extends BaseAIOptions {
  /** Provider discriminator. */
  type: 'huggingface';
  apiToken?: string;
  endpoint?: string;
  model?: string;
  useCache?: boolean;
  waitForModel?: boolean;
}
|
|
1458
|
+
/** Options for the AWS Bedrock provider. */
export interface BedrockOptions extends BaseAIOptions {
  /** Provider discriminator. */
  type: 'bedrock';
  region?: string;
  /** Explicit credentials; the session token is optional. */
  credentials?: {
    accessKeyId: string;
    secretAccessKey: string;
    sessionToken?: string;
  };
  endpoint?: string;
}
|
|
1471
|
+
/**
 * Options for the Claude CLI provider.
 * This provider uses the local Claude Code CLI rather than API keys.
 */
export interface ClaudeCliOptions extends BaseAIOptions {
  /** Provider discriminator. */
  type: 'claude-cli';
  /**
   * Optional custom path to the claude binary.
   * When not given, the binary is searched for in PATH.
   */
  cliPath?: string;
}
|
|
1483
|
+
/**
 * Options for the Qwen3-TTS provider, which uses Qwen3-TTS for
 * text-to-speech synthesis.
 *
 * TTS is co-located with ComfyUI for GPU sharing efficiency.
 */
export interface Qwen3TTSOptions extends BaseAIOptions {
  /** Provider discriminator. */
  type: 'qwen3-tts';
  /**
   * URL of the TTS service endpoint,
   * e.g., 'http://localhost:8880' or 'http://qwen-tts:8000'.
   */
  endpoint?: string;
  /**
   * Model variant used by default:
   * - 'qwen3-tts-1.7b': higher quality (4.54GB VRAM)
   * - 'qwen3-tts-0.6b': faster, lower VRAM (2.52GB)
   */
  defaultModel?: 'qwen3-tts-1.7b' | 'qwen3-tts-0.6b';
  /** Voice ID used for synthesis by default. */
  defaultVoice?: string;
  /** Language used for synthesis by default. */
  defaultLanguage?: string;
  /**
   * Rate limiting configuration for the local TTS adapter.
   * Reuses `BaseAIOptions.rateLimit` and reads `requestsPerMinute` / `maxConcurrent`.
   */
  rateLimit?: AIRateLimitOptions;
}
|
|
1516
|
+
/** Union of the option types of every provider. */
export type GetAIOptions =
  | OpenAIOptions
  | LiteLLMOptions
  | BifrostOptions
  | OllamaOptions
  | GeminiOptions
  | AnthropicOptions
  | HuggingFaceOptions
  | BedrockOptions
  | ClaudeCliOptions
  | Qwen3TTSOptions;
|
|
1520
|
+
/**
 * Base error class for all AI operations.
 * Provider-specific errors are mapped to subclasses for structured error handling.
 *
 * @param message - Human-readable error description
 * @param code - Machine-readable error code (e.g., 'AUTH_ERROR', 'RATE_LIMIT')
 * @param provider - Provider that raised the error (e.g., 'openai', 'anthropic')
 * @param model - Model involved in the error, if applicable
 * @param retryable - Optional flag marking the failure as retryable; the value
 *   used when it is omitted is not visible in this declaration
 */
export declare class AIError extends Error {
  /** Machine-readable error code. */
  code: string;
  /** Provider that raised the error, when known. */
  provider?: string | undefined;
  /** Model involved in the error, when applicable. */
  model?: string | undefined;
  /** Whether the failure is flagged as retryable. */
  retryable: boolean;
  constructor(message: string, code: string, provider?: string | undefined, model?: string | undefined, retryable?: boolean);
}
|
|
1536
|
+
/**
 * Raised when the API key or credentials are missing or invalid.
 *
 * @param provider - The provider that rejected authentication
 */
export declare class AuthenticationError extends AIError {
  constructor(provider?: string);
}
|
|
1544
|
+
/**
 * Raised when the provider's rate limit has been exceeded.
 *
 * @param provider - The provider that enforced the rate limit
 * @param retryAfter - Number of seconds to wait before retrying, when the API provides it
 */
export declare class RateLimitError extends AIError {
  /** Seconds to wait before retrying, when the API provided a value. */
  retryAfter?: number;
  constructor(provider?: string, retryAfter?: number);
}
|
|
1554
|
+
/**
 * Raised when the requested model does not exist or is unavailable.
 *
 * @param model - Identifier of the model that could not be found
 * @param provider - The provider that was queried
 */
export declare class ModelNotFoundError extends AIError {
  constructor(model: string, provider?: string);
}
|
|
1563
|
+
/**
 * Raised when the input is larger than the model's maximum context window.
 *
 * @param provider - The provider that reported the error
 * @param model - The model whose context limit was exceeded
 */
export declare class ContextLengthError extends AIError {
  constructor(provider?: string, model?: string);
}
|
|
1572
|
+
/**
 * Raised when the provider's safety/content filters block the content.
 *
 * @param provider - The provider that filtered the content
 * @param model - The model that triggered the filter
 */
export declare class ContentFilterError extends AIError {
  constructor(provider?: string, model?: string);
}
|
|
1581
|
+
/** Settings for text-to-speech synthesis. */
export interface TTSOptions {
  /** TTS model to use (e.g., 'qwen3-tts-1.7b', 'qwen3-tts-0.6b'). */
  model?: string;
  /** Voice ID or profile reference used for synthesis. */
  voice?: string;
  /**
   * ISO language code (e.g., 'en-US', 'zh-CN').
   * Supported: Chinese, English, Japanese, Korean, German, French, Russian, Portuguese, Spanish, Italian
   */
  language?: string;
  /** Multiplier for the speech rate (0.5 - 2.0, default: 1.0). */
  speed?: number;
  /** Pitch adjustment expressed in semitones (-20 to 20, default: 0). */
  pitch?: number;
  /** Format of the output audio. */
  outputFormat?: 'wav' | 'mp3' | 'ogg';
  /** Whether the audio output should be streamed. */
  stream?: boolean;
  /** Whether word-level timing information for lip-sync should be included. */
  includeWordTimings?: boolean;
}
|
|
1619
|
+
/** Settings for cloning a voice from audio samples. */
export interface VoiceCloneOptions {
  /** Model used for voice cloning. */
  model?: string;
  /**
   * The audio sample to clone from (3+ seconds recommended).
   * May be a Buffer or a base64-encoded string.
   */
  sampleAudio: Buffer | string;
  /** MIME type of the sample audio (e.g., 'audio/wav', 'audio/mp3'). */
  sampleMimeType?: string;
  /** Name given to the cloned voice profile. */
  name?: string;
  /** Description of the voice. */
  description?: string;
  /** Language of the voice sample. */
  language?: string;
}
|
|
1649
|
+
/** Settings for designing a voice from a natural language description. */
export interface VoiceDesignOptions {
  /** Model used for voice design. */
  model?: string;
  /**
   * Natural language description of the voice that is wanted,
   * e.g., "warm female voice with slight British accent, professional news anchor tone".
   */
  description: string;
  /** Primary language of the voice. */
  language?: string;
  /** Target gender of the voice. */
  gender?: 'male' | 'female' | 'neutral';
}
|
|
1671
|
+
/**
 * Word timing information for lip-sync alignment.
 */
export interface WordTiming {
  /**
   * The word or phoneme.
   */
  word: string;

  /**
   * Start time in seconds.
   */
  start: number;

  /**
   * End time in seconds.
   */
  end: number;
}
|
|
1688
|
+
/**
 * Response from text-to-speech synthesis.
 *
 * `audio`, `mimeType` and `duration` are always present; the remaining
 * fields are optional.
 */
export interface TTSResponse {
  /**
   * Generated audio data.
   */
  audio: Buffer;

  /**
   * MIME type of the audio (e.g., 'audio/wav', 'audio/mp3').
   */
  mimeType: string;

  /**
   * Duration of the audio in seconds.
   */
  duration: number;

  /**
   * Word-level timing information for lip-sync (if requested).
   */
  wordTimings?: WordTiming[];

  /**
   * Model used for generation.
   */
  model?: string;

  /**
   * Sample rate in Hz (e.g., 22050, 44100).
   */
  sampleRate?: number;
}
|
|
1717
|
+
/**
 * Voice profile information.
 *
 * `id`, `name` and `language` are required; the remaining fields are optional.
 */
export interface Voice {
  /**
   * Unique identifier for the voice.
   */
  id: string;

  /**
   * Human-readable name for the voice.
   */
  name: string;

  /**
   * Primary language of the voice (ISO code).
   */
  language: string;

  /**
   * Gender of the voice.
   */
  gender?: 'male' | 'female' | 'neutral';

  /**
   * Description of the voice characteristics.
   */
  description?: string;

  /**
   * Whether this is a cloned voice.
   */
  isCloned?: boolean;

  /**
   * Whether this was designed via natural language.
   */
  isDesigned?: boolean;

  /**
   * URL to a sample of this voice (if available).
   */
  sampleUrl?: string;

  /**
   * Provider-specific voice data/embedding.
   * The value shape is not constrained by this declaration.
   */
  voiceData?: Record<string, any>;
}
|
|
1758
|
+
/**
 * Options for listing available voices.
 *
 * Every field is optional.
 */
export interface VoiceListOptions {
  /**
   * Filter by language.
   */
  language?: string;

  /**
   * Filter by gender.
   */
  gender?: 'male' | 'female' | 'neutral';

  /**
   * Include cloned voices.
   */
  includeCloned?: boolean;

  /**
   * Include designed voices.
   */
  includeDesigned?: boolean;
}
|
|
1779
|
+
//# sourceMappingURL=types.d.ts.map
|