@happyvertical/ai 0.74.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/AGENT.md +33 -0
  2. package/LICENSE +7 -0
  3. package/README.md +384 -0
  4. package/dist/chunks/anthropic-BRwbhwIl.js +463 -0
  5. package/dist/chunks/anthropic-BRwbhwIl.js.map +1 -0
  6. package/dist/chunks/bedrock-Cf1xUerN.js +808 -0
  7. package/dist/chunks/bedrock-Cf1xUerN.js.map +1 -0
  8. package/dist/chunks/bifrost-3mXtQsTj.js +233 -0
  9. package/dist/chunks/bifrost-3mXtQsTj.js.map +1 -0
  10. package/dist/chunks/claude-cli-BrHRfkry.js +603 -0
  11. package/dist/chunks/claude-cli-BrHRfkry.js.map +1 -0
  12. package/dist/chunks/gateway-admin-C4GFPbZF.js +359 -0
  13. package/dist/chunks/gateway-admin-C4GFPbZF.js.map +1 -0
  14. package/dist/chunks/gemini-BfpHXDIQ.js +662 -0
  15. package/dist/chunks/gemini-BfpHXDIQ.js.map +1 -0
  16. package/dist/chunks/huggingface-280qv9iv.js +366 -0
  17. package/dist/chunks/huggingface-280qv9iv.js.map +1 -0
  18. package/dist/chunks/index-BT4thAvS.js +934 -0
  19. package/dist/chunks/index-BT4thAvS.js.map +1 -0
  20. package/dist/chunks/litellm-DhPKa_Jz.js +220 -0
  21. package/dist/chunks/litellm-DhPKa_Jz.js.map +1 -0
  22. package/dist/chunks/ollama-Di1ldur0.js +851 -0
  23. package/dist/chunks/ollama-Di1ldur0.js.map +1 -0
  24. package/dist/chunks/openai-5snI2diE.js +749 -0
  25. package/dist/chunks/openai-5snI2diE.js.map +1 -0
  26. package/dist/chunks/qwen-tts-DgPgdXxG.js +365 -0
  27. package/dist/chunks/qwen-tts-DgPgdXxG.js.map +1 -0
  28. package/dist/chunks/usage-DMWiJ2oB.js +21 -0
  29. package/dist/chunks/usage-DMWiJ2oB.js.map +1 -0
  30. package/dist/cli/claude-context.d.ts +3 -0
  31. package/dist/cli/claude-context.d.ts.map +1 -0
  32. package/dist/cli/claude-context.js +21 -0
  33. package/dist/cli/claude-context.js.map +1 -0
  34. package/dist/index.d.ts +20 -0
  35. package/dist/index.d.ts.map +1 -0
  36. package/dist/index.js +21 -0
  37. package/dist/index.js.map +1 -0
  38. package/dist/node/factory.d.ts +27 -0
  39. package/dist/node/factory.d.ts.map +1 -0
  40. package/dist/shared/client.d.ts +410 -0
  41. package/dist/shared/client.d.ts.map +1 -0
  42. package/dist/shared/factory.d.ts +83 -0
  43. package/dist/shared/factory.d.ts.map +1 -0
  44. package/dist/shared/message.d.ts +71 -0
  45. package/dist/shared/message.d.ts.map +1 -0
  46. package/dist/shared/providers/anthropic.d.ts +82 -0
  47. package/dist/shared/providers/anthropic.d.ts.map +1 -0
  48. package/dist/shared/providers/bedrock.d.ts +49 -0
  49. package/dist/shared/providers/bedrock.d.ts.map +1 -0
  50. package/dist/shared/providers/bifrost.d.ts +25 -0
  51. package/dist/shared/providers/bifrost.d.ts.map +1 -0
  52. package/dist/shared/providers/claude-cli.d.ts +139 -0
  53. package/dist/shared/providers/claude-cli.d.ts.map +1 -0
  54. package/dist/shared/providers/gateway-admin.d.ts +35 -0
  55. package/dist/shared/providers/gateway-admin.d.ts.map +1 -0
  56. package/dist/shared/providers/gemini.d.ts +116 -0
  57. package/dist/shared/providers/gemini.d.ts.map +1 -0
  58. package/dist/shared/providers/huggingface.d.ts +33 -0
  59. package/dist/shared/providers/huggingface.d.ts.map +1 -0
  60. package/dist/shared/providers/litellm.d.ts +25 -0
  61. package/dist/shared/providers/litellm.d.ts.map +1 -0
  62. package/dist/shared/providers/ollama.d.ts +47 -0
  63. package/dist/shared/providers/ollama.d.ts.map +1 -0
  64. package/dist/shared/providers/openai.d.ts +272 -0
  65. package/dist/shared/providers/openai.d.ts.map +1 -0
  66. package/dist/shared/providers/qwen-tts.d.ts +85 -0
  67. package/dist/shared/providers/qwen-tts.d.ts.map +1 -0
  68. package/dist/shared/providers/usage.d.ts +14 -0
  69. package/dist/shared/providers/usage.d.ts.map +1 -0
  70. package/dist/shared/rate-limit.d.ts +13 -0
  71. package/dist/shared/rate-limit.d.ts.map +1 -0
  72. package/dist/shared/thread.d.ts +104 -0
  73. package/dist/shared/thread.d.ts.map +1 -0
  74. package/dist/shared/types.d.ts +1779 -0
  75. package/dist/shared/types.d.ts.map +1 -0
  76. package/metadata.json +35 -0
  77. package/package.json +62 -0
@@ -0,0 +1,1779 @@
1
+ /**
2
+ * Core types and interfaces for the AI library
3
+ */
4
+ /**
5
+ * Thinking level options for Gemini 3 models
6
+ * Controls the amount of internal reasoning the model performs
7
+ */
8
+ export type GeminiThinkingLevel = 'minimal' | 'low' | 'medium' | 'high';
9
+ /**
10
+ * Supported AI provider types
11
+ */
12
+ export declare const AI_PROVIDER_TYPES: readonly ["openai", "litellm", "bifrost", "ollama", "gemini", "anthropic", "huggingface", "bedrock", "claude-cli", "qwen3-tts"];
13
+ /**
14
+ * Supported AI provider type union
15
+ */
16
+ export type AIProviderType = (typeof AI_PROVIDER_TYPES)[number];
17
+ /**
18
+ * Text content part for multimodal messages
19
+ */
20
+ export interface TextContentPart {
21
+ type: 'text';
22
+ text: string;
23
+ }
24
+ /**
25
+ * Image content part for vision-capable models
26
+ */
27
+ export interface ImageContentPart {
28
+ type: 'image_url';
29
+ image_url: {
30
+ /** Image URL (http/https) or base64 data URL */
31
+ url: string;
32
+ /** Image detail level for processing */
33
+ detail?: 'auto' | 'low' | 'high';
34
+ };
35
+ }
36
+ /**
37
+ * Union type for all content parts in multimodal messages
38
+ */
39
+ export type ContentPart = TextContentPart | ImageContentPart;
40
+ /**
41
+ * Extract text content from a message content field.
42
+ *
43
+ * Handles both simple string content and multimodal content arrays,
44
+ * extracting only the text parts and concatenating them.
45
+ *
46
+ * @param content - The message content (string or ContentPart array)
47
+ * @returns The extracted text content
48
+ */
49
+ export declare function extractTextContent(content: string | ContentPart[]): string;
50
+ /**
51
+ * AI message structure for chat interactions
52
+ *
53
+ * Supports both simple string content and multimodal content arrays
54
+ * for vision-capable models.
55
+ *
56
+ * @example Simple text message
57
+ * ```typescript
58
+ * const message: AIMessage = {
59
+ * role: 'user',
60
+ * content: 'Hello, how are you?'
61
+ * };
62
+ * ```
63
+ *
64
+ * @example Multimodal message with image
65
+ * ```typescript
66
+ * const message: AIMessage = {
67
+ * role: 'user',
68
+ * content: [
69
+ * { type: 'text', text: 'What is in this image?' },
70
+ * { type: 'image_url', image_url: { url: 'data:image/png;base64,...' } }
71
+ * ]
72
+ * };
73
+ * ```
74
+ */
75
+ export interface AIMessage {
76
+ /**
77
+ * Role of the message sender: 'system', 'user', 'assistant', 'function', or 'tool'
78
+ */
79
+ role: 'system' | 'user' | 'assistant' | 'function' | 'tool';
80
+ /**
81
+ * Content of the message.
82
+ *
83
+ * Can be a simple string for text-only messages, or an array of content parts
84
+ * for multimodal messages (e.g., text + images for vision models).
85
+ */
86
+ content: string | ContentPart[];
87
+ /**
88
+ * Optional name for the message sender
89
+ */
90
+ name?: string;
91
+ /**
92
+ * Optional tool calls; each entry has an `id`, the fixed type 'function', and the function's `name` plus its `arguments` as a string (presumably JSON-encoded - confirm against the providers)
93
+ */
94
+ tool_calls?: Array<{
95
+ id: string;
96
+ type: 'function';
97
+ function: {
98
+ name: string;
99
+ arguments: string;
100
+ };
101
+ }>;
102
+ }
103
+ /**
104
+ * Options for chat completion requests
105
+ */
106
+ export interface ChatOptions {
107
+ /**
108
+ * Model to use for completion
109
+ */
110
+ model?: string;
111
+ /**
112
+ * Maximum number of tokens to generate
113
+ */
114
+ maxTokens?: number;
115
+ /**
116
+ * Sampling temperature (0-2)
117
+ */
118
+ temperature?: number;
119
+ /**
120
+ * Top-p sampling parameter
121
+ */
122
+ topP?: number;
123
+ /**
124
+ * Number of completions to generate
125
+ */
126
+ n?: number;
127
+ /**
128
+ * Sequences that stop generation
129
+ */
130
+ stop?: string | string[];
131
+ /**
132
+ * Whether to stream the response
133
+ */
134
+ stream?: boolean;
135
+ /**
136
+ * Penalty for frequency of tokens
137
+ */
138
+ frequencyPenalty?: number;
139
+ /**
140
+ * Penalty for presence of tokens
141
+ */
142
+ presencePenalty?: number;
143
+ /**
144
+ * User identifier for monitoring
145
+ */
146
+ user?: string;
147
+ /**
148
+ * Available tools/functions
149
+ */
150
+ tools?: AITool[];
151
+ /**
152
+ * Tool choice behavior
153
+ */
154
+ toolChoice?: 'auto' | 'none' | {
155
+ type: 'function';
156
+ function: {
157
+ name: string;
158
+ };
159
+ };
160
+ /**
161
+ * Response format specification
162
+ */
163
+ responseFormat?: {
164
+ type: 'text' | 'json_object';
165
+ };
166
+ /**
167
+ * Random seed for deterministic results
168
+ */
169
+ seed?: number;
170
+ /**
171
+ * Callback for streaming responses
172
+ */
173
+ onProgress?: (chunk: string) => void;
174
+ /**
175
+ * Thinking level for providers that expose reasoning controls.
176
+ * Gemini 3 models use named levels:
177
+ * - 'minimal': No thinking for most queries (Gemini 3 Flash only)
178
+ * - 'low': Minimizes latency and cost, good for simple tasks
179
+ * - 'medium': Balanced thinking for most tasks (Gemini 3 Flash only)
180
+ * - 'high': Maximizes reasoning depth (default for Gemini 3)
181
+ *
182
+ * Ollama also accepts `false` to explicitly disable visible/internal thinking
183
+ * for models that support it.
184
+ */
185
+ thinkingLevel?: GeminiThinkingLevel | false;
186
+ /**
187
+ * Whether to include the model's internal thoughts in the response
188
+ * Only applicable for Gemini 3 models with thinking enabled
189
+ */
190
+ includeThoughts?: boolean;
191
+ /**
192
+ * Custom tags to attach to the usage event for this call.
193
+ * Merged over any global `usageTags` from provider options.
194
+ */
195
+ usageTags?: Record<string, string>;
196
+ }
197
+ /**
198
+ * Options for text completion requests (non-chat models)
199
+ */
200
+ export interface CompletionOptions {
201
+ /**
202
+ * Model to use for completion
203
+ */
204
+ model?: string;
205
+ /**
206
+ * Maximum number of tokens to generate
207
+ */
208
+ maxTokens?: number;
209
+ /**
210
+ * Sampling temperature
211
+ */
212
+ temperature?: number;
213
+ /**
214
+ * Top-p sampling parameter
215
+ */
216
+ topP?: number;
217
+ /**
218
+ * Number of completions to generate
219
+ */
220
+ n?: number;
221
+ /**
222
+ * Sequences that stop generation
223
+ */
224
+ stop?: string | string[];
225
+ /**
226
+ * Whether to stream the response
227
+ */
228
+ stream?: boolean;
229
+ /**
230
+ * Callback for streaming responses
231
+ */
232
+ onProgress?: (chunk: string) => void;
233
+ /**
234
+ * Custom tags to attach to the usage event for this call.
235
+ * Merged over any global `usageTags` from provider options.
236
+ */
237
+ usageTags?: Record<string, string>;
238
+ }
239
+ /**
240
+ * Options for embedding generation
241
+ */
242
+ export interface EmbeddingOptions {
243
+ /**
244
+ * Model to use for embeddings
245
+ */
246
+ model?: string;
247
+ /**
248
+ * User identifier for monitoring
249
+ */
250
+ user?: string;
251
+ /**
252
+ * Encoding format for embeddings
253
+ */
254
+ encodingFormat?: 'float' | 'base64';
255
+ /**
256
+ * Number of dimensions for the embedding
257
+ */
258
+ dimensions?: number;
259
+ /**
260
+ * Custom tags to attach to the usage event for this call.
261
+ * Merged over any global `usageTags` from provider options.
262
+ */
263
+ usageTags?: Record<string, string>;
264
+ }
265
+ /**
266
+ * Options for image embedding generation
267
+ */
268
+ export interface ImageEmbeddingOptions {
269
+ /**
270
+ * Model to use for image embeddings
271
+ * - Gemini: 'multimodalembedding@001' or similar
272
+ * - OpenAI: Uses describe-then-embed with text-embedding-3-small
273
+ */
274
+ model?: string;
275
+ /**
276
+ * Number of dimensions for the embedding output
277
+ */
278
+ dimensions?: number;
279
+ /**
280
+ * User identifier for monitoring
281
+ */
282
+ user?: string;
283
+ }
284
+ /**
285
+ * Options for image description generation
286
+ */
287
+ export interface ImageDescriptionOptions {
288
+ /**
289
+ * Model to use for image description
290
+ * - OpenAI: defaults to 'gpt-4o'
291
+ * - Gemini: defaults to 'gemini-2.5-flash'
292
+ */
293
+ model?: string;
294
+ /**
295
+ * Maximum tokens for the description
296
+ */
297
+ maxTokens?: number;
298
+ /**
299
+ * Detail level for image processing (OpenAI-specific)
300
+ */
301
+ detail?: 'auto' | 'low' | 'high';
302
+ }
303
+ /**
304
+ * Options for image generation
305
+ */
306
+ export interface ImageGenerationOptions {
307
+ /**
308
+ * Model to use for image generation
309
+ * - OpenAI: 'dall-e-3' (default), 'dall-e-2'
310
+ * - Gemini: 'imagen-3.0-generate-002' (default)
311
+ */
312
+ model?: string;
313
+ /**
314
+ * Input image for image-to-image workflows
315
+ * Can be a URL (http/https), base64 data URL, or Buffer
316
+ */
317
+ imageInput?: string | Buffer;
318
+ /**
319
+ * Aspect ratio for the generated image
320
+ * e.g., "16:9", "1:1", "4:3", "3:4", "9:16"
321
+ */
322
+ aspectRatio?: string;
323
+ /**
324
+ * Output format for the generated image
325
+ * - 'buffer': Returns raw image bytes (default)
326
+ * - 'base64': Returns base64-encoded string
327
+ * - 'url': Returns temporary URL (provider-dependent, may expire)
328
+ */
329
+ outputFormat?: 'buffer' | 'base64' | 'url';
330
+ /**
331
+ * Number of images to generate (provider-dependent)
332
+ * - DALL-E 3: Only 1 supported
333
+ * - Imagen 3: 1-4 supported
334
+ */
335
+ n?: number;
336
+ /**
337
+ * Image style (OpenAI DALL-E 3 specific)
338
+ */
339
+ style?: 'vivid' | 'natural';
340
+ /**
341
+ * Quality setting
342
+ * - OpenAI: 'standard' | 'hd'
343
+ */
344
+ quality?: string;
345
+ /**
346
+ * Size specification (for providers that use fixed sizes)
347
+ * - OpenAI DALL-E 3: '1024x1024' | '1792x1024' | '1024x1792'
348
+ */
349
+ size?: string;
350
+ }
351
+ /**
352
+ * Response from image generation
353
+ */
354
+ export interface ImageGenerationResponse {
355
+ /**
356
+ * Generated image(s) - format depends on outputFormat option
357
+ */
358
+ images: Array<{
359
+ /**
360
+ * Image data - Buffer for 'buffer' format, string for 'base64' or 'url'
361
+ */
362
+ data: Buffer | string;
363
+ /**
364
+ * MIME type of the image (e.g., 'image/png', 'image/jpeg')
365
+ */
366
+ mimeType: string;
367
+ /**
368
+ * Revised prompt (if provider modified the original)
369
+ */
370
+ revisedPrompt?: string;
371
+ }>;
372
+ /**
373
+ * Model used for generation
374
+ */
375
+ model?: string;
376
+ }
377
+ /**
378
+ * Options for simple message requests (convenience method)
379
+ * This provides a simpler interface than chat() for single-turn interactions
380
+ */
381
+ export interface MessageOptions {
382
+ /**
383
+ * Model to use for completion
384
+ */
385
+ model?: string;
386
+ /**
387
+ * Role of the message sender (default: 'user')
388
+ */
389
+ role?: 'user' | 'assistant' | 'system';
390
+ /**
391
+ * Conversation history (previous messages)
392
+ */
393
+ history?: AIMessage[];
394
+ /**
395
+ * Maximum number of tokens to generate
396
+ */
397
+ maxTokens?: number;
398
+ /**
399
+ * Sampling temperature (0-2)
400
+ */
401
+ temperature?: number;
402
+ /**
403
+ * Top-p sampling parameter
404
+ */
405
+ topP?: number;
406
+ /**
407
+ * Sequences that stop generation
408
+ */
409
+ stop?: string | string[];
410
+ /**
411
+ * Whether to stream the response
412
+ */
413
+ stream?: boolean;
414
+ /**
415
+ * Penalty for frequency of tokens
416
+ */
417
+ frequencyPenalty?: number;
418
+ /**
419
+ * Penalty for presence of tokens
420
+ */
421
+ presencePenalty?: number;
422
+ /**
423
+ * Response format specification
424
+ */
425
+ responseFormat?: {
426
+ type: 'text' | 'json_object';
427
+ };
428
+ /**
429
+ * Random seed for deterministic results
430
+ */
431
+ seed?: number;
432
+ /**
433
+ * Available tools/functions
434
+ */
435
+ tools?: AITool[];
436
+ /**
437
+ * Tool choice behavior
438
+ */
439
+ toolChoice?: 'auto' | 'none' | {
440
+ type: 'function';
441
+ function: {
442
+ name: string;
443
+ };
444
+ };
445
+ /**
446
+ * Callback for streaming responses
447
+ */
448
+ onProgress?: (chunk: string) => void;
449
+ /**
450
+ * Custom tags to attach to the usage event for this call.
451
+ * Merged over any global `usageTags` from provider options.
452
+ */
453
+ usageTags?: Record<string, string>;
454
+ }
455
+ /**
456
+ * Tool/function definition for AI models
457
+ */
458
+ export interface AITool {
459
+ /**
460
+ * Type of tool
461
+ */
462
+ type: 'function';
463
+ /**
464
+ * Function definition
465
+ */
466
+ function: {
467
+ /**
468
+ * Function name
469
+ */
470
+ name: string;
471
+ /**
472
+ * Function description
473
+ */
474
+ description?: string;
475
+ /**
476
+ * JSON schema for function parameters
477
+ */
478
+ parameters?: Record<string, any>;
479
+ };
480
+ }
481
+ /**
482
+ * Model information structure
483
+ */
484
+ export interface AIModel {
485
+ /**
486
+ * Model identifier
487
+ */
488
+ id: string;
489
+ /**
490
+ * Human-readable model name
491
+ */
492
+ name: string;
493
+ /**
494
+ * Model description
495
+ */
496
+ description?: string;
497
+ /**
498
+ * Maximum context length in tokens
499
+ */
500
+ contextLength: number;
501
+ /**
502
+ * Supported capabilities
503
+ */
504
+ capabilities: string[];
505
+ /**
506
+ * Whether the model supports function calling
507
+ */
508
+ supportsFunctions: boolean;
509
+ /**
510
+ * Whether the model supports vision/multimodal input
511
+ */
512
+ supportsVision: boolean;
513
+ /**
514
+ * Cost per 1,000 input tokens, as the `Per1k` suffix indicates (if available)
515
+ */
516
+ inputCostPer1k?: number;
517
+ /**
518
+ * Cost per 1,000 output tokens, as the `Per1k` suffix indicates (if available)
519
+ */
520
+ outputCostPer1k?: number;
521
+ }
522
+ /**
523
+ * Budget configuration for AI gateway admin operations.
524
+ *
525
+ * Providers translate this to their native field names:
526
+ * - Bifrost: `budget.max_limit` / `budget.reset_duration`
527
+ * - LiteLLM: `max_budget` / `budget_duration`
528
+ */
529
+ export interface AIAdminBudget {
530
+ /**
531
+ * Maximum spend in USD.
532
+ */
533
+ maxLimit?: number;
534
+ /**
535
+ * Reset duration such as `1h`, `1d`, `30d`, or `1M`.
536
+ */
537
+ resetDuration?: string;
538
+ /**
539
+ * Bifrost only: reset at calendar boundaries for day/week/month/year periods.
540
+ */
541
+ calendarAligned?: boolean;
542
+ }
543
+ /**
544
+ * Rate-limit configuration for AI gateway admin operations.
545
+ */
546
+ export interface AIAdminRateLimit {
547
+ /**
548
+ * Provider-agnostic token limit.
549
+ *
550
+ * Bifrost maps this to `token_max_limit`; LiteLLM maps it to `tpm_limit`.
551
+ */
552
+ tokenMaxLimit?: number;
553
+ /**
554
+ * Bifrost token reset duration such as `1h`.
555
+ */
556
+ tokenResetDuration?: string;
557
+ /**
558
+ * Provider-agnostic request limit.
559
+ *
560
+ * Bifrost maps this to `request_max_limit`; LiteLLM maps it to `rpm_limit`.
561
+ */
562
+ requestMaxLimit?: number;
563
+ /**
564
+ * Bifrost request reset duration such as `1m`.
565
+ */
566
+ requestResetDuration?: string;
567
+ /**
568
+ * LiteLLM tokens-per-minute limit. Overrides `tokenMaxLimit` for LiteLLM.
569
+ */
570
+ tpmLimit?: number;
571
+ /**
572
+ * LiteLLM requests-per-minute limit. Overrides `requestMaxLimit` for LiteLLM.
573
+ */
574
+ rpmLimit?: number;
575
+ }
576
+ /**
577
+ * Bifrost virtual-key routing configuration.
578
+ */
579
+ export interface AIAdminProviderConfig {
580
+ /**
581
+ * Provider identifier such as `openai` or `anthropic`.
582
+ */
583
+ provider: string;
584
+ /**
585
+ * Routing weight for this provider.
586
+ */
587
+ weight?: number;
588
+ /**
589
+ * Models this virtual key may use for the provider.
590
+ */
591
+ allowedModels?: string[];
592
+ /**
593
+ * Bifrost provider key IDs that this virtual key may use.
594
+ */
595
+ keyIds?: string[];
596
+ }
597
+ /**
598
+ * Options for creating a gateway-scoped project.
599
+ *
600
+ * In Bifrost, projects are implemented as governance teams, optionally attached
601
+ * to a customer via `tenantId`. In LiteLLM, projects are implemented as teams.
602
+ */
603
+ export interface CreateAIProjectOptions {
604
+ /**
605
+ * Stable project ID. LiteLLM requires one; if omitted, a slug is derived from
606
+ * the tenant and project name. Bifrost generates its own team ID.
607
+ */
608
+ id?: string;
609
+ /**
610
+ * Human-readable project name.
611
+ */
612
+ name: string;
613
+ /**
614
+ * Tenant/customer identifier to attach the project to where supported.
615
+ */
616
+ tenantId?: string;
617
+ /**
618
+ * Human-readable description. Stored in metadata for providers that support it.
619
+ */
620
+ description?: string;
621
+ /**
622
+ * Models the project may access.
623
+ */
624
+ models?: string[];
625
+ /**
626
+ * Shared project budget.
627
+ */
628
+ budget?: AIAdminBudget;
629
+ /**
630
+ * Shared project rate limits.
631
+ */
632
+ rateLimit?: AIAdminRateLimit;
633
+ /**
634
+ * Provider-specific metadata.
635
+ */
636
+ metadata?: Record<string, unknown>;
637
+ /**
638
+ * Whether the project should be blocked on creation where supported.
639
+ */
640
+ isBlocked?: boolean;
641
+ /**
642
+ * Provider-specific request body overrides.
643
+ */
644
+ raw?: Record<string, unknown>;
645
+ }
646
+ /**
647
+ * Gateway project descriptor returned by admin providers.
648
+ */
649
+ export interface AIAdminProject {
650
+ /**
651
+ * Provider project ID.
652
+ */
653
+ id: string;
654
+ /**
655
+ * Human-readable project name.
656
+ */
657
+ name: string;
658
+ /**
659
+ * Tenant/customer identifier where available.
660
+ */
661
+ tenantId?: string;
662
+ /**
663
+ * Provider budget ID where available.
664
+ */
665
+ budgetId?: string;
666
+ /**
667
+ * Admin provider that created this project.
668
+ */
669
+ provider: string;
670
+ /**
671
+ * Raw provider response.
672
+ */
673
+ raw?: unknown;
674
+ }
675
+ /**
676
+ * Options for creating a gateway virtual key.
677
+ */
678
+ export interface CreateAIVirtualKeyOptions {
679
+ /**
680
+ * Human-readable key name or alias.
681
+ */
682
+ name: string;
683
+ /**
684
+ * Human-readable key description.
685
+ */
686
+ description?: string;
687
+ /**
688
+ * Project/team ID to attach the key to.
689
+ */
690
+ projectId?: string;
691
+ /**
692
+ * Tenant/customer ID to attach the key to when no project is supplied, or to
693
+ * record in LiteLLM metadata.
694
+ */
695
+ tenantId?: string;
696
+ /**
697
+ * Optional end-user ID associated with the key.
698
+ */
699
+ userId?: string;
700
+ /**
701
+ * Models this key may access.
702
+ */
703
+ models?: string[];
704
+ /**
705
+ * Bifrost provider routing configuration.
706
+ */
707
+ providerConfigs?: AIAdminProviderConfig[];
708
+ /**
709
+ * Key-level budget.
710
+ */
711
+ budget?: AIAdminBudget;
712
+ /**
713
+ * Key-level rate limits.
714
+ */
715
+ rateLimit?: AIAdminRateLimit;
716
+ /**
717
+ * Key duration such as `30d`, `1h`, or `permanent` where supported.
718
+ */
719
+ duration?: string;
720
+ /**
721
+ * Provider-specific metadata.
722
+ */
723
+ metadata?: Record<string, unknown>;
724
+ /**
725
+ * Bifrost provider API key IDs this virtual key may use. Use `["*"]` to allow
726
+ * all configured provider keys.
727
+ */
728
+ keyIds?: string[];
729
+ /**
730
+ * Whether the key should be active on creation.
731
+ */
732
+ isActive?: boolean;
733
+ /**
734
+ * LiteLLM model aliases for this key.
735
+ */
736
+ aliases?: Record<string, string>;
737
+ /**
738
+ * LiteLLM key-specific config.
739
+ */
740
+ config?: Record<string, unknown>;
741
+ /**
742
+ * LiteLLM key-specific permissions.
743
+ */
744
+ permissions?: Record<string, unknown>;
745
+ /**
746
+ * Provider-specific request body overrides.
747
+ */
748
+ raw?: Record<string, unknown>;
749
+ }
750
+ /**
751
+ * Gateway virtual key descriptor returned by admin providers.
752
+ */
753
+ export interface AIVirtualKey {
754
+ /**
755
+ * Provider key ID, when returned separately from the key value.
756
+ */
757
+ id?: string;
758
+ /**
759
+ * Human-readable key name or alias.
760
+ */
761
+ name?: string;
762
+ /**
763
+ * Newly generated key value. Some provider list/detail responses may only
764
+ * expose a masked value.
765
+ */
766
+ key?: string;
767
+ /**
768
+ * Masked key value or key name, when provided.
769
+ */
770
+ maskedKey?: string;
771
+ /**
772
+ * Attached project/team ID.
773
+ */
774
+ projectId?: string;
775
+ /**
776
+ * Attached tenant/customer ID.
777
+ */
778
+ tenantId?: string;
779
+ /**
780
+ * Expiration timestamp where supported.
781
+ */
782
+ expiresAt?: string;
783
+ /**
784
+ * Admin provider that created this key.
785
+ */
786
+ provider: string;
787
+ /**
788
+ * Raw provider response.
789
+ */
790
+ raw?: unknown;
791
+ }
792
+ /**
793
+ * Admin operations exposed by gateway providers that support provisioning.
794
+ */
795
+ export interface AIAdminInterface {
796
+ /**
797
+ * Create a project/team for a tenant.
798
+ */
799
+ createProject(options: CreateAIProjectOptions): Promise<AIAdminProject>;
800
+ /**
801
+ * Create a virtual key, optionally attached to a project or tenant.
802
+ */
803
+ createVirtualKey(options: CreateAIVirtualKeyOptions): Promise<AIVirtualKey>;
804
+ }
805
+ /**
806
+ * AI provider capabilities
807
+ */
808
+ export interface AICapabilities {
809
+ /**
810
+ * Whether the provider supports chat completions
811
+ */
812
+ chat: boolean;
813
+ /**
814
+ * Whether the provider supports text completions
815
+ */
816
+ completion: boolean;
817
+ /**
818
+ * Whether the provider supports embeddings
819
+ */
820
+ embeddings: boolean;
821
+ /**
822
+ * Whether the provider supports streaming
823
+ */
824
+ streaming: boolean;
825
+ /**
826
+ * Whether the provider supports function calling
827
+ */
828
+ functions: boolean;
829
+ /**
830
+ * Whether the provider supports vision/multimodal
831
+ */
832
+ vision: boolean;
833
+ /**
834
+ * Whether the provider supports fine-tuning
835
+ */
836
+ fineTuning: boolean;
837
+ /**
838
+ * Whether the provider supports image embeddings
839
+ */
840
+ imageEmbeddings: boolean;
841
+ /**
842
+ * Whether the provider supports image generation
843
+ */
844
+ imageGeneration: boolean;
845
+ /**
846
+ * Whether the provider supports text-to-speech synthesis
847
+ */
848
+ tts: boolean;
849
+ /**
850
+ * Whether the provider supports voice cloning from samples
851
+ */
852
+ voiceCloning: boolean;
853
+ /**
854
+ * Whether the provider supports voice design via description
855
+ */
856
+ voiceDesign: boolean;
857
+ /**
858
+ * Maximum context length supported
859
+ */
860
+ maxContextLength: number;
861
+ /**
862
+ * Supported operations
863
+ */
864
+ supportedOperations: string[];
865
+ }
866
+ /**
867
+ * Token usage information
868
+ */
869
+ export interface TokenUsage {
870
+ /**
871
+ * Number of prompt tokens
872
+ */
873
+ promptTokens: number;
874
+ /**
875
+ * Number of completion tokens
876
+ */
877
+ completionTokens: number;
878
+ /**
879
+ * Total tokens used
880
+ */
881
+ totalTokens: number;
882
+ }
883
+ /**
884
+ * Usage event emitted via the `onUsage` callback after each API call.
885
+ * Provides token usage, timing, and context for tracking and analytics.
886
+ *
887
+ * @example
888
+ * ```typescript
889
+ * const ai = await getAI({
890
+ * type: 'openai',
891
+ * apiKey: '...',
892
+ * onUsage: (event) => {
893
+ * console.log(`[${event.provider}/${event.model}] ${event.operation}: ${event.usage?.totalTokens} tokens in ${event.duration}ms`);
894
+ * },
895
+ * });
896
+ * ```
897
+ */
898
+ export interface UsageEvent {
899
+ /** Provider that handled the request (e.g. 'openai', 'anthropic', 'gemini') */
900
+ provider: string;
901
+ /** Model that was used (e.g. 'gpt-4o', 'claude-3-5-sonnet-20241022') */
902
+ model: string;
903
+ /** Operation type that generated this usage */
904
+ operation: 'chat' | 'complete' | 'message' | 'embed' | 'embedImage' | 'describeImage' | 'generateImage' | 'stream';
905
+ /** Token usage breakdown, if available from the provider */
906
+ usage?: TokenUsage;
907
+ /** Wall-clock duration of the API call in milliseconds */
908
+ duration: number;
909
+ /** Timestamp when the call completed */
910
+ timestamp: Date;
911
+ /** Custom tags from global `usageTags` and per-call `usageTags`, merged */
912
+ tags?: Record<string, string>;
913
+ }
914
+ /**
915
+ * AI response structure
916
+ */
917
+ export interface AIResponse {
918
+ /**
919
+ * Generated content, returned as a plain string
920
+ */
921
+ content: string;
922
+ /**
923
+ * Token usage information
924
+ */
925
+ usage?: TokenUsage;
926
+ /**
927
+ * Model used for generation
928
+ */
929
+ model?: string;
930
+ /**
931
+ * Reason generation finished: 'stop', 'length', 'tool_calls', or 'content_filter'
932
+ */
933
+ finishReason?: 'stop' | 'length' | 'tool_calls' | 'content_filter';
934
+ /**
935
+ * Tool calls made by the model; each entry has an `id`, the fixed type 'function', and the function's `name` plus its `arguments` as a string (presumably JSON-encoded - confirm against the providers)
936
+ */
937
+ toolCalls?: Array<{
938
+ id: string;
939
+ type: 'function';
940
+ function: {
941
+ name: string;
942
+ arguments: string;
943
+ };
944
+ }>;
945
+ }
946
+ /**
947
+ * Embedding response structure
948
+ */
949
+ export interface EmbeddingResponse {
950
+ /**
951
+ * Generated embeddings
952
+ */
953
+ embeddings: number[][];
954
+ /**
955
+ * Token usage information
956
+ */
957
+ usage?: TokenUsage;
958
+ /**
959
+ * Model used for embeddings
960
+ */
961
+ model?: string;
962
+ }
963
+ /**
964
+ * Core AI interface that all providers must implement
965
+ */
966
+ export interface AIInterface {
967
+ /**
968
+ * Optional admin surface for gateway providers that support provisioning.
969
+ */
970
+ admin?: AIAdminInterface;
971
+ /**
972
+ * Generate a chat completion from a sequence of messages.
973
+ *
974
+ * @param messages - Conversation messages (system, user, assistant, tool roles)
975
+ * @param options - Chat options including model, temperature, tools, etc.
976
+ * @returns Promise resolving to the model's response with content and usage info
977
+ * @throws {AIError} When the request fails
978
+ * @throws {AuthenticationError} When credentials are invalid
979
+ * @throws {RateLimitError} When the provider's rate limit is exceeded
980
+ */
981
+ chat(messages: AIMessage[], options?: ChatOptions): Promise<AIResponse>;
982
+ /**
983
+ * Generate a text completion from a prompt string (non-chat interface).
984
+ *
985
+ * @param prompt - The text prompt to complete
986
+ * @param options - Completion options including model, temperature, etc.
987
+ * @returns Promise resolving to the model's response
988
+ * @throws {AIError} When the request fails
989
+ */
990
+ complete(prompt: string, options?: CompletionOptions): Promise<AIResponse>;
991
+ /**
992
+ * Simple message interface for single-turn interactions
993
+ *
994
+ * This is a convenience method that wraps chat() for simpler use cases.
995
+ * It accepts a text string and optional configuration, returning just
996
+ * the response content as a string.
997
+ *
998
+ * Supports conversation history via the `history` option for multi-turn
999
+ * conversations while maintaining a simple API.
1000
+ *
1001
+ * @param text - The message text to send
1002
+ * @param options - Configuration options including history, model, etc.
1003
+ * @returns Promise resolving to the response content string
1004
+ *
1005
+ * @example
1006
+ * ```typescript
1007
+ * // Simple single-turn usage
1008
+ * const response = await ai.message('Hello, how are you?');
1009
+ *
1010
+ * // With options
1011
+ * const response = await ai.message('Analyze this data', {
1012
+ * model: 'gpt-4o',
1013
+ * responseFormat: { type: 'json_object' },
1014
+ * maxTokens: 1000
1015
+ * });
1016
+ *
1017
+ * // With conversation history
1018
+ * const response = await ai.message('What did I ask before?', {
1019
+ * history: [
1020
+ * { role: 'user', content: 'Hello' },
1021
+ * { role: 'assistant', content: 'Hi there!' }
1022
+ * ]
1023
+ * });
1024
+ * ```
1025
+ */
1026
+ message(text: string, options?: MessageOptions): Promise<string>;
1027
+ /**
1028
+ * Generate vector embeddings for one or more text inputs.
1029
+ *
1030
+ * @param text - A single string or array of strings to embed
1031
+ * @param options - Embedding options including model and dimensions
1032
+ * @returns Promise resolving to embedding vectors and usage info
1033
+ * @throws {AIError} When embeddings are not supported by this provider or request fails
1034
+ */
1035
+ embed(text: string | string[], options?: EmbeddingOptions): Promise<EmbeddingResponse>;
1036
+ /**
1037
+ * Generate embeddings for an image
1038
+ *
1039
+ * Implementation varies by provider:
1040
+ * - Gemini: Uses native multimodal embeddings
1041
+ * - OpenAI: Uses describe-then-embed pattern (describeImage → embed)
1042
+ * - Others: Throws NOT_IMPLEMENTED
1043
+ *
1044
+ * @param image - Image as URL, base64 data URL, or Buffer
1045
+ * @param options - Optional configuration for image embeddings
1046
+ * @returns Promise resolving to embeddings response
1047
+ * @throws {AIError} When embeddings are not supported or request fails
1048
+ *
1049
+ * @example
1050
+ * ```typescript
1051
+ * // From URL
1052
+ * const embedding = await ai.embedImage('https://example.com/image.jpg');
1053
+ *
1054
+ * // From Buffer
1055
+ * const buffer = fs.readFileSync('image.png');
1056
+ * const embedding = await ai.embedImage(buffer);
1057
+ *
1058
+ * // With options
1059
+ * const embedding = await ai.embedImage(imageUrl, { dimensions: 768 });
1060
+ * ```
1061
+ */
1062
+ embedImage(image: string | Buffer, options?: ImageEmbeddingOptions): Promise<EmbeddingResponse>;
1063
+ /**
1064
+ * Generate a text description of an image
1065
+ *
1066
+ * @param image - Image as URL, base64 data URL, or Buffer
1067
+ * @param prompt - Custom prompt for description (optional)
1068
+ * @param options - Optional configuration
1069
+ * @returns Promise resolving to the description string
1070
+ * @throws {AIError} When vision is not supported or request fails
1071
+ *
1072
+ * @example
1073
+ * ```typescript
1074
+ * // Default description for search indexing
1075
+ * const description = await ai.describeImage('https://example.com/image.jpg');
1076
+ *
1077
+ * // Custom prompt
1078
+ * const description = await ai.describeImage(imageBuffer, 'What product is shown?');
1079
+ *
1080
+ * // With options
1081
+ * const description = await ai.describeImage(imageUrl, undefined, {
1082
+ * model: 'gpt-4o',
1083
+ * maxTokens: 500,
1084
+ * detail: 'high'
1085
+ * });
1086
+ * ```
1087
+ */
1088
+ describeImage(image: string | Buffer, prompt?: string, options?: ImageDescriptionOptions): Promise<string>;
1089
+ /**
1090
+ * Generate an image from a text prompt
1091
+ *
1092
+ * @param prompt - Text description of the image to generate
1093
+ * @param options - Optional configuration for image generation
1094
+ * @returns Promise resolving to generated image(s)
1095
+ * @throws {AIError} When image generation is not supported or request fails
1096
+ *
1097
+ * @example
1098
+ * ```typescript
1099
+ * // Basic generation (returns Buffer by default)
1100
+ * const result = await ai.generateImage('A sunset over mountains');
1101
+ * fs.writeFileSync('image.png', result.images[0].data);
1102
+ *
1103
+ * // With options
1104
+ * const result = await ai.generateImage('A cat wearing a hat', {
1105
+ * outputFormat: 'base64',
1106
+ * size: '1024x1024',
1107
+ * style: 'vivid'
1108
+ * });
1109
+ * ```
1110
+ */
1111
+ generateImage(prompt: string, options?: ImageGenerationOptions): Promise<ImageGenerationResponse>;
1112
+ /**
1113
+ * Stream a chat completion, yielding text chunks as they arrive.
1114
+ *
1115
+ * @param messages - Conversation messages
1116
+ * @param options - Chat options including model, temperature, etc.
1117
+ * @returns Async iterable of string chunks
1118
+ * @throws {AIError} When the request fails
1119
+ */
1120
+ stream(messages: AIMessage[], options?: ChatOptions): AsyncIterable<string>;
1121
+ /**
1122
+ * Estimate or calculate the token count for a text string.
1123
+ *
1124
+ * @param text - The text to tokenize
1125
+ * @returns Promise resolving to the token count
1126
+ */
1127
+ countTokens(text: string): Promise<number>;
1128
+ /**
1129
+ * List models available from this provider.
1130
+ *
1131
+ * @returns Promise resolving to an array of model descriptors
1132
+ */
1133
+ getModels(): Promise<AIModel[]>;
1134
+ /**
1135
+ * Query the capabilities supported by this provider (chat, embeddings, vision, TTS, etc.).
1136
+ *
1137
+ * @returns Promise resolving to a capabilities descriptor
1138
+ */
1139
+ getCapabilities(): Promise<AICapabilities>;
1140
+ /**
1141
+ * Synthesize speech from text
1142
+ *
1143
+ * @param text - The text to synthesize into speech
1144
+ * @param options - Optional configuration for TTS synthesis
1145
+ * @returns Promise resolving to audio data with metadata
1146
+ * @throws {AIError} When TTS is not supported or request fails
1147
+ *
1148
+ * @example
1149
+ * ```typescript
1150
+ * // Basic synthesis
1151
+ * const result = await ai.synthesizeSpeech('Hello, world!');
1152
+ * fs.writeFileSync('speech.wav', result.audio);
1153
+ *
1154
+ * // With options
1155
+ * const result = await ai.synthesizeSpeech('News broadcast text', {
1156
+ * voice: 'news-anchor-1',
1157
+ * speed: 1.1,
1158
+ * includeWordTimings: true
1159
+ * });
1160
+ * console.log(`Duration: ${result.duration}s`);
1161
+ * ```
1162
+ */
1163
+ synthesizeSpeech(text: string, options?: TTSOptions): Promise<TTSResponse>;
1164
+ /**
1165
+ * Stream speech synthesis for real-time playback
1166
+ *
1167
+ * @param text - The text to synthesize into speech
1168
+ * @param options - Optional configuration for TTS synthesis
1169
+ * @returns AsyncIterable of audio chunks
1170
+ * @throws {AIError} When TTS streaming is not supported or request fails
1171
+ *
1172
+ * @example
1173
+ * ```typescript
1174
+ * const chunks: Buffer[] = [];
1175
+ * for await (const chunk of ai.streamSpeech('Long text...')) {
1176
+ * chunks.push(chunk);
1177
+ * // Or stream directly to audio output
1178
+ * }
1179
+ * ```
1180
+ */
1181
+ streamSpeech(text: string, options?: TTSOptions): AsyncIterable<Buffer>;
1182
+ /**
1183
+ * Clone a voice from an audio sample
1184
+ *
1185
+ * Creates a new voice profile from a 3+ second audio sample.
1186
+ * The cloned voice can be used in subsequent synthesizeSpeech calls.
1187
+ *
1188
+ * @param options - Voice cloning configuration including audio sample
1189
+ * @returns Promise resolving to the cloned voice profile
1190
+ * @throws {AIError} When voice cloning is not supported or request fails
1191
+ *
1192
+ * @example
1193
+ * ```typescript
1194
+ * const sample = fs.readFileSync('voice-sample.wav');
1195
+ * const voice = await ai.cloneVoice({
1196
+ * sampleAudio: sample,
1197
+ * name: 'News Anchor Voice',
1198
+ * language: 'en-US'
1199
+ * });
1200
+ *
1201
+ * // Use the cloned voice
1202
+ * const speech = await ai.synthesizeSpeech('Breaking news...', {
1203
+ * voice: voice.id
1204
+ * });
1205
+ * ```
1206
+ */
1207
+ cloneVoice(options: VoiceCloneOptions): Promise<Voice>;
1208
+ /**
1209
+ * Design a voice using natural language description
1210
+ *
1211
+ * Creates a new voice profile from a text description of the desired voice.
1212
+ * The designed voice can be used in subsequent synthesizeSpeech calls.
1213
+ *
1214
+ * @param options - Voice design configuration including description
1215
+ * @returns Promise resolving to the designed voice profile
1216
+ * @throws {AIError} When voice design is not supported or request fails
1217
+ *
1218
+ * @example
1219
+ * ```typescript
1220
+ * const voice = await ai.designVoice({
1221
+ * description: 'warm female voice, slight British accent, professional news anchor',
1222
+ * language: 'en-US',
1223
+ * gender: 'female'
1224
+ * });
1225
+ *
1226
+ * // Use the designed voice
1227
+ * const speech = await ai.synthesizeSpeech('Good evening...', {
1228
+ * voice: voice.id
1229
+ * });
1230
+ * ```
1231
+ */
1232
+ designVoice(options: VoiceDesignOptions): Promise<Voice>;
1233
+ /**
1234
+ * List available voices for TTS synthesis
1235
+ *
1236
+ * @param options - Optional filters for the voice list
1237
+ * @returns Promise resolving to array of available voices
1238
+ * @throws {AIError} When TTS is not supported or request fails
1239
+ *
1240
+ * @example
1241
+ * ```typescript
1242
+ * // List all voices
1243
+ * const voices = await ai.getVoices();
1244
+ *
1245
+ * // Filter by language
1246
+ * const englishVoices = await ai.getVoices({ language: 'en' });
1247
+ *
1248
+ * // Include cloned voices
1249
+ * const allVoices = await ai.getVoices({ includeCloned: true });
1250
+ * ```
1251
+ */
1252
+ getVoices(options?: VoiceListOptions): Promise<Voice[]>;
1253
+ }
1254
+ /**
1255
+ * Shared rate-limit configuration for AI providers.
1256
+ *
1257
+ * The pacing wrapper activates only when at least one of the pacing fields
1258
+ * (`enabled`, `key`, `cooldownMs`, `initialDelayMs`, `maxAttempts`) is set.
1259
+ *
1260
+ * The remaining fields, `requestsPerMinute` and `maxConcurrent`, are not pacing
1261
+ * fields; they are read by the `qwen3-tts` provider for its local token bucket limiter.
1262
+ */
1263
+ export interface AIRateLimitOptions {
1264
+ /**
1265
+ * Pacing field: enable shared in-process request pacing for this client.
1266
+ */
1267
+ enabled?: boolean;
1268
+ /**
1269
+ * Pacing field: shared budget key used to coordinate pacing across multiple clients.
1270
+ * If omitted, a provider-scoped key is derived from the configured credentials.
1271
+ */
1272
+ key?: string;
1273
+ /**
1274
+ * Pacing field: minimum delay in milliseconds between successful calls sharing the same key.
1275
+ */
1276
+ cooldownMs?: number;
1277
+ /**
1278
+ * Pacing field: fallback delay in milliseconds before retrying a rate-limited call when
1279
+ * the provider does not return a `Retry-After` hint.
1280
+ */
1281
+ initialDelayMs?: number;
1282
+ /**
1283
+ * Pacing field: maximum attempts for retryable rate-limit failures, including the first call.
1284
+ */
1285
+ maxAttempts?: number;
1286
+ /**
1287
+ * Qwen3-TTS only: maximum requests per minute for its local token bucket.
1288
+ */
1289
+ requestsPerMinute?: number;
1290
+ /**
1291
+ * Qwen3-TTS only: maximum concurrent requests allowed by its local limiter.
1292
+ */
1293
+ maxConcurrent?: number;
1294
+ }
1295
+ /**
1296
+ * Base configuration options shared by all providers; every provider options interface in this file extends it.
1297
+ */
1298
+ export interface BaseAIOptions {
1299
+ /**
1300
+ * API timeout in milliseconds.
1301
+ */
1302
+ timeout?: number;
1303
+ /**
1304
+ * Maximum number of retries. NOTE(review): how this relates to `rateLimit.maxAttempts` is not visible here — confirm.
1305
+ */
1306
+ maxRetries?: number;
1307
+ /**
1308
+ * Custom headers, as a map of header name to value (presumably sent with each provider request — confirm).
1309
+ */
1310
+ headers?: Record<string, string>;
1311
+ /**
1312
+ * Default model to use.
1313
+ */
1314
+ defaultModel?: string;
1315
+ /**
1316
+ * Callback invoked after each API call with usage details.
1317
+ * Use this to track token consumption, costs, and performance across providers.
1318
+ *
1319
+ * Errors thrown inside this callback are silently caught and will not
1320
+ * affect the API call result.
1321
+ *
1322
+ * @param event - Usage event with provider, model, operation, tokens, and timing
1323
+ */
1324
+ onUsage?: (event: UsageEvent) => void;
1325
+ /**
1326
+ * Global tags to include in every usage event.
1327
+ * Per-call `usageTags` on `ChatOptions` / `EmbeddingOptions` / etc.
1328
+ * will be merged on top of these.
1329
+ */
1330
+ usageTags?: Record<string, string>;
1331
+ /**
1332
+ * Optional shared pacing / retry configuration; see `AIRateLimitOptions` for the individual fields.
1333
+ */
1334
+ rateLimit?: AIRateLimitOptions;
1335
+ }
1336
+ /**
1337
+ * OpenAI provider options. Unlike the other provider option interfaces in this file, `type` is optional here (presumably because OpenAI is the default provider — confirm).
1338
+ */
1339
+ export interface OpenAIOptions extends BaseAIOptions {
1340
+ type?: 'openai';
1341
+ apiKey?: string;
1342
+ baseUrl?: string;
1343
+ organization?: string;
1344
+ }
1345
+ /**
1346
+ * LiteLLM provider options, selected with `type: 'litellm'`.
1347
+ *
1348
+ * LiteLLM exposes an OpenAI-compatible API surface and requires a custom
1349
+ * base URL such as `https://llm.happyvertical.com/v1`. NOTE(review): `baseUrl` is nevertheless typed as optional — confirm whether it can be supplied another way. The `admin*` fields are undocumented here; `adminUrl` is presumably an alias of `adminBaseUrl`, as on `BifrostOptions` — confirm.
1350
+ */
1351
+ export interface LiteLLMOptions extends BaseAIOptions {
1352
+ type: 'litellm';
1353
+ apiKey?: string;
1354
+ baseUrl?: string;
1355
+ organization?: string;
1356
+ adminApiKey?: string;
1357
+ adminBaseUrl?: string;
1358
+ adminUrl?: string;
1359
+ adminHeaders?: Record<string, string>;
1360
+ }
1361
+ /**
1362
+ * Bifrost provider options, selected with `type: 'bifrost'`.
1363
+ *
1364
+ * Bifrost exposes OpenAI-compatible inference through endpoints such as
1365
+ * `/openai` and `/v1`, plus governance admin endpoints at `/api/governance/*`.
1366
+ */
1367
+ export interface BifrostOptions extends BaseAIOptions {
1368
+ type: 'bifrost';
1369
+ apiKey?: string;
1370
+ baseUrl?: string;
1371
+ organization?: string;
1372
+ /**
1373
+ * Optional virtual key for admin routes. Bifrost OSS admin APIs typically use
1374
+ * username/password Basic auth instead; use `adminUser` / `adminPassword`
1375
+ * when governance auth is enabled without enterprise bearer-token support.
1376
+ */
1377
+ adminApiKey?: string;
1378
+ /**
1379
+ * Admin API root. Alias: `adminUrl`.
1380
+ */
1381
+ adminBaseUrl?: string;
1382
+ /**
1383
+ * Admin API root. Kept as a friendly alias of `adminBaseUrl` for env vars such as
1384
+ * `BIFROST_ADMIN_URL`.
1385
+ */
1386
+ adminUrl?: string;
1387
+ /**
1388
+ * Bifrost admin username for HTTP Basic auth. See also `adminUsername`, which carries the same description.
1389
+ */
1390
+ adminUser?: string;
1391
+ /**
1392
+ * Bifrost admin username for HTTP Basic auth. Presumably an alias of `adminUser` — confirm which one wins when both are set.
1393
+ */
1394
+ adminUsername?: string;
1395
+ /**
1396
+ * Bifrost admin password for HTTP Basic auth, used together with the admin username.
1397
+ */
1398
+ adminPassword?: string;
1399
+ adminHeaders?: Record<string, string>;
1400
+ }
1401
+ /**
1402
+ * Ollama provider options, selected with `type: 'ollama'`.
1403
+ *
1404
+ * Ollama defaults to the local host at `http://localhost:11434` and can also
1405
+ * target remote hosts such as `https://ollama.com/api` when paired with an
1406
+ * API key (`apiKey`).
1407
+ */
1408
+ export interface OllamaOptions extends BaseAIOptions {
1409
+ type: 'ollama';
1410
+ apiKey?: string;
1411
+ baseUrl?: string;
1412
+ /**
1413
+ * Default keep-alive duration for model requests, given as a string or a number, for example `5m` or `0`. NOTE(review): the unit of a numeric value is not stated here — confirm.
1414
+ */
1415
+ keepAlive?: string | number;
1416
+ }
1417
+ /**
1418
+ * Gemini provider options, selected with `type: 'gemini'`.
1419
+ */
1420
+ export interface GeminiOptions extends BaseAIOptions {
1421
+ type: 'gemini';
1422
+ apiKey?: string;
1423
+ baseUrl?: string;
1424
+ projectId?: string;
1425
+ location?: string;
1426
+ /**
1427
+ * Thinking level for Gemini 3 models (`gemini-3-flash-preview`, `gemini-3-pro`).
1428
+ * Controls internal reasoning depth:
1429
+ * - 'minimal': No thinking for most queries (Gemini 3 Flash only)
1430
+ * - 'low': Minimizes latency and cost, good for simple tasks
1431
+ * - 'medium': Balanced thinking for most tasks (Gemini 3 Flash only)
1432
+ * - 'high': Maximizes reasoning depth (default for Gemini 3)
1433
+ *
1434
+ * Note: Only works with Gemini 3 models. Gemini 2.5 uses `thinkingBudget` instead.
1435
+ */
1436
+ thinkingLevel?: GeminiThinkingLevel;
1437
+ }
1438
+ /**
1439
+ * Anthropic provider options, selected with `type: 'anthropic'`. NOTE(review): `anthropicVersion` presumably sets the API version sent to Anthropic — confirm against the provider implementation.
1440
+ */
1441
+ export interface AnthropicOptions extends BaseAIOptions {
1442
+ type: 'anthropic';
1443
+ apiKey?: string;
1444
+ baseUrl?: string;
1445
+ anthropicVersion?: string;
1446
+ }
1447
+ /**
1448
+ * Hugging Face provider options, selected with `type: 'huggingface'`. Note the field names: this interface uses `apiToken` / `endpoint` where most other providers use `apiKey` / `baseUrl`. NOTE(review): how `model` relates to the inherited `defaultModel` is not visible here — confirm.
1449
+ */
1450
+ export interface HuggingFaceOptions extends BaseAIOptions {
1451
+ type: 'huggingface';
1452
+ apiToken?: string;
1453
+ endpoint?: string;
1454
+ model?: string;
1455
+ useCache?: boolean;
1456
+ waitForModel?: boolean;
1457
+ }
1458
+ /**
1459
+ * AWS Bedrock provider options, selected with `type: 'bedrock'`. `credentials` is optional, but when supplied `accessKeyId` and `secretAccessKey` are both required and `sessionToken` is optional.
1460
+ */
1461
+ export interface BedrockOptions extends BaseAIOptions {
1462
+ type: 'bedrock';
1463
+ region?: string;
1464
+ credentials?: {
1465
+ accessKeyId: string;
1466
+ secretAccessKey: string;
1467
+ sessionToken?: string;
1468
+ };
1469
+ endpoint?: string;
1470
+ }
1471
+ /**
1472
+ * Claude CLI provider options, selected with `type: 'claude-cli'`.
1473
+ * Uses the local Claude Code CLI instead of API keys.
1474
+ */
1475
+ export interface ClaudeCliOptions extends BaseAIOptions {
1476
+ type: 'claude-cli';
1477
+ /**
1478
+ * Optional custom path to the `claude` binary.
1479
+ * If not specified, the binary is searched for in `PATH`.
1480
+ */
1481
+ cliPath?: string;
1482
+ }
1483
+ /**
1484
+ * Qwen3-TTS provider options, selected with `type: 'qwen3-tts'`.
1485
+ * Uses Qwen3-TTS for text-to-speech synthesis.
1486
+ *
1487
+ * TTS is co-located with ComfyUI for GPU sharing efficiency.
1488
+ */
1489
+ export interface Qwen3TTSOptions extends BaseAIOptions {
1490
+ type: 'qwen3-tts';
1491
+ /**
1492
+ * TTS service endpoint URL,
1493
+ * e.g., 'http://localhost:8880' or 'http://qwen-tts:8000'
1494
+ */
1495
+ endpoint?: string;
1496
+ /**
1497
+ * Default model variant. Narrows the inherited `BaseAIOptions.defaultModel` (`string`) to the two variants below:
1498
+ * - 'qwen3-tts-1.7b': Higher quality (4.54GB VRAM)
1499
+ * - 'qwen3-tts-0.6b': Faster, lower VRAM (2.52GB)
1500
+ */
1501
+ defaultModel?: 'qwen3-tts-1.7b' | 'qwen3-tts-0.6b';
1502
+ /**
1503
+ * Default voice ID to use for synthesis.
1504
+ */
1505
+ defaultVoice?: string;
1506
+ /**
1507
+ * Default language for synthesis.
1508
+ */
1509
+ defaultLanguage?: string;
1510
+ /**
1511
+ * Rate limiting configuration for the local TTS adapter.
1512
+ * Redeclares `BaseAIOptions.rateLimit` with the same type; this provider reads `requestsPerMinute` / `maxConcurrent` from it.
1513
+ */
1514
+ rateLimit?: AIRateLimitOptions;
1515
+ }
1516
+ /**
1517
+ * Union type for all provider options, discriminated by the `type` field. `OpenAIOptions` is the only member whose `type` is optional, so an options object without `type` can only match `OpenAIOptions`.
1518
+ */
1519
+ export type GetAIOptions = OpenAIOptions | LiteLLMOptions | BifrostOptions | OllamaOptions | GeminiOptions | AnthropicOptions | HuggingFaceOptions | BedrockOptions | ClaudeCliOptions | Qwen3TTSOptions;
1520
+ /**
1521
+ * Base error class for all AI operations.
1522
+ * Provider-specific errors are mapped to subclasses for structured error handling. The constructor also takes an optional fifth argument, `retryable`, exposed as the `retryable` property; its default value is not visible in this declaration.
1523
+ *
1524
+ * @param message - Human-readable error description
1525
+ * @param code - Machine-readable error code (e.g., 'AUTH_ERROR', 'RATE_LIMIT')
1526
+ * @param provider - Provider that raised the error (e.g., 'openai', 'anthropic')
1527
+ * @param model - Model involved in the error, if applicable
1528
+ */
1529
+ export declare class AIError extends Error {
1530
+ code: string;
1531
+ provider?: string | undefined;
1532
+ model?: string | undefined;
1533
+ retryable: boolean;
1534
+ constructor(message: string, code: string, provider?: string | undefined, model?: string | undefined, retryable?: boolean);
1535
+ }
1536
+ /**
1537
+ * Thrown when the API key or other credentials are invalid or missing.
1538
+ *
1539
+ * @param provider - Provider that rejected authentication (optional)
1540
+ */
1541
+ export declare class AuthenticationError extends AIError {
1542
+ constructor(provider?: string);
1543
+ }
1544
+ /**
1545
+ * Thrown when the provider's rate limit has been exceeded.
1546
+ *
1547
+ * @param provider - Provider that enforced the rate limit (optional)
1548
+ * @param retryAfter - Seconds to wait before retrying, if provided by the API; also exposed as the `retryAfter` property. Note the unit: seconds, whereas the `AIRateLimitOptions` delays are in milliseconds.
1549
+ */
1550
+ export declare class RateLimitError extends AIError {
1551
+ retryAfter?: number;
1552
+ constructor(provider?: string, retryAfter?: number);
1553
+ }
1554
+ /**
1555
+ * Thrown when the requested model does not exist or is not available. Note the argument order: `model` comes first and is required, whereas the sibling error classes take an optional `provider` first.
1556
+ *
1557
+ * @param model - The model identifier that was not found
1558
+ * @param provider - Provider that was queried (optional)
1559
+ */
1560
+ export declare class ModelNotFoundError extends AIError {
1561
+ constructor(model: string, provider?: string);
1562
+ }
1563
+ /**
1564
+ * Thrown when the input exceeds the model's maximum context window.
1565
+ *
1566
+ * @param provider - Provider that reported the error (optional)
1567
+ * @param model - Model whose context limit was exceeded (optional)
1568
+ */
1569
+ export declare class ContextLengthError extends AIError {
1570
+ constructor(provider?: string, model?: string);
1571
+ }
1572
+ /**
1573
+ * Thrown when content is blocked by the provider's safety/content filters.
1574
+ *
1575
+ * @param provider - Provider that filtered the content (optional)
1576
+ * @param model - Model that triggered the filter (optional)
1577
+ */
1578
+ export declare class ContentFilterError extends AIError {
1579
+ constructor(provider?: string, model?: string);
1580
+ }
1581
+ /**
1582
+ * Options for text-to-speech synthesis. All fields are optional.
1583
+ */
1584
+ export interface TTSOptions {
1585
+ /**
1586
+ * TTS model to use (e.g., 'qwen3-tts-1.7b', 'qwen3-tts-0.6b')
1587
+ */
1588
+ model?: string;
1589
+ /**
1590
+ * Voice ID (for example the `id` of a `Voice`) or profile reference to use for synthesis
1591
+ */
1592
+ voice?: string;
1593
+ /**
1594
+ * ISO language code (e.g., 'en-US', 'zh-CN')
1595
+ * Supported: Chinese, English, Japanese, Korean, German, French, Russian, Portuguese, Spanish, Italian
1596
+ */
1597
+ language?: string;
1598
+ /**
1599
+ * Speech rate multiplier (0.5 - 2.0, default: 1.0). The range is documented only; the type accepts any number.
1600
+ */
1601
+ speed?: number;
1602
+ /**
1603
+ * Pitch adjustment in semitones (-20 to 20, default: 0). The range is documented only; the type accepts any number.
1604
+ */
1605
+ pitch?: number;
1606
+ /**
1607
+ * Output audio format: 'wav', 'mp3', or 'ogg'
1608
+ */
1609
+ outputFormat?: 'wav' | 'mp3' | 'ogg';
1610
+ /**
1611
+ * Whether to stream the audio output. NOTE(review): how this flag interacts with the separate `streamSpeech` method is not visible here — confirm.
1612
+ */
1613
+ stream?: boolean;
1614
+ /**
1615
+ * Whether to include word-level timing information for lip-sync (returned as `TTSResponse.wordTimings`)
1616
+ */
1617
+ includeWordTimings?: boolean;
1618
+ }
1619
+ /**
1620
+ * Options for voice cloning from audio samples. `sampleAudio` is the only required field.
1621
+ */
1622
+ export interface VoiceCloneOptions {
1623
+ /**
1624
+ * Model to use for voice cloning
1625
+ */
1626
+ model?: string;
1627
+ /**
1628
+ * Audio sample for cloning (3+ seconds recommended).
1629
+ * Can be a Buffer or a base64-encoded string.
1630
+ */
1631
+ sampleAudio: Buffer | string;
1632
+ /**
1633
+ * MIME type of the sample audio (e.g., 'audio/wav', 'audio/mp3')
1634
+ */
1635
+ sampleMimeType?: string;
1636
+ /**
1637
+ * Name for the cloned voice profile
1638
+ */
1639
+ name?: string;
1640
+ /**
1641
+ * Description of the cloned voice
1642
+ */
1643
+ description?: string;
1644
+ /**
1645
+ * Language of the voice sample (e.g., 'en-US')
1646
+ */
1647
+ language?: string;
1648
+ }
1649
+ /**
1650
+ * Options for voice design via natural language description. `description` is the only required field.
1651
+ */
1652
+ export interface VoiceDesignOptions {
1653
+ /**
1654
+ * Model to use for voice design
1655
+ */
1656
+ model?: string;
1657
+ /**
1658
+ * Natural language description of the desired voice,
1659
+ * e.g., "warm female voice with slight British accent, professional news anchor tone"
1660
+ */
1661
+ description: string;
1662
+ /**
1663
+ * Primary language for the voice (e.g., 'en-US')
1664
+ */
1665
+ language?: string;
1666
+ /**
1667
+ * Target gender for the voice: 'male', 'female', or 'neutral'
1668
+ */
1669
+ gender?: 'male' | 'female' | 'neutral';
1670
+ }
1671
+ /**
1672
+ * Word timing information for lip-sync alignment; one entry of `TTSResponse.wordTimings`.
1673
+ */
1674
+ export interface WordTiming {
1675
+ /**
1676
+ * The word or phoneme
1677
+ */
1678
+ word: string;
1679
+ /**
1680
+ * Start time in seconds (presumably measured from the start of the audio — confirm)
1681
+ */
1682
+ start: number;
1683
+ /**
1684
+ * End time in seconds (same time base as `start`)
1685
+ */
1686
+ end: number;
1687
+ }
1688
+ /**
1689
+ * Response from text-to-speech synthesis. `audio`, `mimeType` and `duration` are always present; the other fields are optional.
1690
+ */
1691
+ export interface TTSResponse {
1692
+ /**
1693
+ * Generated audio data
1694
+ */
1695
+ audio: Buffer;
1696
+ /**
1697
+ * MIME type of the audio (e.g., 'audio/wav', 'audio/mp3')
1698
+ */
1699
+ mimeType: string;
1700
+ /**
1701
+ * Duration of the audio in seconds
1702
+ */
1703
+ duration: number;
1704
+ /**
1705
+ * Word-level timing information for lip-sync (if requested via `TTSOptions.includeWordTimings`)
1706
+ */
1707
+ wordTimings?: WordTiming[];
1708
+ /**
1709
+ * Model used for generation
1710
+ */
1711
+ model?: string;
1712
+ /**
1713
+ * Sample rate in Hz (e.g., 22050, 44100)
1714
+ */
1715
+ sampleRate?: number;
1716
+ }
1717
+ /**
1718
+ * Voice profile information. `id`, `name` and `language` are always present; the other fields are optional.
1719
+ */
1720
+ export interface Voice {
1721
+ /**
1722
+ * Unique identifier for the voice; this is the value to pass as `TTSOptions.voice`
1723
+ */
1724
+ id: string;
1725
+ /**
1726
+ * Human-readable name for the voice
1727
+ */
1728
+ name: string;
1729
+ /**
1730
+ * Primary language of the voice (ISO code)
1731
+ */
1732
+ language: string;
1733
+ /**
1734
+ * Gender of the voice: 'male', 'female', or 'neutral'
1735
+ */
1736
+ gender?: 'male' | 'female' | 'neutral';
1737
+ /**
1738
+ * Description of the voice characteristics
1739
+ */
1740
+ description?: string;
1741
+ /**
1742
+ * Whether this is a cloned voice
1743
+ */
1744
+ isCloned?: boolean;
1745
+ /**
1746
+ * Whether this voice was designed via a natural language description
1747
+ */
1748
+ isDesigned?: boolean;
1749
+ /**
1750
+ * URL to a sample of this voice (if available)
1751
+ */
1752
+ sampleUrl?: string;
1753
+ /**
1754
+ * Provider-specific voice data/embedding. Values are typed `any`, so they are unchecked; narrow before use.
1755
+ */
1756
+ voiceData?: Record<string, any>;
1757
+ }
1758
+ /**
1759
+ * Options for listing available voices. All fields are optional filters.
1760
+ */
1761
+ export interface VoiceListOptions {
1762
+ /**
1763
+ * Filter by language (e.g., 'en'). NOTE(review): whether matching is exact or by prefix is not visible here — confirm.
1764
+ */
1765
+ language?: string;
1766
+ /**
1767
+ * Filter by gender: 'male', 'female', or 'neutral'
1768
+ */
1769
+ gender?: 'male' | 'female' | 'neutral';
1770
+ /**
1771
+ * Include cloned voices (behavior when omitted is not visible here — confirm)
1772
+ */
1773
+ includeCloned?: boolean;
1774
+ /**
1775
+ * Include designed voices (behavior when omitted is not visible here — confirm)
1776
+ */
1777
+ includeDesigned?: boolean;
1778
+ }
1779
+ //# sourceMappingURL=types.d.ts.map