llmist 2.3.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,218 @@
  import { ZodTypeAny } from 'zod';
  import { Logger, ILogObj } from 'tslog';

+ /**
+ * Types and interfaces for multimodal generation (image, speech).
+ *
+ * These types support non-token-based billing models where costs are calculated
+ * per-image, per-character, or per-second rather than per-token.
+ */
+ /**
+ * Options for image generation requests.
+ */
+ interface ImageGenerationOptions {
+ /** Model to use (e.g., "dall-e-3", "imagen-3.0-generate-002") */
+ model: string;
+ /** Text prompt describing the desired image */
+ prompt: string;
+ /**
+ * Image size/dimensions.
+ * - OpenAI: "1024x1024", "1024x1792", "1792x1024"
+ * - Gemini: "1:1", "3:4", "4:3", "9:16", "16:9"
+ */
+ size?: string;
+ /**
+ * Image quality level.
+ * - OpenAI: "standard", "hd"
+ */
+ quality?: string;
+ /**
+ * Number of images to generate.
+ * Note: DALL-E 3 only supports n=1
+ */
+ n?: number;
+ /**
+ * Response format for the generated image.
+ * - "url": Returns a URL to the image (expires after ~1 hour)
+ * - "b64_json": Returns base64-encoded image data
+ */
+ responseFormat?: "url" | "b64_json";
+ }
+ /**
+ * A single generated image.
+ */
+ interface GeneratedImage {
+ /** URL to the generated image (if responseFormat is "url") */
+ url?: string;
+ /** Base64-encoded image data (if responseFormat is "b64_json") */
+ b64Json?: string;
+ /** Revised prompt (if the model modified the original prompt) */
+ revisedPrompt?: string;
+ }
+ /**
+ * Usage information for image generation.
+ */
+ interface ImageUsage {
+ /** Number of images generated */
+ imagesGenerated: number;
+ /** Size of generated images */
+ size: string;
+ /** Quality level used */
+ quality: string;
+ }
+ /**
+ * Result of an image generation request.
+ */
+ interface ImageGenerationResult {
+ /** Array of generated images */
+ images: GeneratedImage[];
+ /** Model used for generation */
+ model: string;
+ /** Usage information */
+ usage: ImageUsage;
+ /** Estimated cost in USD */
+ cost?: number;
+ }
+ /**
+ * Available audio formats for speech generation.
+ */
+ type AudioFormat = "mp3" | "opus" | "aac" | "flac" | "wav" | "pcm";
+ /**
+ * Options for speech (TTS) generation requests.
+ */
+ interface SpeechGenerationOptions {
+ /** Model to use (e.g., "tts-1", "tts-1-hd") */
+ model: string;
+ /** Text to convert to speech */
+ input: string;
+ /**
+ * Voice to use for generation.
+ * - OpenAI: "alloy", "echo", "fable", "onyx", "nova", "shimmer"
+ * - Gemini: "Zephyr", "Puck", "Charon", "Kore", etc.
+ */
+ voice: string;
+ /** Output audio format (default: "mp3") */
+ responseFormat?: AudioFormat;
+ /**
+ * Speed of the generated audio.
+ * Range: 0.25 to 4.0 (default: 1.0)
+ */
+ speed?: number;
+ }
+ /**
+ * Usage information for speech generation.
+ */
+ interface SpeechUsage {
+ /** Number of characters processed */
+ characterCount: number;
+ }
+ /**
+ * Result of a speech generation request.
+ */
+ interface SpeechGenerationResult {
+ /** Generated audio data */
+ audio: ArrayBuffer;
+ /** Model used for generation */
+ model: string;
+ /** Usage information */
+ usage: SpeechUsage;
+ /** Estimated cost in USD */
+ cost?: number;
+ /** Audio format of the result */
+ format: AudioFormat;
+ }
+ /**
+ * Pricing structure for image models.
+ * Maps size -> quality -> price per image.
+ */
+ interface ImageModelPricing {
+ /** Simple per-image price (for models with uniform pricing) */
+ perImage?: number;
+ /**
+ * Size-based pricing.
+ * Maps size (e.g., "1024x1024") to quality-based pricing or flat price.
+ */
+ bySize?: Record<string, Record<string, number> | number>;
+ }
+ /**
+ * Pricing structure for speech models.
+ * Supports both character-based pricing (tts-1, tts-1-hd) and
+ * token-based pricing (gpt-4o-mini-tts).
+ */
+ interface SpeechModelPricing {
+ /** Price per character (e.g., 0.000015 for $15 per 1M chars) - for tts-1, tts-1-hd */
+ perCharacter?: number;
+ /** Token-based pricing (for gpt-4o-mini-tts) */
+ perInputToken?: number;
+ perAudioOutputToken?: number;
+ /** Approximate cost per minute of generated audio (for estimation) */
+ perMinute?: number;
+ }
+ /**
+ * Specification for an image generation model.
+ */
+ interface ImageModelSpec {
+ /** Provider identifier (e.g., "openai", "gemini") */
+ provider: string;
+ /** Model identifier */
+ modelId: string;
+ /** Human-readable display name */
+ displayName: string;
+ /** Pricing information */
+ pricing: ImageModelPricing;
+ /** Supported image sizes */
+ supportedSizes: string[];
+ /** Supported quality levels (optional) */
+ supportedQualities?: string[];
+ /** Maximum images per request */
+ maxImages: number;
+ /** Default size if not specified */
+ defaultSize?: string;
+ /** Default quality if not specified */
+ defaultQuality?: string;
+ /** Additional feature flags */
+ features?: {
+ /** Supports conversational/multi-turn image editing */
+ conversational?: boolean;
+ /** Optimized for text rendering in images */
+ textRendering?: boolean;
+ /** Supports transparency */
+ transparency?: boolean;
+ };
+ }
+ /**
+ * Specification for a speech generation model.
+ */
+ interface SpeechModelSpec {
+ /** Provider identifier (e.g., "openai", "gemini") */
+ provider: string;
+ /** Model identifier */
+ modelId: string;
+ /** Human-readable display name */
+ displayName: string;
+ /** Pricing information */
+ pricing: SpeechModelPricing;
+ /** Available voice options */
+ voices: string[];
+ /** Supported audio formats */
+ formats: AudioFormat[];
+ /** Maximum input text length (characters) */
+ maxInputLength: number;
+ /** Default voice if not specified */
+ defaultVoice?: string;
+ /** Default format if not specified */
+ defaultFormat?: AudioFormat;
+ /** Additional feature flags */
+ features?: {
+ /** Supports multi-speaker output */
+ multiSpeaker?: boolean;
+ /** Number of supported languages */
+ languages?: number;
+ /** Supports voice instructions/steering */
+ voiceInstructions?: boolean;
+ };
+ }
+
  /**
  * Model Catalog Types
  *
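The pricing shapes above leave the size/quality lookup to the caller. A minimal sketch of that resolution logic, assuming the declared types are in scope (the helper name is hypothetical, not part of llmist; `ImageModelPricing` itself is not re-exported, so it is derived from the exported `ImageModelSpec`):

```typescript
import type { ImageModelSpec } from "llmist";

type ImageModelPricing = ImageModelSpec["pricing"];

// Hypothetical helper showing how ImageModelPricing resolves to a per-image
// price: a flat `perImage` wins, otherwise `bySize` maps a size to either a
// flat price or a quality -> price map.
function resolvePerImagePrice(
  pricing: ImageModelPricing,
  size: string,
  quality = "standard",
): number | undefined {
  if (pricing.perImage !== undefined) return pricing.perImage;
  const sizeEntry = pricing.bySize?.[size];
  if (sizeEntry === undefined) return undefined;
  return typeof sizeEntry === "number" ? sizeEntry : sizeEntry[quality];
}

// { bySize: { "1024x1024": { standard: 0.04, hd: 0.08 } } } -> 0.08 for "hd"
```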
@@ -649,8 +861,21 @@ interface ParsedGadgetCall {
  parametersRaw: string;
  parameters?: Record<string, unknown>;
  parseError?: string;
+ /** List of invocation IDs this gadget depends on. Empty array if no dependencies. */
+ dependencies: string[];
  }

+ /** Event emitted when a gadget is skipped due to a failed dependency */
+ interface GadgetSkippedEvent {
+ type: "gadget_skipped";
+ gadgetName: string;
+ invocationId: string;
+ parameters: Record<string, unknown>;
+ /** The invocation ID of the dependency that failed */
+ failedDependency: string;
+ /** The error message from the failed dependency */
+ failedDependencyError: string;
+ }
  type StreamEvent = {
  type: "text";
  content: string;
@@ -660,7 +885,7 @@ type StreamEvent = {
  } | {
  type: "gadget_result";
  result: GadgetExecutionResult;
- } | {
+ } | GadgetSkippedEvent | {
  type: "human_input_required";
  question: string;
  gadgetName: string;
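With `GadgetSkippedEvent` folded into the `StreamEvent` union, consumers can branch on the new discriminant. A sketch, assuming the stream of events comes from an agent's `run()` as in the `AgentBuilder` examples further down:

```typescript
import type { StreamEvent } from "llmist";

async function drain(events: AsyncIterable<StreamEvent>): Promise<void> {
  for await (const event of events) {
    switch (event.type) {
      case "text":
        process.stdout.write(event.content);
        break;
      case "gadget_skipped":
        // New in 2.5.0: a dependency failed, so this gadget never ran.
        console.warn(
          `${event.gadgetName} (${event.invocationId}) skipped; ` +
            `dependency ${event.failedDependency} failed: ${event.failedDependencyError}`,
        );
        break;
    }
  }
}
```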
@@ -743,6 +968,26 @@ type TextOnlyAction = {
  * }
  * ```
  */
+ /**
+ * Image generation namespace with automatic cost reporting.
+ */
+ interface CostReportingImageNamespace {
+ /**
+ * Generate images from a text prompt.
+ * Costs are automatically reported to the execution context.
+ */
+ generate(options: ImageGenerationOptions): Promise<ImageGenerationResult>;
+ }
+ /**
+ * Speech generation namespace with automatic cost reporting.
+ */
+ interface CostReportingSpeechNamespace {
+ /**
+ * Generate speech audio from text.
+ * Costs are automatically reported to the execution context.
+ */
+ generate(options: SpeechGenerationOptions): Promise<SpeechGenerationResult>;
+ }
  interface CostReportingLLMist {
  /**
  * Quick completion - returns final text response.
@@ -763,6 +1008,16 @@ interface CostReportingLLMist {
  * Access to model registry for cost estimation.
  */
  readonly modelRegistry: ModelRegistry;
+ /**
+ * Image generation with automatic cost reporting.
+ * Costs are reported based on model and generation parameters.
+ */
+ readonly image: CostReportingImageNamespace;
+ /**
+ * Speech generation with automatic cost reporting.
+ * Costs are reported based on input length and model pricing.
+ */
+ readonly speech: CostReportingSpeechNamespace;
  }
  /**
  * Execution context provided to gadgets during execution.
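A sketch of a gadget body using the two new cost-reporting namespaces. It assumes only that the execution context hands the gadget a `CostReportingLLMist`; how it is exposed on the context is not shown in this diff, so the function parameter here is illustrative:

```typescript
import type { CostReportingLLMist } from "llmist";

// Both calls below report their cost to the execution context automatically.
async function runLogoGadget(llm: CostReportingLLMist): Promise<string> {
  const image = await llm.image.generate({
    model: "dall-e-3",
    prompt: "A minimalist rocket logo",
    size: "1024x1024",
  });
  const speech = await llm.speech.generate({
    model: "tts-1",
    input: "Your logo is ready.",
    voice: "nova",
  });
  return `image: ${image.images[0]?.url ?? "inline"}, audio: ${speech.audio.byteLength} bytes`;
}
```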
@@ -1079,6 +1334,215 @@ declare abstract class BaseGadget {
  }): string;
  }

+ /**
+ * Types and interfaces for multimodal input content.
+ *
+ * These types define the structure for sending images, audio, and other
+ * media alongside text in LLM messages. They complement the output types
+ * in media-types.ts.
+ */
+ /**
+ * Supported image MIME types for input.
+ * All major providers support these formats.
+ */
+ type ImageMimeType = "image/jpeg" | "image/png" | "image/gif" | "image/webp";
+ /**
+ * Supported audio MIME types for input.
+ * Currently only Gemini supports audio input.
+ */
+ type AudioMimeType = "audio/mp3" | "audio/mpeg" | "audio/wav" | "audio/webm" | "audio/ogg";
+ /**
+ * Base interface for all content parts.
+ */
+ interface BaseContentPart {
+ type: string;
+ }
+ /**
+ * Text content part.
+ */
+ interface TextContentPart extends BaseContentPart {
+ type: "text";
+ text: string;
+ }
+ /**
+ * Image content part.
+ */
+ interface ImageContentPart extends BaseContentPart {
+ type: "image";
+ source: ImageSource;
+ }
+ /**
+ * Audio content part.
+ * Currently only supported by Gemini.
+ */
+ interface AudioContentPart extends BaseContentPart {
+ type: "audio";
+ source: AudioSource;
+ }
+ /**
+ * Union of all supported content part types.
+ */
+ type ContentPart = TextContentPart | ImageContentPart | AudioContentPart;
+ /**
+ * Image can come from base64 data or a URL.
+ */
+ type ImageSource = ImageBase64Source | ImageUrlSource;
+ /**
+ * Base64-encoded image data.
+ * Supported by all providers.
+ */
+ interface ImageBase64Source {
+ type: "base64";
+ mediaType: ImageMimeType;
+ data: string;
+ }
+ /**
+ * Image URL reference.
+ * Only supported by OpenAI.
+ */
+ interface ImageUrlSource {
+ type: "url";
+ url: string;
+ }
+ /**
+ * Audio source (base64 only).
+ * URL sources are not currently supported for audio.
+ */
+ interface AudioSource {
+ type: "base64";
+ mediaType: AudioMimeType;
+ data: string;
+ }
+ /**
+ * Check if a content part is a text part.
+ */
+ declare function isTextPart(part: ContentPart): part is TextContentPart;
+ /**
+ * Check if a content part is an image part.
+ */
+ declare function isImagePart(part: ContentPart): part is ImageContentPart;
+ /**
+ * Check if a content part is an audio part.
+ */
+ declare function isAudioPart(part: ContentPart): part is AudioContentPart;
+ /**
+ * Create a text content part.
+ *
+ * @example
+ * ```typescript
+ * const part = text("What's in this image?");
+ * ```
+ */
+ declare function text(content: string): TextContentPart;
+ /**
+ * Create an image content part from base64-encoded data.
+ *
+ * @param data - Base64-encoded image data
+ * @param mediaType - MIME type of the image
+ *
+ * @example
+ * ```typescript
+ * const part = imageFromBase64(base64Data, "image/jpeg");
+ * ```
+ */
+ declare function imageFromBase64(data: string, mediaType: ImageMimeType): ImageContentPart;
+ /**
+ * Create an image content part from a URL.
+ * Note: Only supported by OpenAI.
+ *
+ * @param url - URL to the image (must be accessible)
+ *
+ * @example
+ * ```typescript
+ * const part = imageFromUrl("https://example.com/image.jpg");
+ * ```
+ */
+ declare function imageFromUrl(url: string): ImageContentPart;
+ /**
+ * Detect the MIME type of image data from magic bytes.
+ *
+ * @param data - Raw image data
+ * @returns Detected MIME type or null if unknown
+ */
+ declare function detectImageMimeType(data: Buffer | Uint8Array): ImageMimeType | null;
+ /**
+ * Detect the MIME type of audio data from magic bytes.
+ *
+ * @param data - Raw audio data
+ * @returns Detected MIME type or null if unknown
+ */
+ declare function detectAudioMimeType(data: Buffer | Uint8Array): AudioMimeType | null;
+ /**
+ * Convert data to base64 string.
+ *
+ * @param data - Data to encode (Buffer, Uint8Array, or already base64 string)
+ * @returns Base64-encoded string
+ */
+ declare function toBase64(data: Buffer | Uint8Array | string): string;
+ /**
+ * Create an image content part from a Buffer or Uint8Array.
+ * Automatically detects the MIME type if not provided.
+ *
+ * @param buffer - Image data
+ * @param mediaType - Optional MIME type (auto-detected if not provided)
+ *
+ * @example
+ * ```typescript
+ * const imageData = await fs.readFile("photo.jpg");
+ * const part = imageFromBuffer(imageData); // Auto-detects JPEG
+ * ```
+ */
+ declare function imageFromBuffer(buffer: Buffer | Uint8Array, mediaType?: ImageMimeType): ImageContentPart;
+ /**
+ * Create an audio content part from base64-encoded data.
+ *
+ * @param data - Base64-encoded audio data
+ * @param mediaType - MIME type of the audio
+ *
+ * @example
+ * ```typescript
+ * const part = audioFromBase64(base64Audio, "audio/mp3");
+ * ```
+ */
+ declare function audioFromBase64(data: string, mediaType: AudioMimeType): AudioContentPart;
+ /**
+ * Create an audio content part from a Buffer or Uint8Array.
+ * Automatically detects the MIME type if not provided.
+ *
+ * @param buffer - Audio data
+ * @param mediaType - Optional MIME type (auto-detected if not provided)
+ *
+ * @example
+ * ```typescript
+ * const audioData = await fs.readFile("audio.mp3");
+ * const part = audioFromBuffer(audioData); // Auto-detects MP3
+ * ```
+ */
+ declare function audioFromBuffer(buffer: Buffer | Uint8Array, mediaType?: AudioMimeType): AudioContentPart;
+ /**
+ * Check if a string is a data URL.
+ *
+ * @param input - String to check
+ * @returns True if it's a data URL
+ */
+ declare function isDataUrl(input: string): boolean;
+ /**
+ * Parse a data URL into its components.
+ *
+ * @param url - Data URL to parse
+ * @returns Parsed components or null if invalid
+ *
+ * @example
+ * ```typescript
+ * const result = parseDataUrl("data:image/jpeg;base64,/9j/4AAQ...");
+ * // { mimeType: "image/jpeg", data: "/9j/4AAQ..." }
+ * ```
+ */
+ declare function parseDataUrl(url: string): {
+ mimeType: string;
+ data: string;
+ } | null;
+
  /**
  * Context provided to prompt template functions for rendering dynamic content.
  */
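Taken together, the helpers above cover the common input paths: binary buffers with MIME sniffing, raw base64, and data URLs. A usage sketch (all imported names appear in the export map at the end of this diff; the file path and dummy data URL are placeholders, and top-level `await` assumes an ESM context):

```typescript
import { promises as fs } from "node:fs";
import {
  text,
  imageFromBuffer,
  imageFromBase64,
  parseDataUrl,
  isImagePart,
  type ContentPart,
  type ImageMimeType,
} from "llmist";

const parts: ContentPart[] = [
  text("What's in this image?"),
  imageFromBuffer(await fs.readFile("photo.jpg")), // MIME type sniffed from magic bytes
];

// A data URL can be unpacked into a base64 part:
const parsed = parseDataUrl("data:image/png;base64,iVBORw0KGgo=");
if (parsed) {
  parts.push(imageFromBase64(parsed.data, parsed.mimeType as ImageMimeType));
}

console.log(parts.filter(isImagePart).length); // 2
```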
@@ -1212,12 +1676,33 @@ declare function resolveRulesTemplate(rules: PromptConfig["rules"] | undefined,
  declare function resolveHintTemplate(template: HintTemplate | undefined, defaultValue: string, context: HintContext): string;

  type LLMRole = "system" | "user" | "assistant";
+ /**
+ * Message content can be a simple string (text only) or an array of content parts (multimodal).
+ * Using a string is simpler for text-only messages, while arrays support images and audio.
+ */
+ type MessageContent = string | ContentPart[];
  interface LLMMessage {
  role: LLMRole;
- content: string;
+ content: MessageContent;
  name?: string;
  metadata?: Record<string, unknown>;
  }
+ /**
+ * Normalize message content to an array of content parts.
+ * Converts string content to a single text part.
+ *
+ * @param content - Message content (string or ContentPart[])
+ * @returns Array of content parts
+ */
+ declare function normalizeContent(content: MessageContent): ContentPart[];
+ /**
+ * Extract text from message content.
+ * Concatenates all text parts in the content.
+ *
+ * @param content - Message content (string or ContentPart[])
+ * @returns Combined text from all text parts
+ */
+ declare function extractText(content: MessageContent): string;
  declare class LLMMessageBuilder {
  private readonly messages;
  private startPrefix;
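`normalizeContent` and `extractText` are the bridge between legacy `string` content and the new part arrays. Minimal sketches of their documented contracts follow; the real implementations ship with the package, and the empty join separator in the second sketch is an assumption (the docs only say "concatenates"):

```typescript
import { isTextPart, type ContentPart, type MessageContent } from "llmist";

function normalizeContentSketch(content: MessageContent): ContentPart[] {
  // A plain string becomes a single text part; arrays pass through unchanged.
  return typeof content === "string" ? [{ type: "text", text: content }] : content;
}

function extractTextSketch(content: MessageContent): string {
  // Concatenate the text parts; image/audio parts contribute nothing.
  return normalizeContentSketch(content)
    .filter(isTextPart)
    .map((part) => part.text)
    .join("");
}
```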
@@ -1240,8 +1725,92 @@ declare class LLMMessageBuilder {
  private buildUsageSection;
  private buildExamplesSection;
  private buildRulesSection;
- addUser(content: string, metadata?: Record<string, unknown>): this;
+ /**
+ * Add a user message.
+ * Content can be a string (text only) or an array of content parts (multimodal).
+ *
+ * @param content - Message content
+ * @param metadata - Optional metadata
+ *
+ * @example
+ * ```typescript
+ * // Text only
+ * builder.addUser("Hello!");
+ *
+ * // Multimodal
+ * builder.addUser([
+ * text("What's in this image?"),
+ * imageFromBuffer(imageData),
+ * ]);
+ * ```
+ */
+ addUser(content: MessageContent, metadata?: Record<string, unknown>): this;
  addAssistant(content: string, metadata?: Record<string, unknown>): this;
+ /**
+ * Add a user message with an image attachment.
+ *
+ * @param textContent - Text prompt
+ * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+ * @param mimeType - Optional MIME type (auto-detected if not provided)
+ *
+ * @example
+ * ```typescript
+ * builder.addUserWithImage(
+ * "What's in this image?",
+ * await fs.readFile("photo.jpg"),
+ * "image/jpeg" // Optional - auto-detected
+ * );
+ * ```
+ */
+ addUserWithImage(textContent: string, imageData: Buffer | Uint8Array | string, mimeType?: ImageMimeType): this;
+ /**
+ * Add a user message with an image URL (OpenAI only).
+ *
+ * @param textContent - Text prompt
+ * @param imageUrl - URL to the image
+ *
+ * @example
+ * ```typescript
+ * builder.addUserWithImageUrl(
+ * "What's in this image?",
+ * "https://example.com/image.jpg"
+ * );
+ * ```
+ */
+ addUserWithImageUrl(textContent: string, imageUrl: string): this;
+ /**
+ * Add a user message with an audio attachment (Gemini only).
+ *
+ * @param textContent - Text prompt
+ * @param audioData - Audio data (Buffer, Uint8Array, or base64 string)
+ * @param mimeType - Optional MIME type (auto-detected if not provided)
+ *
+ * @example
+ * ```typescript
+ * builder.addUserWithAudio(
+ * "Transcribe this audio",
+ * await fs.readFile("recording.mp3"),
+ * "audio/mp3" // Optional - auto-detected
+ * );
+ * ```
+ */
+ addUserWithAudio(textContent: string, audioData: Buffer | Uint8Array | string, mimeType?: AudioMimeType): this;
+ /**
+ * Add a user message with multiple content parts.
+ * Provides full flexibility for complex multimodal messages.
+ *
+ * @param parts - Array of content parts
+ *
+ * @example
+ * ```typescript
+ * builder.addUserMultimodal([
+ * text("Compare these images:"),
+ * imageFromBuffer(image1),
+ * imageFromBuffer(image2),
+ * ]);
+ * ```
+ */
+ addUserMultimodal(parts: ContentPart[]): this;
  addGadgetCall(gadget: string, parameters: Record<string, unknown>, result: string): this;
  /**
  * Format parameters as Block format with JSON Pointer paths.
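The convenience methods all return `this`, so a mixed conversation can be built in one chain. A sketch, assuming `LLMMessageBuilder` takes no constructor arguments (not shown in this diff) and placeholder file paths:

```typescript
import { promises as fs } from "node:fs";
import { LLMMessageBuilder } from "llmist";

const builder = new LLMMessageBuilder()
  .addUser("Here are two attachments:")
  .addUserWithImage("What does the error dialog say?", await fs.readFile("shot.png"))
  .addUserWithAudio("And transcribe this note", await fs.readFile("note.mp3")) // Gemini only
  .addAssistant("The dialog reads: 'Disk full'.");
```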
@@ -1283,6 +1852,281 @@ interface ProviderAdapter {
  * @returns Promise resolving to the number of input tokens
  */
  countTokens?(messages: LLMMessage[], descriptor: ModelDescriptor, spec?: ModelSpec): Promise<number>;
+ /**
+ * Get image model specifications for this provider.
+ * Returns undefined if the provider doesn't support image generation.
+ */
+ getImageModelSpecs?(): ImageModelSpec[];
+ /**
+ * Check if this provider supports image generation for a given model.
+ * @param modelId - Model identifier (e.g., "dall-e-3")
+ */
+ supportsImageGeneration?(modelId: string): boolean;
+ /**
+ * Generate images from a text prompt.
+ * @param options - Image generation options
+ * @returns Promise resolving to the generation result with images and cost
+ */
+ generateImage?(options: ImageGenerationOptions): Promise<ImageGenerationResult>;
+ /**
+ * Get speech model specifications for this provider.
+ * Returns undefined if the provider doesn't support speech generation.
+ */
+ getSpeechModelSpecs?(): SpeechModelSpec[];
+ /**
+ * Check if this provider supports speech generation for a given model.
+ * @param modelId - Model identifier (e.g., "tts-1", "tts-1-hd")
+ */
+ supportsSpeechGeneration?(modelId: string): boolean;
+ /**
+ * Generate speech audio from text.
+ * @param options - Speech generation options
+ * @returns Promise resolving to the generation result with audio and cost
+ */
+ generateSpeech?(options: SpeechGenerationOptions): Promise<SpeechGenerationResult>;
+ }
+
+ /**
+ * Image Generation Namespace
+ *
+ * Provides image generation methods.
+ *
+ * @example
+ * ```typescript
+ * const llmist = new LLMist();
+ *
+ * const result = await llmist.image.generate({
+ * model: "dall-e-3",
+ * prompt: "A cat in space",
+ * size: "1024x1024",
+ * quality: "hd",
+ * });
+ *
+ * console.log(result.images[0].url);
+ * console.log("Cost:", result.cost);
+ * ```
+ */
+
+ declare class ImageNamespace {
+ private readonly adapters;
+ private readonly defaultProvider;
+ constructor(adapters: ProviderAdapter[], defaultProvider: string);
+ /**
+ * Generate images from a text prompt.
+ *
+ * @param options - Image generation options
+ * @returns Promise resolving to the generation result with images and cost
+ * @throws Error if the provider doesn't support image generation
+ */
+ generate(options: ImageGenerationOptions): Promise<ImageGenerationResult>;
+ /**
+ * List all available image generation models.
+ */
+ listModels(): ImageModelSpec[];
+ /**
+ * Check if a model is supported for image generation.
+ */
+ supportsModel(modelId: string): boolean;
+ private findImageAdapter;
+ }
+
+ /**
+ * Speech Generation Namespace
+ *
+ * Provides text-to-speech generation methods.
+ *
+ * @example
+ * ```typescript
+ * const llmist = new LLMist();
+ *
+ * const result = await llmist.speech.generate({
+ * model: "tts-1-hd",
+ * input: "Hello, world!",
+ * voice: "nova",
+ * });
+ *
+ * // Save the audio
+ * fs.writeFileSync("output.mp3", Buffer.from(result.audio));
+ * console.log("Cost:", result.cost);
+ * ```
+ */
+
+ declare class SpeechNamespace {
+ private readonly adapters;
+ private readonly defaultProvider;
+ constructor(adapters: ProviderAdapter[], defaultProvider: string);
+ /**
+ * Generate speech audio from text.
+ *
+ * @param options - Speech generation options
+ * @returns Promise resolving to the generation result with audio and cost
+ * @throws Error if the provider doesn't support speech generation
+ */
+ generate(options: SpeechGenerationOptions): Promise<SpeechGenerationResult>;
+ /**
+ * List all available speech generation models.
+ */
+ listModels(): SpeechModelSpec[];
+ /**
+ * Check if a model is supported for speech generation.
+ */
+ supportsModel(modelId: string): boolean;
+ private findSpeechAdapter;
+ }
+
+ /**
+ * Text Generation Namespace
+ *
+ * Provides text completion and streaming methods.
+ * Replaces the deprecated llmist.complete() and llmist.stream() methods.
+ *
+ * @example
+ * ```typescript
+ * const llmist = new LLMist();
+ *
+ * // Complete
+ * const answer = await llmist.text.complete("What is 2+2?");
+ *
+ * // Stream
+ * for await (const chunk of llmist.text.stream("Tell me a story")) {
+ * process.stdout.write(chunk);
+ * }
+ * ```
+ */
+
+ declare class TextNamespace {
+ private readonly client;
+ constructor(client: LLMist);
+ /**
+ * Generate a complete text response.
+ *
+ * @param prompt - User prompt
+ * @param options - Optional configuration
+ * @returns Complete text response
+ */
+ complete(prompt: string, options?: QuickOptions): Promise<string>;
+ /**
+ * Stream text chunks.
+ *
+ * @param prompt - User prompt
+ * @param options - Optional configuration
+ * @returns Async generator yielding text chunks
+ */
+ stream(prompt: string, options?: QuickOptions): AsyncGenerator<string>;
+ }
+
+ /**
+ * Vision Analysis Namespace
+ *
+ * Provides one-shot image analysis without agent setup.
+ * Useful for quick image understanding tasks.
+ *
+ * @example
+ * ```typescript
+ * const llmist = new LLMist();
+ *
+ * const description = await llmist.vision.analyze({
+ * model: "gpt-4o",
+ * image: await readFile("photo.jpg"),
+ * prompt: "Describe this image in detail",
+ * });
+ *
+ * console.log(description);
+ * ```
+ */
+
+ /**
+ * Options for vision analysis.
+ */
+ interface VisionAnalyzeOptions {
+ /** Model to use (must support vision, e.g., "gpt-4o", "claude-sonnet-4-20250514", "gemini-2.5-flash") */
+ model: string;
+ /** Image data: Buffer, Uint8Array, base64 string, data URL, or HTTPS URL */
+ image: string | Buffer | Uint8Array;
+ /** Analysis prompt describing what to do with the image */
+ prompt: string;
+ /** MIME type (auto-detected if not provided for Buffer/Uint8Array) */
+ mimeType?: ImageMimeType;
+ /** System prompt for analysis context */
+ systemPrompt?: string;
+ /** Max tokens for response */
+ maxTokens?: number;
+ /** Temperature (0-1) */
+ temperature?: number;
+ }
+ /**
+ * Result of vision analysis.
+ */
+ interface VisionAnalyzeResult {
+ /** The analysis text */
+ text: string;
+ /** Model used */
+ model: string;
+ /** Token usage if available */
+ usage?: {
+ inputTokens: number;
+ outputTokens: number;
+ totalTokens: number;
+ };
+ }
+ declare class VisionNamespace {
+ private readonly client;
+ constructor(client: LLMist);
+ /**
+ * Build a message builder with the image content attached.
+ * Handles URLs, data URLs, base64 strings, and binary buffers.
+ */
+ private buildImageMessage;
+ /**
+ * Stream the response and collect text and usage information.
+ */
+ private streamAndCollect;
+ /**
+ * Analyze an image with a vision-capable model.
+ * Returns the analysis as a string.
+ *
+ * @param options - Vision analysis options
+ * @returns Promise resolving to the analysis text
+ * @throws Error if the image format is unsupported or model doesn't support vision
+ *
+ * @example
+ * ```typescript
+ * // From file
+ * const result = await llmist.vision.analyze({
+ * model: "gpt-4o",
+ * image: await fs.readFile("photo.jpg"),
+ * prompt: "What's in this image?",
+ * });
+ *
+ * // From URL (OpenAI only)
+ * const result = await llmist.vision.analyze({
+ * model: "gpt-4o",
+ * image: "https://example.com/image.jpg",
+ * prompt: "Describe this image",
+ * });
+ * ```
+ */
+ analyze(options: VisionAnalyzeOptions): Promise<string>;
+ /**
+ * Analyze an image and return detailed result with usage info.
+ *
+ * @param options - Vision analysis options
+ * @returns Promise resolving to the analysis result with usage info
+ */
+ analyzeWithUsage(options: VisionAnalyzeOptions): Promise<VisionAnalyzeResult>;
+ /**
+ * Check if a model supports vision/image input.
+ *
+ * @param modelId - Model ID to check
+ * @returns True if the model supports vision
+ */
+ supportsModel(modelId: string): boolean;
+ /**
+ * List all models that support vision.
+ *
+ * @returns Array of model IDs that support vision
+ */
+ listModels(): string[];
  }

  interface LLMistOptions {
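All six new `ProviderAdapter` members are optional, so callers must probe before invoking. A defensive-call sketch using only the declared members (the helper name is hypothetical):

```typescript
import type { ImageGenerationOptions, ImageGenerationResult, ProviderAdapter } from "llmist";

async function tryGenerateImage(
  adapter: ProviderAdapter,
  options: ImageGenerationOptions,
): Promise<ImageGenerationResult | undefined> {
  // Optional members: guard both the capability check and the method itself.
  if (adapter.supportsImageGeneration?.(options.model) && adapter.generateImage) {
    return adapter.generateImage(options);
  }
  return undefined; // this provider has no image support for the model
}
```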
@@ -1326,8 +2170,13 @@ interface LLMistOptions {
  }
  declare class LLMist {
  private readonly parser;
+ private readonly defaultProvider;
  readonly modelRegistry: ModelRegistry;
  private readonly adapters;
+ readonly text: TextNamespace;
+ readonly image: ImageNamespace;
+ readonly speech: SpeechNamespace;
+ readonly vision: VisionNamespace;
  constructor();
  constructor(adapters: ProviderAdapter[]);
  constructor(adapters: ProviderAdapter[], defaultProvider: string);
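Stitching the four namespace examples above into one flow (a recap sketch; each call is taken from the `@example` blocks earlier in this diff, and top-level `await` assumes an ESM context):

```typescript
import { LLMist } from "llmist";

const llmist = new LLMist();

const answer = await llmist.text.complete("What is 2+2?");
const image = await llmist.image.generate({ model: "dall-e-3", prompt: "A cat in space" });
const speech = await llmist.speech.generate({ model: "tts-1", input: answer, voice: "nova" });
const caption = await llmist.vision.analyze({
  model: "gpt-4o",
  image: image.images[0]?.b64Json ?? "",
  prompt: "Describe this image",
});
console.log(caption, "audio bytes:", speech.audio.byteLength);
```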
@@ -1555,12 +2404,15 @@ interface EventHandlers {
  /** Called when a gadget is about to be executed */
  onGadgetCall?: (call: {
  gadgetName: string;
+ invocationId: string;
  parameters?: Record<string, unknown>;
  parametersRaw: string;
+ dependencies: string[];
  }) => void | Promise<void>;
  /** Called when a gadget execution completes */
  onGadgetResult?: (result: {
  gadgetName: string;
+ invocationId: string;
  result?: string;
  error?: string;
  parameters: Record<string, unknown>;
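The new `invocationId` and `dependencies` fields let handlers correlate calls with their results. A logging sketch:

```typescript
import type { EventHandlers } from "llmist";

const handlers: EventHandlers = {
  onGadgetCall: ({ gadgetName, invocationId, dependencies }) => {
    console.log(`→ ${gadgetName} [${invocationId}] deps: ${dependencies.join(", ") || "none"}`);
  },
  onGadgetResult: ({ gadgetName, invocationId, error }) => {
    console.log(`← ${gadgetName} [${invocationId}] ${error ? `failed: ${error}` : "ok"}`);
  },
};
```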
@@ -1839,6 +2691,21 @@ interface ObserveGadgetCompleteContext {
  cost?: number;
  logger: Logger<ILogObj>;
  }
+ /**
+ * Context provided when a gadget is skipped due to a failed dependency.
+ * Read-only observation point.
+ */
+ interface ObserveGadgetSkippedContext {
+ iteration: number;
+ gadgetName: string;
+ invocationId: string;
+ parameters: Readonly<Record<string, unknown>>;
+ /** The invocation ID of the dependency that failed */
+ failedDependency: string;
+ /** The error message from the failed dependency */
+ failedDependencyError: string;
+ logger: Logger<ILogObj>;
+ }
  /**
  * Context provided for each stream chunk.
  * Read-only observation point.
@@ -1872,6 +2739,8 @@ interface Observers {
  onGadgetExecutionStart?: (context: ObserveGadgetStartContext) => void | Promise<void>;
  /** Called when a gadget execution completes (success or error) */
  onGadgetExecutionComplete?: (context: ObserveGadgetCompleteContext) => void | Promise<void>;
+ /** Called when a gadget is skipped due to a failed dependency */
+ onGadgetSkipped?: (context: ObserveGadgetSkippedContext) => void | Promise<void>;
  /** Called for each stream chunk */
  onStreamChunk?: (context: ObserveChunkContext) => void | Promise<void>;
  /** Called when context compaction occurs */
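A read-only observer for the new skip notification:

```typescript
import type { Observers } from "llmist";

const observers: Observers = {
  onGadgetSkipped: async (ctx) => {
    // Observation only: skips cannot be altered here (see Controllers below).
    ctx.logger.warn(
      `iteration ${ctx.iteration}: ${ctx.gadgetName} (${ctx.invocationId}) skipped; ` +
        `dependency ${ctx.failedDependency} failed: ${ctx.failedDependencyError}`,
    );
  },
};
```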
@@ -2111,6 +2980,39 @@ type AfterGadgetExecutionAction = {
  action: "recover";
  fallbackResult: string;
  };
+ /**
+ * Context for dependency skip controller.
+ * Called when a gadget would be skipped due to a failed dependency.
+ */
+ interface DependencySkipControllerContext {
+ iteration: number;
+ gadgetName: string;
+ invocationId: string;
+ /** Parameters of the gadget that would be skipped */
+ parameters: Record<string, unknown>;
+ /** The invocation ID of the dependency that failed */
+ failedDependency: string;
+ /** The error message from the failed dependency */
+ failedDependencyError: string;
+ logger: Logger<ILogObj>;
+ }
+ /**
+ * Action returned by onDependencySkipped controller.
+ */
+ type DependencySkipAction =
+ /** Skip execution and propagate failure to downstream dependents */
+ {
+ action: "skip";
+ }
+ /** Execute the gadget anyway despite the failed dependency */
+ | {
+ action: "execute_anyway";
+ }
+ /** Skip execution but provide a fallback result (doesn't propagate failure) */
+ | {
+ action: "use_fallback";
+ fallbackResult: string;
+ };
  /**
  * Controllers: Async lifecycle hooks that control execution flow.
  * - Can short-circuit execution
@@ -2143,6 +3045,11 @@ interface Controllers {
  * Can provide a fallback result to recover from errors.
  */
  afterGadgetExecution?: (context: AfterGadgetExecutionControllerContext) => Promise<AfterGadgetExecutionAction>;
+ /**
+ * Called before skipping a gadget due to a failed dependency.
+ * Can override the default skip behavior to execute anyway or provide a fallback.
+ */
+ onDependencySkipped?: (context: DependencySkipControllerContext) => Promise<DependencySkipAction>;
  }
  /**
  * Clean hooks system with three distinct categories:
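Unlike the observer above, the controller can override the skip. A sketch that lets a non-critical gadget fall back instead of propagating the failure (the "notify" gadget name is hypothetical):

```typescript
import type { Controllers } from "llmist";

const controllers: Controllers = {
  onDependencySkipped: async (ctx) => {
    if (ctx.gadgetName === "notify") {
      // Keep downstream gadgets alive with a stand-in result.
      return { action: "use_fallback", fallbackResult: "notification skipped" };
    }
    return { action: "skip" }; // default behavior: propagate to dependents
  },
};
```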
@@ -2176,8 +3083,8 @@ interface AgentOptions {
  model: string;
  /** System prompt */
  systemPrompt?: string;
- /** Initial user prompt (optional if using build()) */
- userPrompt?: string;
+ /** Initial user prompt (optional if using build()). Can be text or multimodal content. */
+ userPrompt?: string | ContentPart[];
  /** Maximum iterations */
  maxIterations?: number;
  /** Temperature */
@@ -2196,10 +3103,10 @@ interface AgentOptions {
  gadgetEndPrefix?: string;
  /** Custom gadget argument prefix for block format parameters */
  gadgetArgPrefix?: string;
- /** Initial messages */
+ /** Initial messages. User messages support multimodal content. */
  initialMessages?: Array<{
  role: "system" | "user" | "assistant";
- content: string;
+ content: MessageContent;
  }>;
  /** Text-only handler */
  textOnlyHandler?: TextOnlyHandler;
@@ -2408,9 +3315,10 @@ declare class Agent {

  /**
  * Message for conversation history.
+ * User messages can be text (string) or multimodal (ContentPart[]).
  */
  type HistoryMessage = {
- user: string;
+ user: string | ContentPart[];
  } | {
  assistant: string;
  } | {
@@ -2929,7 +3837,62 @@ declare class AgentBuilder {
  * }
  * ```
  */
+ /**
+ * Build AgentOptions with the given user prompt.
+ * Centralizes options construction for ask(), askWithImage(), and askWithContent().
+ */
+ private buildAgentOptions;
  ask(userPrompt: string): Agent;
+ /**
+ * Build and create the agent with a multimodal user prompt (text + image).
+ * Returns the Agent instance ready to run.
+ *
+ * @param textPrompt - Text prompt describing what to do with the image
+ * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+ * @param mimeType - Optional MIME type (auto-detected if not provided)
+ * @returns Configured Agent instance
+ *
+ * @example
+ * ```typescript
+ * const agent = LLMist.createAgent()
+ * .withModel("gpt-4o")
+ * .withSystem("You analyze images")
+ * .askWithImage(
+ * "What's in this image?",
+ * await fs.readFile("photo.jpg")
+ * );
+ *
+ * for await (const event of agent.run()) {
+ * // handle events
+ * }
+ * ```
+ */
+ askWithImage(textPrompt: string, imageData: Buffer | Uint8Array | string, mimeType?: ImageMimeType): Agent;
+ /**
+ * Build and return an Agent configured with multimodal content.
+ * More flexible than askWithImage - accepts any combination of content parts.
+ *
+ * @param content - Array of content parts (text, images, audio)
+ * @returns A configured Agent ready for execution
+ *
+ * @example
+ * ```typescript
+ * import { text, imageFromBuffer, audioFromBuffer } from "llmist";
+ *
+ * const agent = LLMist.createAgent()
+ * .withModel("gemini:gemini-2.5-flash")
+ * .askWithContent([
+ * text("Describe this image and transcribe the audio:"),
+ * imageFromBuffer(imageData),
+ * audioFromBuffer(audioData),
+ * ]);
+ *
+ * for await (const event of agent.run()) {
+ * // handle events
+ * }
+ * ```
+ */
+ askWithContent(content: ContentPart[]): Agent;
  /**
  * Build, run, and collect only the text response.
  * Convenient for simple queries where you just want the final answer.
@@ -3009,8 +3972,9 @@
  interface IConversationManager {
  /**
  * Adds a user message to the conversation.
+ * Supports multimodal content (text + images/audio).
  */
- addUserMessage(content: string): void;
+ addUserMessage(content: MessageContent): void;
  /**
  * Adds an assistant message to the conversation.
  */
@@ -3078,6 +4042,26 @@ interface MockMatcherContext {
  * const matcher: MockMatcher = (ctx) => ctx.provider === 'anthropic';
  */
  type MockMatcher = (context: MockMatcherContext) => boolean | Promise<boolean>;
+ /**
+ * Image data in a mock response.
+ */
+ interface MockImageData {
+ /** Base64-encoded image data */
+ data: string;
+ /** MIME type of the image */
+ mimeType: ImageMimeType;
+ /** Revised prompt (for image generation responses) */
+ revisedPrompt?: string;
+ }
+ /**
+ * Audio data in a mock response.
+ */
+ interface MockAudioData {
+ /** Base64-encoded audio data */
+ data: string;
+ /** MIME type of the audio */
+ mimeType: AudioMimeType;
+ }
  /**
  * A mock response that will be returned when a matcher succeeds.
  */
@@ -3097,6 +4081,16 @@ interface MockResponse {
  /** Optional invocationId, will be auto-generated if not provided */
  invocationId?: string;
  }>;
+ /**
+ * Image data to return in the response (e.g., for image generation mocks).
+ * Each image will be yielded as a separate chunk in the stream.
+ */
+ images?: MockImageData[];
+ /**
+ * Audio data to return in the response (e.g., for speech synthesis mocks).
+ * Will be yielded as a chunk in the stream.
+ */
+ audio?: MockAudioData;
  /**
  * Simulated token usage statistics
  */
@@ -3203,9 +4197,58 @@ declare class MockProviderAdapter implements ProviderAdapter {
  readonly priority = 100;
  private readonly mockManager;
  constructor(options?: MockOptions);
- supports(descriptor: ModelDescriptor): boolean;
- stream(options: LLMGenerationOptions, descriptor: ModelDescriptor, spec?: unknown): LLMStream;
+ supports(_descriptor: ModelDescriptor): boolean;
+ stream(options: LLMGenerationOptions, descriptor: ModelDescriptor, _spec?: unknown): LLMStream;
  private createMockStreamFromContext;
+ /**
+ * Check if this adapter supports image generation for a given model.
+ * Returns true if there's a registered mock with images for this model.
+ */
+ supportsImageGeneration(_modelId: string): boolean;
+ /**
+ * Generate mock images based on registered mocks.
+ *
+ * @param options - Image generation options
+ * @returns Mock image generation result
+ */
+ generateImage(options: ImageGenerationOptions): Promise<ImageGenerationResult>;
+ /**
+ * Transform mock response into ImageGenerationResult format.
+ *
+ * @param options - Original image generation options
+ * @param mockResponse - Mock response containing image data
+ * @returns ImageGenerationResult with mock data and zero cost
+ */
+ private createImageResult;
+ /**
+ * Check if this adapter supports speech generation for a given model.
+ * Returns true if there's a registered mock with audio for this model.
+ */
+ supportsSpeechGeneration(_modelId: string): boolean;
+ /**
+ * Generate mock speech based on registered mocks.
+ *
+ * @param options - Speech generation options
+ * @returns Mock speech generation result
+ */
+ generateSpeech(options: SpeechGenerationOptions): Promise<SpeechGenerationResult>;
+ /**
+ * Transform mock response into SpeechGenerationResult format.
+ * Converts base64 audio data to ArrayBuffer.
+ *
+ * @param options - Original speech generation options
+ * @param mockResponse - Mock response containing audio data
+ * @returns SpeechGenerationResult with mock data and zero cost
+ */
+ private createSpeechResult;
+ /**
+ * Map MIME type to audio format for SpeechGenerationResult.
+ * Defaults to "mp3" for unknown MIME types.
+ *
+ * @param mimeType - Audio MIME type string
+ * @returns Audio format identifier
+ */
+ private mimeTypeToAudioFormat;
  }
  /**
  * Create a mock provider adapter instance.
@@ -3336,6 +4379,27 @@ declare class MockBuilder {
  * })
  */
  when(matcher: MockMatcher): this;
+ /**
+ * Match when any message contains an image.
+ *
+ * @example
+ * mockLLM().whenMessageHasImage().returns("I see an image of a sunset.")
+ */
+ whenMessageHasImage(): this;
+ /**
+ * Match when any message contains audio.
+ *
+ * @example
+ * mockLLM().whenMessageHasAudio().returns("I hear music playing.")
+ */
+ whenMessageHasAudio(): this;
+ /**
+ * Match based on the number of images in the last message.
+ *
+ * @example
+ * mockLLM().whenImageCount((n) => n >= 2).returns("Comparing multiple images...")
+ */
+ whenImageCount(predicate: (count: number) => boolean): this;
  /**
  * Set the text response to return.
  * Can be a static string or a function that returns a string dynamically.
@@ -3368,6 +4432,51 @@ declare class MockBuilder {
  * .returnsGadgetCall('logger', { message: 'Done!' })
  */
  returnsGadgetCall(gadgetName: string, parameters: Record<string, unknown>): this;
+ /**
+ * Return a single image in the response.
+ * Useful for mocking image generation endpoints.
+ *
+ * @param data - Image data (base64 string or Buffer)
+ * @param mimeType - MIME type (auto-detected if Buffer provided without type)
+ *
+ * @example
+ * mockLLM()
+ * .forModel('dall-e-3')
+ * .returnsImage(pngBuffer)
+ * .register();
+ */
+ returnsImage(data: string | Buffer | Uint8Array, mimeType?: ImageMimeType): this;
+ /**
+ * Return multiple images in the response.
+ *
+ * @example
+ * mockLLM()
+ * .forModel('dall-e-3')
+ * .returnsImages([
+ * { data: pngBuffer1 },
+ * { data: pngBuffer2 },
+ * ])
+ * .register();
+ */
+ returnsImages(images: Array<{
+ data: string | Buffer | Uint8Array;
+ mimeType?: ImageMimeType;
+ revisedPrompt?: string;
+ }>): this;
+ /**
+ * Return audio data in the response.
+ * Useful for mocking speech synthesis endpoints.
+ *
+ * @param data - Audio data (base64 string or Buffer)
+ * @param mimeType - MIME type (auto-detected if Buffer provided without type)
+ *
+ * @example
+ * mockLLM()
+ * .forModel('tts-1')
+ * .returnsAudio(mp3Buffer)
+ * .register();
+ */
+ returnsAudio(data: string | Buffer | Uint8Array, mimeType?: AudioMimeType): this;
  /**
  * Set the complete mock response object.
  * This allows full control over all response properties.
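The new matchers and responders combine in the usual builder chain. A test-setup sketch (the fixture path is a placeholder, and top-level `await` assumes an ESM test file):

```typescript
import { promises as fs } from "node:fs";
import { mockLLM } from "llmist";

// Vision-style prompt: match on image content rather than message text.
mockLLM()
  .whenMessageHasImage()
  .returns("I see an image of a sunset.")
  .register();

// Speech mock: per the adapter docs above, registering audio for "tts-1"
// makes supportsSpeechGeneration("tts-1") return true.
mockLLM()
  .forModel("tts-1")
  .returnsAudio(await fs.readFile("fixtures/note.mp3"))
  .register();
```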
@@ -3609,4 +4718,4 @@ declare function createTextMockStream(text: string, options?: {
  usage?: MockResponse["usage"];
  }): LLMStream;

- export { type AfterLLMCallAction as $, type AgentHooks as A, BaseGadget as B, type CompactionStrategy as C, type ProviderAdapter as D, type ExecutionContext as E, type ModelDescriptor as F, GadgetRegistry as G, type HintTemplate as H, type IConversationManager as I, type ModelSpec as J, type LLMGenerationOptions as K, type LLMStream as L, MockProviderAdapter as M, type HistoryMessage as N, type TrailingMessage as O, type ParsedGadgetCall as P, type TrailingMessageContext as Q, type ResolvedCompactionConfig as R, type StreamEvent as S, type TokenUsage as T, AgentBuilder as U, type EventHandlers as V, collectEvents as W, collectText as X, runWithHandlers as Y, type AfterGadgetExecutionAction as Z, type AfterGadgetExecutionControllerContext as _, type LLMStreamChunk as a, type AfterLLMCallControllerContext as a0, type AfterLLMErrorAction as a1, type AgentOptions as a2, type BeforeGadgetExecutionAction as a3, type BeforeLLMCallAction as a4, type ChunkInterceptorContext as a5, type Controllers as a6, type GadgetExecutionControllerContext as a7, type GadgetParameterInterceptorContext as a8, type GadgetResultInterceptorContext as a9, type PromptContext as aA, type PromptTemplate as aB, DEFAULT_HINTS as aC, DEFAULT_PROMPTS as aD, resolveHintTemplate as aE, resolvePromptTemplate as aF, resolveRulesTemplate as aG, type QuickOptions as aH, complete as aI, stream as aJ, type GadgetClass as aK, type GadgetOrClass as aL, type CostReportingLLMist as aM, type GadgetExecuteResult as aN, type TextOnlyAction as aO, type TextOnlyContext as aP, type TextOnlyCustomHandler as aQ, type TextOnlyGadgetConfig as aR, type TextOnlyHandler as aS, type TextOnlyStrategy as aT, type Interceptors as aa, type LLMCallControllerContext as ab, type LLMErrorControllerContext as ac, type MessageInterceptorContext as ad, type ObserveChunkContext as ae, type ObserveGadgetCompleteContext as af, type ObserveGadgetStartContext as ag, type ObserveLLMCallContext as ah, type ObserveLLMCompleteContext as ai, type ObserveLLMErrorContext as aj, type Observers as ak, type MessageTurn as al, type ObserveCompactionContext as am, DEFAULT_COMPACTION_CONFIG as an, DEFAULT_SUMMARIZATION_PROMPT as ao, type LLMistOptions as ap, type LLMRole as aq, LLMMessageBuilder as ar, type CostEstimate as as, type ModelFeatures as at, type ModelLimits as au, type ModelPricing as av, type ProviderIdentifier as aw, ModelIdentifierParser as ax, type HintContext as ay, type PromptConfig as az, type LLMMessage as b, createMockAdapter as c, MockBuilder as d, createMockClient as e, MockManager as f, getMockManager as g, createMockStream as h, createTextMockStream as i, type MockMatcher as j, type MockMatcherContext as k, type MockOptions as l, mockLLM as m, type MockRegistration as n, type MockResponse as o, type MockStats as p, ModelRegistry as q, LLMist as r, type CompactionContext as s, type CompactionResult as t, type CompactionConfig as u, type CompactionEvent as v, type CompactionStats as w, type GadgetExecuteReturn as x, type GadgetExample as y, type GadgetExecutionResult as z };
+ export { type TrailingMessage as $, type AgentHooks as A, BaseGadget as B, type CompactionStrategy as C, type GadgetExecuteReturn as D, type ExecutionContext as E, type GadgetExample as F, GadgetRegistry as G, type HintTemplate as H, type IConversationManager as I, type GadgetExecutionResult as J, type ProviderAdapter as K, type LLMStream as L, MockProviderAdapter as M, type ModelDescriptor as N, type ModelSpec as O, type ParsedGadgetCall as P, type LLMGenerationOptions as Q, type ResolvedCompactionConfig as R, type StreamEvent as S, type TokenUsage as T, type ImageModelSpec as U, type ImageGenerationOptions as V, type ImageGenerationResult as W, type SpeechModelSpec as X, type SpeechGenerationOptions as Y, type SpeechGenerationResult as Z, type HistoryMessage as _, type LLMStreamChunk as a, type VisionAnalyzeOptions as a$, type TrailingMessageContext as a0, AgentBuilder as a1, type EventHandlers as a2, collectEvents as a3, collectText as a4, runWithHandlers as a5, type AfterGadgetExecutionAction as a6, type AfterGadgetExecutionControllerContext as a7, type AfterLLMCallAction as a8, type AfterLLMCallControllerContext as a9, type AudioMimeType as aA, type AudioSource as aB, type ContentPart as aC, type ImageBase64Source as aD, type ImageContentPart as aE, type ImageMimeType as aF, type ImageSource as aG, type ImageUrlSource as aH, type TextContentPart as aI, audioFromBase64 as aJ, audioFromBuffer as aK, detectAudioMimeType as aL, detectImageMimeType as aM, imageFromBase64 as aN, imageFromBuffer as aO, imageFromUrl as aP, isAudioPart as aQ, isDataUrl as aR, isImagePart as aS, isTextPart as aT, parseDataUrl as aU, text as aV, toBase64 as aW, type LLMRole as aX, extractText as aY, LLMMessageBuilder as aZ, normalizeContent as a_, type AfterLLMErrorAction as aa, type AgentOptions as ab, type BeforeGadgetExecutionAction as ac, type BeforeLLMCallAction as ad, type ChunkInterceptorContext as ae, type Controllers as af, type GadgetExecutionControllerContext as ag, type GadgetParameterInterceptorContext as ah, type GadgetResultInterceptorContext as ai, type Interceptors as aj, type LLMCallControllerContext as ak, type LLMErrorControllerContext as al, type MessageInterceptorContext as am, type ObserveChunkContext as an, type ObserveGadgetCompleteContext as ao, type ObserveGadgetStartContext as ap, type ObserveLLMCallContext as aq, type ObserveLLMCompleteContext as ar, type ObserveLLMErrorContext as as, type Observers as at, type MessageTurn as au, type ObserveCompactionContext as av, DEFAULT_COMPACTION_CONFIG as aw, DEFAULT_SUMMARIZATION_PROMPT as ax, type LLMistOptions as ay, type AudioContentPart as az, type LLMMessage as b, type VisionAnalyzeResult as b0, type CostEstimate as b1, type ModelFeatures as b2, type ModelLimits as b3, type ModelPricing as b4, type ProviderIdentifier as b5, ModelIdentifierParser as b6, type HintContext as b7, type PromptConfig as b8, type PromptContext as b9, type PromptTemplate as ba, DEFAULT_HINTS as bb, DEFAULT_PROMPTS as bc, resolveHintTemplate as bd, resolvePromptTemplate as be, resolveRulesTemplate as bf, type QuickOptions as bg, complete as bh, stream as bi, type GadgetClass as bj, type GadgetOrClass as bk, type CostReportingLLMist as bl, type GadgetExecuteResult as bm, type GadgetSkippedEvent as bn, type TextOnlyAction as bo, type TextOnlyContext as bp, type TextOnlyCustomHandler as bq, type TextOnlyGadgetConfig as br, type TextOnlyHandler as bs, type TextOnlyStrategy as bt, createMockAdapter as c, MockBuilder as d, createMockClient as e, MockManager as f, getMockManager as g, createMockStream as h, createTextMockStream as i, type MockAudioData as j, type MockImageData as k, type MockMatcher as l, mockLLM as m, type MockMatcherContext as n, type MockOptions as o, type MockRegistration as p, type MockResponse as q, type MockStats as r, ModelRegistry as s, type MessageContent as t, LLMist as u, type CompactionContext as v, type CompactionResult as w, type CompactionConfig as x, type CompactionEvent as y, type CompactionStats as z };