llmist 2.4.0 → 2.5.0

This diff reflects the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
@@ -861,8 +861,21 @@ interface ParsedGadgetCall {
     parametersRaw: string;
     parameters?: Record<string, unknown>;
     parseError?: string;
+    /** List of invocation IDs this gadget depends on. Empty array if no dependencies. */
+    dependencies: string[];
 }
 
+/** Event emitted when a gadget is skipped due to a failed dependency */
+interface GadgetSkippedEvent {
+    type: "gadget_skipped";
+    gadgetName: string;
+    invocationId: string;
+    parameters: Record<string, unknown>;
+    /** The invocation ID of the dependency that failed */
+    failedDependency: string;
+    /** The error message from the failed dependency */
+    failedDependencyError: string;
+}
 type StreamEvent = {
     type: "text";
     content: string;
@@ -872,7 +885,7 @@ type StreamEvent = {
 } | {
     type: "gadget_result";
     result: GadgetExecutionResult;
-} | {
+} | GadgetSkippedEvent | {
     type: "human_input_required";
     question: string;
     gadgetName: string;
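
The new `gadget_skipped` member of the `StreamEvent` union surfaces dependency failures directly in the agent's event stream. A minimal consumption sketch, assuming the `LLMist.createAgent()` / `agent.run()` entry points shown in the `AgentBuilder` examples later in this diff (the model and prompt are placeholders):

```typescript
import { LLMist } from "llmist";

const agent = LLMist.createAgent()
    .withModel("gpt-4o")
    .ask("Fetch the report, then summarize it");

for await (const event of agent.run()) {
    if (event.type === "text") {
        process.stdout.write(event.content);
    } else if (event.type === "gadget_skipped") {
        // GadgetSkippedEvent: this gadget never ran because a dependency failed.
        console.warn(
            `${event.gadgetName} (${event.invocationId}) skipped: ` +
            `dependency ${event.failedDependency} failed with "${event.failedDependencyError}"`,
        );
    }
}
```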
@@ -1321,6 +1334,215 @@ declare abstract class BaseGadget {
     }): string;
 }
 
+/**
+ * Types and interfaces for multimodal input content.
+ *
+ * These types define the structure for sending images, audio, and other
+ * media alongside text in LLM messages. They complement the output types
+ * in media-types.ts.
+ */
+/**
+ * Supported image MIME types for input.
+ * All major providers support these formats.
+ */
+type ImageMimeType = "image/jpeg" | "image/png" | "image/gif" | "image/webp";
+/**
+ * Supported audio MIME types for input.
+ * Currently only Gemini supports audio input.
+ */
+type AudioMimeType = "audio/mp3" | "audio/mpeg" | "audio/wav" | "audio/webm" | "audio/ogg";
+/**
+ * Base interface for all content parts.
+ */
+interface BaseContentPart {
+    type: string;
+}
+/**
+ * Text content part.
+ */
+interface TextContentPart extends BaseContentPart {
+    type: "text";
+    text: string;
+}
+/**
+ * Image content part.
+ */
+interface ImageContentPart extends BaseContentPart {
+    type: "image";
+    source: ImageSource;
+}
+/**
+ * Audio content part.
+ * Currently only supported by Gemini.
+ */
+interface AudioContentPart extends BaseContentPart {
+    type: "audio";
+    source: AudioSource;
+}
+/**
+ * Union of all supported content part types.
+ */
+type ContentPart = TextContentPart | ImageContentPart | AudioContentPart;
+/**
+ * Image can come from base64 data or a URL.
+ */
+type ImageSource = ImageBase64Source | ImageUrlSource;
+/**
+ * Base64-encoded image data.
+ * Supported by all providers.
+ */
+interface ImageBase64Source {
+    type: "base64";
+    mediaType: ImageMimeType;
+    data: string;
+}
+/**
+ * Image URL reference.
+ * Only supported by OpenAI.
+ */
+interface ImageUrlSource {
+    type: "url";
+    url: string;
+}
+/**
+ * Audio source (base64 only).
+ * URL sources are not currently supported for audio.
+ */
+interface AudioSource {
+    type: "base64";
+    mediaType: AudioMimeType;
+    data: string;
+}
+/**
+ * Check if a content part is a text part.
+ */
+declare function isTextPart(part: ContentPart): part is TextContentPart;
+/**
+ * Check if a content part is an image part.
+ */
+declare function isImagePart(part: ContentPart): part is ImageContentPart;
+/**
+ * Check if a content part is an audio part.
+ */
+declare function isAudioPart(part: ContentPart): part is AudioContentPart;
+/**
+ * Create a text content part.
+ *
+ * @example
+ * ```typescript
+ * const part = text("What's in this image?");
+ * ```
+ */
+declare function text(content: string): TextContentPart;
+/**
+ * Create an image content part from base64-encoded data.
+ *
+ * @param data - Base64-encoded image data
+ * @param mediaType - MIME type of the image
+ *
+ * @example
+ * ```typescript
+ * const part = imageFromBase64(base64Data, "image/jpeg");
+ * ```
+ */
+declare function imageFromBase64(data: string, mediaType: ImageMimeType): ImageContentPart;
+/**
+ * Create an image content part from a URL.
+ * Note: Only supported by OpenAI.
+ *
+ * @param url - URL to the image (must be accessible)
+ *
+ * @example
+ * ```typescript
+ * const part = imageFromUrl("https://example.com/image.jpg");
+ * ```
+ */
+declare function imageFromUrl(url: string): ImageContentPart;
+/**
+ * Detect the MIME type of image data from magic bytes.
+ *
+ * @param data - Raw image data
+ * @returns Detected MIME type or null if unknown
+ */
+declare function detectImageMimeType(data: Buffer | Uint8Array): ImageMimeType | null;
+/**
+ * Detect the MIME type of audio data from magic bytes.
+ *
+ * @param data - Raw audio data
+ * @returns Detected MIME type or null if unknown
+ */
+declare function detectAudioMimeType(data: Buffer | Uint8Array): AudioMimeType | null;
+/**
+ * Convert data to base64 string.
+ *
+ * @param data - Data to encode (Buffer, Uint8Array, or already base64 string)
+ * @returns Base64-encoded string
+ */
+declare function toBase64(data: Buffer | Uint8Array | string): string;
+/**
+ * Create an image content part from a Buffer or Uint8Array.
+ * Automatically detects the MIME type if not provided.
+ *
+ * @param buffer - Image data
+ * @param mediaType - Optional MIME type (auto-detected if not provided)
+ *
+ * @example
+ * ```typescript
+ * const imageData = await fs.readFile("photo.jpg");
+ * const part = imageFromBuffer(imageData); // Auto-detects JPEG
+ * ```
+ */
+declare function imageFromBuffer(buffer: Buffer | Uint8Array, mediaType?: ImageMimeType): ImageContentPart;
+/**
+ * Create an audio content part from base64-encoded data.
+ *
+ * @param data - Base64-encoded audio data
+ * @param mediaType - MIME type of the audio
+ *
+ * @example
+ * ```typescript
+ * const part = audioFromBase64(base64Audio, "audio/mp3");
+ * ```
+ */
+declare function audioFromBase64(data: string, mediaType: AudioMimeType): AudioContentPart;
+/**
+ * Create an audio content part from a Buffer or Uint8Array.
+ * Automatically detects the MIME type if not provided.
+ *
+ * @param buffer - Audio data
+ * @param mediaType - Optional MIME type (auto-detected if not provided)
+ *
+ * @example
+ * ```typescript
+ * const audioData = await fs.readFile("audio.mp3");
+ * const part = audioFromBuffer(audioData); // Auto-detects MP3
+ * ```
+ */
+declare function audioFromBuffer(buffer: Buffer | Uint8Array, mediaType?: AudioMimeType): AudioContentPart;
+/**
+ * Check if a string is a data URL.
+ *
+ * @param input - String to check
+ * @returns True if it's a data URL
+ */
+declare function isDataUrl(input: string): boolean;
+/**
+ * Parse a data URL into its components.
+ *
+ * @param url - Data URL to parse
+ * @returns Parsed components or null if invalid
+ *
+ * @example
+ * ```typescript
+ * const result = parseDataUrl("data:image/jpeg;base64,/9j/4AAQ...");
+ * // { mimeType: "image/jpeg", data: "/9j/4AAQ..." }
+ * ```
+ */
+declare function parseDataUrl(url: string): {
+    mimeType: string;
+    data: string;
+} | null;
+
 /**
  * Context provided to prompt template functions for rendering dynamic content.
  */
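
Taken together, the detection and data-URL helpers let callers normalize arbitrary image input before building a content part. A small sketch using only the functions declared above, assuming they are re-exported from the package root as the `askWithContent` example later in this diff suggests (the JPEG fallback for undetectable base64 strings is an assumption):

```typescript
import {
    detectImageMimeType,
    imageFromBase64,
    isDataUrl,
    parseDataUrl,
    toBase64,
    type ImageContentPart,
    type ImageMimeType,
} from "llmist";

// Sketch: turn a data URL, base64 string, or raw bytes into an ImageContentPart.
function toImagePart(input: string | Buffer | Uint8Array): ImageContentPart {
    if (typeof input === "string" && isDataUrl(input)) {
        const parsed = parseDataUrl(input);
        if (!parsed) throw new Error("Invalid data URL");
        // parseDataUrl types mimeType as a plain string; we assume it is one
        // of the supported ImageMimeType values here.
        return imageFromBase64(parsed.data, parsed.mimeType as ImageMimeType);
    }
    if (typeof input === "string") {
        // Already base64; assume JPEG since magic bytes are not available.
        return imageFromBase64(input, "image/jpeg");
    }
    const detected = detectImageMimeType(input);
    if (!detected) throw new Error("Unrecognized image format");
    return imageFromBase64(toBase64(input), detected);
}
```

For the common buffer case, `imageFromBuffer` already bundles the detect-and-encode steps; the sketch is only needed when input may also arrive as a data URL or pre-encoded string.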
@@ -1454,12 +1676,33 @@ declare function resolveRulesTemplate(rules: PromptConfig["rules"] | undefined,
 declare function resolveHintTemplate(template: HintTemplate | undefined, defaultValue: string, context: HintContext): string;
 
 type LLMRole = "system" | "user" | "assistant";
+/**
+ * Message content can be a simple string (text only) or an array of content parts (multimodal).
+ * Using a string is simpler for text-only messages, while arrays support images and audio.
+ */
+type MessageContent = string | ContentPart[];
 interface LLMMessage {
     role: LLMRole;
-    content: string;
+    content: MessageContent;
     name?: string;
     metadata?: Record<string, unknown>;
 }
+/**
+ * Normalize message content to an array of content parts.
+ * Converts string content to a single text part.
+ *
+ * @param content - Message content (string or ContentPart[])
+ * @returns Array of content parts
+ */
+declare function normalizeContent(content: MessageContent): ContentPart[];
+/**
+ * Extract text from message content.
+ * Concatenates all text parts in the content.
+ *
+ * @param content - Message content (string or ContentPart[])
+ * @returns Combined text from all text parts
+ */
+declare function extractText(content: MessageContent): string;
 declare class LLMMessageBuilder {
     private readonly messages;
     private startPrefix;
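
`normalizeContent` and `extractText` give code that consumes `LLMMessage` values a uniform view regardless of whether the content is a string or an array of parts. A short usage sketch built only from the declarations above:

```typescript
import { extractText, isImagePart, normalizeContent, type LLMMessage } from "llmist";

const message: LLMMessage = {
    role: "user",
    content: [
        { type: "text", text: "What's in this image?" },
        {
            type: "image",
            source: { type: "base64", mediaType: "image/png", data: "iVBORw0..." }, // truncated placeholder
        },
    ],
};

// Always an array of parts, even when content is a plain string.
const parts = normalizeContent(message.content);
const imageCount = parts.filter(isImagePart).length; // 1

// Concatenated text from all text parts: "What's in this image?"
const prompt = extractText(message.content);
```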
@@ -1482,8 +1725,92 @@ declare class LLMMessageBuilder {
     private buildUsageSection;
     private buildExamplesSection;
     private buildRulesSection;
-    addUser(content: string, metadata?: Record<string, unknown>): this;
+    /**
+     * Add a user message.
+     * Content can be a string (text only) or an array of content parts (multimodal).
+     *
+     * @param content - Message content
+     * @param metadata - Optional metadata
+     *
+     * @example
+     * ```typescript
+     * // Text only
+     * builder.addUser("Hello!");
+     *
+     * // Multimodal
+     * builder.addUser([
+     *   text("What's in this image?"),
+     *   imageFromBuffer(imageData),
+     * ]);
+     * ```
+     */
+    addUser(content: MessageContent, metadata?: Record<string, unknown>): this;
     addAssistant(content: string, metadata?: Record<string, unknown>): this;
+    /**
+     * Add a user message with an image attachment.
+     *
+     * @param textContent - Text prompt
+     * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+     * @param mimeType - Optional MIME type (auto-detected if not provided)
+     *
+     * @example
+     * ```typescript
+     * builder.addUserWithImage(
+     *   "What's in this image?",
+     *   await fs.readFile("photo.jpg"),
+     *   "image/jpeg" // Optional - auto-detected
+     * );
+     * ```
+     */
+    addUserWithImage(textContent: string, imageData: Buffer | Uint8Array | string, mimeType?: ImageMimeType): this;
+    /**
+     * Add a user message with an image URL (OpenAI only).
+     *
+     * @param textContent - Text prompt
+     * @param imageUrl - URL to the image
+     *
+     * @example
+     * ```typescript
+     * builder.addUserWithImageUrl(
+     *   "What's in this image?",
+     *   "https://example.com/image.jpg"
+     * );
+     * ```
+     */
+    addUserWithImageUrl(textContent: string, imageUrl: string): this;
+    /**
+     * Add a user message with an audio attachment (Gemini only).
+     *
+     * @param textContent - Text prompt
+     * @param audioData - Audio data (Buffer, Uint8Array, or base64 string)
+     * @param mimeType - Optional MIME type (auto-detected if not provided)
+     *
+     * @example
+     * ```typescript
+     * builder.addUserWithAudio(
+     *   "Transcribe this audio",
+     *   await fs.readFile("recording.mp3"),
+     *   "audio/mp3" // Optional - auto-detected
+     * );
+     * ```
+     */
+    addUserWithAudio(textContent: string, audioData: Buffer | Uint8Array | string, mimeType?: AudioMimeType): this;
+    /**
+     * Add a user message with multiple content parts.
+     * Provides full flexibility for complex multimodal messages.
+     *
+     * @param parts - Array of content parts
+     *
+     * @example
+     * ```typescript
+     * builder.addUserMultimodal([
+     *   text("Compare these images:"),
+     *   imageFromBuffer(image1),
+     *   imageFromBuffer(image2),
+     * ]);
+     * ```
+     */
+    addUserMultimodal(parts: ContentPart[]): this;
     addGadgetCall(gadget: string, parameters: Record<string, unknown>, result: string): this;
     /**
      * Format parameters as Block format with JSON Pointer paths.
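
Since every `addUser*` method returns `this`, multimodal turns chain exactly like the existing text-only API. A sketch combining the new methods; constructing the builder directly with `new LLMMessageBuilder()` is an assumption, as the constructor is not visible in this diff:

```typescript
import { LLMMessageBuilder } from "llmist";
import { readFile } from "node:fs/promises";

// Sketch: mixing text, image, and audio turns in one conversation.
const builder = new LLMMessageBuilder() // assumed default constructor
    .addUser("Here is the source photo and a voice note about it.")
    .addUserWithImage("Photo:", await readFile("photo.jpg"))      // MIME auto-detected
    .addUserWithAudio("Voice note:", await readFile("note.mp3")); // Gemini only
```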
@@ -1688,6 +2015,120 @@ declare class TextNamespace {
     stream(prompt: string, options?: QuickOptions): AsyncGenerator<string>;
 }
 
+/**
+ * Vision Analysis Namespace
+ *
+ * Provides one-shot image analysis without agent setup.
+ * Useful for quick image understanding tasks.
+ *
+ * @example
+ * ```typescript
+ * const llmist = new LLMist();
+ *
+ * const description = await llmist.vision.analyze({
+ *   model: "gpt-4o",
+ *   image: await readFile("photo.jpg"),
+ *   prompt: "Describe this image in detail",
+ * });
+ *
+ * console.log(description);
+ * ```
+ */
+
+/**
+ * Options for vision analysis.
+ */
+interface VisionAnalyzeOptions {
+    /** Model to use (must support vision, e.g., "gpt-4o", "claude-sonnet-4-20250514", "gemini-2.5-flash") */
+    model: string;
+    /** Image data: Buffer, Uint8Array, base64 string, data URL, or HTTPS URL */
+    image: string | Buffer | Uint8Array;
+    /** Analysis prompt describing what to do with the image */
+    prompt: string;
+    /** MIME type (auto-detected if not provided for Buffer/Uint8Array) */
+    mimeType?: ImageMimeType;
+    /** System prompt for analysis context */
+    systemPrompt?: string;
+    /** Max tokens for response */
+    maxTokens?: number;
+    /** Temperature (0-1) */
+    temperature?: number;
+}
+/**
+ * Result of vision analysis.
+ */
+interface VisionAnalyzeResult {
+    /** The analysis text */
+    text: string;
+    /** Model used */
+    model: string;
+    /** Token usage if available */
+    usage?: {
+        inputTokens: number;
+        outputTokens: number;
+        totalTokens: number;
+    };
+}
+declare class VisionNamespace {
+    private readonly client;
+    constructor(client: LLMist);
+    /**
+     * Build a message builder with the image content attached.
+     * Handles URLs, data URLs, base64 strings, and binary buffers.
+     */
+    private buildImageMessage;
+    /**
+     * Stream the response and collect text and usage information.
+     */
+    private streamAndCollect;
+    /**
+     * Analyze an image with a vision-capable model.
+     * Returns the analysis as a string.
+     *
+     * @param options - Vision analysis options
+     * @returns Promise resolving to the analysis text
+     * @throws Error if the image format is unsupported or the model doesn't support vision
+     *
+     * @example
+     * ```typescript
+     * // From file
+     * const result = await llmist.vision.analyze({
+     *   model: "gpt-4o",
+     *   image: await fs.readFile("photo.jpg"),
+     *   prompt: "What's in this image?",
+     * });
+     *
+     * // From URL (OpenAI only)
+     * const result = await llmist.vision.analyze({
+     *   model: "gpt-4o",
+     *   image: "https://example.com/image.jpg",
+     *   prompt: "Describe this image",
+     * });
+     * ```
+     */
+    analyze(options: VisionAnalyzeOptions): Promise<string>;
+    /**
+     * Analyze an image and return a detailed result with usage info.
+     *
+     * @param options - Vision analysis options
+     * @returns Promise resolving to the analysis result with usage info
+     */
+    analyzeWithUsage(options: VisionAnalyzeOptions): Promise<VisionAnalyzeResult>;
+    /**
+     * Check if a model supports vision/image input.
+     *
+     * @param modelId - Model ID to check
+     * @returns True if the model supports vision
+     */
+    supportsModel(modelId: string): boolean;
+    /**
+     * List all models that support vision.
+     *
+     * @returns Array of model IDs that support vision
+     */
+    listModels(): string[];
+}
+
 interface LLMistOptions {
     /**
      * Provider adapters to register manually.
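
Beyond the `analyze()` examples above, `analyzeWithUsage()` and the capability queries round out the namespace. A sketch using only the declared methods; which model IDs actually report vision support depends on the registry at runtime:

```typescript
import { LLMist } from "llmist";
import { readFile } from "node:fs/promises";

const llmist = new LLMist();

// Guard on capability before calling.
if (llmist.vision.supportsModel("gpt-4o")) {
    const result = await llmist.vision.analyzeWithUsage({
        model: "gpt-4o",
        image: await readFile("photo.jpg"),
        prompt: "Describe this image in one sentence",
        maxTokens: 100,
    });
    console.log(result.text, result.usage?.totalTokens);
} else {
    console.log("Vision-capable models:", llmist.vision.listModels());
}
```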
@@ -1735,6 +2176,7 @@ declare class LLMist {
     readonly text: TextNamespace;
     readonly image: ImageNamespace;
     readonly speech: SpeechNamespace;
+    readonly vision: VisionNamespace;
     constructor();
     constructor(adapters: ProviderAdapter[]);
     constructor(adapters: ProviderAdapter[], defaultProvider: string);
@@ -1962,12 +2404,15 @@ interface EventHandlers {
     /** Called when a gadget is about to be executed */
     onGadgetCall?: (call: {
         gadgetName: string;
+        invocationId: string;
         parameters?: Record<string, unknown>;
         parametersRaw: string;
+        dependencies: string[];
     }) => void | Promise<void>;
     /** Called when a gadget execution completes */
     onGadgetResult?: (result: {
         gadgetName: string;
+        invocationId: string;
         result?: string;
         error?: string;
         parameters: Record<string, unknown>;
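
With `invocationId` and `dependencies` now exposed, event handlers can reconstruct the call graph as gadgets execute. A type-level sketch of an `EventHandlers` object using only the fields declared above (assuming the type is importable from the package root):

```typescript
import type { EventHandlers } from "llmist";

// Sketch: log each gadget call together with the invocations it waits on.
const handlers: EventHandlers = {
    onGadgetCall: ({ gadgetName, invocationId, dependencies }) => {
        const deps = dependencies.length ? ` after [${dependencies.join(", ")}]` : "";
        console.log(`-> ${gadgetName}#${invocationId}${deps}`);
    },
    onGadgetResult: ({ gadgetName, invocationId, error }) => {
        console.log(`<- ${gadgetName}#${invocationId} ${error ? `failed: ${error}` : "ok"}`);
    },
};
```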
@@ -2246,6 +2691,21 @@ interface ObserveGadgetCompleteContext {
     cost?: number;
     logger: Logger<ILogObj>;
 }
+/**
+ * Context provided when a gadget is skipped due to a failed dependency.
+ * Read-only observation point.
+ */
+interface ObserveGadgetSkippedContext {
+    iteration: number;
+    gadgetName: string;
+    invocationId: string;
+    parameters: Readonly<Record<string, unknown>>;
+    /** The invocation ID of the dependency that failed */
+    failedDependency: string;
+    /** The error message from the failed dependency */
+    failedDependencyError: string;
+    logger: Logger<ILogObj>;
+}
 /**
  * Context provided for each stream chunk.
  * Read-only observation point.
@@ -2279,6 +2739,8 @@ interface Observers {
     onGadgetExecutionStart?: (context: ObserveGadgetStartContext) => void | Promise<void>;
     /** Called when a gadget execution completes (success or error) */
     onGadgetExecutionComplete?: (context: ObserveGadgetCompleteContext) => void | Promise<void>;
+    /** Called when a gadget is skipped due to a failed dependency */
+    onGadgetSkipped?: (context: ObserveGadgetSkippedContext) => void | Promise<void>;
     /** Called for each stream chunk */
     onStreamChunk?: (context: ObserveChunkContext) => void | Promise<void>;
     /** Called when context compaction occurs */
@@ -2518,6 +2980,39 @@ type AfterGadgetExecutionAction = {
     action: "recover";
     fallbackResult: string;
 };
+/**
+ * Context for dependency skip controller.
+ * Called when a gadget would be skipped due to a failed dependency.
+ */
+interface DependencySkipControllerContext {
+    iteration: number;
+    gadgetName: string;
+    invocationId: string;
+    /** Parameters of the gadget that would be skipped */
+    parameters: Record<string, unknown>;
+    /** The invocation ID of the dependency that failed */
+    failedDependency: string;
+    /** The error message from the failed dependency */
+    failedDependencyError: string;
+    logger: Logger<ILogObj>;
+}
+/**
+ * Action returned by onDependencySkipped controller.
+ */
+type DependencySkipAction =
+/** Skip execution and propagate failure to downstream dependents */
+{
+    action: "skip";
+}
+/** Execute the gadget anyway despite the failed dependency */
+ | {
+    action: "execute_anyway";
+}
+/** Skip execution but provide a fallback result (doesn't propagate failure) */
+ | {
+    action: "use_fallback";
+    fallbackResult: string;
+};
 /**
  * Controllers: Async lifecycle hooks that control execution flow.
  * - Can short-circuit execution
@@ -2550,6 +3045,11 @@ interface Controllers {
      * Can provide a fallback result to recover from errors.
      */
    afterGadgetExecution?: (context: AfterGadgetExecutionControllerContext) => Promise<AfterGadgetExecutionAction>;
+    /**
+     * Called before skipping a gadget due to a failed dependency.
+     * Can override the default skip behavior to execute anyway or provide a fallback.
+     */
+    onDependencySkipped?: (context: DependencySkipControllerContext) => Promise<DependencySkipAction>;
 }
 /**
  * Clean hooks system with three distinct categories:
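
The controller complements the read-only `onGadgetSkipped` observer: it runs before the skip and can change the outcome. A sketch returning each `DependencySkipAction` variant, built from the declared context fields; the gadget names `notify` and `summarize` are hypothetical:

```typescript
import type { Controllers } from "llmist";

// Sketch: override the default skip for specific gadgets.
const controllers: Controllers = {
    onDependencySkipped: async (ctx) => {
        ctx.logger.warn(
            `${ctx.gadgetName}#${ctx.invocationId} blocked by ${ctx.failedDependency}: ` +
            ctx.failedDependencyError,
        );
        if (ctx.gadgetName === "notify") {
            // Side-effect-only gadget: run it even though its input failed.
            return { action: "execute_anyway" };
        }
        if (ctx.gadgetName === "summarize") {
            // Keep downstream gadgets alive with a placeholder result.
            return { action: "use_fallback", fallbackResult: "(no summary available)" };
        }
        // Default behavior: skip and propagate the failure.
        return { action: "skip" };
    },
};
```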
@@ -2583,8 +3083,8 @@ interface AgentOptions {
     model: string;
     /** System prompt */
     systemPrompt?: string;
-    /** Initial user prompt (optional if using build()) */
-    userPrompt?: string;
+    /** Initial user prompt (optional if using build()). Can be text or multimodal content. */
+    userPrompt?: string | ContentPart[];
     /** Maximum iterations */
     maxIterations?: number;
     /** Temperature */
@@ -2603,10 +3103,10 @@ interface AgentOptions {
     gadgetEndPrefix?: string;
     /** Custom gadget argument prefix for block format parameters */
     gadgetArgPrefix?: string;
-    /** Initial messages */
+    /** Initial messages. User messages support multimodal content. */
     initialMessages?: Array<{
         role: "system" | "user" | "assistant";
-        content: string;
+        content: MessageContent;
     }>;
     /** Text-only handler */
     textOnlyHandler?: TextOnlyHandler;
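
Together these two changes mean agents can be seeded with multimodal input directly through options. A type-level sketch of an `AgentOptions` value (the model name and file paths are placeholders):

```typescript
import { imageFromBuffer, text, type AgentOptions } from "llmist";
import { readFile } from "node:fs/promises";

const options: AgentOptions = {
    model: "gpt-4o",
    systemPrompt: "You analyze images",
    // userPrompt accepts plain text or ContentPart[]:
    userPrompt: [
        text("What changed between run 1 and run 2?"),
        imageFromBuffer(await readFile("run1.png")),
        imageFromBuffer(await readFile("run2.png")),
    ],
    // initialMessages likewise accept MessageContent per message.
    initialMessages: [
        { role: "assistant", content: "Ready to compare screenshots." },
    ],
};
```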
@@ -2815,9 +3315,10 @@ declare class Agent {
 
 /**
  * Message for conversation history.
+ * User messages can be text (string) or multimodal (ContentPart[]).
  */
 type HistoryMessage = {
-    user: string;
+    user: string | ContentPart[];
 } | {
     assistant: string;
 } | {
@@ -3336,7 +3837,62 @@ declare class AgentBuilder {
      * }
      * ```
      */
+    /**
+     * Build AgentOptions with the given user prompt.
+     * Centralizes options construction for ask(), askWithImage(), and askWithContent().
+     */
+    private buildAgentOptions;
     ask(userPrompt: string): Agent;
+    /**
+     * Build and create the agent with a multimodal user prompt (text + image).
+     * Returns the Agent instance ready to run.
+     *
+     * @param textPrompt - Text prompt describing what to do with the image
+     * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+     * @param mimeType - Optional MIME type (auto-detected if not provided)
+     * @returns Configured Agent instance
+     *
+     * @example
+     * ```typescript
+     * const agent = LLMist.createAgent()
+     *   .withModel("gpt-4o")
+     *   .withSystem("You analyze images")
+     *   .askWithImage(
+     *     "What's in this image?",
+     *     await fs.readFile("photo.jpg")
+     *   );
+     *
+     * for await (const event of agent.run()) {
+     *   // handle events
+     * }
+     * ```
+     */
+    askWithImage(textPrompt: string, imageData: Buffer | Uint8Array | string, mimeType?: ImageMimeType): Agent;
+    /**
+     * Build and return an Agent configured with multimodal content.
+     * More flexible than askWithImage - accepts any combination of content parts.
+     *
+     * @param content - Array of content parts (text, images, audio)
+     * @returns A configured Agent ready for execution
+     *
+     * @example
+     * ```typescript
+     * import { text, imageFromBuffer, audioFromBuffer } from "llmist";
+     *
+     * const agent = LLMist.createAgent()
+     *   .withModel("gemini:gemini-2.5-flash")
+     *   .askWithContent([
+     *     text("Describe this image and transcribe the audio:"),
+     *     imageFromBuffer(imageData),
+     *     audioFromBuffer(audioData),
+     *   ]);
+     *
+     * for await (const event of agent.run()) {
+     *   // handle events
+     * }
+     * ```
+     */
+    askWithContent(content: ContentPart[]): Agent;
     /**
      * Build, run, and collect only the text response.
      * Convenient for simple queries where you just want the final answer.
@@ -3416,8 +3972,9 @@ declare class AgentBuilder {
 interface IConversationManager {
     /**
      * Adds a user message to the conversation.
+     * Supports multimodal content (text + images/audio).
      */
-    addUserMessage(content: string): void;
+    addUserMessage(content: MessageContent): void;
     /**
      * Adds an assistant message to the conversation.
      */
@@ -3485,6 +4042,26 @@ interface MockMatcherContext {
  * const matcher: MockMatcher = (ctx) => ctx.provider === 'anthropic';
  */
 type MockMatcher = (context: MockMatcherContext) => boolean | Promise<boolean>;
+/**
+ * Image data in a mock response.
+ */
+interface MockImageData {
+    /** Base64-encoded image data */
+    data: string;
+    /** MIME type of the image */
+    mimeType: ImageMimeType;
+    /** Revised prompt (for image generation responses) */
+    revisedPrompt?: string;
+}
+/**
+ * Audio data in a mock response.
+ */
+interface MockAudioData {
+    /** Base64-encoded audio data */
+    data: string;
+    /** MIME type of the audio */
+    mimeType: AudioMimeType;
+}
 /**
  * A mock response that will be returned when a matcher succeeds.
 */
@@ -3504,6 +4081,16 @@ interface MockResponse {
         /** Optional invocationId, will be auto-generated if not provided */
         invocationId?: string;
     }>;
+    /**
+     * Image data to return in the response (e.g., for image generation mocks).
+     * Each image will be yielded as a separate chunk in the stream.
+     */
+    images?: MockImageData[];
+    /**
+     * Audio data to return in the response (e.g., for speech synthesis mocks).
+     * Will be yielded as a chunk in the stream.
+     */
+    audio?: MockAudioData;
     /**
      * Simulated token usage statistics
     */
@@ -3610,9 +4197,58 @@ declare class MockProviderAdapter implements ProviderAdapter {
     readonly priority = 100;
     private readonly mockManager;
     constructor(options?: MockOptions);
-    supports(descriptor: ModelDescriptor): boolean;
-    stream(options: LLMGenerationOptions, descriptor: ModelDescriptor, spec?: unknown): LLMStream;
+    supports(_descriptor: ModelDescriptor): boolean;
+    stream(options: LLMGenerationOptions, descriptor: ModelDescriptor, _spec?: unknown): LLMStream;
     private createMockStreamFromContext;
+    /**
+     * Check if this adapter supports image generation for a given model.
+     * Returns true if there's a registered mock with images for this model.
+     */
+    supportsImageGeneration(_modelId: string): boolean;
+    /**
+     * Generate mock images based on registered mocks.
+     *
+     * @param options - Image generation options
+     * @returns Mock image generation result
+     */
+    generateImage(options: ImageGenerationOptions): Promise<ImageGenerationResult>;
+    /**
+     * Transform mock response into ImageGenerationResult format.
+     *
+     * @param options - Original image generation options
+     * @param mockResponse - Mock response containing image data
+     * @returns ImageGenerationResult with mock data and zero cost
+     */
+    private createImageResult;
+    /**
+     * Check if this adapter supports speech generation for a given model.
+     * Returns true if there's a registered mock with audio for this model.
+     */
+    supportsSpeechGeneration(_modelId: string): boolean;
+    /**
+     * Generate mock speech based on registered mocks.
+     *
+     * @param options - Speech generation options
+     * @returns Mock speech generation result
+     */
+    generateSpeech(options: SpeechGenerationOptions): Promise<SpeechGenerationResult>;
+    /**
+     * Transform mock response into SpeechGenerationResult format.
+     * Converts base64 audio data to ArrayBuffer.
+     *
+     * @param options - Original speech generation options
+     * @param mockResponse - Mock response containing audio data
+     * @returns SpeechGenerationResult with mock data and zero cost
+     */
+    private createSpeechResult;
+    /**
+     * Map MIME type to audio format for SpeechGenerationResult.
+     * Defaults to "mp3" for unknown MIME types.
+     *
+     * @param mimeType - Audio MIME type string
+     * @returns Audio format identifier
+     */
+    private mimeTypeToAudioFormat;
 }
 /**
  * Create a mock provider adapter instance.
@@ -3743,6 +4379,27 @@ declare class MockBuilder {
      * })
      */
     when(matcher: MockMatcher): this;
+    /**
+     * Match when any message contains an image.
+     *
+     * @example
+     * mockLLM().whenMessageHasImage().returns("I see an image of a sunset.")
+     */
+    whenMessageHasImage(): this;
+    /**
+     * Match when any message contains audio.
+     *
+     * @example
+     * mockLLM().whenMessageHasAudio().returns("I hear music playing.")
+     */
+    whenMessageHasAudio(): this;
+    /**
+     * Match based on the number of images in the last message.
+     *
+     * @example
+     * mockLLM().whenImageCount((n) => n >= 2).returns("Comparing multiple images...")
+     */
+    whenImageCount(predicate: (count: number) => boolean): this;
     /**
      * Set the text response to return.
      * Can be a static string or a function that returns a string dynamically.
@@ -3775,6 +4432,51 @@ declare class MockBuilder {
      * .returnsGadgetCall('logger', { message: 'Done!' })
      */
     returnsGadgetCall(gadgetName: string, parameters: Record<string, unknown>): this;
+    /**
+     * Return a single image in the response.
+     * Useful for mocking image generation endpoints.
+     *
+     * @param data - Image data (base64 string or Buffer)
+     * @param mimeType - MIME type (auto-detected if Buffer provided without type)
+     *
+     * @example
+     * mockLLM()
+     *   .forModel('dall-e-3')
+     *   .returnsImage(pngBuffer)
+     *   .register();
+     */
+    returnsImage(data: string | Buffer | Uint8Array, mimeType?: ImageMimeType): this;
+    /**
+     * Return multiple images in the response.
+     *
+     * @example
+     * mockLLM()
+     *   .forModel('dall-e-3')
+     *   .returnsImages([
+     *     { data: pngBuffer1 },
+     *     { data: pngBuffer2 },
+     *   ])
+     *   .register();
+     */
+    returnsImages(images: Array<{
+        data: string | Buffer | Uint8Array;
+        mimeType?: ImageMimeType;
+        revisedPrompt?: string;
+    }>): this;
+    /**
+     * Return audio data in the response.
+     * Useful for mocking speech synthesis endpoints.
+     *
+     * @param data - Audio data (base64 string or Buffer)
+     * @param mimeType - MIME type (auto-detected if Buffer provided without type)
+     *
+     * @example
+     * mockLLM()
+     *   .forModel('tts-1')
+     *   .returnsAudio(mp3Buffer)
+     *   .register();
+     */
+    returnsAudio(data: string | Buffer | Uint8Array, mimeType?: AudioMimeType): this;
     /**
      * Set the complete mock response object.
      * This allows full control over all response properties.
@@ -4016,4 +4718,4 @@ declare function createTextMockStream(text: string, options?: {
     usage?: MockResponse["usage"];
 }): LLMStream;
 
-export { type EventHandlers as $, type AgentHooks as A, BaseGadget as B, type CompactionStrategy as C, type ProviderAdapter as D, type ExecutionContext as E, type ModelDescriptor as F, GadgetRegistry as G, type HintTemplate as H, type IConversationManager as I, type ModelSpec as J, type LLMGenerationOptions as K, type LLMStream as L, MockProviderAdapter as M, type ImageModelSpec as N, type ImageGenerationOptions as O, type ParsedGadgetCall as P, type ImageGenerationResult as Q, type ResolvedCompactionConfig as R, type StreamEvent as S, type TokenUsage as T, type SpeechModelSpec as U, type SpeechGenerationOptions as V, type SpeechGenerationResult as W, type HistoryMessage as X, type TrailingMessage as Y, type TrailingMessageContext as Z, AgentBuilder as _, type LLMStreamChunk as a, collectEvents as a0, collectText as a1, runWithHandlers as a2, type AfterGadgetExecutionAction as a3, type AfterGadgetExecutionControllerContext as a4, type AfterLLMCallAction as a5, type AfterLLMCallControllerContext as a6, type AfterLLMErrorAction as a7, type AgentOptions as a8, type BeforeGadgetExecutionAction as a9, type ModelLimits as aA, type ModelPricing as aB, type ProviderIdentifier as aC, ModelIdentifierParser as aD, type HintContext as aE, type PromptConfig as aF, type PromptContext as aG, type PromptTemplate as aH, DEFAULT_HINTS as aI, DEFAULT_PROMPTS as aJ, resolveHintTemplate as aK, resolvePromptTemplate as aL, resolveRulesTemplate as aM, type QuickOptions as aN, complete as aO, stream as aP, type GadgetClass as aQ, type GadgetOrClass as aR, type CostReportingLLMist as aS, type GadgetExecuteResult as aT, type TextOnlyAction as aU, type TextOnlyContext as aV, type TextOnlyCustomHandler as aW, type TextOnlyGadgetConfig as aX, type TextOnlyHandler as aY, type TextOnlyStrategy as aZ, type BeforeLLMCallAction as aa, type ChunkInterceptorContext as ab, type Controllers as ac, type GadgetExecutionControllerContext as ad, type GadgetParameterInterceptorContext as ae, type GadgetResultInterceptorContext as af, type Interceptors as ag, type LLMCallControllerContext as ah, type LLMErrorControllerContext as ai, type MessageInterceptorContext as aj, type ObserveChunkContext as ak, type ObserveGadgetCompleteContext as al, type ObserveGadgetStartContext as am, type ObserveLLMCallContext as an, type ObserveLLMCompleteContext as ao, type ObserveLLMErrorContext as ap, type Observers as aq, type MessageTurn as ar, type ObserveCompactionContext as as, DEFAULT_COMPACTION_CONFIG as at, DEFAULT_SUMMARIZATION_PROMPT as au, type LLMistOptions as av, type LLMRole as aw, LLMMessageBuilder as ax, type CostEstimate as ay, type ModelFeatures as az, type LLMMessage as b, createMockAdapter as c, MockBuilder as d, createMockClient as e, MockManager as f, getMockManager as g, createMockStream as h, createTextMockStream as i, type MockMatcher as j, type MockMatcherContext as k, type MockOptions as l, mockLLM as m, type MockRegistration as n, type MockResponse as o, type MockStats as p, ModelRegistry as q, LLMist as r, type CompactionContext as s, type CompactionResult as t, type CompactionConfig as u, type CompactionEvent as v, type CompactionStats as w, type GadgetExecuteReturn as x, type GadgetExample as y, type GadgetExecutionResult as z };
+export { type TrailingMessage as $, type AgentHooks as A, BaseGadget as B, type CompactionStrategy as C, type GadgetExecuteReturn as D, type ExecutionContext as E, type GadgetExample as F, GadgetRegistry as G, type HintTemplate as H, type IConversationManager as I, type GadgetExecutionResult as J, type ProviderAdapter as K, type LLMStream as L, MockProviderAdapter as M, type ModelDescriptor as N, type ModelSpec as O, type ParsedGadgetCall as P, type LLMGenerationOptions as Q, type ResolvedCompactionConfig as R, type StreamEvent as S, type TokenUsage as T, type ImageModelSpec as U, type ImageGenerationOptions as V, type ImageGenerationResult as W, type SpeechModelSpec as X, type SpeechGenerationOptions as Y, type SpeechGenerationResult as Z, type HistoryMessage as _, type LLMStreamChunk as a, type VisionAnalyzeOptions as a$, type TrailingMessageContext as a0, AgentBuilder as a1, type EventHandlers as a2, collectEvents as a3, collectText as a4, runWithHandlers as a5, type AfterGadgetExecutionAction as a6, type AfterGadgetExecutionControllerContext as a7, type AfterLLMCallAction as a8, type AfterLLMCallControllerContext as a9, type AudioMimeType as aA, type AudioSource as aB, type ContentPart as aC, type ImageBase64Source as aD, type ImageContentPart as aE, type ImageMimeType as aF, type ImageSource as aG, type ImageUrlSource as aH, type TextContentPart as aI, audioFromBase64 as aJ, audioFromBuffer as aK, detectAudioMimeType as aL, detectImageMimeType as aM, imageFromBase64 as aN, imageFromBuffer as aO, imageFromUrl as aP, isAudioPart as aQ, isDataUrl as aR, isImagePart as aS, isTextPart as aT, parseDataUrl as aU, text as aV, toBase64 as aW, type LLMRole as aX, extractText as aY, LLMMessageBuilder as aZ, normalizeContent as a_, type AfterLLMErrorAction as aa, type AgentOptions as ab, type BeforeGadgetExecutionAction as ac, type BeforeLLMCallAction as ad, type ChunkInterceptorContext as ae, type Controllers as af, type GadgetExecutionControllerContext as ag, type GadgetParameterInterceptorContext as ah, type GadgetResultInterceptorContext as ai, type Interceptors as aj, type LLMCallControllerContext as ak, type LLMErrorControllerContext as al, type MessageInterceptorContext as am, type ObserveChunkContext as an, type ObserveGadgetCompleteContext as ao, type ObserveGadgetStartContext as ap, type ObserveLLMCallContext as aq, type ObserveLLMCompleteContext as ar, type ObserveLLMErrorContext as as, type Observers as at, type MessageTurn as au, type ObserveCompactionContext as av, DEFAULT_COMPACTION_CONFIG as aw, DEFAULT_SUMMARIZATION_PROMPT as ax, type LLMistOptions as ay, type AudioContentPart as az, type LLMMessage as b, type VisionAnalyzeResult as b0, type CostEstimate as b1, type ModelFeatures as b2, type ModelLimits as b3, type ModelPricing as b4, type ProviderIdentifier as b5, ModelIdentifierParser as b6, type HintContext as b7, type PromptConfig as b8, type PromptContext as b9, type PromptTemplate as ba, DEFAULT_HINTS as bb, DEFAULT_PROMPTS as bc, resolveHintTemplate as bd, resolvePromptTemplate as be, resolveRulesTemplate as bf, type QuickOptions as bg, complete as bh, stream as bi, type GadgetClass as bj, type GadgetOrClass as bk, type CostReportingLLMist as bl, type GadgetExecuteResult as bm, type GadgetSkippedEvent as bn, type TextOnlyAction as bo, type TextOnlyContext as bp, type TextOnlyCustomHandler as bq, type TextOnlyGadgetConfig as br, type TextOnlyHandler as bs, type TextOnlyStrategy as bt, createMockAdapter as c, MockBuilder as d, createMockClient as e, MockManager as f, getMockManager as g, createMockStream as h, createTextMockStream as i, type MockAudioData as j, type MockImageData as k, type MockMatcher as l, mockLLM as m, type MockMatcherContext as n, type MockOptions as o, type MockRegistration as p, type MockResponse as q, type MockStats as r, ModelRegistry as s, type MessageContent as t, LLMist as u, type CompactionContext as v, type CompactionResult as w, type CompactionConfig as x, type CompactionEvent as y, type CompactionStats as z };
getMockManager as g, createMockStream as h, createTextMockStream as i, type MockAudioData as j, type MockImageData as k, type MockMatcher as l, mockLLM as m, type MockMatcherContext as n, type MockOptions as o, type MockRegistration as p, type MockResponse as q, type MockStats as r, ModelRegistry as s, type MessageContent as t, LLMist as u, type CompactionContext as v, type CompactionResult as w, type CompactionConfig as x, type CompactionEvent as y, type CompactionStats as z };