llmist 2.2.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.cjs CHANGED
@@ -2498,7 +2498,27 @@ var init_cost_reporting_client = __esm({
   constructor(client, reportCost) {
     this.client = client;
     this.reportCost = reportCost;
+    this.image = {
+      generate: async (options) => {
+        const result = await this.client.image.generate(options);
+        if (result.cost !== void 0 && result.cost > 0) {
+          this.reportCost(result.cost);
+        }
+        return result;
+      }
+    };
+    this.speech = {
+      generate: async (options) => {
+        const result = await this.client.speech.generate(options);
+        if (result.cost !== void 0 && result.cost > 0) {
+          this.reportCost(result.cost);
+        }
+        return result;
+      }
+    };
   }
+  image;
+  speech;
   /**
    * Access to model registry for cost estimation.
    */
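A minimal usage sketch of the cost-forwarding behavior above (hedged: the diff only shows that this class — presumably the one exported from `init_cost_reporting_client`, called `CostReportingClient` below — receives an underlying client plus a `reportCost` callback):

    // Sketch: any image/speech generation returning a positive cost is
    // forwarded to the reportCost callback before the result is returned.
    let total = 0;
    const wrapped = new CostReportingClient(client, (cost: number) => { total += cost; });
    const result = await wrapped.image.generate({ model: "dall-e-3", prompt: "a red fox" });
    // total now includes result.cost (when it is defined and > 0)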
@@ -4591,6 +4611,28 @@ var init_anthropic = __esm({
   getModelSpecs() {
     return ANTHROPIC_MODELS;
   }
+  // =========================================================================
+  // Image Generation (Not Supported)
+  // =========================================================================
+  supportsImageGeneration(_modelId) {
+    return false;
+  }
+  async generateImage() {
+    throw new Error(
+      "Anthropic does not support image generation. Use OpenAI (DALL-E, GPT Image) or Google Gemini (Imagen) instead."
+    );
+  }
+  // =========================================================================
+  // Speech Generation (Not Supported)
+  // =========================================================================
+  supportsSpeechGeneration(_modelId) {
+    return false;
+  }
+  async generateSpeech() {
+    throw new Error(
+      "Anthropic does not support speech generation. Use OpenAI (TTS) or Google Gemini (TTS) instead."
+    );
+  }
   buildRequestPayload(options, descriptor, spec, messages) {
     const systemMessages = messages.filter((message) => message.role === "system");
     const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
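These Anthropic stubs are defensive: the `ImageNamespace`/`SpeechNamespace` added later in this diff route requests by probing `supportsImageGeneration(modelId)`/`supportsSpeechGeneration(modelId)`, both false here, so callers normally see the namespace's "No provider supports ..." error together with the list of available models; the throws above are reached only if the adapter is invoked directly.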
@@ -4745,6 +4787,182 @@ var init_anthropic = __esm({
   }
 });

+// src/providers/gemini-image-models.ts
+function getGeminiImageModelSpec(modelId) {
+  return geminiImageModels.find((m) => m.modelId === modelId);
+}
+function isGeminiImageModel(modelId) {
+  return geminiImageModels.some((m) => m.modelId === modelId);
+}
+function calculateGeminiImageCost(modelId, size = "1:1", n = 1) {
+  const spec = getGeminiImageModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perImage !== void 0) {
+    return spec.pricing.perImage * n;
+  }
+  if (spec.pricing.bySize) {
+    const sizePrice = spec.pricing.bySize[size];
+    if (typeof sizePrice === "number") {
+      return sizePrice * n;
+    }
+  }
+  return void 0;
+}
+var IMAGEN4_ASPECT_RATIOS, GEMINI_IMAGE_ASPECT_RATIOS, geminiImageModels;
+var init_gemini_image_models = __esm({
+  "src/providers/gemini-image-models.ts"() {
+    "use strict";
+    IMAGEN4_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
+    GEMINI_IMAGE_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
+    geminiImageModels = [
+      // Imagen 4 Family (standalone image generation)
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-fast-generate-001",
+        displayName: "Imagen 4 Fast",
+        pricing: {
+          perImage: 0.02
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-generate-001",
+        displayName: "Imagen 4",
+        pricing: {
+          perImage: 0.04
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-ultra-generate-001",
+        displayName: "Imagen 4 Ultra",
+        pricing: {
+          perImage: 0.06
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      // Preview versions
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-generate-preview-06-06",
+        displayName: "Imagen 4 (Preview)",
+        pricing: {
+          perImage: 0.04
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-ultra-generate-preview-06-06",
+        displayName: "Imagen 4 Ultra (Preview)",
+        pricing: {
+          perImage: 0.06
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      // Gemini Native Image Generation (multimodal models)
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-image",
+        displayName: "Gemini 2.5 Flash Image",
+        pricing: {
+          perImage: 0.039
+        },
+        supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
+        maxImages: 1,
+        defaultSize: "1:1",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-image-preview",
+        displayName: "Gemini 2.5 Flash Image (Preview)",
+        pricing: {
+          perImage: 0.039
+        },
+        supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
+        maxImages: 1,
+        defaultSize: "1:1",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-3-pro-image-preview",
+        displayName: "Gemini 3 Pro Image (Preview)",
+        pricing: {
+          // Token-based: ~$0.134 per 1K/2K image, $0.24 per 4K
+          // Using 2K as default
+          bySize: {
+            "1K": 0.134,
+            "2K": 0.134,
+            "4K": 0.24
+          }
+        },
+        supportedSizes: ["1K", "2K", "4K"],
+        maxImages: 1,
+        defaultSize: "2K",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      // Alias: nano-banana-pro-preview is gemini-3-pro-image-preview
+      {
+        provider: "gemini",
+        modelId: "nano-banana-pro-preview",
+        displayName: "Nano Banana Pro (Gemini 3 Pro Image)",
+        pricing: {
+          bySize: {
+            "1K": 0.134,
+            "2K": 0.134,
+            "4K": 0.24
+          }
+        },
+        supportedSizes: ["1K", "2K", "4K"],
+        maxImages: 1,
+        defaultSize: "2K",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/gemini-models.ts
 var GEMINI_MODELS;
 var init_gemini_models = __esm({
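Worked cost examples against the specs above (a sketch; `calculateGeminiImageCost` is the helper defined at the top of this hunk):

    calculateGeminiImageCost("imagen-4.0-generate-001", "1:1", 4);   // perImage: 0.04 * 4 = 0.16
    calculateGeminiImageCost("imagen-4.0-generate-001", "16:9", 2);  // perImage ignores size: 0.08
    calculateGeminiImageCost("gemini-3-pro-image-preview", "4K", 1); // bySize["4K"]: 0.24
    calculateGeminiImageCost("unknown-model", "1:1", 1);             // undefined (no spec)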
@@ -4918,7 +5136,171 @@ var init_gemini_models = __esm({
   }
 });

+// src/providers/gemini-speech-models.ts
+function getGeminiSpeechModelSpec(modelId) {
+  return geminiSpeechModels.find((m) => m.modelId === modelId);
+}
+function isGeminiSpeechModel(modelId) {
+  return geminiSpeechModels.some((m) => m.modelId === modelId);
+}
+function calculateGeminiSpeechCost(modelId, characterCount, estimatedMinutes) {
+  const spec = getGeminiSpeechModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perMinute !== void 0) {
+    if (estimatedMinutes !== void 0) {
+      return estimatedMinutes * spec.pricing.perMinute;
+    }
+    const approxMinutes = characterCount / 750;
+    return approxMinutes * spec.pricing.perMinute;
+  }
+  return void 0;
+}
+var GEMINI_TTS_VOICES, GEMINI_TTS_FORMATS, geminiSpeechModels;
+var init_gemini_speech_models = __esm({
+  "src/providers/gemini-speech-models.ts"() {
+    "use strict";
+    GEMINI_TTS_VOICES = [
+      "Zephyr",
+      // Bright
+      "Puck",
+      // Upbeat
+      "Charon",
+      // Informative
+      "Kore",
+      // Firm
+      "Fenrir",
+      // Excitable
+      "Leda",
+      // Youthful
+      "Orus",
+      // Firm
+      "Aoede",
+      // Breezy
+      "Callirrhoe",
+      // Easy-going
+      "Autonoe",
+      // Bright
+      "Enceladus",
+      // Breathy
+      "Iapetus",
+      // Clear
+      "Umbriel",
+      // Easy-going
+      "Algieba",
+      // Smooth
+      "Despina",
+      // Smooth
+      "Erinome",
+      // Clear
+      "Algenib",
+      // Gravelly
+      "Rasalgethi",
+      // Informative
+      "Laomedeia",
+      // Upbeat
+      "Achernar",
+      // Soft
+      "Alnilam",
+      // Firm
+      "Schedar",
+      // Even
+      "Gacrux",
+      // Mature
+      "Pulcherrima",
+      // Forward
+      "Achird",
+      // Friendly
+      "Zubenelgenubi",
+      // Casual
+      "Vindemiatrix",
+      // Gentle
+      "Sadachbia",
+      // Lively
+      "Sadaltager",
+      // Knowledgeable
+      "Sulafat"
+      // Warm
+    ];
+    GEMINI_TTS_FORMATS = ["pcm", "wav"];
+    geminiSpeechModels = [
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-preview-tts",
+        displayName: "Gemini 2.5 Flash TTS (Preview)",
+        pricing: {
+          // $0.50 per 1M input tokens = $0.0000005 per token
+          perInputToken: 5e-7,
+          // $10.00 per 1M audio output tokens = $0.00001 per token
+          perAudioOutputToken: 1e-5,
+          // Rough estimate: ~$0.01 per minute of audio
+          perMinute: 0.01
+        },
+        voices: [...GEMINI_TTS_VOICES],
+        formats: GEMINI_TTS_FORMATS,
+        maxInputLength: 8e3,
+        // bytes (text + prompt combined)
+        defaultVoice: "Zephyr",
+        defaultFormat: "wav",
+        features: {
+          multiSpeaker: true,
+          languages: 24,
+          voiceInstructions: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-pro-preview-tts",
+        displayName: "Gemini 2.5 Pro TTS (Preview)",
+        pricing: {
+          // $1.00 per 1M input tokens = $0.000001 per token
+          perInputToken: 1e-6,
+          // $20.00 per 1M audio output tokens = $0.00002 per token
+          perAudioOutputToken: 2e-5,
+          // Rough estimate: ~$0.02 per minute of audio
+          perMinute: 0.02
+        },
+        voices: [...GEMINI_TTS_VOICES],
+        formats: GEMINI_TTS_FORMATS,
+        maxInputLength: 8e3,
+        // bytes
+        defaultVoice: "Zephyr",
+        defaultFormat: "wav",
+        features: {
+          multiSpeaker: true,
+          languages: 24,
+          voiceInstructions: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/gemini.ts
+function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
+  const byteRate = sampleRate * numChannels * bitsPerSample / 8;
+  const blockAlign = numChannels * bitsPerSample / 8;
+  const dataSize = pcmData.length;
+  const headerSize = 44;
+  const fileSize = headerSize + dataSize - 8;
+  const buffer = new ArrayBuffer(headerSize + dataSize);
+  const view = new DataView(buffer);
+  const uint8 = new Uint8Array(buffer);
+  view.setUint32(0, 1380533830, false);
+  view.setUint32(4, fileSize, true);
+  view.setUint32(8, 1463899717, false);
+  view.setUint32(12, 1718449184, false);
+  view.setUint32(16, 16, true);
+  view.setUint16(20, 1, true);
+  view.setUint16(22, numChannels, true);
+  view.setUint32(24, sampleRate, true);
+  view.setUint32(28, byteRate, true);
+  view.setUint16(32, blockAlign, true);
+  view.setUint16(34, bitsPerSample, true);
+  view.setUint32(36, 1684108385, false);
+  view.setUint32(40, dataSize, true);
+  uint8.set(pcmData, headerSize);
+  return buffer;
+}
 function createGeminiProviderFromEnv() {
   return createProviderFromEnv("GEMINI_API_KEY", import_genai.GoogleGenAI, GeminiGenerativeProvider);
 }
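The big-endian `setUint32` constants in `wrapPcmInWav` are the ASCII four-character chunk tags of a canonical 44-byte RIFF/WAVE header; the little-endian writes fill the PCM `fmt ` fields (audioFormat=1, channels, sample rate, byte rate, block align, bits per sample) and the `data` chunk size. A self-contained decoding sketch (the `fourCC` helper is hypothetical, not part of the package):

    const fourCC = (n: number) =>
      String.fromCharCode((n >>> 24) & 255, (n >>> 16) & 255, (n >>> 8) & 255, n & 255);
    fourCC(1380533830); // "RIFF"
    fourCC(1463899717); // "WAVE"
    fourCC(1718449184); // "fmt " (the trailing space is part of the tag)
    fourCC(1684108385); // "data"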
@@ -4929,7 +5311,9 @@ var init_gemini = __esm({
   import_genai = require("@google/genai");
   init_base_provider();
   init_constants2();
+  init_gemini_image_models();
   init_gemini_models();
+  init_gemini_speech_models();
   init_utils();
   GEMINI_ROLE_MAP = {
     system: "user",
@@ -4944,6 +5328,139 @@ var init_gemini = __esm({
   getModelSpecs() {
     return GEMINI_MODELS;
   }
+  // =========================================================================
+  // Image Generation
+  // =========================================================================
+  getImageModelSpecs() {
+    return geminiImageModels;
+  }
+  supportsImageGeneration(modelId) {
+    return isGeminiImageModel(modelId);
+  }
+  async generateImage(options) {
+    const client = this.client;
+    const spec = getGeminiImageModelSpec(options.model);
+    const isImagenModel = options.model.startsWith("imagen");
+    const aspectRatio = options.size ?? spec?.defaultSize ?? "1:1";
+    const n = options.n ?? 1;
+    if (isImagenModel) {
+      const response2 = await client.models.generateImages({
+        model: options.model,
+        prompt: options.prompt,
+        config: {
+          numberOfImages: n,
+          aspectRatio,
+          outputMimeType: options.responseFormat === "b64_json" ? "image/png" : "image/jpeg"
+        }
+      });
+      const images2 = response2.generatedImages ?? [];
+      const cost2 = calculateGeminiImageCost(options.model, aspectRatio, images2.length);
+      return {
+        // Gemini's imageBytes is already base64 encoded, so use it directly
+        images: images2.map((img) => ({
+          b64Json: img.image?.imageBytes ?? void 0
+        })),
+        model: options.model,
+        usage: {
+          imagesGenerated: images2.length,
+          size: aspectRatio,
+          quality: "standard"
+        },
+        cost: cost2
+      };
+    }
+    const response = await client.models.generateContent({
+      model: options.model,
+      contents: [{ role: "user", parts: [{ text: options.prompt }] }],
+      config: {
+        responseModalities: [import_genai.Modality.IMAGE, import_genai.Modality.TEXT]
+      }
+    });
+    const images = [];
+    const candidate = response.candidates?.[0];
+    if (candidate?.content?.parts) {
+      for (const part of candidate.content.parts) {
+        if ("inlineData" in part && part.inlineData) {
+          images.push({
+            b64Json: part.inlineData.data
+          });
+        }
+      }
+    }
+    const cost = calculateGeminiImageCost(options.model, aspectRatio, images.length);
+    return {
+      images,
+      model: options.model,
+      usage: {
+        imagesGenerated: images.length,
+        size: aspectRatio,
+        quality: "standard"
+      },
+      cost
+    };
+  }
+  // =========================================================================
+  // Speech Generation
+  // =========================================================================
+  getSpeechModelSpecs() {
+    return geminiSpeechModels;
+  }
+  supportsSpeechGeneration(modelId) {
+    return isGeminiSpeechModel(modelId);
+  }
+  async generateSpeech(options) {
+    const client = this.client;
+    const spec = getGeminiSpeechModelSpec(options.model);
+    const voice = options.voice ?? spec?.defaultVoice ?? "Zephyr";
+    const response = await client.models.generateContent({
+      model: options.model,
+      contents: [
+        {
+          role: "user",
+          parts: [{ text: options.input }]
+        }
+      ],
+      config: {
+        responseModalities: [import_genai.Modality.AUDIO],
+        speechConfig: {
+          voiceConfig: {
+            prebuiltVoiceConfig: {
+              voiceName: voice
+            }
+          }
+        }
+      }
+    });
+    let pcmData;
+    const candidate = response.candidates?.[0];
+    if (candidate?.content?.parts) {
+      for (const part of candidate.content.parts) {
+        if ("inlineData" in part && part.inlineData?.data) {
+          const base64 = part.inlineData.data;
+          const binary = atob(base64);
+          pcmData = new Uint8Array(binary.length);
+          for (let i = 0; i < binary.length; i++) {
+            pcmData[i] = binary.charCodeAt(i);
+          }
+          break;
+        }
+      }
+    }
+    if (!pcmData) {
+      throw new Error("No audio data in Gemini TTS response");
+    }
+    const audioData = wrapPcmInWav(pcmData, 24e3, 16, 1);
+    const cost = calculateGeminiSpeechCost(options.model, options.input.length);
+    return {
+      audio: audioData,
+      model: options.model,
+      usage: {
+        characterCount: options.input.length
+      },
+      cost,
+      format: spec?.defaultFormat ?? "wav"
+    };
+  }
   buildRequestPayload(options, descriptor, _spec, messages) {
     const contents = this.convertMessagesToContents(messages);
     const generationConfig = this.buildGenerationConfig(options);
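With the namespaces added later in this diff, the path above is reachable as `client.speech.generate(...)`. A hedged usage sketch (file handling is illustrative only):

    // Gemini TTS yields 24 kHz / 16-bit / mono PCM, wrapped via wrapPcmInWav:
    const speech = await client.speech.generate({
      model: "gemini-2.5-flash-preview-tts",
      input: "Hello from llmist",
      voice: "Kore" // any of the 30 GEMINI_TTS_VOICES; defaults to "Zephyr"
    });
    // speech.audio is an ArrayBuffer holding a WAV file; speech.format === "wav";
    // speech.cost is estimated as (input.length / 750 chars-per-minute) * perMinute.
    require("node:fs").writeFileSync("hello.wav", Buffer.from(speech.audio));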
@@ -5139,6 +5656,121 @@ var init_gemini = __esm({
   }
 });

+// src/providers/openai-image-models.ts
+function getOpenAIImageModelSpec(modelId) {
+  return openaiImageModels.find((m) => m.modelId === modelId);
+}
+function isOpenAIImageModel(modelId) {
+  return openaiImageModels.some((m) => m.modelId === modelId);
+}
+function calculateOpenAIImageCost(modelId, size, quality = "standard", n = 1) {
+  const spec = getOpenAIImageModelSpec(modelId);
+  if (!spec) return void 0;
+  const sizePrice = spec.pricing.bySize?.[size];
+  if (sizePrice === void 0) return void 0;
+  let pricePerImage;
+  if (typeof sizePrice === "number") {
+    pricePerImage = sizePrice;
+  } else {
+    pricePerImage = sizePrice[quality];
+    if (pricePerImage === void 0) return void 0;
+  }
+  return pricePerImage * n;
+}
+var GPT_IMAGE_SIZES, GPT_IMAGE_QUALITIES, DALLE3_SIZES, DALLE3_QUALITIES, DALLE2_SIZES, openaiImageModels;
+var init_openai_image_models = __esm({
+  "src/providers/openai-image-models.ts"() {
+    "use strict";
+    GPT_IMAGE_SIZES = ["1024x1024", "1024x1536", "1536x1024"];
+    GPT_IMAGE_QUALITIES = ["low", "medium", "high"];
+    DALLE3_SIZES = ["1024x1024", "1024x1792", "1792x1024"];
+    DALLE3_QUALITIES = ["standard", "hd"];
+    DALLE2_SIZES = ["256x256", "512x512", "1024x1024"];
+    openaiImageModels = [
+      // GPT Image 1 Family (flagship)
+      {
+        provider: "openai",
+        modelId: "gpt-image-1",
+        displayName: "GPT Image 1",
+        pricing: {
+          bySize: {
+            "1024x1024": { low: 0.011, medium: 0.04, high: 0.17 },
+            "1024x1536": { low: 0.016, medium: 0.06, high: 0.25 },
+            "1536x1024": { low: 0.016, medium: 0.06, high: 0.25 }
+          }
+        },
+        supportedSizes: [...GPT_IMAGE_SIZES],
+        supportedQualities: [...GPT_IMAGE_QUALITIES],
+        maxImages: 1,
+        defaultSize: "1024x1024",
+        defaultQuality: "medium",
+        features: {
+          textRendering: true,
+          transparency: true
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "gpt-image-1-mini",
+        displayName: "GPT Image 1 Mini",
+        pricing: {
+          bySize: {
+            "1024x1024": { low: 5e-3, medium: 0.02, high: 0.052 },
+            "1024x1536": { low: 75e-4, medium: 0.03, high: 0.078 },
+            "1536x1024": { low: 75e-4, medium: 0.03, high: 0.078 }
+          }
+        },
+        supportedSizes: [...GPT_IMAGE_SIZES],
+        supportedQualities: [...GPT_IMAGE_QUALITIES],
+        maxImages: 1,
+        defaultSize: "1024x1024",
+        defaultQuality: "medium",
+        features: {
+          textRendering: true,
+          transparency: true
+        }
+      },
+      // DALL-E Family
+      {
+        provider: "openai",
+        modelId: "dall-e-3",
+        displayName: "DALL-E 3",
+        pricing: {
+          bySize: {
+            "1024x1024": { standard: 0.04, hd: 0.08 },
+            "1024x1792": { standard: 0.08, hd: 0.12 },
+            "1792x1024": { standard: 0.08, hd: 0.12 }
+          }
+        },
+        supportedSizes: [...DALLE3_SIZES],
+        supportedQualities: [...DALLE3_QUALITIES],
+        maxImages: 1,
+        // DALL-E 3 only supports n=1
+        defaultSize: "1024x1024",
+        defaultQuality: "standard",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "dall-e-2",
+        displayName: "DALL-E 2 (Legacy)",
+        pricing: {
+          bySize: {
+            "256x256": 0.016,
+            "512x512": 0.018,
+            "1024x1024": 0.02
+          }
+        },
+        supportedSizes: [...DALLE2_SIZES],
+        maxImages: 10,
+        defaultSize: "1024x1024"
+      }
+    ];
+  }
+});
+
 // src/providers/openai-models.ts
 var OPENAI_MODELS;
 var init_openai_models = __esm({
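Worked examples against the tiered pricing above (sketch):

    calculateOpenAIImageCost("dall-e-3", "1024x1024", "hd", 2);       // 0.08 * 2 = 0.16
    calculateOpenAIImageCost("dall-e-2", "512x512", "standard", 3);   // flat 0.018 * 3 = 0.054
    calculateOpenAIImageCost("gpt-image-1-mini", "1024x1024", "low"); // 0.005
    calculateOpenAIImageCost("dall-e-3", "512x512");                  // undefined: size not listed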
@@ -5503,6 +6135,144 @@ var init_openai_models = __esm({
   }
 });

+// src/providers/openai-speech-models.ts
+function getOpenAISpeechModelSpec(modelId) {
+  return openaiSpeechModels.find((m) => m.modelId === modelId);
+}
+function isOpenAISpeechModel(modelId) {
+  return openaiSpeechModels.some((m) => m.modelId === modelId);
+}
+function calculateOpenAISpeechCost(modelId, characterCount, estimatedMinutes) {
+  const spec = getOpenAISpeechModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perCharacter !== void 0) {
+    return characterCount * spec.pricing.perCharacter;
+  }
+  if (spec.pricing.perMinute !== void 0 && estimatedMinutes !== void 0) {
+    return estimatedMinutes * spec.pricing.perMinute;
+  }
+  if (spec.pricing.perMinute !== void 0) {
+    const approxMinutes = characterCount / 750;
+    return approxMinutes * spec.pricing.perMinute;
+  }
+  return void 0;
+}
+var OPENAI_TTS_VOICES, OPENAI_TTS_EXTENDED_VOICES, OPENAI_TTS_FORMATS, openaiSpeechModels;
+var init_openai_speech_models = __esm({
+  "src/providers/openai-speech-models.ts"() {
+    "use strict";
+    OPENAI_TTS_VOICES = [
+      "alloy",
+      "echo",
+      "fable",
+      "onyx",
+      "nova",
+      "shimmer"
+    ];
+    OPENAI_TTS_EXTENDED_VOICES = [
+      ...OPENAI_TTS_VOICES,
+      "ash",
+      "ballad",
+      "coral",
+      "sage",
+      "verse"
+    ];
+    OPENAI_TTS_FORMATS = ["mp3", "opus", "aac", "flac", "wav", "pcm"];
+    openaiSpeechModels = [
+      // Standard TTS models (character-based pricing)
+      {
+        provider: "openai",
+        modelId: "tts-1",
+        displayName: "TTS-1",
+        pricing: {
+          // $15 per 1M characters = $0.000015 per character
+          perCharacter: 15e-6
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-1106",
+        displayName: "TTS-1 (Nov 2023)",
+        pricing: {
+          perCharacter: 15e-6
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-hd",
+        displayName: "TTS-1 HD",
+        pricing: {
+          // $30 per 1M characters = $0.00003 per character
+          perCharacter: 3e-5
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-hd-1106",
+        displayName: "TTS-1 HD (Nov 2023)",
+        pricing: {
+          perCharacter: 3e-5
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      // Token-based TTS model with voice instructions support
+      {
+        provider: "openai",
+        modelId: "gpt-4o-mini-tts",
+        displayName: "GPT-4o Mini TTS",
+        pricing: {
+          // $0.60 per 1M input tokens = $0.0000006 per token
+          perInputToken: 6e-7,
+          // $12 per 1M audio output tokens = $0.000012 per token
+          perAudioOutputToken: 12e-6,
+          // ~$0.015 per minute of audio
+          perMinute: 0.015
+        },
+        voices: [...OPENAI_TTS_EXTENDED_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 2e3,
+        // tokens, not characters
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/openai.ts
 function sanitizeExtra(extra, allowTemperature) {
   if (!extra) {
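And for the speech specs (sketch):

    calculateOpenAISpeechCost("tts-1", 1000);    // 1000 * 0.000015 = 0.015
    calculateOpenAISpeechCost("tts-1-hd", 1000); // 1000 * 0.00003  = 0.03
    // gpt-4o-mini-tts has no perCharacter rate, so without estimatedMinutes it
    // falls back to the 750-characters-per-minute heuristic:
    calculateOpenAISpeechCost("gpt-4o-mini-tts", 1000); // (1000 / 750) * 0.015 = 0.02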
@@ -5524,7 +6294,9 @@ var init_openai = __esm({
   import_tiktoken = require("tiktoken");
   init_base_provider();
   init_constants2();
+  init_openai_image_models();
   init_openai_models();
+  init_openai_speech_models();
   init_utils();
   ROLE_MAP = {
     system: "system",
@@ -5539,6 +6311,87 @@ var init_openai = __esm({
   getModelSpecs() {
     return OPENAI_MODELS;
   }
+  // =========================================================================
+  // Image Generation
+  // =========================================================================
+  getImageModelSpecs() {
+    return openaiImageModels;
+  }
+  supportsImageGeneration(modelId) {
+    return isOpenAIImageModel(modelId);
+  }
+  async generateImage(options) {
+    const client = this.client;
+    const spec = getOpenAIImageModelSpec(options.model);
+    const size = options.size ?? spec?.defaultSize ?? "1024x1024";
+    const quality = options.quality ?? spec?.defaultQuality ?? "standard";
+    const n = options.n ?? 1;
+    const isDallE2 = options.model === "dall-e-2";
+    const isGptImage = options.model.startsWith("gpt-image");
+    const requestParams = {
+      model: options.model,
+      prompt: options.prompt,
+      size,
+      n
+    };
+    if (!isDallE2 && !isGptImage) {
+      requestParams.quality = quality;
+    }
+    if (isGptImage) {
+    } else if (!isDallE2) {
+      requestParams.response_format = options.responseFormat ?? "url";
+    }
+    const response = await client.images.generate(requestParams);
+    const cost = calculateOpenAIImageCost(options.model, size, quality, n);
+    const images = response.data ?? [];
+    return {
+      images: images.map((img) => ({
+        url: img.url,
+        b64Json: img.b64_json,
+        revisedPrompt: img.revised_prompt
+      })),
+      model: options.model,
+      usage: {
+        imagesGenerated: images.length,
+        size,
+        quality
+      },
+      cost
+    };
+  }
+  // =========================================================================
+  // Speech Generation
+  // =========================================================================
+  getSpeechModelSpecs() {
+    return openaiSpeechModels;
+  }
+  supportsSpeechGeneration(modelId) {
+    return isOpenAISpeechModel(modelId);
+  }
+  async generateSpeech(options) {
+    const client = this.client;
+    const spec = getOpenAISpeechModelSpec(options.model);
+    const format = options.responseFormat ?? spec?.defaultFormat ?? "mp3";
+    const voice = options.voice ?? spec?.defaultVoice ?? "alloy";
+    const response = await client.audio.speech.create({
+      model: options.model,
+      input: options.input,
+      voice,
+      response_format: format,
+      speed: options.speed ?? 1
+    });
+    const audioBuffer = await response.arrayBuffer();
+    const cost = calculateOpenAISpeechCost(options.model, options.input.length);
+    return {
+      audio: audioBuffer,
+      model: options.model,
+      usage: {
+        characterCount: options.input.length
+      },
+      cost,
+      format
+    };
+  }
   buildRequestPayload(options, descriptor, spec, messages) {
     const { maxTokens, temperature, topP, stopSequences, extra } = options;
     const supportsTemperature = spec?.metadata?.supportsTemperature !== false;
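A behavioral note on `generateImage` above: the intentionally empty `if (isGptImage) {}` branch means `gpt-image-*` requests send neither `quality` (in this request shape) nor `response_format`, so GPT Image results come back base64-encoded, while DALL-E 3 defaults to `response_format: "url"`. A speech usage sketch via the client namespace added later in this diff:

    const speech = await client.speech.generate({
      model: "tts-1",
      input: "Streaming tools, now with a voice.",
      voice: "nova",
      responseFormat: "mp3",
      speed: 1.25
    });
    // speech.audio is an ArrayBuffer; for tts-1, cost = input.length * 0.000015.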
@@ -5879,30 +6732,109 @@ var init_model_registry = __esm({
   }
 });

-// src/core/options.ts
-var ModelIdentifierParser;
-var init_options = __esm({
-  "src/core/options.ts"() {
+// src/core/namespaces/image.ts
+var ImageNamespace;
+var init_image = __esm({
+  "src/core/namespaces/image.ts"() {
     "use strict";
-    ModelIdentifierParser = class {
-      constructor(defaultProvider = "openai") {
+    ImageNamespace = class {
+      constructor(adapters, defaultProvider) {
+        this.adapters = adapters;
+        this.defaultProvider = defaultProvider;
+      }
+      /**
+       * Generate images from a text prompt.
+       *
+       * @param options - Image generation options
+       * @returns Promise resolving to the generation result with images and cost
+       * @throws Error if the provider doesn't support image generation
+       */
+      async generate(options) {
+        const modelId = options.model;
+        const adapter = this.findImageAdapter(modelId);
+        if (!adapter || !adapter.generateImage) {
+          throw new Error(
+            `No provider supports image generation for model "${modelId}". Available image models: ${this.listModels().map((m) => m.modelId).join(", ")}`
+          );
+        }
+        return adapter.generateImage(options);
+      }
+      /**
+       * List all available image generation models.
+       */
+      listModels() {
+        const models = [];
+        for (const adapter of this.adapters) {
+          if (adapter.getImageModelSpecs) {
+            models.push(...adapter.getImageModelSpecs());
+          }
+        }
+        return models;
+      }
+      /**
+       * Check if a model is supported for image generation.
+       */
+      supportsModel(modelId) {
+        return this.findImageAdapter(modelId) !== void 0;
+      }
+      findImageAdapter(modelId) {
+        return this.adapters.find(
+          (adapter) => adapter.supportsImageGeneration?.(modelId) ?? false
+        );
+      }
+    };
+  }
+});
+
+// src/core/namespaces/speech.ts
+var SpeechNamespace;
+var init_speech = __esm({
+  "src/core/namespaces/speech.ts"() {
+    "use strict";
+    SpeechNamespace = class {
+      constructor(adapters, defaultProvider) {
+        this.adapters = adapters;
         this.defaultProvider = defaultProvider;
       }
-      parse(identifier) {
-        const trimmed = identifier.trim();
-        if (!trimmed) {
-          throw new Error("Model identifier cannot be empty");
-        }
-        const [maybeProvider, ...rest] = trimmed.split(":");
-        if (rest.length === 0) {
-          return { provider: this.defaultProvider, name: maybeProvider };
+      /**
+       * Generate speech audio from text.
+       *
+       * @param options - Speech generation options
+       * @returns Promise resolving to the generation result with audio and cost
+       * @throws Error if the provider doesn't support speech generation
+       */
+      async generate(options) {
+        const modelId = options.model;
+        const adapter = this.findSpeechAdapter(modelId);
+        if (!adapter || !adapter.generateSpeech) {
+          throw new Error(
+            `No provider supports speech generation for model "${modelId}". Available speech models: ${this.listModels().map((m) => m.modelId).join(", ")}`
+          );
         }
-        const provider = maybeProvider;
-        const name = rest.join(":");
-        if (!name) {
-          throw new Error("Model name cannot be empty");
+        return adapter.generateSpeech(options);
+      }
+      /**
+       * List all available speech generation models.
+       */
+      listModels() {
+        const models = [];
+        for (const adapter of this.adapters) {
+          if (adapter.getSpeechModelSpecs) {
+            models.push(...adapter.getSpeechModelSpecs());
+          }
         }
-        return { provider, name };
+        return models;
+      }
+      /**
+       * Check if a model is supported for speech generation.
+       */
+      supportsModel(modelId) {
+        return this.findSpeechAdapter(modelId) !== void 0;
+      }
+      findSpeechAdapter(modelId) {
+        return this.adapters.find(
+          (adapter) => adapter.supportsSpeechGeneration?.(modelId) ?? false
+        );
       }
     };
   }
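Both namespaces share the same adapter-probing pattern; a sketch of the observable behavior (assuming OpenAI and Anthropic adapters are registered):

    client.image.supportsModel("dall-e-3");          // true: the OpenAI adapter claims it
    client.image.supportsModel("claude-sonnet-4-5"); // false: Anthropic opts out above
    client.image.listModels().map((m) => m.modelId); // union of every adapter's image specs
    // generate() with an unsupported model throws, listing the available model IDs.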
@@ -5951,6 +6883,69 @@ var init_quick_methods = __esm({
   }
 });

+// src/core/namespaces/text.ts
+var TextNamespace;
+var init_text = __esm({
+  "src/core/namespaces/text.ts"() {
+    "use strict";
+    init_quick_methods();
+    TextNamespace = class {
+      constructor(client) {
+        this.client = client;
+      }
+      /**
+       * Generate a complete text response.
+       *
+       * @param prompt - User prompt
+       * @param options - Optional configuration
+       * @returns Complete text response
+       */
+      async complete(prompt, options) {
+        return complete(this.client, prompt, options);
+      }
+      /**
+       * Stream text chunks.
+       *
+       * @param prompt - User prompt
+       * @param options - Optional configuration
+       * @returns Async generator yielding text chunks
+       */
+      stream(prompt, options) {
+        return stream(this.client, prompt, options);
+      }
+    };
+  }
+});
+
+// src/core/options.ts
+var ModelIdentifierParser;
+var init_options = __esm({
+  "src/core/options.ts"() {
+    "use strict";
+    ModelIdentifierParser = class {
+      constructor(defaultProvider = "openai") {
+        this.defaultProvider = defaultProvider;
+      }
+      parse(identifier) {
+        const trimmed = identifier.trim();
+        if (!trimmed) {
+          throw new Error("Model identifier cannot be empty");
+        }
+        const [maybeProvider, ...rest] = trimmed.split(":");
+        if (rest.length === 0) {
+          return { provider: this.defaultProvider, name: maybeProvider };
+        }
+        const provider = maybeProvider;
+        const name = rest.join(":");
+        if (!name) {
+          throw new Error("Model name cannot be empty");
+        }
+        return { provider, name };
+      }
+    };
+  }
+});
+
 // src/core/client.ts
 var client_exports = {};
 __export(client_exports, {
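`ModelIdentifierParser` is unchanged apart from being moved below the new namespaces; its `provider:name` grammar by example (sketch):

    const parser = new ModelIdentifierParser("openai");
    parser.parse("anthropic:claude-sonnet-4-5"); // { provider: "anthropic", name: "claude-sonnet-4-5" }
    parser.parse("gpt-5-nano");                  // { provider: "openai", name: "gpt-5-nano" } (default provider)
    parser.parse("custom:org:model-v1");         // { provider: "custom", name: "org:model-v1" } (rest rejoined)
    parser.parse("   ");                         // throws "Model identifier cannot be empty"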
@@ -5963,12 +6958,20 @@ var init_client = __esm({
   init_builder();
   init_discovery();
   init_model_registry();
+  init_image();
+  init_speech();
+  init_text();
   init_options();
   init_quick_methods();
   LLMist = class _LLMist {
     parser;
+    defaultProvider;
     modelRegistry;
     adapters;
+    // Namespaces for different generation types
+    text;
+    image;
+    speech;
     constructor(...args) {
       let adapters = [];
       let defaultProvider;
@@ -6007,6 +7010,7 @@ var init_client = __esm({
       const priorityB = b.priority ?? 0;
       return priorityB - priorityA;
     });
+    this.defaultProvider = resolvedDefaultProvider;
     this.parser = new ModelIdentifierParser(resolvedDefaultProvider);
     this.modelRegistry = new ModelRegistry();
     for (const adapter of this.adapters) {
@@ -6015,6 +7019,9 @@ var init_client = __esm({
     if (customModels.length > 0) {
       this.modelRegistry.registerModels(customModels);
     }
+    this.text = new TextNamespace(this);
+    this.image = new ImageNamespace(this.adapters, this.defaultProvider);
+    this.speech = new SpeechNamespace(this.adapters, this.defaultProvider);
   }
   stream(options) {
     const descriptor = this.parser.parse(options.model);
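After construction, the three namespaces hang off every client instance; a usage sketch (constructor arguments elided, as the surrounding code accepts several shapes):

    const llm = new LLMist();
    await llm.text.complete("Say hi");
    await llm.image.generate({ model: "dall-e-3", prompt: "a fox" });
    await llm.speech.generate({ model: "tts-1", input: "hi" });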
@@ -6995,7 +8002,9 @@ var COMMANDS = {
   complete: "complete",
   agent: "agent",
   models: "models",
-  gadget: "gadget"
+  gadget: "gadget",
+  image: "image",
+  speech: "speech"
 };
 var LOG_LEVELS = ["silly", "trace", "debug", "info", "warn", "error", "fatal"];
 var DEFAULT_MODEL = "openai:gpt-5-nano";
@@ -7016,7 +8025,17 @@ var OPTION_FLAGS = {
   docker: "--docker",
   dockerRo: "--docker-ro",
   noDocker: "--no-docker",
-  dockerDev: "--docker-dev"
+  dockerDev: "--docker-dev",
+  // Image generation options
+  imageSize: "--size <size>",
+  imageQuality: "--quality <quality>",
+  imageCount: "-n, --count <number>",
+  imageOutput: "-o, --output <path>",
+  // Speech generation options
+  voice: "--voice <name>",
+  speechFormat: "--format <format>",
+  speechSpeed: "--speed <value>",
+  speechOutput: "-o, --output <path>"
 };
 var OPTION_DESCRIPTIONS = {
   model: "Model identifier, e.g. openai:gpt-5-nano or anthropic:claude-sonnet-4-5.",
@@ -7035,7 +8054,17 @@ var OPTION_DESCRIPTIONS = {
   docker: "Run agent in a Docker sandbox container for security isolation.",
   dockerRo: "Run in Docker with current directory mounted read-only.",
   noDocker: "Disable Docker sandboxing (override config).",
-  dockerDev: "Run in Docker dev mode (mount local source instead of npm install)."
+  dockerDev: "Run in Docker dev mode (mount local source instead of npm install).",
+  // Image generation descriptions
+  imageSize: "Image size/aspect ratio, e.g. '1024x1024', '1:1', '16:9'.",
+  imageQuality: "Image quality: 'standard', 'hd', 'low', 'medium', 'high'.",
+  imageCount: "Number of images to generate (model dependent, usually 1-4).",
+  imageOutput: "Output path for the generated image. Defaults to stdout if not specified.",
+  // Speech generation descriptions
+  voice: "Voice name for speech generation, e.g. 'nova', 'alloy', 'Zephyr'.",
+  speechFormat: "Audio format: 'mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'.",
+  speechSpeed: "Speech speed multiplier (0.25 to 4.0, default 1.0).",
+  speechOutput: "Output path for audio file. Defaults to stdout if not specified."
 };
 var SUMMARY_PREFIX = "[llmist]";

@@ -7045,8 +8074,8 @@ var import_commander2 = require("commander");
 // package.json
 var package_default = {
   name: "llmist",
-  version: "2.1.0",
-  description: "Universal TypeScript LLM client with streaming-first agent framework. Works with any model - no structured outputs or native tool calling required. Implements its own flexible grammar for function calling.",
+  version: "2.4.0",
+  description: "TypeScript LLM client with streaming tool execution. Tools fire mid-stream. Built-in function calling works with any model\u2014no structured outputs or native tool support required.",
   type: "module",
   main: "dist/index.cjs",
   module: "dist/index.js",
@@ -7120,9 +8149,16 @@ var package_default = {
     "universal-client",
     "multi-provider",
     "hooks",
-    "gadgets"
+    "gadgets",
+    "chatbot",
+    "chatgpt",
+    "agentic",
+    "language-model",
+    "generative-ai",
+    "bun",
+    "nodejs"
   ],
-  author: "",
+  author: "Zbigniew Sobiecki <zbigniew@sobiecki.name>",
   license: "MIT",
   dependencies: {
     "@anthropic-ai/sdk": "^0.69.0",
@@ -9120,6 +10156,22 @@ var AGENT_CONFIG_KEYS = /* @__PURE__ */ new Set([
   "docker-cwd-permission"
   // Override CWD mount permission for this profile
 ]);
+var IMAGE_CONFIG_KEYS = /* @__PURE__ */ new Set([
+  "model",
+  "size",
+  "quality",
+  "count",
+  "output",
+  "quiet"
+]);
+var SPEECH_CONFIG_KEYS = /* @__PURE__ */ new Set([
+  "model",
+  "voice",
+  "format",
+  "speed",
+  "output",
+  "quiet"
+]);
 var CUSTOM_CONFIG_KEYS = /* @__PURE__ */ new Set([
   ...COMPLETE_CONFIG_KEYS,
   ...AGENT_CONFIG_KEYS,
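A sketch of what the new validators (next hunk) accept and reject for the `[image]` and `[speech]` config tables:

    validateImageConfig({ model: "dall-e-3", count: 2, quiet: true }, "image"); // ok
    validateSpeechConfig({ voice: "nova", speed: 1.5 }, "speech");              // ok
    validateImageConfig({ resolution: "4K" }, "image");
    // -> ConfigError: [image].resolution is not a valid option
    validateSpeechConfig({ speed: 9 }, "speech"); // rejected: speed must be within 0.25-4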
@@ -9380,6 +10432,75 @@ function validateAgentConfig(raw, section) {
   }
   return result;
 }
+function validateImageConfig(raw, section) {
+  if (typeof raw !== "object" || raw === null) {
+    throw new ConfigError(`[${section}] must be a table`);
+  }
+  const rawObj = raw;
+  for (const key of Object.keys(rawObj)) {
+    if (!IMAGE_CONFIG_KEYS.has(key)) {
+      throw new ConfigError(`[${section}].${key} is not a valid option`);
+    }
+  }
+  const result = {};
+  if ("model" in rawObj) {
+    result.model = validateString(rawObj.model, "model", section);
+  }
+  if ("size" in rawObj) {
+    result.size = validateString(rawObj.size, "size", section);
+  }
+  if ("quality" in rawObj) {
+    result.quality = validateString(rawObj.quality, "quality", section);
+  }
+  if ("count" in rawObj) {
+    result.count = validateNumber(rawObj.count, "count", section, {
+      integer: true,
+      min: 1,
+      max: 10
+    });
+  }
+  if ("output" in rawObj) {
+    result.output = validateString(rawObj.output, "output", section);
+  }
+  if ("quiet" in rawObj) {
+    result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
+  }
+  return result;
+}
+function validateSpeechConfig(raw, section) {
+  if (typeof raw !== "object" || raw === null) {
+    throw new ConfigError(`[${section}] must be a table`);
+  }
+  const rawObj = raw;
+  for (const key of Object.keys(rawObj)) {
+    if (!SPEECH_CONFIG_KEYS.has(key)) {
+      throw new ConfigError(`[${section}].${key} is not a valid option`);
+    }
+  }
+  const result = {};
+  if ("model" in rawObj) {
+    result.model = validateString(rawObj.model, "model", section);
+  }
+  if ("voice" in rawObj) {
+    result.voice = validateString(rawObj.voice, "voice", section);
+  }
+  if ("format" in rawObj) {
+    result.format = validateString(rawObj.format, "format", section);
+  }
+  if ("speed" in rawObj) {
+    result.speed = validateNumber(rawObj.speed, "speed", section, {
+      min: 0.25,
+      max: 4
+    });
+  }
+  if ("output" in rawObj) {
+    result.output = validateString(rawObj.output, "output", section);
+  }
+  if ("quiet" in rawObj) {
+    result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
+  }
+  return result;
+}
 function validateStringOrBoolean(value, field, section) {
   if (typeof value === "string" || typeof value === "boolean") {
     return value;
@@ -9502,6 +10623,10 @@ function validateConfig(raw, configPath) {
     result.complete = validateCompleteConfig(value, key);
   } else if (key === "agent") {
     result.agent = validateAgentConfig(value, key);
+  } else if (key === "image") {
+    result.image = validateImageConfig(value, key);
+  } else if (key === "speech") {
+    result.speech = validateSpeechConfig(value, key);
   } else if (key === "prompts") {
     result.prompts = validatePromptsConfig(value, key);
   } else if (key === "docker") {
@@ -9546,7 +10671,7 @@ function loadConfig() {
   return resolveTemplatesInConfig(inherited, configPath);
 }
 function getCustomCommandNames(config) {
-  const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent", "prompts", "docker"]);
+  const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent", "image", "speech", "prompts", "docker"]);
   return Object.keys(config).filter((key) => !reserved.has(key));
 }
 function resolveTemplatesInConfig(config, configPath) {
@@ -11141,19 +12266,118 @@ function registerGadgetCommand(program, env) {
   );
 }

+// src/cli/image-command.ts
+var import_node_fs11 = require("fs");
+var DEFAULT_IMAGE_MODEL = "dall-e-3";
+async function executeImage(promptArg, options, env) {
+  const prompt = await resolvePrompt(promptArg, env);
+  const client = env.createClient();
+  const model = options.model;
+  const n = options.count ? Number.parseInt(options.count, 10) : 1;
+  const stderrTTY = env.stderr.isTTY === true;
+  if (!options.quiet && stderrTTY) {
+    env.stderr.write(`${SUMMARY_PREFIX} Generating image with ${model}...
+`);
+  }
+  const result = await client.image.generate({
+    model,
+    prompt,
+    size: options.size,
+    quality: options.quality,
+    n,
+    responseFormat: options.output ? "b64_json" : "url"
+  });
+  if (options.output) {
+    const imageData = result.images[0];
+    if (imageData.b64Json) {
+      const buffer = Buffer.from(imageData.b64Json, "base64");
+      (0, import_node_fs11.writeFileSync)(options.output, buffer);
+      if (!options.quiet) {
+        env.stderr.write(`${SUMMARY_PREFIX} Image saved to ${options.output}
+`);
+      }
+    } else if (imageData.url) {
+      env.stdout.write(`${imageData.url}
+`);
+    }
+  } else {
+    for (const image of result.images) {
+      if (image.url) {
+        env.stdout.write(`${image.url}
+`);
+      } else if (image.b64Json) {
+        env.stdout.write(image.b64Json);
+      }
+    }
+  }
+  if (!options.quiet && stderrTTY) {
+    const parts = [
+      `${result.images.length} image(s)`,
+      `size: ${result.usage.size}`,
+      `quality: ${result.usage.quality}`
+    ];
+    if (result.cost !== void 0) {
+      parts.push(`cost: ${formatCost(result.cost)}`);
+    }
+    env.stderr.write(`${SUMMARY_PREFIX} ${parts.join(" | ")}
+`);
+  }
+}
+function registerImageCommand(program, env, config) {
+  program.command(COMMANDS.image).description("Generate images from a text prompt.").argument("[prompt]", "Image generation prompt. If omitted, stdin is used when available.").option(
+    OPTION_FLAGS.model,
+    OPTION_DESCRIPTIONS.model,
+    config?.model ?? DEFAULT_IMAGE_MODEL
+  ).option(OPTION_FLAGS.imageSize, OPTION_DESCRIPTIONS.imageSize, config?.size).option(OPTION_FLAGS.imageQuality, OPTION_DESCRIPTIONS.imageQuality, config?.quality).option(OPTION_FLAGS.imageCount, OPTION_DESCRIPTIONS.imageCount, config?.count?.toString()).option(OPTION_FLAGS.imageOutput, OPTION_DESCRIPTIONS.imageOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
+    (prompt, options) => executeAction(() => executeImage(prompt, options, env), env)
+  );
+}
+
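In `executeImage` above, `-o/--output` switches the request to `b64_json` and writes the decoded first image to disk, while the no-output path requests URLs and prints one per line on stdout; progress and the cost summary go to stderr, and only when stderr is a TTY and `--quiet` is unset. An inferred invocation (illustrative, built from the flag definitions earlier in this diff): `llmist image "a red fox" --size 1024x1024 --quality hd -o fox.png`.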
 // src/cli/models-command.ts
 var import_chalk8 = __toESM(require("chalk"), 1);
 init_model_shortcuts();
 async function handleModelsCommand(options, env) {
   const client = env.createClient();
-  const models = client.modelRegistry.listModels(options.provider);
+  const showText = options.all || options.text || !options.image && !options.speech;
+  const showImage = options.all || options.image;
+  const showSpeech = options.all || options.speech;
+  const textModels = showText ? client.modelRegistry.listModels(options.provider) : [];
+  const imageModels = showImage ? client.image.listModels().filter((m) => !options.provider || m.provider === options.provider) : [];
+  const speechModels = showSpeech ? client.speech.listModels().filter((m) => !options.provider || m.provider === options.provider) : [];
   if (options.format === "json") {
-    renderJSON(models, env.stdout);
+    renderJSON(textModels, imageModels, speechModels, env.stdout);
   } else {
-    renderTable(models, options.verbose || false, env.stdout);
+    renderAllTables(textModels, imageModels, speechModels, options.verbose || false, env.stdout);
+  }
+}
+function renderAllTables(textModels, imageModels, speechModels, verbose, stream2) {
+  const hasAnyModels = textModels.length > 0 || imageModels.length > 0 || speechModels.length > 0;
+  if (!hasAnyModels) {
+    stream2.write(import_chalk8.default.yellow("\nNo models found matching the specified criteria.\n\n"));
+    return;
+  }
+  stream2.write(import_chalk8.default.bold.cyan("\nAvailable Models\n"));
+  stream2.write(import_chalk8.default.cyan("=".repeat(80)) + "\n\n");
+  if (textModels.length > 0) {
+    renderTextTable(textModels, verbose, stream2);
+  }
+  if (imageModels.length > 0) {
+    renderImageTable(imageModels, verbose, stream2);
+  }
+  if (speechModels.length > 0) {
+    renderSpeechTable(speechModels, verbose, stream2);
+  }
+  if (textModels.length > 0) {
+    stream2.write(import_chalk8.default.bold.magenta("Model Shortcuts\n"));
+    stream2.write(import_chalk8.default.dim("\u2500".repeat(80)) + "\n");
+    const shortcuts = Object.entries(MODEL_ALIASES).sort((a, b) => a[0].localeCompare(b[0]));
+    for (const [shortcut, fullName] of shortcuts) {
+      stream2.write(import_chalk8.default.cyan(` ${shortcut.padEnd(15)}`) + import_chalk8.default.dim(" \u2192 ") + import_chalk8.default.white(fullName) + "\n");
+    }
+    stream2.write("\n");
   }
 }
-function renderTable(models, verbose, stream2) {
+function renderTextTable(models, verbose, stream2) {
   const grouped = /* @__PURE__ */ new Map();
   for (const model of models) {
     const provider = model.provider;
@@ -11162,13 +12386,13 @@ function renderTable(models, verbose, stream2) {
     }
     grouped.get(provider).push(model);
   }
-  stream2.write(import_chalk8.default.bold.cyan("\nAvailable Models\n"));
-  stream2.write(import_chalk8.default.cyan("=".repeat(80)) + "\n\n");
+  stream2.write(import_chalk8.default.bold.blue("\u{1F4DD} Text/LLM Models\n"));
+  stream2.write(import_chalk8.default.dim("\u2500".repeat(80)) + "\n\n");
   const providers = Array.from(grouped.keys()).sort();
   for (const provider of providers) {
     const providerModels = grouped.get(provider);
     const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
-    stream2.write(import_chalk8.default.bold.yellow(`${providerName} Models
+    stream2.write(import_chalk8.default.bold.yellow(`${providerName}
 `));
     if (verbose) {
       renderVerboseTable(providerModels, stream2);
@@ -11177,13 +12401,6 @@ function renderTable(models, verbose, stream2) {
     }
     stream2.write("\n");
   }
-  stream2.write(import_chalk8.default.bold.magenta("Model Shortcuts\n"));
-  stream2.write(import_chalk8.default.dim("\u2500".repeat(80)) + "\n");
-  const shortcuts = Object.entries(MODEL_ALIASES).sort((a, b) => a[0].localeCompare(b[0]));
-  for (const [shortcut, fullName] of shortcuts) {
-    stream2.write(import_chalk8.default.cyan(` ${shortcut.padEnd(15)}`) + import_chalk8.default.dim(" \u2192 ") + import_chalk8.default.white(fullName) + "\n");
-  }
-  stream2.write("\n");
 }
 function renderCompactTable(models, stream2) {
   const idWidth = 25;
@@ -11260,9 +12477,171 @@ function renderVerboseTable(models, stream2) {
11260
12477
  }
11261
12478
  stream2.write("\n");
11262
12479
  }
11263
- function renderJSON(models, stream2) {
11264
- const output = {
11265
- models: models.map((model) => ({
12480
+ function renderImageTable(models, verbose, stream2) {
12481
+ stream2.write(import_chalk8.default.bold.green("\u{1F3A8} Image Generation Models\n"));
12482
+ stream2.write(import_chalk8.default.dim("\u2500".repeat(80)) + "\n\n");
12483
+ const grouped = /* @__PURE__ */ new Map();
12484
+ for (const model of models) {
12485
+ if (!grouped.has(model.provider)) {
12486
+ grouped.set(model.provider, []);
12487
+ }
12488
+ grouped.get(model.provider).push(model);
12489
+ }
12490
+ for (const [provider, providerModels] of Array.from(grouped.entries()).sort()) {
12491
+ const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
12492
+ stream2.write(import_chalk8.default.bold.yellow(`${providerName}
12493
+ `));
12494
+ if (verbose) {
12495
+ for (const model of providerModels) {
12496
+ stream2.write(import_chalk8.default.bold.green(`
12497
+ ${model.modelId}
12498
+ `));
12499
+ stream2.write(import_chalk8.default.dim(" " + "\u2500".repeat(60)) + "\n");
12500
+ stream2.write(` ${import_chalk8.default.dim("Name:")} ${import_chalk8.default.white(model.displayName)}
12501
+ `);
12502
+ stream2.write(` ${import_chalk8.default.dim("Sizes:")} ${import_chalk8.default.yellow(model.supportedSizes.join(", "))}
12503
+ `);
12504
+ if (model.supportedQualities) {
12505
+ stream2.write(` ${import_chalk8.default.dim("Qualities:")} ${import_chalk8.default.yellow(model.supportedQualities.join(", "))}
12506
+ `);
12507
+ }
12508
+ stream2.write(` ${import_chalk8.default.dim("Max Images:")} ${import_chalk8.default.yellow(model.maxImages.toString())}
12509
+ `);
12510
+ stream2.write(` ${import_chalk8.default.dim("Pricing:")} ${import_chalk8.default.cyan(formatImagePrice(model))}
12511
+ `);
12512
+ if (model.features) {
12513
+ const features = [];
12514
+ if (model.features.textRendering) features.push("text-rendering");
12515
+ if (model.features.transparency) features.push("transparency");
12516
+ if (model.features.conversational) features.push("conversational");
12517
+ if (features.length > 0) {
12518
+ stream2.write(` ${import_chalk8.default.dim("Features:")} ${import_chalk8.default.blue(features.join(", "))}
12519
+ `);
12520
+ }
12521
+ }
12522
+ }
12523
+ } else {
12524
+ const idWidth = 32;
12525
+ const nameWidth = 25;
12526
+ const sizesWidth = 20;
12527
+ const priceWidth = 15;
12528
+ stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
12529
+ stream2.write(
12530
+ import_chalk8.default.bold(
12531
+ "Model ID".padEnd(idWidth) + " " + "Display Name".padEnd(nameWidth) + " " + "Sizes".padEnd(sizesWidth) + " " + "Price".padEnd(priceWidth)
12532
+ ) + "\n"
12533
+ );
12534
+ stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
12535
+ for (const model of providerModels) {
12536
+ const sizes = model.supportedSizes.length > 2 ? model.supportedSizes.slice(0, 2).join(", ") + "..." : model.supportedSizes.join(", ");
12537
+ stream2.write(
12538
+ import_chalk8.default.green(model.modelId.padEnd(idWidth)) + " " + import_chalk8.default.white(model.displayName.substring(0, nameWidth - 1).padEnd(nameWidth)) + " " + import_chalk8.default.yellow(sizes.padEnd(sizesWidth)) + " " + import_chalk8.default.cyan(formatImagePrice(model).padEnd(priceWidth)) + "\n"
12539
+ );
12540
+ }
12541
+ stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
12542
+ }
12543
+ stream2.write("\n");
12544
+ }
12545
+ }
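
Both render functions group models into per-provider buckets with a plain Map before printing. A minimal standalone sketch of that grouping, illustrative only and not the package's exported API:

function groupByProvider<T extends { provider: string }>(models: T[]): Map<string, T[]> {
  // Same Map-based group-by as renderImageTable/renderSpeechTable above.
  const grouped = new Map<string, T[]>();
  for (const model of models) {
    const bucket = grouped.get(model.provider);
    if (bucket) bucket.push(model);
    else grouped.set(model.provider, [model]);
  }
  return grouped;
}

Note that Array.from(grouped.entries()).sort() compares [key, value] pairs after string conversion, so the entries effectively sort by provider name.
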
12546
+ function renderSpeechTable(models, verbose, stream2) {
12547
+ stream2.write(import_chalk8.default.bold.magenta("\u{1F3A4} Speech (TTS) Models\n"));
12548
+ stream2.write(import_chalk8.default.dim("\u2500".repeat(80)) + "\n\n");
12549
+ const grouped = /* @__PURE__ */ new Map();
12550
+ for (const model of models) {
12551
+ if (!grouped.has(model.provider)) {
12552
+ grouped.set(model.provider, []);
12553
+ }
12554
+ grouped.get(model.provider).push(model);
12555
+ }
12556
+ for (const [provider, providerModels] of Array.from(grouped.entries()).sort()) {
12557
+ const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
12558
+ stream2.write(import_chalk8.default.bold.yellow(`${providerName}
12559
+ `));
12560
+ if (verbose) {
12561
+ for (const model of providerModels) {
12562
+ stream2.write(import_chalk8.default.bold.green(`
12563
+ ${model.modelId}
12564
+ `));
12565
+ stream2.write(import_chalk8.default.dim(" " + "\u2500".repeat(60)) + "\n");
12566
+ stream2.write(` ${import_chalk8.default.dim("Name:")} ${import_chalk8.default.white(model.displayName)}
12567
+ `);
12568
+ stream2.write(` ${import_chalk8.default.dim("Voices:")} ${import_chalk8.default.yellow(model.voices.length.toString())} voices
12569
+ `);
12570
+ if (model.voices.length <= 6) {
12571
+ stream2.write(` ${import_chalk8.default.dim(model.voices.join(", "))}
12572
+ `);
12573
+ } else {
12574
+ stream2.write(` ${import_chalk8.default.dim(model.voices.slice(0, 6).join(", ") + "...")}
12575
+ `);
12576
+ }
12577
+ stream2.write(` ${import_chalk8.default.dim("Formats:")} ${import_chalk8.default.yellow(model.formats.join(", "))}
12578
+ `);
12579
+ stream2.write(` ${import_chalk8.default.dim("Max Input:")} ${import_chalk8.default.yellow(model.maxInputLength.toString())} chars
12580
+ `);
12581
+ stream2.write(` ${import_chalk8.default.dim("Pricing:")} ${import_chalk8.default.cyan(formatSpeechPrice(model))}
12582
+ `);
12583
+ if (model.features) {
12584
+ const features = [];
12585
+ if (model.features.multiSpeaker) features.push("multi-speaker");
12586
+ if (model.features.voiceInstructions) features.push("voice-instructions");
12587
+ if (model.features.languages) features.push(`${model.features.languages} languages`);
12588
+ if (features.length > 0) {
12589
+ stream2.write(` ${import_chalk8.default.dim("Features:")} ${import_chalk8.default.blue(features.join(", "))}
12590
+ `);
12591
+ }
12592
+ }
12593
+ }
12594
+ } else {
12595
+ const idWidth = 30;
12596
+ const nameWidth = 28;
12597
+ const voicesWidth = 12;
12598
+ const priceWidth = 18;
12599
+ stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
12600
+ stream2.write(
12601
+ import_chalk8.default.bold(
12602
+ "Model ID".padEnd(idWidth) + " " + "Display Name".padEnd(nameWidth) + " " + "Voices".padEnd(voicesWidth) + " " + "Price".padEnd(priceWidth)
12603
+ ) + "\n"
12604
+ );
12605
+ stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
12606
+ for (const model of providerModels) {
12607
+ stream2.write(
12608
+ import_chalk8.default.green(model.modelId.padEnd(idWidth)) + " " + import_chalk8.default.white(model.displayName.substring(0, nameWidth - 1).padEnd(nameWidth)) + " " + import_chalk8.default.yellow(`${model.voices.length} voices`.padEnd(voicesWidth)) + " " + import_chalk8.default.cyan(formatSpeechPrice(model).padEnd(priceWidth)) + "\n"
12609
+ );
12610
+ }
12611
+ stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
12612
+ }
12613
+ stream2.write("\n");
12614
+ }
12615
+ }
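
In both compact tables the separator width is the sum of the four column widths plus 6, i.e. the three two-space gaps between the four columns; and substring(0, nameWidth - 1) before padEnd(nameWidth) guarantees at least one trailing space even when a display name would otherwise fill its column exactly.
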
12616
+ function formatImagePrice(model) {
12617
+ if (model.pricing.perImage !== void 0) {
12618
+ return `$${model.pricing.perImage.toFixed(2)}/img`;
12619
+ }
12620
+ if (model.pricing.bySize) {
12621
+ const prices = Object.values(model.pricing.bySize);
12622
+ const minPrice = Math.min(...prices.flatMap((p) => typeof p === "number" ? [p] : Object.values(p)));
12623
+ const maxPrice = Math.max(...prices.flatMap((p) => typeof p === "number" ? [p] : Object.values(p)));
12624
+ if (minPrice === maxPrice) {
12625
+ return `$${minPrice.toFixed(2)}/img`;
12626
+ }
12627
+ return `$${minPrice.toFixed(2)}-${maxPrice.toFixed(2)}`;
12628
+ }
12629
+ return "varies";
12630
+ }
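
The image pricing metadata comes in two shapes, which is why the min/max pass flattens with flatMap: a bySize entry may be a flat number or a nested quality-to-price map. A hypothetical example (illustrative values, not real model pricing):

const flatPricing = { perImage: 0.04 };            // -> "$0.04/img"
const tieredPricing = {
  bySize: {
    "1024x1024": 0.04,                             // flat number
    "1536x1024": { standard: 0.08, hd: 0.12 },     // nested quality map
  },
};
// flatMap yields [0.04, 0.08, 0.12]; min 0.04, max 0.12 -> "$0.04-0.12"
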
12631
+ function formatSpeechPrice(model) {
12632
+ if (model.pricing.perCharacter !== void 0) {
12633
+ const perMillion = model.pricing.perCharacter * 1e6;
12634
+ return `$${perMillion.toFixed(0)}/1M chars`;
12635
+ }
12636
+ if (model.pricing.perMinute !== void 0) {
12637
+ return `~$${model.pricing.perMinute.toFixed(2)}/min`;
12638
+ }
12639
+ return "varies";
12640
+ }
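
formatSpeechPrice scales a per-character rate up to a per-million-characters figure: for a hypothetical rate of $0.000015 per character, 0.000015 × 1e6 = 15, rendered as "$15/1M chars". The per-minute fallback is prefixed with "~", presumably because billed audio duration can only be estimated from the input text.
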
12641
+ function renderJSON(textModels, imageModels, speechModels, stream2) {
12642
+ const output = {};
12643
+ if (textModels.length > 0) {
12644
+ output.textModels = textModels.map((model) => ({
11266
12645
  provider: model.provider,
11267
12646
  modelId: model.modelId,
11268
12647
  displayName: model.displayName,
@@ -11278,9 +12657,33 @@ function renderJSON(models, stream2) {
11278
12657
  knowledgeCutoff: model.knowledgeCutoff,
11279
12658
  features: model.features,
11280
12659
  metadata: model.metadata
11281
- })),
11282
- shortcuts: MODEL_ALIASES
11283
- };
12660
+ }));
12661
+ output.shortcuts = MODEL_ALIASES;
12662
+ }
12663
+ if (imageModels.length > 0) {
12664
+ output.imageModels = imageModels.map((model) => ({
12665
+ provider: model.provider,
12666
+ modelId: model.modelId,
12667
+ displayName: model.displayName,
12668
+ supportedSizes: model.supportedSizes,
12669
+ supportedQualities: model.supportedQualities,
12670
+ maxImages: model.maxImages,
12671
+ pricing: model.pricing,
12672
+ features: model.features
12673
+ }));
12674
+ }
12675
+ if (speechModels.length > 0) {
12676
+ output.speechModels = speechModels.map((model) => ({
12677
+ provider: model.provider,
12678
+ modelId: model.modelId,
12679
+ displayName: model.displayName,
12680
+ voices: model.voices,
12681
+ formats: model.formats,
12682
+ maxInputLength: model.maxInputLength,
12683
+ pricing: model.pricing,
12684
+ features: model.features
12685
+ }));
12686
+ }
11284
12687
  stream2.write(JSON.stringify(output, null, 2) + "\n");
11285
12688
  }
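
With --format json the output object carries only the sections that matched the selected type flags, and shortcuts (the MODEL_ALIASES map) is emitted only when text models are included. An illustrative shape for a speech-only listing (field values are placeholders, not real registry data):

{
  "speechModels": [
    {
      "provider": "openai",
      "modelId": "tts-1",
      "displayName": "...",
      "voices": ["nova", "..."],
      "formats": ["..."],
      "maxInputLength": 4096,
      "pricing": { "perCharacter": 0.000015 },
      "features": { "voiceInstructions": true }
    }
  ]
}
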
11286
12689
  function formatTokens2(count) {
@@ -11293,7 +12696,7 @@ function formatTokens2(count) {
11293
12696
  }
11294
12697
  }
11295
12698
  function registerModelsCommand(program, env) {
11296
- program.command(COMMANDS.models).description("List all available LLM models with pricing and capabilities.").option("--provider <name>", "Filter by provider (openai, anthropic, gemini)").option("--format <format>", "Output format: table or json", "table").option("--verbose", "Show detailed model information", false).action(
12699
+ program.command(COMMANDS.models).description("List available models with pricing and capabilities.").option("--provider <name>", "Filter by provider (openai, anthropic, gemini)").option("--format <format>", "Output format: table or json", "table").option("--verbose", "Show detailed model information", false).option("--text", "Show text/LLM models (default if no type specified)").option("--image", "Show image generation models").option("--speech", "Show speech/TTS models").option("--all", "Show all model types (text, image, speech)").action(
11297
12700
  (options) => executeAction(
11298
12701
  () => handleModelsCommand(options, env),
11299
12702
  env
@@ -11301,6 +12704,60 @@ function registerModelsCommand(program, env) {
11301
12704
  );
11302
12705
  }
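
The new type flags make each modality opt-in. Assuming the installed binary is named llmist (after the package; the flags themselves are confirmed by the registration above), typical invocations would be:

llmist models                       # text models (default when no type flag is given)
llmist models --image               # image generation models only
llmist models --speech --verbose    # TTS models with voices, formats, pricing
llmist models --all --format json   # every model type, as JSON
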
11303
12706
 
12707
+ // src/cli/speech-command.ts
12708
+ var import_node_fs12 = require("fs");
12709
+ var DEFAULT_SPEECH_MODEL = "tts-1";
12710
+ var DEFAULT_VOICE = "nova";
12711
+ async function executeSpeech(textArg, options, env) {
12712
+ const text = await resolvePrompt(textArg, env);
12713
+ const client = env.createClient();
12714
+ const model = options.model;
12715
+ const voice = options.voice ?? DEFAULT_VOICE;
12716
+ const speed = options.speed ? Number.parseFloat(options.speed) : void 0;
12717
+ const stderrTTY = env.stderr.isTTY === true;
12718
+ if (!options.quiet && stderrTTY) {
12719
+ env.stderr.write(`${SUMMARY_PREFIX} Generating speech with ${model} (voice: ${voice})...
12720
+ `);
12721
+ }
12722
+ const result = await client.speech.generate({
12723
+ model,
12724
+ input: text,
12725
+ voice,
12726
+ responseFormat: options.format,
12727
+ speed
12728
+ });
12729
+ const audioBuffer = Buffer.from(result.audio);
12730
+ if (options.output) {
12731
+ (0, import_node_fs12.writeFileSync)(options.output, audioBuffer);
12732
+ if (!options.quiet) {
12733
+ env.stderr.write(`${SUMMARY_PREFIX} Audio saved to ${options.output}
12734
+ `);
12735
+ }
12736
+ } else {
12737
+ env.stdout.write(audioBuffer);
12738
+ }
12739
+ if (!options.quiet && stderrTTY) {
12740
+ const parts = [
12741
+ `${result.usage.characterCount} characters`,
12742
+ `format: ${result.format}`
12743
+ ];
12744
+ if (result.cost !== void 0) {
12745
+ parts.push(`cost: ${formatCost(result.cost)}`);
12746
+ }
12747
+ env.stderr.write(`${SUMMARY_PREFIX} ${parts.join(" | ")}
12748
+ `);
12749
+ }
12750
+ }
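
executeSpeech is a thin wrapper over the client's speech surface. A minimal programmatic sketch of the same call, with request/response types inferred from the CLI code above (the SpeechClient type and the "mp3" format name are assumptions, not confirmed by this diff):

import { writeFileSync } from "node:fs";

// Shapes inferred from executeSpeech above; a sketch, not the real API surface.
type SpeechClient = {
  speech: {
    generate(opts: {
      model: string;
      input: string;
      voice: string;
      responseFormat?: string;
      speed?: number;
    }): Promise<{
      audio: Uint8Array;
      format: string;
      cost?: number;
      usage: { characterCount: number };
    }>;
  };
};

async function speak(client: SpeechClient): Promise<void> {
  const result = await client.speech.generate({
    model: "tts-1",        // the CLI's DEFAULT_SPEECH_MODEL
    input: "Hello from llmist.",
    voice: "nova",         // the CLI's DEFAULT_VOICE
    responseFormat: "mp3", // assumed format name
  });
  writeFileSync("speech.mp3", Buffer.from(result.audio));
  console.error(`${result.usage.characterCount} chars | format: ${result.format}`);
  if (result.cost !== undefined) console.error(`cost: $${result.cost.toFixed(4)}`);
}
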
12751
+ function registerSpeechCommand(program, env, config) {
12752
+ program.command(COMMANDS.speech).description("Generate speech audio from text.").argument("[text]", "Text to convert to speech. If omitted, stdin is used when available.").option(
12753
+ OPTION_FLAGS.model,
12754
+ OPTION_DESCRIPTIONS.model,
12755
+ config?.model ?? DEFAULT_SPEECH_MODEL
12756
+ ).option(OPTION_FLAGS.voice, OPTION_DESCRIPTIONS.voice, config?.voice ?? DEFAULT_VOICE).option(OPTION_FLAGS.speechFormat, OPTION_DESCRIPTIONS.speechFormat, config?.format).option(OPTION_FLAGS.speechSpeed, OPTION_DESCRIPTIONS.speechSpeed, config?.speed?.toString()).option(OPTION_FLAGS.speechOutput, OPTION_DESCRIPTIONS.speechOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
12757
+ (text, options) => executeAction(() => executeSpeech(text, options, env), env)
12758
+ );
12759
+ }
12760
+
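
Because the [text] argument falls back to stdin, the speech command pipes naturally, and the raw audio goes to stdout when no output file is given. The flag spellings below come from OPTION_FLAGS constants this diff does not show, so treat them as assumptions:

echo "Hello there" | llmist speech --output hello.mp3
llmist speech "Hello there" > hello.mp3
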
11304
12761
  // src/cli/environment.ts
11305
12762
  var import_node_readline = __toESM(require("readline"), 1);
11306
12763
  var import_chalk9 = __toESM(require("chalk"), 1);
@@ -11452,6 +12909,8 @@ function createProgram(env, config) {
11452
12909
  });
11453
12910
  registerCompleteCommand(program, env, config?.complete);
11454
12911
  registerAgentCommand(program, env, config?.agent);
12912
+ registerImageCommand(program, env, config?.image);
12913
+ registerSpeechCommand(program, env, config?.speech);
11455
12914
  registerModelsCommand(program, env);
11456
12915
  registerGadgetCommand(program, env);
11457
12916
  if (config) {